diff --git a/.gitignore b/.gitignore index fe509f7a..4c59cff6 100644 --- a/.gitignore +++ b/.gitignore @@ -6,10 +6,10 @@ *.cmd *.suo *.ncb -*.idb -*.obj -*.opt -*.pch +*.idb +*.obj +*.opt +*.pch *.pyc *.log *.exe @@ -21,6 +21,10 @@ *.manifest *.user *.bak +*.orig +*.lock* +*.waf* +.DS_Store bin/* src/msvc/Debug DLL src/msvc/Debug Static @@ -51,3 +55,31 @@ src/demo/searchfiles/msvc/Release DLL src/demo/searchfiles/msvc/Release Static src/demo/searchfiles/msvc/Debug DLL src/demo/searchfiles/msvc/Debug Static +CMakeCache.txt +CMakeFiles/ +build/ +CTestTestfile.cmake +Makefile +cmake_install.cmake +cmake_uninstall.cmake +include/Config.h +install_manifest.txt +liblucene++-contrib.pc +liblucene++.pc +src/contrib/CMakeFiles/ +src/contrib/CTestTestfile.cmake +src/contrib/Makefile +src/contrib/cmake_install.cmake +src/core/CMakeFiles/ +src/core/CTestTestfile.cmake +src/core/Makefile +src/core/cmake_install.cmake +src/demo/CMakeFiles/ +src/demo/CTestTestfile.cmake +src/demo/Makefile +src/demo/cmake_install.cmake +src/test/CMakeFiles/ +src/test/CTestTestfile.cmake +src/test/Makefile +src/test/cmake_install.cmake + diff --git a/CMakeLists.txt b/CMakeLists.txt index 7457c1b5..5c0fc020 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,191 +1,134 @@ -project(lucene++-base) - #################################### -# VERSION information -#These versions match the Lucene version -SET(LUCENE++_VERSION_MAJOR "3") -SET(LUCENE++_VERSION_MINOR "0") -SET(LUCENE++_VERSION_REVISION "3") -SET(LUCENE++_VERSION_PATCH "4") - -# SOVERSION information -#Must be incremented for releases if the api is not backwards compatible -SET(LUCENE++_SOVERSION "0") - -#derived versions -MATH(EXPR LUCENE++_INT_VERSION "(${LUCENE++_VERSION_MAJOR} * 1000000) + (${LUCENE++_VERSION_MINOR} * 10000) + (${LUCENE++_VERSION_REVISION} * 100) + (${LUCENE++_VERSION_PATCH} * 1)" ) -SET(LUCENE++_VERSION "${LUCENE++_VERSION_MAJOR}.${LUCENE++_VERSION_MINOR}.${LUCENE++_VERSION_REVISION}.${LUCENE++_VERSION_PATCH}") 
-MESSAGE(${LUCENE++_INT_VERSION}) -MESSAGE(${LUCENE++_VERSION}) - +# init #################################### -#################################### -# Build system options and includes -#################################### -CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0 FATAL_ERROR) -#build policies -if(COMMAND cmake_policy) - cmake_policy(SET CMP0003 NEW) -endif(COMMAND cmake_policy) +cmake_minimum_required(VERSION 3.5) -# include specific modules -set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") - -#if setup using the Toolchain-llvm.cmake file, then use llvm... -IF ( ENABLE_LLVM ) - INCLUDE (Toolchain-llvm) -ENDIF ( ENABLE_LLVM ) - -#define options... -INCLUDE (Lucene++Docs) -INCLUDE (FindThreads) -INCLUDE (TestCXXAcceptsFlag) -ENABLE_TESTING() - -#Single output directory for building all executables and libraries. -SET(EXECUTABLE_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin CACHE PATH "Executable Output Directory" FORCE) -SET(LIBRARY_OUTPUT_PATH ${CMAKE_BINARY_DIR}/bin CACHE PATH "Library Output Directory" FORCE) -#################################### +project(lucene++) -#################################### -#user specified build options -#################################### -IF(NOT CMAKE_BUILD_TYPE) - SET(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING - "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." 
- FORCE) -ELSE(NOT CMAKE_BUILD_TYPE) - MESSAGE( "Compiling as ${CMAKE_BUILD_TYPE}" ) -ENDIF(NOT CMAKE_BUILD_TYPE) - -OPTION(ENABLE_PACKAGING - "create build scripts for creating lucene++ packages" - OFF) -OPTION(ENABLE_NEDMALLOC - "use nedmalloc for memory allocations" - OFF) -OPTION(LUCENE_USE_STATIC_BOOST_LIBS - "use static boost libraries " - OFF) -OPTION(ENABLE_CYCLIC_CHECK - "enable cyclic checking " - OFF) - -#install path options -SET(LIB_DESTINATION "lib" CACHE STRING "Define lib output directory name") - -IF ( ENABLE_NEDMALLOC ) - ADD_DEFINITIONS(-DLPP_USE_NEDMALLOC) -ENDIF ( ENABLE_NEDMALLOC ) -IF ( ENABLE_CYCLIC_CHECK ) - ADD_DEFINITIONS(-DLPP_USE_CYCLIC_CHECK) -ENDIF ( ENABLE_CYCLIC_CHECK ) -#################################### +set(lucene++_VERSION_MAJOR 3) +set(lucene++_VERSION_MINOR 0) +set(lucene++_VERSION_PATCH 9) +set(lucene++_SOVERSION "0") + +set(lucene++_VERSION + "${lucene++_VERSION_MAJOR}.${lucene++_VERSION_MINOR}.${lucene++_VERSION_PATCH}") + + +# set default build type as release +if(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE) + set(CMAKE_BUILD_TYPE "Release") +endif() + +set(LIB_DESTINATION + "${CMAKE_INSTALL_LIBDIR}" CACHE STRING "Define lib output directory name") #################################### -# PLATFORM specific options +# CMake Modules #################################### -#add a debug build postfix -if(WIN32 OR WIN64) - set(CMAKE_DEBUG_POSTFIX "d") -endif(WIN32 OR WIN64) -if(NOT MSVC AND NOT CMAKE_SYSTEM MATCHES "SunOS-5*.") - add_definitions(-fPIC) -endif(NOT MSVC AND NOT CMAKE_SYSTEM MATCHES "SunOS-5*.") +# include specific modules +set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake") +include(options.cmake) + +# pre-compiled headers support +include(cotire) -INCLUDE(MacroCheckGccVisibility) -MACRO_CHECK_GCC_VISIBILITY(LPP_HAVE_GXXCLASSVISIBILITY) -if ( LPP_HAVE_GXXCLASSVISIBILITY ) - ADD_DEFINITIONS(-DLPP_HAVE_GXXCLASSVISIBILITY) +# if setup using the Toolchain-llvm.cmake file, then use llvm... 
+if(ENABLE_LLVM) + include(Toolchain-llvm) endif() -IF(CYGWIN) - ADD_DEFINITIONS(-D__LARGE64_FILES) -ENDIF(CYGWIN) - -#set ansi mode -SET(ENABLE_ANSI_MODE OFF) -IF(CMAKE_COMPILER_IS_GNUCXX) - SET(ENABLE_ANSI_MODE ON) - - #exceptions: - IF(MINGW OR CYGWIN) - SET(ENABLE_ANSI_MODE OFF) - ENDIF(MINGW OR CYGWIN) -ENDIF(CMAKE_COMPILER_IS_GNUCXX) -IF ( CMAKE_COMPILER_IS_GNUCC ) - IF( ENABLE_ANSI_MODE ) - SET (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -ansi") - ENDIF ( ENABLE_ANSI_MODE ) -ENDIF(CMAKE_COMPILER_IS_GNUCC) +# fetch dependencies +include(dependencies) -#################################### -#find boost -#################################### -SET(Boost_USE_STATIC_LIBS ${LUCENE_USE_STATIC_BOOST_LIBS}) -SET(Boost_USE_MULTITHREADED ON) -#Boost 1.38 required for bug fixes in basic_streambuf. -#The following line fails in earlier builds, so if altered, may allow older versions of boost: -#boost::gregorian::date date = parser.parse_date(paddedDate.c_str(), dateFormat->c_str(), svp); -find_package( Boost 1.38.0 COMPONENTS date_time filesystem iostreams regex system thread unit_test_framework REQUIRED) -IF (Boost_FOUND) - MESSAGE( STATUS "boost found: includes in ${Boost_INCLUDE_DIRS}, library in ${Boost_LIBRARY_DIRS}") - SET(LUCENE_BOOST_LIBS - ${Boost_FILESYSTEM_LIBRARY_RELEASE} - ${Boost_IOSTREAMS_LIBRARY_RELEASE} - ${Boost_REGEX_LIBRARY_RELEASE} - ${Boost_SYSTEM_LIBRARY_RELEASE} - ${Boost_THREAD_LIBRARY_RELEASE}) -ENDIF (Boost_FOUND) +# build docs +include(Lucene++Docs) + +# Enable C++11 +set(CMAKE_CXX_STANDARD 11) +set(CMAKE_CXX_STANDARD_REQUIRED ON) #################################### -# Pre-Compiled headers +# platform specific options #################################### -INCLUDE(PCHSupport) +if(WIN32 OR WIN64) +set(CMAKE_DEBUG_POSTFIX "d") +endif() -#todo: make this optional and make it possible to add more headers - like boost threads +if(MSVC) +# Disable automatic boost linking on Windows as libraries are added to the linker explicitly 
+add_definitions(-DBOOST_ALL_NO_LIB) +# enable exceptions, see http://msdn.microsoft.com/en-us/library/1deeycx5.aspx +add_definitions(-EHsc) -#################################### -# The subdirs -#################################### -#include sub-projects -ADD_SUBDIRECTORY (src/core) -ADD_SUBDIRECTORY (src/contrib EXCLUDE_FROM_ALL) -ADD_SUBDIRECTORY (src/demo EXCLUDE_FROM_ALL) -ADD_SUBDIRECTORY (src/test) +# Disable including too many Windows headers +add_definitions(-DWIN32_LEAN_AND_MEAN) +# Disable the min/max macros that conflict with std::min/std::max +add_definitions(-DNOMINMAX) +endif() -################################# -# install pkg-config file -################################# -IF(NOT WIN32) - configure_file(${CMAKE_CURRENT_SOURCE_DIR}/liblucene++.pc.cmake ${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc @ONLY) - install(FILES ${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc - DESTINATION ${LIB_DESTINATION}/pkgconfig ) -ENDIF(NOT WIN32) +if(NOT WIN32 AND NOT CMAKE_SYSTEM MATCHES "SunOS-5*.") + set(CMAKE_POSITION_INDEPENDENT_CODE ON) +endif() + +if(CYGWIN) + add_definitions(-D__LARGE64_FILES) +endif() + +if(APPLE) + set(CMAKE_MACOSX_RPATH ON) + set(CMAKE_SKIP_BUILD_RPATH FALSE) + set(CMAKE_BUILD_WITH_INSTALL_RPATH FALSE) + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE) + list(FIND CMAKE_PLATFORM_IMPLICIT_LINK_DIRECTORIES "${CMAKE_INSTALL_PREFIX}/lib" isSystemDir) + if("${isSystemDir}" STREQUAL "-1") + set(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_PREFIX}/lib") + endif() +endif() #################################### -# Custom targets +# custom targets #################################### -#add uninstall command -CONFIGURE_FILE( +configure_file( "${CMAKE_MODULE_PATH}/cmake_uninstall.cmake.in" "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" - IMMEDIATE @ONLY) -ADD_CUSTOM_TARGET(uninstall - "${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake") + IMMEDIATE @ONLY +) + +add_custom_target( + uninstall + 
"${CMAKE_COMMAND}" -P "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake" + VERBATIM +) + +if(ENABLE_PACKAGING) + include(CreateLucene++Packages) +endif() #################################### -# Finalise build script +# bootstrap #################################### -#this must go last... -IF (ENABLE_PACKAGING) - INCLUDE(CreateLucene++Packages) -ENDIF ( ENABLE_PACKAGING) +include(TestCXXAcceptsFlag) +include(GNUInstallDirs) + +add_subdirectory(include) +add_subdirectory(src) + +message("\n\n** Build Summary **") +message(" Version: ${lucene++_VERSION}") +message(" Prefix: ${CMAKE_INSTALL_PREFIX}") +message(" Build Type: ${CMAKE_BUILD_TYPE}") +message(" Architecture: ${CMAKE_SYSTEM_PROCESSOR}") +message(" System: ${CMAKE_SYSTEM_NAME}") +message(" Boost Include: ${Boost_INCLUDE_DIRS}") +message(" Boost Libraries: ${Boost_LIBRARY_DIRS}") +message(" Zlib Include: ${ZLIB_INCLUDE_DIRS}") +message(" Zlib Library: ${ZLIB_LIBRARY_RELEASE}") diff --git a/COPYING b/COPYING index d6456956..cb871c4a 100644 --- a/COPYING +++ b/COPYING @@ -1,202 +1,98 @@ - - Apache License - Version 2.0, January 2004 - http://www.apache.org/licenses/ - - TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION - - 1. Definitions. - - "License" shall mean the terms and conditions for use, reproduction, - and distribution as defined by Sections 1 through 9 of this document. - - "Licensor" shall mean the copyright owner or entity authorized by - the copyright owner that is granting the License. - - "Legal Entity" shall mean the union of the acting entity and all - other entities that control, are controlled by, or are under common - control with that entity. For the purposes of this definition, - "control" means (i) the power, direct or indirect, to cause the - direction or management of such entity, whether by contract or - otherwise, or (ii) ownership of fifty percent (50%) or more of the - outstanding shares, or (iii) beneficial ownership of such entity. 
- - "You" (or "Your") shall mean an individual or Legal Entity - exercising permissions granted by this License. - - "Source" form shall mean the preferred form for making modifications, - including but not limited to software source code, documentation - source, and configuration files. - - "Object" form shall mean any form resulting from mechanical - transformation or translation of a Source form, including but - not limited to compiled object code, generated documentation, - and conversions to other media types. - - "Work" shall mean the work of authorship, whether in Source or - Object form, made available under the License, as indicated by a - copyright notice that is included in or attached to the work - (an example is provided in the Appendix below). - - "Derivative Works" shall mean any work, whether in Source or Object - form, that is based on (or derived from) the Work and for which the - editorial revisions, annotations, elaborations, or other modifications - represent, as a whole, an original work of authorship. For the purposes - of this License, Derivative Works shall not include works that remain - separable from, or merely link (or bind by name) to the interfaces of, - the Work and Derivative Works thereof. - - "Contribution" shall mean any work of authorship, including - the original version of the Work and any modifications or additions - to that Work or Derivative Works thereof, that is intentionally - submitted to Licensor for inclusion in the Work by the copyright owner - or by an individual or Legal Entity authorized to submit on behalf of - the copyright owner. 
For the purposes of this definition, "submitted" - means any form of electronic, verbal, or written communication sent - to the Licensor or its representatives, including but not limited to - communication on electronic mailing lists, source code control systems, - and issue tracking systems that are managed by, or on behalf of, the - Licensor for the purpose of discussing and improving the Work, but - excluding communication that is conspicuously marked or otherwise - designated in writing by the copyright owner as "Not a Contribution." - - "Contributor" shall mean Licensor and any individual or Legal Entity - on behalf of whom a Contribution has been received by Licensor and - subsequently incorporated within the Work. - - 2. Grant of Copyright License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - copyright license to reproduce, prepare Derivative Works of, - publicly display, publicly perform, sublicense, and distribute the - Work and such Derivative Works in Source or Object form. - - 3. Grant of Patent License. Subject to the terms and conditions of - this License, each Contributor hereby grants to You a perpetual, - worldwide, non-exclusive, no-charge, royalty-free, irrevocable - (except as stated in this section) patent license to make, have made, - use, offer to sell, sell, import, and otherwise transfer the Work, - where such license applies only to those patent claims licensable - by such Contributor that are necessarily infringed by their - Contribution(s) alone or by combination of their Contribution(s) - with the Work to which such Contribution(s) was submitted. 
If You - institute patent litigation against any entity (including a - cross-claim or counterclaim in a lawsuit) alleging that the Work - or a Contribution incorporated within the Work constitutes direct - or contributory patent infringement, then any patent licenses - granted to You under this License for that Work shall terminate - as of the date such litigation is filed. - - 4. Redistribution. You may reproduce and distribute copies of the - Work or Derivative Works thereof in any medium, with or without - modifications, and in Source or Object form, provided that You - meet the following conditions: - - (a) You must give any other recipients of the Work or - Derivative Works a copy of this License; and - - (b) You must cause any modified files to carry prominent notices - stating that You changed the files; and - - (c) You must retain, in the Source form of any Derivative Works - that You distribute, all copyright, patent, trademark, and - attribution notices from the Source form of the Work, - excluding those notices that do not pertain to any part of - the Derivative Works; and - - (d) If the Work includes a "NOTICE" text file as part of its - distribution, then any Derivative Works that You distribute must - include a readable copy of the attribution notices contained - within such NOTICE file, excluding those notices that do not - pertain to any part of the Derivative Works, in at least one - of the following places: within a NOTICE text file distributed - as part of the Derivative Works; within the Source form or - documentation, if provided along with the Derivative Works; or, - within a display generated by the Derivative Works, if and - wherever such third-party notices normally appear. The contents - of the NOTICE file are for informational purposes only and - do not modify the License. 
You may add Your own attribution - notices within Derivative Works that You distribute, alongside - or as an addendum to the NOTICE text from the Work, provided - that such additional attribution notices cannot be construed - as modifying the License. - - You may add Your own copyright statement to Your modifications and - may provide additional or different license terms and conditions - for use, reproduction, or distribution of Your modifications, or - for any such Derivative Works as a whole, provided Your use, - reproduction, and distribution of the Work otherwise complies with - the conditions stated in this License. - - 5. Submission of Contributions. Unless You explicitly state otherwise, - any Contribution intentionally submitted for inclusion in the Work - by You to the Licensor shall be under the terms and conditions of - this License, without any additional terms or conditions. - Notwithstanding the above, nothing herein shall supersede or modify - the terms of any separate license agreement you may have executed - with Licensor regarding such Contributions. - - 6. Trademarks. This License does not grant permission to use the trade - names, trademarks, service marks, or product names of the Licensor, - except as required for reasonable and customary use in describing the - origin of the Work and reproducing the content of the NOTICE file. - - 7. Disclaimer of Warranty. Unless required by applicable law or - agreed to in writing, Licensor provides the Work (and each - Contributor provides its Contributions) on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or - implied, including, without limitation, any warranties or conditions - of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A - PARTICULAR PURPOSE. You are solely responsible for determining the - appropriateness of using or redistributing the Work and assume any - risks associated with Your exercise of permissions under this License. - - 8. 
Limitation of Liability. In no event and under no legal theory, - whether in tort (including negligence), contract, or otherwise, - unless required by applicable law (such as deliberate and grossly - negligent acts) or agreed to in writing, shall any Contributor be - liable to You for damages, including any direct, indirect, special, - incidental, or consequential damages of any character arising as a - result of this License or out of the use or inability to use the - Work (including but not limited to damages for loss of goodwill, - work stoppage, computer failure or malfunction, or any and all - other commercial damages or losses), even if such Contributor - has been advised of the possibility of such damages. - - 9. Accepting Warranty or Additional Liability. While redistributing - the Work or Derivative Works thereof, You may choose to offer, - and charge a fee for, acceptance of support, warranty, indemnity, - or other liability obligations and/or rights consistent with this - License. However, in accepting such obligations, You may act only - on Your own behalf and on Your sole responsibility, not on behalf - of any other Contributor, and only if You agree to indemnify, - defend, and hold each Contributor harmless for any liability - incurred by, or claims asserted against, such Contributor by reason - of your accepting any such warranty or additional liability. - - END OF TERMS AND CONDITIONS - - APPENDIX: How to apply the Apache License to your work. - - To apply the Apache License to your work, attach the following - boilerplate notice, with the fields enclosed by brackets "[]" - replaced with your own identifying information. (Don't include - the brackets!) The text should be enclosed in the appropriate - comment syntax for the file format. We also recommend that a - file or class name and description of purpose be included on the - same "printed page" as the copyright notice for easier - identification within third-party archives. 
- - Copyright [yyyy] [name of copyright owner] - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. +This source code is dual-licensed. +================================== + +LGPL: + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. + + See the file LGPL.licence + +Apache 2.0: + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + + See the file APACHE.licence + + +Notes regarding licensing glib code. +==================================== + +Files: src/core/util/unicode/* + +Some portions of glib code have been used in Lucene++, a project +spawned from CLucene, with special permission from the author(s). +This is the email exchange that took place in 2006 between +Ben van Klinken, Owen Taylor and Tom Tromey: + +---------- Forwarded message ---------- +From: Owen Taylor +Date: 11 February 2006 04:48 +Subject: Re: Fwd: glib licensing +To: Ben van Klinken +Cc: tromey@redhat.com + + +On Fri, 2006-02-10 at 18:34 +0100, Ben van Klinken wrote: +> +> Hi Owen, +> +> I am the author and maintainer of CLucene (clucene.sourceforge.net). I +> was forwarded to you by Matthias Classen. 
We have an enquiry about +> licensing of glib: +> +> CLucene was licensed LGPL, but recently we changed our license to +> allow licensing under apache or LGPL. During the audit of the change, +> we made a mistake with some of the code (code in the +> gunichartables.cpp - various utf8<>wchar conversion functions) to be +> licensed apache, since some of the functions are from the glib +> library. The file in question contains various functions from the +> http://cvs.gnome.org/viewcvs/glib/glib/ directory. +> +> We are working on fixing this and are exploring several options. When +> discussing the issue on our mailing list, one suggestion was to +> enquire with you what the chances of re-licensing some of the glib +> unicode functions under +> the apache license would be? I believe you were the author of glib +> unicode support? + +I'd have to know more specifically what particular portions of +the GLib code are involved; can you be more specific about +particular functions and code portions? while I did much of the work, +there have been additions made later on by other people, and a good +portion of the code derives originally from libunicode by +Tom Tromey. (Cc'ed) + +For the portions that are actually my code I don't have any +objection to them also being licensed under the Apache license ... +it's pretty much straightforward implementations of algorithms from +the Unicode standard, and other implementations are widely available in +many forms. + +Regards, +Owen + +---------- Forwarded message ---------- +From: Tom Tromey +Date: 25 April 2006 02:42 +Subject: Re: Fwd: glib licensing +To: Ben van Klinken +Cc: Owen Taylor + +Ben> All the code has Tom Tromey's name at the top. Please let me know if +Ben> you need any other information. + +I was wondering ... if other people made substantial contributions +after this code was written, wouldn't they also be copyright holders? +You'd have to use the cvs history to see this. 
+ +Ben> I don't think we need to do anything other for you to agree with this +Ben> licensing in this email correspondence. So unless you can shed any +Ben> more light on this process and if you agree to this licensing, i will +Ben> append the apache license to the top of our files with a note that it +Ben> has been licensed with your approval. + +It is fine by me. + +Tom diff --git a/GPL.license b/GPL.license new file mode 100644 index 00000000..94a9ed02 --- /dev/null +++ b/GPL.license @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. 
Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. 
To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. 
If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. 
For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. 
Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. 
This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. 
+ + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. 
+ + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. 
+ + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. 
If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. 
If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. 
+ + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. 
For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + + Copyright (C) + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see . + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + Copyright (C) + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +. 
diff --git a/README.md b/README.md new file mode 100644 index 00000000..805e5fa3 --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +Lucene++ +========== + +Welcome to lucene++ version **3.0.9**. + +Lucene++ is a C++ port of the popular Java [Lucene](http://lucene.apache.org/) +library, a high-performance, full-featured text search engine. + + +Lucene++ Components +---------------- + +- liblucene++ library +- liblucene++-contrib library +- lucene++-tester (unit tester) +- deletefiles (demo) +- indexfiles (demo) +- searchfiles (demo) + +For information on building the Lucene++ suite, please read doc/BUILDING.md + + +Useful Resources +---------------- + +Official [Java Lucene](http://lucene.apache.org/java/docs/index.html) - useful links and +documentation relevant to Lucene and lucene++. [Lucene in Action](https://www.amazon.com/dp/1932394281/) +by Otis Gospodnetic and Erik Hatcher. + + +To run unit test suite +---------------------- + +lucene_tester is built using the [Google Testing Framework](https://code.google.com/p/googletest/). +You can run the test suite on Unix with the following command run from the +repository root: +``` + $ build/src/test/lucene++-tester +``` + +The test suite can also be run from the repository root on NT systems, but the required DLL +files must manually be copied into the test binary path before executing, otherwise you will +receive errors telling you that required libraries cannot be found. +``` + $ build/src/test/lucene++-tester +``` + +Command options can be discovered by supplying `--help`. + + +To run the demos +---------------- + +Start by indexing a directory of files - open a command prompt and run +``` + ./indexfiles +``` +Once the indexer has finished, you can query the index using searchfiles +``` + ./searchfiles -index +``` +This uses an interactive command for you to enter queries; type a query to search the index, press enter, and you'll see the results. 
+ + +Acknowledgements +---------------- + +- Ben van Klinken and contributors to the CLucene project for inspiring this project. +- md5 Copyright (C) 1999, 2000, 2002 Aladdin Enterprises +- [Unicode character properties (guniprop)](http://library.gnome.org/devel/glib/) Copyright (C) 1999 Tom Tromey, Copyright (C) 2000 Red Hat, Inc. +- [Cotire (compile time reducer)](https://github.com/sakra/cotire) by Sascha Kratky. diff --git a/README.rst b/README.rst deleted file mode 100644 index 163d4372..00000000 --- a/README.rst +++ /dev/null @@ -1,112 +0,0 @@ -Lucene++ -========== - -Welcome to lucene++ version **3.0.3**. - -Lucene++ is an up to date C++ port of the popular Java Lucene library, a high-performance, full-featured text search engine. - - -Components ----------------- - -- liblucene++ library -- liblucene_contrib library -- lucene_tester (unit tester) -- deletefiles (demo) -- indexfiles (demo) -- searchfiles (demo) - - -Useful Resources ----------------- - -Official `Java Lucene `_ - useful links and documentation relevant to Lucene and lucene++. -`Lucene in Action `_ by Otis Gospodnetic and Erik Hatcher. - - -Build Instructions using CMake ------------------------------- - -You'll need boost installed somewhere. - -On Debian systems, the following packages are required: - -- libboost-date-time-dev -- libboost-filesystem-dev -- libboost-regex-dev -- libboost-thread-dev -- libboost-iostreams-dev -- libboost-test-dev - - -Build Instructions using Waf ------------------------------- - -Alternatively you can use `Waf `_ to drive the build. Waf requires that you have a recent version of `Python `_ installed on your system. 
- -To build the library the following commands should be issued:: - - $ ./waf configure - $ ./waf --static build - - -Additionally static builds of the following libraries are required for a successful build: - -- boost::date_time -- boost::filesystem -- boost::regex -- boost::thread -- boost::system -- boost::iostreams -- boost::unit_test_framework - -The libraries and headers should be made available at a standard prefix (/usr/local for example). - - -Build Instructions for Windows systems --------------------------------------- - -Open solution lucene++.sln located in the *msvc* folder into Visual Studio 2008 and build. - -**Note: "BOOST_ROOT" environment variable must be defined to point to the boost library directory (eg. c:\\boost_1_44_0)** - -You'll need boost installed. - -`BoostPro `_ has some precompiled windows packages. You'll need the following extras installed:: - -- boost::system -- boost::thread -- boost::filesystem -- boost::regex -- boost::date_time -- boost::iostreams -- boost::unit_test_framework - - -Building Performance --------------------- - -Use of ccache will speed up build times a lot. I found it easiest to add the /usr/lib/ccache directory to the beginning of your paths. This works for most common compilers. - -PATH=/usr/lib/ccache:$PATH - - -To run unit test suite ----------------------- - -lucene_tester is built using the `Boost Unit Test Framework `_ and is launched by the following command:: - - $ bin/lucene_tester --show_progress=yes - -Other `command options `_ can be supplied. - - -Acknowledgements ----------------- - -- Ben van Klinken and contributors to the CLucene project for inspiring this project. -- Jamie Kirkpatrick for cross-platform and waf build support. - -- `nedmalloc `_ Copyright 2005-2006 Niall Douglas -- md5 Copyright (C) 1999, 2000, 2002 Aladdin Enterprises -- `Unicode character properties (guniprop) `_ Copyright (C) 1999 Tom Tromey, Copyright (C) 2000 Red Hat, Inc. 
diff --git a/bin/.gitignore b/bin/.gitignore deleted file mode 100644 index f59ec20a..00000000 --- a/bin/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* \ No newline at end of file diff --git a/build/clang.py b/build/clang.py deleted file mode 100644 index fcdae898..00000000 --- a/build/clang.py +++ /dev/null @@ -1,55 +0,0 @@ -############################################################################# -## Copyright (c) 2009-2011 Alan Wright. All rights reserved. -## Distributable under the terms of either the Apache License (Version 2.0) -## or the GNU Lesser General Public License. -############################################################################# - -from TaskGen import feature -import Options -import sys - - -@feature('cc') -def apply_clang(self): - ''' - Replaced the default compiler with clang if required. - ''' - if not getattr(self, 'clang', False) or Options.options.disable_clang: - return - self.env['CC'] = self.env['CLANG'] or self.env['CC'] - if sys.platform == "darwin": - # workaround problems with non-static inline functions - # http://clang.llvm.org/compatibility.html - self.env['CCFLAGS'] += ['-std=gnu89'] - - -@feature('cc') -def apply_clang_cpp(self): - ''' - Replaced the default compiler with clang if required. 
- ''' - if not getattr(self, 'clang', False) or Options.options.disable_clang: - return - self.env['CPP'] = self.env['CLANGPP'] or self.env['CXX'] - self.env['CXX'] = self.env['CLANGPP'] or self.env['CXX'] - if sys.platform == "darwin": - self.env['shlib_CXXFLAGS'] = ['-fPIC'] - - -def options(opt): - """ - Add options specific the codehash tool - """ - opt.add_option('--noclang', - dest = 'disable_clang', - action = 'store_true', - default = False, - help = 'disable the clang compiler if it is available') - - -def configure(conf): - search_paths = ['/Xcode4/usr/bin/'] if sys.platform == "darwin" else [] - if not getattr(conf, 'clang', False) or Options.options.disable_clang: - return - conf.find_program('clang', var='CLANG') - conf.find_program('clang++', var='CLANGPP', path_list = search_paths) diff --git a/build/gch.py b/build/gch.py deleted file mode 100644 index 6e3ccb40..00000000 --- a/build/gch.py +++ /dev/null @@ -1,39 +0,0 @@ -############################################################################# -## Copyright (c) 2009-2011 Alan Wright. All rights reserved. -## Distributable under the terms of either the Apache License (Version 2.0) -## or the GNU Lesser General Public License. -############################################################################# - -#! /usr/bin/env python -# encoding: utf-8 -# Thomas Nagy, 2006 (ita) - -""" -for some obscure reason, the precompiled header will not be taken if -all.h is in the same directory as main.cpp -we recommend to add the header to compile in a separate directory without any sources - -Note: the #warning will come once when the .h is compiled, it will not come when the .cpp is compiled -Note: do not forget to set the include paths (include=...) 
-""" - -from waflib.TaskGen import feature, after -from waflib.Task import Task -from waflib.Tools import c_preproc - -#@feature('cxx') <- python >= 2.4 -#@after('apply_link') -def process_pch(self): - if getattr(self, 'pch', ''): - nodes = self.to_nodes(self.pch) - for x in nodes: - self.create_task('gchx', x, x.change_ext('.gch')) -feature('cxx')(process_pch) -after('apply_link')(process_pch) - -class gchx(Task): - run_str = '${CXX} ${CXXFLAGS} ${FRAMEWORKPATH_ST:FRAMEWORKPATH} ${CPPPATH_ST:INCPATHS} ${DEFINES_ST:DEFINES} ${CXX_SRC_F}${SRC} ${CXX_TGT_F}${TGT}' - scan = c_preproc.scan - ext_out = ['.h'] - color = 'BLUE' - diff --git a/cmake/CreateLucene++Packages.cmake b/cmake/CreateLucene++Packages.cmake index 8c9fd83e..2c3e98b6 100644 --- a/cmake/CreateLucene++Packages.cmake +++ b/cmake/CreateLucene++Packages.cmake @@ -1,85 +1,82 @@ #Creates all the relevant packages -SET(CPACK_PACKAGE_VERSION_MAJOR ${LUCENE++_VERSION_MAJOR}) -SET(CPACK_PACKAGE_VERSION_MINOR ${LUCENE++_VERSION_MINOR}) -SET(CPACK_PACKAGE_VERSION_REVISION ${LUCENE++_VERSION_REVISION}) -SET(CPACK_PACKAGE_VERSION_PATCH ${LUCENE++_VERSION_MAJOR}) +set(CPACK_PACKAGE_VERSION_MAJOR ${lucene++_VERSION_MAJOR}) +set(CPACK_PACKAGE_VERSION_MINOR ${lucene++_VERSION_MINOR}) +set(CPACK_PACKAGE_VERSION_PATCH ${lucene++_VERSION_MAJOR}) -SET(CPACK_PACKAGE_VERSION ${LUCENE++_VERSION}) -SET(CPACK_PACKAGE_SOVERSION ${LUCENE++_SOVERSION}) +set(CPACK_PACKAGE_VERSION ${lucene++_VERSION}) +set(CPACK_PACKAGE_SOVERSION ${lucene++_SOVERSION}) -SET(CPACK_PACKAGE_VENDOR "Alan Wright") -SET(CPACK_PACKAGE_CONTACT "alanwright.home@googlemail.com") -SET(CPACK_PACKAGE_NAME "liblucene++") +set(CPACK_PACKAGE_VENDOR "Alan Wright") +set(CPACK_PACKAGE_CONTACT "alanwright.home@googlemail.com") +set(CPACK_PACKAGE_NAME "liblucene++") -SET(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") -SET(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Lucene++ is an up to date C++ port of the popular Java Lucene library, a 
high-performance, full-featured text search engine") +set(CPACK_PACKAGE_DESCRIPTION_FILE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") +set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "Lucene++ is an up to date C++ port of the popular Java Lucene library, a high-performance, full-featured text search engine") -SET(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") -SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/COPYING") -#SET(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") +set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.PACKAGE") +set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/COPYING") #so, what are we going to install? -SET(CPACK_INSTALL_CMAKE_PROJECTS +set(CPACK_INSTALL_CMAKE_PROJECTS "${CMAKE_BINARY_DIR};lucene++;ALL;/") -SET(CPACK_COMPONENTS_ALL development runtime) -SET(CPACK_GENERATOR "TGZ") -SET(CPACK_PACKAGE_FILE_NAME "lucene++-${CPACK_PACKAGE_VERSION}-${CMAKE_SYSTEM_NAME}") +set(CPACK_COMPONENTS_ALL development runtime) +set(CPACK_GENERATOR "TGZ") +set(CPACK_PACKAGE_FILE_NAME "lucene++-${CPACK_PACKAGE_VERSION}-${CMAKE_SYSTEM_NAME}") -IF( (WIN32 OR WIN64) AND NOT UNIX) - SET(CPACK_SOURCE_GENERATOR "ZIP") -ELSE( (WIN32 OR WIN64) AND NOT UNIX) - SET(CPACK_SOURCE_GENERATOR "TBZ2;TGZ") -ENDIF( (WIN32 OR WIN64) AND NOT UNIX) -SET(CPACK_SOURCE_PACKAGE_FILE_NAME "lucene++-${CPACK_PACKAGE_VERSION}-Source") +if((WIN32 OR WIN64) AND NOT UNIX) + set(CPACK_SOURCE_GENERATOR "ZIP") +else() + set(CPACK_SOURCE_GENERATOR "TBZ2;TGZ") +endif() +set(CPACK_SOURCE_PACKAGE_FILE_NAME "lucene++-${CPACK_PACKAGE_VERSION}-Source") #specific packaging requirements:, -SET(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.4), libgcc1 (>= 1:4.1.1-21), libstdc++6 (>= 4.1.1-21), libboost-date-time1.42.0, libboost-filesystem1.42.0, libboost-regex1.42.0, libboost-thread1.42.0, libboost-iostreams1.42.0") -SET(CPACK_DEBIAN_PACKAGE_SECTION "libs") -SET(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0") 
-SET(CPACK_RPM_PACKAGE_GROUP "libs") -SET(CPACK_RPM_PACKAGE_REQUIRES "libboost-date-time1.42.0, libboost-filesystem1.42.0, libboost-regex1.42.0, libboost-thread1.42.0, libboost-iostreams1.42.0") +set(CPACK_DEBIAN_PACKAGE_DEPENDS "libc6 (>= 2.4), libgcc1 (>= 1:4.1.1-21), libstdc++6 (>= 4.1.1-21), libboost-date-time1.42.0, libboost-filesystem1.42.0, libboost-regex1.42.0, libboost-thread1.42.0, libboost-iostreams1.42.0") +set(CPACK_DEBIAN_PACKAGE_SECTION "libs") +set(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0") +set(CPACK_RPM_PACKAGE_GROUP "libs") +set(CPACK_RPM_PACKAGE_REQUIRES "libboost-date-time1.42.0, libboost-filesystem1.42.0, libboost-regex1.42.0, libboost-thread1.42.0, libboost-iostreams1.42.0") #don't include the current binary dir. -get_filename_component(lucene++-base_BINARY_DIR_name ${lucene++-base_BINARY_DIR} NAME) -SET(CPACK_SOURCE_IGNORE_FILES +get_filename_component(lucene++_BINARY_DIR_name "${lucene++_BINARY_DIR}" NAME) +set(CPACK_SOURCE_IGNORE_FILES "/\\\\.svn/" "/\\\\.git/" - "/\\\\.waf*/" "\\\\.swp$" "\\\\.#;/#" ".*~" ".*\\\\.tmp" ".*\\\\.save" - "/${lucene++-base_BINARY_DIR_name}/" + "/${lucene++_BINARY_DIR_name}/" ) -IF( (WIN32 OR WIN64) AND NOT UNIX) +if((WIN32 OR WIN64) AND NOT UNIX) # There is a bug in NSI that does not handle full unix paths properly. Make # sure there is at least one set of four (4) backlasshes. 
- SET(CPACK_GENERATOR "${CPACK_GENERATOR};NSIS") - #SET(CPACK_PACKAGE_ICON "${CMake_SOURCE_DIR}/Utilities/Release\\\\InstallIcon.bmp") - #SET(CPACK_NSIS_INSTALLED_ICON_NAME "bin\\\\MyExecutable.exe") - SET(CPACK_NSIS_DISPLAY_NAME "${CPACK_PACKAGE_INSTALL_DIRECTORY} Lucene++ Library") - SET(CPACK_NSIS_HELP_LINK "http:\\\\\\\\lucene++.sourceforge.net") - SET(CPACK_NSIS_URL_INFO_ABOUT "http:\\\\\\\\lucene++.sourceforge.net") - SET(CPACK_NSIS_CONTACT "lucene++-developers@lists.sourceforge.net") - #SET(CPACK_NSIS_MODIFY_PATH ON) -ELSE( (WIN32 OR WIN64) AND NOT UNIX) -# SET(CPACK_STRIP_FILES "bin/xxx") - SET(CPACK_SOURCE_STRIP_FILES "") -ENDIF( (WIN32 OR WIN64) AND NOT UNIX) -#SET(CPACK_PACKAGE_EXECUTABLES "MyExecutable" "My Executable") + set(CPACK_GENERATOR "${CPACK_GENERATOR};NSIS") + #set(CPACK_PACKAGE_ICON "${CMake_SOURCE_DIR}/Utilities/Release\\\\InstallIcon.bmp") + #set(CPACK_NSIS_INSTALLED_ICON_NAME "bin\\\\MyExecutable.exe") + set(CPACK_NSIS_DISPLAY_NAME "${CPACK_PACKAGE_INSTALL_DIRECTORY} Lucene++ Library") + set(CPACK_NSIS_HELP_LINK "http:\\\\\\\\lucene++.sourceforge.net") + set(CPACK_NSIS_URL_INFO_ABOUT "http:\\\\\\\\lucene++.sourceforge.net") + set(CPACK_NSIS_CONTACT "lucene++-developers@lists.sourceforge.net") + #set(CPACK_NSIS_MODIFY_PATH ON) +else() +# set(CPACK_STRIP_FILES "bin/xxx") + set(CPACK_SOURCE_STRIP_FILES "") +endif() +#set(CPACK_PACKAGE_EXECUTABLES "MyExecutable" "My Executable") -ADD_CUSTOM_TARGET(dist-package - COMMAND rsync -avP -e ssh ${CPACK_PACKAGE_FILE_NAME}.* ustramooner@frs.sourceforge.net:uploads/ +add_custom_target(dist-package + COMMAND rsync -avP -e ssh "${CPACK_PACKAGE_FILE_NAME}.*" ustramooner@frs.sourceforge.net:uploads/ # DEPENDS package ) -ADD_CUSTOM_TARGET(dist-package_source - COMMAND rsync -avP -e ssh ${CPACK_SOURCE_PACKAGE_FILE_NAME}.* ustramooner@frs.sourceforge.net:uploads/ +add_custom_target(dist-package_source + COMMAND rsync -avP -e ssh "${CPACK_SOURCE_PACKAGE_FILE_NAME}.*" ustramooner@frs.sourceforge.net:uploads/ # 
DEPENDS package_source ) #this must be last -INCLUDE(CPack) +include(CPack) diff --git a/cmake/Lucene++Docs.cmake b/cmake/Lucene++Docs.cmake index bf2938b5..ff1d2815 100644 --- a/cmake/Lucene++Docs.cmake +++ b/cmake/Lucene++Docs.cmake @@ -2,7 +2,6 @@ # This file provides support for building the Lucene++ Documentation. # To build the documention, you will have to enable it # and then do the equivalent of "make doc". -OPTION(ENABLE_DOCS "Build the Lucene++ documentation." OFF) MACRO(SET_YESNO) FOREACH(param ${ARGV}) @@ -22,9 +21,9 @@ MACRO(SET_BLANK) ENDMACRO(SET_BLANK) IF (ENABLE_DOCS) - OPTION(DOCS_HTML_HELP + OPTION(DOCS_HTML_HELP "Doxygen should compile HTML into a Help file (CHM)." NO) - + OPTION(DOCS_HTML "Doxygen should build HTML documentation." YES) OPTION(DOCS_XML @@ -35,7 +34,7 @@ IF (ENABLE_DOCS) "Doxygen should build man documentation." NO) OPTION(DOCS_TAGFILE "Doxygen should build a tagfile." NO) - + OPTION(DOCS_LATEX "Doxygen should build Latex documentation." NO ) @@ -48,12 +47,12 @@ IF (ENABLE_DOCS) DOCS_MAN DOCS_TAGFILE ) - + # # Check for the tools # FIND_PACKAGE(Doxygen) - + IF ( DOXYGEN_FOUND ) # This creates a new target to build documentation. # It runs ${DOXYGEN_EXECUTABLE} which is the full path and executable to @@ -62,9 +61,10 @@ IF (ENABLE_DOCS) # It runs the final generated Doxyfile against it. # The DOT_PATH is substituted into the Doxyfile. ADD_CUSTOM_TARGET(doc - ${DOXYGEN_EXECUTABLE} ${PROJECT_BINARY_DIR}/doc/doxyfile + "${DOXYGEN_EXECUTABLE}" "${PROJECT_BINARY_DIR}/doc/doxyfile" + VERBATIM ) - + IF ( DOCS_HTML_HELP ) IF ( NOT DOCS_HTML ) MESSAGE ( FATAL_ERROR "DOCS_HTML is required to buidl DOCS_HTML_HELP" ) @@ -73,34 +73,34 @@ IF (ENABLE_DOCS) IF ( NOT HTML_HELP_COMPILER ) MESSAGE(FATAL_ERROR "HTML Help compiler not found, turn DOCS_HTML_HELP off to proceed") ENDIF ( NOT HTML_HELP_COMPILER ) - + #make cygwin work with hhc... 
IF ( CYGWIN ) EXECUTE_PROCESS ( COMMAND cygpath "${HTML_HELP_COMPILER}" OUTPUT_VARIABLE HTML_HELP_COMPILER_EX ) - STRING ( REPLACE "\n" "" HTML_HELP_COMPILER_EX ${HTML_HELP_COMPILER_EX} ) - STRING ( REPLACE "\r" "" HTML_HELP_COMPILER_EX ${HTML_HELP_COMPILER_EX} ) + STRING ( REPLACE "\n" "" HTML_HELP_COMPILER_EX "${HTML_HELP_COMPILER_EX}" ) + STRING ( REPLACE "\r" "" HTML_HELP_COMPILER_EX "${HTML_HELP_COMPILER_EX}" ) SET ( HTML_HELP_COMPILER_EX "\"${HTML_HELP_COMPILER_EX}\"" ) ELSE ( CYGWIN ) - SET ( HTML_HELP_COMPILER_EX ${HTML_HELP_COMPILER} ) + SET ( HTML_HELP_COMPILER_EX "${HTML_HELP_COMPILER}" ) ENDIF ( CYGWIN ) ENDIF ( DOCS_HTML_HELP ) - + IF ( DOCS_LATEX ) FIND_PACKAGE(LATEX) IF ( NOT LATEX_COMPILER ) MESSAGE(FATAL_ERROR "Latex compiler not found, turn DOCS_LATEX off to proceed") ENDIF ( NOT LATEX_COMPILER ) ENDIF ( DOCS_LATEX ) - + FIND_PACKAGE(Perl) - + IF ( DOXYGEN_DOT_EXECUTABLE ) SET ( HAVE_DOT "YES" ) ELSE ( DOXYGEN_DOT_EXECUTABLE ) SET ( HAVE_DOT "NO" ) ENDIF ( DOXYGEN_DOT_EXECUTABLE ) - + #doxygen expects YES/NO parameters SET_YESNO( DOCS_HTML_HELP @@ -117,17 +117,17 @@ IF (ENABLE_DOCS) HTML_HELP_COMPILER LATEX_COMPILER ) - + IF ( DOCS_TAGFILE ) SET ( DOCS_TAGFILE_LOCATION "${PROJECT_BINARY_DIR}/doc/tag/lucene++.tag" ) ENDIF ( DOCS_TAGFILE ) - + # This processes our Doxyfile.cmake and substitutes paths to generate a final Doxyfile - CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/doc/Doxyfile.cmake ${PROJECT_BINARY_DIR}/doc/doxyfile ) - CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/doc/helpheader.htm.cmake ${PROJECT_BINARY_DIR}/doc/helpheader.htm ) - CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/doc/helpfooter.htm.cmake ${PROJECT_BINARY_DIR}/doc/helpfooter.htm ) - CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/doc/doxygen.css.cmake ${PROJECT_BINARY_DIR}/doc/html/doxygen.css ) - + CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/doc/doxygen/Doxyfile.cmake" "${PROJECT_BINARY_DIR}/doc/doxyfile") + CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/doc/doxygen/helpheader.htm.cmake" 
"${PROJECT_BINARY_DIR}/doc/helpheader.htm") + CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/doc/doxygen/helpfooter.htm.cmake" "${PROJECT_BINARY_DIR}/doc/helpfooter.htm") + CONFIGURE_FILE("${PROJECT_SOURCE_DIR}/doc/doxygen/doxygen.css.cmake" "${PROJECT_BINARY_DIR}/doc/html/doxygen.css") + #create a target for tar.gz html help FIND_PACKAGE(UnixCommands) IF ( TAR AND GZIP ) @@ -135,17 +135,23 @@ IF (ENABLE_DOCS) COMMAND "${TAR}" "-czf" "${PROJECT_BINARY_DIR}/doc/lucene++-doc.tar.gz" ./ WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/doc/html/" #DEPENDS doc + VERBATIM ) ENDIF ( TAR AND GZIP ) - - #install man if it was built + + #install HTML pages if they were built + IF ( DOCS_HTML AND NOT WIN32 ) + INSTALL(DIRECTORY "${PROJECT_BINARY_DIR}/doc/html/" DESTINATION share/doc/lucene++-doc/html) + ENDIF ( DOCS_HTML AND NOT WIN32 ) + + #install man pages if they were built IF ( DOCS_MAN ) - INSTALL(DIRECTORY ${PROJECT_BINARY_DIR}/doc/man/ DESTINATION man) + INSTALL(DIRECTORY "${PROJECT_BINARY_DIR}/doc/man/" DESTINATION man) ENDIF ( DOCS_MAN ) - + ELSE ( DOXYGEN_FOUND ) MESSAGE(FATAL_ERROR "Doxygen not found, turn ENABLE_DOCS off to proceed") ENDIF ( DOXYGEN_FOUND ) - + ENDIF (ENABLE_DOCS) diff --git a/cmake/MacroCheckGccVisibility.cmake b/cmake/MacroCheckGccVisibility.cmake deleted file mode 100644 index 2022aa31..00000000 --- a/cmake/MacroCheckGccVisibility.cmake +++ /dev/null @@ -1,58 +0,0 @@ -# -# Copyright (c) 2006, Alexander Neundorf -# Copyright (c) 2006, Laurent Montel, -# -# Redistribution and use is allowed according to the terms of the BSD license. -# For details see the accompanying COPYING-CMAKE-SCRIPTS file. 
- -macro(MACRO_CHECK_GCC_VISIBILITY GccVisibility) - if (CMAKE_COMPILER_IS_GNUCXX) - include(CheckCXXCompilerFlag) - include(MacroEnsureVersion) - # visibility support - check_cxx_compiler_flag(-fvisibility=hidden ${GccVisibility}) - - # get the gcc version - exec_program(${CMAKE_C_COMPILER} ARGS --version OUTPUT_VARIABLE _gcc_version_info) - - string (REGEX MATCH "[345]\\.[0-9]\\.[0-9]" _gcc_version "${_gcc_version_info}") - if (NOT _gcc_version) - - # clang reports: clang version 1.1 (trunk 95754) - string (REGEX MATCH "clang version ([123]\\.[0-9])" _gcc_version "${_gcc_version_info}") - if ( _gcc_version ) - string(REGEX REPLACE "clang version (.*)" "\\1.0" _gcc_version "${_gcc_version}" ) - endif ( _gcc_version ) - - # gcc on mac just reports: "gcc (GCC) 3.3 20030304 ..." without the patch level, handle this here: - if (NOT _gcc_version) - string (REGEX REPLACE ".*\\(GCC\\).* ([34]\\.[0-9]) .*" "\\1.0" _gcc_version "${_gcc_version_info}") - endif (NOT _gcc_version) - endif (NOT _gcc_version) - - - - macro_ensure_version("4.1.0" "${_gcc_version}" GCC_IS_NEWER_THAN_4_1) - macro_ensure_version("4.2.0" "${_gcc_version}" GCC_IS_NEWER_THAN_4_2) - - set(_GCC_COMPILED_WITH_BAD_ALLOCATOR FALSE) - if (GCC_IS_NEWER_THAN_4_1) - exec_program(${CMAKE_C_COMPILER} ARGS -v OUTPUT_VARIABLE _gcc_alloc_info) - string(REGEX MATCH "(--enable-libstdcxx-allocator=mt)" _GCC_COMPILED_WITH_BAD_ALLOCATOR "${_gcc_alloc_info}") - endif (GCC_IS_NEWER_THAN_4_1) - - if (${GccVisibility} AND GCC_IS_NEWER_THAN_4_1 AND NOT _GCC_COMPILED_WITH_BAD_ALLOCATOR) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility=hidden") - set (KDE4_C_FLAGS "${KDE4_C_FLAGS}" "-fvisibility=hidden") - - if (GCC_IS_NEWER_THAN_4_2) - set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fvisibility-inlines-hidden") - endif (GCC_IS_NEWER_THAN_4_2) - else (${GccVisibility} AND GCC_IS_NEWER_THAN_4_1 AND NOT _GCC_COMPILED_WITH_BAD_ALLOCATOR) - set (${GccVisibility} 0) - endif (${GccVisibility} AND GCC_IS_NEWER_THAN_4_1 AND NOT 
_GCC_COMPILED_WITH_BAD_ALLOCATOR) - - else (CMAKE_COMPILER_IS_GNUCXX) - set(${GccVisibility} FALSE) - endif (CMAKE_COMPILER_IS_GNUCXX) -endmacro(MACRO_CHECK_GCC_VISIBILITY) diff --git a/cmake/PCHSupport.cmake b/cmake/PCHSupport.cmake deleted file mode 100644 index af3b3dac..00000000 --- a/cmake/PCHSupport.cmake +++ /dev/null @@ -1,263 +0,0 @@ -# - Try to find precompiled headers support for GCC 3.4 and 4.x -# Once done this will define: -# -# Variable: -# PCHSupport_FOUND -# PCHSupport_ENABLED -# -# Macro: -# ADD_PRECOMPILED_HEADER _targetName _input _dowarn -# ADD_PRECOMPILED_HEADER_TO_TARGET _targetName _input _pch_output_to_use _dowarn -# -# Since this macro overides COMPILER_FLAGS on a target, you must use the following -# variables instead. -# set PCH_ADDITIONAL_COMPILER_FLAGS to add extra COMPILER_FLAGS to targets -# set PCH_ADDITIONAL_COMPILER_FLAGS_${targetName} to add extra COMPILER_FLAGS to a specific target -# - -IF(CMAKE_COMPILER_IS_GNUCXX) - - EXEC_PROGRAM( - ${CMAKE_CXX_COMPILER} - ARGS ${CMAKE_CXX_COMPILER_ARG1} -dumpversion - OUTPUT_VARIABLE gcc_compiler_version) - #MESSAGE("GCC Version: ${gcc_compiler_version}") - IF(gcc_compiler_version MATCHES "4\\.[0-9]\\.[0-9]") - SET(PCHSupport_FOUND TRUE) - ELSE(gcc_compiler_version MATCHES "4\\.[0-9]\\.[0-9]") - IF(gcc_compiler_version MATCHES "3\\.4\\.[0-9]") - SET(PCHSupport_FOUND TRUE) - ENDIF(gcc_compiler_version MATCHES "3\\.4\\.[0-9]") - ENDIF(gcc_compiler_version MATCHES "4\\.[0-9]\\.[0-9]") - - SET(_PCH_include_prefix "-I") - -ELSE(CMAKE_COMPILER_IS_GNUCXX) - IF( (WIN32 OR WIN64) ) - #SET(PCHSupport_FOUND TRUE) # for experimental msvc support - #SET(_PCH_include_prefix "/I") - SET(PCHSupport_FOUND FALSE) - ELSE( (WIN32 OR WIN64) ) - SET(PCHSupport_FOUND FALSE) - ENDIF( (WIN32 OR WIN64) ) -ENDIF(CMAKE_COMPILER_IS_GNUCXX) - -IF ( DEFINED PCHSupport_ENABLED AND NOT PCHSupport_ENABLED ) - SET(PCHSupport_FOUND FALSE) -ENDIF ( DEFINED PCHSupport_ENABLED AND NOT PCHSupport_ENABLED) - 
-MACRO(_PCH_GET_COMPILE_FLAGS _out_compile_flags) - - - STRING(TOUPPER "CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE}" _flags_var_name) - SET(${_out_compile_flags} ${${_flags_var_name}} ) - - IF(CMAKE_COMPILER_IS_GNUCXX) - - GET_TARGET_PROPERTY(_targetType ${_PCH_current_target} TYPE) - IF(${_targetType} STREQUAL SHARED_LIBRARY) - LIST(APPEND ${_out_compile_flags} "${${_out_compile_flags}} -fPIC") - ENDIF(${_targetType} STREQUAL SHARED_LIBRARY) - - ELSE(CMAKE_COMPILER_IS_GNUCXX) - ## TODO ... ? or does it work out of the box - ENDIF(CMAKE_COMPILER_IS_GNUCXX) - - GET_DIRECTORY_PROPERTY(DIRINC INCLUDE_DIRECTORIES ) - FOREACH(item ${DIRINC}) - LIST(APPEND ${_out_compile_flags} "${_PCH_include_prefix}${item}") - ENDFOREACH(item) - - GET_DIRECTORY_PROPERTY(_directory_flags DEFINITIONS) - #MESSAGE("_directory_flags ${_directory_flags}" ) - LIST(APPEND ${_out_compile_flags} ${_directory_flags}) - LIST(APPEND ${_out_compile_flags} ${CMAKE_CXX_FLAGS} ) - - SEPARATE_ARGUMENTS(${_out_compile_flags}) - -ENDMACRO(_PCH_GET_COMPILE_FLAGS) - - -MACRO(_PCH_WRITE_PCHDEP_CXX _targetName _include_file _dephelp) - - SET(${_dephelp} ${CMAKE_CURRENT_BINARY_DIR}/${_targetName}_pch_dephelp.cxx) - FILE(WRITE ${${_dephelp}} -"#include \"${_include_file}\" -int testfunction() -{ - return 0; -} -" - ) - -ENDMACRO(_PCH_WRITE_PCHDEP_CXX ) - -MACRO(_PCH_GET_COMPILE_COMMAND out_command _input _output) - - FILE(TO_NATIVE_PATH ${_input} _native_input) - FILE(TO_NATIVE_PATH ${_output} _native_output) - - - IF(CMAKE_COMPILER_IS_GNUCXX) - IF(CMAKE_CXX_COMPILER_ARG1) - # remove leading space in compiler argument - STRING(REGEX REPLACE "^ +" "" pchsupport_compiler_cxx_arg1 ${CMAKE_CXX_COMPILER_ARG1}) - - SET(${out_command} - ${CMAKE_CXX_COMPILER} ${pchsupport_compiler_cxx_arg1} ${_compile_FLAGS} -x c++-header -o ${_output} ${_input} - ) - ELSE(CMAKE_CXX_COMPILER_ARG1) - SET(${out_command} - ${CMAKE_CXX_COMPILER} ${_compile_FLAGS} -x c++-header -o ${_output} ${_input} - ) - ENDIF(CMAKE_CXX_COMPILER_ARG1) - 
ELSE(CMAKE_COMPILER_IS_GNUCXX) - - SET(_dummy_str "#include <${_input}>") - FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/pch_dummy.cpp ${_dummy_str}) - - SET(${out_command} - ${CMAKE_CXX_COMPILER} ${_compile_FLAGS} /c /Fp${_native_output} /Yc${_native_input} pch_dummy.cpp - ) - #/out:${_output} - - ENDIF(CMAKE_COMPILER_IS_GNUCXX) - -ENDMACRO(_PCH_GET_COMPILE_COMMAND ) - - - - -MACRO(_PCH_GET_TARGET_COMPILE_FLAGS _targetName _cflags _header_name _pch_path _dowarn ) - - FILE(TO_NATIVE_PATH ${_pch_path} _native_pch_path) - #message(${_native_pch_path}) - - IF(CMAKE_COMPILER_IS_GNUCXX) - # for use with distcc and gcc >4.0.1 if preprocessed files are accessible - # on all remote machines set - # PCH_ADDITIONAL_COMPILER_FLAGS to -fpch-preprocess - # if you want warnings for invalid header files (which is very inconvenient - # if you have different versions of the headers for different build types - # you may set _pch_dowarn - IF (_dowarn) - SET(${_cflags} "${PCH_ADDITIONAL_COMPILER_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS_${_targetName}} -include ${CMAKE_CURRENT_BINARY_DIR}/${_header_name} -Winvalid-pch " ) - ELSE (_dowarn) - SET(${_cflags} "${PCH_ADDITIONAL_COMPILER_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS_${_targetName}} -include ${CMAKE_CURRENT_BINARY_DIR}/${_header_name} " ) - ENDIF (_dowarn) - ELSE(CMAKE_COMPILER_IS_GNUCXX) - - set(${_cflags} "/Fp${_native_pch_path} /Yu${_header_name}" ) - - ENDIF(CMAKE_COMPILER_IS_GNUCXX) - -ENDMACRO(_PCH_GET_TARGET_COMPILE_FLAGS ) - -MACRO(GET_PRECOMPILED_HEADER_OUTPUT _targetName _input _output) - GET_FILENAME_COMPONENT(_name ${_input} NAME) - GET_FILENAME_COMPONENT(_path ${_input} PATH) - SET(_output "${CMAKE_CURRENT_BINARY_DIR}/${_name}.gch/${_targetName}_${CMAKE_BUILD_TYPE}.h++") -ENDMACRO(GET_PRECOMPILED_HEADER_OUTPUT _targetName _input) - - -MACRO(ADD_PRECOMPILED_HEADER_TO_TARGET _targetName _input _pch_output_to_use ) - if ( PCHSupport_FOUND ) - # to do: test whether compiler flags match between target _targetName - # and 
_pch_output_to_use - GET_FILENAME_COMPONENT(_name ${_input} NAME) - - IF( "${ARGN}" STREQUAL "0") - SET(_dowarn 0) - ELSE( "${ARGN}" STREQUAL "0") - SET(_dowarn 1) - ENDIF("${ARGN}" STREQUAL "0") - - - _PCH_GET_TARGET_COMPILE_FLAGS(${_targetName} _target_cflags ${_name} ${_pch_output_to_use} ${_dowarn}) - #MESSAGE("Add flags ${_target_cflags} to ${_targetName} " ) - - SET_TARGET_PROPERTIES(${_targetName} - PROPERTIES - COMPILE_FLAGS ${_target_cflags} - ) - - ADD_CUSTOM_TARGET(pch_Generate_${_targetName} - DEPENDS ${_pch_output_to_use} - ) - - ADD_DEPENDENCIES(${_targetName} pch_Generate_${_targetName} ) - else ( PCHSupport_FOUND ) - SET_TARGET_PROPERTIES(${_targetName} - PROPERTIES - COMPILE_FLAGS ${PCH_ADDITIONAL_COMPILER_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS_${_targetName}} - ) - endif ( PCHSupport_FOUND ) -ENDMACRO(ADD_PRECOMPILED_HEADER_TO_TARGET) - - -MACRO(ADD_PRECOMPILED_HEADER _targetName _input) - if ( PCHSupport_FOUND ) - SET(_PCH_current_target ${_targetName}) - - IF(NOT CMAKE_BUILD_TYPE) - MESSAGE(FATAL_ERROR - "This is the ADD_PRECOMPILED_HEADER macro. " - "You must set CMAKE_BUILD_TYPE!" 
- ) - ENDIF(NOT CMAKE_BUILD_TYPE) - - IF( "${ARGN}" STREQUAL "0") - SET(_dowarn 0) - ELSE( "${ARGN}" STREQUAL "0") - SET(_dowarn 1) - ENDIF("${ARGN}" STREQUAL "0") - - - GET_FILENAME_COMPONENT(_name ${_input} NAME) - GET_FILENAME_COMPONENT(_path ${_input} PATH) - GET_PRECOMPILED_HEADER_OUTPUT( ${_targetName} ${_input} _output) - GET_FILENAME_COMPONENT(_outdir ${_output} PATH ) - - GET_TARGET_PROPERTY(_targetType ${_PCH_current_target} TYPE) - _PCH_WRITE_PCHDEP_CXX(${_targetName} ${_input} _pch_dephelp_cxx) - - #MESSAGE(${_pch_dephelp_cxx}) - IF(${_targetType} STREQUAL SHARED_LIBRARY) - ADD_LIBRARY(${_targetName}_pch_dephelp SHARED ${_pch_dephelp_cxx} ) - ELSE(${_targetType} STREQUAL SHARED_LIBRARY) - ADD_LIBRARY(${_targetName}_pch_dephelp STATIC ${_pch_dephelp_cxx}) - ENDIF(${_targetType} STREQUAL SHARED_LIBRARY) - - FILE(MAKE_DIRECTORY ${_outdir}) - - - _PCH_GET_COMPILE_FLAGS(_compile_FLAGS) - - SET(_compile_FLAGS ${_compile_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS} ${PCH_ADDITIONAL_COMPILER_FLAGS_${_targetName}}) - - #MESSAGE("_compile_FLAGS: ${_compile_FLAGS}") - #message("COMMAND ${CMAKE_CXX_COMPILER} ${_compile_FLAGS} -x c++-header -o ${_output} ${_input}") - SET_SOURCE_FILES_PROPERTIES(${CMAKE_CURRENT_BINARY_DIR}/${_name} PROPERTIES GENERATED 1) - ADD_CUSTOM_COMMAND( - OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/${_name} - COMMAND ${CMAKE_COMMAND} -E copy ${_input} ${CMAKE_CURRENT_BINARY_DIR}/${_name} # ensure same directory! 
Required by gcc - DEPENDS ${_input} - ) - - #message("_command ${_input} ${_output}") - _PCH_GET_COMPILE_COMMAND(_command ${CMAKE_CURRENT_BINARY_DIR}/${_name} ${_output} ) - - #message(${_input} ) - #message("_output ${_output}") - - ADD_CUSTOM_COMMAND( - OUTPUT ${_output} - COMMAND ${_command} - DEPENDS ${_input} ${CMAKE_CURRENT_BINARY_DIR}/${_name} ${_targetName}_pch_dephelp - ) - - - ADD_PRECOMPILED_HEADER_TO_TARGET(${_targetName} ${_input} ${_output} ${_dowarn}) - endif ( PCHSupport_FOUND ) -ENDMACRO(ADD_PRECOMPILED_HEADER) - diff --git a/cmake/Toolchain-llvm.cmake b/cmake/Toolchain-llvm.cmake index 6d2c66f1..d98db8a1 100644 --- a/cmake/Toolchain-llvm.cmake +++ b/cmake/Toolchain-llvm.cmake @@ -7,7 +7,6 @@ SET(CMAKE_CXX_COMPILER clang++) SET(ENABLE_LLVM CACHE BOOL TRUE) SET(ENABLE_LLVM_BC CACHE BOOL FALSE) -SET(PCHSupport_ENABLED FALSE) IF ( ENABLE_LLVM_BC ) #TODO: make this work... diff --git a/cmake/Toolchain-mingw32.cmake b/cmake/Toolchain-mingw32.cmake index 8562fd81..73f6124c 100644 --- a/cmake/Toolchain-mingw32.cmake +++ b/cmake/Toolchain-mingw32.cmake @@ -5,29 +5,29 @@ # cmake -DCMAKE_TOOLCHAIN_FILE=../cmake/Toolchain-mingw32.cmake -C ../cmake/Toolchain-mingw32.cmake .. 
# the name of the target operating system -SET(CMAKE_SYSTEM_NAME Windows) +set(CMAKE_SYSTEM_NAME Windows) # which compilers to use for C and C++ -SET(CMAKE_C_COMPILER i586-mingw32msvc-gcc) -SET(CMAKE_CXX_COMPILER i586-mingw32msvc-g++) +set(CMAKE_C_COMPILER i586-mingw32msvc-gcc) +set(CMAKE_CXX_COMPILER i586-mingw32msvc-g++) # here is the target environment located -SET(CMAKE_FIND_ROOT_PATH /usr/i586-mingw32msvc /home/alex/mingw-install ) +set(CMAKE_FIND_ROOT_PATH /usr/i586-mingw32msvc /home/alex/mingw-install ) -INCLUDE_DIRECTORIES(/usr/lib/gcc/i586-mingw32msvc/4.2.1-sjlj/include/c++) +include_directories(/usr/lib/gcc/i586-mingw32msvc/4.2.1-sjlj/include/c++) # adjust the default behaviour of the FIND_XXX() commands: -# search headers and libraries in the target environment, search +# search headers and libraries in the target environment, search # programs in the host environment set(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER) set(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY) set(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY) -SET(_CL_HAVE_GCCVISIBILITYPATCH 0) -SET(_CL_HAVE_NAMESPACES_EXITCODE 0) -SET(_CL_HAVE_NO_SNPRINTF_BUG_EXITCODE 0) -SET(_CL_HAVE_NO_SNWPRINTF_BUG_EXITCODE 0) -SET(LUCENE_STATIC_CONSTANT_SYNTAX_EXITCODE 1) -SET(_CL_HAVE_TRY_BLOCKS_EXITCODE 0) -SET(ENABLE_ANSI_MODE OFF) +set(_CL_HAVE_GCCVISIBILITYPATCH 0) +set(_CL_HAVE_NAMESPACES_EXITCODE 0) +set(_CL_HAVE_NO_SNPRINTF_BUG_EXITCODE 0) +set(_CL_HAVE_NO_SNWPRINTF_BUG_EXITCODE 0) +set(LUCENE_STATIC_CONSTANT_SYNTAX_EXITCODE 1) +set(_CL_HAVE_TRY_BLOCKS_EXITCODE 0) +set(ENABLE_ANSI_MODE OFF) diff --git a/cmake/cotire.cmake b/cmake/cotire.cmake new file mode 100644 index 00000000..9a4982d4 --- /dev/null +++ b/cmake/cotire.cmake @@ -0,0 +1,4190 @@ +# - cotire (compile time reducer) +# +# See the cotire manual for usage hints. 
+# +#============================================================================= +# Copyright 2012-2018 Sascha Kratky +# +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation +# files (the "Software"), to deal in the Software without +# restriction, including without limitation the rights to use, +# copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the +# Software is furnished to do so, subject to the following +# conditions: +# +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +# OTHER DEALINGS IN THE SOFTWARE. 
+#============================================================================= + +if(__COTIRE_INCLUDED) + return() +endif() +set(__COTIRE_INCLUDED TRUE) + +# call cmake_minimum_required, but prevent modification of the CMake policy stack in include mode +# cmake_minimum_required also sets the policy version as a side effect, which we have to avoid +if (NOT CMAKE_SCRIPT_MODE_FILE) + cmake_policy(PUSH) +endif() +cmake_minimum_required(VERSION 2.8.12) +if (NOT CMAKE_SCRIPT_MODE_FILE) + cmake_policy(POP) +endif() + +set (COTIRE_CMAKE_MODULE_FILE "${CMAKE_CURRENT_LIST_FILE}") +set (COTIRE_CMAKE_MODULE_VERSION "1.8.0") + +# activate select policies +if (POLICY CMP0025) + # Compiler id for Apple Clang is now AppleClang + cmake_policy(SET CMP0025 NEW) +endif() + +if (POLICY CMP0026) + # disallow use of the LOCATION target property + cmake_policy(SET CMP0026 NEW) +endif() + +if (POLICY CMP0038) + # targets may not link directly to themselves + cmake_policy(SET CMP0038 NEW) +endif() + +if (POLICY CMP0039) + # utility targets may not have link dependencies + cmake_policy(SET CMP0039 NEW) +endif() + +if (POLICY CMP0040) + # target in the TARGET signature of add_custom_command() must exist + cmake_policy(SET CMP0040 NEW) +endif() + +if (POLICY CMP0045) + # error on non-existent target in get_target_property + cmake_policy(SET CMP0045 NEW) +endif() + +if (POLICY CMP0046) + # error on non-existent dependency in add_dependencies + cmake_policy(SET CMP0046 NEW) +endif() + +if (POLICY CMP0049) + # do not expand variables in target source entries + cmake_policy(SET CMP0049 NEW) +endif() + +if (POLICY CMP0050) + # disallow add_custom_command SOURCE signatures + cmake_policy(SET CMP0050 NEW) +endif() + +if (POLICY CMP0051) + # include TARGET_OBJECTS expressions in a target's SOURCES property + cmake_policy(SET CMP0051 NEW) +endif() + +if (POLICY CMP0053) + # simplify variable reference and escape sequence evaluation + cmake_policy(SET CMP0053 NEW) +endif() + +if (POLICY CMP0054) + # 
only interpret if() arguments as variables or keywords when unquoted + cmake_policy(SET CMP0054 NEW) +endif() + +if (POLICY CMP0055) + # strict checking for break() command + cmake_policy(SET CMP0055 NEW) +endif() + +include(CMakeParseArguments) +include(ProcessorCount) + +function (cotire_get_configuration_types _configsVar) + set (_configs "") + if (CMAKE_CONFIGURATION_TYPES) + list (APPEND _configs ${CMAKE_CONFIGURATION_TYPES}) + endif() + if (CMAKE_BUILD_TYPE) + list (APPEND _configs "${CMAKE_BUILD_TYPE}") + endif() + if (_configs) + list (REMOVE_DUPLICATES _configs) + set (${_configsVar} ${_configs} PARENT_SCOPE) + else() + set (${_configsVar} "None" PARENT_SCOPE) + endif() +endfunction() + +function (cotire_get_source_file_extension _sourceFile _extVar) + # get_filename_component returns extension from first occurrence of . in file name + # this function computes the extension from last occurrence of . in file name + string (FIND "${_sourceFile}" "." _index REVERSE) + if (_index GREATER -1) + math (EXPR _index "${_index} + 1") + string (SUBSTRING "${_sourceFile}" ${_index} -1 _sourceExt) + else() + set (_sourceExt "") + endif() + set (${_extVar} "${_sourceExt}" PARENT_SCOPE) +endfunction() + +macro (cotire_check_is_path_relative_to _path _isRelativeVar) + set (${_isRelativeVar} FALSE) + if (IS_ABSOLUTE "${_path}") + foreach (_dir ${ARGN}) + file (RELATIVE_PATH _relPath "${_dir}" "${_path}") + if (NOT _relPath OR (NOT IS_ABSOLUTE "${_relPath}" AND NOT "${_relPath}" MATCHES "^\\.\\.")) + set (${_isRelativeVar} TRUE) + break() + endif() + endforeach() + endif() +endmacro() + +function (cotire_filter_language_source_files _language _target _sourceFilesVar _excludedSourceFilesVar _cotiredSourceFilesVar) + if (CMAKE_${_language}_SOURCE_FILE_EXTENSIONS) + set (_languageExtensions "${CMAKE_${_language}_SOURCE_FILE_EXTENSIONS}") + else() + set (_languageExtensions "") + endif() + if (CMAKE_${_language}_IGNORE_EXTENSIONS) + set (_ignoreExtensions 
"${CMAKE_${_language}_IGNORE_EXTENSIONS}") + else() + set (_ignoreExtensions "") + endif() + if (COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS) + set (_excludeExtensions "${COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS}") + else() + set (_excludeExtensions "") + endif() + if (COTIRE_DEBUG AND _languageExtensions) + message (STATUS "${_language} source file extensions: ${_languageExtensions}") + endif() + if (COTIRE_DEBUG AND _ignoreExtensions) + message (STATUS "${_language} ignore extensions: ${_ignoreExtensions}") + endif() + if (COTIRE_DEBUG AND _excludeExtensions) + message (STATUS "${_language} exclude extensions: ${_excludeExtensions}") + endif() + if (CMAKE_VERSION VERSION_LESS "3.1.0") + set (_allSourceFiles ${ARGN}) + else() + # as of CMake 3.1 target sources may contain generator expressions + # since we cannot obtain required property information about source files added + # through generator expressions at configure time, we filter them out + string (GENEX_STRIP "${ARGN}" _allSourceFiles) + endif() + set (_filteredSourceFiles "") + set (_excludedSourceFiles "") + foreach (_sourceFile ${_allSourceFiles}) + get_source_file_property(_sourceIsHeaderOnly "${_sourceFile}" HEADER_FILE_ONLY) + get_source_file_property(_sourceIsExternal "${_sourceFile}" EXTERNAL_OBJECT) + get_source_file_property(_sourceIsSymbolic "${_sourceFile}" SYMBOLIC) + if (NOT _sourceIsHeaderOnly AND NOT _sourceIsExternal AND NOT _sourceIsSymbolic) + cotire_get_source_file_extension("${_sourceFile}" _sourceExt) + if (_sourceExt) + list (FIND _ignoreExtensions "${_sourceExt}" _ignoreIndex) + if (_ignoreIndex LESS 0) + list (FIND _excludeExtensions "${_sourceExt}" _excludeIndex) + if (_excludeIndex GREATER -1) + list (APPEND _excludedSourceFiles "${_sourceFile}") + else() + list (FIND _languageExtensions "${_sourceExt}" _sourceIndex) + if (_sourceIndex GREATER -1) + # consider source file unless it is excluded explicitly + get_source_file_property(_sourceIsExcluded "${_sourceFile}" COTIRE_EXCLUDED) + if 
(_sourceIsExcluded) + list (APPEND _excludedSourceFiles "${_sourceFile}") + else() + list (APPEND _filteredSourceFiles "${_sourceFile}") + endif() + else() + get_source_file_property(_sourceLanguage "${_sourceFile}" LANGUAGE) + if ("${_sourceLanguage}" STREQUAL "${_language}") + # add to excluded sources, if file is not ignored and has correct language without having the correct extension + list (APPEND _excludedSourceFiles "${_sourceFile}") + endif() + endif() + endif() + endif() + endif() + endif() + endforeach() + # separate filtered source files from already cotired ones + # the COTIRE_TARGET property of a source file may be set while a target is being processed by cotire + set (_sourceFiles "") + set (_cotiredSourceFiles "") + foreach (_sourceFile ${_filteredSourceFiles}) + get_source_file_property(_sourceIsCotired "${_sourceFile}" COTIRE_TARGET) + if (_sourceIsCotired) + list (APPEND _cotiredSourceFiles "${_sourceFile}") + else() + get_source_file_property(_sourceCompileFlags "${_sourceFile}" COMPILE_FLAGS) + if (_sourceCompileFlags) + # add to excluded sources, if file has custom compile flags + list (APPEND _excludedSourceFiles "${_sourceFile}") + else() + get_source_file_property(_sourceCompileOptions "${_sourceFile}" COMPILE_OPTIONS) + if (_sourceCompileOptions) + # add to excluded sources, if file has list of custom compile options + list (APPEND _excludedSourceFiles "${_sourceFile}") + else() + list (APPEND _sourceFiles "${_sourceFile}") + endif() + endif() + endif() + endforeach() + if (COTIRE_DEBUG) + if (_sourceFiles) + message (STATUS "Filtered ${_target} ${_language} sources: ${_sourceFiles}") + endif() + if (_excludedSourceFiles) + message (STATUS "Excluded ${_target} ${_language} sources: ${_excludedSourceFiles}") + endif() + if (_cotiredSourceFiles) + message (STATUS "Cotired ${_target} ${_language} sources: ${_cotiredSourceFiles}") + endif() + endif() + set (${_sourceFilesVar} ${_sourceFiles} PARENT_SCOPE) + set (${_excludedSourceFilesVar} 
${_excludedSourceFiles} PARENT_SCOPE)
+	set (${_cotiredSourceFilesVar} ${_cotiredSourceFiles} PARENT_SCOPE)
+endfunction()
+
+function (cotire_get_objects_with_property_on _filteredObjectsVar _property _type)
+	set (_filteredObjects "")
+	foreach (_object ${ARGN})
+		get_property(_isSet ${_type} "${_object}" PROPERTY ${_property} SET)
+		if (_isSet)
+			get_property(_propertyValue ${_type} "${_object}" PROPERTY ${_property})
+			if (_propertyValue)
+				list (APPEND _filteredObjects "${_object}")
+			endif()
+		endif()
+	endforeach()
+	set (${_filteredObjectsVar} ${_filteredObjects} PARENT_SCOPE)
+endfunction()
+
+function (cotire_get_objects_with_property_off _filteredObjectsVar _property _type)
+	set (_filteredObjects "")
+	foreach (_object ${ARGN})
+		get_property(_isSet ${_type} "${_object}" PROPERTY ${_property} SET)
+		if (_isSet)
+			get_property(_propertyValue ${_type} "${_object}" PROPERTY ${_property})
+			if (NOT _propertyValue)
+				list (APPEND _filteredObjects "${_object}")
+			endif()
+		endif()
+	endforeach()
+	set (${_filteredObjectsVar} ${_filteredObjects} PARENT_SCOPE)
+endfunction()
+
+function (cotire_get_source_file_property_values _valuesVar _property)
+	set (_values "")
+	foreach (_sourceFile ${ARGN})
+		get_source_file_property(_propertyValue "${_sourceFile}" ${_property})
+		if (_propertyValue)
+			list (APPEND _values "${_propertyValue}")
+		endif()
+	endforeach()
+	set (${_valuesVar} ${_values} PARENT_SCOPE)
+endfunction()
+
+function (cotire_resolve_config_properties _configurations _propertiesVar)
+	set (_properties "")
+	foreach (_property ${ARGN})
+		if ("${_property}" MATCHES "<CONFIG>")
+			foreach (_config ${_configurations})
+				string (TOUPPER "${_config}" _upperConfig)
+				string (REPLACE "<CONFIG>" "${_upperConfig}" _configProperty "${_property}")
+				list (APPEND _properties ${_configProperty})
+			endforeach()
+		else()
+			list (APPEND _properties ${_property})
+		endif()
+	endforeach()
+	set (${_propertiesVar} ${_properties} PARENT_SCOPE)
+endfunction()
+
+function 
(cotire_copy_set_properties _configurations _type _source _target)
+	cotire_resolve_config_properties("${_configurations}" _properties ${ARGN})
+	foreach (_property ${_properties})
+		get_property(_isSet ${_type} ${_source} PROPERTY ${_property} SET)
+		if (_isSet)
+			get_property(_propertyValue ${_type} ${_source} PROPERTY ${_property})
+			set_property(${_type} ${_target} PROPERTY ${_property} "${_propertyValue}")
+		endif()
+	endforeach()
+endfunction()
+
+function (cotire_get_target_usage_requirements _target _config _targetRequirementsVar)
+	set (_targetRequirements "")
+	get_target_property(_librariesToProcess ${_target} LINK_LIBRARIES)
+	while (_librariesToProcess)
+		# remove from head
+		list (GET _librariesToProcess 0 _library)
+		list (REMOVE_AT _librariesToProcess 0)
+		if (_library MATCHES "^\\$<\\$<CONFIG:${_config}>:([A-Za-z0-9_:-]+)>$")
+			set (_library "${CMAKE_MATCH_1}")
+		elseif (_config STREQUAL "None" AND _library MATCHES "^\\$<\\$<CONFIG:>:([A-Za-z0-9_:-]+)>$")
+			set (_library "${CMAKE_MATCH_1}")
+		endif()
+		if (TARGET ${_library})
+			list (FIND _targetRequirements ${_library} _index)
+			if (_index LESS 0)
+				list (APPEND _targetRequirements ${_library})
+				# BFS traversal of transitive libraries
+				get_target_property(_libraries ${_library} INTERFACE_LINK_LIBRARIES)
+				if (_libraries)
+					list (APPEND _librariesToProcess ${_libraries})
+					list (REMOVE_DUPLICATES _librariesToProcess)
+				endif()
+			endif()
+		endif()
+	endwhile()
+	set (${_targetRequirementsVar} ${_targetRequirements} PARENT_SCOPE)
+endfunction()
+
+function (cotire_filter_compile_flags _language _flagFilter _matchedOptionsVar _unmatchedOptionsVar)
+	if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel")
+		set (_flagPrefix "[/-]")
+	else()
+		set (_flagPrefix "--?")
+	endif()
+	set (_optionFlag "")
+	set (_matchedOptions "")
+	set (_unmatchedOptions "")
+	foreach (_compileFlag ${ARGN})
+		if (_compileFlag)
+			if (_optionFlag AND NOT "${_compileFlag}" MATCHES "^${_flagPrefix}")
+				# option with separate argument
+				list 
(APPEND _matchedOptions "${_compileFlag}") + set (_optionFlag "") + elseif ("${_compileFlag}" MATCHES "^(${_flagPrefix})(${_flagFilter})$") + # remember option + set (_optionFlag "${CMAKE_MATCH_2}") + elseif ("${_compileFlag}" MATCHES "^(${_flagPrefix})(${_flagFilter})(.+)$") + # option with joined argument + list (APPEND _matchedOptions "${CMAKE_MATCH_3}") + set (_optionFlag "") + else() + # flush remembered option + if (_optionFlag) + list (APPEND _matchedOptions "${_optionFlag}") + set (_optionFlag "") + endif() + # add to unfiltered options + list (APPEND _unmatchedOptions "${_compileFlag}") + endif() + endif() + endforeach() + if (_optionFlag) + list (APPEND _matchedOptions "${_optionFlag}") + endif() + if (COTIRE_DEBUG AND _matchedOptions) + message (STATUS "Filter ${_flagFilter} matched: ${_matchedOptions}") + endif() + if (COTIRE_DEBUG AND _unmatchedOptions) + message (STATUS "Filter ${_flagFilter} unmatched: ${_unmatchedOptions}") + endif() + set (${_matchedOptionsVar} ${_matchedOptions} PARENT_SCOPE) + set (${_unmatchedOptionsVar} ${_unmatchedOptions} PARENT_SCOPE) +endfunction() + +function (cotire_is_target_supported _target _isSupportedVar) + if (NOT TARGET "${_target}") + set (${_isSupportedVar} FALSE PARENT_SCOPE) + return() + endif() + get_target_property(_imported ${_target} IMPORTED) + if (_imported) + set (${_isSupportedVar} FALSE PARENT_SCOPE) + return() + endif() + get_target_property(_targetType ${_target} TYPE) + if (NOT _targetType MATCHES "EXECUTABLE|(STATIC|SHARED|MODULE|OBJECT)_LIBRARY") + set (${_isSupportedVar} FALSE PARENT_SCOPE) + return() + endif() + set (${_isSupportedVar} TRUE PARENT_SCOPE) +endfunction() + +function (cotire_get_target_compile_flags _config _language _target _flagsVar) + string (TOUPPER "${_config}" _upperConfig) + # collect options from CMake language variables + set (_compileFlags "") + if (CMAKE_${_language}_FLAGS) + set (_compileFlags "${_compileFlags} ${CMAKE_${_language}_FLAGS}") + endif() + if 
(CMAKE_${_language}_FLAGS_${_upperConfig}) + set (_compileFlags "${_compileFlags} ${CMAKE_${_language}_FLAGS_${_upperConfig}}") + endif() + if (_target) + # add target compile flags + get_target_property(_targetflags ${_target} COMPILE_FLAGS) + if (_targetflags) + set (_compileFlags "${_compileFlags} ${_targetflags}") + endif() + endif() + if (UNIX) + separate_arguments(_compileFlags UNIX_COMMAND "${_compileFlags}") + elseif(WIN32) + separate_arguments(_compileFlags WINDOWS_COMMAND "${_compileFlags}") + else() + separate_arguments(_compileFlags) + endif() + # target compile options + if (_target) + get_target_property(_targetOptions ${_target} COMPILE_OPTIONS) + if (_targetOptions) + list (APPEND _compileFlags ${_targetOptions}) + endif() + endif() + # interface compile options from linked library targets + if (_target) + set (_linkedTargets "") + cotire_get_target_usage_requirements(${_target} ${_config} _linkedTargets) + foreach (_linkedTarget ${_linkedTargets}) + get_target_property(_targetOptions ${_linkedTarget} INTERFACE_COMPILE_OPTIONS) + if (_targetOptions) + list (APPEND _compileFlags ${_targetOptions}) + endif() + endforeach() + endif() + # handle language standard properties + if (CMAKE_${_language}_STANDARD_DEFAULT) + # used compiler supports language standard levels + if (_target) + get_target_property(_targetLanguageStandard ${_target} ${_language}_STANDARD) + if (_targetLanguageStandard) + set (_type "EXTENSION") + get_property(_isSet TARGET ${_target} PROPERTY ${_language}_EXTENSIONS SET) + if (_isSet) + get_target_property(_targetUseLanguageExtensions ${_target} ${_language}_EXTENSIONS) + if (NOT _targetUseLanguageExtensions) + set (_type "STANDARD") + endif() + endif() + if (CMAKE_${_language}${_targetLanguageStandard}_${_type}_COMPILE_OPTION) + list (APPEND _compileFlags "${CMAKE_${_language}${_targetLanguageStandard}_${_type}_COMPILE_OPTION}") + endif() + endif() + endif() + endif() + # handle the POSITION_INDEPENDENT_CODE target property + if 
(_target) + get_target_property(_targetPIC ${_target} POSITION_INDEPENDENT_CODE) + if (_targetPIC) + get_target_property(_targetType ${_target} TYPE) + if (_targetType STREQUAL "EXECUTABLE" AND CMAKE_${_language}_COMPILE_OPTIONS_PIE) + list (APPEND _compileFlags "${CMAKE_${_language}_COMPILE_OPTIONS_PIE}") + elseif (CMAKE_${_language}_COMPILE_OPTIONS_PIC) + list (APPEND _compileFlags "${CMAKE_${_language}_COMPILE_OPTIONS_PIC}") + endif() + endif() + endif() + # handle visibility target properties + if (_target) + get_target_property(_targetVisibility ${_target} ${_language}_VISIBILITY_PRESET) + if (_targetVisibility AND CMAKE_${_language}_COMPILE_OPTIONS_VISIBILITY) + list (APPEND _compileFlags "${CMAKE_${_language}_COMPILE_OPTIONS_VISIBILITY}${_targetVisibility}") + endif() + get_target_property(_targetVisibilityInlines ${_target} VISIBILITY_INLINES_HIDDEN) + if (_targetVisibilityInlines AND CMAKE_${_language}_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN) + list (APPEND _compileFlags "${CMAKE_${_language}_COMPILE_OPTIONS_VISIBILITY_INLINES_HIDDEN}") + endif() + endif() + # platform specific flags + if (APPLE) + get_target_property(_architectures ${_target} OSX_ARCHITECTURES_${_upperConfig}) + if (NOT _architectures) + get_target_property(_architectures ${_target} OSX_ARCHITECTURES) + endif() + if (_architectures) + foreach (_arch ${_architectures}) + list (APPEND _compileFlags "-arch" "${_arch}") + endforeach() + endif() + if (CMAKE_OSX_SYSROOT) + if (CMAKE_${_language}_SYSROOT_FLAG) + list (APPEND _compileFlags "${CMAKE_${_language}_SYSROOT_FLAG}" "${CMAKE_OSX_SYSROOT}") + else() + list (APPEND _compileFlags "-isysroot" "${CMAKE_OSX_SYSROOT}") + endif() + endif() + if (CMAKE_OSX_DEPLOYMENT_TARGET) + if (CMAKE_${_language}_OSX_DEPLOYMENT_TARGET_FLAG) + list (APPEND _compileFlags "${CMAKE_${_language}_OSX_DEPLOYMENT_TARGET_FLAG}${CMAKE_OSX_DEPLOYMENT_TARGET}") + else() + list (APPEND _compileFlags "-mmacosx-version-min=${CMAKE_OSX_DEPLOYMENT_TARGET}") + endif() + 
endif() + endif() + if (COTIRE_DEBUG AND _compileFlags) + message (STATUS "Target ${_target} compile flags: ${_compileFlags}") + endif() + set (${_flagsVar} ${_compileFlags} PARENT_SCOPE) +endfunction() + +function (cotire_get_target_include_directories _config _language _target _includeDirsVar _systemIncludeDirsVar) + set (_includeDirs "") + set (_systemIncludeDirs "") + # default include dirs + if (CMAKE_INCLUDE_CURRENT_DIR) + list (APPEND _includeDirs "${CMAKE_CURRENT_BINARY_DIR}") + list (APPEND _includeDirs "${CMAKE_CURRENT_SOURCE_DIR}") + endif() + set (_targetFlags "") + cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags) + # parse additional include directories from target compile flags + if (CMAKE_INCLUDE_FLAG_${_language}) + string (STRIP "${CMAKE_INCLUDE_FLAG_${_language}}" _includeFlag) + string (REGEX REPLACE "^[-/]+" "" _includeFlag "${_includeFlag}") + if (_includeFlag) + set (_dirs "") + cotire_filter_compile_flags("${_language}" "${_includeFlag}" _dirs _ignore ${_targetFlags}) + if (_dirs) + list (APPEND _includeDirs ${_dirs}) + endif() + endif() + endif() + # parse additional system include directories from target compile flags + if (CMAKE_INCLUDE_SYSTEM_FLAG_${_language}) + string (STRIP "${CMAKE_INCLUDE_SYSTEM_FLAG_${_language}}" _includeFlag) + string (REGEX REPLACE "^[-/]+" "" _includeFlag "${_includeFlag}") + if (_includeFlag) + set (_dirs "") + cotire_filter_compile_flags("${_language}" "${_includeFlag}" _dirs _ignore ${_targetFlags}) + if (_dirs) + list (APPEND _systemIncludeDirs ${_dirs}) + endif() + endif() + endif() + # target include directories + get_directory_property(_dirs DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" INCLUDE_DIRECTORIES) + if (_target) + get_target_property(_targetDirs ${_target} INCLUDE_DIRECTORIES) + if (_targetDirs) + list (APPEND _dirs ${_targetDirs}) + endif() + get_target_property(_targetDirs ${_target} INTERFACE_SYSTEM_INCLUDE_DIRECTORIES) + if (_targetDirs) + list (APPEND 
_systemIncludeDirs ${_targetDirs})
+		endif()
+	endif()
+	# interface include directories from linked library targets
+	if (_target)
+		set (_linkedTargets "")
+		cotire_get_target_usage_requirements(${_target} ${_config} _linkedTargets)
+		foreach (_linkedTarget ${_linkedTargets})
+			get_target_property(_linkedTargetType ${_linkedTarget} TYPE)
+			if (CMAKE_INCLUDE_CURRENT_DIR_IN_INTERFACE AND NOT CMAKE_VERSION VERSION_LESS "3.4.0" AND
+				_linkedTargetType MATCHES "(STATIC|SHARED|MODULE|OBJECT)_LIBRARY")
+				# CMAKE_INCLUDE_CURRENT_DIR_IN_INTERFACE refers to CMAKE_CURRENT_BINARY_DIR and CMAKE_CURRENT_SOURCE_DIR
+				# at the time, when the target was created. These correspond to the target properties BINARY_DIR and SOURCE_DIR
+				# which are only available with CMake 3.4 or later.
+				get_target_property(_targetDirs ${_linkedTarget} BINARY_DIR)
+				if (_targetDirs)
+					list (APPEND _dirs ${_targetDirs})
+				endif()
+				get_target_property(_targetDirs ${_linkedTarget} SOURCE_DIR)
+				if (_targetDirs)
+					list (APPEND _dirs ${_targetDirs})
+				endif()
+			endif()
+			get_target_property(_targetDirs ${_linkedTarget} INTERFACE_INCLUDE_DIRECTORIES)
+			if (_targetDirs)
+				list (APPEND _dirs ${_targetDirs})
+			endif()
+			get_target_property(_targetDirs ${_linkedTarget} INTERFACE_SYSTEM_INCLUDE_DIRECTORIES)
+			if (_targetDirs)
+				list (APPEND _systemIncludeDirs ${_targetDirs})
+			endif()
+		endforeach()
+	endif()
+	if (_dirs)
+		list (REMOVE_DUPLICATES _dirs)
+	endif()
+	list (LENGTH _includeDirs _projectInsertIndex)
+	foreach (_dir ${_dirs})
+		if (CMAKE_INCLUDE_DIRECTORIES_PROJECT_BEFORE)
+			cotire_check_is_path_relative_to("${_dir}" _isRelative "${CMAKE_SOURCE_DIR}" "${CMAKE_BINARY_DIR}")
+			if (_isRelative)
+				list (LENGTH _includeDirs _len)
+				if (_len EQUAL _projectInsertIndex)
+					list (APPEND _includeDirs "${_dir}")
+				else()
+					list (INSERT _includeDirs _projectInsertIndex "${_dir}")
+				endif()
+				math (EXPR _projectInsertIndex "${_projectInsertIndex} + 1")
+			else()
+				list (APPEND _includeDirs "${_dir}")
+			endif()
+			
else() + list (APPEND _includeDirs "${_dir}") + endif() + endforeach() + list (REMOVE_DUPLICATES _includeDirs) + list (REMOVE_DUPLICATES _systemIncludeDirs) + if (CMAKE_${_language}_IMPLICIT_INCLUDE_DIRECTORIES) + list (REMOVE_ITEM _includeDirs ${CMAKE_${_language}_IMPLICIT_INCLUDE_DIRECTORIES}) + endif() + if (WIN32 AND NOT MINGW) + # convert Windows paths in include directories to CMake paths + if (_includeDirs) + set (_paths "") + foreach (_dir ${_includeDirs}) + file (TO_CMAKE_PATH "${_dir}" _path) + list (APPEND _paths "${_path}") + endforeach() + set (_includeDirs ${_paths}) + endif() + if (_systemIncludeDirs) + set (_paths "") + foreach (_dir ${_systemIncludeDirs}) + file (TO_CMAKE_PATH "${_dir}" _path) + list (APPEND _paths "${_path}") + endforeach() + set (_systemIncludeDirs ${_paths}) + endif() + endif() + if (COTIRE_DEBUG AND _includeDirs) + message (STATUS "Target ${_target} include dirs: ${_includeDirs}") + endif() + set (${_includeDirsVar} ${_includeDirs} PARENT_SCOPE) + if (COTIRE_DEBUG AND _systemIncludeDirs) + message (STATUS "Target ${_target} system include dirs: ${_systemIncludeDirs}") + endif() + set (${_systemIncludeDirsVar} ${_systemIncludeDirs} PARENT_SCOPE) +endfunction() + +function (cotire_get_target_export_symbol _target _exportSymbolVar) + set (_exportSymbol "") + get_target_property(_targetType ${_target} TYPE) + get_target_property(_enableExports ${_target} ENABLE_EXPORTS) + if (_targetType MATCHES "(SHARED|MODULE)_LIBRARY" OR + (_targetType STREQUAL "EXECUTABLE" AND _enableExports)) + get_target_property(_exportSymbol ${_target} DEFINE_SYMBOL) + if (NOT _exportSymbol) + set (_exportSymbol "${_target}_EXPORTS") + endif() + string (MAKE_C_IDENTIFIER "${_exportSymbol}" _exportSymbol) + endif() + set (${_exportSymbolVar} ${_exportSymbol} PARENT_SCOPE) +endfunction() + +function (cotire_get_target_compile_definitions _config _language _target _definitionsVar) + string (TOUPPER "${_config}" _upperConfig) + set (_configDefinitions "") + # 
CMAKE_INTDIR for multi-configuration build systems + if (NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") + list (APPEND _configDefinitions "CMAKE_INTDIR=\"${_config}\"") + endif() + # target export define symbol + cotire_get_target_export_symbol("${_target}" _defineSymbol) + if (_defineSymbol) + list (APPEND _configDefinitions "${_defineSymbol}") + endif() + # directory compile definitions + get_directory_property(_definitions DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" COMPILE_DEFINITIONS) + if (_definitions) + list (APPEND _configDefinitions ${_definitions}) + endif() + get_directory_property(_definitions DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" COMPILE_DEFINITIONS_${_upperConfig}) + if (_definitions) + list (APPEND _configDefinitions ${_definitions}) + endif() + # target compile definitions + get_target_property(_definitions ${_target} COMPILE_DEFINITIONS) + if (_definitions) + list (APPEND _configDefinitions ${_definitions}) + endif() + get_target_property(_definitions ${_target} COMPILE_DEFINITIONS_${_upperConfig}) + if (_definitions) + list (APPEND _configDefinitions ${_definitions}) + endif() + # interface compile definitions from linked library targets + set (_linkedTargets "") + cotire_get_target_usage_requirements(${_target} ${_config} _linkedTargets) + foreach (_linkedTarget ${_linkedTargets}) + get_target_property(_definitions ${_linkedTarget} INTERFACE_COMPILE_DEFINITIONS) + if (_definitions) + list (APPEND _configDefinitions ${_definitions}) + endif() + endforeach() + # parse additional compile definitions from target compile flags + # and do not look at directory compile definitions, which we already handled + set (_targetFlags "") + cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags) + cotire_filter_compile_flags("${_language}" "D" _definitions _ignore ${_targetFlags}) + if (_definitions) + list (APPEND _configDefinitions ${_definitions}) + endif() + list (REMOVE_DUPLICATES _configDefinitions) + if (COTIRE_DEBUG AND 
_configDefinitions) + message (STATUS "Target ${_target} compile definitions: ${_configDefinitions}") + endif() + set (${_definitionsVar} ${_configDefinitions} PARENT_SCOPE) +endfunction() + +function (cotire_get_target_compiler_flags _config _language _target _compilerFlagsVar) + # parse target compile flags omitting compile definitions and include directives + set (_targetFlags "") + cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags) + set (_flagFilter "D") + if (CMAKE_INCLUDE_FLAG_${_language}) + string (STRIP "${CMAKE_INCLUDE_FLAG_${_language}}" _includeFlag) + string (REGEX REPLACE "^[-/]+" "" _includeFlag "${_includeFlag}") + if (_includeFlag) + set (_flagFilter "${_flagFilter}|${_includeFlag}") + endif() + endif() + if (CMAKE_INCLUDE_SYSTEM_FLAG_${_language}) + string (STRIP "${CMAKE_INCLUDE_SYSTEM_FLAG_${_language}}" _includeFlag) + string (REGEX REPLACE "^[-/]+" "" _includeFlag "${_includeFlag}") + if (_includeFlag) + set (_flagFilter "${_flagFilter}|${_includeFlag}") + endif() + endif() + set (_compilerFlags "") + cotire_filter_compile_flags("${_language}" "${_flagFilter}" _ignore _compilerFlags ${_targetFlags}) + if (COTIRE_DEBUG AND _compilerFlags) + message (STATUS "Target ${_target} compiler flags: ${_compilerFlags}") + endif() + set (${_compilerFlagsVar} ${_compilerFlags} PARENT_SCOPE) +endfunction() + +function (cotire_add_sys_root_paths _pathsVar) + if (APPLE) + if (CMAKE_OSX_SYSROOT AND CMAKE_${_language}_HAS_ISYSROOT) + foreach (_path IN LISTS ${_pathsVar}) + if (IS_ABSOLUTE "${_path}") + get_filename_component(_path "${CMAKE_OSX_SYSROOT}/${_path}" ABSOLUTE) + if (EXISTS "${_path}") + list (APPEND ${_pathsVar} "${_path}") + endif() + endif() + endforeach() + endif() + endif() + set (${_pathsVar} ${${_pathsVar}} PARENT_SCOPE) +endfunction() + +function (cotire_get_source_extra_properties _sourceFile _pattern _resultVar) + set (_extraProperties ${ARGN}) + set (_result "") + if (_extraProperties) + list (FIND 
_extraProperties "${_sourceFile}" _index) + if (_index GREATER -1) + math (EXPR _index "${_index} + 1") + list (LENGTH _extraProperties _len) + math (EXPR _len "${_len} - 1") + foreach (_index RANGE ${_index} ${_len}) + list (GET _extraProperties ${_index} _value) + if (_value MATCHES "${_pattern}") + list (APPEND _result "${_value}") + else() + break() + endif() + endforeach() + endif() + endif() + set (${_resultVar} ${_result} PARENT_SCOPE) +endfunction() + +function (cotire_get_source_compile_definitions _config _language _sourceFile _definitionsVar) + set (_compileDefinitions "") + if (NOT CMAKE_SCRIPT_MODE_FILE) + string (TOUPPER "${_config}" _upperConfig) + get_source_file_property(_definitions "${_sourceFile}" COMPILE_DEFINITIONS) + if (_definitions) + list (APPEND _compileDefinitions ${_definitions}) + endif() + get_source_file_property(_definitions "${_sourceFile}" COMPILE_DEFINITIONS_${_upperConfig}) + if (_definitions) + list (APPEND _compileDefinitions ${_definitions}) + endif() + endif() + cotire_get_source_extra_properties("${_sourceFile}" "^[a-zA-Z0-9_]+(=.*)?$" _definitions ${ARGN}) + if (_definitions) + list (APPEND _compileDefinitions ${_definitions}) + endif() + if (COTIRE_DEBUG AND _compileDefinitions) + message (STATUS "Source ${_sourceFile} compile definitions: ${_compileDefinitions}") + endif() + set (${_definitionsVar} ${_compileDefinitions} PARENT_SCOPE) +endfunction() + +function (cotire_get_source_files_compile_definitions _config _language _definitionsVar) + set (_configDefinitions "") + foreach (_sourceFile ${ARGN}) + cotire_get_source_compile_definitions("${_config}" "${_language}" "${_sourceFile}" _sourceDefinitions) + if (_sourceDefinitions) + list (APPEND _configDefinitions "${_sourceFile}" ${_sourceDefinitions} "-") + endif() + endforeach() + set (${_definitionsVar} ${_configDefinitions} PARENT_SCOPE) +endfunction() + +function (cotire_get_source_undefs _sourceFile _property _sourceUndefsVar) + set (_sourceUndefs "") + if (NOT 
CMAKE_SCRIPT_MODE_FILE)
+		get_source_file_property(_undefs "${_sourceFile}" ${_property})
+		if (_undefs)
+			list (APPEND _sourceUndefs ${_undefs})
+		endif()
+	endif()
+	cotire_get_source_extra_properties("${_sourceFile}" "^[a-zA-Z0-9_]+$" _undefs ${ARGN})
+	if (_undefs)
+		list (APPEND _sourceUndefs ${_undefs})
+	endif()
+	if (COTIRE_DEBUG AND _sourceUndefs)
+		message (STATUS "Source ${_sourceFile} ${_property} undefs: ${_sourceUndefs}")
+	endif()
+	set (${_sourceUndefsVar} ${_sourceUndefs} PARENT_SCOPE)
+endfunction()
+
+function (cotire_get_source_files_undefs _property _sourceUndefsVar)
+	set (_sourceUndefs "")
+	foreach (_sourceFile ${ARGN})
+		cotire_get_source_undefs("${_sourceFile}" ${_property} _undefs)
+		if (_undefs)
+			list (APPEND _sourceUndefs "${_sourceFile}" ${_undefs} "-")
+		endif()
+	endforeach()
+	set (${_sourceUndefsVar} ${_sourceUndefs} PARENT_SCOPE)
+endfunction()
+
+macro (cotire_set_cmd_to_prologue _cmdVar)
+	set (${_cmdVar} "${CMAKE_COMMAND}")
+	if (COTIRE_DEBUG)
+		list (APPEND ${_cmdVar} "--warn-uninitialized")
+	endif()
+	list (APPEND ${_cmdVar} "-DCOTIRE_BUILD_TYPE:STRING=$<CONFIGURATION>")
+	if (XCODE)
+		list (APPEND ${_cmdVar} "-DXCODE:BOOL=TRUE")
+	endif()
+	if (COTIRE_VERBOSE)
+		list (APPEND ${_cmdVar} "-DCOTIRE_VERBOSE:BOOL=ON")
+	elseif("${CMAKE_GENERATOR}" MATCHES "Makefiles")
+		list (APPEND ${_cmdVar} "-DCOTIRE_VERBOSE:BOOL=$(VERBOSE)")
+	endif()
+endmacro()
+
+function (cotire_init_compile_cmd _cmdVar _language _compilerLauncher _compilerExe _compilerArg1)
+	if (NOT _compilerLauncher)
+		set (_compilerLauncher ${CMAKE_${_language}_COMPILER_LAUNCHER})
+	endif()
+	if (NOT _compilerExe)
+		set (_compilerExe "${CMAKE_${_language}_COMPILER}")
+	endif()
+	if (NOT _compilerArg1)
+		set (_compilerArg1 ${CMAKE_${_language}_COMPILER_ARG1})
+	endif()
+	if (WIN32)
+		file (TO_NATIVE_PATH "${_compilerExe}" _compilerExe)
+	endif()
+	string (STRIP "${_compilerArg1}" _compilerArg1)
+	if ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja")
+		# compiler launcher is only supported 
for Makefile and Ninja + set (${_cmdVar} ${_compilerLauncher} "${_compilerExe}" ${_compilerArg1} PARENT_SCOPE) + else() + set (${_cmdVar} "${_compilerExe}" ${_compilerArg1} PARENT_SCOPE) + endif() +endfunction() + +macro (cotire_add_definitions_to_cmd _cmdVar _language) + foreach (_definition ${ARGN}) + if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") + list (APPEND ${_cmdVar} "/D${_definition}") + else() + list (APPEND ${_cmdVar} "-D${_definition}") + endif() + endforeach() +endmacro() + +function (cotire_add_includes_to_cmd _cmdVar _language _includesVar _systemIncludesVar) + set (_includeDirs ${${_includesVar}} ${${_systemIncludesVar}}) + if (_includeDirs) + list (REMOVE_DUPLICATES _includeDirs) + foreach (_include ${_includeDirs}) + if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel") + file (TO_NATIVE_PATH "${_include}" _include) + list (APPEND ${_cmdVar} "${CMAKE_INCLUDE_FLAG_${_language}}${CMAKE_INCLUDE_FLAG_SEP_${_language}}${_include}") + else() + set (_index -1) + if ("${CMAKE_INCLUDE_SYSTEM_FLAG_${_language}}" MATCHES ".+") + list (FIND ${_systemIncludesVar} "${_include}" _index) + endif() + if (_index GREATER -1) + list (APPEND ${_cmdVar} "${CMAKE_INCLUDE_SYSTEM_FLAG_${_language}}${CMAKE_INCLUDE_FLAG_SEP_${_language}}${_include}") + else() + list (APPEND ${_cmdVar} "${CMAKE_INCLUDE_FLAG_${_language}}${CMAKE_INCLUDE_FLAG_SEP_${_language}}${_include}") + endif() + endif() + endforeach() + endif() + set (${_cmdVar} ${${_cmdVar}} PARENT_SCOPE) +endfunction() + +function (cotire_add_frameworks_to_cmd _cmdVar _language _includesVar _systemIncludesVar) + if (APPLE) + set (_frameworkDirs "") + foreach (_include ${${_includesVar}}) + if (IS_ABSOLUTE "${_include}" AND _include MATCHES "\\.framework$") + get_filename_component(_frameworkDir "${_include}" DIRECTORY) + list (APPEND _frameworkDirs "${_frameworkDir}") + endif() + endforeach() + set (_systemFrameworkDirs "") + foreach (_include ${${_systemIncludesVar}}) + if (IS_ABSOLUTE 
"${_include}" AND _include MATCHES "\\.framework$") + get_filename_component(_frameworkDir "${_include}" DIRECTORY) + list (APPEND _systemFrameworkDirs "${_frameworkDir}") + endif() + endforeach() + if (_systemFrameworkDirs) + list (APPEND _frameworkDirs ${_systemFrameworkDirs}) + endif() + if (_frameworkDirs) + list (REMOVE_DUPLICATES _frameworkDirs) + foreach (_frameworkDir ${_frameworkDirs}) + set (_index -1) + if ("${CMAKE_${_language}_SYSTEM_FRAMEWORK_SEARCH_FLAG}" MATCHES ".+") + list (FIND _systemFrameworkDirs "${_frameworkDir}" _index) + endif() + if (_index GREATER -1) + list (APPEND ${_cmdVar} "${CMAKE_${_language}_SYSTEM_FRAMEWORK_SEARCH_FLAG}${_frameworkDir}") + else() + list (APPEND ${_cmdVar} "${CMAKE_${_language}_FRAMEWORK_SEARCH_FLAG}${_frameworkDir}") + endif() + endforeach() + endif() + endif() + set (${_cmdVar} ${${_cmdVar}} PARENT_SCOPE) +endfunction() + +macro (cotire_add_compile_flags_to_cmd _cmdVar) + foreach (_flag ${ARGN}) + list (APPEND ${_cmdVar} "${_flag}") + endforeach() +endmacro() + +function (cotire_check_file_up_to_date _fileIsUpToDateVar _file) + if (EXISTS "${_file}") + set (_triggerFile "") + foreach (_dependencyFile ${ARGN}) + if (EXISTS "${_dependencyFile}") + # IS_NEWER_THAN returns TRUE if both files have the same timestamp + # thus we do the comparison in both directions to exclude ties + if ("${_dependencyFile}" IS_NEWER_THAN "${_file}" AND + NOT "${_file}" IS_NEWER_THAN "${_dependencyFile}") + set (_triggerFile "${_dependencyFile}") + break() + endif() + endif() + endforeach() + if (_triggerFile) + if (COTIRE_VERBOSE) + get_filename_component(_fileName "${_file}" NAME) + message (STATUS "${_fileName} update triggered by ${_triggerFile} change.") + endif() + set (${_fileIsUpToDateVar} FALSE PARENT_SCOPE) + else() + if (COTIRE_VERBOSE) + get_filename_component(_fileName "${_file}" NAME) + message (STATUS "${_fileName} is up-to-date.") + endif() + set (${_fileIsUpToDateVar} TRUE PARENT_SCOPE) + endif() + else() + if 
(COTIRE_VERBOSE) + get_filename_component(_fileName "${_file}" NAME) + message (STATUS "${_fileName} does not exist yet.") + endif() + set (${_fileIsUpToDateVar} FALSE PARENT_SCOPE) + endif() +endfunction() + +macro (cotire_find_closest_relative_path _headerFile _includeDirs _relPathVar) + set (${_relPathVar} "") + foreach (_includeDir ${_includeDirs}) + if (IS_DIRECTORY "${_includeDir}") + file (RELATIVE_PATH _relPath "${_includeDir}" "${_headerFile}") + if (NOT IS_ABSOLUTE "${_relPath}" AND NOT "${_relPath}" MATCHES "^\\.\\.") + string (LENGTH "${${_relPathVar}}" _closestLen) + string (LENGTH "${_relPath}" _relLen) + if (_closestLen EQUAL 0 OR _relLen LESS _closestLen) + set (${_relPathVar} "${_relPath}") + endif() + endif() + elseif ("${_includeDir}" STREQUAL "${_headerFile}") + # if path matches exactly, return short non-empty string + set (${_relPathVar} "1") + break() + endif() + endforeach() +endmacro() + +macro (cotire_check_header_file_location _headerFile _insideIncludeDirs _outsideIncludeDirs _headerIsInside) + # check header path against ignored and honored include directories + cotire_find_closest_relative_path("${_headerFile}" "${_insideIncludeDirs}" _insideRelPath) + if (_insideRelPath) + # header is inside, but could be become outside if there is a shorter outside match + cotire_find_closest_relative_path("${_headerFile}" "${_outsideIncludeDirs}" _outsideRelPath) + if (_outsideRelPath) + string (LENGTH "${_insideRelPath}" _insideRelPathLen) + string (LENGTH "${_outsideRelPath}" _outsideRelPathLen) + if (_outsideRelPathLen LESS _insideRelPathLen) + set (${_headerIsInside} FALSE) + else() + set (${_headerIsInside} TRUE) + endif() + else() + set (${_headerIsInside} TRUE) + endif() + else() + # header is outside + set (${_headerIsInside} FALSE) + endif() +endmacro() + +macro (cotire_check_ignore_header_file_path _headerFile _headerIsIgnoredVar) + if (NOT EXISTS "${_headerFile}") + set (${_headerIsIgnoredVar} TRUE) + elseif (IS_DIRECTORY 
"${_headerFile}") + set (${_headerIsIgnoredVar} TRUE) + elseif ("${_headerFile}" MATCHES "\\.\\.|[_-]fixed" AND "${_headerFile}" MATCHES "\\.h$") + # heuristic: ignore C headers with embedded parent directory references or "-fixed" or "_fixed" in path + # these often stem from using GCC #include_next tricks, which may break the precompiled header compilation + # with the error message "error: no include path in which to search for header.h" + set (${_headerIsIgnoredVar} TRUE) + else() + set (${_headerIsIgnoredVar} FALSE) + endif() +endmacro() + +macro (cotire_check_ignore_header_file_ext _headerFile _ignoreExtensionsVar _headerIsIgnoredVar) + # check header file extension + cotire_get_source_file_extension("${_headerFile}" _headerFileExt) + set (${_headerIsIgnoredVar} FALSE) + if (_headerFileExt) + list (FIND ${_ignoreExtensionsVar} "${_headerFileExt}" _index) + if (_index GREATER -1) + set (${_headerIsIgnoredVar} TRUE) + endif() + endif() +endmacro() + +macro (cotire_parse_line _line _headerFileVar _headerDepthVar) + if (MSVC) + # cl.exe /showIncludes produces different output, depending on the language pack used, e.g.: + # English: "Note: including file: C:\directory\file" + # German: "Hinweis: Einlesen der Datei: C:\directory\file" + # We use a very general regular expression, relying on the presence of the : characters + if (_line MATCHES "( +)([a-zA-Z]:[^:]+)$") + string (LENGTH "${CMAKE_MATCH_1}" ${_headerDepthVar}) + get_filename_component(${_headerFileVar} "${CMAKE_MATCH_2}" ABSOLUTE) + else() + set (${_headerFileVar} "") + set (${_headerDepthVar} 0) + endif() + else() + if (_line MATCHES "^(\\.+) (.*)$") + # GCC like output + string (LENGTH "${CMAKE_MATCH_1}" ${_headerDepthVar}) + if (IS_ABSOLUTE "${CMAKE_MATCH_2}") + set (${_headerFileVar} "${CMAKE_MATCH_2}") + else() + get_filename_component(${_headerFileVar} "${CMAKE_MATCH_2}" REALPATH) + endif() + else() + set (${_headerFileVar} "") + set (${_headerDepthVar} 0) + endif() + endif() +endmacro() + 
+function (cotire_parse_includes _language _scanOutput _ignoredIncludeDirs _honoredIncludeDirs _ignoredExtensions _selectedIncludesVar _unparsedLinesVar) + if (WIN32) + # prevent CMake macro invocation errors due to backslash characters in Windows paths + string (REPLACE "\\" "/" _scanOutput "${_scanOutput}") + endif() + # canonize slashes + string (REPLACE "//" "/" _scanOutput "${_scanOutput}") + # prevent semicolon from being interpreted as a line separator + string (REPLACE ";" "\\;" _scanOutput "${_scanOutput}") + # then separate lines + string (REGEX REPLACE "\n" ";" _scanOutput "${_scanOutput}") + list (LENGTH _scanOutput _len) + # remove duplicate lines to speed up parsing + list (REMOVE_DUPLICATES _scanOutput) + list (LENGTH _scanOutput _uniqueLen) + if (COTIRE_VERBOSE OR COTIRE_DEBUG) + message (STATUS "Scanning ${_uniqueLen} unique lines of ${_len} for includes") + if (_ignoredExtensions) + message (STATUS "Ignored extensions: ${_ignoredExtensions}") + endif() + if (_ignoredIncludeDirs) + message (STATUS "Ignored paths: ${_ignoredIncludeDirs}") + endif() + if (_honoredIncludeDirs) + message (STATUS "Included paths: ${_honoredIncludeDirs}") + endif() + endif() + set (_sourceFiles ${ARGN}) + set (_selectedIncludes "") + set (_unparsedLines "") + # stack keeps track of inside/outside project status of processed header files + set (_headerIsInsideStack "") + foreach (_line IN LISTS _scanOutput) + if (_line) + cotire_parse_line("${_line}" _headerFile _headerDepth) + if (_headerFile) + cotire_check_header_file_location("${_headerFile}" "${_ignoredIncludeDirs}" "${_honoredIncludeDirs}" _headerIsInside) + if (COTIRE_DEBUG) + message (STATUS "${_headerDepth}: ${_headerFile} ${_headerIsInside}") + endif() + # update stack + list (LENGTH _headerIsInsideStack _stackLen) + if (_headerDepth GREATER _stackLen) + math (EXPR _stackLen "${_stackLen} + 1") + foreach (_index RANGE ${_stackLen} ${_headerDepth}) + list (APPEND _headerIsInsideStack ${_headerIsInside}) + 
endforeach() + else() + foreach (_index RANGE ${_headerDepth} ${_stackLen}) + list (REMOVE_AT _headerIsInsideStack -1) + endforeach() + list (APPEND _headerIsInsideStack ${_headerIsInside}) + endif() + if (COTIRE_DEBUG) + message (STATUS "${_headerIsInsideStack}") + endif() + # header is a candidate if it is outside project + if (NOT _headerIsInside) + # get parent header file's inside/outside status + if (_headerDepth GREATER 1) + math (EXPR _index "${_headerDepth} - 2") + list (GET _headerIsInsideStack ${_index} _parentHeaderIsInside) + else() + set (_parentHeaderIsInside TRUE) + endif() + # select header file if parent header file is inside project + # (e.g., a project header file that includes a standard header file) + if (_parentHeaderIsInside) + cotire_check_ignore_header_file_path("${_headerFile}" _headerIsIgnored) + if (NOT _headerIsIgnored) + cotire_check_ignore_header_file_ext("${_headerFile}" _ignoredExtensions _headerIsIgnored) + if (NOT _headerIsIgnored) + list (APPEND _selectedIncludes "${_headerFile}") + else() + # fix header's inside status on stack, it is ignored by extension now + list (REMOVE_AT _headerIsInsideStack -1) + list (APPEND _headerIsInsideStack TRUE) + endif() + endif() + if (COTIRE_DEBUG) + message (STATUS "${_headerFile} ${_ignoredExtensions} ${_headerIsIgnored}") + endif() + endif() + endif() + else() + if (MSVC) + # for cl.exe do not keep unparsed lines which solely consist of a source file name + string (FIND "${_sourceFiles}" "${_line}" _index) + if (_index LESS 0) + list (APPEND _unparsedLines "${_line}") + endif() + else() + list (APPEND _unparsedLines "${_line}") + endif() + endif() + endif() + endforeach() + list (REMOVE_DUPLICATES _selectedIncludes) + set (${_selectedIncludesVar} ${_selectedIncludes} PARENT_SCOPE) + set (${_unparsedLinesVar} ${_unparsedLines} PARENT_SCOPE) +endfunction() + +function (cotire_scan_includes _includesVar) + set(_options "") + set(_oneValueArgs COMPILER_ID COMPILER_EXECUTABLE COMPILER_ARG1 
COMPILER_VERSION LANGUAGE UNPARSED_LINES SCAN_RESULT) + set(_multiValueArgs COMPILE_DEFINITIONS COMPILE_FLAGS INCLUDE_DIRECTORIES SYSTEM_INCLUDE_DIRECTORIES + IGNORE_PATH INCLUDE_PATH IGNORE_EXTENSIONS INCLUDE_PRIORITY_PATH COMPILER_LAUNCHER) + cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) + set (_sourceFiles ${_option_UNPARSED_ARGUMENTS}) + if (NOT _option_LANGUAGE) + set (_option_LANGUAGE "CXX") + endif() + if (NOT _option_COMPILER_ID) + set (_option_COMPILER_ID "${CMAKE_${_option_LANGUAGE}_ID}") + endif() + if (NOT _option_COMPILER_VERSION) + set (_option_COMPILER_VERSION "${CMAKE_${_option_LANGUAGE}_COMPILER_VERSION}") + endif() + cotire_init_compile_cmd(_cmd "${_option_LANGUAGE}" "${_option_COMPILER_LAUNCHER}" "${_option_COMPILER_EXECUTABLE}" "${_option_COMPILER_ARG1}") + cotire_add_definitions_to_cmd(_cmd "${_option_LANGUAGE}" ${_option_COMPILE_DEFINITIONS}) + cotire_add_compile_flags_to_cmd(_cmd ${_option_COMPILE_FLAGS}) + cotire_add_includes_to_cmd(_cmd "${_option_LANGUAGE}" _option_INCLUDE_DIRECTORIES _option_SYSTEM_INCLUDE_DIRECTORIES) + cotire_add_frameworks_to_cmd(_cmd "${_option_LANGUAGE}" _option_INCLUDE_DIRECTORIES _option_SYSTEM_INCLUDE_DIRECTORIES) + cotire_add_makedep_flags("${_option_LANGUAGE}" "${_option_COMPILER_ID}" "${_option_COMPILER_VERSION}" _cmd) + # only consider existing source files for scanning + set (_existingSourceFiles "") + foreach (_sourceFile ${_sourceFiles}) + if (EXISTS "${_sourceFile}") + list (APPEND _existingSourceFiles "${_sourceFile}") + endif() + endforeach() + if (NOT _existingSourceFiles) + set (${_includesVar} "" PARENT_SCOPE) + return() + endif() + # add source files to be scanned + if (WIN32) + foreach (_sourceFile ${_existingSourceFiles}) + file (TO_NATIVE_PATH "${_sourceFile}" _sourceFileNative) + list (APPEND _cmd "${_sourceFileNative}") + endforeach() + else() + list (APPEND _cmd ${_existingSourceFiles}) + endif() + if (COTIRE_VERBOSE) + message (STATUS 
"execute_process: ${_cmd}") + endif() + if (MSVC_IDE OR _option_COMPILER_ID MATCHES "MSVC") + # cl.exe messes with the output streams unless the environment variable VS_UNICODE_OUTPUT is cleared + unset (ENV{VS_UNICODE_OUTPUT}) + endif() + execute_process( + COMMAND ${_cmd} + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + RESULT_VARIABLE _result + OUTPUT_QUIET + ERROR_VARIABLE _output) + if (_result) + message (STATUS "Result ${_result} scanning includes of ${_existingSourceFiles}.") + endif() + cotire_parse_includes( + "${_option_LANGUAGE}" "${_output}" + "${_option_IGNORE_PATH}" "${_option_INCLUDE_PATH}" + "${_option_IGNORE_EXTENSIONS}" + _includes _unparsedLines + ${_sourceFiles}) + if (_option_INCLUDE_PRIORITY_PATH) + set (_sortedIncludes "") + foreach (_priorityPath ${_option_INCLUDE_PRIORITY_PATH}) + foreach (_include ${_includes}) + string (FIND ${_include} ${_priorityPath} _position) + if (_position GREATER -1) + list (APPEND _sortedIncludes ${_include}) + endif() + endforeach() + endforeach() + if (_sortedIncludes) + list (INSERT _includes 0 ${_sortedIncludes}) + list (REMOVE_DUPLICATES _includes) + endif() + endif() + set (${_includesVar} ${_includes} PARENT_SCOPE) + if (_option_UNPARSED_LINES) + set (${_option_UNPARSED_LINES} ${_unparsedLines} PARENT_SCOPE) + endif() + if (_option_SCAN_RESULT) + set (${_option_SCAN_RESULT} ${_result} PARENT_SCOPE) + endif() +endfunction() + +macro (cotire_append_undefs _contentsVar) + set (_undefs ${ARGN}) + if (_undefs) + list (REMOVE_DUPLICATES _undefs) + foreach (_definition ${_undefs}) + list (APPEND ${_contentsVar} "#undef ${_definition}") + endforeach() + endif() +endmacro() + +macro (cotire_comment_str _language _commentText _commentVar) + if ("${_language}" STREQUAL "CMAKE") + set (${_commentVar} "# ${_commentText}") + else() + set (${_commentVar} "/* ${_commentText} */") + endif() +endmacro() + +function (cotire_write_file _language _file _contents _force) + get_filename_component(_moduleName 
"${COTIRE_CMAKE_MODULE_FILE}" NAME) + cotire_comment_str("${_language}" "${_moduleName} ${COTIRE_CMAKE_MODULE_VERSION} generated file" _header1) + cotire_comment_str("${_language}" "${_file}" _header2) + set (_contents "${_header1}\n${_header2}\n${_contents}") + if (COTIRE_DEBUG) + message (STATUS "${_contents}") + endif() + if (_force OR NOT EXISTS "${_file}") + file (WRITE "${_file}" "${_contents}") + else() + file (READ "${_file}" _oldContents) + if (NOT "${_oldContents}" STREQUAL "${_contents}") + file (WRITE "${_file}" "${_contents}") + else() + if (COTIRE_DEBUG) + message (STATUS "${_file} unchanged") + endif() + endif() + endif() +endfunction() + +function (cotire_generate_unity_source _unityFile) + set(_options "") + set(_oneValueArgs LANGUAGE) + set(_multiValueArgs + DEPENDS SOURCES_COMPILE_DEFINITIONS + PRE_UNDEFS SOURCES_PRE_UNDEFS POST_UNDEFS SOURCES_POST_UNDEFS PROLOGUE EPILOGUE) + cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) + if (_option_DEPENDS) + cotire_check_file_up_to_date(_unityFileIsUpToDate "${_unityFile}" ${_option_DEPENDS}) + if (_unityFileIsUpToDate) + return() + endif() + endif() + set (_sourceFiles ${_option_UNPARSED_ARGUMENTS}) + if (NOT _option_PRE_UNDEFS) + set (_option_PRE_UNDEFS "") + endif() + if (NOT _option_SOURCES_PRE_UNDEFS) + set (_option_SOURCES_PRE_UNDEFS "") + endif() + if (NOT _option_POST_UNDEFS) + set (_option_POST_UNDEFS "") + endif() + if (NOT _option_SOURCES_POST_UNDEFS) + set (_option_SOURCES_POST_UNDEFS "") + endif() + set (_contents "") + if (_option_PROLOGUE) + list (APPEND _contents ${_option_PROLOGUE}) + endif() + if (_option_LANGUAGE AND _sourceFiles) + if ("${_option_LANGUAGE}" STREQUAL "CXX") + list (APPEND _contents "#ifdef __cplusplus") + elseif ("${_option_LANGUAGE}" STREQUAL "C") + list (APPEND _contents "#ifndef __cplusplus") + endif() + endif() + set (_compileUndefinitions "") + foreach (_sourceFile ${_sourceFiles}) + cotire_get_source_compile_definitions( 
+ "${_option_CONFIGURATION}" "${_option_LANGUAGE}" "${_sourceFile}" _compileDefinitions + ${_option_SOURCES_COMPILE_DEFINITIONS}) + cotire_get_source_undefs("${_sourceFile}" COTIRE_UNITY_SOURCE_PRE_UNDEFS _sourcePreUndefs ${_option_SOURCES_PRE_UNDEFS}) + cotire_get_source_undefs("${_sourceFile}" COTIRE_UNITY_SOURCE_POST_UNDEFS _sourcePostUndefs ${_option_SOURCES_POST_UNDEFS}) + if (_option_PRE_UNDEFS) + list (APPEND _compileUndefinitions ${_option_PRE_UNDEFS}) + endif() + if (_sourcePreUndefs) + list (APPEND _compileUndefinitions ${_sourcePreUndefs}) + endif() + if (_compileUndefinitions) + cotire_append_undefs(_contents ${_compileUndefinitions}) + set (_compileUndefinitions "") + endif() + if (_sourcePostUndefs) + list (APPEND _compileUndefinitions ${_sourcePostUndefs}) + endif() + if (_option_POST_UNDEFS) + list (APPEND _compileUndefinitions ${_option_POST_UNDEFS}) + endif() + foreach (_definition ${_compileDefinitions}) + if (_definition MATCHES "^([a-zA-Z0-9_]+)=(.+)$") + list (APPEND _contents "#define ${CMAKE_MATCH_1} ${CMAKE_MATCH_2}") + list (INSERT _compileUndefinitions 0 "${CMAKE_MATCH_1}") + else() + list (APPEND _contents "#define ${_definition}") + list (INSERT _compileUndefinitions 0 "${_definition}") + endif() + endforeach() + # use absolute path as source file location + get_filename_component(_sourceFileLocation "${_sourceFile}" ABSOLUTE) + if (WIN32) + file (TO_NATIVE_PATH "${_sourceFileLocation}" _sourceFileLocation) + endif() + list (APPEND _contents "#include \"${_sourceFileLocation}\"") + endforeach() + if (_compileUndefinitions) + cotire_append_undefs(_contents ${_compileUndefinitions}) + set (_compileUndefinitions "") + endif() + if (_option_LANGUAGE AND _sourceFiles) + list (APPEND _contents "#endif") + endif() + if (_option_EPILOGUE) + list (APPEND _contents ${_option_EPILOGUE}) + endif() + list (APPEND _contents "") + string (REPLACE ";" "\n" _contents "${_contents}") + if (COTIRE_VERBOSE) + message ("${_contents}") + endif() + 
cotire_write_file("${_option_LANGUAGE}" "${_unityFile}" "${_contents}" TRUE) +endfunction() + +function (cotire_generate_prefix_header _prefixFile) + set(_options "") + set(_oneValueArgs LANGUAGE COMPILER_EXECUTABLE COMPILER_ARG1 COMPILER_ID COMPILER_VERSION) + set(_multiValueArgs DEPENDS COMPILE_DEFINITIONS COMPILE_FLAGS + INCLUDE_DIRECTORIES SYSTEM_INCLUDE_DIRECTORIES IGNORE_PATH INCLUDE_PATH + IGNORE_EXTENSIONS INCLUDE_PRIORITY_PATH COMPILER_LAUNCHER) + cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) + if (NOT _option_COMPILER_ID) + set (_option_COMPILER_ID "${CMAKE_${_option_LANGUAGE}_ID}") + endif() + if (NOT _option_COMPILER_VERSION) + set (_option_COMPILER_VERSION "${CMAKE_${_option_LANGUAGE}_COMPILER_VERSION}") + endif() + if (_option_DEPENDS) + cotire_check_file_up_to_date(_prefixFileIsUpToDate "${_prefixFile}" ${_option_DEPENDS}) + if (_prefixFileIsUpToDate) + # create empty log file + set (_unparsedLinesFile "${_prefixFile}.log") + file (WRITE "${_unparsedLinesFile}" "") + return() + endif() + endif() + set (_prologue "") + set (_epilogue "") + if (_option_COMPILER_ID MATCHES "Clang") + set (_prologue "#pragma clang system_header") + elseif (_option_COMPILER_ID MATCHES "GNU") + set (_prologue "#pragma GCC system_header") + elseif (_option_COMPILER_ID MATCHES "MSVC") + set (_prologue "#pragma warning(push, 0)") + set (_epilogue "#pragma warning(pop)") + elseif (_option_COMPILER_ID MATCHES "Intel") + # Intel compiler requires hdrstop pragma to stop generating PCH file + set (_epilogue "#pragma hdrstop") + endif() + set (_sourceFiles ${_option_UNPARSED_ARGUMENTS}) + cotire_scan_includes(_selectedHeaders ${_sourceFiles} + LANGUAGE "${_option_LANGUAGE}" + COMPILER_LAUNCHER "${_option_COMPILER_LAUNCHER}" + COMPILER_EXECUTABLE "${_option_COMPILER_EXECUTABLE}" + COMPILER_ARG1 "${_option_COMPILER_ARG1}" + COMPILER_ID "${_option_COMPILER_ID}" + COMPILER_VERSION "${_option_COMPILER_VERSION}" + COMPILE_DEFINITIONS 
${_option_COMPILE_DEFINITIONS} + COMPILE_FLAGS ${_option_COMPILE_FLAGS} + INCLUDE_DIRECTORIES ${_option_INCLUDE_DIRECTORIES} + SYSTEM_INCLUDE_DIRECTORIES ${_option_SYSTEM_INCLUDE_DIRECTORIES} + IGNORE_PATH ${_option_IGNORE_PATH} + INCLUDE_PATH ${_option_INCLUDE_PATH} + IGNORE_EXTENSIONS ${_option_IGNORE_EXTENSIONS} + INCLUDE_PRIORITY_PATH ${_option_INCLUDE_PRIORITY_PATH} + UNPARSED_LINES _unparsedLines + SCAN_RESULT _scanResult) + cotire_generate_unity_source("${_prefixFile}" + PROLOGUE ${_prologue} EPILOGUE ${_epilogue} LANGUAGE "${_option_LANGUAGE}" ${_selectedHeaders}) + set (_unparsedLinesFile "${_prefixFile}.log") + if (_unparsedLines) + if (COTIRE_VERBOSE OR _scanResult OR NOT _selectedHeaders) + list (LENGTH _unparsedLines _skippedLineCount) + if (WIN32) + file (TO_NATIVE_PATH "${_unparsedLinesFile}" _unparsedLinesLogPath) + else() + set (_unparsedLinesLogPath "${_unparsedLinesFile}") + endif() + message (STATUS "${_skippedLineCount} line(s) skipped, see ${_unparsedLinesLogPath}") + endif() + string (REPLACE ";" "\n" _unparsedLines "${_unparsedLines}") + endif() + file (WRITE "${_unparsedLinesFile}" "${_unparsedLines}\n") +endfunction() + +function (cotire_add_makedep_flags _language _compilerID _compilerVersion _flagsVar) + set (_flags ${${_flagsVar}}) + if (_compilerID MATCHES "MSVC") + # cl.exe options used + # /nologo suppresses display of sign-on banner + # /TC treat all files named on the command line as C source files + # /TP treat all files named on the command line as C++ source files + # /EP preprocess to stdout without #line directives + # /showIncludes list include files + set (_sourceFileTypeC "/TC") + set (_sourceFileTypeCXX "/TP") + if (_flags) + # append to list + list (APPEND _flags /nologo "${_sourceFileType${_language}}" /EP /showIncludes) + else() + # return as a flag string + set (_flags "${_sourceFileType${_language}} /EP /showIncludes") + endif() + elseif (_compilerID MATCHES "GNU") + # GCC options used + # -H print the name of each 
header file used + # -E invoke preprocessor + # -fdirectives-only do not expand macros, requires GCC >= 4.3 + if (_flags) + # append to list + list (APPEND _flags -H -E) + if (NOT "${_compilerVersion}" VERSION_LESS "4.3.0") + list (APPEND _flags -fdirectives-only) + endif() + else() + # return as a flag string + set (_flags "-H -E") + if (NOT "${_compilerVersion}" VERSION_LESS "4.3.0") + set (_flags "${_flags} -fdirectives-only") + endif() + endif() + elseif (_compilerID MATCHES "Clang") + if (UNIX) + # Clang options used + # -H print the name of each header file used + # -E invoke preprocessor + # -fno-color-diagnostics do not print diagnostics in color + # -Eonly just run preprocessor, no output + if (_flags) + # append to list + list (APPEND _flags -H -E -fno-color-diagnostics -Xclang -Eonly) + else() + # return as a flag string + set (_flags "-H -E -fno-color-diagnostics -Xclang -Eonly") + endif() + elseif (WIN32) + # Clang-cl.exe options used + # /TC treat all files named on the command line as C source files + # /TP treat all files named on the command line as C++ source files + # /EP preprocess to stdout without #line directives + # -H print the name of each header file used + # -fno-color-diagnostics do not print diagnostics in color + # -Eonly just run preprocessor, no output + set (_sourceFileTypeC "/TC") + set (_sourceFileTypeCXX "/TP") + if (_flags) + # append to list + list (APPEND _flags "${_sourceFileType${_language}}" /EP -fno-color-diagnostics -Xclang -H -Xclang -Eonly) + else() + # return as a flag string + set (_flags "${_sourceFileType${_language}} /EP -fno-color-diagnostics -Xclang -H -Xclang -Eonly") + endif() + endif() + elseif (_compilerID MATCHES "Intel") + if (WIN32) + # Windows Intel options used + # /nologo do not display compiler version information + # /QH display the include file order + # /EP preprocess to stdout, omitting #line directives + # /TC process all source or unrecognized file types as C source files + # /TP process all 
source or unrecognized file types as C++ source files + set (_sourceFileTypeC "/TC") + set (_sourceFileTypeCXX "/TP") + if (_flags) + # append to list + list (APPEND _flags /nologo "${_sourceFileType${_language}}" /EP /QH) + else() + # return as a flag string + set (_flags "${_sourceFileType${_language}} /EP /QH") + endif() + else() + # Linux / Mac OS X Intel options used + # -H print the name of each header file used + # -EP preprocess to stdout, omitting #line directives + # -Kc++ process all source or unrecognized file types as C++ source files + if (_flags) + # append to list + if ("${_language}" STREQUAL "CXX") + list (APPEND _flags -Kc++) + endif() + list (APPEND _flags -H -EP) + else() + # return as a flag string + if ("${_language}" STREQUAL "CXX") + set (_flags "-Kc++ ") + endif() + set (_flags "${_flags}-H -EP") + endif() + endif() + else() + message (FATAL_ERROR "cotire: unsupported ${_language} compiler ${_compilerID} version ${_compilerVersion}.") + endif() + set (${_flagsVar} ${_flags} PARENT_SCOPE) +endfunction() + +function (cotire_add_pch_compilation_flags _language _compilerID _compilerVersion _prefixFile _pchFile _hostFile _flagsVar) + set (_flags ${${_flagsVar}}) + if (_compilerID MATCHES "MSVC") + file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) + file (TO_NATIVE_PATH "${_pchFile}" _pchFileNative) + file (TO_NATIVE_PATH "${_hostFile}" _hostFileNative) + # cl.exe options used + # /Yc creates a precompiled header file + # /Fp specifies precompiled header binary file name + # /FI forces inclusion of file + # /TC treat all files named on the command line as C source files + # /TP treat all files named on the command line as C++ source files + # /Zs syntax check only + # /Zm precompiled header memory allocation scaling factor + set (_sourceFileTypeC "/TC") + set (_sourceFileTypeCXX "/TP") + if (_flags) + # append to list + list (APPEND _flags /nologo "${_sourceFileType${_language}}" + "/Yc${_prefixFileNative}" "/Fp${_pchFileNative}" 
"/FI${_prefixFileNative}" /Zs "${_hostFileNative}") + if (COTIRE_PCH_MEMORY_SCALING_FACTOR) + list (APPEND _flags "/Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") + endif() + else() + # return as a flag string + set (_flags "/Yc\"${_prefixFileNative}\" /Fp\"${_pchFileNative}\" /FI\"${_prefixFileNative}\"") + if (COTIRE_PCH_MEMORY_SCALING_FACTOR) + set (_flags "${_flags} /Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") + endif() + endif() + elseif (_compilerID MATCHES "GNU") + # GCC options used + # -x specify the source language + # -c compile but do not link + # -o place output in file + # note that we cannot use -w to suppress all warnings upon pre-compiling, because turning off a warning may + # alter compile flags as a side effect (e.g., -Wwrite-string implies -fconst-strings) + set (_xLanguage_C "c-header") + set (_xLanguage_CXX "c++-header") + if (_flags) + # append to list + list (APPEND _flags -x "${_xLanguage_${_language}}" -c "${_prefixFile}" -o "${_pchFile}") + else() + # return as a flag string + set (_flags "-x ${_xLanguage_${_language}} -c \"${_prefixFile}\" -o \"${_pchFile}\"") + endif() + elseif (_compilerID MATCHES "Clang") + if (UNIX) + # Clang options used + # -x specify the source language + # -c compile but do not link + # -o place output in file + # -fno-pch-timestamp disable inclusion of timestamp in precompiled headers (clang 4.0.0+) + set (_xLanguage_C "c-header") + set (_xLanguage_CXX "c++-header") + if (_flags) + # append to list + list (APPEND _flags -x "${_xLanguage_${_language}}" -c "${_prefixFile}" -o "${_pchFile}") + if (NOT "${_compilerVersion}" VERSION_LESS "4.0.0") + list (APPEND _flags -Xclang -fno-pch-timestamp) + endif() + else() + # return as a flag string + set (_flags "-x ${_xLanguage_${_language}} -c \"${_prefixFile}\" -o \"${_pchFile}\"") + if (NOT "${_compilerVersion}" VERSION_LESS "4.0.0") + set (_flags "${_flags} -Xclang -fno-pch-timestamp") + endif() + endif() + elseif (WIN32) + # Clang-cl.exe options used + # /Yc creates a 
precompiled header file + # /Fp specifies precompiled header binary file name + # /FI forces inclusion of file + # /Zs syntax check only + # /TC treat all files named on the command line as C source files + # /TP treat all files named on the command line as C++ source files + set (_sourceFileTypeC "/TC") + set (_sourceFileTypeCXX "/TP") + if (_flags) + # append to list + list (APPEND _flags "${_sourceFileType${_language}}" + "/Yc${_prefixFile}" "/Fp${_pchFile}" "/FI${_prefixFile}" /Zs "${_hostFile}") + else() + # return as a flag string + set (_flags "/Yc\"${_prefixFile}\" /Fp\"${_pchFile}\" /FI\"${_prefixFile}\"") + endif() + endif() + elseif (_compilerID MATCHES "Intel") + if (WIN32) + file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) + file (TO_NATIVE_PATH "${_pchFile}" _pchFileNative) + file (TO_NATIVE_PATH "${_hostFile}" _hostFileNative) + # Windows Intel options used + # /nologo do not display compiler version information + # /Yc create a precompiled header (PCH) file + # /Fp specify a path or file name for precompiled header files + # /FI tells the preprocessor to include a specified file name as the header file + # /TC process all source or unrecognized file types as C source files + # /TP process all source or unrecognized file types as C++ source files + # /Zs syntax check only + # /Wpch-messages enable diagnostics related to pre-compiled headers (requires Intel XE 2013 Update 2) + set (_sourceFileTypeC "/TC") + set (_sourceFileTypeCXX "/TP") + if (_flags) + # append to list + list (APPEND _flags /nologo "${_sourceFileType${_language}}" + "/Yc" "/Fp${_pchFileNative}" "/FI${_prefixFileNative}" /Zs "${_hostFileNative}") + if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") + list (APPEND _flags "/Wpch-messages") + endif() + else() + # return as a flag string + set (_flags "/Yc /Fp\"${_pchFileNative}\" /FI\"${_prefixFileNative}\"") + if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") + set (_flags "${_flags} /Wpch-messages") + endif() + endif() + 
else() + # Linux / Mac OS X Intel options used + # -pch-dir location for precompiled header files + # -pch-create name of the precompiled header (PCH) to create + # -Kc++ process all source or unrecognized file types as C++ source files + # -fsyntax-only check only for correct syntax + # -Wpch-messages enable diagnostics related to pre-compiled headers (requires Intel XE 2013 Update 2) + get_filename_component(_pchDir "${_pchFile}" DIRECTORY) + get_filename_component(_pchName "${_pchFile}" NAME) + set (_xLanguage_C "c-header") + set (_xLanguage_CXX "c++-header") + set (_pchSuppressMessages FALSE) + if ("${CMAKE_${_language}_FLAGS}" MATCHES ".*-Wno-pch-messages.*") + set(_pchSuppressMessages TRUE) + endif() + if (_flags) + # append to list + if ("${_language}" STREQUAL "CXX") + list (APPEND _flags -Kc++) + endif() + list (APPEND _flags -include "${_prefixFile}" -pch-dir "${_pchDir}" -pch-create "${_pchName}" -fsyntax-only "${_hostFile}") + if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") + if (NOT _pchSuppressMessages) + list (APPEND _flags -Wpch-messages) + endif() + endif() + else() + # return as a flag string + set (_flags "-include \"${_prefixFile}\" -pch-dir \"${_pchDir}\" -pch-create \"${_pchName}\"") + if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") + if (NOT _pchSuppressMessages) + set (_flags "${_flags} -Wpch-messages") + endif() + endif() + endif() + endif() + else() + message (FATAL_ERROR "cotire: unsupported ${_language} compiler ${_compilerID} version ${_compilerVersion}.") + endif() + set (${_flagsVar} ${_flags} PARENT_SCOPE) +endfunction() + +function (cotire_add_prefix_pch_inclusion_flags _language _compilerID _compilerVersion _prefixFile _pchFile _flagsVar) + set (_flags ${${_flagsVar}}) + if (_compilerID MATCHES "MSVC") + file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) + # cl.exe options used + # /Yu uses a precompiled header file during build + # /Fp specifies precompiled header binary file name + # /FI forces inclusion of file + 
# /Zm precompiled header memory allocation scaling factor + if (_pchFile) + file (TO_NATIVE_PATH "${_pchFile}" _pchFileNative) + if (_flags) + # append to list + list (APPEND _flags "/Yu${_prefixFileNative}" "/Fp${_pchFileNative}" "/FI${_prefixFileNative}") + if (COTIRE_PCH_MEMORY_SCALING_FACTOR) + list (APPEND _flags "/Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") + endif() + else() + # return as a flag string + set (_flags "/Yu\"${_prefixFileNative}\" /Fp\"${_pchFileNative}\" /FI\"${_prefixFileNative}\"") + if (COTIRE_PCH_MEMORY_SCALING_FACTOR) + set (_flags "${_flags} /Zm${COTIRE_PCH_MEMORY_SCALING_FACTOR}") + endif() + endif() + else() + # no precompiled header, force inclusion of prefix header + if (_flags) + # append to list + list (APPEND _flags "/FI${_prefixFileNative}") + else() + # return as a flag string + set (_flags "/FI\"${_prefixFileNative}\"") + endif() + endif() + elseif (_compilerID MATCHES "GNU") + # GCC options used + # -include process include file as the first line of the primary source file + # -Winvalid-pch warns if precompiled header is found but cannot be used + # note: ccache requires the -include flag to be used in order to process precompiled header correctly + if (_flags) + # append to list + list (APPEND _flags -Winvalid-pch -include "${_prefixFile}") + else() + # return as a flag string + set (_flags "-Winvalid-pch -include \"${_prefixFile}\"") + endif() + elseif (_compilerID MATCHES "Clang") + if (UNIX) + # Clang options used + # -include process include file as the first line of the primary source file + # note: ccache requires the -include flag to be used in order to process precompiled header correctly + if (_flags) + # append to list + list (APPEND _flags -include "${_prefixFile}") + else() + # return as a flag string + set (_flags "-include \"${_prefixFile}\"") + endif() + elseif (WIN32) + # Clang-cl.exe options used + # /Yu uses a precompiled header file during build + # /Fp specifies precompiled header binary file name + # /FI 
forces inclusion of file + if (_pchFile) + if (_flags) + # append to list + list (APPEND _flags "/Yu${_prefixFile}" "/Fp${_pchFile}" "/FI${_prefixFile}") + else() + # return as a flag string + set (_flags "/Yu\"${_prefixFile}\" /Fp\"${_pchFile}\" /FI\"${_prefixFile}\"") + endif() + else() + # no precompiled header, force inclusion of prefix header + if (_flags) + # append to list + list (APPEND _flags "/FI${_prefixFile}") + else() + # return as a flag string + set (_flags "/FI\"${_prefixFile}\"") + endif() + endif() + endif() + elseif (_compilerID MATCHES "Intel") + if (WIN32) + file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileNative) + # Windows Intel options used + # /Yu use a precompiled header (PCH) file + # /Fp specify a path or file name for precompiled header files + # /FI tells the preprocessor to include a specified file name as the header file + # /Wpch-messages enable diagnostics related to pre-compiled headers (requires Intel XE 2013 Update 2) + if (_pchFile) + file (TO_NATIVE_PATH "${_pchFile}" _pchFileNative) + if (_flags) + # append to list + list (APPEND _flags "/Yu" "/Fp${_pchFileNative}" "/FI${_prefixFileNative}") + if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") + list (APPEND _flags "/Wpch-messages") + endif() + else() + # return as a flag string + set (_flags "/Yu /Fp\"${_pchFileNative}\" /FI\"${_prefixFileNative}\"") + if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") + set (_flags "${_flags} /Wpch-messages") + endif() + endif() + else() + # no precompiled header, force inclusion of prefix header + if (_flags) + # append to list + list (APPEND _flags "/FI${_prefixFileNative}") + else() + # return as a flag string + set (_flags "/FI\"${_prefixFileNative}\"") + endif() + endif() + else() + # Linux / Mac OS X Intel options used + # -pch-dir location for precompiled header files + # -pch-use name of the precompiled header (PCH) to use + # -include process include file as the first line of the primary source file + # -Wpch-messages enable 
diagnostics related to pre-compiled headers (requires Intel XE 2013 Update 2) + if (_pchFile) + get_filename_component(_pchDir "${_pchFile}" DIRECTORY) + get_filename_component(_pchName "${_pchFile}" NAME) + set (_pchSuppressMessages FALSE) + if ("${CMAKE_${_language}_FLAGS}" MATCHES ".*-Wno-pch-messages.*") + set(_pchSuppressMessages TRUE) + endif() + if (_flags) + # append to list + list (APPEND _flags -include "${_prefixFile}" -pch-dir "${_pchDir}" -pch-use "${_pchName}") + if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") + if (NOT _pchSuppressMessages) + list (APPEND _flags -Wpch-messages) + endif() + endif() + else() + # return as a flag string + set (_flags "-include \"${_prefixFile}\" -pch-dir \"${_pchDir}\" -pch-use \"${_pchName}\"") + if (NOT "${_compilerVersion}" VERSION_LESS "13.1.0") + if (NOT _pchSuppressMessages) + set (_flags "${_flags} -Wpch-messages") + endif() + endif() + endif() + else() + # no precompiled header, force inclusion of prefix header + if (_flags) + # append to list + list (APPEND _flags -include "${_prefixFile}") + else() + # return as a flag string + set (_flags "-include \"${_prefixFile}\"") + endif() + endif() + endif() + else() + message (FATAL_ERROR "cotire: unsupported ${_language} compiler ${_compilerID} version ${_compilerVersion}.") + endif() + set (${_flagsVar} ${_flags} PARENT_SCOPE) +endfunction() + +function (cotire_precompile_prefix_header _prefixFile _pchFile _hostFile) + set(_options "") + set(_oneValueArgs COMPILER_EXECUTABLE COMPILER_ARG1 COMPILER_ID COMPILER_VERSION LANGUAGE) + set(_multiValueArgs COMPILE_DEFINITIONS COMPILE_FLAGS INCLUDE_DIRECTORIES SYSTEM_INCLUDE_DIRECTORIES SYS COMPILER_LAUNCHER) + cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) + if (NOT _option_LANGUAGE) + set (_option_LANGUAGE "CXX") + endif() + if (NOT _option_COMPILER_ID) + set (_option_COMPILER_ID "${CMAKE_${_option_LANGUAGE}_ID}") + endif() + if (NOT _option_COMPILER_VERSION) + set 
(_option_COMPILER_VERSION "${CMAKE_${_option_LANGUAGE}_COMPILER_VERSION}") + endif() + cotire_init_compile_cmd(_cmd "${_option_LANGUAGE}" "${_option_COMPILER_LAUNCHER}" "${_option_COMPILER_EXECUTABLE}" "${_option_COMPILER_ARG1}") + cotire_add_definitions_to_cmd(_cmd "${_option_LANGUAGE}" ${_option_COMPILE_DEFINITIONS}) + cotire_add_compile_flags_to_cmd(_cmd ${_option_COMPILE_FLAGS}) + cotire_add_includes_to_cmd(_cmd "${_option_LANGUAGE}" _option_INCLUDE_DIRECTORIES _option_SYSTEM_INCLUDE_DIRECTORIES) + cotire_add_frameworks_to_cmd(_cmd "${_option_LANGUAGE}" _option_INCLUDE_DIRECTORIES _option_SYSTEM_INCLUDE_DIRECTORIES) + cotire_add_pch_compilation_flags( + "${_option_LANGUAGE}" "${_option_COMPILER_ID}" "${_option_COMPILER_VERSION}" + "${_prefixFile}" "${_pchFile}" "${_hostFile}" _cmd) + if (COTIRE_VERBOSE) + message (STATUS "execute_process: ${_cmd}") + endif() + if (MSVC_IDE OR _option_COMPILER_ID MATCHES "MSVC") + # cl.exe messes with the output streams unless the environment variable VS_UNICODE_OUTPUT is cleared + unset (ENV{VS_UNICODE_OUTPUT}) + elseif (_option_COMPILER_ID MATCHES "Clang" AND _option_COMPILER_VERSION VERSION_LESS "4.0.0") + if (_option_COMPILER_LAUNCHER MATCHES "ccache" OR + _option_COMPILER_EXECUTABLE MATCHES "ccache") + # Newer versions of Clang embed a compilation timestamp into the precompiled header binary, + # which results in "file has been modified since the precompiled header was built" errors if ccache is used. + # We work around the problem by disabling ccache upon pre-compiling the prefix header. 
+ set (ENV{CCACHE_DISABLE} "true") + endif() + endif() + execute_process( + COMMAND ${_cmd} + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + RESULT_VARIABLE _result) + if (_result) + message (FATAL_ERROR "cotire: error ${_result} precompiling ${_prefixFile}.") + endif() +endfunction() + +function (cotire_check_precompiled_header_support _language _target _msgVar) + set (_unsupportedCompiler + "Precompiled headers not supported for ${_language} compiler ${CMAKE_${_language}_COMPILER_ID}") + if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC") + # PCH supported since Visual Studio C++ 6.0 + # and CMake does not support an earlier version + set (${_msgVar} "" PARENT_SCOPE) + elseif (CMAKE_${_language}_COMPILER_ID MATCHES "GNU") + # GCC PCH support requires version >= 3.4 + if ("${CMAKE_${_language}_COMPILER_VERSION}" VERSION_LESS "3.4.0") + set (${_msgVar} "${_unsupportedCompiler} version ${CMAKE_${_language}_COMPILER_VERSION}." PARENT_SCOPE) + else() + set (${_msgVar} "" PARENT_SCOPE) + endif() + elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Clang") + if (UNIX) + # all Unix Clang versions have PCH support + set (${_msgVar} "" PARENT_SCOPE) + elseif (WIN32) + # only clang-cl is supported under Windows + get_filename_component(_compilerName "${CMAKE_${_language}_COMPILER}" NAME_WE) + if (NOT _compilerName MATCHES "cl$") + set (${_msgVar} "${_unsupportedCompiler} version ${CMAKE_${_language}_COMPILER_VERSION}. Use clang-cl instead." PARENT_SCOPE) + endif() + endif() + elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Intel") + # Intel PCH support requires version >= 8.0.0 + if ("${CMAKE_${_language}_COMPILER_VERSION}" VERSION_LESS "8.0.0") + set (${_msgVar} "${_unsupportedCompiler} version ${CMAKE_${_language}_COMPILER_VERSION}." PARENT_SCOPE) + else() + set (${_msgVar} "" PARENT_SCOPE) + endif() + else() + set (${_msgVar} "${_unsupportedCompiler}." 
PARENT_SCOPE) + endif() + # check if ccache is used as a compiler launcher + get_target_property(_launcher ${_target} ${_language}_COMPILER_LAUNCHER) + get_filename_component(_realCompilerExe "${CMAKE_${_language}_COMPILER}" REALPATH) + if (_realCompilerExe MATCHES "ccache" OR _launcher MATCHES "ccache") + # verify that ccache configuration is compatible with precompiled headers + # always check environment variable CCACHE_SLOPPINESS, because earlier versions of ccache + # do not report the "sloppiness" setting correctly upon printing ccache configuration + if (DEFINED ENV{CCACHE_SLOPPINESS}) + if (NOT "$ENV{CCACHE_SLOPPINESS}" MATCHES "pch_defines" OR + NOT "$ENV{CCACHE_SLOPPINESS}" MATCHES "time_macros") + set (${_msgVar} + "ccache requires the environment variable CCACHE_SLOPPINESS to be set to \"pch_defines,time_macros\"." + PARENT_SCOPE) + endif() + else() + if (_realCompilerExe MATCHES "ccache") + set (_ccacheExe "${_realCompilerExe}") + else() + set (_ccacheExe "${_launcher}") + endif() + execute_process( + COMMAND "${_ccacheExe}" "--print-config" + WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}" + RESULT_VARIABLE _result + OUTPUT_VARIABLE _ccacheConfig OUTPUT_STRIP_TRAILING_WHITESPACE + ERROR_QUIET) + if (_result) + set (${_msgVar} "ccache configuration cannot be determined." PARENT_SCOPE) + elseif (NOT _ccacheConfig MATCHES "sloppiness.*=.*time_macros" OR + NOT _ccacheConfig MATCHES "sloppiness.*=.*pch_defines") + set (${_msgVar} + "ccache requires configuration setting \"sloppiness\" to be set to \"pch_defines,time_macros\"." 
+ PARENT_SCOPE) + endif() + endif() + endif() + if (APPLE) + # PCH compilation not supported by GCC / Clang for multi-architecture builds (e.g., i386, x86_64) + cotire_get_configuration_types(_configs) + foreach (_config ${_configs}) + set (_targetFlags "") + cotire_get_target_compile_flags("${_config}" "${_language}" "${_target}" _targetFlags) + cotire_filter_compile_flags("${_language}" "arch" _architectures _ignore ${_targetFlags}) + list (LENGTH _architectures _numberOfArchitectures) + if (_numberOfArchitectures GREATER 1) + string (REPLACE ";" ", " _architectureStr "${_architectures}") + set (${_msgVar} + "Precompiled headers not supported on Darwin for multi-architecture builds (${_architectureStr})." + PARENT_SCOPE) + break() + endif() + endforeach() + endif() +endfunction() + +macro (cotire_get_intermediate_dir _cotireDir) + # ${CMAKE_CFG_INTDIR} may reference a build-time variable when using a generator which supports configuration types + get_filename_component(${_cotireDir} "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${COTIRE_INTDIR}" ABSOLUTE) +endmacro() + +macro (cotire_setup_file_extension_variables) + set (_unityFileExt_C ".c") + set (_unityFileExt_CXX ".cxx") + set (_prefixFileExt_C ".h") + set (_prefixFileExt_CXX ".hxx") + set (_prefixSourceFileExt_C ".c") + set (_prefixSourceFileExt_CXX ".cxx") +endmacro() + +function (cotire_make_single_unity_source_file_path _language _target _unityFileVar) + cotire_setup_file_extension_variables() + if (NOT DEFINED _unityFileExt_${_language}) + set (${_unityFileVar} "" PARENT_SCOPE) + return() + endif() + set (_unityFileBaseName "${_target}_${_language}${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}") + set (_unityFileName "${_unityFileBaseName}${_unityFileExt_${_language}}") + cotire_get_intermediate_dir(_baseDir) + set (_unityFile "${_baseDir}/${_unityFileName}") + set (${_unityFileVar} "${_unityFile}" PARENT_SCOPE) +endfunction() + +function (cotire_make_unity_source_file_paths _language _target _maxIncludes 
_unityFilesVar) + cotire_setup_file_extension_variables() + if (NOT DEFINED _unityFileExt_${_language}) + set (${_unityFileVar} "" PARENT_SCOPE) + return() + endif() + set (_unityFileBaseName "${_target}_${_language}${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}") + cotire_get_intermediate_dir(_baseDir) + set (_startIndex 0) + set (_index 0) + set (_unityFiles "") + set (_sourceFiles ${ARGN}) + foreach (_sourceFile ${_sourceFiles}) + get_source_file_property(_startNew "${_sourceFile}" COTIRE_START_NEW_UNITY_SOURCE) + math (EXPR _unityFileCount "${_index} - ${_startIndex}") + if (_startNew OR (_maxIncludes GREATER 0 AND NOT _unityFileCount LESS _maxIncludes)) + if (_index GREATER 0) + # start new unity file segment + math (EXPR _endIndex "${_index} - 1") + set (_unityFileName "${_unityFileBaseName}_${_startIndex}_${_endIndex}${_unityFileExt_${_language}}") + list (APPEND _unityFiles "${_baseDir}/${_unityFileName}") + endif() + set (_startIndex ${_index}) + endif() + math (EXPR _index "${_index} + 1") + endforeach() + list (LENGTH _sourceFiles _numberOfSources) + if (_startIndex EQUAL 0) + # there is only a single unity file + cotire_make_single_unity_source_file_path(${_language} ${_target} _unityFiles) + elseif (_startIndex LESS _numberOfSources) + # end with final unity file segment + math (EXPR _endIndex "${_index} - 1") + set (_unityFileName "${_unityFileBaseName}_${_startIndex}_${_endIndex}${_unityFileExt_${_language}}") + list (APPEND _unityFiles "${_baseDir}/${_unityFileName}") + endif() + set (${_unityFilesVar} ${_unityFiles} PARENT_SCOPE) + if (COTIRE_DEBUG AND _unityFiles) + message (STATUS "unity files: ${_unityFiles}") + endif() +endfunction() + +function (cotire_unity_to_prefix_file_path _language _target _unityFile _prefixFileVar) + cotire_setup_file_extension_variables() + if (NOT DEFINED _unityFileExt_${_language}) + set (${_prefixFileVar} "" PARENT_SCOPE) + return() + endif() + set (_unityFileBaseName 
"${_target}_${_language}${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}") + set (_prefixFileBaseName "${_target}_${_language}${COTIRE_PREFIX_HEADER_FILENAME_SUFFIX}") + string (REPLACE "${_unityFileBaseName}" "${_prefixFileBaseName}" _prefixFile "${_unityFile}") + string (REGEX REPLACE "${_unityFileExt_${_language}}$" "${_prefixFileExt_${_language}}" _prefixFile "${_prefixFile}") + set (${_prefixFileVar} "${_prefixFile}" PARENT_SCOPE) +endfunction() + +function (cotire_prefix_header_to_source_file_path _language _prefixHeaderFile _prefixSourceFileVar) + cotire_setup_file_extension_variables() + if (NOT DEFINED _prefixSourceFileExt_${_language}) + set (${_prefixSourceFileVar} "" PARENT_SCOPE) + return() + endif() + string (REGEX REPLACE "${_prefixFileExt_${_language}}$" "${_prefixSourceFileExt_${_language}}" _prefixSourceFile "${_prefixHeaderFile}") + set (${_prefixSourceFileVar} "${_prefixSourceFile}" PARENT_SCOPE) +endfunction() + +function (cotire_make_prefix_file_name _language _target _prefixFileBaseNameVar _prefixFileNameVar) + cotire_setup_file_extension_variables() + if (NOT _language) + set (_prefixFileBaseName "${_target}${COTIRE_PREFIX_HEADER_FILENAME_SUFFIX}") + set (_prefixFileName "${_prefixFileBaseName}${_prefixFileExt_C}") + elseif (DEFINED _prefixFileExt_${_language}) + set (_prefixFileBaseName "${_target}_${_language}${COTIRE_PREFIX_HEADER_FILENAME_SUFFIX}") + set (_prefixFileName "${_prefixFileBaseName}${_prefixFileExt_${_language}}") + else() + set (_prefixFileBaseName "") + set (_prefixFileName "") + endif() + set (${_prefixFileBaseNameVar} "${_prefixFileBaseName}" PARENT_SCOPE) + set (${_prefixFileNameVar} "${_prefixFileName}" PARENT_SCOPE) +endfunction() + +function (cotire_make_prefix_file_path _language _target _prefixFileVar) + cotire_make_prefix_file_name("${_language}" "${_target}" _prefixFileBaseName _prefixFileName) + set (${_prefixFileVar} "" PARENT_SCOPE) + if (_prefixFileName) + if (NOT _language) + set (_language "C") + endif() + if 
(CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang|Intel|MSVC") + cotire_get_intermediate_dir(_baseDir) + set (${_prefixFileVar} "${_baseDir}/${_prefixFileName}" PARENT_SCOPE) + endif() + endif() +endfunction() + +function (cotire_make_pch_file_path _language _target _pchFileVar) + cotire_make_prefix_file_name("${_language}" "${_target}" _prefixFileBaseName _prefixFileName) + set (${_pchFileVar} "" PARENT_SCOPE) + if (_prefixFileBaseName AND _prefixFileName) + cotire_check_precompiled_header_support("${_language}" "${_target}" _msg) + if (NOT _msg) + if (XCODE) + # For Xcode, we completely hand off the compilation of the prefix header to the IDE + return() + endif() + cotire_get_intermediate_dir(_baseDir) + if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC") + # MSVC uses the extension .pch added to the prefix header base name + set (${_pchFileVar} "${_baseDir}/${_prefixFileBaseName}.pch" PARENT_SCOPE) + elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Clang") + # Clang looks for a precompiled header corresponding to the prefix header with the extension .pch appended + set (${_pchFileVar} "${_baseDir}/${_prefixFileName}.pch" PARENT_SCOPE) + elseif (CMAKE_${_language}_COMPILER_ID MATCHES "GNU") + # GCC looks for a precompiled header corresponding to the prefix header with the extension .gch appended + set (${_pchFileVar} "${_baseDir}/${_prefixFileName}.gch" PARENT_SCOPE) + elseif (CMAKE_${_language}_COMPILER_ID MATCHES "Intel") + # Intel uses the extension .pchi added to the prefix header base name + set (${_pchFileVar} "${_baseDir}/${_prefixFileBaseName}.pchi" PARENT_SCOPE) + endif() + endif() + endif() +endfunction() + +function (cotire_select_unity_source_files _unityFile _sourcesVar) + set (_sourceFiles ${ARGN}) + if (_sourceFiles AND "${_unityFile}" MATCHES "${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}_([0-9]+)_([0-9]+)") + set (_startIndex ${CMAKE_MATCH_1}) + set (_endIndex ${CMAKE_MATCH_2}) + list (LENGTH _sourceFiles _numberOfSources) + if (NOT _startIndex LESS 
_numberOfSources) + math (EXPR _startIndex "${_numberOfSources} - 1") + endif() + if (NOT _endIndex LESS _numberOfSources) + math (EXPR _endIndex "${_numberOfSources} - 1") + endif() + set (_files "") + foreach (_index RANGE ${_startIndex} ${_endIndex}) + list (GET _sourceFiles ${_index} _file) + list (APPEND _files "${_file}") + endforeach() + else() + set (_files ${_sourceFiles}) + endif() + set (${_sourcesVar} ${_files} PARENT_SCOPE) +endfunction() + +function (cotire_get_unity_source_dependencies _language _target _dependencySourcesVar) + set (_dependencySources "") + # depend on target's generated source files + get_target_property(_targetSourceFiles ${_target} SOURCES) + cotire_get_objects_with_property_on(_generatedSources GENERATED SOURCE ${_targetSourceFiles}) + if (_generatedSources) + # but omit all generated source files that have the COTIRE_EXCLUDED property set to true + cotire_get_objects_with_property_on(_excludedGeneratedSources COTIRE_EXCLUDED SOURCE ${_generatedSources}) + if (_excludedGeneratedSources) + list (REMOVE_ITEM _generatedSources ${_excludedGeneratedSources}) + endif() + # and omit all generated source files that have the COTIRE_DEPENDENCY property set to false explicitly + cotire_get_objects_with_property_off(_excludedNonDependencySources COTIRE_DEPENDENCY SOURCE ${_generatedSources}) + if (_excludedNonDependencySources) + list (REMOVE_ITEM _generatedSources ${_excludedNonDependencySources}) + endif() + if (_generatedSources) + list (APPEND _dependencySources ${_generatedSources}) + endif() + endif() + if (COTIRE_DEBUG AND _dependencySources) + message (STATUS "${_language} ${_target} unity source dependencies: ${_dependencySources}") + endif() + set (${_dependencySourcesVar} ${_dependencySources} PARENT_SCOPE) +endfunction() + +function (cotire_get_prefix_header_dependencies _language _target _dependencySourcesVar) + set (_dependencySources "") + # depend on target source files marked with custom COTIRE_DEPENDENCY property + 
get_target_property(_targetSourceFiles ${_target} SOURCES) + cotire_get_objects_with_property_on(_dependencySources COTIRE_DEPENDENCY SOURCE ${_targetSourceFiles}) + if (COTIRE_DEBUG AND _dependencySources) + message (STATUS "${_language} ${_target} prefix header dependencies: ${_dependencySources}") + endif() + set (${_dependencySourcesVar} ${_dependencySources} PARENT_SCOPE) +endfunction() + +function (cotire_generate_target_script _language _configurations _target _targetScriptVar _targetConfigScriptVar) + set (_targetSources ${ARGN}) + cotire_get_prefix_header_dependencies(${_language} ${_target} COTIRE_TARGET_PREFIX_DEPENDS ${_targetSources}) + cotire_get_unity_source_dependencies(${_language} ${_target} COTIRE_TARGET_UNITY_DEPENDS ${_targetSources}) + # set up variables to be configured + set (COTIRE_TARGET_LANGUAGE "${_language}") + get_target_property(COTIRE_TARGET_IGNORE_PATH ${_target} COTIRE_PREFIX_HEADER_IGNORE_PATH) + cotire_add_sys_root_paths(COTIRE_TARGET_IGNORE_PATH) + get_target_property(COTIRE_TARGET_INCLUDE_PATH ${_target} COTIRE_PREFIX_HEADER_INCLUDE_PATH) + cotire_add_sys_root_paths(COTIRE_TARGET_INCLUDE_PATH) + get_target_property(COTIRE_TARGET_PRE_UNDEFS ${_target} COTIRE_UNITY_SOURCE_PRE_UNDEFS) + get_target_property(COTIRE_TARGET_POST_UNDEFS ${_target} COTIRE_UNITY_SOURCE_POST_UNDEFS) + get_target_property(COTIRE_TARGET_MAXIMUM_NUMBER_OF_INCLUDES ${_target} COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES) + get_target_property(COTIRE_TARGET_INCLUDE_PRIORITY_PATH ${_target} COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH) + cotire_get_source_files_undefs(COTIRE_UNITY_SOURCE_PRE_UNDEFS COTIRE_TARGET_SOURCES_PRE_UNDEFS ${_targetSources}) + cotire_get_source_files_undefs(COTIRE_UNITY_SOURCE_POST_UNDEFS COTIRE_TARGET_SOURCES_POST_UNDEFS ${_targetSources}) + set (COTIRE_TARGET_CONFIGURATION_TYPES "${_configurations}") + foreach (_config ${_configurations}) + string (TOUPPER "${_config}" _upperConfig) + cotire_get_target_include_directories( + 
"${_config}" "${_language}" "${_target}" COTIRE_TARGET_INCLUDE_DIRECTORIES_${_upperConfig} COTIRE_TARGET_SYSTEM_INCLUDE_DIRECTORIES_${_upperConfig}) + cotire_get_target_compile_definitions( + "${_config}" "${_language}" "${_target}" COTIRE_TARGET_COMPILE_DEFINITIONS_${_upperConfig}) + cotire_get_target_compiler_flags( + "${_config}" "${_language}" "${_target}" COTIRE_TARGET_COMPILE_FLAGS_${_upperConfig}) + cotire_get_source_files_compile_definitions( + "${_config}" "${_language}" COTIRE_TARGET_SOURCES_COMPILE_DEFINITIONS_${_upperConfig} ${_targetSources}) + endforeach() + get_target_property(COTIRE_TARGET_${_language}_COMPILER_LAUNCHER ${_target} ${_language}_COMPILER_LAUNCHER) + # set up COTIRE_TARGET_SOURCES + set (COTIRE_TARGET_SOURCES "") + foreach (_sourceFile ${_targetSources}) + get_source_file_property(_generated "${_sourceFile}" GENERATED) + if (_generated) + # use absolute paths for generated files only, retrieving the LOCATION property is an expensive operation + get_source_file_property(_sourceLocation "${_sourceFile}" LOCATION) + list (APPEND COTIRE_TARGET_SOURCES "${_sourceLocation}") + else() + list (APPEND COTIRE_TARGET_SOURCES "${_sourceFile}") + endif() + endforeach() + # copy variable definitions to cotire target script + get_cmake_property(_vars VARIABLES) + string (REGEX MATCHALL "COTIRE_[A-Za-z0-9_]+" _matchVars "${_vars}") + # omit COTIRE_*_INIT variables + string (REGEX MATCHALL "COTIRE_[A-Za-z0-9_]+_INIT" _initVars "${_matchVars}") + if (_initVars) + list (REMOVE_ITEM _matchVars ${_initVars}) + endif() + # omit COTIRE_VERBOSE which is passed as a CMake define on command line + list (REMOVE_ITEM _matchVars COTIRE_VERBOSE) + set (_contents "") + set (_contentsHasGeneratorExpressions FALSE) + foreach (_var IN LISTS _matchVars ITEMS + XCODE MSVC CMAKE_GENERATOR CMAKE_BUILD_TYPE CMAKE_CONFIGURATION_TYPES + CMAKE_${_language}_COMPILER_ID CMAKE_${_language}_COMPILER_VERSION + CMAKE_${_language}_COMPILER_LAUNCHER CMAKE_${_language}_COMPILER 
CMAKE_${_language}_COMPILER_ARG1 + CMAKE_INCLUDE_FLAG_${_language} CMAKE_INCLUDE_FLAG_SEP_${_language} + CMAKE_INCLUDE_SYSTEM_FLAG_${_language} + CMAKE_${_language}_FRAMEWORK_SEARCH_FLAG + CMAKE_${_language}_SYSTEM_FRAMEWORK_SEARCH_FLAG + CMAKE_${_language}_SOURCE_FILE_EXTENSIONS) + if (DEFINED ${_var}) + string (REPLACE "\"" "\\\"" _value "${${_var}}") + set (_contents "${_contents}set (${_var} \"${_value}\")\n") + if (NOT _contentsHasGeneratorExpressions) + if ("${_value}" MATCHES "\\$<.*>") + set (_contentsHasGeneratorExpressions TRUE) + endif() + endif() + endif() + endforeach() + # generate target script file + get_filename_component(_moduleName "${COTIRE_CMAKE_MODULE_FILE}" NAME) + set (_targetCotireScript "${CMAKE_CURRENT_BINARY_DIR}/${_target}_${_language}_${_moduleName}") + cotire_write_file("CMAKE" "${_targetCotireScript}" "${_contents}" FALSE) + if (_contentsHasGeneratorExpressions) + # use file(GENERATE ...) to expand generator expressions in the target script at CMake generate-time + set (_configNameOrNoneGeneratorExpression "$<$:None>$<$>:$>") + set (_targetCotireConfigScript "${CMAKE_CURRENT_BINARY_DIR}/${_target}_${_language}_${_configNameOrNoneGeneratorExpression}_${_moduleName}") + file (GENERATE OUTPUT "${_targetCotireConfigScript}" INPUT "${_targetCotireScript}") + else() + set (_targetCotireConfigScript "${_targetCotireScript}") + endif() + set (${_targetScriptVar} "${_targetCotireScript}" PARENT_SCOPE) + set (${_targetConfigScriptVar} "${_targetCotireConfigScript}" PARENT_SCOPE) +endfunction() + +function (cotire_setup_pch_file_compilation _language _target _targetScript _prefixFile _pchFile _hostFile) + set (_sourceFiles ${ARGN}) + if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" OR + (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) + # for MSVC, Intel and Clang-cl, we attach the precompiled header compilation to the host file + # the remaining files include the precompiled header, see cotire_setup_pch_file_inclusion + if 
(_sourceFiles) + set (_flags "") + cotire_add_pch_compilation_flags( + "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" + "${_prefixFile}" "${_pchFile}" "${_hostFile}" _flags) + set_property (SOURCE ${_hostFile} APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flags} ") + set_property (SOURCE ${_hostFile} APPEND PROPERTY OBJECT_OUTPUTS "${_pchFile}") + # make object file generated from host file depend on prefix header + set_property (SOURCE ${_hostFile} APPEND PROPERTY OBJECT_DEPENDS "${_prefixFile}") + # mark host file as cotired to prevent it from being used in another cotired target + set_property (SOURCE ${_hostFile} PROPERTY COTIRE_TARGET "${_target}") + endif() + elseif ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja") + # for makefile based generator, we add a custom command to precompile the prefix header + if (_targetScript) + cotire_set_cmd_to_prologue(_cmds) + list (APPEND _cmds -P "${COTIRE_CMAKE_MODULE_FILE}" "precompile" "${_targetScript}" "${_prefixFile}" "${_pchFile}" "${_hostFile}") + if (MSVC_IDE) + file (TO_NATIVE_PATH "${_pchFile}" _pchFileLogPath) + else() + file (RELATIVE_PATH _pchFileLogPath "${CMAKE_BINARY_DIR}" "${_pchFile}") + endif() + # make precompiled header compilation depend on the actual compiler executable used to force + # re-compilation when the compiler executable is updated. This prevents "created by a different GCC executable" + # warnings when the precompiled header is included. 
+ get_filename_component(_realCompilerExe "${CMAKE_${_language}_COMPILER}" ABSOLUTE) + if (COTIRE_DEBUG) + message (STATUS "add_custom_command: OUTPUT ${_pchFile} ${_cmds} DEPENDS ${_prefixFile} ${_realCompilerExe} IMPLICIT_DEPENDS ${_language} ${_prefixFile}") + endif() + set_property (SOURCE "${_pchFile}" PROPERTY GENERATED TRUE) + add_custom_command( + OUTPUT "${_pchFile}" + COMMAND ${_cmds} + DEPENDS "${_prefixFile}" "${_realCompilerExe}" + IMPLICIT_DEPENDS ${_language} "${_prefixFile}" + WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}" + COMMENT "Building ${_language} precompiled header ${_pchFileLogPath}" + VERBATIM) + endif() + endif() +endfunction() + +function (cotire_setup_pch_file_inclusion _language _target _wholeTarget _prefixFile _pchFile _hostFile) + if (CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" OR + (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang")) + # for MSVC, Intel and clang-cl, we include the precompiled header in all but the host file + # the host file does the precompiled header compilation, see cotire_setup_pch_file_compilation + set (_sourceFiles ${ARGN}) + list (LENGTH _sourceFiles _numberOfSourceFiles) + if (_numberOfSourceFiles GREATER 0) + # mark sources as cotired to prevent them from being used in another cotired target + set_source_files_properties(${_sourceFiles} PROPERTIES COTIRE_TARGET "${_target}") + set (_flags "") + cotire_add_prefix_pch_inclusion_flags( + "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" + "${_prefixFile}" "${_pchFile}" _flags) + set_property (SOURCE ${_sourceFiles} APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flags} ") + # make object files generated from source files depend on precompiled header + set_property (SOURCE ${_sourceFiles} APPEND PROPERTY OBJECT_DEPENDS "${_pchFile}") + endif() + elseif ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja") + set (_sourceFiles ${_hostFile} ${ARGN}) + if (NOT _wholeTarget) + # for makefile based generator, we force the 
inclusion of the prefix header for a subset + # of the source files, if this is a multi-language target or has excluded files + set (_flags "") + cotire_add_prefix_pch_inclusion_flags( + "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" + "${_prefixFile}" "${_pchFile}" _flags) + set_property (SOURCE ${_sourceFiles} APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flags} ") + # mark sources as cotired to prevent them from being used in another cotired target + set_source_files_properties(${_sourceFiles} PROPERTIES COTIRE_TARGET "${_target}") + endif() + # make object files generated from source files depend on precompiled header + set_property (SOURCE ${_sourceFiles} APPEND PROPERTY OBJECT_DEPENDS "${_pchFile}") + endif() +endfunction() + +function (cotire_setup_prefix_file_inclusion _language _target _prefixFile) + set (_sourceFiles ${ARGN}) + # force the inclusion of the prefix header for the given source files + set (_flags "") + set (_pchFile "") + cotire_add_prefix_pch_inclusion_flags( + "${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}" + "${_prefixFile}" "${_pchFile}" _flags) + set_property (SOURCE ${_sourceFiles} APPEND_STRING PROPERTY COMPILE_FLAGS " ${_flags} ") + # mark sources as cotired to prevent them from being used in another cotired target + set_source_files_properties(${_sourceFiles} PROPERTIES COTIRE_TARGET "${_target}") + # make object files generated from source files depend on prefix header + set_property (SOURCE ${_sourceFiles} APPEND PROPERTY OBJECT_DEPENDS "${_prefixFile}") +endfunction() + +function (cotire_get_first_set_property_value _propertyValueVar _type _object) + set (_properties ${ARGN}) + foreach (_property ${_properties}) + get_property(_propertyValue ${_type} "${_object}" PROPERTY ${_property}) + if (_propertyValue) + set (${_propertyValueVar} ${_propertyValue} PARENT_SCOPE) + return() + endif() + endforeach() + set (${_propertyValueVar} "" PARENT_SCOPE) 
+endfunction() + +function (cotire_setup_combine_command _language _targetScript _joinedFile _cmdsVar) + set (_files ${ARGN}) + set (_filesPaths "") + foreach (_file ${_files}) + get_filename_component(_filePath "${_file}" ABSOLUTE) + list (APPEND _filesPaths "${_filePath}") + endforeach() + cotire_set_cmd_to_prologue(_prefixCmd) + list (APPEND _prefixCmd -P "${COTIRE_CMAKE_MODULE_FILE}" "combine") + if (_targetScript) + list (APPEND _prefixCmd "${_targetScript}") + endif() + list (APPEND _prefixCmd "${_joinedFile}" ${_filesPaths}) + if (COTIRE_DEBUG) + message (STATUS "add_custom_command: OUTPUT ${_joinedFile} COMMAND ${_prefixCmd} DEPENDS ${_files}") + endif() + set_property (SOURCE "${_joinedFile}" PROPERTY GENERATED TRUE) + if (MSVC_IDE) + file (TO_NATIVE_PATH "${_joinedFile}" _joinedFileLogPath) + else() + file (RELATIVE_PATH _joinedFileLogPath "${CMAKE_BINARY_DIR}" "${_joinedFile}") + endif() + get_filename_component(_joinedFileBaseName "${_joinedFile}" NAME_WE) + get_filename_component(_joinedFileExt "${_joinedFile}" EXT) + if (_language AND _joinedFileBaseName MATCHES "${COTIRE_UNITY_SOURCE_FILENAME_SUFFIX}$") + set (_comment "Generating ${_language} unity source ${_joinedFileLogPath}") + elseif (_language AND _joinedFileBaseName MATCHES "${COTIRE_PREFIX_HEADER_FILENAME_SUFFIX}$") + if (_joinedFileExt MATCHES "^\\.c") + set (_comment "Generating ${_language} prefix source ${_joinedFileLogPath}") + else() + set (_comment "Generating ${_language} prefix header ${_joinedFileLogPath}") + endif() + else() + set (_comment "Generating ${_joinedFileLogPath}") + endif() + add_custom_command( + OUTPUT "${_joinedFile}" + COMMAND ${_prefixCmd} + DEPENDS ${_files} + COMMENT "${_comment}" + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" + VERBATIM) + list (APPEND ${_cmdsVar} COMMAND ${_prefixCmd}) + set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE) +endfunction() + +function (cotire_setup_target_pch_usage _languages _target _wholeTarget) + if (XCODE) + # for Xcode, we attach a 
pre-build action to generate the unity sources and prefix headers
	# collect the per-language prefix headers previously recorded on the target
	set (_prefixFiles "")
	foreach (_language ${_languages})
		get_property(_prefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER)
		if (_prefixFile)
			list (APPEND _prefixFiles "${_prefixFile}")
		endif()
	endforeach()
	set (_cmds ${ARGN})
	list (LENGTH _prefixFiles _numberOfPrefixFiles)
	if (_numberOfPrefixFiles GREATER 1)
		# we also generate a generic, single prefix header which includes all language specific prefix headers
		set (_language "")
		set (_targetScript "")
		cotire_make_prefix_file_path("${_language}" ${_target} _prefixHeader)
		cotire_setup_combine_command("${_language}" "${_targetScript}" "${_prefixHeader}" _cmds ${_prefixFiles})
	else()
		set (_prefixHeader "${_prefixFiles}")
	endif()
	if (COTIRE_DEBUG)
		message (STATUS "add_custom_command: TARGET ${_target} PRE_BUILD ${_cmds}")
	endif()
	# because CMake PRE_BUILD command does not support dependencies,
	# we check dependencies explicitly in cotire script mode when the pre-build action is run
	add_custom_command(
		TARGET "${_target}"
		PRE_BUILD ${_cmds}
		WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
		COMMENT "Updating target ${_target} prefix headers"
		VERBATIM)
	# make Xcode precompile the generated prefix header with ProcessPCH and ProcessPCH++
	set_target_properties(${_target} PROPERTIES XCODE_ATTRIBUTE_GCC_PRECOMPILE_PREFIX_HEADER "YES")
	set_target_properties(${_target} PROPERTIES XCODE_ATTRIBUTE_GCC_PREFIX_HEADER "${_prefixHeader}")
elseif ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja")
	# for makefile based generators, we force inclusion of the prefix header for all target source files
	# if this is a single-language target without any excluded files
	if (_wholeTarget)
		set (_language "${_languages}")
		# for MSVC, Intel and clang-cl, precompiled header inclusion is always done on the source file level
		# see cotire_setup_pch_file_inclusion
		if (NOT CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" AND NOT
			(WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang"))
			get_property(_prefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER)
			if (_prefixFile)
				get_property(_pchFile TARGET ${_target} PROPERTY COTIRE_${_language}_PRECOMPILED_HEADER)
				set (_options COMPILE_OPTIONS)
				# translate the prefix header / PCH pair into compiler-specific forced-include flags
				cotire_add_prefix_pch_inclusion_flags(
					"${_language}" "${CMAKE_${_language}_COMPILER_ID}" "${CMAKE_${_language}_COMPILER_VERSION}"
					"${_prefixFile}" "${_pchFile}" _options)
				set_property(TARGET ${_target} APPEND PROPERTY ${_options})
			endif()
		endif()
	endif()
endif()
endfunction()

# Sets up custom commands that (re-)generate the given unity source files for
# _language by running the cotire script in "unity" mode, and appends the
# generation commands to the list variable named by _cmdsVar.
function (cotire_setup_unity_generation_commands _language _target _targetScript _targetConfigScript _unityFiles _cmdsVar)
	set (_dependencySources "")
	cotire_get_unity_source_dependencies(${_language} ${_target} _dependencySources ${ARGN})
	foreach (_unityFile ${_unityFiles})
		set_property (SOURCE "${_unityFile}" PROPERTY GENERATED TRUE)
		# set up compiled unity source dependencies via OBJECT_DEPENDS
		# this ensures that missing source files are generated before the unity file is compiled
		if (COTIRE_DEBUG AND _dependencySources)
			message (STATUS "${_unityFile} OBJECT_DEPENDS ${_dependencySources}")
		endif()
		if (_dependencySources)
			# the OBJECT_DEPENDS property requires a list of full paths
			set (_objectDependsPaths "")
			foreach (_sourceFile ${_dependencySources})
				get_source_file_property(_sourceLocation "${_sourceFile}" LOCATION)
				list (APPEND _objectDependsPaths "${_sourceLocation}")
			endforeach()
			set_property (SOURCE "${_unityFile}" PROPERTY OBJECT_DEPENDS ${_objectDependsPaths})
		endif()
		if (WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel")
			# unity file compilation results in potentially huge object file,
			# thus use /bigobj by default under cl.exe and Windows Intel
			set_property (SOURCE "${_unityFile}" APPEND_STRING PROPERTY COMPILE_FLAGS "/bigobj")
		endif()
		cotire_set_cmd_to_prologue(_unityCmd)
		list (APPEND _unityCmd -P "${COTIRE_CMAKE_MODULE_FILE}" "unity" "${_targetConfigScript}" "${_unityFile}")
		if (CMAKE_VERSION VERSION_LESS "3.1.0")
			set (_unityCmdDepends "${_targetScript}")
		else()
			# CMake 3.1.0 supports generator expressions in arguments to DEPENDS
			set (_unityCmdDepends "${_targetConfigScript}")
		endif()
		if (MSVC_IDE)
			file (TO_NATIVE_PATH "${_unityFile}" _unityFileLogPath)
		else()
			file (RELATIVE_PATH _unityFileLogPath "${CMAKE_BINARY_DIR}" "${_unityFile}")
		endif()
		if (COTIRE_DEBUG)
			message (STATUS "add_custom_command: OUTPUT ${_unityFile} COMMAND ${_unityCmd} DEPENDS ${_unityCmdDepends}")
		endif()
		add_custom_command(
			OUTPUT "${_unityFile}"
			COMMAND ${_unityCmd}
			DEPENDS ${_unityCmdDepends}
			COMMENT "Generating ${_language} unity source ${_unityFileLogPath}"
			WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
			VERBATIM)
		list (APPEND ${_cmdsVar} COMMAND ${_unityCmd})
	endforeach()
	set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE)
endfunction()

# Sets up a custom command that generates the prefix header for _language by
# running the cotire script in "prefix" mode against the given unity files.
function (cotire_setup_prefix_generation_command _language _target _targetScript _prefixFile _unityFiles _cmdsVar)
	set (_sourceFiles ${ARGN})
	set (_dependencySources "")
	cotire_get_prefix_header_dependencies(${_language} ${_target} _dependencySources ${_sourceFiles})
	cotire_set_cmd_to_prologue(_prefixCmd)
	list (APPEND _prefixCmd -P "${COTIRE_CMAKE_MODULE_FILE}" "prefix" "${_targetScript}" "${_prefixFile}" ${_unityFiles})
	set_property (SOURCE "${_prefixFile}" PROPERTY GENERATED TRUE)
	# make prefix header generation depend on the actual compiler executable used to force
	# re-generation when the compiler executable is updated. This prevents "file not found"
	# errors for compiler version specific system header files.
	get_filename_component(_realCompilerExe "${CMAKE_${_language}_COMPILER}" ABSOLUTE)
	if (COTIRE_DEBUG)
		# NOTE(review): this debug message prints ${_unityFile}, while the command below
		# actually depends on ${_unityFiles} — confirm which was intended
		message (STATUS "add_custom_command: OUTPUT ${_prefixFile} COMMAND ${_prefixCmd} DEPENDS ${_unityFile} ${_dependencySources} ${_realCompilerExe}")
	endif()
	if (MSVC_IDE)
		file (TO_NATIVE_PATH "${_prefixFile}" _prefixFileLogPath)
	else()
		file (RELATIVE_PATH _prefixFileLogPath "${CMAKE_BINARY_DIR}" "${_prefixFile}")
	endif()
	# choose the log message wording based on whether a prefix source or header is generated
	get_filename_component(_prefixFileExt "${_prefixFile}" EXT)
	if (_prefixFileExt MATCHES "^\\.c")
		set (_comment "Generating ${_language} prefix source ${_prefixFileLogPath}")
	else()
		set (_comment "Generating ${_language} prefix header ${_prefixFileLogPath}")
	endif()
	# prevent pre-processing errors upon generating the prefix header when a target's generated include file does not yet exist
	# we do not add a file-level dependency for the target's generated files though, because we only want to depend on their existence
	# thus we make the prefix header generation depend on a custom helper target which triggers the generation of the files
	set (_preTargetName "${_target}${COTIRE_PCH_TARGET_SUFFIX}_pre")
	if (TARGET ${_preTargetName})
		# custom helper target has already been generated while processing a different language
		list (APPEND _dependencySources ${_preTargetName})
	else()
		get_target_property(_targetSourceFiles ${_target} SOURCES)
		cotire_get_objects_with_property_on(_generatedSources GENERATED SOURCE ${_targetSourceFiles})
		if (_generatedSources)
			add_custom_target("${_preTargetName}" DEPENDS ${_generatedSources})
			cotire_init_target("${_preTargetName}")
			list (APPEND _dependencySources ${_preTargetName})
		endif()
	endif()
	add_custom_command(
		OUTPUT "${_prefixFile}" "${_prefixFile}.log"
		COMMAND ${_prefixCmd}
		DEPENDS ${_unityFiles} ${_dependencySources} "${_realCompilerExe}"
		COMMENT "${_comment}"
		WORKING_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
		VERBATIM)
	list (APPEND ${_cmdsVar} COMMAND ${_prefixCmd})
	set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE)
endfunction()

# Generates the prefix header from the unity source. For GNU/Clang an
# intermediate prefix *source* file is generated first so that the
# system_header pragma is honored when the prefix header is precompiled.
function (cotire_setup_prefix_generation_from_unity_command _language _target _targetScript _prefixFile _unityFiles _cmdsVar)
	set (_sourceFiles ${ARGN})
	if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang")
		# GNU and Clang require indirect compilation of the prefix header to make them honor the system_header pragma
		cotire_prefix_header_to_source_file_path(${_language} "${_prefixFile}" _prefixSourceFile)
	else()
		set (_prefixSourceFile "${_prefixFile}")
	endif()
	cotire_setup_prefix_generation_command(
		${_language} ${_target} "${_targetScript}"
		"${_prefixSourceFile}" "${_unityFiles}" ${_cmdsVar} ${_sourceFiles})
	if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang")
		# set up generation of a prefix source file which includes the prefix header
		cotire_setup_combine_command(${_language} "${_targetScript}" "${_prefixFile}" _cmds ${_prefixSourceFile})
	endif()
	set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE)
endfunction()

# Generates the prefix header by combining the user-provided prefix header
# files (COTIRE_<LANG>_PREFIX_HEADER_INIT) instead of deriving it from the
# unity build.
function (cotire_setup_prefix_generation_from_provided_command _language _target _targetScript _prefixFile _cmdsVar)
	set (_prefixHeaderFiles ${ARGN})
	if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang")
		# GNU and Clang require indirect compilation of the prefix header to make them honor the system_header pragma
		cotire_prefix_header_to_source_file_path(${_language} "${_prefixFile}" _prefixSourceFile)
	else()
		set (_prefixSourceFile "${_prefixFile}")
	endif()
	cotire_setup_combine_command(${_language} "${_targetScript}" "${_prefixSourceFile}" _cmds ${_prefixHeaderFiles})
	if (CMAKE_${_language}_COMPILER_ID MATCHES "GNU|Clang")
		# set up generation of a prefix source file which includes the prefix header
		cotire_setup_combine_command(${_language} "${_targetScript}" "${_prefixFile}" _cmds ${_prefixSourceFile})
	endif()
	set (${_cmdsVar} ${${_cmdsVar}} PARENT_SCOPE)
endfunction()

# Initializes every user-tunable COTIRE_* target property with its default
# value, but only if the user has not already set it.
function (cotire_init_cotire_target_properties _target)
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_ENABLE_PRECOMPILED_HEADER SET)
	if (NOT _isSet)
		set_property(TARGET ${_target} PROPERTY COTIRE_ENABLE_PRECOMPILED_HEADER TRUE)
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_ADD_UNITY_BUILD SET)
	if (NOT _isSet)
		set_property(TARGET ${_target} PROPERTY COTIRE_ADD_UNITY_BUILD TRUE)
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_ADD_CLEAN SET)
	if (NOT _isSet)
		set_property(TARGET ${_target} PROPERTY COTIRE_ADD_CLEAN FALSE)
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_IGNORE_PATH SET)
	if (NOT _isSet)
		# by default, headers below the source dir (and the binary dir, if disjoint)
		# are not moved into the prefix header
		set_property(TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_IGNORE_PATH "${CMAKE_SOURCE_DIR}")
		cotire_check_is_path_relative_to("${CMAKE_BINARY_DIR}" _isRelative "${CMAKE_SOURCE_DIR}")
		if (NOT _isRelative)
			set_property(TARGET ${_target} APPEND PROPERTY COTIRE_PREFIX_HEADER_IGNORE_PATH "${CMAKE_BINARY_DIR}")
		endif()
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_INCLUDE_PATH SET)
	if (NOT _isSet)
		set_property(TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_INCLUDE_PATH "")
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH SET)
	if (NOT _isSet)
		set_property(TARGET ${_target} PROPERTY COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH "")
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_PRE_UNDEFS SET)
	if (NOT _isSet)
		set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_PRE_UNDEFS "")
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_POST_UNDEFS SET)
	if (NOT _isSet)
		set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_POST_UNDEFS "")
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_UNITY_LINK_LIBRARIES_INIT SET)
	if (NOT _isSet)
		set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_LINK_LIBRARIES_INIT "COPY_UNITY")
	endif()
	get_property(_isSet TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES SET)
	if (NOT _isSet)
		# fall back to the global maximum, if one is configured
		if (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES)
			set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES "${COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES}")
		else()
			set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES "")
		endif()
	endif()
endfunction()

# Composes the human-readable status message describing how (or why not) the
# target was cotired, and returns it in the variable named by _targetMsgVar.
# Extra arguments are the source files excluded from cotiring.
function (cotire_make_target_message _target _languages _disableMsg _targetMsgVar)
	get_target_property(_targetUsePCH ${_target} COTIRE_ENABLE_PRECOMPILED_HEADER)
	get_target_property(_targetAddSCU ${_target} COTIRE_ADD_UNITY_BUILD)
	string (REPLACE ";" " " _languagesStr "${_languages}")
	# ARGC - 4 named parameters = number of excluded files passed via ARGN
	math (EXPR _numberOfExcludedFiles "${ARGC} - 4")
	if (_numberOfExcludedFiles EQUAL 0)
		set (_excludedStr "")
	elseif (COTIRE_VERBOSE OR _numberOfExcludedFiles LESS 4)
		string (REPLACE ";" ", " _excludedStr "excluding ${ARGN}")
	else()
		set (_excludedStr "excluding ${_numberOfExcludedFiles} files")
	endif()
	set (_targetMsg "")
	if (NOT _languages)
		set (_targetMsg "Target ${_target} cannot be cotired.")
		if (_disableMsg)
			set (_targetMsg "${_targetMsg} ${_disableMsg}")
		endif()
	elseif (NOT _targetUsePCH AND NOT _targetAddSCU)
		set (_targetMsg "${_languagesStr} target ${_target} cotired without unity build and precompiled header.")
		if (_disableMsg)
			set (_targetMsg "${_targetMsg} ${_disableMsg}")
		endif()
	elseif (NOT _targetUsePCH)
		if (_excludedStr)
			set (_targetMsg "${_languagesStr} target ${_target} cotired without precompiled header ${_excludedStr}.")
		else()
			set (_targetMsg "${_languagesStr} target ${_target} cotired without precompiled header.")
		endif()
		if (_disableMsg)
			set (_targetMsg "${_targetMsg} ${_disableMsg}")
		endif()
	elseif (NOT _targetAddSCU)
		if (_excludedStr)
			set (_targetMsg "${_languagesStr} target ${_target} cotired without unity build ${_excludedStr}.")
		else()
			set (_targetMsg "${_languagesStr} target ${_target} cotired without unity build.")
		endif()
		if (_disableMsg)
			set (_targetMsg "${_targetMsg} ${_disableMsg}")
		endif()
	else()
		if (_excludedStr)
			set (_targetMsg "${_languagesStr} target ${_target} cotired ${_excludedStr}.")
		else()
			set (_targetMsg "${_languagesStr} target ${_target} cotired.")
		endif()
	endif()
	set (${_targetMsgVar} "${_targetMsg}" PARENT_SCOPE)
endfunction()

# Determines which of the requested languages actually apply to the target,
# and disables PCH / unity build when preconditions are not met. Returns the
# applicable languages and whether the whole target can be treated uniformly.
function (cotire_choose_target_languages _target _targetLanguagesVar _wholeTargetVar)
	set (_languages ${ARGN})
	set (_allSourceFiles "")
	set (_allExcludedSourceFiles "")
	set (_allCotiredSourceFiles "")
	set (_targetLanguages "")
	set (_pchEligibleTargetLanguages "")
	get_target_property(_targetType ${_target} TYPE)
	get_target_property(_targetSourceFiles ${_target} SOURCES)
	get_target_property(_targetUsePCH ${_target} COTIRE_ENABLE_PRECOMPILED_HEADER)
	get_target_property(_targetAddSCU ${_target} COTIRE_ADD_UNITY_BUILD)
	set (_disableMsg "")
	foreach (_language ${_languages})
		get_target_property(_prefixHeader ${_target} COTIRE_${_language}_PREFIX_HEADER)
		get_target_property(_unityBuildFile ${_target} COTIRE_${_language}_UNITY_SOURCE)
		if (_prefixHeader OR _unityBuildFile)
			# bail out early if cotire was already applied to this target
			message (STATUS "cotire: target ${_target} has already been cotired.")
			set (${_targetLanguagesVar} "" PARENT_SCOPE)
			return()
		endif()
		if (_targetUsePCH AND "${_language}" MATCHES "^C|CXX$" AND DEFINED CMAKE_${_language}_COMPILER_ID)
			if (CMAKE_${_language}_COMPILER_ID)
				# verify the compiler supports precompiled headers at all
				cotire_check_precompiled_header_support("${_language}" "${_target}" _disableMsg)
				if (_disableMsg)
					set (_targetUsePCH FALSE)
				endif()
			endif()
		endif()
		set (_sourceFiles "")
		set (_excludedSources "")
		set (_cotiredSources "")
		cotire_filter_language_source_files(${_language} ${_target} _sourceFiles _excludedSources _cotiredSources ${_targetSourceFiles})
		if (_sourceFiles OR _excludedSources OR _cotiredSources)
			list (APPEND _targetLanguages ${_language})
		endif()
		if (_sourceFiles)
			list (APPEND _allSourceFiles ${_sourceFiles})
		endif()
		# only languages with enough sources are eligible for a precompiled header
		list (LENGTH _sourceFiles _numberOfSources)
		if (NOT _numberOfSources LESS ${COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES})
			list (APPEND _pchEligibleTargetLanguages ${_language})
		endif()
		if (_excludedSources)
			list (APPEND _allExcludedSourceFiles ${_excludedSources})
		endif()
		if (_cotiredSources)
			list (APPEND _allCotiredSourceFiles ${_cotiredSources})
		endif()
	endforeach()
	set (_targetMsgLevel STATUS)
	if (NOT _targetLanguages)
		string (REPLACE ";" " or " _languagesStr "${_languages}")
		set (_disableMsg "No ${_languagesStr} source files.")
		set (_targetUsePCH FALSE)
		set (_targetAddSCU FALSE)
	endif()
	if (_targetUsePCH)
		if (_allCotiredSourceFiles)
			# some sources already include a PCH produced for another cotired target;
			# applying a second PCH would break the build, so report an error
			cotire_get_source_file_property_values(_cotireTargets COTIRE_TARGET ${_allCotiredSourceFiles})
			list (REMOVE_DUPLICATES _cotireTargets)
			string (REPLACE ";" ", " _cotireTargetsStr "${_cotireTargets}")
			set (_disableMsg "Target sources already include a precompiled header for target(s) ${_cotireTargets}.")
			set (_disableMsg "${_disableMsg} Set target property COTIRE_ENABLE_PRECOMPILED_HEADER to FALSE for targets ${_target},")
			set (_disableMsg "${_disableMsg} ${_cotireTargetsStr} to get a workable build system.")
			set (_targetMsgLevel SEND_ERROR)
			set (_targetUsePCH FALSE)
		elseif (NOT _pchEligibleTargetLanguages)
			set (_disableMsg "Too few applicable sources.")
			set (_targetUsePCH FALSE)
		elseif (XCODE AND _allExcludedSourceFiles)
			# for Xcode, we cannot apply the precompiled header to individual sources, only to the whole target
			set (_disableMsg "Exclusion of source files not supported for generator Xcode.")
			set (_targetUsePCH FALSE)
		elseif (XCODE AND "${_targetType}" STREQUAL "OBJECT_LIBRARY")
			# for Xcode, we cannot apply the required PRE_BUILD action to generate the prefix header to an OBJECT_LIBRARY target
			set (_disableMsg "Required PRE_BUILD action not supported for OBJECT_LIBRARY targets for generator Xcode.")
			set (_targetUsePCH FALSE)
		endif()
	endif()
	if (_targetAddSCU)
		# disable unity builds if automatic Qt processing is used
		get_target_property(_targetAutoMoc ${_target} AUTOMOC)
		get_target_property(_targetAutoUic ${_target} AUTOUIC)
		get_target_property(_targetAutoRcc ${_target} AUTORCC)
		if (_targetAutoMoc OR _targetAutoUic OR _targetAutoRcc)
			if (_disableMsg)
				set (_disableMsg "${_disableMsg} Target uses automatic CMake Qt processing.")
			else()
				set (_disableMsg "Target uses automatic CMake Qt processing.")
			endif()
			set (_targetAddSCU FALSE)
		endif()
	endif()
	# persist the (possibly downgraded) decisions on the target
	set_property(TARGET ${_target} PROPERTY COTIRE_ENABLE_PRECOMPILED_HEADER ${_targetUsePCH})
	set_property(TARGET ${_target} PROPERTY COTIRE_ADD_UNITY_BUILD ${_targetAddSCU})
	cotire_make_target_message(${_target} "${_targetLanguages}" "${_disableMsg}" _targetMsg ${_allExcludedSourceFiles})
	if (_targetMsg)
		if (NOT DEFINED COTIREMSG_${_target})
			set (COTIREMSG_${_target} "")
		endif()
		if (COTIRE_VERBOSE OR NOT "${_targetMsgLevel}" STREQUAL "STATUS" OR
			NOT "${COTIREMSG_${_target}}" STREQUAL "${_targetMsg}")
			# cache message to avoid redundant messages on re-configure
			set (COTIREMSG_${_target} "${_targetMsg}" CACHE INTERNAL "${_target} cotire message.")
			message (${_targetMsgLevel} "${_targetMsg}")
		endif()
	endif()
	# a single-language target without exclusions can be handled as a whole
	list (LENGTH _targetLanguages _numberOfLanguages)
	if (_numberOfLanguages GREATER 1 OR _allExcludedSourceFiles)
		set (${_wholeTargetVar} FALSE PARENT_SCOPE)
	else()
		set (${_wholeTargetVar} TRUE PARENT_SCOPE)
	endif()
	set (${_targetLanguagesVar} ${_targetLanguages} PARENT_SCOPE)
endfunction()

# Computes the maximum number of includes per unity file. The target property
# may hold a plain number, or a make-style parallelism flag (-j/--parallel/
# --jobs [N]) which distributes the sources over N (or all) cores.
function (cotire_compute_unity_max_number_of_includes _target _maxIncludesVar)
	set (_sourceFiles ${ARGN})
	get_target_property(_maxIncludes ${_target} COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES)
	if (_maxIncludes MATCHES "(-j|--parallel|--jobs) ?([0-9]*)")
		if (DEFINED CMAKE_MATCH_2)
			set (_numberOfThreads "${CMAKE_MATCH_2}")
		else()
			set (_numberOfThreads "")
		endif()
		if (NOT _numberOfThreads)
			# use all available cores
			ProcessorCount(_numberOfThreads)
		endif()
		# ceiling division: spread sources evenly over the threads
		list (LENGTH _sourceFiles _numberOfSources)
		math (EXPR _maxIncludes "(${_numberOfSources} + ${_numberOfThreads} - 1) / ${_numberOfThreads}")
	elseif (NOT _maxIncludes MATCHES "[0-9]+")
		# not a number: treat as unlimited
		set (_maxIncludes 0)
	endif()
	if (COTIRE_DEBUG)
		message (STATUS "${_target} unity source max includes: ${_maxIncludes}")
	endif()
	set (${_maxIncludesVar} ${_maxIncludes} PARENT_SCOPE)
endfunction()

# Per-language driver: sets up unity source generation, prefix header
# generation and PCH compilation/inclusion for one language of the target.
function (cotire_process_target_language _language _configurations _target _wholeTarget _cmdsVar)
	set (${_cmdsVar} "" PARENT_SCOPE)
	get_target_property(_targetSourceFiles ${_target} SOURCES)
	set (_sourceFiles "")
	set (_excludedSources "")
	set (_cotiredSources "")
	cotire_filter_language_source_files(${_language} ${_target} _sourceFiles _excludedSources _cotiredSources ${_targetSourceFiles})
	if (NOT _sourceFiles AND NOT _cotiredSources)
		return()
	endif()
	set (_cmds "")
	# check for user provided unity source file list
	get_property(_unitySourceFiles TARGET ${_target} PROPERTY COTIRE_${_language}_UNITY_SOURCE_INIT)
	if (NOT _unitySourceFiles)
		set (_unitySourceFiles ${_sourceFiles} ${_cotiredSources})
	endif()
	cotire_generate_target_script(
		${_language} "${_configurations}" ${_target} _targetScript _targetConfigScript ${_unitySourceFiles})
	# set up unity files for parallel compilation
	cotire_compute_unity_max_number_of_includes(${_target} _maxIncludes ${_unitySourceFiles})
	cotire_make_unity_source_file_paths(${_language} ${_target} ${_maxIncludes} _unityFiles ${_unitySourceFiles})
	list (LENGTH _unityFiles _numberOfUnityFiles)
	if (_numberOfUnityFiles EQUAL 0)
		return()
	elseif (_numberOfUnityFiles GREATER 1)
		cotire_setup_unity_generation_commands(
			${_language} ${_target} "${_targetScript}" "${_targetConfigScript}" "${_unityFiles}" _cmds ${_unitySourceFiles})
	endif()
	# set up single unity file for prefix header generation
	cotire_make_single_unity_source_file_path(${_language} ${_target} _unityFile)
	cotire_setup_unity_generation_commands(
		${_language} ${_target} "${_targetScript}" "${_targetConfigScript}" "${_unityFile}" _cmds ${_unitySourceFiles})
	cotire_make_prefix_file_path(${_language} ${_target} _prefixFile)
	# set up prefix header
	if (_prefixFile)
		# check for user provided prefix header files
		get_property(_prefixHeaderFiles TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER_INIT)
		if (_prefixHeaderFiles)
			cotire_setup_prefix_generation_from_provided_command(
				${_language} ${_target} "${_targetConfigScript}" "${_prefixFile}" _cmds ${_prefixHeaderFiles})
		else()
			cotire_setup_prefix_generation_from_unity_command(
				${_language} ${_target} "${_targetConfigScript}" "${_prefixFile}" "${_unityFile}" _cmds ${_unitySourceFiles})
		endif()
		# check if selected language has enough sources at all
		list (LENGTH _sourceFiles _numberOfSources)
		if (_numberOfSources LESS ${COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES})
			set (_targetUsePCH FALSE)
		else()
			get_target_property(_targetUsePCH ${_target} COTIRE_ENABLE_PRECOMPILED_HEADER)
		endif()
		if (_targetUsePCH)
			cotire_make_pch_file_path(${_language} ${_target} _pchFile)
			if (_pchFile)
				# first file in _sourceFiles is passed as the host file
				cotire_setup_pch_file_compilation(
					${_language} ${_target} "${_targetConfigScript}" "${_prefixFile}" "${_pchFile}" ${_sourceFiles})
				cotire_setup_pch_file_inclusion(
					${_language} ${_target} ${_wholeTarget} "${_prefixFile}" "${_pchFile}" ${_sourceFiles})
			endif()
		elseif (_prefixHeaderFiles)
			# user provided prefix header must be included unconditionally
			cotire_setup_prefix_file_inclusion(${_language} ${_target} "${_prefixFile}" ${_sourceFiles})
		endif()
	endif()
	# mark target as cotired for language
	set_property(TARGET ${_target} PROPERTY COTIRE_${_language}_UNITY_SOURCE "${_unityFiles}")
	if (_prefixFile)
		# record the generated prefix header (and PCH, if any) on the target
		set_property(TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER "${_prefixFile}")
		if (_targetUsePCH AND _pchFile)
			set_property(TARGET ${_target} PROPERTY COTIRE_${_language}_PRECOMPILED_HEADER "${_pchFile}")
		endif()
	endif()
	set (${_cmdsVar} ${_cmds} PARENT_SCOPE)
endfunction()

# Adds a helper target that removes all cotire-generated files for the given
# target by running the cotire script in "cleanup" mode.
function (cotire_setup_clean_target _target)
	set (_cleanTargetName "${_target}${COTIRE_CLEAN_TARGET_SUFFIX}")
	if (NOT TARGET "${_cleanTargetName}")
		cotire_set_cmd_to_prologue(_cmds)
		get_filename_component(_outputDir "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}" ABSOLUTE)
		list (APPEND _cmds -P "${COTIRE_CMAKE_MODULE_FILE}" "cleanup" "${_outputDir}" "${COTIRE_INTDIR}" "${_target}")
		add_custom_target(${_cleanTargetName}
			COMMAND ${_cmds}
			WORKING_DIRECTORY "${CMAKE_BINARY_DIR}"
			COMMENT "Cleaning up target ${_target} cotire generated files"
			VERBATIM)
		cotire_init_target("${_cleanTargetName}")
	endif()
endfunction()

# Adds a helper target that triggers generation of the cotire-related files
# (prefix header, unity source, PCH) for makefile-based generators.
function (cotire_setup_pch_target _languages _configurations _target)
	if ("${CMAKE_GENERATOR}" MATCHES "Make|Ninja")
		# for makefile based generators, we add a custom target to trigger the generation of the cotire related files
		set (_dependsFiles "")
		foreach (_language ${_languages})
			set (_props COTIRE_${_language}_PREFIX_HEADER COTIRE_${_language}_UNITY_SOURCE)
			if (NOT CMAKE_${_language}_COMPILER_ID MATCHES "MSVC|Intel" AND NOT
				(WIN32 AND CMAKE_${_language}_COMPILER_ID MATCHES "Clang"))
				# MSVC, Intel and clang-cl only create precompiled header as a side effect
				list (INSERT _props 0 COTIRE_${_language}_PRECOMPILED_HEADER)
			endif()
			cotire_get_first_set_property_value(_dependsFile TARGET ${_target} ${_props})
			if (_dependsFile)
				list (APPEND _dependsFiles "${_dependsFile}")
			endif()
		endforeach()
		if (_dependsFiles)
			set (_pchTargetName "${_target}${COTIRE_PCH_TARGET_SUFFIX}")
			add_custom_target("${_pchTargetName}" DEPENDS ${_dependsFiles})
			cotire_init_target("${_pchTargetName}")
cotire_add_to_pch_all_target(${_pchTargetName}) + endif() + else() + # for other generators, we add the "clean all" target to clean up the precompiled header + cotire_setup_clean_all_target() + endif() +endfunction() + +function (cotire_filter_object_libraries _target _objectLibrariesVar) + set (_objectLibraries "") + foreach (_source ${ARGN}) + if (_source MATCHES "^\\$$") + list (APPEND _objectLibraries "${_source}") + endif() + endforeach() + set (${_objectLibrariesVar} ${_objectLibraries} PARENT_SCOPE) +endfunction() + +function (cotire_collect_unity_target_sources _target _languages _unityTargetSourcesVar) + get_target_property(_targetSourceFiles ${_target} SOURCES) + set (_unityTargetSources ${_targetSourceFiles}) + foreach (_language ${_languages}) + get_property(_unityFiles TARGET ${_target} PROPERTY COTIRE_${_language}_UNITY_SOURCE) + if (_unityFiles) + # remove source files that are included in the unity source + set (_sourceFiles "") + set (_excludedSources "") + set (_cotiredSources "") + cotire_filter_language_source_files(${_language} ${_target} _sourceFiles _excludedSources _cotiredSources ${_targetSourceFiles}) + if (_sourceFiles OR _cotiredSources) + list (REMOVE_ITEM _unityTargetSources ${_sourceFiles} ${_cotiredSources}) + endif() + # add unity source files instead + list (APPEND _unityTargetSources ${_unityFiles}) + endif() + endforeach() + # handle object libraries which are part of the target's sources + get_target_property(_linkLibrariesStrategy ${_target} COTIRE_UNITY_LINK_LIBRARIES_INIT) + if ("${_linkLibrariesStrategy}" MATCHES "^COPY_UNITY$") + cotire_filter_object_libraries(${_target} _objectLibraries ${_targetSourceFiles}) + if (_objectLibraries) + cotire_map_libraries("${_linkLibrariesStrategy}" _unityObjectLibraries ${_objectLibraries}) + list (REMOVE_ITEM _unityTargetSources ${_objectLibraries}) + list (APPEND _unityTargetSources ${_unityObjectLibraries}) + endif() + endif() + set (${_unityTargetSourcesVar} ${_unityTargetSources} 
PARENT_SCOPE) +endfunction() + +function (cotire_setup_unity_target_pch_usage _languages _target) + foreach (_language ${_languages}) + get_property(_unityFiles TARGET ${_target} PROPERTY COTIRE_${_language}_UNITY_SOURCE) + if (_unityFiles) + get_property(_userPrefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER_INIT) + get_property(_prefixFile TARGET ${_target} PROPERTY COTIRE_${_language}_PREFIX_HEADER) + if (_userPrefixFile AND _prefixFile) + # user provided prefix header must be included unconditionally by unity sources + cotire_setup_prefix_file_inclusion(${_language} ${_target} "${_prefixFile}" ${_unityFiles}) + endif() + endif() + endforeach() +endfunction() + +function (cotire_setup_unity_build_target _languages _configurations _target) + get_target_property(_unityTargetName ${_target} COTIRE_UNITY_TARGET_NAME) + if (NOT _unityTargetName) + set (_unityTargetName "${_target}${COTIRE_UNITY_BUILD_TARGET_SUFFIX}") + endif() + # determine unity target sub type + get_target_property(_targetType ${_target} TYPE) + if ("${_targetType}" STREQUAL "EXECUTABLE") + set (_unityTargetSubType "") + elseif (_targetType MATCHES "(STATIC|SHARED|MODULE|OBJECT)_LIBRARY") + set (_unityTargetSubType "${CMAKE_MATCH_1}") + else() + message (WARNING "cotire: target ${_target} has unknown target type ${_targetType}.") + return() + endif() + # determine unity target sources + set (_unityTargetSources "") + cotire_collect_unity_target_sources(${_target} "${_languages}" _unityTargetSources) + # prevent AUTOMOC, AUTOUIC and AUTORCC properties from being set when the unity target is created + set (CMAKE_AUTOMOC OFF) + set (CMAKE_AUTOUIC OFF) + set (CMAKE_AUTORCC OFF) + if (COTIRE_DEBUG) + message (STATUS "add target ${_targetType} ${_unityTargetName} ${_unityTargetSubType} EXCLUDE_FROM_ALL ${_unityTargetSources}") + endif() + # generate unity target + if ("${_targetType}" STREQUAL "EXECUTABLE") + add_executable(${_unityTargetName} ${_unityTargetSubType} EXCLUDE_FROM_ALL 
${_unityTargetSources}) + else() + add_library(${_unityTargetName} ${_unityTargetSubType} EXCLUDE_FROM_ALL ${_unityTargetSources}) + endif() + # copy output location properties + set (_outputDirProperties + ARCHIVE_OUTPUT_DIRECTORY ARCHIVE_OUTPUT_DIRECTORY_ + LIBRARY_OUTPUT_DIRECTORY LIBRARY_OUTPUT_DIRECTORY_ + RUNTIME_OUTPUT_DIRECTORY RUNTIME_OUTPUT_DIRECTORY_) + if (COTIRE_UNITY_OUTPUT_DIRECTORY) + set (_setDefaultOutputDir TRUE) + if (IS_ABSOLUTE "${COTIRE_UNITY_OUTPUT_DIRECTORY}") + set (_outputDir "${COTIRE_UNITY_OUTPUT_DIRECTORY}") + else() + # append relative COTIRE_UNITY_OUTPUT_DIRECTORY to target's actual output directory + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} ${_outputDirProperties}) + cotire_resolve_config_properties("${_configurations}" _properties ${_outputDirProperties}) + foreach (_property ${_properties}) + get_property(_outputDir TARGET ${_target} PROPERTY ${_property}) + if (_outputDir) + get_filename_component(_outputDir "${_outputDir}/${COTIRE_UNITY_OUTPUT_DIRECTORY}" ABSOLUTE) + set_property(TARGET ${_unityTargetName} PROPERTY ${_property} "${_outputDir}") + set (_setDefaultOutputDir FALSE) + endif() + endforeach() + if (_setDefaultOutputDir) + get_filename_component(_outputDir "${CMAKE_CURRENT_BINARY_DIR}/${COTIRE_UNITY_OUTPUT_DIRECTORY}" ABSOLUTE) + endif() + endif() + if (_setDefaultOutputDir) + set_target_properties(${_unityTargetName} PROPERTIES + ARCHIVE_OUTPUT_DIRECTORY "${_outputDir}" + LIBRARY_OUTPUT_DIRECTORY "${_outputDir}" + RUNTIME_OUTPUT_DIRECTORY "${_outputDir}") + endif() + else() + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + ${_outputDirProperties}) + endif() + # copy output name + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + ARCHIVE_OUTPUT_NAME ARCHIVE_OUTPUT_NAME_ + LIBRARY_OUTPUT_NAME LIBRARY_OUTPUT_NAME_ + OUTPUT_NAME OUTPUT_NAME_ + RUNTIME_OUTPUT_NAME RUNTIME_OUTPUT_NAME_ + PREFIX _POSTFIX 
SUFFIX + IMPORT_PREFIX IMPORT_SUFFIX) + # copy compile stuff + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + COMPILE_DEFINITIONS COMPILE_DEFINITIONS_ + COMPILE_FLAGS COMPILE_OPTIONS + Fortran_FORMAT Fortran_MODULE_DIRECTORY + INCLUDE_DIRECTORIES + INTERPROCEDURAL_OPTIMIZATION INTERPROCEDURAL_OPTIMIZATION_ + POSITION_INDEPENDENT_CODE + C_COMPILER_LAUNCHER CXX_COMPILER_LAUNCHER + C_INCLUDE_WHAT_YOU_USE CXX_INCLUDE_WHAT_YOU_USE + C_VISIBILITY_PRESET CXX_VISIBILITY_PRESET VISIBILITY_INLINES_HIDDEN + C_CLANG_TIDY CXX_CLANG_TIDY) + # copy compile features + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + C_EXTENSIONS C_STANDARD C_STANDARD_REQUIRED + CXX_EXTENSIONS CXX_STANDARD CXX_STANDARD_REQUIRED + COMPILE_FEATURES) + # copy interface stuff + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + COMPATIBLE_INTERFACE_BOOL COMPATIBLE_INTERFACE_NUMBER_MAX COMPATIBLE_INTERFACE_NUMBER_MIN + COMPATIBLE_INTERFACE_STRING + INTERFACE_COMPILE_DEFINITIONS INTERFACE_COMPILE_FEATURES INTERFACE_COMPILE_OPTIONS + INTERFACE_INCLUDE_DIRECTORIES INTERFACE_SOURCES + INTERFACE_POSITION_INDEPENDENT_CODE INTERFACE_SYSTEM_INCLUDE_DIRECTORIES + INTERFACE_AUTOUIC_OPTIONS NO_SYSTEM_FROM_IMPORTED) + # copy link stuff + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + BUILD_WITH_INSTALL_RPATH BUILD_WITH_INSTALL_NAME_DIR + INSTALL_RPATH INSTALL_RPATH_USE_LINK_PATH SKIP_BUILD_RPATH + LINKER_LANGUAGE LINK_DEPENDS LINK_DEPENDS_NO_SHARED + LINK_FLAGS LINK_FLAGS_ + LINK_INTERFACE_LIBRARIES LINK_INTERFACE_LIBRARIES_ + LINK_INTERFACE_MULTIPLICITY LINK_INTERFACE_MULTIPLICITY_ + LINK_SEARCH_START_STATIC LINK_SEARCH_END_STATIC + STATIC_LIBRARY_FLAGS STATIC_LIBRARY_FLAGS_ + NO_SONAME SOVERSION VERSION + LINK_WHAT_YOU_USE BUILD_RPATH) + # copy cmake stuff + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + 
IMPLICIT_DEPENDS_INCLUDE_TRANSFORM RULE_LAUNCH_COMPILE RULE_LAUNCH_CUSTOM RULE_LAUNCH_LINK) + # copy Apple platform specific stuff + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + BUNDLE BUNDLE_EXTENSION FRAMEWORK FRAMEWORK_VERSION INSTALL_NAME_DIR + MACOSX_BUNDLE MACOSX_BUNDLE_INFO_PLIST MACOSX_FRAMEWORK_INFO_PLIST MACOSX_RPATH + OSX_ARCHITECTURES OSX_ARCHITECTURES_ PRIVATE_HEADER PUBLIC_HEADER RESOURCE XCTEST + IOS_INSTALL_COMBINED XCODE_EXPLICIT_FILE_TYPE XCODE_PRODUCT_TYPE) + # copy Windows platform specific stuff + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + GNUtoMS + COMPILE_PDB_NAME COMPILE_PDB_NAME_ + COMPILE_PDB_OUTPUT_DIRECTORY COMPILE_PDB_OUTPUT_DIRECTORY_ + PDB_NAME PDB_NAME_ PDB_OUTPUT_DIRECTORY PDB_OUTPUT_DIRECTORY_ + VS_DESKTOP_EXTENSIONS_VERSION VS_DOTNET_REFERENCES VS_DOTNET_TARGET_FRAMEWORK_VERSION + VS_GLOBAL_KEYWORD VS_GLOBAL_PROJECT_TYPES VS_GLOBAL_ROOTNAMESPACE + VS_IOT_EXTENSIONS_VERSION VS_IOT_STARTUP_TASK + VS_KEYWORD VS_MOBILE_EXTENSIONS_VERSION + VS_SCC_AUXPATH VS_SCC_LOCALPATH VS_SCC_PROJECTNAME VS_SCC_PROVIDER + VS_WINDOWS_TARGET_PLATFORM_MIN_VERSION + VS_WINRT_COMPONENT VS_WINRT_EXTENSIONS VS_WINRT_REFERENCES + WIN32_EXECUTABLE WINDOWS_EXPORT_ALL_SYMBOLS + DEPLOYMENT_REMOTE_DIRECTORY VS_CONFIGURATION_TYPE + VS_SDK_REFERENCES VS_USER_PROPS VS_DEBUGGER_WORKING_DIRECTORY) + # copy Android platform specific stuff + cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + ANDROID_API ANDROID_API_MIN ANDROID_GUI + ANDROID_ANT_ADDITIONAL_OPTIONS ANDROID_ARCH ANDROID_ASSETS_DIRECTORIES + ANDROID_JAR_DEPENDENCIES ANDROID_JAR_DIRECTORIES ANDROID_JAVA_SOURCE_DIR + ANDROID_NATIVE_LIB_DEPENDENCIES ANDROID_NATIVE_LIB_DIRECTORIES + ANDROID_PROCESS_MAX ANDROID_PROGUARD ANDROID_PROGUARD_CONFIG_PATH + ANDROID_SECURE_PROPS_PATH ANDROID_SKIP_ANT_STEP ANDROID_STL_TYPE) + # copy CUDA platform specific stuff + 
cotire_copy_set_properties("${_configurations}" TARGET ${_target} ${_unityTargetName} + CUDA_PTX_COMPILATION CUDA_SEPARABLE_COMPILATION CUDA_RESOLVE_DEVICE_SYMBOLS + CUDA_EXTENSIONS CUDA_STANDARD CUDA_STANDARD_REQUIRED) + # use output name from original target + get_target_property(_targetOutputName ${_unityTargetName} OUTPUT_NAME) + if (NOT _targetOutputName) + set_property(TARGET ${_unityTargetName} PROPERTY OUTPUT_NAME "${_target}") + endif() + # use export symbol from original target + cotire_get_target_export_symbol("${_target}" _defineSymbol) + if (_defineSymbol) + set_property(TARGET ${_unityTargetName} PROPERTY DEFINE_SYMBOL "${_defineSymbol}") + if ("${_targetType}" STREQUAL "EXECUTABLE") + set_property(TARGET ${_unityTargetName} PROPERTY ENABLE_EXPORTS TRUE) + endif() + endif() + # enable parallel compilation for MSVC + if (MSVC AND "${CMAKE_GENERATOR}" MATCHES "Visual Studio") + list (LENGTH _unityTargetSources _numberOfUnityTargetSources) + if (_numberOfUnityTargetSources GREATER 1) + set_property(TARGET ${_unityTargetName} APPEND PROPERTY COMPILE_OPTIONS "/MP") + endif() + endif() + cotire_init_target(${_unityTargetName}) + cotire_add_to_unity_all_target(${_unityTargetName}) + set_property(TARGET ${_target} PROPERTY COTIRE_UNITY_TARGET_NAME "${_unityTargetName}") +endfunction(cotire_setup_unity_build_target) + +function (cotire_target _target) + set(_options "") + set(_oneValueArgs "") + set(_multiValueArgs LANGUAGES CONFIGURATIONS) + cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) + if (NOT _option_LANGUAGES) + get_property (_option_LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES) + endif() + if (NOT _option_CONFIGURATIONS) + cotire_get_configuration_types(_option_CONFIGURATIONS) + endif() + # check if cotire can be applied to target at all + cotire_is_target_supported(${_target} _isSupported) + if (NOT _isSupported) + get_target_property(_imported ${_target} IMPORTED) + get_target_property(_targetType 
${_target} TYPE) + if (_imported) + message (WARNING "cotire: imported ${_targetType} target ${_target} cannot be cotired.") + else() + message (STATUS "cotire: ${_targetType} target ${_target} cannot be cotired.") + endif() + return() + endif() + # resolve alias + get_target_property(_aliasName ${_target} ALIASED_TARGET) + if (_aliasName) + if (COTIRE_DEBUG) + message (STATUS "${_target} is an alias. Applying cotire to aliased target ${_aliasName} instead.") + endif() + set (_target ${_aliasName}) + endif() + # check if target needs to be cotired for build type + # when using configuration types, the test is performed at build time + cotire_init_cotire_target_properties(${_target}) + if (NOT CMAKE_CONFIGURATION_TYPES) + if (CMAKE_BUILD_TYPE) + list (FIND _option_CONFIGURATIONS "${CMAKE_BUILD_TYPE}" _index) + else() + list (FIND _option_CONFIGURATIONS "None" _index) + endif() + if (_index EQUAL -1) + if (COTIRE_DEBUG) + message (STATUS "CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} not cotired (${_option_CONFIGURATIONS})") + endif() + return() + endif() + endif() + # when not using configuration types, immediately create cotire intermediate dir + if (NOT CMAKE_CONFIGURATION_TYPES) + cotire_get_intermediate_dir(_baseDir) + file (MAKE_DIRECTORY "${_baseDir}") + endif() + # choose languages that apply to the target + cotire_choose_target_languages("${_target}" _targetLanguages _wholeTarget ${_option_LANGUAGES}) + if (NOT _targetLanguages) + return() + endif() + set (_cmds "") + foreach (_language ${_targetLanguages}) + cotire_process_target_language("${_language}" "${_option_CONFIGURATIONS}" ${_target} ${_wholeTarget} _cmd) + if (_cmd) + list (APPEND _cmds ${_cmd}) + endif() + endforeach() + get_target_property(_targetAddSCU ${_target} COTIRE_ADD_UNITY_BUILD) + if (_targetAddSCU) + cotire_setup_unity_build_target("${_targetLanguages}" "${_option_CONFIGURATIONS}" ${_target}) + endif() + get_target_property(_targetUsePCH ${_target} COTIRE_ENABLE_PRECOMPILED_HEADER) + if 
(_targetUsePCH) + cotire_setup_target_pch_usage("${_targetLanguages}" ${_target} ${_wholeTarget} ${_cmds}) + cotire_setup_pch_target("${_targetLanguages}" "${_option_CONFIGURATIONS}" ${_target}) + if (_targetAddSCU) + cotire_setup_unity_target_pch_usage("${_targetLanguages}" ${_target}) + endif() + endif() + get_target_property(_targetAddCleanTarget ${_target} COTIRE_ADD_CLEAN) + if (_targetAddCleanTarget) + cotire_setup_clean_target(${_target}) + endif() +endfunction(cotire_target) + +function (cotire_map_libraries _strategy _mappedLibrariesVar) + set (_mappedLibraries "") + foreach (_library ${ARGN}) + if (_library MATCHES "^\\$$") + set (_libraryName "${CMAKE_MATCH_1}") + set (_linkOnly TRUE) + set (_objectLibrary FALSE) + elseif (_library MATCHES "^\\$$") + set (_libraryName "${CMAKE_MATCH_1}") + set (_linkOnly FALSE) + set (_objectLibrary TRUE) + else() + set (_libraryName "${_library}") + set (_linkOnly FALSE) + set (_objectLibrary FALSE) + endif() + if ("${_strategy}" MATCHES "COPY_UNITY") + cotire_is_target_supported(${_libraryName} _isSupported) + if (_isSupported) + # use target's corresponding unity target, if available + get_target_property(_libraryUnityTargetName ${_libraryName} COTIRE_UNITY_TARGET_NAME) + if (TARGET "${_libraryUnityTargetName}") + if (_linkOnly) + list (APPEND _mappedLibraries "$") + elseif (_objectLibrary) + list (APPEND _mappedLibraries "$") + else() + list (APPEND _mappedLibraries "${_libraryUnityTargetName}") + endif() + else() + list (APPEND _mappedLibraries "${_library}") + endif() + else() + list (APPEND _mappedLibraries "${_library}") + endif() + else() + list (APPEND _mappedLibraries "${_library}") + endif() + endforeach() + list (REMOVE_DUPLICATES _mappedLibraries) + set (${_mappedLibrariesVar} ${_mappedLibraries} PARENT_SCOPE) +endfunction() + +function (cotire_target_link_libraries _target) + cotire_is_target_supported(${_target} _isSupported) + if (NOT _isSupported) + return() + endif() + 
get_target_property(_unityTargetName ${_target} COTIRE_UNITY_TARGET_NAME) + if (TARGET "${_unityTargetName}") + get_target_property(_linkLibrariesStrategy ${_target} COTIRE_UNITY_LINK_LIBRARIES_INIT) + if (COTIRE_DEBUG) + message (STATUS "unity target ${_unityTargetName} link strategy: ${_linkLibrariesStrategy}") + endif() + if ("${_linkLibrariesStrategy}" MATCHES "^(COPY|COPY_UNITY)$") + get_target_property(_linkLibraries ${_target} LINK_LIBRARIES) + if (_linkLibraries) + cotire_map_libraries("${_linkLibrariesStrategy}" _unityLinkLibraries ${_linkLibraries}) + set_target_properties(${_unityTargetName} PROPERTIES LINK_LIBRARIES "${_unityLinkLibraries}") + if (COTIRE_DEBUG) + message (STATUS "unity target ${_unityTargetName} link libraries: ${_unityLinkLibraries}") + endif() + endif() + get_target_property(_interfaceLinkLibraries ${_target} INTERFACE_LINK_LIBRARIES) + if (_interfaceLinkLibraries) + cotire_map_libraries("${_linkLibrariesStrategy}" _unityLinkInterfaceLibraries ${_interfaceLinkLibraries}) + set_target_properties(${_unityTargetName} PROPERTIES INTERFACE_LINK_LIBRARIES "${_unityLinkInterfaceLibraries}") + if (COTIRE_DEBUG) + message (STATUS "unity target ${_unityTargetName} interface link libraries: ${_unityLinkInterfaceLibraries}") + endif() + endif() + get_target_property(_manualDependencies ${_target} MANUALLY_ADDED_DEPENDENCIES) + if (_manualDependencies) + cotire_map_libraries("${_linkLibrariesStrategy}" _unityManualDependencies ${_manualDependencies}) + if (_unityManualDependencies) + add_dependencies("${_unityTargetName}" ${_unityManualDependencies}) + endif() + endif() + endif() + endif() +endfunction(cotire_target_link_libraries) + +function (cotire_cleanup _binaryDir _cotireIntermediateDirName _targetName) + if (_targetName) + file (GLOB_RECURSE _cotireFiles "${_binaryDir}/${_targetName}*.*") + else() + file (GLOB_RECURSE _cotireFiles "${_binaryDir}/*.*") + endif() + # filter files in intermediate directory + set (_filesToRemove "") + foreach 
(_file ${_cotireFiles}) + get_filename_component(_dir "${_file}" DIRECTORY) + get_filename_component(_dirName "${_dir}" NAME) + if ("${_dirName}" STREQUAL "${_cotireIntermediateDirName}") + list (APPEND _filesToRemove "${_file}") + endif() + endforeach() + if (_filesToRemove) + if (COTIRE_VERBOSE) + message (STATUS "cleaning up ${_filesToRemove}") + endif() + file (REMOVE ${_filesToRemove}) + endif() +endfunction() + +function (cotire_init_target _targetName) + if (COTIRE_TARGETS_FOLDER) + set_target_properties(${_targetName} PROPERTIES FOLDER "${COTIRE_TARGETS_FOLDER}") + endif() + set_target_properties(${_targetName} PROPERTIES EXCLUDE_FROM_ALL TRUE) + if (MSVC_IDE) + set_target_properties(${_targetName} PROPERTIES EXCLUDE_FROM_DEFAULT_BUILD TRUE) + endif() +endfunction() + +function (cotire_add_to_pch_all_target _pchTargetName) + set (_targetName "${COTIRE_PCH_ALL_TARGET_NAME}") + if (NOT TARGET "${_targetName}") + add_custom_target("${_targetName}" + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" + VERBATIM) + cotire_init_target("${_targetName}") + endif() + cotire_setup_clean_all_target() + add_dependencies(${_targetName} ${_pchTargetName}) +endfunction() + +function (cotire_add_to_unity_all_target _unityTargetName) + set (_targetName "${COTIRE_UNITY_BUILD_ALL_TARGET_NAME}") + if (NOT TARGET "${_targetName}") + add_custom_target("${_targetName}" + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" + VERBATIM) + cotire_init_target("${_targetName}") + endif() + cotire_setup_clean_all_target() + add_dependencies(${_targetName} ${_unityTargetName}) +endfunction() + +function (cotire_setup_clean_all_target) + set (_targetName "${COTIRE_CLEAN_ALL_TARGET_NAME}") + if (NOT TARGET "${_targetName}") + cotire_set_cmd_to_prologue(_cmds) + list (APPEND _cmds -P "${COTIRE_CMAKE_MODULE_FILE}" "cleanup" "${CMAKE_BINARY_DIR}" "${COTIRE_INTDIR}") + add_custom_target(${_targetName} + COMMAND ${_cmds} + WORKING_DIRECTORY "${CMAKE_BINARY_DIR}" + COMMENT "Cleaning up all cotire generated files" + 
VERBATIM) + cotire_init_target("${_targetName}") + endif() +endfunction() + +function (cotire) + set(_options "") + set(_oneValueArgs "") + set(_multiValueArgs LANGUAGES CONFIGURATIONS) + cmake_parse_arguments(_option "${_options}" "${_oneValueArgs}" "${_multiValueArgs}" ${ARGN}) + set (_targets ${_option_UNPARSED_ARGUMENTS}) + foreach (_target ${_targets}) + if (TARGET ${_target}) + cotire_target(${_target} LANGUAGES ${_option_LANGUAGES} CONFIGURATIONS ${_option_CONFIGURATIONS}) + else() + message (WARNING "cotire: ${_target} is not a target.") + endif() + endforeach() + foreach (_target ${_targets}) + if (TARGET ${_target}) + cotire_target_link_libraries(${_target}) + endif() + endforeach() +endfunction() + +if (CMAKE_SCRIPT_MODE_FILE) + + # cotire is being run in script mode + # locate -P on command args + set (COTIRE_ARGC -1) + foreach (_index RANGE ${CMAKE_ARGC}) + if (COTIRE_ARGC GREATER -1) + set (COTIRE_ARGV${COTIRE_ARGC} "${CMAKE_ARGV${_index}}") + math (EXPR COTIRE_ARGC "${COTIRE_ARGC} + 1") + elseif ("${CMAKE_ARGV${_index}}" STREQUAL "-P") + set (COTIRE_ARGC 0) + endif() + endforeach() + + # include target script if available + if ("${COTIRE_ARGV2}" MATCHES "\\.cmake$") + # the included target scripts sets up additional variables relating to the target (e.g., COTIRE_TARGET_SOURCES) + include("${COTIRE_ARGV2}") + endif() + + if (COTIRE_DEBUG) + message (STATUS "${COTIRE_ARGV0} ${COTIRE_ARGV1} ${COTIRE_ARGV2} ${COTIRE_ARGV3} ${COTIRE_ARGV4} ${COTIRE_ARGV5}") + endif() + + if (NOT COTIRE_BUILD_TYPE) + set (COTIRE_BUILD_TYPE "None") + endif() + string (TOUPPER "${COTIRE_BUILD_TYPE}" _upperConfig) + set (_includeDirs ${COTIRE_TARGET_INCLUDE_DIRECTORIES_${_upperConfig}}) + set (_systemIncludeDirs ${COTIRE_TARGET_SYSTEM_INCLUDE_DIRECTORIES_${_upperConfig}}) + set (_compileDefinitions ${COTIRE_TARGET_COMPILE_DEFINITIONS_${_upperConfig}}) + set (_compileFlags ${COTIRE_TARGET_COMPILE_FLAGS_${_upperConfig}}) + # check if target has been cotired for actual build 
type COTIRE_BUILD_TYPE + list (FIND COTIRE_TARGET_CONFIGURATION_TYPES "${COTIRE_BUILD_TYPE}" _index) + if (_index GREATER -1) + set (_sources ${COTIRE_TARGET_SOURCES}) + set (_sourcesDefinitions ${COTIRE_TARGET_SOURCES_COMPILE_DEFINITIONS_${_upperConfig}}) + else() + if (COTIRE_DEBUG) + message (STATUS "COTIRE_BUILD_TYPE=${COTIRE_BUILD_TYPE} not cotired (${COTIRE_TARGET_CONFIGURATION_TYPES})") + endif() + set (_sources "") + set (_sourcesDefinitions "") + endif() + set (_targetPreUndefs ${COTIRE_TARGET_PRE_UNDEFS}) + set (_targetPostUndefs ${COTIRE_TARGET_POST_UNDEFS}) + set (_sourcesPreUndefs ${COTIRE_TARGET_SOURCES_PRE_UNDEFS}) + set (_sourcesPostUndefs ${COTIRE_TARGET_SOURCES_POST_UNDEFS}) + + if ("${COTIRE_ARGV1}" STREQUAL "unity") + + if (XCODE) + # executing pre-build action under Xcode, check dependency on target script + set (_dependsOption DEPENDS "${COTIRE_ARGV2}") + else() + # executing custom command, no need to re-check for dependencies + set (_dependsOption "") + endif() + + cotire_select_unity_source_files("${COTIRE_ARGV3}" _sources ${_sources}) + + cotire_generate_unity_source( + "${COTIRE_ARGV3}" ${_sources} + LANGUAGE "${COTIRE_TARGET_LANGUAGE}" + SOURCES_COMPILE_DEFINITIONS ${_sourcesDefinitions} + PRE_UNDEFS ${_targetPreUndefs} + POST_UNDEFS ${_targetPostUndefs} + SOURCES_PRE_UNDEFS ${_sourcesPreUndefs} + SOURCES_POST_UNDEFS ${_sourcesPostUndefs} + ${_dependsOption}) + + elseif ("${COTIRE_ARGV1}" STREQUAL "prefix") + + if (XCODE) + # executing pre-build action under Xcode, check dependency on unity file and prefix dependencies + set (_dependsOption DEPENDS "${COTIRE_ARGV4}" ${COTIRE_TARGET_PREFIX_DEPENDS}) + else() + # executing custom command, no need to re-check for dependencies + set (_dependsOption "") + endif() + + set (_files "") + foreach (_index RANGE 4 ${COTIRE_ARGC}) + if (COTIRE_ARGV${_index}) + list (APPEND _files "${COTIRE_ARGV${_index}}") + endif() + endforeach() + + cotire_generate_prefix_header( + "${COTIRE_ARGV3}" ${_files} + 
COMPILER_LAUNCHER "${COTIRE_TARGET_${COTIRE_TARGET_LANGUAGE}_COMPILER_LAUNCHER}" + COMPILER_EXECUTABLE "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER}" + COMPILER_ARG1 ${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_ARG1} + COMPILER_ID "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_ID}" + COMPILER_VERSION "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_VERSION}" + LANGUAGE "${COTIRE_TARGET_LANGUAGE}" + IGNORE_PATH "${COTIRE_TARGET_IGNORE_PATH};${COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_PATH}" + INCLUDE_PATH ${COTIRE_TARGET_INCLUDE_PATH} + IGNORE_EXTENSIONS "${CMAKE_${COTIRE_TARGET_LANGUAGE}_SOURCE_FILE_EXTENSIONS};${COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_EXTENSIONS}" + INCLUDE_PRIORITY_PATH ${COTIRE_TARGET_INCLUDE_PRIORITY_PATH} + INCLUDE_DIRECTORIES ${_includeDirs} + SYSTEM_INCLUDE_DIRECTORIES ${_systemIncludeDirs} + COMPILE_DEFINITIONS ${_compileDefinitions} + COMPILE_FLAGS ${_compileFlags} + ${_dependsOption}) + + elseif ("${COTIRE_ARGV1}" STREQUAL "precompile") + + set (_files "") + foreach (_index RANGE 5 ${COTIRE_ARGC}) + if (COTIRE_ARGV${_index}) + list (APPEND _files "${COTIRE_ARGV${_index}}") + endif() + endforeach() + + cotire_precompile_prefix_header( + "${COTIRE_ARGV3}" "${COTIRE_ARGV4}" "${COTIRE_ARGV5}" + COMPILER_LAUNCHER "${COTIRE_TARGET_${COTIRE_TARGET_LANGUAGE}_COMPILER_LAUNCHER}" + COMPILER_EXECUTABLE "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER}" + COMPILER_ARG1 ${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_ARG1} + COMPILER_ID "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_ID}" + COMPILER_VERSION "${CMAKE_${COTIRE_TARGET_LANGUAGE}_COMPILER_VERSION}" + LANGUAGE "${COTIRE_TARGET_LANGUAGE}" + INCLUDE_DIRECTORIES ${_includeDirs} + SYSTEM_INCLUDE_DIRECTORIES ${_systemIncludeDirs} + COMPILE_DEFINITIONS ${_compileDefinitions} + COMPILE_FLAGS ${_compileFlags}) + + elseif ("${COTIRE_ARGV1}" STREQUAL "combine") + + if (COTIRE_TARGET_LANGUAGE) + set (_combinedFile "${COTIRE_ARGV3}") + set (_startIndex 4) + else() + set (_combinedFile "${COTIRE_ARGV2}") + set (_startIndex 3) + 
endif() + set (_files "") + foreach (_index RANGE ${_startIndex} ${COTIRE_ARGC}) + if (COTIRE_ARGV${_index}) + list (APPEND _files "${COTIRE_ARGV${_index}}") + endif() + endforeach() + + if (XCODE) + # executing pre-build action under Xcode, check dependency on files to be combined + set (_dependsOption DEPENDS ${_files}) + else() + # executing custom command, no need to re-check for dependencies + set (_dependsOption "") + endif() + + if (COTIRE_TARGET_LANGUAGE) + cotire_generate_unity_source( + "${_combinedFile}" ${_files} + LANGUAGE "${COTIRE_TARGET_LANGUAGE}" + ${_dependsOption}) + else() + cotire_generate_unity_source("${_combinedFile}" ${_files} ${_dependsOption}) + endif() + + elseif ("${COTIRE_ARGV1}" STREQUAL "cleanup") + + cotire_cleanup("${COTIRE_ARGV2}" "${COTIRE_ARGV3}" "${COTIRE_ARGV4}") + + else() + message (FATAL_ERROR "cotire: unknown command \"${COTIRE_ARGV1}\".") + endif() + +else() + + # cotire is being run in include mode + # set up all variable and property definitions + + if (NOT DEFINED COTIRE_DEBUG_INIT) + if (DEFINED COTIRE_DEBUG) + set (COTIRE_DEBUG_INIT ${COTIRE_DEBUG}) + else() + set (COTIRE_DEBUG_INIT FALSE) + endif() + endif() + option (COTIRE_DEBUG "Enable cotire debugging output?" ${COTIRE_DEBUG_INIT}) + + if (NOT DEFINED COTIRE_VERBOSE_INIT) + if (DEFINED COTIRE_VERBOSE) + set (COTIRE_VERBOSE_INIT ${COTIRE_VERBOSE}) + else() + set (COTIRE_VERBOSE_INIT FALSE) + endif() + endif() + option (COTIRE_VERBOSE "Enable cotire verbose output?" 
${COTIRE_VERBOSE_INIT}) + + set (COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_EXTENSIONS "inc;inl;ipp" CACHE STRING + "Ignore headers with the listed file extensions from the generated prefix header.") + + set (COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_PATH "" CACHE STRING + "Ignore headers from these directories when generating the prefix header.") + + set (COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS "m;mm" CACHE STRING + "Ignore sources with the listed file extensions from the generated unity source.") + + set (COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES "2" CACHE STRING + "Minimum number of sources in target required to enable use of precompiled header.") + + if (NOT DEFINED COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT) + if (DEFINED COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES) + set (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT ${COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES}) + elseif ("${CMAKE_GENERATOR}" MATCHES "JOM|Ninja|Visual Studio") + # enable parallelization for generators that run multiple jobs by default + set (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT "-j") + else() + set (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT "0") + endif() + endif() + set (COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES "${COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES_INIT}" CACHE STRING + "Maximum number of source files to include in a single unity source file.") + + if (NOT COTIRE_PREFIX_HEADER_FILENAME_SUFFIX) + set (COTIRE_PREFIX_HEADER_FILENAME_SUFFIX "_prefix") + endif() + if (NOT COTIRE_UNITY_SOURCE_FILENAME_SUFFIX) + set (COTIRE_UNITY_SOURCE_FILENAME_SUFFIX "_unity") + endif() + if (NOT COTIRE_INTDIR) + set (COTIRE_INTDIR "cotire") + endif() + if (NOT COTIRE_PCH_ALL_TARGET_NAME) + set (COTIRE_PCH_ALL_TARGET_NAME "all_pch") + endif() + if (NOT COTIRE_UNITY_BUILD_ALL_TARGET_NAME) + set (COTIRE_UNITY_BUILD_ALL_TARGET_NAME "all_unity") + endif() + if (NOT COTIRE_CLEAN_ALL_TARGET_NAME) + set (COTIRE_CLEAN_ALL_TARGET_NAME "clean_cotire") + endif() + if (NOT COTIRE_CLEAN_TARGET_SUFFIX) + set 
(COTIRE_CLEAN_TARGET_SUFFIX "_clean_cotire") + endif() + if (NOT COTIRE_PCH_TARGET_SUFFIX) + set (COTIRE_PCH_TARGET_SUFFIX "_pch") + endif() + if (MSVC) + # MSVC default PCH memory scaling factor of 100 percent (75 MB) is too small for template heavy C++ code + # use a bigger default factor of 170 percent (128 MB) + if (NOT DEFINED COTIRE_PCH_MEMORY_SCALING_FACTOR) + set (COTIRE_PCH_MEMORY_SCALING_FACTOR "170") + endif() + endif() + if (NOT COTIRE_UNITY_BUILD_TARGET_SUFFIX) + set (COTIRE_UNITY_BUILD_TARGET_SUFFIX "_unity") + endif() + if (NOT DEFINED COTIRE_TARGETS_FOLDER) + set (COTIRE_TARGETS_FOLDER "cotire") + endif() + if (NOT DEFINED COTIRE_UNITY_OUTPUT_DIRECTORY) + if ("${CMAKE_GENERATOR}" MATCHES "Ninja") + # generated Ninja build files do not work if the unity target produces the same output file as the cotired target + set (COTIRE_UNITY_OUTPUT_DIRECTORY "unity") + else() + set (COTIRE_UNITY_OUTPUT_DIRECTORY "") + endif() + endif() + + # define cotire cache variables + + define_property( + CACHED_VARIABLE PROPERTY "COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_PATH" + BRIEF_DOCS "Ignore headers from these directories when generating the prefix header." + FULL_DOCS + "The variable can be set to a semicolon separated list of include directories." + "If a header file is found in one of these directories or sub-directories, it will be excluded from the generated prefix header." + "If not defined, defaults to empty list." + ) + + define_property( + CACHED_VARIABLE PROPERTY "COTIRE_ADDITIONAL_PREFIX_HEADER_IGNORE_EXTENSIONS" + BRIEF_DOCS "Ignore includes with the listed file extensions from the generated prefix header." + FULL_DOCS + "The variable can be set to a semicolon separated list of file extensions." + "If a header file extension matches one in the list, it will be excluded from the generated prefix header." + "Includes with an extension in CMAKE__SOURCE_FILE_EXTENSIONS are always ignored." + "If not defined, defaults to inc;inl;ipp." 
+ ) + + define_property( + CACHED_VARIABLE PROPERTY "COTIRE_UNITY_SOURCE_EXCLUDE_EXTENSIONS" + BRIEF_DOCS "Exclude sources with the listed file extensions from the generated unity source." + FULL_DOCS + "The variable can be set to a semicolon separated list of file extensions." + "If a source file extension matches one in the list, it will be excluded from the generated unity source file." + "Source files with an extension in CMAKE__IGNORE_EXTENSIONS are always excluded." + "If not defined, defaults to m;mm." + ) + + define_property( + CACHED_VARIABLE PROPERTY "COTIRE_MINIMUM_NUMBER_OF_TARGET_SOURCES" + BRIEF_DOCS "Minimum number of sources in target required to enable use of precompiled header." + FULL_DOCS + "The variable can be set to an integer > 0." + "If a target contains less than that number of source files, cotire will not enable the use of the precompiled header for the target." + "If not defined, defaults to 2." + ) + + define_property( + CACHED_VARIABLE PROPERTY "COTIRE_MAXIMUM_NUMBER_OF_UNITY_INCLUDES" + BRIEF_DOCS "Maximum number of source files to include in a single unity source file." + FULL_DOCS + "This may be set to an integer >= 0." + "If 0, cotire will only create a single unity source file." + "If a target contains more than that number of source files, cotire will create multiple unity source files for it." + "Can be set to \"-j\" to optimize the count of unity source files for the number of available processor cores." + "Can be set to \"-j jobs\" to optimize the number of unity source files for the given number of simultaneous jobs." + "Is used to initialize the target property COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES." + "Defaults to \"-j\" for the generators Visual Studio, JOM or Ninja. Defaults to 0 otherwise." 
+ ) + + # define cotire directory properties + + define_property( + DIRECTORY PROPERTY "COTIRE_ENABLE_PRECOMPILED_HEADER" + BRIEF_DOCS "Modify build command of cotired targets added in this directory to make use of the generated precompiled header." + FULL_DOCS + "See target property COTIRE_ENABLE_PRECOMPILED_HEADER." + ) + + define_property( + DIRECTORY PROPERTY "COTIRE_ADD_UNITY_BUILD" + BRIEF_DOCS "Add a new target that performs a unity build for cotired targets added in this directory." + FULL_DOCS + "See target property COTIRE_ADD_UNITY_BUILD." + ) + + define_property( + DIRECTORY PROPERTY "COTIRE_ADD_CLEAN" + BRIEF_DOCS "Add a new target that cleans all cotire generated files for cotired targets added in this directory." + FULL_DOCS + "See target property COTIRE_ADD_CLEAN." + ) + + define_property( + DIRECTORY PROPERTY "COTIRE_PREFIX_HEADER_IGNORE_PATH" + BRIEF_DOCS "Ignore headers from these directories when generating the prefix header." + FULL_DOCS + "See target property COTIRE_PREFIX_HEADER_IGNORE_PATH." + ) + + define_property( + DIRECTORY PROPERTY "COTIRE_PREFIX_HEADER_INCLUDE_PATH" + BRIEF_DOCS "Honor headers from these directories when generating the prefix header." + FULL_DOCS + "See target property COTIRE_PREFIX_HEADER_INCLUDE_PATH." + ) + + define_property( + DIRECTORY PROPERTY "COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH" + BRIEF_DOCS "Header paths matching one of these directories are put at the top of the prefix header." + FULL_DOCS + "See target property COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH." + ) + + define_property( + DIRECTORY PROPERTY "COTIRE_UNITY_SOURCE_PRE_UNDEFS" + BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file before the inclusion of each source file." + FULL_DOCS + "See target property COTIRE_UNITY_SOURCE_PRE_UNDEFS." 
+ ) + + define_property( + DIRECTORY PROPERTY "COTIRE_UNITY_SOURCE_POST_UNDEFS" + BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file after the inclusion of each source file." + FULL_DOCS + "See target property COTIRE_UNITY_SOURCE_POST_UNDEFS." + ) + + define_property( + DIRECTORY PROPERTY "COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES" + BRIEF_DOCS "Maximum number of source files to include in a single unity source file." + FULL_DOCS + "See target property COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES." + ) + + define_property( + DIRECTORY PROPERTY "COTIRE_UNITY_LINK_LIBRARIES_INIT" + BRIEF_DOCS "Define strategy for setting up the unity target's link libraries." + FULL_DOCS + "See target property COTIRE_UNITY_LINK_LIBRARIES_INIT." + ) + + # define cotire target properties + + define_property( + TARGET PROPERTY "COTIRE_ENABLE_PRECOMPILED_HEADER" INHERITED + BRIEF_DOCS "Modify this target's build command to make use of the generated precompiled header." + FULL_DOCS + "If this property is set to TRUE, cotire will modify the build command to make use of the generated precompiled header." + "Irrespective of the value of this property, cotire will setup custom commands to generate the unity source and prefix header for the target." + "For makefile based generators cotire will also set up a custom target to manually invoke the generation of the precompiled header." + "The target name will be set to this target's name with the suffix _pch appended." + "Inherited from directory." + "Defaults to TRUE." + ) + + define_property( + TARGET PROPERTY "COTIRE_ADD_UNITY_BUILD" INHERITED + BRIEF_DOCS "Add a new target that performs a unity build for this target." + FULL_DOCS + "If this property is set to TRUE, cotire creates a new target of the same type that uses the generated unity source file instead of the target sources." + "Most of the relevant target properties will be copied from this target to the new unity build target." 
+ "Target dependencies and linked libraries have to be manually set up for the new unity build target." + "The unity target name will be set to this target's name with the suffix _unity appended." + "Inherited from directory." + "Defaults to TRUE." + ) + + define_property( + TARGET PROPERTY "COTIRE_ADD_CLEAN" INHERITED + BRIEF_DOCS "Add a new target that cleans all cotire generated files for this target." + FULL_DOCS + "If this property is set to TRUE, cotire creates a new target that clean all files (unity source, prefix header, precompiled header)." + "The clean target name will be set to this target's name with the suffix _clean_cotire appended." + "Inherited from directory." + "Defaults to FALSE." + ) + + define_property( + TARGET PROPERTY "COTIRE_PREFIX_HEADER_IGNORE_PATH" INHERITED + BRIEF_DOCS "Ignore headers from these directories when generating the prefix header." + FULL_DOCS + "The property can be set to a list of directories." + "If a header file is found in one of these directories or sub-directories, it will be excluded from the generated prefix header." + "Inherited from directory." + "If not set, this property is initialized to \${CMAKE_SOURCE_DIR};\${CMAKE_BINARY_DIR}." + ) + + define_property( + TARGET PROPERTY "COTIRE_PREFIX_HEADER_INCLUDE_PATH" INHERITED + BRIEF_DOCS "Honor headers from these directories when generating the prefix header." + FULL_DOCS + "The property can be set to a list of directories." + "If a header file is found in one of these directories or sub-directories, it will be included in the generated prefix header." + "If a header file is both selected by COTIRE_PREFIX_HEADER_IGNORE_PATH and COTIRE_PREFIX_HEADER_INCLUDE_PATH," + "the option which yields the closer relative path match wins." + "Inherited from directory." + "If not set, this property is initialized to the empty list." 
+ ) + + define_property( + TARGET PROPERTY "COTIRE_PREFIX_HEADER_INCLUDE_PRIORITY_PATH" INHERITED + BRIEF_DOCS "Header paths matching one of these directories are put at the top of prefix header." + FULL_DOCS + "The property can be set to a list of directories." + "Header file paths matching one of these directories will be inserted at the beginning of the generated prefix header." + "Header files are sorted according to the order of the directories in the property." + "If not set, this property is initialized to the empty list." + ) + + define_property( + TARGET PROPERTY "COTIRE_UNITY_SOURCE_PRE_UNDEFS" INHERITED + BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file before the inclusion of each target source file." + FULL_DOCS + "This may be set to a semicolon-separated list of preprocessor symbols." + "cotire will add corresponding #undef directives to the generated unit source file before each target source file." + "Inherited from directory." + "Defaults to empty string." + ) + + define_property( + TARGET PROPERTY "COTIRE_UNITY_SOURCE_POST_UNDEFS" INHERITED + BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file after the inclusion of each target source file." + FULL_DOCS + "This may be set to a semicolon-separated list of preprocessor symbols." + "cotire will add corresponding #undef directives to the generated unit source file after each target source file." + "Inherited from directory." + "Defaults to empty string." + ) + + define_property( + TARGET PROPERTY "COTIRE_UNITY_SOURCE_MAXIMUM_NUMBER_OF_INCLUDES" INHERITED + BRIEF_DOCS "Maximum number of source files to include in a single unity source file." + FULL_DOCS + "This may be set to an integer > 0." + "If a target contains more than that number of source files, cotire will create multiple unity build files for it." + "If not set, cotire will only create a single unity source file." + "Inherited from directory." + "Defaults to empty." 
+ ) + + define_property( + TARGET PROPERTY "COTIRE__UNITY_SOURCE_INIT" + BRIEF_DOCS "User provided unity source file to be used instead of the automatically generated one." + FULL_DOCS + "If set, cotire will only add the given file(s) to the generated unity source file." + "If not set, cotire will add all the target source files to the generated unity source file." + "The property can be set to a user provided unity source file." + "Defaults to empty." + ) + + define_property( + TARGET PROPERTY "COTIRE__PREFIX_HEADER_INIT" + BRIEF_DOCS "User provided prefix header file to be used instead of the automatically generated one." + FULL_DOCS + "If set, cotire will add the given header file(s) to the generated prefix header file." + "If not set, cotire will generate a prefix header by tracking the header files included by the unity source file." + "The property can be set to a user provided prefix header file (e.g., stdafx.h)." + "Defaults to empty." + ) + + define_property( + TARGET PROPERTY "COTIRE_UNITY_LINK_LIBRARIES_INIT" INHERITED + BRIEF_DOCS "Define strategy for setting up unity target's link libraries." + FULL_DOCS + "If this property is empty or set to NONE, the generated unity target's link libraries have to be set up manually." + "If this property is set to COPY, the unity target's link libraries will be copied from this target." + "If this property is set to COPY_UNITY, the unity target's link libraries will be copied from this target with considering existing unity targets." + "Inherited from directory." + "Defaults to empty." + ) + + define_property( + TARGET PROPERTY "COTIRE__UNITY_SOURCE" + BRIEF_DOCS "Read-only property. The generated unity source file(s)." + FULL_DOCS + "cotire sets this property to the path of the generated single computation unit source file for the target." + "Defaults to empty string." + ) + + define_property( + TARGET PROPERTY "COTIRE__PREFIX_HEADER" + BRIEF_DOCS "Read-only property. The generated prefix header file." 
+ FULL_DOCS + "cotire sets this property to the full path of the generated language prefix header for the target." + "Defaults to empty string." + ) + + define_property( + TARGET PROPERTY "COTIRE__PRECOMPILED_HEADER" + BRIEF_DOCS "Read-only property. The generated precompiled header file." + FULL_DOCS + "cotire sets this property to the full path of the generated language precompiled header binary for the target." + "Defaults to empty string." + ) + + define_property( + TARGET PROPERTY "COTIRE_UNITY_TARGET_NAME" + BRIEF_DOCS "The name of the generated unity build target corresponding to this target." + FULL_DOCS + "This property can be set to the desired name of the unity target that will be created by cotire." + "If not set, the unity target name will be set to this target's name with the suffix _unity appended." + "After this target has been processed by cotire, the property is set to the actual name of the generated unity target." + "Defaults to empty string." + ) + + # define cotire source properties + + define_property( + SOURCE PROPERTY "COTIRE_EXCLUDED" + BRIEF_DOCS "Do not modify source file's build command." + FULL_DOCS + "If this property is set to TRUE, the source file's build command will not be modified to make use of the precompiled header." + "The source file will also be excluded from the generated unity source file." + "Source files that have their COMPILE_FLAGS property set will be excluded by default." + "Defaults to FALSE." + ) + + define_property( + SOURCE PROPERTY "COTIRE_DEPENDENCY" + BRIEF_DOCS "Add this source file to dependencies of the automatically generated prefix header file." + FULL_DOCS + "If this property is set to TRUE, the source file is added to dependencies of the generated prefix header file." + "If the file is modified, cotire will re-generate the prefix header source upon build." + "Defaults to FALSE." 
+ ) + + define_property( + SOURCE PROPERTY "COTIRE_UNITY_SOURCE_PRE_UNDEFS" + BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file before the inclusion of this source file." + FULL_DOCS + "This may be set to a semicolon-separated list of preprocessor symbols." + "cotire will add corresponding #undef directives to the generated unit source file before this file is included." + "Defaults to empty string." + ) + + define_property( + SOURCE PROPERTY "COTIRE_UNITY_SOURCE_POST_UNDEFS" + BRIEF_DOCS "Preprocessor undefs to place in the generated unity source file after the inclusion of this source file." + FULL_DOCS + "This may be set to a semicolon-separated list of preprocessor symbols." + "cotire will add corresponding #undef directives to the generated unit source file after this file is included." + "Defaults to empty string." + ) + + define_property( + SOURCE PROPERTY "COTIRE_START_NEW_UNITY_SOURCE" + BRIEF_DOCS "Start a new unity source file which includes this source file as the first one." + FULL_DOCS + "If this property is set to TRUE, cotire will complete the current unity file and start a new one." + "The new unity source file will include this source file as the first one." + "This property essentially works as a separator for unity source files." + "Defaults to FALSE." + ) + + define_property( + SOURCE PROPERTY "COTIRE_TARGET" + BRIEF_DOCS "Read-only property. Mark this source file as cotired for the given target." + FULL_DOCS + "cotire sets this property to the name of target, that the source file's build command has been altered for." + "Defaults to empty string." 
+	)
+
+	message (STATUS "cotire ${COTIRE_CMAKE_MODULE_VERSION} loaded.")
+
+endif()
\ No newline at end of file
diff --git a/cmake/dependencies.cmake b/cmake/dependencies.cmake
new file mode 100644
index 00000000..74c3761b
--- /dev/null
+++ b/cmake/dependencies.cmake
@@ -0,0 +1,30 @@
+####################################
+# get dependencies
+####################################
+
+# NOTE: Boost_USE_* hint variables must be set BEFORE find_package(Boost),
+# otherwise the FindBoost module silently ignores them.
+set(Boost_USE_MULTITHREADED ON)
+set(Boost_USE_STATIC_LIBS ${LUCENE_USE_STATIC_BOOST_LIBS})
+
+find_package(Boost COMPONENTS
+    date_time
+    filesystem
+    iostreams
+    regex
+    system
+    thread
+    REQUIRED
+)
+
+set(lucene_boost_libs
+    ${Boost_LIBRARIES}
+    ${Boost_FILESYSTEM_LIBRARIES}
+    ${Boost_IOSTREAMS_LIBRARIES}
+    ${Boost_REGEX_LIBRARIES}
+    ${Boost_SYSTEM_LIBRARIES}
+    ${Boost_THREAD_LIBRARIES}
+)
+
+find_package(ZLIB REQUIRED)
+find_package(Threads REQUIRED)
diff --git a/doc/BUILDING.md b/doc/BUILDING.md
new file mode 100644
index 00000000..746e2841
--- /dev/null
+++ b/doc/BUILDING.md
@@ -0,0 +1,56 @@
+Build Instructions
+==========
+
+You'll need the following dependencies installed on your system.
+
+- [ZLib](https://zlib.net/)
+- [Boost](http://www.boost.org) libraries:
+    - date-time
+    - filesystem
+    - regex
+    - thread
+    - iostreams
+
+e.g. on Debian systems, the following packages are required:
+- zlib1g-dev
+- libboost-date-time-dev
+- libboost-filesystem-dev
+- libboost-regex-dev
+- libboost-thread-dev
+- libboost-iostreams-dev
+
+
+Build Instructions for Linux systems
+--------------------------------------
+
+To build the library the following commands should be issued:
+
+    $ mkdir build; cd build
+    $ cmake ..
+    $ make
+    $ make install
+
+Build Instructions for Windows systems
+--------------------------------------
+
+Once you have installed the dependencies and added the installation
+location to your `CMAKE_PREFIX_PATH`, open cmake-gui and configure the
+build. When building on Windows, ensure that the `ENABLE_CYCLIC_CHECK`
+option is set to `true`.
+ +Next, open the visual studio project with the 'open project' button. the +project is built using the `ALL_BUILD` solution in the projects column. +If you would like to install the project, build the `INSTALL` solution +after the fact. + +** +Note: if you wish to install the Lucene++ library to a protected area, you +must re-open the visual studio project as an administrator +** + +** +Note: "BOOST_ROOT" environment variable must be defined to point to the +Boost library directory (eg. c:\\local\\Boost). cmake should automatically +find the installed libraries if they are installed within that path; +e.g. C:\\local\\Boost\\lib64-msvc-14.2 +** diff --git a/doc/Doxyfile.cmake b/doc/doxygen/Doxyfile.cmake similarity index 90% rename from doc/Doxyfile.cmake rename to doc/doxygen/Doxyfile.cmake index 7eda0e61..0a4a5797 100644 --- a/doc/Doxyfile.cmake +++ b/doc/doxygen/Doxyfile.cmake @@ -5,7 +5,7 @@ #--------------------------------------------------------------------------- PROJECT_NAME = Lucene++ -PROJECT_NUMBER = @LUCENE++_SOVERSION@ +PROJECT_NUMBER = @lucene++_SOVERSION@ OUTPUT_DIRECTORY = @PROJECT_BINARY_DIR@/doc OUTPUT_LANGUAGE = English @@ -24,7 +24,7 @@ REPEAT_BRIEF = YES ALWAYS_DETAILED_SEC = NO INLINE_INHERITED_MEMB = NO FULL_PATH_NAMES = NO -STRIP_FROM_PATH = +STRIP_FROM_PATH = INTERNAL_DOCS = NO STRIP_CODE_COMMENTS = YES CASE_SENSE_NAMES = YES @@ -45,7 +45,7 @@ GENERATE_TESTLIST = YES GENERATE_BUGLIST = YES GENERATE_DEPRECATEDLIST= YES ALIASES = "memory=\par Memory management:\n" -ENABLED_SECTIONS = +ENABLED_SECTIONS = MAX_INITIALIZER_LINES = 30 OPTIMIZE_OUTPUT_FOR_C = YES OPTIMIZE_OUTPUT_JAVA = NO @@ -93,11 +93,11 @@ EXCLUDE_PATTERNS = "**/.svn/**" \ "*/md5/*" \ "*/nedmalloc/*" \ "*/utf8/*" -EXAMPLE_PATH = -EXAMPLE_PATTERNS = +EXAMPLE_PATH = +EXAMPLE_PATTERNS = EXAMPLE_RECURSIVE = NO -IMAGE_PATH = -INPUT_FILTER = +IMAGE_PATH = +INPUT_FILTER = FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- @@ -115,7 
+115,7 @@ REFERENCES_RELATION = YES ALPHABETICAL_INDEX = NO COLS_IN_ALPHA_INDEX = 5 -IGNORE_PREFIX = +IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output @@ -126,7 +126,7 @@ HTML_OUTPUT = html HTML_FILE_EXTENSION = .html HTML_HEADER = @PROJECT_BINARY_DIR@/doc/helpheader.htm HTML_FOOTER = @PROJECT_BINARY_DIR@/doc/helpfooter.htm -HTML_STYLESHEET = +HTML_STYLESHEET = HTML_ALIGN_MEMBERS = YES HTML_DYNAMIC_SECTIONS = YES @@ -151,8 +151,8 @@ LATEX_CMD_NAME = @LATEX_COMPILER@ MAKEINDEX_CMD_NAME = makeindex COMPACT_LATEX = NO PAPER_TYPE = a4wide -EXTRA_PACKAGES = -LATEX_HEADER = +EXTRA_PACKAGES = +LATEX_HEADER = PDF_HYPERLINKS = YES USE_PDFLATEX = NO LATEX_BATCHMODE = NO @@ -165,8 +165,8 @@ GENERATE_RTF = @DOCS_RTF@ RTF_OUTPUT = rtf COMPACT_RTF = NO RTF_HYPERLINKS = NO -RTF_STYLESHEET_FILE = -RTF_EXTENSIONS_FILE = +RTF_STYLESHEET_FILE = +RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output @@ -181,8 +181,8 @@ MAN_LINKS = NO #--------------------------------------------------------------------------- GENERATE_XML = @DOCS_XML@ -XML_SCHEMA = -XML_DTD = +XML_SCHEMA = +XML_DTD = XML_OUTPUT = xml XML_PROGRAMLISTING = YES @@ -193,33 +193,33 @@ XML_PROGRAMLISTING = YES GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- -# Configuration options related to the preprocessor +# Configuration options related to the preprocessor #--------------------------------------------------------------------------- ENABLE_PREPROCESSING = YES MACRO_EXPANSION = YES EXPAND_ONLY_PREDEF = NO SEARCH_INCLUDES = YES -INCLUDE_PATH = -INCLUDE_FILE_PATTERNS = +INCLUDE_PATH = +INCLUDE_FILE_PATTERNS = PREDEFINED = "" -EXPAND_AS_DEFINED = +EXPAND_AS_DEFINED = SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- -# 
Configuration::addtions related to external references +# Configuration::addtions related to external references #--------------------------------------------------------------------------- -TAGFILES = +TAGFILES = GENERATE_TAGFILE = @DOCS_TAGFILE_LOCATION@ ALLEXTERNALS = NO EXTERNAL_GROUPS = YES PERL_PATH = @PERL_EXECUTABLE@ #--------------------------------------------------------------------------- -# Configuration options related to the dot tool +# Configuration options related to the dot tool #--------------------------------------------------------------------------- CLASS_DIAGRAMS = YES @@ -233,18 +233,18 @@ INCLUDED_BY_GRAPH = YES GRAPHICAL_HIERARCHY = YES DOT_IMAGE_FORMAT = png DOT_PATH = @DOXYGEN_DOT_EXECUTABLE@ -DOTFILE_DIRS = +DOTFILE_DIRS = GENERATE_LEGEND = YES DOT_CLEANUP = YES DOT_FONTNAME = FreeSans -DOT_FONTPATH = +DOT_FONTPATH = DOT_FONTSIZE = 10 DOT_GRAPH_MAX_NODES = 50 DOT_MULTI_TARGETS = NO DOT_TRANSPARENT = NO #--------------------------------------------------------------------------- -# Configuration::addtions related to the search engine +# Configuration::addtions related to the search engine #--------------------------------------------------------------------------- SEARCHENGINE = YES diff --git a/doc/doxygen.css.cmake b/doc/doxygen/doxygen.css.cmake similarity index 100% rename from doc/doxygen.css.cmake rename to doc/doxygen/doxygen.css.cmake diff --git a/doc/helpfooter.htm.cmake b/doc/doxygen/helpfooter.htm.cmake similarity index 100% rename from doc/helpfooter.htm.cmake rename to doc/doxygen/helpfooter.htm.cmake diff --git a/doc/helpheader.htm.cmake b/doc/doxygen/helpheader.htm.cmake similarity index 80% rename from doc/helpheader.htm.cmake rename to doc/doxygen/helpheader.htm.cmake index 7cf76a5d..da5c781c 100644 --- a/doc/helpheader.htm.cmake +++ b/doc/doxygen/helpheader.htm.cmake @@ -1,16 +1,16 @@ -Lucene++ API Documentation (Version @LUCENE++_SOVERSION@) +Lucene++ API Documentation (Version @lucene++_SOVERSION@) - + diff --git 
a/doxygen/lucene++ b/doxygen/lucene++ deleted file mode 100644 index 07dc9395..00000000 --- a/doxygen/lucene++ +++ /dev/null @@ -1,1560 +0,0 @@ -# Doxyfile 1.6.1 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes (" ") - -#--------------------------------------------------------------------------- -# Project related configuration options -#--------------------------------------------------------------------------- - -# This tag specifies the encoding used for all characters in the config file -# that follow. The default is UTF-8 which is also the encoding used for all -# text before the first occurrence of this tag. Doxygen uses libiconv (or the -# iconv built into libc) for the transcoding. See -# http://www.gnu.org/software/libiconv for the list of possible encodings. - -DOXYFILE_ENCODING = UTF-8 - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = Lucene++ - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = 3.0.0 - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. 
- -OUTPUT_DIRECTORY = C:/Alan/lucene++/docs - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create -# 4096 sub-directories (in 2 levels) under the output directory of each output -# format and will distribute the generated files over these directories. -# Enabling this option can be useful when feeding doxygen a huge amount of -# source files, where putting all generated files in the same directory would -# otherwise cause performance problems for the file system. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, -# Croatian, Czech, Danish, Dutch, Esperanto, Farsi, Finnish, French, German, -# Greek, Hungarian, Italian, Japanese, Japanese-en (Japanese with English -# messages), Korean, Korean-en, Lithuanian, Norwegian, Macedonian, Persian, -# Polish, Portuguese, Romanian, Russian, Serbian, Serbian-Cyrilic, Slovak, -# Slovene, Spanish, Swedish, Ukrainian, and Vietnamese. - -OUTPUT_LANGUAGE = English - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator -# that is used to form the text in various listings. 
Each string -# in this list, if found as the leading text of the brief description, will be -# stripped from the text and the result after processing the whole list, is -# used as the annotated text. Otherwise, the brief description is used as-is. -# If left blank, the following values are used ("$name" is automatically -# replaced with the name of the entity): "The $name class" "The $name widget" -# "The $name file" "is" "provides" "specifies" "contains" -# "represents" "a" "an" "the" - -ABBREVIATE_BRIEF = "The $name class" \ - "The $name widget" \ - "The $name file" \ - is \ - provides \ - specifies \ - contains \ - represents \ - a \ - an \ - the - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. - -ALWAYS_DETAILED_SEC = NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown. - -INLINE_INHERITED_MEMB = NO - -# If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full -# path before files name in the file list and in the header files. If set -# to NO the shortest path that makes the file name unique will be used. - -FULL_PATH_NAMES = NO - -# If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag -# can be used to strip a user-defined part of the path. Stripping is -# only done if one of the specified strings matches the left-hand part of -# the path. The tag can be used to show relative paths in the file list. -# If left blank the directory from which doxygen is run is used as the -# path to strip. 
- -STRIP_FROM_PATH = /Users/dimitri/doxygen/mail/1.5.7/doxywizard/ - -# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of -# the path mentioned in the documentation of a class, which tells -# the reader which header file to include in order to use a class. -# If left blank only the name of the header file containing the class -# definition is used. Otherwise one should specify the include paths that -# are normally passed to the compiler using the -I flag. - -STRIP_FROM_INC_PATH = - -# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter -# (but less readable) file names. This can be useful is your file systems -# doesn't support long names like on DOS, Mac, or CD-ROM. - -SHORT_NAMES = NO - -# If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen -# will interpret the first line (until the first dot) of a JavaDoc-style -# comment as the brief description. If set to NO, the JavaDoc -# comments will behave just like regular Qt-style comments -# (thus requiring an explicit @brief command for a brief description.) - -JAVADOC_AUTOBRIEF = YES - -# If the QT_AUTOBRIEF tag is set to YES then Doxygen will -# interpret the first line (until the first dot) of a Qt-style -# comment as the brief description. If set to NO, the comments -# will behave just like regular Qt-style comments (thus requiring -# an explicit \brief command for a brief description.) - -QT_AUTOBRIEF = NO - -# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen -# treat a multi-line C++ special comment block (i.e. a block of //! or /// -# comments) as a brief description. This used to be the default behaviour. -# The new default is to treat a multi-line C++ comment block as a detailed -# description. Set this tag to YES if you prefer the old behaviour instead. 
- -MULTILINE_CPP_IS_BRIEF = YES - -# If the INHERIT_DOCS tag is set to YES (the default) then an undocumented -# member inherits the documentation from any documented member that it -# re-implements. - -INHERIT_DOCS = YES - -# If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce -# a new page for each member. If set to NO, the documentation of a member will -# be part of the file/class/namespace that contains it. - -SEPARATE_MEMBER_PAGES = NO - -# The TAB_SIZE tag can be used to set the number of spaces in a tab. -# Doxygen uses this value to replace tabs by spaces in code fragments. - -TAB_SIZE = 4 - -# This tag can be used to specify a number of aliases that acts -# as commands in the documentation. An alias has the form "name=value". -# For example adding "sideeffect=\par Side Effects:\n" will allow you to -# put the command \sideeffect (or @sideeffect) in the documentation, which -# will result in a user-defined paragraph with heading "Side Effects:". -# You can put \n's in the value part of an alias to insert newlines. - -ALIASES = - -# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C -# sources only. Doxygen will then generate output that is more tailored for C. -# For instance, some of the names that are used will be different. The list -# of all members will be omitted, etc. - -OPTIMIZE_OUTPUT_FOR_C = NO - -# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java -# sources only. Doxygen will then generate output that is more tailored for -# Java. For instance, namespaces will be presented as packages, qualified -# scopes will look different, etc. - -OPTIMIZE_OUTPUT_JAVA = NO - -# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran -# sources only. Doxygen will then generate output that is more tailored for -# Fortran. - -OPTIMIZE_FOR_FORTRAN = NO - -# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL -# sources. 
Doxygen will then generate output that is tailored for -# VHDL. - -OPTIMIZE_OUTPUT_VHDL = NO - -# Doxygen selects the parser to use depending on the extension of the files it parses. -# With this tag you can assign which parser to use for a given extension. -# Doxygen has a built-in mapping, but you can override or extend it using this tag. -# The format is ext=language, where ext is a file extension, and language is one of -# the parsers supported by doxygen: IDL, Java, Javascript, C#, C, C++, D, PHP, -# Objective-C, Python, Fortran, VHDL, C, C++. For instance to make doxygen treat -# .inc files as Fortran files (default is PHP), and .f files as C (default is Fortran), -# use: inc=Fortran f=C. Note that for custom extensions you also need to set -# FILE_PATTERNS otherwise the files are not read by doxygen. - -EXTENSION_MAPPING = - -# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want -# to include (a tag file for) the STL sources as input, then you should -# set this tag to YES in order to let doxygen match functions declarations and -# definitions whose arguments contain STL classes (e.g. func(std::string); v.s. -# func(std::string) {}). This also make the inheritance and collaboration -# diagrams that involve STL classes more complete and accurate. - -BUILTIN_STL_SUPPORT = YES - -# If you use Microsoft's C++/CLI language, you should set this option to YES to -# enable parsing support. - -CPP_CLI_SUPPORT = NO - -# Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. -# Doxygen will parse them like normal C++ but will assume all classes use public -# instead of private inheritance when no explicit protection keyword is present. - -SIP_SUPPORT = NO - -# For Microsoft's IDL there are propget and propput attributes to indicate getter -# and setter methods for a property. Setting this option to YES (the default) -# will make doxygen to replace the get and set methods by a property in the -# documentation. 
This will only work if the methods are indeed getting or -# setting a simple type. If this is not the case, or you want to show the -# methods anyway, you should set this option to NO. - -IDL_PROPERTY_SUPPORT = YES - -# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC -# tag is set to YES, then doxygen will reuse the documentation of the first -# member in the group (if any) for the other members of the group. By default -# all members of a group must be documented explicitly. - -DISTRIBUTE_GROUP_DOC = NO - -# Set the SUBGROUPING tag to YES (the default) to allow class member groups of -# the same type (for instance a group of public functions) to be put as a -# subgroup of that type (e.g. under the Public Functions section). Set it to -# NO to prevent subgrouping. Alternatively, this can be done per class using -# the \nosubgrouping command. - -SUBGROUPING = YES - -# When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct, union, or enum -# is documented as struct, union, or enum with the name of the typedef. So -# typedef struct TypeS {} TypeT, will appear in the documentation as a struct -# with name TypeT. When disabled the typedef will appear as a member of a file, -# namespace, or class. And the struct will be named TypeS. This can typically -# be useful for C code in case the coding convention dictates that all compound -# types are typedef'ed and only the typedef is referenced, never the tag name. - -TYPEDEF_HIDES_STRUCT = YES - -# The SYMBOL_CACHE_SIZE determines the size of the internal cache use to -# determine which symbols to keep in memory and which to flush to disk. -# When the cache is full, less often used symbols will be written to disk. -# For small to medium size projects (<1000 input files) the default value is -# probably good enough. For larger projects a too small cache size can cause -# doxygen to be busy swapping symbols to and from disk most of the time -# causing a significant performance penality. 
-# If the system has enough physical memory increasing the cache will improve the -# performance by keeping more symbols in memory. Note that the value works on -# a logarithmic scale so increasing the size by one will rougly double the -# memory usage. The cache size is given by this formula: -# 2^(16+SYMBOL_CACHE_SIZE). The valid range is 0..9, the default is 0, -# corresponding to a cache size of 2^16 = 65536 symbols - -SYMBOL_CACHE_SIZE = 0 - -#--------------------------------------------------------------------------- -# Build related configuration options -#--------------------------------------------------------------------------- - -# If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in -# documentation are documented, even if no documentation was available. -# Private class members and static file members will be hidden unless -# the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES - -EXTRACT_ALL = NO - -# If the EXTRACT_PRIVATE tag is set to YES all private members of a class -# will be included in the documentation. - -EXTRACT_PRIVATE = NO - -# If the EXTRACT_STATIC tag is set to YES all static members of a file -# will be included in the documentation. - -EXTRACT_STATIC = YES - -# If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) -# defined locally in source files will be included in the documentation. -# If set to NO only classes defined in header files are included. - -EXTRACT_LOCAL_CLASSES = YES - -# This flag is only useful for Objective-C code. When set to YES local -# methods, which are defined in the implementation section but not in -# the interface are included in the documentation. -# If set to NO (the default) only methods in the interface are included. 
- -EXTRACT_LOCAL_METHODS = YES - -# If this flag is set to YES, the members of anonymous namespaces will be -# extracted and appear in the documentation as a namespace called -# 'anonymous_namespace{file}', where file will be replaced with the base -# name of the file that contains the anonymous namespace. By default -# anonymous namespace are hidden. - -EXTRACT_ANON_NSPACES = NO - -# If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all -# undocumented members of documented classes, files or namespaces. -# If set to NO (the default) these members will be included in the -# various overviews, but no documentation section is generated. -# This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_MEMBERS = NO - -# If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all -# undocumented classes that are normally visible in the class hierarchy. -# If set to NO (the default) these classes will be included in the various -# overviews. This option has no effect if EXTRACT_ALL is enabled. - -HIDE_UNDOC_CLASSES = NO - -# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all -# friend (class|struct|union) declarations. -# If set to NO (the default) these declarations will be included in the -# documentation. - -HIDE_FRIEND_COMPOUNDS = YES - -# If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any -# documentation blocks found inside the body of a function. -# If set to NO (the default) these blocks will be appended to the -# function's detailed documentation block. - -HIDE_IN_BODY_DOCS = NO - -# The INTERNAL_DOCS tag determines if documentation -# that is typed after a \internal command is included. If the tag is set -# to NO (the default) then the documentation will be excluded. -# Set it to YES to include the internal documentation. - -INTERNAL_DOCS = NO - -# If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate -# file names in lower-case letters. 
If set to YES upper-case letters are also -# allowed. This is useful if you have classes or files whose names only differ -# in case and if your file system supports case sensitive file names. Windows -# and Mac users are advised to set this option to NO. - -CASE_SENSE_NAMES = NO - -# If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen -# will show members with their full class and namespace scopes in the -# documentation. If set to YES the scope will be hidden. - -HIDE_SCOPE_NAMES = NO - -# If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen -# will put a list of the files that are included by a file in the documentation -# of that file. - -SHOW_INCLUDE_FILES = YES - -# If the INLINE_INFO tag is set to YES (the default) then a tag [inline] -# is inserted in the documentation for inline members. - -INLINE_INFO = YES - -# If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen -# will sort the (detailed) documentation of file and class members -# alphabetically by member name. If set to NO the members will appear in -# declaration order. - -SORT_MEMBER_DOCS = YES - -# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the -# brief documentation of file, namespace and class members alphabetically -# by member name. If set to NO (the default) the members will appear in -# declaration order. - -SORT_BRIEF_DOCS = NO - -# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen -# will sort the (brief and detailed) documentation of class members so that -# constructors and destructors are listed first. If set to NO (the default) -# the constructors will appear in the respective orders defined by -# SORT_MEMBER_DOCS and SORT_BRIEF_DOCS. -# This tag will be ignored for brief docs if SORT_BRIEF_DOCS is set to NO -# and ignored for detailed docs if SORT_MEMBER_DOCS is set to NO. 
- -SORT_MEMBERS_CTORS_1ST = NO - -# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the -# hierarchy of group names into alphabetical order. If set to NO (the default) -# the group names will appear in their defined order. - -SORT_GROUP_NAMES = NO - -# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be -# sorted by fully-qualified names, including namespaces. If set to -# NO (the default), the class list will be sorted only by class name, -# not including the namespace part. -# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. -# Note: This option applies only to the class list, not to the -# alphabetical list. - -SORT_BY_SCOPE_NAME = NO - -# The GENERATE_TODOLIST tag can be used to enable (YES) or -# disable (NO) the todo list. This list is created by putting \todo -# commands in the documentation. - -GENERATE_TODOLIST = NO - -# The GENERATE_TESTLIST tag can be used to enable (YES) or -# disable (NO) the test list. This list is created by putting \test -# commands in the documentation. - -GENERATE_TESTLIST = NO - -# The GENERATE_BUGLIST tag can be used to enable (YES) or -# disable (NO) the bug list. This list is created by putting \bug -# commands in the documentation. - -GENERATE_BUGLIST = NO - -# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or -# disable (NO) the deprecated list. This list is created by putting -# \deprecated commands in the documentation. - -GENERATE_DEPRECATEDLIST= YES - -# The ENABLED_SECTIONS tag can be used to enable conditional -# documentation sections, marked by \if sectionname ... \endif. - -ENABLED_SECTIONS = - -# The MAX_INITIALIZER_LINES tag determines the maximum number of lines -# the initial value of a variable or define consists of for it to appear in -# the documentation. If the initializer consists of more lines than specified -# here it will be hidden. Use a value of 0 to hide initializers completely. 
-# The appearance of the initializer of individual variables and defines in the -# documentation can be controlled using \showinitializer or \hideinitializer -# command in the documentation regardless of this setting. - -MAX_INITIALIZER_LINES = 30 - -# Set the SHOW_USED_FILES tag to NO to disable the list of files generated -# at the bottom of the documentation of classes and structs. If set to YES the -# list will mention the files that were used to generate the documentation. - -SHOW_USED_FILES = YES - -# If the sources in your project are distributed over multiple directories -# then setting the SHOW_DIRECTORIES tag to YES will show the directory hierarchy -# in the documentation. The default is NO. - -SHOW_DIRECTORIES = NO - -# Set the SHOW_FILES tag to NO to disable the generation of the Files page. -# This will remove the Files entry from the Quick Index and from the -# Folder Tree View (if specified). The default is YES. - -SHOW_FILES = YES - -# Set the SHOW_NAMESPACES tag to NO to disable the generation of the -# Namespaces page. This will remove the Namespaces entry from the Quick Index -# and from the Folder Tree View (if specified). The default is YES. - -SHOW_NAMESPACES = YES - -# The FILE_VERSION_FILTER tag can be used to specify a program or script that -# doxygen should invoke to get the current version for each file (typically from -# the version control system). Doxygen will invoke the program by executing (via -# popen()) the command , where is the value of -# the FILE_VERSION_FILTER tag, and is the name of an input file -# provided by doxygen. Whatever the program writes to standard output -# is used as the file version. See the manual for examples. - -FILE_VERSION_FILTER = - -# The LAYOUT_FILE tag can be used to specify a layout file which will be parsed by -# doxygen. The layout file controls the global structure of the generated output files -# in an output format independent way. 
The create the layout file that represents -# doxygen's defaults, run doxygen with the -l option. You can optionally specify a -# file name after the option, if omitted DoxygenLayout.xml will be used as the name -# of the layout file. - -LAYOUT_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to warning and progress messages -#--------------------------------------------------------------------------- - -# The QUIET tag can be used to turn on/off the messages that are generated -# by doxygen. Possible values are YES and NO. If left blank NO is used. - -QUIET = NO - -# The WARNINGS tag can be used to turn on/off the warning messages that are -# generated by doxygen. Possible values are YES and NO. If left blank -# NO is used. - -WARNINGS = YES - -# If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings -# for undocumented members. If EXTRACT_ALL is set to YES then this flag will -# automatically be disabled. - -WARN_IF_UNDOCUMENTED = NO - -# If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for -# potential errors in the documentation, such as not documenting some -# parameters in a documented function, or documenting parameters that -# don't exist or using markup commands wrongly. - -WARN_IF_DOC_ERROR = YES - -# This WARN_NO_PARAMDOC option can be abled to get warnings for -# functions that are documented, but have no documentation for their parameters -# or return value. If set to NO (the default) doxygen will only warn about -# wrong or incomplete parameter documentation, but not about the absence of -# documentation. - -WARN_NO_PARAMDOC = NO - -# The WARN_FORMAT tag determines the format of the warning messages that -# doxygen can produce. The string should contain the $file, $line, and $text -# tags, which will be replaced by the file and line number from which the -# warning originated and the warning text. 
Optionally the format may contain -# $version, which will be replaced by the version of the file (if it could -# be obtained via FILE_VERSION_FILTER) - -WARN_FORMAT = "$file:$line: $text" - -# The WARN_LOGFILE tag can be used to specify a file to which warning -# and error messages should be written. If left blank the output is written -# to stderr. - -WARN_LOGFILE = - -#--------------------------------------------------------------------------- -# configuration options related to the input files -#--------------------------------------------------------------------------- - -# The INPUT tag can be used to specify the files and/or directories that contain -# documented source files. You may enter file names like "myfile.cpp" or -# directories like "/usr/src/myproject". Separate the files or directories -# with spaces. - -INPUT = C:/Alan/lucene++ - -# This tag can be used to specify the character encoding of the source files -# that doxygen parses. Internally doxygen uses the UTF-8 encoding, which is -# also the default input encoding. Doxygen uses libiconv (or the iconv built -# into libc) for the transcoding. See http://www.gnu.org/software/libiconv for -# the list of possible encodings. - -INPUT_ENCODING = UTF-8 - -# If the value of the INPUT tag contains directories, you can use the -# FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. 
If left -# blank the following patterns are tested: -# *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx -# *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 - -FILE_PATTERNS = *.cc \ - *.cxx \ - *.cpp \ - *.c++ \ - *.d \ - *.java \ - *.ii \ - *.ixx \ - *.ipp \ - *.i++ \ - *.inl \ - *.h \ - *.hh \ - *.hxx \ - *.hpp \ - *.h++ \ - *.idl \ - *.odl \ - *.cs \ - *.php \ - *.php3 \ - *.inc \ - *.m \ - *.mm \ - *.dox \ - *.py \ - *.f90 \ - *.f \ - *.vhd \ - *.vhdl - -# The RECURSIVE tag can be used to turn specify whether or not subdirectories -# should be searched for input files as well. Possible values are YES and NO. -# If left blank NO is used. - -RECURSIVE = YES - -# The EXCLUDE tag can be used to specify files and/or directories that should -# excluded from the INPUT source files. This way you can easily exclude a -# subdirectory from a directory tree whose root is specified with the INPUT tag. - -EXCLUDE = - -# The EXCLUDE_SYMLINKS tag can be used select whether or not files or -# directories that are symbolic links (a Unix filesystem feature) are excluded -# from the input. - -EXCLUDE_SYMLINKS = NO - -# If the value of the INPUT tag contains directories, you can use the -# EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude -# certain files from those directories. Note that the wildcards are matched -# against the file with absolute path, so to exclude all test directories -# for example use the pattern */test/* - -EXCLUDE_PATTERNS = */test/* \ - */md5/* \ - */nedmalloc/* \ - */utf8/* \ - */zlib/* - -# The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names -# (namespaces, classes, functions, etc.) that should be excluded from the -# output. The symbol name can be a fully qualified name, a word, or if the -# wildcard * is used, a substring. 
Examples: ANamespace, AClass, -# AClass::ANamespace, ANamespace::*Test - -EXCLUDE_SYMBOLS = - -# The EXAMPLE_PATH tag can be used to specify one or more files or -# directories that contain example code fragments that are included (see -# the \include command). - -EXAMPLE_PATH = - -# If the value of the EXAMPLE_PATH tag contains directories, you can use the -# EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp -# and *.h) to filter out the source-files in the directories. If left -# blank all files are included. - -EXAMPLE_PATTERNS = * - -# If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be -# searched for input files to be used with the \include or \dontinclude -# commands irrespective of the value of the RECURSIVE tag. -# Possible values are YES and NO. If left blank NO is used. - -EXAMPLE_RECURSIVE = NO - -# The IMAGE_PATH tag can be used to specify one or more files or -# directories that contain image that are included in the documentation (see -# the \image command). - -IMAGE_PATH = - -# The INPUT_FILTER tag can be used to specify a program that doxygen should -# invoke to filter for each input file. Doxygen will invoke the filter program -# by executing (via popen()) the command , where -# is the value of the INPUT_FILTER tag, and is the name of an -# input file. Doxygen will then use the output that the filter program writes -# to standard output. If FILTER_PATTERNS is specified, this tag will be -# ignored. - -INPUT_FILTER = - -# The FILTER_PATTERNS tag can be used to specify filters on a per file pattern -# basis. Doxygen will compare the file name with each pattern and apply the -# filter if there is a match. The filters are a list of the form: -# pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further -# info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER -# is applied to all files. 
- -FILTER_PATTERNS = - -# If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using -# INPUT_FILTER) will be used to filter the input files when producing source -# files to browse (i.e. when SOURCE_BROWSER is set to YES). - -FILTER_SOURCE_FILES = NO - -#--------------------------------------------------------------------------- -# configuration options related to source browsing -#--------------------------------------------------------------------------- - -# If the SOURCE_BROWSER tag is set to YES then a list of source files will -# be generated. Documented entities will be cross-referenced with these sources. -# Note: To get rid of all source code in the generated output, make sure also -# VERBATIM_HEADERS is set to NO. - -SOURCE_BROWSER = NO - -# Setting the INLINE_SOURCES tag to YES will include the body -# of functions and classes directly in the documentation. - -INLINE_SOURCES = NO - -# Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct -# doxygen to hide any special comment blocks from generated source code -# fragments. Normal C and C++ comments will always remain visible. - -STRIP_CODE_COMMENTS = YES - -# If the REFERENCED_BY_RELATION tag is set to YES -# then for each documented function all documented -# functions referencing it will be listed. - -REFERENCED_BY_RELATION = NO - -# If the REFERENCES_RELATION tag is set to YES -# then for each documented function all documented entities -# called/used by that function will be listed. - -REFERENCES_RELATION = NO - -# If the REFERENCES_LINK_SOURCE tag is set to YES (the default) -# and SOURCE_BROWSER tag is set to YES, then the hyperlinks from -# functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will -# link to the source code. Otherwise they will link to the documentation. 
- -REFERENCES_LINK_SOURCE = YES - -# If the USE_HTAGS tag is set to YES then the references to source code -# will point to the HTML generated by the htags(1) tool instead of doxygen -# built-in source browser. The htags tool is part of GNU's global source -# tagging system (see http://www.gnu.org/software/global/global.html). You -# will need version 4.8.6 or higher. - -USE_HTAGS = NO - -# If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen -# will generate a verbatim copy of the header file for each class for -# which an include is specified. Set to NO to disable this. - -VERBATIM_HEADERS = YES - -#--------------------------------------------------------------------------- -# configuration options related to the alphabetical class index -#--------------------------------------------------------------------------- - -# If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index -# of all compounds will be generated. Enable this if the project -# contains a lot of classes, structs, unions or interfaces. - -ALPHABETICAL_INDEX = YES - -# If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then -# the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns -# in which this list will be split (can be a number in the range [1..20]) - -COLS_IN_ALPHA_INDEX = 5 - -# In case all classes in a project start with a common prefix, all -# classes will be put under the same header in the alphabetical index. -# The IGNORE_PREFIX tag can be used to specify one or more prefixes that -# should be ignored while generating the index headers. - -IGNORE_PREFIX = - -#--------------------------------------------------------------------------- -# configuration options related to the HTML output -#--------------------------------------------------------------------------- - -# If the GENERATE_HTML tag is set to YES (the default) Doxygen will -# generate HTML output. 
- -GENERATE_HTML = YES - -# The HTML_OUTPUT tag is used to specify where the HTML docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `html' will be used as the default path. - -HTML_OUTPUT = html - -# The HTML_FILE_EXTENSION tag can be used to specify the file extension for -# each generated HTML page (for example: .htm,.php,.asp). If it is left blank -# doxygen will generate files with .html extension. - -HTML_FILE_EXTENSION = .html - -# The HTML_HEADER tag can be used to specify a personal HTML header for -# each generated HTML page. If it is left blank doxygen will generate a -# standard header. - -HTML_HEADER = - -# The HTML_FOOTER tag can be used to specify a personal HTML footer for -# each generated HTML page. If it is left blank doxygen will generate a -# standard footer. - -HTML_FOOTER = - -# The HTML_STYLESHEET tag can be used to specify a user-defined cascading -# style sheet that is used by each HTML page. It can be used to -# fine-tune the look of the HTML output. If the tag is left blank doxygen -# will generate a default style sheet. Note that doxygen will try to copy -# the style sheet file to the HTML output directory, so don't put your own -# stylesheet in the HTML output directory as well, or it will be erased! - -HTML_STYLESHEET = - -# If the HTML_ALIGN_MEMBERS tag is set to YES, the members of classes, -# files or namespaces will be aligned in HTML using tables. If set to -# NO a bullet list will be used. - -HTML_ALIGN_MEMBERS = YES - -# If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML -# documentation will contain sections that can be hidden and shown after the -# page has loaded. For this to work a browser that supports -# JavaScript and DHTML is required (for instance Mozilla 1.0+, Firefox -# Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). 
- -HTML_DYNAMIC_SECTIONS = YES - -# If the GENERATE_DOCSET tag is set to YES, additional index files -# will be generated that can be used as input for Apple's Xcode 3 -# integrated development environment, introduced with OSX 10.5 (Leopard). -# To create a documentation set, doxygen will generate a Makefile in the -# HTML output directory. Running make will produce the docset in that -# directory and running "make install" will install the docset in -# ~/Library/Developer/Shared/Documentation/DocSets so that Xcode will find -# it at startup. -# See http://developer.apple.com/tools/creatingdocsetswithdoxygen.html for more information. - -GENERATE_DOCSET = NO - -# When GENERATE_DOCSET tag is set to YES, this tag determines the name of the -# feed. A documentation feed provides an umbrella under which multiple -# documentation sets from a single provider (such as a company or product suite) -# can be grouped. - -DOCSET_FEEDNAME = "Doxygen generated docs" - -# When GENERATE_DOCSET tag is set to YES, this tag specifies a string that -# should uniquely identify the documentation set bundle. This should be a -# reverse domain-name style string, e.g. com.mycompany.MyDocSet. Doxygen -# will append .docset to the name. - -DOCSET_BUNDLE_ID = org.doxygen.Project - -# If the GENERATE_HTMLHELP tag is set to YES, additional index files -# will be generated that can be used as input for tools like the -# Microsoft HTML help workshop to generate a compiled HTML help file (.chm) -# of the generated HTML documentation. - -GENERATE_HTMLHELP = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can -# be used to specify the file name of the resulting .chm file. You -# can add a path in front of the file if the result should not be -# written to the html output directory. - -CHM_FILE = - -# If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can -# be used to specify the location (absolute path including file name) of -# the HTML help compiler (hhc.exe). 
If non-empty doxygen will try to run -# the HTML help compiler on the generated index.hhp. - -HHC_LOCATION = - -# If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag -# controls if a separate .chi index file is generated (YES) or that -# it should be included in the master .chm file (NO). - -GENERATE_CHI = NO - -# If the GENERATE_HTMLHELP tag is set to YES, the CHM_INDEX_ENCODING -# is used to encode HtmlHelp index (hhk), content (hhc) and project file -# content. - -CHM_INDEX_ENCODING = - -# If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag -# controls whether a binary table of contents is generated (YES) or a -# normal table of contents (NO) in the .chm file. - -BINARY_TOC = NO - -# The TOC_EXPAND flag can be set to YES to add extra items for group members -# to the contents of the HTML help documentation and to the tree view. - -TOC_EXPAND = NO - -# If the GENERATE_QHP tag is set to YES and both QHP_NAMESPACE and QHP_VIRTUAL_FOLDER -# are set, an additional index file will be generated that can be used as input for -# Qt's qhelpgenerator to generate a Qt Compressed Help (.qch) of the generated -# HTML documentation. - -GENERATE_QHP = NO - -# If the QHG_LOCATION tag is specified, the QCH_FILE tag can -# be used to specify the file name of the resulting .qch file. -# The path specified is relative to the HTML output folder. - -QCH_FILE = - -# The QHP_NAMESPACE tag specifies the namespace to use when generating -# Qt Help Project output. For more information please see -# http://doc.trolltech.com/qthelpproject.html#namespace - -QHP_NAMESPACE = - -# The QHP_VIRTUAL_FOLDER tag specifies the namespace to use when generating -# Qt Help Project output. For more information please see -# http://doc.trolltech.com/qthelpproject.html#virtual-folders - -QHP_VIRTUAL_FOLDER = doc - -# If QHP_CUST_FILTER_NAME is set, it specifies the name of a custom filter to add. 
-# For more information please see -# http://doc.trolltech.com/qthelpproject.html#custom-filters - -QHP_CUST_FILTER_NAME = - -# The QHP_CUST_FILT_ATTRS tag specifies the list of the attributes of the custom filter to add.For more information please see -# Qt Help Project / Custom Filters. - -QHP_CUST_FILTER_ATTRS = - -# The QHP_SECT_FILTER_ATTRS tag specifies the list of the attributes this project's -# filter section matches. -# Qt Help Project / Filter Attributes. - -QHP_SECT_FILTER_ATTRS = - -# If the GENERATE_QHP tag is set to YES, the QHG_LOCATION tag can -# be used to specify the location of Qt's qhelpgenerator. -# If non-empty doxygen will try to run qhelpgenerator on the generated -# .qhp file. - -QHG_LOCATION = - -# The DISABLE_INDEX tag can be used to turn on/off the condensed index at -# top of each HTML page. The value NO (the default) enables the index and -# the value YES disables it. - -DISABLE_INDEX = NO - -# This tag can be used to set the number of enum values (range [1..20]) -# that doxygen will group on one line in the generated HTML documentation. - -ENUM_VALUES_PER_LINE = 4 - -# The GENERATE_TREEVIEW tag is used to specify whether a tree-like index -# structure should be generated to display hierarchical information. -# If the tag value is set to YES, a side panel will be generated -# containing a tree-like index structure (just like the one that -# is generated for HTML Help). For this to work a browser that supports -# JavaScript, DHTML, CSS and frames is required (i.e. any modern browser). -# Windows users are probably better off using the HTML help feature. - -GENERATE_TREEVIEW = NO - -# By enabling USE_INLINE_TREES, doxygen will generate the Groups, Directories, -# and Class Hierarchy pages using a tree view instead of an ordered list. - -USE_INLINE_TREES = NO - -# If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be -# used to set the initial width (in pixels) of the frame in which the tree -# is shown. 
- -TREEVIEW_WIDTH = 250 - -# Use this tag to change the font size of Latex formulas included -# as images in the HTML documentation. The default is 10. Note that -# when you change the font size after a successful doxygen run you need -# to manually remove any form_*.png images from the HTML output directory -# to force them to be regenerated. - -FORMULA_FONTSIZE = 10 - -# When the SEARCHENGINE tag is enable doxygen will generate a search box -# for the HTML output. The underlying search engine uses javascript -# and DHTML and should work on any modern browser. Note that when using -# HTML help (GENERATE_HTMLHELP) or Qt help (GENERATE_QHP) -# there is already a search function so this one should typically -# be disabled. - -SEARCHENGINE = YES - -#--------------------------------------------------------------------------- -# configuration options related to the LaTeX output -#--------------------------------------------------------------------------- - -# If the GENERATE_LATEX tag is set to YES (the default) Doxygen will -# generate Latex output. - -GENERATE_LATEX = NO - -# The LATEX_OUTPUT tag is used to specify where the LaTeX docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `latex' will be used as the default path. - -LATEX_OUTPUT = latex - -# The LATEX_CMD_NAME tag can be used to specify the LaTeX command name to be -# invoked. If left blank `latex' will be used as the default command name. - -LATEX_CMD_NAME = latex - -# The MAKEINDEX_CMD_NAME tag can be used to specify the command name to -# generate index for LaTeX. If left blank `makeindex' will be used as the -# default command name. - -MAKEINDEX_CMD_NAME = makeindex - -# If the COMPACT_LATEX tag is set to YES Doxygen generates more compact -# LaTeX documents. This may be useful for small projects and may help to -# save some trees in general. 
- -COMPACT_LATEX = NO - -# The PAPER_TYPE tag can be used to set the paper type that is used -# by the printer. Possible values are: a4, a4wide, letter, legal and -# executive. If left blank a4wide will be used. - -PAPER_TYPE = a4wide - -# The EXTRA_PACKAGES tag can be to specify one or more names of LaTeX -# packages that should be included in the LaTeX output. - -EXTRA_PACKAGES = - -# The LATEX_HEADER tag can be used to specify a personal LaTeX header for -# the generated latex document. The header should contain everything until -# the first chapter. If it is left blank doxygen will generate a -# standard header. Notice: only use this tag if you know what you are doing! - -LATEX_HEADER = - -# If the PDF_HYPERLINKS tag is set to YES, the LaTeX that is generated -# is prepared for conversion to pdf (using ps2pdf). The pdf file will -# contain links (just like the HTML output) instead of page references -# This makes the output suitable for online browsing using a pdf viewer. - -PDF_HYPERLINKS = YES - -# If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of -# plain latex in the generated Makefile. Set this option to YES to get a -# higher quality PDF documentation. - -USE_PDFLATEX = YES - -# If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. -# command to the generated LaTeX files. This will instruct LaTeX to keep -# running if errors occur, instead of asking the user for help. -# This option is also used when generating formulas in HTML. - -LATEX_BATCHMODE = NO - -# If LATEX_HIDE_INDICES is set to YES then doxygen will not -# include the index chapters (such as File Index, Compound Index, etc.) -# in the output. - -LATEX_HIDE_INDICES = NO - -# If LATEX_SOURCE_CODE is set to YES then doxygen will include -# source code with syntax highlighting in the LaTeX output. -# Note that which sources are shown also depends on other settings -# such as SOURCE_BROWSER. 
- -LATEX_SOURCE_CODE = NO - -#--------------------------------------------------------------------------- -# configuration options related to the RTF output -#--------------------------------------------------------------------------- - -# If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output -# The RTF output is optimized for Word 97 and may not look very pretty with -# other RTF readers or editors. - -GENERATE_RTF = NO - -# The RTF_OUTPUT tag is used to specify where the RTF docs will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `rtf' will be used as the default path. - -RTF_OUTPUT = rtf - -# If the COMPACT_RTF tag is set to YES Doxygen generates more compact -# RTF documents. This may be useful for small projects and may help to -# save some trees in general. - -COMPACT_RTF = NO - -# If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated -# will contain hyperlink fields. The RTF file will -# contain links (just like the HTML output) instead of page references. -# This makes the output suitable for online browsing using WORD or other -# programs which support those fields. -# Note: wordpad (write) and others do not support links. - -RTF_HYPERLINKS = NO - -# Load stylesheet definitions from file. Syntax is similar to doxygen's -# config file, i.e. a series of assignments. You only have to provide -# replacements, missing definitions are set to their default value. - -RTF_STYLESHEET_FILE = - -# Set optional variables used in the generation of an rtf document. -# Syntax is similar to doxygen's config file. 
- -RTF_EXTENSIONS_FILE = - -#--------------------------------------------------------------------------- -# configuration options related to the man page output -#--------------------------------------------------------------------------- - -# If the GENERATE_MAN tag is set to YES (the default) Doxygen will -# generate man pages - -GENERATE_MAN = NO - -# The MAN_OUTPUT tag is used to specify where the man pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `man' will be used as the default path. - -MAN_OUTPUT = man - -# The MAN_EXTENSION tag determines the extension that is added to -# the generated man pages (default is the subroutine's section .3) - -MAN_EXTENSION = .3 - -# If the MAN_LINKS tag is set to YES and Doxygen generates man output, -# then it will generate one additional man file for each entity -# documented in the real man page(s). These additional files -# only source the real man page, but without them the man command -# would be unable to find the correct page. The default is NO. - -MAN_LINKS = NO - -#--------------------------------------------------------------------------- -# configuration options related to the XML output -#--------------------------------------------------------------------------- - -# If the GENERATE_XML tag is set to YES Doxygen will -# generate an XML file that captures the structure of -# the code including all documentation. - -GENERATE_XML = NO - -# The XML_OUTPUT tag is used to specify where the XML pages will be put. -# If a relative path is entered the value of OUTPUT_DIRECTORY will be -# put in front of it. If left blank `xml' will be used as the default path. - -XML_OUTPUT = xml - -# The XML_SCHEMA tag can be used to specify an XML schema, -# which can be used by a validating XML parser to check the -# syntax of the XML files. 
- -XML_SCHEMA = - -# The XML_DTD tag can be used to specify an XML DTD, -# which can be used by a validating XML parser to check the -# syntax of the XML files. - -XML_DTD = - -# If the XML_PROGRAMLISTING tag is set to YES Doxygen will -# dump the program listings (including syntax highlighting -# and cross-referencing information) to the XML output. Note that -# enabling this will significantly increase the size of the XML output. - -XML_PROGRAMLISTING = YES - -#--------------------------------------------------------------------------- -# configuration options for the AutoGen Definitions output -#--------------------------------------------------------------------------- - -# If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will -# generate an AutoGen Definitions (see autogen.sf.net) file -# that captures the structure of the code including all -# documentation. Note that this feature is still experimental -# and incomplete at the moment. - -GENERATE_AUTOGEN_DEF = NO - -#--------------------------------------------------------------------------- -# configuration options related to the Perl module output -#--------------------------------------------------------------------------- - -# If the GENERATE_PERLMOD tag is set to YES Doxygen will -# generate a Perl module file that captures the structure of -# the code including all documentation. Note that this -# feature is still experimental and incomplete at the -# moment. - -GENERATE_PERLMOD = NO - -# If the PERLMOD_LATEX tag is set to YES Doxygen will generate -# the necessary Makefile rules, Perl scripts and LaTeX code to be able -# to generate PDF and DVI output from the Perl module output. - -PERLMOD_LATEX = NO - -# If the PERLMOD_PRETTY tag is set to YES the Perl module output will be -# nicely formatted so it can be parsed by a human reader. This is useful -# if you want to understand what is going on. 
On the other hand, if this -# tag is set to NO the size of the Perl module output will be much smaller -# and Perl will parse it just the same. - -PERLMOD_PRETTY = YES - -# The names of the make variables in the generated doxyrules.make file -# are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. -# This is useful so different doxyrules.make files included by the same -# Makefile don't overwrite each other's variables. - -PERLMOD_MAKEVAR_PREFIX = - -#--------------------------------------------------------------------------- -# Configuration options related to the preprocessor -#--------------------------------------------------------------------------- - -# If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will -# evaluate all C-preprocessor directives found in the sources and include -# files. - -ENABLE_PREPROCESSING = YES - -# If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro -# names in the source code. If set to NO (the default) only conditional -# compilation will be performed. Macro expansion can be done in a controlled -# way by setting EXPAND_ONLY_PREDEF to YES. - -MACRO_EXPANSION = NO - -# If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES -# then the macro expansion is limited to the macros specified with the -# PREDEFINED and EXPAND_AS_DEFINED tags. - -EXPAND_ONLY_PREDEF = NO - -# If the SEARCH_INCLUDES tag is set to YES (the default) the includes files -# in the INCLUDE_PATH (see below) will be search if a #include is found. - -SEARCH_INCLUDES = YES - -# The INCLUDE_PATH tag can be used to specify one or more directories that -# contain include files that are not input files but should be processed by -# the preprocessor. - -INCLUDE_PATH = - -# You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard -# patterns (like *.h and *.hpp) to filter out the header-files in the -# directories. If left blank, the patterns specified with FILE_PATTERNS will -# be used. 
- -INCLUDE_FILE_PATTERNS = - -# The PREDEFINED tag can be used to specify one or more macro names that -# are defined before the preprocessor is started (similar to the -D option of -# gcc). The argument of the tag is a list of macros of the form: name -# or name=definition (no spaces). If the definition and the = are -# omitted =1 is assumed. To prevent a macro definition from being -# undefined via #undef or recursively expanded use the := operator -# instead of the = operator. - -PREDEFINED = - -# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then -# this tag can be used to specify a list of macro names that should be expanded. -# The macro definition that is found in the sources will be used. -# Use the PREDEFINED tag if you want to use a different macro definition. - -EXPAND_AS_DEFINED = - -# If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then -# doxygen's preprocessor will remove all function-like macros that are alone -# on a line, have an all uppercase name, and do not end with a semicolon. Such -# function macros are typically used for boiler-plate code, and will confuse -# the parser if not removed. - -SKIP_FUNCTION_MACROS = YES - -#--------------------------------------------------------------------------- -# Configuration::additions related to external references -#--------------------------------------------------------------------------- - -# The TAGFILES option can be used to specify one or more tagfiles. -# Optionally an initial location of the external documentation -# can be added for each tagfile. The format of a tag file without -# this location is as follows: -# TAGFILES = file1 file2 ... -# Adding location for the tag files is done as follows: -# TAGFILES = file1=loc1 "file2 = loc2" ... -# where "loc1" and "loc2" can be relative or absolute paths or -# URLs. If a location is present for each tag, the installdox tool -# does not have to be run to correct the links. 
-# Note that each tag file must have a unique name -# (where the name does NOT include the path) -# If a tag file is not located in the directory in which doxygen -# is run, you must also specify the path to the tagfile here. - -TAGFILES = - -# When a file name is specified after GENERATE_TAGFILE, doxygen will create -# a tag file that is based on the input files it reads. - -GENERATE_TAGFILE = - -# If the ALLEXTERNALS tag is set to YES all external classes will be listed -# in the class index. If set to NO only the inherited external classes -# will be listed. - -ALLEXTERNALS = NO - -# If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed -# in the modules index. If set to NO, only the current project's groups will -# be listed. - -EXTERNAL_GROUPS = YES - -# The PERL_PATH should be the absolute path and name of the perl script -# interpreter (i.e. the result of `which perl'). - -PERL_PATH = /usr/bin/perl - -#--------------------------------------------------------------------------- -# Configuration options related to the dot tool -#--------------------------------------------------------------------------- - -# If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will -# generate a inheritance diagram (in HTML, RTF and LaTeX) for classes with base -# or super classes. Setting the tag to NO turns the diagrams off. Note that -# this option is superseded by the HAVE_DOT option below. This is only a -# fallback. It is recommended to install and use dot, since it yields more -# powerful graphs. - -CLASS_DIAGRAMS = YES - -# You can define message sequence charts within doxygen comments using the \msc -# command. Doxygen will then run the mscgen tool (see -# http://www.mcternan.me.uk/mscgen/) to produce the chart and insert it in the -# documentation. The MSCGEN_PATH tag allows you to specify the directory where -# the mscgen tool resides. If left empty the tool is assumed to be found in the -# default search path. 
- -MSCGEN_PATH = - -# If set to YES, the inheritance and collaboration graphs will hide -# inheritance and usage relations if the target is undocumented -# or is not a class. - -HIDE_UNDOC_RELATIONS = YES - -# If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is -# available from the path. This tool is part of Graphviz, a graph visualization -# toolkit from AT&T and Lucent Bell Labs. The other options in this section -# have no effect if this option is set to NO (the default) - -HAVE_DOT = NO - -# By default doxygen will write a font called FreeSans.ttf to the output -# directory and reference it in all dot files that doxygen generates. This -# font does not include all possible unicode characters however, so when you need -# these (or just want a differently looking font) you can specify the font name -# using DOT_FONTNAME. You need need to make sure dot is able to find the font, -# which can be done by putting it in a standard location or by setting the -# DOTFONTPATH environment variable or by setting DOT_FONTPATH to the directory -# containing the font. - -DOT_FONTNAME = FreeSans - -# The DOT_FONTSIZE tag can be used to set the size of the font of dot graphs. -# The default size is 10pt. - -DOT_FONTSIZE = 10 - -# By default doxygen will tell dot to use the output directory to look for the -# FreeSans.ttf font (which doxygen will put there itself). If you specify a -# different font using DOT_FONTNAME you can set the path where dot -# can find it using this tag. - -DOT_FONTPATH = - -# If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect inheritance relations. Setting this tag to YES will force the -# the CLASS_DIAGRAMS tag to NO. 
- -CLASS_GRAPH = YES - -# If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for each documented class showing the direct and -# indirect implementation dependencies (inheritance, containment, and -# class references variables) of the class with other documented classes. - -COLLABORATION_GRAPH = YES - -# If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen -# will generate a graph for groups, showing the direct groups dependencies - -GROUP_GRAPHS = YES - -# If the UML_LOOK tag is set to YES doxygen will generate inheritance and -# collaboration diagrams in a style similar to the OMG's Unified Modeling -# Language. - -UML_LOOK = NO - -# If set to YES, the inheritance and collaboration graphs will show the -# relations between templates and their instances. - -TEMPLATE_RELATIONS = NO - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT -# tags are set to YES then doxygen will generate a graph for each documented -# file showing the direct and indirect include dependencies of the file with -# other documented files. - -INCLUDE_GRAPH = YES - -# If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and -# HAVE_DOT tags are set to YES then doxygen will generate a graph for each -# documented header file showing the documented files that directly or -# indirectly include this file. - -INCLUDED_BY_GRAPH = YES - -# If the CALL_GRAPH and HAVE_DOT options are set to YES then -# doxygen will generate a call dependency graph for every global function -# or class method. Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable call graphs -# for selected functions only using the \callgraph command. - -CALL_GRAPH = NO - -# If the CALLER_GRAPH and HAVE_DOT tags are set to YES then -# doxygen will generate a caller dependency graph for every global function -# or class method. 
Note that enabling this option will significantly increase -# the time of a run. So in most cases it will be better to enable caller -# graphs for selected functions only using the \callergraph command. - -CALLER_GRAPH = NO - -# If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen -# will graphical hierarchy of all classes instead of a textual one. - -GRAPHICAL_HIERARCHY = YES - -# If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES -# then doxygen will show the dependencies a directory has on other directories -# in a graphical way. The dependency relations are determined by the #include -# relations between the files in the directories. - -DIRECTORY_GRAPH = YES - -# The DOT_IMAGE_FORMAT tag can be used to set the image format of the images -# generated by dot. Possible values are png, jpg, or gif -# If left blank png will be used. - -DOT_IMAGE_FORMAT = png - -# The tag DOT_PATH can be used to specify the path where the dot tool can be -# found. If left blank, it is assumed the dot tool can be found in the path. - -DOT_PATH = - -# The DOTFILE_DIRS tag can be used to specify one or more directories that -# contain dot files that are included in the documentation (see the -# \dotfile command). - -DOTFILE_DIRS = - -# The DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of -# nodes that will be shown in the graph. If the number of nodes in a graph -# becomes larger than this value, doxygen will truncate the graph, which is -# visualized by representing a node as a red box. Note that doxygen if the -# number of direct children of the root node in a graph is already larger than -# DOT_GRAPH_MAX_NODES then the graph will not be shown at all. Also note -# that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. - -DOT_GRAPH_MAX_NODES = 50 - -# The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the -# graphs generated by dot. 
A depth value of 3 means that only nodes reachable -# from the root by following a path via at most 3 edges will be shown. Nodes -# that lay further from the root node will be omitted. Note that setting this -# option to 1 or 2 may greatly reduce the computation time needed for large -# code bases. Also note that the size of a graph can be further restricted by -# DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. - -MAX_DOT_GRAPH_DEPTH = 0 - -# Set the DOT_TRANSPARENT tag to YES to generate images with a transparent -# background. This is disabled by default, because dot on Windows does not -# seem to support this out of the box. Warning: Depending on the platform used, -# enabling this option may lead to badly anti-aliased labels on the edges of -# a graph (i.e. they become hard to read). - -DOT_TRANSPARENT = NO - -# Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output -# files in one run (i.e. multiple -o and -T options on the command line). This -# makes dot run faster, but since only newer versions of dot (>1.8.10) -# support this, this feature is disabled by default. - -DOT_MULTI_TARGETS = NO - -# If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will -# generate a legend page explaining the meaning of the various boxes and -# arrows in the dot generated graphs. - -GENERATE_LEGEND = YES - -# If the DOT_CLEANUP tag is set to YES (the default) Doxygen will -# remove the intermediate dot files that are used to generate -# the various graphs. - -DOT_CLEANUP = YES diff --git a/include/ASCIIFoldingFilter.h b/include/ASCIIFoldingFilter.h deleted file mode 100644 index f3ece736..00000000 --- a/include/ASCIIFoldingFilter.h +++ /dev/null @@ -1,66 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef ASCIIFOLDINGFILTER_H -#define ASCIIFOLDINGFILTER_H - -#include "TokenFilter.h" - -namespace Lucene -{ - /// This class converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII - /// characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if one exists. - /// - /// Characters from the following Unicode blocks are converted; however, only those characters with reasonable ASCII - /// alternatives are converted: - /// - /// C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf - /// Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf - /// Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf - /// Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf - /// Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf - /// Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf - /// IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf - /// Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf - /// Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf - /// General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf - /// Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf - /// Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf - /// Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf - /// Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf - /// Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf - /// Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf - /// - /// See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode - /// - /// The set of character conversions supported by this class is a superset of those supported by Lucene's {@link - /// ISOLatin1AccentFilter} which strips accents from 
Latin1 characters. For example, 'à' will be replaced by 'a'. - /// - class LPPAPI ASCIIFoldingFilter : public TokenFilter - { - public: - ASCIIFoldingFilter(TokenStreamPtr input); - virtual ~ASCIIFoldingFilter(); - - LUCENE_CLASS(ASCIIFoldingFilter); - - protected: - CharArray output; - int32_t outputPos; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - - /// Converts characters above ASCII to their ASCII equivalents. For example, accents are removed from - /// accented characters. - /// @param input The string to fold - /// @param length The number of characters in the input string - void foldToASCII(const wchar_t* input, int32_t length); - }; -} - -#endif diff --git a/include/AbstractAllTermDocs.h b/include/AbstractAllTermDocs.h deleted file mode 100644 index f2662124..00000000 --- a/include/AbstractAllTermDocs.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ABSTRACTALLTERMDOCS_H -#define ABSTRACTALLTERMDOCS_H - -#include "TermDocs.h" - -namespace Lucene -{ - /// Base class for enumerating all but deleted docs. - /// - /// NOTE: this class is meant only to be used internally by Lucene; it's only public so it - /// can be shared across packages. 
- class LPPAPI AbstractAllTermDocs : public TermDocs, public LuceneObject - { - public: - AbstractAllTermDocs(int32_t maxDoc); - virtual ~AbstractAllTermDocs(); - - LUCENE_CLASS(AbstractAllTermDocs); - - protected: - int32_t maxDoc; - int32_t _doc; - - public: - virtual void seek(TermPtr term); - virtual void seek(TermEnumPtr termEnum); - virtual int32_t doc(); - virtual int32_t freq(); - virtual bool next(); - virtual int32_t read(Collection docs, Collection freqs); - virtual bool skipTo(int32_t target); - virtual void close(); - virtual bool isDeleted(int32_t doc) = 0; - }; -} - -#endif diff --git a/include/AbstractField.h b/include/AbstractField.h deleted file mode 100644 index 5cb2649f..00000000 --- a/include/AbstractField.h +++ /dev/null @@ -1,224 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ABSTRACTFIELD_H -#define ABSTRACTFIELD_H - -#include "Fieldable.h" - -namespace Lucene -{ - class LPPAPI AbstractField : public Fieldable, public LuceneObject - { - public: - /// Specifies whether and how a field should be stored. - enum Store - { - /// Store the original field value in the index. This is useful for short texts like a document's title - /// which should be displayed with the results. The value is stored in its original form, ie. no analyzer - /// is used before it is stored. - STORE_YES, - - /// Do not store the field value in the index. - STORE_NO - }; - - /// Specifies whether and how a field should be indexed. - enum Index - { - /// Do not index the field value. This field can thus not be searched, but one can still access its - /// contents provided it is {@link Field.Store stored}. 
- INDEX_NO, - - /// Index the tokens produced by running the field's value through an Analyzer. This is useful for - /// common text. - INDEX_ANALYZED, - - /// Index the field's value without using an Analyzer, so it can be searched. As no analyzer is used - /// the value will be stored as a single term. This is useful for unique Ids like product numbers. - INDEX_NOT_ANALYZED, - - /// Index the field's value without an Analyzer, and also disable the storing of norms. Note that you - /// can also separately enable/disable norms by calling {@link Field#setOmitNorms}. No norms means - /// that index-time field and document boosting and field length normalization are disabled. The benefit - /// is less memory usage as norms take up one byte of RAM per indexed field for every document in the - /// index, during searching. Note that once you index a given field with norms enabled, disabling norms - /// will have no effect. In other words, for this to have the above described effect on a field, all - /// instances of that field must be indexed with NOT_ANALYZED_NO_NORMS from the beginning. - INDEX_NOT_ANALYZED_NO_NORMS, - - /// Index the tokens produced by running the field's value through an Analyzer, and also separately - /// disable the storing of norms. See {@link #NOT_ANALYZED_NO_NORMS} for what norms are and why you - /// may want to disable them. - INDEX_ANALYZED_NO_NORMS - }; - - /// Specifies whether and how a field should have term vectors. - enum TermVector - { - /// Do not store term vectors. - TERM_VECTOR_NO, - - /// Store the term vectors of each document. A term vector is a list of the document's terms and their - /// number of occurrences in that document. 
- TERM_VECTOR_YES, - - /// Store the term vector + token position information - /// @see #YES - TERM_VECTOR_WITH_POSITIONS, - - /// Store the term vector + token offset information - /// @see #YES - TERM_VECTOR_WITH_OFFSETS, - - /// Store the term vector + token position and offset information - /// @see #YES - /// @see #WITH_POSITIONS - /// @see #WITH_OFFSETS - TERM_VECTOR_WITH_POSITIONS_OFFSETS - }; - - public: - virtual ~AbstractField(); - - LUCENE_CLASS(AbstractField); - - protected: - AbstractField(); - AbstractField(const String& name, Store store, Index index, TermVector termVector); - - String _name; - bool storeTermVector; - bool storeOffsetWithTermVector; - bool storePositionWithTermVector; - bool _omitNorms; - bool _isStored; - bool _isIndexed; - bool _isTokenized; - bool _isBinary; - bool lazy; - bool omitTermFreqAndPositions; - double boost; - - // the data object for all different kind of field values - FieldsData fieldsData; - - // pre-analyzed tokenStream for indexed fields - TokenStreamPtr tokenStream; - - // length/offset for all primitive types - int32_t binaryLength; - int32_t binaryOffset; - - public: - /// Sets the boost factor hits on this field. This value will be multiplied into the score of all - /// hits on this this field of this document. - /// - /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field. - /// If a document has multiple fields with the same name, all such values are multiplied together. - /// This product is then used to compute the norm factor for the field. By default, in the {@link - /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the - /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)} - /// before it is stored in the index. One should attempt to ensure that this product does not overflow - /// the range of that encoding. 
- /// - /// @see Document#setBoost(double) - /// @see Similarity#computeNorm(String, FieldInvertState) - /// @see Similarity#encodeNorm(double) - virtual void setBoost(double boost); - - /// Returns the boost factor for hits for this field. - /// - /// The default value is 1.0. - /// - /// Note: this value is not stored directly with the document in the index. Documents returned from - /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value - /// present as when this field was indexed. - virtual double getBoost(); - - /// Returns the name of the field as an interned string. For example "date", "title", "body", ... - virtual String name(); - - /// True if the value of the field is to be stored in the index for return with search hits. It is an - /// error for this to be true if a field is Reader-valued. - virtual bool isStored(); - - /// True if the value of the field is to be indexed, so that it may be searched on. - virtual bool isIndexed(); - - /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields - /// are indexed as a single word and may not be Reader-valued. - virtual bool isTokenized(); - - /// True if the term or terms used to index this field are stored as a term vector, available from - /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the - /// original content of the field, only to terms used to index it. If the original content must be - /// preserved, use the stored attribute instead. - virtual bool isTermVectorStored(); - - /// True if terms are stored as term vector together with their offsets (start and end position in - /// source text). - virtual bool isStoreOffsetWithTermVector(); - - /// True if terms are stored as term vector together with their token positions. - virtual bool isStorePositionWithTermVector(); - - /// True if the value of the field is stored as binary. 
- virtual bool isBinary(); - - /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} - /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. - /// @return reference to the Field value as byte[]. - virtual ByteArray getBinaryValue(); - - /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} - /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. - /// @return reference to the Field value as byte[]. - virtual ByteArray getBinaryValue(ByteArray result); - - /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is - /// undefined. - /// @return length of byte[] segment that represents this Field value. - virtual int32_t getBinaryLength(); - - /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is - /// undefined. - /// @return index of the first character in byte[] segment that represents this Field value. - virtual int32_t getBinaryOffset(); - - /// True if norms are omitted for this indexed field. - virtual bool getOmitNorms(); - - /// @see #setOmitTermFreqAndPositions - virtual bool getOmitTermFreqAndPositions(); - - /// If set, omit normalization factors associated with this indexed field. - /// This effectively disables indexing boosts and length normalization for this field. - virtual void setOmitNorms(bool omitNorms); - - /// If set, omit term freq, positions and payloads from postings for this field. - /// - /// NOTE: While this option reduces storage space required in the index, it also means any query requiring - /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail - /// to find results. - virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions); - - /// Indicates whether a Field is Lazy or not. 
The semantics of Lazy loading are such that if a Field - /// is lazily loaded, retrieving it's values via {@link #stringValue()} or {@link #getBinaryValue()} - /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open. - /// - /// @return true if this field can be loaded lazily - virtual bool isLazy(); - - /// Prints a Field for human consumption. - virtual String toString(); - - protected: - void setStoreTermVector(TermVector termVector); - }; -} - -#endif diff --git a/include/AllTermDocs.h b/include/AllTermDocs.h deleted file mode 100644 index 4b865806..00000000 --- a/include/AllTermDocs.h +++ /dev/null @@ -1,30 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ALLTERMDOCS_H -#define ALLTERMDOCS_H - -#include "AbstractAllTermDocs.h" - -namespace Lucene -{ - class AllTermDocs : public AbstractAllTermDocs - { - public: - AllTermDocs(SegmentReaderPtr parent); - virtual ~AllTermDocs(); - - LUCENE_CLASS(AllTermDocs); - - protected: - BitVectorWeakPtr _deletedDocs; - - public: - virtual bool isDeleted(int32_t doc); - }; -} - -#endif diff --git a/include/Allocator.h b/include/Allocator.h deleted file mode 100644 index 4c356feb..00000000 --- a/include/Allocator.h +++ /dev/null @@ -1,146 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef ALLOCATOR_H -#define ALLOCATOR_H - -#include "Config.h" - -namespace Lucene -{ - /// Allocate block of memory. - LPPAPI void* AllocMemory(size_t size); - - /// Reallocate a given block of memory. - LPPAPI void* ReallocMemory(void* memory, size_t size); - - /// Release a given block of memory. - LPPAPI void FreeMemory(void* memory); - - /// Release thread cache. Note: should be called whenever a thread - /// exits and using nedmalloc. - LPPAPI void ReleaseThreadCache(); - - /// Custom stl allocator used to help exporting stl container across process - /// borders. It can also calls custom memory allocation functions that can - /// help track memory leaks and/or improve performance over standard allocators. - /// @see #AllocMemory(size_t) - /// @see #FreeMemory(void*) - template - class Allocator - { - public: - typedef size_t size_type; - typedef ptrdiff_t difference_type; - typedef TYPE* pointer; - typedef const TYPE* const_pointer; - typedef TYPE& reference; - typedef const TYPE& const_reference; - typedef TYPE value_type; - - Allocator() - { - } - - Allocator(const Allocator&) - { - } - - pointer allocate(size_type n, const void* = 0) - { - return (TYPE*)AllocMemory((size_t)(n * sizeof(TYPE))); - } - - void deallocate(void* p, size_type) - { - if (p != NULL) - FreeMemory(p); - } - - pointer address(reference x) const - { - return &x; - } - - const_pointer address(const_reference x) const - { - return &x; - } - - Allocator& operator= (const Allocator&) - { - return *this; - } - - void construct(pointer p, const TYPE& val) - { - new ((TYPE*)p) TYPE(val); - } - - void destroy(pointer p) - { - p->~TYPE(); - } - - size_type max_size() const - { - return size_t(-1); - } - - template - struct rebind - { - typedef Allocator other; - }; - - template - Allocator(const Allocator&) - { - } - }; - - template - inline bool operator== (const Allocator&, const Allocator&) - { - return true; 
- } - - template - inline bool operator!= (const Allocator&, const Allocator&) - { - return false; - } - - template <> - class Allocator - { - public: - typedef void* pointer; - typedef const void* const_pointer; - typedef void value_type; - - Allocator() - { - } - - Allocator(const Allocator&) - { - } - - template - struct rebind - { - typedef Allocator other; - }; - - template - Allocator(const Allocator&) - { - } - }; -} - -#endif diff --git a/include/Analyzer.h b/include/Analyzer.h deleted file mode 100644 index 2b6d1c15..00000000 --- a/include/Analyzer.h +++ /dev/null @@ -1,70 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ANALYZER_H -#define ANALYZER_H - -#include "CloseableThreadLocal.h" - -namespace Lucene -{ - /// An Analyzer builds TokenStreams, which analyze text. It thus represents a policy for extracting index terms - /// from text. - /// - /// Typical implementations first build a Tokenizer, which breaks the stream of characters from the Reader into - /// raw Tokens. One or more TokenFilters may then be applied to the output of the Tokenizer. - class LPPAPI Analyzer : public LuceneObject - { - public: - virtual ~Analyzer(); - LUCENE_CLASS(Analyzer); - - protected: - CloseableThreadLocal tokenStreams; - - public: - /// Creates a TokenStream which tokenizes all the text in the provided Reader. Must be able to handle null - /// field name for backward compatibility. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) = 0; - - /// Creates a TokenStream that is allowed to be re-used from the previous time that the same thread called - /// this method. 
Callers that do not need to use more than one TokenStream at the same time from this analyzer - /// should use this method for better performance. - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - - /// Invoked before indexing a Fieldable instance if terms have already been added to that field. This allows - /// custom analyzers to place an automatic position increment gap between Fieldable instances using the same - /// field name. The default value position increment gap is 0. With a 0 position increment gap and the typical - /// default token position increment of 1, all terms in a field, including across Fieldable instances, are in - /// successive positions, allowing exact PhraseQuery matches, for instance, across Fieldable instance boundaries. - /// - /// @param fieldName Fieldable name being indexed. - /// @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} - virtual int32_t getPositionIncrementGap(const String& fieldName); - - /// Just like {@link #getPositionIncrementGap}, except for Token offsets instead. By default this returns 1 for - /// tokenized fields and, as if the fields were joined with an extra space character, and 0 for un-tokenized - /// fields. This method is only called if the field produced at least one token for indexing. - /// - /// @param field the field just indexed - /// @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)} - virtual int32_t getOffsetGap(FieldablePtr field); - - /// Frees persistent resources used by this Analyzer - virtual void close(); - - protected: - /// Used by Analyzers that implement reusableTokenStream to retrieve previously saved TokenStreams for re-use - /// by the same thread. - virtual LuceneObjectPtr getPreviousTokenStream(); - - /// Used by Analyzers that implement reusableTokenStream to save a TokenStream for later re-use by the - /// same thread. 
- virtual void setPreviousTokenStream(LuceneObjectPtr stream); - }; -} - -#endif diff --git a/include/Array.h b/include/Array.h deleted file mode 100644 index 774d58a8..00000000 --- a/include/Array.h +++ /dev/null @@ -1,154 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ARRAY_H -#define ARRAY_H - -#include -#include "Lucene.h" - -namespace Lucene -{ - template - class ArrayData - { - public: - ArrayData(int32_t size) - { - data = NULL; - resize(size); - } - - ~ArrayData() - { - resize(0); - } - - public: - TYPE* data; - int32_t size; - - public: - void resize(int32_t size) - { - if (size == 0) - { - FreeMemory(data); - data = NULL; - } - else if (data == NULL) - data = (TYPE*)AllocMemory(size * sizeof(TYPE)); - else - data = (TYPE*)ReallocMemory(data, size * sizeof(TYPE)); - this->size = size; - } - }; - - /// Utility template class to handle sharable arrays of simple data types - template - class Array - { - public: - typedef Array this_type; - typedef ArrayData array_type; - - Array() - { - array = NULL; - } - - protected: - boost::shared_ptr container; - array_type* array; - - public: - static this_type newInstance(int32_t size) - { - this_type instance; - instance.container = Lucene::newInstance(size); - instance.array = instance.container.get(); - return instance; - } - - void reset() - { - resize(0); - } - - void resize(int32_t size) - { - if (size == 0) - container.reset(); - else if (!container) - container = Lucene::newInstance(size); - else - container->resize(size); - array = container.get(); - } - - TYPE* get() const - { - return array->data; - } - - int32_t size() const - { - return array->size; - } - - bool equals(const this_type& other) 
const - { - if (array->size != other.array->size) - return false; - return (std::memcmp(array->data, other.array->data, array->size) == 0); - } - - int32_t hashCode() const - { - return (int32_t)(int64_t)array; - } - - TYPE& operator[] (int32_t i) const - { - BOOST_ASSERT(i >= 0 && i < array->size); - return array->data[i]; - } - - operator bool () const - { - return container; - } - - bool operator! () const - { - return !container; - } - - bool operator== (const Array& other) - { - return (container == other.container); - } - - bool operator!= (const Array& other) - { - return (container != other.container); - } - }; - - template - inline std::size_t hash_value(const Array& value) - { - return (std::size_t)value.hashCode(); - } - - template - inline bool operator== (const Array& value1, const Array& value2) - { - return (value1.hashCode() == value2.hashCode()); - } -} - -#endif diff --git a/include/Attribute.h b/include/Attribute.h deleted file mode 100644 index feed4081..00000000 --- a/include/Attribute.h +++ /dev/null @@ -1,56 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ATTRIBUTE_H -#define ATTRIBUTE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Base class for Attributes that can be added to a {@link AttributeSource}. - /// - /// Attributes are used to add data in a dynamic, yet type-safe way to a source of usually streamed objects, - /// eg. a {@link TokenStream}. - class LPPAPI Attribute : public LuceneObject - { - public: - virtual ~Attribute(); - LUCENE_CLASS(Attribute); - - public: - /// Clears the values in this Attribute and resets it to its default value. 
If this implementation - /// implements more than one Attribute interface it clears all. - virtual void clear() = 0; - - /// Subclasses must implement this method and should compute a hashCode similar to this: - /// - /// int32_t hashCode() - /// { - /// int32_t code = startOffset; - /// code = code * 31 + endOffset; - /// return code; - /// } - /// - /// see also {@link #equals(Object)} - virtual int32_t hashCode() = 0; - - /// All values used for computation of {@link #hashCode()} should be checked here for equality. - /// - /// see also {@link LuceneObject#equals(Object)} - virtual bool equals(LuceneObjectPtr other) = 0; - - /// Copies the values from this Attribute into the passed-in target attribute. The target implementation - /// must support all the Attributes this implementation supports. - virtual void copyTo(AttributePtr target) = 0; - - /// Shallow clone. Subclasses must override this if they need to clone any members deeply. - /// @param base clone reference - null when called initially, then set in top virtual override. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()) = 0; - }; -} - -#endif diff --git a/include/AttributeSource.h b/include/AttributeSource.h deleted file mode 100644 index ed3895b1..00000000 --- a/include/AttributeSource.h +++ /dev/null @@ -1,192 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ATTRIBUTESOURCE_H -#define ATTRIBUTESOURCE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class LPPAPI AttributeFactory : public LuceneObject - { - protected: - AttributeFactory(); - - public: - virtual ~AttributeFactory(); - - LUCENE_CLASS(AttributeFactory); - - public: - /// returns an {@link Attribute}. 
- virtual AttributePtr createAttributeInstance(const String& className); - - template - AttributePtr createInstance(const String& className) - { - AttributePtr attrImpl = createAttributeInstance(className); - return attrImpl ? attrImpl : newLucene(); - } - - /// This is the default factory that creates {@link Attribute}s using the class name of the supplied - /// {@link Attribute} interface class by appending Impl to it. - static AttributeFactoryPtr DEFAULT_ATTRIBUTE_FACTORY(); - }; - - /// An AttributeSource contains a list of different {@link Attribute}s, and methods to add and get them. - /// There can only be a single instance of an attribute in the same AttributeSource instance. This is ensured - /// by passing in the actual type of the Attribute (Class) to the {@link #addAttribute(Class)}, - /// which then checks if an instance of that type is already present. If yes, it returns the instance, otherwise - /// it creates a new instance and returns it. - class LPPAPI AttributeSource : public LuceneObject - { - public: - /// An AttributeSource using the default attribute factory {@link DefaultAttributeFactory}. - AttributeSource(); - - /// An AttributeSource that uses the same attributes as the supplied one. - AttributeSource(AttributeSourcePtr input); - - /// An AttributeSource using the supplied {@link AttributeFactory} for creating new {@link Attribute} - /// instances. - AttributeSource(AttributeFactoryPtr factory); - - virtual ~AttributeSource(); - - LUCENE_CLASS(AttributeSource); - - protected: - AttributeFactoryPtr factory; - MapStringAttribute attributes; - AttributeSourceStatePtr currentState; - - public: - /// returns the used AttributeFactory. - AttributeFactoryPtr getAttributeFactory(); - - /// This method first checks if an instance of that class is already in this AttributeSource and returns it. - /// Otherwise a new instance is created, added to this AttributeSource and returned. 
- template - boost::shared_ptr addAttribute() - { - String className(ATTR::_getClassName()); - boost::shared_ptr attrImpl(boost::dynamic_pointer_cast(getAttribute(className))); - if (!attrImpl) - { - attrImpl = boost::dynamic_pointer_cast(factory->createInstance(className)); - if (!attrImpl) - boost::throw_exception(IllegalArgumentException(L"Could not instantiate implementing class for " + className)); - addAttribute(className, attrImpl); - } - return attrImpl; - } - - /// Adds a custom Attribute instance. - void addAttribute(const String& className, AttributePtr attrImpl); - - /// Returns true if this AttributeSource has any attributes. - bool hasAttributes(); - - /// Returns true, if this AttributeSource contains the passed-in Attribute. - template - bool hasAttribute() - { - return getAttribute(ATTR::_getClassName()); - } - - /// Returns the instance of the passed in Attribute contained in this AttributeSource. - template - boost::shared_ptr getAttribute() - { - String className(ATTR::_getClassName()); - boost::shared_ptr attr(boost::dynamic_pointer_cast(getAttribute(className))); - if (!attr) - boost::throw_exception(IllegalArgumentException(L"This AttributeSource does not have the attribute '" + className + L"'.")); - return attr; - } - - /// Resets all Attributes in this AttributeSource by calling {@link AttributeImpl#clear()} on each Attribute - /// implementation. - void clearAttributes(); - - /// Captures the state of all Attributes. The return value can be passed to {@link #restoreState} to restore - /// the state of this or another AttributeSource. - AttributeSourceStatePtr captureState(); - - /// Restores this state by copying the values of all attribute implementations that this state contains into - /// the attributes implementations of the targetStream. The targetStream must contain a corresponding instance - /// for each argument contained in this state (eg. 
it is not possible to restore the state of an AttributeSource - /// containing a TermAttribute into a AttributeSource using a Token instance as implementation). - /// - /// Note that this method does not affect attributes of the targetStream that are not contained in this state. - /// In other words, if for example the targetStream contains an OffsetAttribute, but this state doesn't, then - /// the value of the OffsetAttribute remains unchanged. It might be desirable to reset its value to the default, - /// in which case the caller should first call {@link TokenStream#clearAttributes()} on the targetStream. - void restoreState(AttributeSourceStatePtr state); - - /// Return hash code for this object. - virtual int32_t hashCode(); - - /// Return whether two objects are equal - virtual bool equals(LuceneObjectPtr other); - - /// Returns a string representation of the object - virtual String toString(); - - /// Performs a clone of all {@link AttributeImpl} instances returned in a new AttributeSource instance. This - /// method can be used to eg. create another TokenStream with exactly the same attributes (using {@link - /// #AttributeSource(AttributeSource)}) - AttributeSourcePtr cloneAttributes(); - - /// Return a vector of attributes based on currentState. - Collection getAttributes(); - - protected: - /// The caller must pass in a className value. - /// This method checks if an instance of that class is already in this AttributeSource and returns it. - AttributePtr getAttribute(const String& className); - - /// Returns true, if this AttributeSource contains the passed-in Attribute. - bool hasAttribute(const String& className); - - void computeCurrentState(); - }; - - class LPPAPI DefaultAttributeFactory : public AttributeFactory - { - public: - virtual ~DefaultAttributeFactory(); - - LUCENE_CLASS(DefaultAttributeFactory); - - public: - /// returns an {@link Attribute}. 
- virtual AttributePtr createAttributeInstance(const String& className); - }; - - /// This class holds the state of an AttributeSource. - /// @see #captureState - /// @see #restoreState - class LPPAPI AttributeSourceState : public LuceneObject - { - public: - virtual ~AttributeSourceState(); - - LUCENE_CLASS(AttributeSourceState); - - protected: - AttributePtr attribute; - AttributeSourceStatePtr next; - - public: - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - friend class AttributeSource; - }; -} - -#endif diff --git a/include/AveragePayloadFunction.h b/include/AveragePayloadFunction.h deleted file mode 100644 index 2424abcd..00000000 --- a/include/AveragePayloadFunction.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef AVERAGEPAYLOADFUNCTION_H -#define AVERAGEPAYLOADFUNCTION_H - -#include "PayloadFunction.h" - -namespace Lucene -{ - /// Calculate the final score as the average score of all payloads seen. - /// - /// Is thread safe and completely reusable. 
- class LPPAPI AveragePayloadFunction : public PayloadFunction - { - public: - virtual ~AveragePayloadFunction(); - LUCENE_CLASS(AveragePayloadFunction); - - public: - virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, - double currentScore, double currentPayloadScore); - virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - }; -} - -#endif diff --git a/include/Base64.h b/include/Base64.h deleted file mode 100644 index 72a6e0ef..00000000 --- a/include/Base64.h +++ /dev/null @@ -1,33 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BASE64_H -#define BASE64_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class Base64 : public LuceneObject - { - public: - virtual ~Base64(); - LUCENE_CLASS(Base64); - - protected: - static const String BASE64_CHARS; - - public: - static String encode(ByteArray bytes); - static String encode(const uint8_t* bytes, int32_t length); - static ByteArray decode(const String& str); - - protected: - static bool isBase64(wchar_t ch); - }; -} - -#endif diff --git a/include/BaseCharFilter.h b/include/BaseCharFilter.h deleted file mode 100644 index ddbb2546..00000000 --- a/include/BaseCharFilter.h +++ /dev/null @@ -1,38 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef BASECHARFILTER_H -#define BASECHARFILTER_H - -#include "CharFilter.h" - -namespace Lucene -{ - /// Base utility class for implementing a {@link CharFilter}. You subclass this, and then record mappings by - /// calling {@link #addOffCorrectMap}, and then invoke the correct method to correct an offset. - class LPPAPI BaseCharFilter : public CharFilter - { - public: - BaseCharFilter(CharStreamPtr in); - virtual ~BaseCharFilter(); - - LUCENE_CLASS(BaseCharFilter); - - protected: - IntArray offsets; - IntArray diffs; - int32_t size; - - protected: - /// Retrieve the corrected offset. - virtual int32_t correct(int32_t currentOff); - - int32_t getLastCumulativeDiff(); - void addOffCorrectMap(int32_t off, int32_t cumulativeDiff); - }; -} - -#endif diff --git a/include/BitSet.h b/include/BitSet.h deleted file mode 100644 index 2598f8f9..00000000 --- a/include/BitSet.h +++ /dev/null @@ -1,66 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef BITSET_H -#define BITSET_H - -#include -#include "LuceneObject.h" - -namespace Lucene -{ - class LPPAPI BitSet : public LuceneObject - { - public: - BitSet(uint32_t size = 0); - virtual ~BitSet(); - - LUCENE_CLASS(BitSet); - - protected: - typedef boost::dynamic_bitset< uint64_t, Allocator > bitset_type; - bitset_type bitSet; - - public: - const uint64_t* getBits(); - void clear(); - void clear(uint32_t bitIndex); - void fastClear(uint32_t bitIndex); - void clear(uint32_t fromIndex, uint32_t toIndex); - void fastClear(uint32_t fromIndex, uint32_t toIndex); - void set(uint32_t bitIndex); - void fastSet(uint32_t bitIndex); - void set(uint32_t bitIndex, bool value); - void fastSet(uint32_t bitIndex, bool value); - void set(uint32_t fromIndex, uint32_t toIndex); - void fastSet(uint32_t fromIndex, uint32_t toIndex); - void set(uint32_t fromIndex, uint32_t toIndex, bool value); - void fastSet(uint32_t fromIndex, uint32_t toIndex, bool value); - void flip(uint32_t bitIndex); - void fastFlip(uint32_t bitIndex); - void flip(uint32_t fromIndex, uint32_t toIndex); - void fastFlip(uint32_t fromIndex, uint32_t toIndex); - uint32_t size() const; - uint32_t numBlocks() const; - bool isEmpty() const; - bool get(uint32_t bitIndex) const; - bool fastGet(uint32_t bitIndex) const; - int32_t nextSetBit(uint32_t fromIndex) const; - void _and(BitSetPtr set); - void _or(BitSetPtr set); - void _xor(BitSetPtr set); - void andNot(BitSetPtr set); - bool intersectsBitSet(BitSetPtr set) const; - uint32_t cardinality(); - void resize(uint32_t size); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/BitUtil.h b/include/BitUtil.h deleted file mode 100644 index 6fde2cc3..00000000 --- a/include/BitUtil.h +++ /dev/null @@ -1,75 +0,0 @@ 
-///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BITUTIL_H -#define BITUTIL_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A variety of high efficiency bit twiddling routines. - class LPPAPI BitUtil : public LuceneObject - { - public: - virtual ~BitUtil(); - LUCENE_CLASS(BitUtil); - - public: - /// Table of number of trailing zeros in a byte - static const uint8_t ntzTable[]; - - public: - /// Returns the number of bits set in the long - static int32_t pop(int64_t x); - - /// Returns the number of set bits in an array of longs. - static int64_t pop_array(const int64_t* A, int32_t wordOffset, int32_t numWords); - - /// Returns the popcount or cardinality of the two sets after an intersection. Neither array is modified. - static int64_t pop_intersect(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); - - /// Returns the popcount or cardinality of the union of two sets. Neither array is modified. - static int64_t pop_union(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); - - /// Returns the popcount or cardinality of A & ~B. Neither array is modified. - static int64_t pop_andnot(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); - - /// Returns the popcount or cardinality of A ^ B. Neither array is modified. - static int64_t pop_xor(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); - - /// Returns number of trailing zeros in a 64 bit long value. - static int32_t ntz(int64_t val); - - /// Returns number of trailing zeros in a 32 bit int value. 
- static int32_t ntz(int32_t val); - - /// Returns 0 based index of first set bit (only works for x!=0) - /// This is an alternate implementation of ntz() - static int32_t ntz2(int64_t x); - - /// Returns 0 based index of first set bit. - /// This is an alternate implementation of ntz() - static int32_t ntz3(int64_t x); - - /// Returns true if v is a power of two or zero. - static bool isPowerOfTwo(int32_t v); - - /// Returns true if v is a power of two or zero. - static bool isPowerOfTwo(int64_t v); - - /// Returns the next highest power of two, or the current value if it's already a power of two or zero. - static int32_t nextHighestPowerOfTwo(int32_t v); - - /// Returns the next highest power of two, or the current value if it's already a power of two or zero. - static int64_t nextHighestPowerOfTwo(int64_t v); - - protected: - inline static void CSA(int64_t& h, int64_t& l, int64_t a, int64_t b, int64_t c); - }; -} - -#endif diff --git a/include/BitVector.h b/include/BitVector.h deleted file mode 100644 index b91d63f4..00000000 --- a/include/BitVector.h +++ /dev/null @@ -1,95 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BITVECTOR_H -#define BITVECTOR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Optimized implementation of a vector of bits. - class LPPAPI BitVector : public LuceneObject - { - public: - /// Constructs a vector capable of holding n bits. - BitVector(int32_t n = 0); - - BitVector(ByteArray bits, int32_t size); - - /// Constructs a bit vector from the file name in Directory d, - /// as written by the {@link #write} method. 
- BitVector(DirectoryPtr d, const String& name); - - virtual ~BitVector(); - - LUCENE_CLASS(BitVector); - - protected: - ByteArray bits; - int32_t _size; - int32_t _count; - - static const uint8_t BYTE_COUNTS[]; // table of bits/byte - - public: - /// Clone this vector - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Sets the value of bit to one. - void set(int32_t bit); - - /// Sets the value of bit to true, and returns true if bit was already set. - bool getAndSet(int32_t bit); - - /// Sets the value of bit to zero. - void clear(int32_t bit); - - /// Returns true if bit is one and false if it is zero. - bool get(int32_t bit); - - /// Returns the number of bits in this vector. This is also one greater than - /// the number of the largest valid bit number. - int32_t size(); - - /// Returns the total number of one bits in this vector. This is efficiently - /// computed and cached, so that, if the vector is not changed, no recomputation - /// is done for repeated calls. - int32_t count(); - - /// For testing - int32_t getRecomputedCount(); - - /// Writes this vector to the file name in Directory d, in a format that can - /// be read by the constructor {@link #BitVector(DirectoryPtr, const String&)}. - void write(DirectoryPtr d, const String& name); - - /// Retrieve a subset of this BitVector. - /// @param start starting index, inclusive - /// @param end ending index, exclusive - /// @return subset - BitVectorPtr subset(int32_t start, int32_t end); - - protected: - /// Write as a bit set. - void writeBits(IndexOutputPtr output); - - /// Write as a d-gaps list. - void writeDgaps(IndexOutputPtr output); - - /// Indicates if the bit vector is sparse and should be saved as a d-gaps list, - /// or dense, and should be saved as a bit set. - bool isSparse(); - - /// Read as a bit set. - void readBits(IndexInputPtr input); - - /// Read as a d-gaps list. 
- void readDgaps(IndexInputPtr input); - }; -} - -#endif diff --git a/include/BooleanClause.h b/include/BooleanClause.h deleted file mode 100644 index 287af56b..00000000 --- a/include/BooleanClause.h +++ /dev/null @@ -1,61 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BOOLEANCLAUSE_H -#define BOOLEANCLAUSE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A clause in a BooleanQuery. - class LPPAPI BooleanClause : public LuceneObject - { - public: - /// Specifies how clauses are to occur in matching documents. - enum Occur - { - /// Use this operator for clauses that must appear in the matching documents. - MUST, - - /// Use this operator for clauses that should appear in the matching documents. For a BooleanQuery - /// with no MUST clauses one or more SHOULD clauses must match a document for the BooleanQuery to match. - /// @see BooleanQuery#setMinimumNumberShouldMatch - SHOULD, - - /// Use this operator for clauses that must not appear in the matching documents. Note that it is not - /// possible to search for queries that only consist of a MUST_NOT clause. - MUST_NOT - }; - - public: - BooleanClause(QueryPtr query, Occur occur); - virtual ~BooleanClause(); - - LUCENE_CLASS(BooleanClause); - - protected: - /// The query whose matching documents are combined by the boolean query. 
- QueryPtr query; - Occur occur; - - public: - Occur getOccur(); - void setOccur(Occur occur); - - QueryPtr getQuery(); - void setQuery(QueryPtr query); - - bool isProhibited(); - bool isRequired(); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual String toString(); - }; -} - -#endif diff --git a/include/BooleanQuery.h b/include/BooleanQuery.h deleted file mode 100644 index 38112691..00000000 --- a/include/BooleanQuery.h +++ /dev/null @@ -1,102 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BOOLEANQUERY_H -#define BOOLEANQUERY_H - -#include "Query.h" -#include "BooleanClause.h" -#include "Weight.h" - -namespace Lucene -{ - /// A Query that matches documents matching boolean combinations of other queries, eg. {@link TermQuery}s, - /// {@link PhraseQuery}s or other BooleanQuerys. - class LPPAPI BooleanQuery : public Query - { - public: - /// Constructs an empty boolean query. - /// - /// {@link Similarity#coord(int32_t, int32_t)} may be disabled in scoring, as appropriate. For example, - /// this score factor does not make sense for most automatically generated queries, like {@link WildcardQuery} - /// and {@link FuzzyQuery}. - /// - /// @param disableCoord disables {@link Similarity#coord(int32_t, int32_t)} in scoring. - BooleanQuery(bool disableCoord = false); - virtual ~BooleanQuery(); - - LUCENE_CLASS(BooleanQuery); - - protected: - static int32_t maxClauseCount; - - Collection clauses; - bool disableCoord; - int32_t minNrShouldMatch; - - public: - using Query::toString; - - /// Return the maximum number of clauses permitted, 1024 by default. 
Attempts to add more than the permitted - /// number of clauses cause TooManyClauses to be thrown. - /// @see #setMaxClauseCount(int32_t) - static int32_t getMaxClauseCount(); - - /// Set the maximum number of clauses permitted per BooleanQuery. Default value is 1024. - static void setMaxClauseCount(int32_t maxClauseCount); - - /// Returns true if {@link Similarity#coord(int32_t, int32_t)} is disabled in scoring for this query instance. - /// @see #BooleanQuery(bool) - bool isCoordDisabled(); - - /// Implement coord disabling. - virtual SimilarityPtr getSimilarity(SearcherPtr searcher); - - /// Specifies a minimum number of the optional BooleanClauses which must be satisfied. - /// - /// By default no optional clauses are necessary for a match (unless there are no required clauses). If this - /// method is used, then the specified number of clauses is required. - /// - /// Use of this method is totally independent of specifying that any specific clauses are required (or prohibited). - /// This number will only be compared against the number of matching optional clauses. - /// - /// @param min the number of optional clauses that must match - void setMinimumNumberShouldMatch(int32_t min); - - /// Gets the minimum number of the optional BooleanClauses which must be satisfied. - int32_t getMinimumNumberShouldMatch(); - - /// Adds a clause to a boolean query. - /// @see #getMaxClauseCount() - void add(QueryPtr query, BooleanClause::Occur occur); - - /// Adds a clause to a boolean query. - /// @see #getMaxClauseCount() - void add(BooleanClausePtr clause); - - /// Returns the set of clauses in this query. - Collection getClauses(); - - /// Returns an iterator on the clauses in this query. 
- Collection::iterator begin(); - Collection::iterator end(); - - virtual WeightPtr createWeight(SearcherPtr searcher); - - virtual QueryPtr rewrite(IndexReaderPtr reader); - - virtual void extractTerms(SetTerm terms); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual String toString(const String& field); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - friend class BooleanWeight; - }; -} - -#endif diff --git a/include/BooleanScorer.h b/include/BooleanScorer.h deleted file mode 100644 index 98665d3b..00000000 --- a/include/BooleanScorer.h +++ /dev/null @@ -1,160 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BOOLEANSCORER_H -#define BOOLEANSCORER_H - -#include "Scorer.h" -#include "Collector.h" - -namespace Lucene -{ - /// BooleanScorer uses a ~16k array to score windows of docs. So it scores docs 0-16k first, then docs 16-32k, - /// etc. For each window it iterates through all query terms and accumulates a score in table[doc%16k]. It also - /// stores in the table a bitmask representing which terms contributed to the score. Non-zero scores are chained - /// in a linked list. At the end of scoring each window it then iterates through the linked list and, if the - /// bitmask matches the boolean constraints, collects a hit. For boolean queries with lots of frequent terms this - /// can be much faster, since it does not need to update a priority queue for each posting, instead performing - /// constant-time operations per posting. The only downside is that it results in hits being delivered out-of-order - /// within the window, which means it cannot be nested within other scorers. 
But it works well as a top-level scorer. - /// - /// The new BooleanScorer2 implementation instead works by merging priority queues of postings, albeit with some - /// clever tricks. For example, a pure conjunction (all terms required) does not require a priority queue. Instead it - /// sorts the posting streams at the start, then repeatedly skips the first to to the last. If the first ever equals - /// the last, then there's a hit. When some terms are required and some terms are optional, the conjunction can - /// be evaluated first, then the optional terms can all skip to the match and be added to the score. Thus the - /// conjunction can reduce the number of priority queue updates for the optional terms. - class BooleanScorer : public Scorer - { - public: - BooleanScorer(SimilarityPtr similarity, int32_t minNrShouldMatch, Collection optionalScorers, Collection prohibitedScorers); - virtual ~BooleanScorer(); - - LUCENE_CLASS(BooleanScorer); - - protected: - SubScorerPtr scorers; - BucketTablePtr bucketTable; - int32_t maxCoord; - Collection coordFactors; - int32_t requiredMask; - int32_t prohibitedMask; - int32_t nextMask; - int32_t minNrShouldMatch; - int32_t end; - BucketPtr current; - int32_t doc; - - protected: - // firstDocID is ignored since nextDoc() initializes 'current' - virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); - - public: - virtual int32_t advance(int32_t target); - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual double score(); - virtual void score(CollectorPtr collector); - virtual String toString(); - }; - - class BooleanScorerCollector : public Collector - { - public: - BooleanScorerCollector(int32_t mask, BucketTablePtr bucketTable); - virtual ~BooleanScorerCollector(); - - LUCENE_CLASS(BooleanScorerCollector); - - protected: - BucketTableWeakPtr _bucketTable; - int32_t mask; - ScorerWeakPtr _scorer; - - public: - virtual void collect(int32_t doc); - virtual void setNextReader(IndexReaderPtr 
reader, int32_t docBase); - virtual void setScorer(ScorerPtr scorer); - virtual bool acceptsDocsOutOfOrder(); - }; - - // An internal class which is used in score(Collector, int32_t) for setting the current score. This is required - // since Collector exposes a setScorer method and implementations that need the score will call scorer->score(). - // Therefore the only methods that are implemented are score() and doc(). - class BucketScorer : public Scorer - { - public: - BucketScorer(); - virtual ~BucketScorer(); - - LUCENE_CLASS(BucketScorer); - - public: - double _score; - int32_t doc; - - public: - virtual int32_t advance(int32_t target); - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual double score(); - }; - - class Bucket : public LuceneObject - { - public: - Bucket(); - virtual ~Bucket(); - - LUCENE_CLASS(Bucket); - - public: - int32_t doc; // tells if bucket is valid - double score; // incremental score - int32_t bits; // used for bool constraints - int32_t coord; // count of terms in score - BucketWeakPtr _next; // next valid bucket - }; - - /// A simple hash table of document scores within a range. 
- class BucketTable : public LuceneObject - { - public: - BucketTable(); - virtual ~BucketTable(); - - LUCENE_CLASS(BucketTable); - - public: - static const int32_t SIZE; - static const int32_t MASK; - - Collection buckets; - BucketPtr first; // head of valid list - - public: - CollectorPtr newCollector(int32_t mask); - int32_t size(); - }; - - class SubScorer : public LuceneObject - { - public: - SubScorer(ScorerPtr scorer, bool required, bool prohibited, CollectorPtr collector, SubScorerPtr next); - virtual ~SubScorer(); - - LUCENE_CLASS(SubScorer); - - public: - ScorerPtr scorer; - bool required; - bool prohibited; - CollectorPtr collector; - SubScorerPtr next; - }; -} - -#endif diff --git a/include/BooleanScorer2.h b/include/BooleanScorer2.h deleted file mode 100644 index 1a9f9785..00000000 --- a/include/BooleanScorer2.h +++ /dev/null @@ -1,170 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BOOLEANSCORER2_H -#define BOOLEANSCORER2_H - -#include "DisjunctionSumScorer.h" -#include "ConjunctionScorer.h" - -namespace Lucene -{ - /// See the description in BooleanScorer, comparing BooleanScorer & BooleanScorer2 - /// - /// An alternative to BooleanScorer that also allows a minimum number of optional scorers that should match. - /// Implements skipTo(), and has no limitations on the numbers of added scorers. - /// Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. - class BooleanScorer2 : public Scorer - { - public: - /// Creates a {@link Scorer} with the given similarity and lists of required, prohibited and optional - /// scorers. 
In no required scorers are added, at least one of the optional scorers will have to match - /// during the search. - /// - /// @param similarity The similarity to be used. - /// @param minNrShouldMatch The minimum number of optional added scorers that should match during the search. - /// In case no required scorers are added, at least one of the optional scorers will have to match during - /// the search. - /// @param required The list of required scorers. - /// @param prohibited The list of prohibited scorers. - /// @param optional The list of optional scorers. - BooleanScorer2(SimilarityPtr similarity, int32_t minNrShouldMatch, Collection required, Collection prohibited, Collection optional); - - virtual ~BooleanScorer2(); - - LUCENE_CLASS(BooleanScorer2); - - protected: - Collection requiredScorers; - Collection optionalScorers; - Collection prohibitedScorers; - - CoordinatorPtr coordinator; - - /// The scorer to which all scoring will be delegated, except for computing and using the coordination factor. - ScorerPtr countingSumScorer; - - int32_t minNrShouldMatch; - int32_t doc; - - public: - virtual void initialize(); - - /// Scores and collects all matching documents. - /// @param collector The collector to which all matching documents are passed through. - virtual void score(CollectorPtr collector); - - virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual double score(); - virtual int32_t advance(int32_t target); - - protected: - ScorerPtr countingDisjunctionSumScorer(Collection scorers, int32_t minNrShouldMatch); - ScorerPtr countingConjunctionSumScorer(Collection requiredScorers); - ScorerPtr dualConjunctionSumScorer(ScorerPtr req1, ScorerPtr req2); - - /// Returns the scorer to be used for match counting and score summing. Uses requiredScorers, optionalScorers - /// and prohibitedScorers. 
- ScorerPtr makeCountingSumScorer(); - ScorerPtr makeCountingSumScorerNoReq(); - ScorerPtr makeCountingSumScorerSomeReq(); - - /// Returns the scorer to be used for match counting and score summing. Uses the given required scorer and - /// the prohibitedScorers. - /// @param requiredCountingSumScorer A required scorer already built. - ScorerPtr addProhibitedScorers(ScorerPtr requiredCountingSumScorer); - - friend class CountingDisjunctionSumScorer; - friend class CountingConjunctionSumScorer; - }; - - class Coordinator : public LuceneObject - { - public: - Coordinator(BooleanScorer2Ptr scorer); - virtual ~Coordinator(); - - LUCENE_CLASS(Coordinator); - - public: - BooleanScorer2WeakPtr _scorer; - Collection coordFactors; - int32_t maxCoord; // to be increased for each non prohibited scorer - int32_t nrMatchers; // to be increased by score() of match counting scorers. - - public: - void init(); // use after all scorers have been added. - - friend class BooleanScorer2; - }; - - /// Count a scorer as a single match. - class SingleMatchScorer : public Scorer - { - public: - SingleMatchScorer(ScorerPtr scorer, CoordinatorPtr coordinator); - virtual ~SingleMatchScorer(); - - LUCENE_CLASS(SingleMatchScorer); - - protected: - ScorerPtr scorer; - CoordinatorPtr coordinator; - int32_t lastScoredDoc; - double lastDocScore; - - public: - virtual double score(); - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - }; - - class CountingDisjunctionSumScorer : public DisjunctionSumScorer - { - public: - CountingDisjunctionSumScorer(BooleanScorer2Ptr scorer, Collection subScorers, int32_t minimumNrMatchers); - virtual ~CountingDisjunctionSumScorer(); - - LUCENE_CLASS(CountingDisjunctionSumScorer); - - protected: - BooleanScorer2WeakPtr _scorer; - int32_t lastScoredDoc; - - // Save the score of lastScoredDoc, so that we don't compute it more than once in score(). 
- double lastDocScore; - - public: - virtual double score(); - - friend class BooleanScorer2; - }; - - class CountingConjunctionSumScorer : public ConjunctionScorer - { - public: - CountingConjunctionSumScorer(BooleanScorer2Ptr scorer, SimilarityPtr similarity, Collection scorers); - virtual ~CountingConjunctionSumScorer(); - - LUCENE_CLASS(CountingConjunctionSumScorer); - - protected: - BooleanScorer2WeakPtr _scorer; - int32_t lastScoredDoc; - int32_t requiredNrMatchers; - - // Save the score of lastScoredDoc, so that we don't compute it more than once in score(). - double lastDocScore; - - public: - virtual double score(); - }; -} - -#endif diff --git a/include/BufferedDeletes.h b/include/BufferedDeletes.h deleted file mode 100644 index aa79c001..00000000 --- a/include/BufferedDeletes.h +++ /dev/null @@ -1,58 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BUFFEREDDELETES_H -#define BUFFEREDDELETES_H - -#include "Term.h" -#include "Query.h" - -namespace Lucene -{ - /// Holds buffered deletes, by docID, term or query. We hold two instances of this class: one for - /// the deletes prior to the last flush, the other for deletes after the last flush. This is so if - /// we need to abort (discard all buffered docs) we can also discard the buffered deletes yet keep - /// the deletes done during previously flushed segments. 
- class BufferedDeletes : public LuceneObject - { - public: - BufferedDeletes(bool doTermSort); - virtual ~BufferedDeletes(); - - LUCENE_CLASS(BufferedDeletes); - - public: - int32_t numTerms; - MapTermNum terms; - MapQueryInt queries; - Collection docIDs; - int64_t bytesUsed; - - public: - int32_t size(); - void update(BufferedDeletesPtr in); - void clear(); - void addBytesUsed(int64_t b); - bool any(); - void remap(MergeDocIDRemapperPtr mapper, SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergedDocCount); - }; - - /// Number of documents a delete term applies to. - class Num : public LuceneObject - { - public: - Num(int32_t num); - - protected: - int32_t num; - - public: - int32_t getNum(); - void setNum(int32_t num); - }; -} - -#endif diff --git a/include/BufferedIndexInput.h b/include/BufferedIndexInput.h deleted file mode 100644 index d100a11a..00000000 --- a/include/BufferedIndexInput.h +++ /dev/null @@ -1,105 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BUFFEREDINDEXINPUT_H -#define BUFFEREDINDEXINPUT_H - -#include "IndexInput.h" - -namespace Lucene -{ - /// Base implementation class for buffered {@link IndexInput}. - class LPPAPI BufferedIndexInput : public IndexInput - { - public: - /// Construct BufferedIndexInput with a specific bufferSize. - BufferedIndexInput(int32_t bufferSize = BUFFER_SIZE); - virtual ~BufferedIndexInput(); - - LUCENE_CLASS(BufferedIndexInput); - - public: - /// Default buffer size. 
- static const int32_t BUFFER_SIZE; - - protected: - int32_t bufferSize; - int64_t bufferStart; // position in file of buffer - int32_t bufferLength; // end of valid bytes - int32_t bufferPosition; // next byte to read - ByteArray buffer; - - public: - /// Reads and returns a single byte. - /// @see IndexOutput#writeByte(uint8_t) - virtual uint8_t readByte(); - - /// Change the buffer size used by this IndexInput. - void setBufferSize(int32_t newSize); - - /// Returns buffer size. - /// @see #setBufferSize - int32_t getBufferSize(); - - /// Reads a specified number of bytes into an array at the specified offset. - /// @param b the array to read bytes into. - /// @param offset the offset in the array to start storing bytes. - /// @param length the number of bytes to read. - /// @see IndexOutput#writeBytes(const uint8_t*,int) - /// @see #readInternal(uint8_t*, int32_t, int32_t) - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); - - /// Reads a specified number of bytes into an array at the specified offset with control over whether the - /// read should be buffered (callers who have their own buffer should pass in "false" for useBuffer). - /// Currently only {@link BufferedIndexInput} respects this parameter. - /// @param b the array to read bytes into. - /// @param offset the offset in the array to start storing bytes. - /// @param length the number of bytes to read. - /// @param useBuffer set to false if the caller will handle buffering. - /// @see IndexOutput#writeBytes(const uint8_t*,int) - /// @see #readInternal(uint8_t*, int32_t, int32_t) - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer); - - /// Closes the stream to further operations. - virtual void close(); - - /// Returns the current position in this file, where the next read will occur. - /// @see #seek(int64_t) - virtual int64_t getFilePointer(); - - /// Sets current position in this file, where the next read will occur. 
- /// @see #getFilePointer() - /// @see #seekInternal(int64_t) - virtual void seek(int64_t pos); - - /// Returns a clone of this stream. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - protected: - virtual void newBuffer(ByteArray newBuffer); - - void checkBufferSize(int32_t bufferSize); - - /// Refill buffer in preparation for reading. - /// @see #readInternal(uint8_t*, int32_t, int32_t) - /// @see #seekInternal(int64_t) - virtual void refill(); - - /// Implements buffer refill. Reads bytes from the current position in the input. - /// @param b the array to read bytes into. - /// @param offset the offset in the array to start storing bytes. - /// @param length the number of bytes to read. - virtual void readInternal(uint8_t* b, int32_t offset, int32_t length) = 0; - - /// Implements seek. Sets current position in this file, where the next {@link - /// #readInternal(uint8_t*, int32_t, int32_t)} will occur. - /// @param pos position to set next write. - /// @see #readInternal(uint8_t*, int32_t, int32_t) - virtual void seekInternal(int64_t pos) = 0; - }; -} - -#endif diff --git a/include/BufferedIndexOutput.h b/include/BufferedIndexOutput.h deleted file mode 100644 index 4d474050..00000000 --- a/include/BufferedIndexOutput.h +++ /dev/null @@ -1,75 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BUFFEREDINDEXOUTPUT_H -#define BUFFEREDINDEXOUTPUT_H - -#include "IndexOutput.h" - -namespace Lucene -{ - /// Base implementation class for buffered {@link IndexOutput}. 
- class LPPAPI BufferedIndexOutput : public IndexOutput - { - public: - BufferedIndexOutput(); - virtual ~BufferedIndexOutput(); - - LUCENE_CLASS(BufferedIndexOutput); - - public: - static const int32_t BUFFER_SIZE; - - protected: - int64_t bufferStart; // position in file of buffer - int32_t bufferPosition; // position in buffer - ByteArray buffer; - - public: - /// Writes a single byte. - /// @see IndexInput#readByte() - virtual void writeByte(uint8_t b); - - /// Writes an array of bytes. - /// @param b the bytes to write. - /// @param length the number of bytes to write. - /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) - virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); - - /// Forces any buffered output to be written. - virtual void flush(); - - /// Implements buffer write. Writes bytes at the current - /// position in the output. - /// @param b the bytes to write. - /// @param offset the offset in the byte array. - /// @param length the number of bytes to write. - virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length); - - /// Closes this stream to further operations. - virtual void close(); - - /// Returns the current position in this file, where the next write will occur. - /// @see #seek(long) - virtual int64_t getFilePointer(); - - /// Sets current position in this file, where the next write will occur. - /// @see #getFilePointer() - virtual void seek(int64_t pos); - - /// The number of bytes in the file. - virtual int64_t length() = 0; - - protected: - /// Implements buffer write. Writes bytes at the current - /// position in the output. - /// @param b the bytes to write. - /// @param length the number of bytes to write. 
- void flushBuffer(const uint8_t* b, int32_t length); - }; -} - -#endif diff --git a/include/BufferedReader.h b/include/BufferedReader.h deleted file mode 100644 index 60f49cb3..00000000 --- a/include/BufferedReader.h +++ /dev/null @@ -1,63 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BUFFEREDREADER_H -#define BUFFEREDREADER_H - -#include "Reader.h" - -namespace Lucene -{ - /// Read text from a character-input stream, buffering characters so as to provide - /// for the efficient reading of characters, arrays, and lines. - class LPPAPI BufferedReader : public Reader - { - public: - /// Create a buffering character-input stream. - BufferedReader(ReaderPtr reader, int32_t size = READER_BUFFER); - virtual ~BufferedReader(); - - LUCENE_CLASS(BufferedReader); - - protected: - ReaderPtr reader; - int32_t bufferSize; - int32_t bufferLength; // end of valid bytes - int32_t bufferPosition; // next byte to read - CharArray buffer; - - public: - static const int32_t READER_BUFFER; - - public: - /// Read a single character. - virtual int32_t read(); - - /// Read characters into a portion of an array. - virtual int32_t read(wchar_t* b, int32_t offset, int32_t length); - - /// Read a line of text. - virtual bool readLine(String& line); - - /// Close the stream. - virtual void close(); - - /// Tell whether this stream supports the mark() operation - virtual bool markSupported(); - - /// Reset the stream. - virtual void reset(); - - protected: - /// Refill buffer in preparation for reading. - int32_t refill(); - - /// Read a single character without moving position. 
- int32_t peek(); - }; -} - -#endif diff --git a/include/ByteBlockPool.h b/include/ByteBlockPool.h deleted file mode 100644 index 9126784c..00000000 --- a/include/ByteBlockPool.h +++ /dev/null @@ -1,69 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BYTEBLOCKPOOL_H -#define BYTEBLOCKPOOL_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Class that Posting and PostingVector use to write byte streams into shared fixed-size byte[] arrays. - /// The idea is to allocate slices of increasing lengths. For example, the first slice is 5 bytes, the - /// next slice is 14, etc. We start by writing our bytes into the first 5 bytes. When we hit the end of - /// the slice, we allocate the next slice and then write the address of the new slice into the last 4 - /// bytes of the previous slice (the "forwarding address"). - /// - /// Each slice is filled with 0's initially, and we mark the end with a non-zero byte. This way the methods - /// that are writing into the slice don't need to record its length and instead allocate a new slice once - /// they hit a non-zero byte. 
- class ByteBlockPool : public LuceneObject - { - public: - ByteBlockPool(ByteBlockPoolAllocatorBasePtr allocator, bool trackAllocations); - virtual ~ByteBlockPool(); - - LUCENE_CLASS(ByteBlockPool); - - public: - Collection buffers; - int32_t bufferUpto; // Which buffer we are up to - int32_t byteUpto; // Where we are in head buffer - - ByteArray buffer; - int32_t byteOffset; - - static const int32_t nextLevelArray[]; - static const int32_t levelSizeArray[]; - - protected: - bool trackAllocations; - ByteBlockPoolAllocatorBasePtr allocator; - - public: - static int32_t FIRST_LEVEL_SIZE(); - - void reset(); - void nextBuffer(); - int32_t newSlice(int32_t size); - int32_t allocSlice(ByteArray slice, int32_t upto); - }; - - class ByteBlockPoolAllocatorBase : public LuceneObject - { - public: - virtual ~ByteBlockPoolAllocatorBase(); - - LUCENE_CLASS(ByteBlockPoolAllocatorBase); - - public: - virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end) = 0; - virtual void recycleByteBlocks(Collection blocks) = 0; - virtual ByteArray getByteBlock(bool trackAllocations) = 0; - }; -} - -#endif diff --git a/include/ByteFieldSource.h b/include/ByteFieldSource.h deleted file mode 100644 index 5fca05df..00000000 --- a/include/ByteFieldSource.h +++ /dev/null @@ -1,44 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BYTEFIELDSOURCE_H -#define BYTEFIELDSOURCE_H - -#include "FieldCacheSource.h" - -namespace Lucene -{ - /// Obtains byte field values from the {@link FieldCache} using getBytes() and makes those values available - /// as other numeric types, casting as needed. - /// - /// @see FieldCacheSource for requirements on the field. 
- /// - /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite - /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's - /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, - /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU - /// per lookup but will not consume double the FieldCache RAM. - class LPPAPI ByteFieldSource : public FieldCacheSource - { - public: - /// Create a cached byte field source with a specific string-to-byte parser. - ByteFieldSource(const String& field, ByteParserPtr parser = ByteParserPtr()); - virtual ~ByteFieldSource(); - - LUCENE_CLASS(ByteFieldSource); - - protected: - ByteParserPtr parser; - - public: - virtual String description(); - virtual DocValuesPtr getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader); - virtual bool cachedFieldSourceEquals(FieldCacheSourcePtr other); - virtual int32_t cachedFieldSourceHashCode(); - }; -} - -#endif diff --git a/include/ByteSliceReader.h b/include/ByteSliceReader.h deleted file mode 100644 index 26b7eacb..00000000 --- a/include/ByteSliceReader.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BYTESLICEREADER_H -#define BYTESLICEREADER_H - -#include "IndexInput.h" - -namespace Lucene -{ - /// IndexInput that knows how to read the byte slices written by Posting and PostingVector. 
We read the bytes in each slice - /// until we hit the end of that slice at which point we read the forwarding address of the next slice and then jump to it. - class ByteSliceReader : public IndexInput - { - public: - ByteSliceReader(); - virtual ~ByteSliceReader(); - - LUCENE_CLASS(ByteSliceReader); - - public: - ByteBlockPoolPtr pool; - int32_t bufferUpto; - ByteArray buffer; - int32_t upto; - int32_t limit; - int32_t level; - int32_t bufferOffset; - int32_t endIndex; - - public: - void init(ByteBlockPoolPtr pool, int32_t startIndex, int32_t endIndex); - bool eof(); - - /// Reads and returns a single byte. - virtual uint8_t readByte(); - - int64_t writeTo(IndexOutputPtr out); - - void nextSlice(); - - /// Reads a specified number of bytes into an array at the specified offset. - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); - - /// Not implemented - virtual int64_t getFilePointer(); - - /// Not implemented - virtual int64_t length(); - - /// Not implemented - virtual void seek(int64_t pos); - - /// Not implemented - virtual void close(); - }; -} - -#endif diff --git a/include/ByteSliceWriter.h b/include/ByteSliceWriter.h deleted file mode 100644 index 3dc9c97c..00000000 --- a/include/ByteSliceWriter.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef BYTESLICEWRITER_H -#define BYTESLICEWRITER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Class to write byte streams into slices of shared byte[]. This is used by DocumentsWriter to hold - /// the posting list for many terms in RAM. 
- class ByteSliceWriter : public LuceneObject - { - public: - ByteSliceWriter(ByteBlockPoolPtr pool); - virtual ~ByteSliceWriter(); - - LUCENE_CLASS(ByteSliceWriter); - - protected: - ByteArray slice; - int32_t upto; - ByteBlockPoolPtr pool; - - public: - int32_t offset0; - - public: - /// Set up the writer to write at address. - void init(int32_t address); - - /// Write byte into byte slice stream - void writeByte(uint8_t b); - - void writeBytes(const uint8_t* b, int32_t offset, int32_t length); - int32_t getAddress(); - void writeVInt(int32_t i); - }; -} - -#endif diff --git a/include/CMakeLists.txt b/include/CMakeLists.txt new file mode 100644 index 00000000..63c064eb --- /dev/null +++ b/include/CMakeLists.txt @@ -0,0 +1,8 @@ +#################################### +# include directories +#################################### + +add_subdirectory(config_h) +add_subdirectory(lucene++) + +include_directories("${CMAKE_CURRENT_SOURCE_DIR}") \ No newline at end of file diff --git a/include/CachingSpanFilter.h b/include/CachingSpanFilter.h deleted file mode 100644 index 6b2802ef..00000000 --- a/include/CachingSpanFilter.h +++ /dev/null @@ -1,48 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CACHINGSPANFILTER_H -#define CACHINGSPANFILTER_H - -#include "SpanFilter.h" -#include "CachingWrapperFilter.h" - -namespace Lucene -{ - /// Wraps another SpanFilter's result and caches it. The purpose is to allow filters to simply filter, - /// and then wrap with this class to add caching. - class LPPAPI CachingSpanFilter : public SpanFilter - { - public: - /// New deletions always result in a cache miss, by default ({@link CachingWrapperFilter#RECACHE}. 
- CachingSpanFilter(SpanFilterPtr filter, CachingWrapperFilter::DeletesMode deletesMode = CachingWrapperFilter::DELETES_RECACHE); - virtual ~CachingSpanFilter(); - - LUCENE_CLASS(CachingSpanFilter); - - protected: - SpanFilterPtr filter; - FilterCachePtr cache; - - public: - // for testing - int32_t hitCount; - int32_t missCount; - - public: - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); - virtual SpanFilterResultPtr bitSpans(IndexReaderPtr reader); - - virtual String toString(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - protected: - SpanFilterResultPtr getCachedResult(IndexReaderPtr reader); - }; -} - -#endif diff --git a/include/CachingTokenFilter.h b/include/CachingTokenFilter.h deleted file mode 100644 index 3d1f2e44..00000000 --- a/include/CachingTokenFilter.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CACHINGTOKENFILTER_H -#define CACHINGTOKENFILTER_H - -#include "TokenFilter.h" - -namespace Lucene -{ - /// This class can be used if the token attributes of a TokenStream are intended to be consumed more than once. - /// It caches all token attribute states locally in a List. - /// - /// CachingTokenFilter implements the optional method {@link TokenStream#reset()}, which repositions the stream - /// to the first Token. 
- class LPPAPI CachingTokenFilter : public TokenFilter - { - public: - CachingTokenFilter(TokenStreamPtr input); - virtual ~CachingTokenFilter(); - - LUCENE_CLASS(CachingTokenFilter); - - protected: - Collection cache; - Collection::iterator iterator; - AttributeSourceStatePtr finalState; - - public: - virtual bool incrementToken(); - virtual void end(); - virtual void reset(); - - protected: - void fillCache(); - }; -} - -#endif diff --git a/include/CachingWrapperFilter.h b/include/CachingWrapperFilter.h deleted file mode 100644 index 782abd59..00000000 --- a/include/CachingWrapperFilter.h +++ /dev/null @@ -1,69 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CACHINGWRAPPERFILTER_H -#define CACHINGWRAPPERFILTER_H - -#include "Filter.h" - -namespace Lucene -{ - /// Wraps another filter's result and caches it. The purpose is to allow filters to simply filter, and - /// then wrap with this class to add caching. - class LPPAPI CachingWrapperFilter : public Filter - { - public: - /// Specifies how new deletions against a reopened reader should be handled. - /// - /// The default is IGNORE, which means the cache entry will be re-used for a given segment, even when - /// that segment has been reopened due to changes in deletions. This is a big performance gain, - /// especially with near-real-timer readers, since you don't hit a cache miss on every reopened reader - /// for prior segments. - /// - /// However, in some cases this can cause invalid query results, allowing deleted documents to be - /// returned. This only happens if the main query does not rule out deleted documents on its own, - /// such as a toplevel ConstantScoreQuery. 
To fix this, use RECACHE to re-create the cached filter - /// (at a higher per-reopen cost, but at faster subsequent search performance), or use DYNAMIC to - /// dynamically intersect deleted docs (fast reopen time but some hit to search performance). - enum DeletesMode { DELETES_IGNORE, DELETES_RECACHE, DELETES_DYNAMIC }; - - /// New deletes are ignored by default, which gives higher cache hit rate on reopened readers. - /// Most of the time this is safe, because the filter will be AND'd with a Query that fully enforces - /// deletions. If instead you need this filter to always enforce deletions, pass either {@link - /// DeletesMode#RECACHE} or {@link DeletesMode#DYNAMIC}. - CachingWrapperFilter(FilterPtr filter, DeletesMode deletesMode = DELETES_IGNORE); - - virtual ~CachingWrapperFilter(); - - LUCENE_CLASS(CachingWrapperFilter); - - INTERNAL: - FilterPtr filter; - - // for testing - int32_t hitCount; - int32_t missCount; - - protected: - /// A Filter cache - FilterCachePtr cache; - - /// Provide the DocIdSet to be cached, using the DocIdSet provided by the wrapped Filter. - /// - /// This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable} returns - /// true, else it copies the {@link DocIdSetIterator} into an {@link OpenBitSetDISI}. - DocIdSetPtr docIdSetToCache(DocIdSetPtr docIdSet, IndexReaderPtr reader); - - public: - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); - - virtual String toString(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/CharArraySet.h b/include/CharArraySet.h deleted file mode 100644 index 45135a39..00000000 --- a/include/CharArraySet.h +++ /dev/null @@ -1,56 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef CHARARRAYSET_H -#define CHARARRAYSET_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A simple class that stores Strings as char[]'s in a hash table. Note that this is not a general purpose class. - /// For example, it cannot remove items from the set, nor does it resize its hash table to be smaller, etc. It is - /// designed to be quick to test if a char[] is in the set without the necessity of converting it to a String first. - class LPPAPI CharArraySet : public LuceneObject - { - public: - CharArraySet(bool ignoreCase); - - /// Create set from a set of strings. - CharArraySet(HashSet entries, bool ignoreCase); - - /// Create set from a collection of strings. - CharArraySet(Collection entries, bool ignoreCase); - - virtual ~CharArraySet(); - - LUCENE_CLASS(CharArraySet); - - protected: - HashSet entries; - bool ignoreCase; - - public: - virtual bool contains(const String& text); - - /// True if the length chars of text starting at offset are in the set - virtual bool contains(const wchar_t* text, int32_t offset, int32_t length); - - /// Add this String into the set - virtual bool add(const String& text); - - /// Add this char[] into the set. - virtual bool add(CharArray text); - - virtual int32_t size(); - virtual bool isEmpty(); - - HashSet::iterator begin(); - HashSet::iterator end(); - }; -} - -#endif diff --git a/include/CharBlockPool.h b/include/CharBlockPool.h deleted file mode 100644 index ea726a33..00000000 --- a/include/CharBlockPool.h +++ /dev/null @@ -1,40 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef CHARBLOCKPOOL_H -#define CHARBLOCKPOOL_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class CharBlockPool : public LuceneObject - { - public: - CharBlockPool(DocumentsWriterPtr docWriter); - virtual ~CharBlockPool(); - - LUCENE_CLASS(CharBlockPool); - - public: - Collection buffers; - int32_t numBuffer; - int32_t bufferUpto; // Which buffer we are up to - int32_t charUpto; // Where we are in head buffer - - CharArray buffer; // Current head buffer - int32_t charOffset; // Current head offset - - protected: - DocumentsWriterWeakPtr _docWriter; - - public: - void reset(); - void nextBuffer(); - }; -} - -#endif diff --git a/include/CharFilter.h b/include/CharFilter.h deleted file mode 100644 index 05f83792..00000000 --- a/include/CharFilter.h +++ /dev/null @@ -1,46 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CHARFILTER_H -#define CHARFILTER_H - -#include "CharStream.h" - -namespace Lucene -{ - /// Subclasses of CharFilter can be chained to filter CharStream. They can be used as {@link Reader} with - /// additional offset correction. {@link Tokenizer}s will automatically use {@link #correctOffset} if a - /// CharFilter/CharStream subclass is used. - class LPPAPI CharFilter : public CharStream - { - protected: - CharFilter(CharStreamPtr in); - public: - virtual ~CharFilter(); - - LUCENE_CLASS(CharFilter); - - protected: - CharStreamPtr input; - - protected: - /// Subclass may want to override to correct the current offset. 
- /// @param currentOff current offset - /// @return corrected offset - virtual int32_t correct(int32_t currentOff); - - /// Chains the corrected offset through the input CharFilter. - virtual int32_t correctOffset(int32_t currentOff); - - virtual void close(); - virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); - virtual bool markSupported(); - virtual void mark(int32_t readAheadLimit); - virtual void reset(); - }; -} - -#endif diff --git a/include/CharFolder.h b/include/CharFolder.h deleted file mode 100644 index bd4913ef..00000000 --- a/include/CharFolder.h +++ /dev/null @@ -1,51 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CHARFOLDER_H -#define CHARFOLDER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Utility class for folding character case. 
- class CharFolder : public LuceneObject - { - public: - virtual ~CharFolder(); - LUCENE_CLASS(CharFolder); - - protected: - static bool lowerCache; - static bool upperCache; - static wchar_t lowerChars[CHAR_MAX - CHAR_MIN + 1]; - static wchar_t upperChars[CHAR_MAX - CHAR_MIN + 1]; - - public: - static wchar_t toLower(wchar_t ch); - static wchar_t toUpper(wchar_t ch); - - template - static void toLower(ITER first, ITER last) - { - for (; first != last; ++first) - *first = toLower(*first); - } - - template - static void toUpper(ITER first, ITER last) - { - for (; first != last; ++first) - *first = toUpper(*first); - } - - protected: - static bool fillLower(); - static bool fillUpper(); - }; -} - -#endif diff --git a/include/CharReader.h b/include/CharReader.h deleted file mode 100644 index 2de76d55..00000000 --- a/include/CharReader.h +++ /dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CHARREADER_H -#define CHARREADER_H - -#include "CharStream.h" - -namespace Lucene -{ - /// CharReader is a Reader wrapper. It reads chars from Reader and outputs {@link CharStream}, defining an - /// identify function {@link #correctOffset} method that simply returns the provided offset. 
- class LPPAPI CharReader : public CharStream - { - public: - CharReader(ReaderPtr in); - virtual ~CharReader(); - - LUCENE_CLASS(CharReader); - - protected: - ReaderPtr input; - - public: - using CharStream::read; - - static CharStreamPtr get(ReaderPtr input); - - virtual int32_t correctOffset(int32_t currentOff); - virtual void close(); - virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); - virtual bool markSupported(); - virtual void mark(int32_t readAheadLimit); - virtual void reset(); - }; -} - -#endif diff --git a/include/CharStream.h b/include/CharStream.h deleted file mode 100644 index a7c92d6e..00000000 --- a/include/CharStream.h +++ /dev/null @@ -1,33 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CHARSTREAM_H -#define CHARSTREAM_H - -#include "Reader.h" - -namespace Lucene -{ - /// CharStream adds {@link #correctOffset} functionality over {@link Reader}. All Tokenizers accept a CharStream - /// instead of {@link Reader} as input, which enables arbitrary character based filtering before tokenization. - /// The {@link #correctOffset} method fixed offsets to account for removal or insertion of characters, so that the - /// offsets reported in the tokens match the character offsets of the original Reader. - class LPPAPI CharStream : public Reader - { - public: - virtual ~CharStream(); - LUCENE_CLASS(CharStream); - - public: - /// Called by CharFilter(s) and Tokenizer to correct token offset. 
- /// - /// @param currentOff offset as seen in the output - /// @return corrected offset based on the input - virtual int32_t correctOffset(int32_t currentOff) = 0; - }; -} - -#endif diff --git a/include/CharTokenizer.h b/include/CharTokenizer.h deleted file mode 100644 index e5298823..00000000 --- a/include/CharTokenizer.h +++ /dev/null @@ -1,54 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CHARTOKENIZER_H -#define CHARTOKENIZER_H - -#include "Tokenizer.h" - -namespace Lucene -{ - /// An abstract base class for simple, character-oriented tokenizers. - class LPPAPI CharTokenizer : public Tokenizer - { - public: - CharTokenizer(ReaderPtr input); - CharTokenizer(AttributeSourcePtr source, ReaderPtr input); - CharTokenizer(AttributeFactoryPtr factory, ReaderPtr input); - virtual ~CharTokenizer(); - - LUCENE_CLASS(CharTokenizer); - - protected: - int32_t offset; - int32_t bufferIndex; - int32_t dataLen; - - static const int32_t MAX_WORD_LEN; - static const int32_t IO_BUFFER_SIZE; - - CharArray ioBuffer; - TermAttributePtr termAtt; - OffsetAttributePtr offsetAtt; - - public: - virtual bool incrementToken(); - virtual void end(); - virtual void reset(ReaderPtr input); - - protected: - /// Returns true if a character should be included in a token. This tokenizer generates as tokens adjacent - /// sequences of characters which satisfy this predicate. Characters for which this is false are used to - /// define token boundaries and are not included in tokens. - virtual bool isTokenChar(wchar_t c) = 0; - - /// Called on each token character to normalize it before it is added to the token. The default implementation - /// does nothing. 
Subclasses may use this to, eg., lowercase tokens. - virtual wchar_t normalize(wchar_t c); - }; -} - -#endif diff --git a/include/CheckIndex.h b/include/CheckIndex.h deleted file mode 100644 index 8d3ba52f..00000000 --- a/include/CheckIndex.h +++ /dev/null @@ -1,320 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CHECKINDEX_H -#define CHECKINDEX_H - -#include "SegmentTermDocs.h" - -namespace Lucene -{ - /// Basic tool and API to check the health of an index and write a new segments file that removes reference to - /// problematic segments. - /// - /// As this tool checks every byte in the index, on a large index it can take quite a long time to run. - /// - /// WARNING: Please make a complete backup of your index before using this to fix your index! - class LPPAPI CheckIndex : public LuceneObject - { - public: - /// Create a new CheckIndex on the directory. - CheckIndex(DirectoryPtr dir); - virtual ~CheckIndex(); - - LUCENE_CLASS(CheckIndex); - - protected: - InfoStreamPtr infoStream; - DirectoryPtr dir; - - static bool _assertsOn; - - public: - /// Set infoStream where messages should go. If null, no messages are printed - void setInfoStream(InfoStreamPtr out); - - /// Returns a {@link IndexStatus} instance detailing the state of the index. - /// - /// As this method checks every byte in the index, on a large index it can take quite a long time to run. - /// - /// WARNING: make sure you only call this when the index is not opened by any writer. - IndexStatusPtr checkIndex(); - - /// Returns a {@link IndexStatus} instance detailing the state of the index. 
- /// - /// @param onlySegments list of specific segment names to check - /// - /// As this method checks every byte in the specified segments, on a large index it can take quite a long - /// time to run. - /// - /// WARNING: make sure you only call this when the index is not opened by any writer. - IndexStatusPtr checkIndex(Collection onlySegments); - - /// Repairs the index using previously returned result from {@link #checkIndex}. Note that this does not - /// remove any of the unreferenced files after it's done; you must separately open an {@link IndexWriter}, - /// which deletes unreferenced files when it's created. - /// - /// WARNING: this writes a new segments file into the index, effectively removing all documents in broken - /// segments from the index. BE CAREFUL. - /// - /// WARNING: Make sure you only call this when the index is not opened by any writer. - void fixIndex(IndexStatusPtr result); - - static bool testAsserts(); - static bool assertsOn(); - - /// Command-line interface to check and fix an index. - /// - /// Run it like this: - /// CheckIndex pathToIndex [-fix] [-segment X] [-segment Y] - /// - /// -fix: actually write a new segments_N file, removing any problematic segments - /// - /// -segment X: only check the specified segment(s). This can be specified multiple times, - /// to check more than one segment, eg -segment _2 -segment _a. - /// You can't use this with the -fix option. - /// - /// WARNING: -fix should only be used on an emergency basis as it will cause documents (perhaps many) - /// to be permanently removed from the index. Always make a backup copy of your index before running - /// this! Do not run this tool on an index that is actively being written to. You have been warned! - /// - /// Run without -fix, this tool will open the index, report version information and report any exceptions - /// it hits and what action it would take if -fix were specified. 
With -fix, this tool will remove any - /// segments that have issues and write a new segments_N file. This means all documents contained in the - /// affected segments will be removed. - /// - /// This tool exits with exit code 1 if the index cannot be opened or has any corruption, else 0. - static int main(Collection args); - - protected: - void msg(const String& msg); - - /// Test field norms. - FieldNormStatusPtr testFieldNorms(Collection fieldNames, SegmentReaderPtr reader); - - /// Test the term index. - TermIndexStatusPtr testTermIndex(SegmentInfoPtr info, SegmentReaderPtr reader); - - /// Test stored fields for a segment. - StoredFieldStatusPtr testStoredFields(SegmentInfoPtr info, SegmentReaderPtr reader); - - /// Test term vectors for a segment. - TermVectorStatusPtr testTermVectors(SegmentInfoPtr info, SegmentReaderPtr reader); - }; - - /// Returned from {@link #checkIndex()} detailing the health and status of the index. - class LPPAPI IndexStatus : public LuceneObject - { - public: - IndexStatus(); - virtual ~IndexStatus(); - - LUCENE_CLASS(IndexStatus); - - public: - /// True if no problems were found with the index. - bool clean; - - /// True if we were unable to locate and load the segments_N file. - bool missingSegments; - - /// True if we were unable to open the segments_N file. - bool cantOpenSegments; - - /// True if we were unable to read the version number from segments_N file. - bool missingSegmentVersion; - - /// Name of latest segments_N file in the index. - String segmentsFileName; - - /// Number of segments in the index. - int32_t numSegments; - - /// String description of the version of the index. - String segmentFormat; - - /// Empty unless you passed specific segments list to check as optional 3rd argument. - /// @see CheckIndex#checkIndex(List) - Collection segmentsChecked; - - /// True if the index was created with a newer version of Lucene than the CheckIndex tool. 
- bool toolOutOfDate; - - /// List of {@link SegmentInfoStatus} instances, detailing status of each segment. - Collection segmentInfos; - - /// Directory index is in. - DirectoryPtr dir; - - /// SegmentInfos instance containing only segments that had no problems (this is used with the - /// {@link CheckIndex#fixIndex} method to repair the index. - SegmentInfosPtr newSegments; - - /// How many documents will be lost to bad segments. - int32_t totLoseDocCount; - - /// How many bad segments were found. - int32_t numBadSegments; - - /// True if we checked only specific segments ({@link #checkIndex(List)}) was called with non-null argument). - bool partial; - - /// Holds the userData of the last commit in the index - MapStringString userData; - }; - - /// Holds the status of each segment in the index. See {@link #segmentInfos}. - class LPPAPI SegmentInfoStatus : public LuceneObject - { - public: - SegmentInfoStatus(); - virtual ~SegmentInfoStatus(); - - LUCENE_CLASS(SegmentInfoStatus); - - public: - /// Name of the segment. - String name; - - /// Document count (does not take deletions into account). - int32_t docCount; - - /// True if segment is compound file format. - bool compound; - - /// Number of files referenced by this segment. - int32_t numFiles; - - /// Net size (MB) of the files referenced by this segment. - double sizeMB; - - /// Doc store offset, if this segment shares the doc store files (stored fields and term vectors) with - /// other segments. This is -1 if it does not share. - int32_t docStoreOffset; - - /// String of the shared doc store segment, or null if this segment does not share the doc store files. - String docStoreSegment; - - /// True if the shared doc store files are compound file format. - bool docStoreCompoundFile; - - /// True if this segment has pending deletions. - bool hasDeletions; - - /// Name of the current deletions file name. - String deletionsFileName; - - /// Number of deleted documents. 
- int32_t numDeleted; - - /// True if we were able to open a SegmentReader on this segment. - bool openReaderPassed; - - /// Number of fields in this segment. - int32_t numFields; - - /// True if at least one of the fields in this segment does not omitTermFreqAndPositions. - /// @see AbstractField#setOmitTermFreqAndPositions - bool hasProx; - - /// Map that includes certain debugging details that IndexWriter records into each segment it creates - MapStringString diagnostics; - - /// Status for testing of field norms (null if field norms could not be tested). - FieldNormStatusPtr fieldNormStatus; - - /// Status for testing of indexed terms (null if indexed terms could not be tested). - TermIndexStatusPtr termIndexStatus; - - /// Status for testing of stored fields (null if stored fields could not be tested). - StoredFieldStatusPtr storedFieldStatus; - - /// Status for testing of term vectors (null if term vectors could not be tested). - TermVectorStatusPtr termVectorStatus; - }; - - /// Status from testing field norms. - class LPPAPI FieldNormStatus : public LuceneObject - { - public: - FieldNormStatus(); - virtual ~FieldNormStatus(); - - LUCENE_CLASS(FieldNormStatus); - - public: - /// Number of fields successfully tested - int64_t totFields; - - /// Exception thrown during term index test (null on success) - LuceneException error; - }; - - /// Status from testing term index. - class LPPAPI TermIndexStatus : public LuceneObject - { - public: - TermIndexStatus(); - virtual ~TermIndexStatus(); - - LUCENE_CLASS(TermIndexStatus); - - public: - /// Total term count - int64_t termCount; - - /// Total frequency across all terms. - int64_t totFreq; - - /// Total number of positions. - int64_t totPos; - - /// Exception thrown during term index test (null on success) - LuceneException error; - }; - - /// Status from testing stored fields. 
- class LPPAPI StoredFieldStatus : public LuceneObject - { - public: - StoredFieldStatus(); - virtual ~StoredFieldStatus(); - - LUCENE_CLASS(StoredFieldStatus); - - public: - /// Number of documents tested. - int32_t docCount; - - /// Total number of stored fields tested. - int64_t totFields; - - /// Exception thrown during stored fields test (null on success) - LuceneException error; - }; - - /// Status from testing stored fields. - class LPPAPI TermVectorStatus : public LuceneObject - { - public: - TermVectorStatus(); - virtual ~TermVectorStatus(); - - LUCENE_CLASS(TermVectorStatus); - - public: - /// Number of documents tested. - int32_t docCount; - - /// Total number of term vectors tested. - int64_t totVectors; - - /// Exception thrown during term vector test (null on success) - LuceneException error; - }; -} - -#endif diff --git a/include/ChecksumIndexInput.h b/include/ChecksumIndexInput.h deleted file mode 100644 index cdf4d61c..00000000 --- a/include/ChecksumIndexInput.h +++ /dev/null @@ -1,63 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CHECKSUMINDEXINPUT_H -#define CHECKSUMINDEXINPUT_H - -#include -#include "IndexInput.h" - -namespace Lucene -{ - /// Writes bytes through to a primary IndexInput, computing checksum as it goes. - /// Note that you cannot use seek(). - class LPPAPI ChecksumIndexInput : public IndexInput - { - public: - ChecksumIndexInput(IndexInputPtr main); - virtual ~ChecksumIndexInput(); - - LUCENE_CLASS(ChecksumIndexInput); - - protected: - IndexInputPtr main; - boost::crc_32_type checksum; - - public: - /// Reads and returns a single byte. 
- /// @see IndexOutput#writeByte(uint8_t) - virtual uint8_t readByte(); - - /// Reads a specified number of bytes into an array at the specified offset. - /// @param b the array to read bytes into. - /// @param offset the offset in the array to start storing bytes. - /// @param length the number of bytes to read. - /// @see IndexOutput#writeBytes(const uint8_t*,int) - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); - - /// Return calculated checksum. - int64_t getChecksum(); - - /// Closes the stream to further operations. - virtual void close(); - - /// Returns the current position in this file, where the next read will occur. - /// @see #seek(int64_t) - virtual int64_t getFilePointer(); - - /// Sets current position in this file, where the next read will occur. - /// @see #getFilePointer() - virtual void seek(int64_t pos); - - /// The number of bytes in the file. - virtual int64_t length(); - - /// Returns a clone of this stream. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/ChecksumIndexOutput.h b/include/ChecksumIndexOutput.h deleted file mode 100644 index fa01910b..00000000 --- a/include/ChecksumIndexOutput.h +++ /dev/null @@ -1,70 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CHECKSUMINDEXOUTPUT_H -#define CHECKSUMINDEXOUTPUT_H - -#include -#include "IndexOutput.h" - -namespace Lucene -{ - /// Writes bytes through to a primary IndexOutput, computing - /// checksum. Note that you cannot use seek(). 
- class LPPAPI ChecksumIndexOutput : public IndexOutput - { - public: - ChecksumIndexOutput(IndexOutputPtr main); - virtual ~ChecksumIndexOutput(); - - LUCENE_CLASS(ChecksumIndexOutput); - - protected: - IndexOutputPtr main; - boost::crc_32_type checksum; - - public: - /// Writes a single byte. - /// @see IndexInput#readByte() - virtual void writeByte(uint8_t b); - - /// Writes an array of bytes. - /// @param b the bytes to write. - /// @param length the number of bytes to write. - /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) - virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); - - /// Return calculated checksum. - int64_t getChecksum(); - - /// Forces any buffered output to be written. - virtual void flush(); - - /// Closes the stream to further operations. - virtual void close(); - - /// Returns the current position in this file, where the next write will occur. - /// @see #seek(int64_t) - virtual int64_t getFilePointer(); - - /// Sets current position in this file, where the next write will occur. - /// @see #getFilePointer() - virtual void seek(int64_t pos); - - /// Starts but does not complete the commit of this file (= writing of - /// the final checksum at the end). After this is called must call - /// {@link #finishCommit} and the {@link #close} to complete the commit. - void prepareCommit(); - - /// See {@link #prepareCommit} - void finishCommit(); - - /// The number of bytes in the file. - virtual int64_t length(); - }; -} - -#endif diff --git a/include/CloseableThreadLocal.h b/include/CloseableThreadLocal.h deleted file mode 100644 index cabeaee7..00000000 --- a/include/CloseableThreadLocal.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef CLOSEABLETHREADLOCAL_H -#define CLOSEABLETHREADLOCAL_H - -#include "LuceneThread.h" - -namespace Lucene -{ - /// General purpose thread-local map. - template - class CloseableThreadLocal : public LuceneObject - { - public: - typedef boost::shared_ptr localDataPtr; - typedef Map MapLocalData; - - CloseableThreadLocal() - { - localData = MapLocalData::newInstance(); - } - - public: - localDataPtr get() - { - SyncLock syncLock(this); - typename MapLocalData::iterator local = localData.find(LuceneThread::currentId()); - if (local != localData.end()) - return local->second; - localDataPtr initial(initialValue()); - if (initial) - localData.put(LuceneThread::currentId(), initial); - return initial; - } - - void set(localDataPtr data) - { - SyncLock syncLock(this); - localData.put(LuceneThread::currentId(), data); - } - - void close() - { - SyncLock syncLock(this); - localData.remove(LuceneThread::currentId()); - } - - protected: - MapLocalData localData; - - virtual localDataPtr initialValue() - { - return localDataPtr(); // override - } - }; -} - -#endif diff --git a/include/Collator.h b/include/Collator.h deleted file mode 100644 index 0931c630..00000000 --- a/include/Collator.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef COLLATOR_H -#define COLLATOR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Convenience class for storing collate objects. - class LPPAPI Collator : public LuceneObject - { - public: - /// Creates a new Collator, given the file to read from. 
- Collator(std::locale locale); - virtual ~Collator(); - - LUCENE_CLASS(Collator); - - protected: - const std::collate& collate; - - public: - int32_t compare(const String& first, const String& second); - }; -} - -#endif diff --git a/include/Collection.h b/include/Collection.h deleted file mode 100644 index d785ecdc..00000000 --- a/include/Collection.h +++ /dev/null @@ -1,314 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef COLLECTION_H -#define COLLECTION_H - -#include -#include "LuceneSync.h" - -namespace Lucene -{ - /// Utility template class to handle collections that can be safely copied and shared - template - class Collection : public LuceneSync - { - public: - typedef Collection this_type; - typedef boost::shared_ptr shared_ptr; - typedef std::vector< TYPE, Allocator > collection_type; - typedef typename collection_type::iterator iterator; - typedef typename collection_type::const_iterator const_iterator; - typedef TYPE value_type; - - virtual ~Collection() - { - } - - protected: - boost::shared_ptr container; - - public: - static this_type newInstance(int32_t size = 0) - { - this_type instance; - instance.container = Lucene::newInstance(size); - return instance; - } - - template - static this_type newInstance(ITER first, ITER last) - { - this_type instance; - instance.container = Lucene::newInstance(first, last); - return instance; - } - - void reset() - { - resize(0); - } - - void resize(int32_t size) - { - if (size == 0) - container.reset(); - else - container->resize(size); - } - - int32_t size() const - { - return (int32_t)container->size(); - } - - bool empty() const - { - return container->empty(); - } - - void clear() - { - 
container->clear(); - } - - iterator begin() - { - return container->begin(); - } - - iterator end() - { - return container->end(); - } - - const_iterator begin() const - { - return container->begin(); - } - - const_iterator end() const - { - return container->end(); - } - - void add(const TYPE& type) - { - container->push_back(type); - } - - void add(int32_t pos, const TYPE& type) - { - container->insert(container->begin() + pos, type); - } - - template - void addAll(ITER first, ITER last) - { - container->insert(container->end(), first, last); - } - - template - void insert(ITER pos, const TYPE& type) - { - container->insert(pos, type); - } - - template - ITER remove(ITER pos) - { - return container->erase(pos); - } - - template - ITER remove(ITER first, ITER last) - { - return container->erase(first, last); - } - - void remove(const TYPE& type) - { - container->erase(std::remove(container->begin(), container->end(), type), container->end()); - } - - template - void remove_if(PRED comp) - { - container->erase(std::remove_if(container->begin(), container->end(), comp), container->end()); - } - - TYPE removeFirst() - { - TYPE front = container->front(); - container->erase(container->begin()); - return front; - } - - TYPE removeLast() - { - TYPE back = container->back(); - container->pop_back(); - return back; - } - - iterator find(const TYPE& type) - { - return std::find(container->begin(), container->end(), type); - } - - template - iterator find_if(PRED comp) - { - return std::find_if(container->begin(), container->end(), comp); - } - - bool contains(const TYPE& type) const - { - return (std::find(container->begin(), container->end(), type) != container->end()); - } - - template - bool contains_if(PRED comp) const - { - return (std::find_if(container->begin(), container->end(), comp) != container->end()); - } - - bool equals(const this_type& other) const - { - return equals(other, std::equal_to()); - } - - template - bool equals(const this_type& other, PRED comp) 
const - { - if (container->size() != other.container->size()) - return false; - return std::equal(container->begin(), container->end(), other.container->begin(), comp); - } - - int32_t hashCode() - { - return (int32_t)(int64_t)container.get(); - } - - void swap(this_type& other) - { - container.swap(other->container); - } - - TYPE& operator[] (int32_t pos) - { - return (*container)[pos]; - } - - const TYPE& operator[] (int32_t pos) const - { - return (*container)[pos]; - } - - operator bool() const - { - return container; - } - - bool operator! () const - { - return !container; - } - - bool operator== (const this_type& other) - { - return (container == other.container); - } - - bool operator!= (const this_type& other) - { - return (container != other.container); - } - }; - - template - Collection newCollection(const TYPE& a1) - { - Collection result = Collection::newInstance(); - result.add(a1); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2) - { - Collection result = newCollection(a1); - result.add(a2); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3) - { - Collection result = newCollection(a1, a2); - result.add(a3); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4) - { - Collection result = newCollection(a1, a2, a3); - result.add(a4); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5) - { - Collection result = newCollection(a1, a2, a3, a4); - result.add(a5); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6) - { - Collection result = newCollection(a1, a2, a3, a4, a5); - result.add(a6); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, 
const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7) - { - Collection result = newCollection(a1, a2, a3, a4, a5, a6); - result.add(a7); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8) - { - Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7); - result.add(a8); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8, const TYPE& a9) - { - Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7, a8); - result.add(a9); - return result; - } - - template - Collection newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8, const TYPE& a9, const TYPE& a10) - { - Collection result = newCollection(a1, a2, a3, a4, a5, a6, a7, a8, a9); - result.add(a10); - return result; - } -} - -#endif diff --git a/include/Collector.h b/include/Collector.h deleted file mode 100644 index 95c72071..00000000 --- a/include/Collector.h +++ /dev/null @@ -1,138 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef COLLECTOR_H -#define COLLECTOR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Collectors are primarily meant to be used to gather raw results from a search, and implement sorting - /// or custom result filtering, collation, etc. - /// - /// Lucene's core collectors are derived from Collector. 
Likely your application can use one of these - /// classes, or subclass {@link TopDocsCollector}, instead of implementing Collector directly: - /// - ///
    - ///
  • {@link TopDocsCollector} is an abstract base class that assumes you will retrieve the top N docs, - /// according to some criteria, after collection is done. - /// - ///
  • {@link TopScoreDocCollector} is a concrete subclass {@link TopDocsCollector} and sorts according - /// to score + docID. This is used internally by the {@link IndexSearcher} search methods that do not take - /// an explicit {@link Sort}. It is likely the most frequently used collector. - /// - ///
  • {@link TopFieldCollector} subclasses {@link TopDocsCollector} and sorts according to a specified - /// {@link Sort} object (sort by field). This is used internally by the {@link IndexSearcher} search methods - /// that take an explicit {@link Sort}. - /// - ///
  • {@link TimeLimitingCollector}, which wraps any other Collector and aborts the search if it's taken too - /// much time. - /// - ///
  • {@link PositiveScoresOnlyCollector} wraps any other Collector and prevents collection of hits whose - /// score is <= 0.0 - /// - ///
- /// - /// Collector decouples the score from the collected doc: the score computation is skipped entirely if it's not - /// needed. Collectors that do need the score should implement the {@link #setScorer} method, to hold onto the - /// passed {@link Scorer} instance, and call {@link Scorer#score()} within the collect method to compute the - /// current hit's score. If your collector may request the score for a single hit multiple times, you should use - /// {@link ScoreCachingWrappingScorer}. - /// - /// NOTE: The doc that is passed to the collect method is relative to the current reader. If your collector needs - /// to resolve this to the docID space of the Multi*Reader, you must re-base it by recording the docBase from the - /// most recent setNextReader call. Here's a simple example showing how to collect docIDs into a BitSet: - /// - ///
-    /// class MyCollector : public Collector
-    /// {
-    /// public:
-    ///     MyCollector(BitSetPtr bits)
-    ///     {
-    ///         this->bits = bits;
-    ///         this->docBase = 0;
-    ///     }
-    /// 
-    /// protected:
-    ///     BitSetPtr bits;
-    ///     int32_t docBase;
-    /// 
-    /// public:
-    ///     virtual void setScorer(ScorerPtr scorer)
-    ///     {
-    ///         // ignore scorer
-    ///     }
-    ///     
-    ///     virtual void collect(int32_t doc)
-    ///     {
-    ///         bits->set(doc + docBase);
-    ///     }
-    ///     
-    ///     virtual void setNextReader(IndexReaderPtr reader, int32_t docBase)
-    ///     {
-    ///         this->docBase = docBase;
-    ///     }
-    ///     
-    ///     virtual bool acceptsDocsOutOfOrder()
-    ///     {
-    ///         return true; // accept docs out of order (for a BitSet it doesn't matter)
-    ///     }
-    /// };
-    /// 
-    /// ...
-    /// 
-    /// SearcherPtr searcher = newLucene(indexReader);
-    /// BitSetPtr bits = newLucene(indexReader->maxDoc());
-    /// searcher->search(query, newLucene(bits));
-    ///
-    /// 
- /// Not all collectors will need to rebase the docID. For example, a collector that simply counts the - /// total number of hits would skip it. - /// - /// NOTE: Prior to 2.9, Lucene silently filtered out hits with score <= 0. As of 2.9, the core Collectors - /// no longer do that. It's very unusual to have such hits (a negative query boost, or function query - /// returning negative custom scores, could cause it to happen). If you need that behavior, use {@link - /// PositiveScoresOnlyCollector}. - class LPPAPI Collector : public LuceneObject - { - public: - virtual ~Collector(); - LUCENE_CLASS(Collector); - - public: - /// Called before successive calls to {@link #collect(int32_t)}. Implementations that need the score - /// of the current document (passed-in to {@link #collect(int32_t)}), should save the passed-in Scorer - /// and call scorer.score() when needed. - virtual void setScorer(ScorerPtr scorer) = 0; - - /// Called once for every document matching a query, with the unbased document number. - /// - /// Note: This is called in an inner search loop. For good search performance, implementations of this - /// method should not call {@link Searcher#doc(int32_t)} or {@link IndexReader#document(int32_t)} on - /// every hit. Doing so can slow searches by an order of magnitude or more. - virtual void collect(int32_t doc) = 0; - - /// Called before collecting from each IndexReader. All doc ids in {@link #collect(int32_t)} will - /// correspond to reader. Add docBase to the current IndexReaders internal document id to re-base ids - /// in {@link #collect(int32_t)}. - /// @param reader next IndexReader - /// @param docBase - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) = 0; - - /// Return true if this collector does not require the matching docIDs to be delivered in int sort - /// order (smallest to largest) to {@link #collect}. - /// - /// Most Lucene Query implementations will visit matching docIDs in order. 
However, some queries - /// (currently limited to certain cases of {@link BooleanQuery}) can achieve faster searching if the - /// Collector allows them to deliver the docIDs out of order. - /// - /// Many collectors don't mind getting docIDs out of order, so it's important to return true here. - virtual bool acceptsDocsOutOfOrder() = 0; - }; -} - -#endif diff --git a/include/ComplexExplanation.h b/include/ComplexExplanation.h deleted file mode 100644 index a267b9ef..00000000 --- a/include/ComplexExplanation.h +++ /dev/null @@ -1,47 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef COMPLEXEXPLANATION_H -#define COMPLEXEXPLANATION_H - -#include "Explanation.h" - -namespace Lucene -{ - /// Describes the score computation for document and query, and can distinguish a match independent - /// of a positive value. - class LPPAPI ComplexExplanation : public Explanation - { - public: - ComplexExplanation(bool match = false, double value = 0, const String& description = EmptyString); - virtual ~ComplexExplanation(); - - LUCENE_CLASS(ComplexExplanation); - - protected: - bool match; - - public: - /// The match status of this explanation node. - bool getMatch(); - - /// Sets the match status assigned to this explanation node. - void setMatch(bool match); - - /// Indicates whether or not this Explanation models a good match. - /// - /// If the match status is explicitly set this method uses it; otherwise it defers to the - /// superclass. 
- /// - /// @see #getMatch - virtual bool isMatch(); - - protected: - virtual String getSummary(); - }; -} - -#endif diff --git a/include/CompoundFileReader.h b/include/CompoundFileReader.h deleted file mode 100644 index c5618557..00000000 --- a/include/CompoundFileReader.h +++ /dev/null @@ -1,123 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef COMPOUNDFILEREADER_H -#define COMPOUNDFILEREADER_H - -#include "Directory.h" -#include "BufferedIndexInput.h" - -namespace Lucene -{ - /// Class for accessing a compound stream. - /// This class implements a directory, but is limited to only read operations. - /// Directory methods that would normally modify data throw an exception. - class CompoundFileReader : public Directory - { - public: - CompoundFileReader(DirectoryPtr dir, const String& name); - CompoundFileReader(DirectoryPtr dir, const String& name, int32_t readBufferSize); - virtual ~CompoundFileReader(); - - LUCENE_CLASS(CompoundFileReader); - - protected: - struct FileEntry - { - FileEntry(int64_t offset = 0, int64_t length = 0) - { - this->offset = offset; - this->length = length; - } - int64_t offset; - int64_t length; - }; - typedef boost::shared_ptr FileEntryPtr; - typedef HashMap MapStringFileEntryPtr; - - DirectoryPtr directory; - String fileName; - int32_t readBufferSize; - IndexInputPtr stream; - MapStringFileEntryPtr entries; - - protected: - void ConstructReader(DirectoryPtr dir, const String& name, int32_t readBufferSize); - - public: - DirectoryPtr getDirectory(); - String getName(); - virtual void close(); - virtual IndexInputPtr openInput(const String& name); - virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); - - /// 
Returns an array of strings, one for each file in the directory. - virtual HashSet listAll(); - - /// Returns true if a file with the given name exists. - virtual bool fileExists(const String& name); - - /// Returns the time the compound file was last modified. - virtual uint64_t fileModified(const String& name); - - /// Set the modified time of the compound file to now. - virtual void touchFile(const String& name); - - /// Not implemented - virtual void deleteFile(const String& name); - - /// Not implemented - virtual void renameFile(const String& from, const String& to); - - /// Returns the length of a file in the directory. - virtual int64_t fileLength(const String& name); - - /// Not implemented - virtual IndexOutputPtr createOutput(const String& name); - - /// Not implemented - virtual LockPtr makeLock(const String& name); - }; - - /// Implementation of an IndexInput that reads from a portion of the compound file. - class CSIndexInput : public BufferedIndexInput - { - public: - CSIndexInput(); - CSIndexInput(IndexInputPtr base, int64_t fileOffset, int64_t length); - CSIndexInput(IndexInputPtr base, int64_t fileOffset, int64_t length, int32_t readBufferSize); - virtual ~CSIndexInput(); - - LUCENE_CLASS(CSIndexInput); - - public: - IndexInputPtr base; - int64_t fileOffset; - int64_t _length; - - public: - /// Closes the stream to further operations. - virtual void close(); - - virtual int64_t length(); - - /// Returns a clone of this stream. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - protected: - /// Implements buffer refill. Reads bytes from the current position in the input. - /// @param b the array to read bytes into - /// @param offset the offset in the array to start storing bytes - /// @param len the number of bytes to read - virtual void readInternal(uint8_t* b, int32_t offset, int32_t length); - - /// Implements seek. 
Sets current position in this file, where the next {@link - /// #readInternal(byte[],int,int)} will occur. - virtual void seekInternal(int64_t pos); - }; -} - -#endif diff --git a/include/CompoundFileWriter.h b/include/CompoundFileWriter.h deleted file mode 100644 index 0ed42a9d..00000000 --- a/include/CompoundFileWriter.h +++ /dev/null @@ -1,78 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef COMPOUNDFILEWRITER_H -#define COMPOUNDFILEWRITER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Combines multiple files into a single compound file. - /// The file format: - /// VInt fileCount - /// {Directory} - /// fileCount entries with the following structure: - /// int64_t dataOffset - /// String fileName - /// {File Data} - /// fileCount entries with the raw data of the corresponding file - /// - /// The fileCount integer indicates how many files are contained in this compound file. The {directory} - /// that follows has that many entries. Each directory entry contains a long pointer to the start of - /// this file's data section, and a string with that file's name. 
- class CompoundFileWriter : public LuceneObject - { - public: - CompoundFileWriter(DirectoryPtr dir, const String& name, CheckAbortPtr checkAbort = CheckAbortPtr()); - virtual ~CompoundFileWriter(); - - LUCENE_CLASS(CompoundFileWriter); - - protected: - struct FileEntry - { - /// source file - String file; - - /// temporary holder for the start of directory entry for this file - int64_t directoryOffset; - - /// temporary holder for the start of this file's data section - int64_t dataOffset; - }; - - DirectoryWeakPtr _directory; - String fileName; - HashSet ids; - Collection entries; - bool merged; - CheckAbortPtr checkAbort; - - public: - /// Returns the directory of the compound file. - DirectoryPtr getDirectory(); - - /// Returns the name of the compound file. - String getName(); - - /// Add a source stream. file is the string by which the sub-stream will be known in the - /// compound stream. - void addFile(const String& file); - - /// Merge files with the extensions added up to now. All files with these extensions are - /// combined sequentially into the compound stream. After successful merge, the source - /// are deleted.files - void close(); - - protected: - /// Copy the contents of the file with specified extension into the provided output stream. - /// Use the provided buffer for moving data to reduce memory allocation. - void copyFile(const FileEntry& source, IndexOutputPtr os, ByteArray buffer); - }; -} - -#endif diff --git a/include/CompressionTools.h b/include/CompressionTools.h deleted file mode 100644 index 934ef038..00000000 --- a/include/CompressionTools.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef COMPRESSIONTOOLS_H -#define COMPRESSIONTOOLS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Simple utility class providing static methods to compress and decompress binary data for stored fields. - class LPPAPI CompressionTools : public LuceneObject - { - public: - virtual ~CompressionTools(); - - LUCENE_CLASS(CompressionTools); - - public: - /// Compresses the specified byte range using the specified compressionLevel - static ByteArray compress(uint8_t* value, int32_t offset, int32_t length, int32_t compressionLevel); - - /// Compresses the specified byte range, with default BEST_COMPRESSION level - static ByteArray compress(uint8_t* value, int32_t offset, int32_t length); - - /// Compresses all bytes in the array, with default BEST_COMPRESSION level - static ByteArray compress(ByteArray value); - - /// Compresses the String value, with default BEST_COMPRESSION level - static ByteArray compressString(const String& value); - - /// Compresses the String value using the specified compressionLevel - static ByteArray compressString(const String& value, int32_t compressionLevel); - - /// Decompress the byte array previously returned by compress - static ByteArray decompress(ByteArray value); - - /// Decompress the byte array previously returned by compressString back into a String - static String decompressString(ByteArray value); - - protected: - static const int32_t COMPRESS_BUFFER; - }; -} - -#endif diff --git a/include/ConcurrentMergeScheduler.h b/include/ConcurrentMergeScheduler.h deleted file mode 100644 index 8c0cb804..00000000 --- a/include/ConcurrentMergeScheduler.h +++ /dev/null @@ -1,100 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef CONCURRENTMERGESCHEDULER_H -#define CONCURRENTMERGESCHEDULER_H - -#include "MergeScheduler.h" - -namespace Lucene -{ - /// A {@link MergeScheduler} that runs each merge using a separate thread, up until a - /// maximum number of threads ({@link #setMaxThreadCount}) at which when a merge is needed, - /// the thread(s) that are updating the index will pause until one or more merges completes. - /// This is a simple way to use concurrency in the indexing process without having to create - /// and manage application level threads. - class LPPAPI ConcurrentMergeScheduler : public MergeScheduler - { - public: - ConcurrentMergeScheduler(); - virtual ~ConcurrentMergeScheduler(); - - LUCENE_CLASS(ConcurrentMergeScheduler); - - protected: - int32_t mergeThreadPriority; - - SetMergeThread mergeThreads; - - /// Max number of threads allowed to be merging at once - int32_t maxThreadCount; - - DirectoryPtr dir; - - bool closed; - IndexWriterWeakPtr _writer; - - static Collection allInstances; - - bool suppressExceptions; - static bool anyExceptions; - - public: - virtual void initialize(); - - /// Sets the max # simultaneous threads that may be running. If a merge is necessary yet - /// we already have this many threads running, the incoming thread (that is calling - /// add/updateDocument) will block until a merge thread has completed. - virtual void setMaxThreadCount(int32_t count); - - /// Get the max # simultaneous threads that may be running. @see #setMaxThreadCount. - virtual int32_t getMaxThreadCount(); - - /// Return the priority that merge threads run at. By default the priority is 1 plus the - /// priority of (ie, slightly higher priority than) the first thread that calls merge. - virtual int32_t getMergeThreadPriority(); - - /// Set the priority that merge threads run at. 
- virtual void setMergeThreadPriority(int32_t pri); - - virtual void close(); - - virtual void sync(); - - virtual void merge(IndexWriterPtr writer); - - /// Used for testing - static bool anyUnhandledExceptions(); - static void clearUnhandledExceptions(); - - /// Used for testing - void setSuppressExceptions(); - void clearSuppressExceptions(); - - /// Used for testing - static void setTestMode(); - - protected: - virtual bool verbose(); - virtual void message(const String& message); - virtual void initMergeThreadPriority(); - virtual int32_t mergeThreadCount(); - - /// Does the actual merge, by calling {@link IndexWriter#merge} - virtual void doMerge(OneMergePtr merge); - - virtual MergeThreadPtr getMergeThread(IndexWriterPtr writer, OneMergePtr merge); - - /// Called when an exception is hit in a background merge thread - virtual void handleMergeException(const LuceneException& exc); - - virtual void addMyself(); - - friend class MergeThread; - }; -} - -#endif diff --git a/include/Config.h b/include/Config.h deleted file mode 100644 index 90c55790..00000000 --- a/include/Config.h +++ /dev/null @@ -1,93 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef CONFIG_H -#define CONFIG_H - -#if defined(_WIN32) || defined(_WIN64) -#pragma warning(disable:4251) -#pragma warning(disable:4275) -#pragma warning(disable:4005) -#pragma warning(disable:4996) -#ifndef _WIN64 -#pragma warning(disable:4244) -#endif -#endif - -#if defined(_WIN32) || defined(_WIN64) -#define LPP_IMPORT __declspec(dllimport) -#define LPP_EXPORT __declspec(dllexport) -#else -#ifdef LPP_HAVE_GXXCLASSVISIBILITY -#define LPP_IMPORT __attribute__ ((visibility("default"))) -#define LPP_EXPORT __attribute__ ((visibility("default"))) -#else -#define LPP_IMPORT -#define LPP_EXPORT -#endif -#endif - -// Define LPPAPI for dll builds -#ifdef LPP_HAVE_DLL -#ifdef LPP_BUILDING_LIB -#define LPPAPI LPP_EXPORT -#define LPPCONTRIBAPI LPP_EXPORT -#else -#define LPPAPI LPP_IMPORT -#define LPPCONTRIBAPI LPP_IMPORT -#endif -#else -#define LPPAPI -#define LPPCONTRIBAPI -#endif - -// Check windows -#if defined(_WIN32) || defined(_WIN64) -#define LPP_UNICODE_CHAR_SIZE_2 -#if defined(_WIN64) -#define LPP_BUILD_64 -#else -#define LPP_BUILD_32 -#endif -#endif - -// Check GCC -#if defined(__GNUC__) -#define LPP_UNICODE_CHAR_SIZE_4 -#if defined(__x86_64__) || defined(__ppc64__) -#define LPP_BUILD_64 -#else -#define LPP_BUILD_32 -#endif -#endif - -// Default to 32-bit platforms -#if !defined(LPP_BUILD_32) && !defined(LPP_BUILD_64) -#define LPP_BUILD_32 -#endif - -// Default to 4-byte unicode format -#if !defined(LPP_UNICODE_CHAR_SIZE_2) && !defined(LPP_UNICODE_CHAR_SIZE_4) -#define LPP_UNICODE_CHAR_SIZE_4 -#endif - -// Define to enable cyclic checking in debug builds -// #define LPP_USE_CYCLIC_CHECK - -// Define to use nedmalloc memory allocator -// #define LPP_USE_NEDMALLOC - -#ifdef LPP_USE_NEDMALLOC -#define EXTSPEC LPPAPI -#endif - -// Make internal bitset storage public -#define BOOST_DYNAMIC_BITSET_DONT_USE_FRIENDS - -// Force boost file-system version 2 for later boost versions > 1.46 
-#define BOOST_FILESYSTEM_VERSION 2 - -#endif diff --git a/include/ConjunctionScorer.h b/include/ConjunctionScorer.h deleted file mode 100644 index f2c62eba..00000000 --- a/include/ConjunctionScorer.h +++ /dev/null @@ -1,39 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CONJUNCTIONSCORER_H -#define CONJUNCTIONSCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// Scorer for conjunctions, sets of queries, all of which are required. - class ConjunctionScorer : public Scorer - { - public: - ConjunctionScorer(SimilarityPtr similarity, Collection scorers); - virtual ~ConjunctionScorer(); - - LUCENE_CLASS(ConjunctionScorer); - - protected: - Collection scorers; - double coord; - int32_t lastDoc; - - public: - virtual int32_t advance(int32_t target); - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual double score(); - - protected: - int32_t doNext(); - }; -} - -#endif diff --git a/include/ConstantScoreQuery.h b/include/ConstantScoreQuery.h deleted file mode 100644 index 53bfcd37..00000000 --- a/include/ConstantScoreQuery.h +++ /dev/null @@ -1,52 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef CONSTANTSCOREQUERY_H -#define CONSTANTSCOREQUERY_H - -#include "Query.h" -#include "Weight.h" -#include "Scorer.h" - -namespace Lucene -{ - /// A query that wraps a filter and simply returns a constant score equal to the query boost for every - /// document in the filter. - class LPPAPI ConstantScoreQuery : public Query - { - public: - ConstantScoreQuery(FilterPtr filter); - virtual ~ConstantScoreQuery(); - - LUCENE_CLASS(ConstantScoreQuery); - - protected: - FilterPtr filter; - - public: - using Query::toString; - - /// Returns the encapsulated filter - FilterPtr getFilter(); - - virtual QueryPtr rewrite(IndexReaderPtr reader); - virtual void extractTerms(SetTerm terms); - - virtual WeightPtr createWeight(SearcherPtr searcher); - - /// Prints a user-readable version of this query. - virtual String toString(const String& field); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - friend class ConstantWeight; - friend class ConstantScorer; - }; -} - -#endif diff --git a/include/Constants.h b/include/Constants.h deleted file mode 100644 index 06337d96..00000000 --- a/include/Constants.h +++ /dev/null @@ -1,88 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CONSTANTS_H -#define CONSTANTS_H - -#include "Lucene.h" - -namespace Lucene -{ - /// Some useful Lucene constants. 
- class LPPAPI Constants - { - private: - Constants(); - - public: - virtual ~Constants(); - - public: - static String OS_NAME; - static String LUCENE_MAIN_VERSION; - static String LUCENE_VERSION; - }; - - /// Use by certain classes to match version compatibility across releases of Lucene. - /// - /// WARNING: When changing the version parameter that you supply to components in Lucene, do not simply - /// change the version at search-time, but instead also adjust your indexing code to match, and re-index. - class LPPAPI LuceneVersion - { - private: - LuceneVersion(); - - public: - virtual ~LuceneVersion(); - - public: - enum Version - { - /// Match settings and bugs in Lucene's 2.0 release. - LUCENE_20 = 0, - - /// Match settings and bugs in Lucene's 2.1 release. - LUCENE_21, - - /// Match settings and bugs in Lucene's 2.2 release. - LUCENE_22, - - /// Match settings and bugs in Lucene's 2.3 release. - LUCENE_23, - - /// Match settings and bugs in Lucene's 2.4 release. - LUCENE_24, - - /// Match settings and bugs in Lucene's 2.9 release. - LUCENE_29, - - /// Match settings and bugs in Lucene's 3.0 release. - /// - /// Use this to get the latest & greatest settings, bug fixes, etc, for Lucene. - LUCENE_30, - - /// Add new constants for later versions **here** to respect order! - - /// Warning: If you use this setting, and then upgrade to a newer release of Lucene, - /// sizable changes may happen. If backwards compatibility is important then you - /// should instead explicitly specify an actual version. - /// - /// If you use this constant then you may need to re-index all of your documents - /// when upgrading Lucene, as the way text is indexed may have changed. Additionally, - /// you may need to re-test your entire application to ensure it behaves as - /// expected, as some defaults may have changed and may break functionality in your - /// application. - /// - /// Deprecated: Use an actual version instead. 
- LUCENE_CURRENT - }; - - public: - static bool onOrAfter(LuceneVersion::Version first, LuceneVersion::Version second); - }; -} - -#endif diff --git a/include/CustomScoreProvider.h b/include/CustomScoreProvider.h deleted file mode 100644 index a3361184..00000000 --- a/include/CustomScoreProvider.h +++ /dev/null @@ -1,93 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CUSTOMSCOREPROVIDER_H -#define CUSTOMSCOREPROVIDER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// An instance of this subclass should be returned by {@link CustomScoreQuery#getCustomScoreProvider}, - /// if you want to modify the custom score calculation of a {@link CustomScoreQuery}. - /// - /// Since Lucene 2.9, queries operate on each segment of an Index separately, so overriding the similar - /// (now deprecated) methods in {@link CustomScoreQuery} is no longer suitable, as the supplied doc ID - /// is per-segment and without knowledge of the IndexReader you cannot access the document or {@link - /// FieldCache}. - class LPPAPI CustomScoreProvider : public LuceneObject - { - public: - /// Creates a new instance of the provider class for the given {@link IndexReader}. - CustomScoreProvider(IndexReaderPtr reader); - - virtual ~CustomScoreProvider(); - - LUCENE_CLASS(CustomScoreProvider); - - protected: - IndexReaderPtr reader; - - public: - /// Compute a custom score by the subQuery score and a number of ValueSourceQuery scores. - /// - /// Subclasses can override this method to modify the custom score. - /// - /// If your custom scoring is different than the default herein you should override at least one of - /// the two customScore() methods. 
If the number of ValueSourceQueries is always < 2 it is - /// sufficient to override the other {@link #customScore(int32_t, double, double) customScore()} - /// method, which is simpler. - /// - /// The default computation herein is a multiplication of given scores: - ///
-        /// ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
-        /// 
- /// - /// @param doc id of scored doc. - /// @param subQueryScore score of that doc by the subQuery. - /// @param valSrcScores scores of that doc by the ValueSourceQuery. - /// @return custom score. - virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); - - /// Compute a custom score by the subQuery score and the ValueSourceQuery score. - /// - /// Subclasses can override this method to modify the custom score. - /// - /// If your custom scoring is different than the default herein you should override at least one of the - /// two customScore() methods. If the number of ValueSourceQueries is always < 2 it is sufficient to - /// override this customScore() method, which is simpler. - /// - /// The default computation herein is a multiplication of the two scores: - ///
-        /// ModifiedScore = subQueryScore * valSrcScore
-        /// 
- /// - /// @param doc id of scored doc. - /// @param subQueryScore score of that doc by the subQuery. - /// @param valSrcScore score of that doc by the ValueSourceQuery. - /// @return custom score. - virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); - - /// Explain the custom score. Whenever overriding {@link #customScore(int32_t, double, Collection)}, - /// this method should also be overridden to provide the correct explanation for the part of the custom scoring. - /// - /// @param doc doc being explained. - /// @param subQueryExpl explanation for the sub-query part. - /// @param valSrcExpls explanation for the value source part. - /// @return an explanation for the custom score - virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls); - - /// Explain the custom score. Whenever overriding {@link #customScore(int32_t, double, double)}, - /// this method should also be overridden to provide the correct explanation for the part of the custom scoring. - /// @param doc doc being explained. - /// @param subQueryExpl explanation for the sub-query part. - /// @param valSrcExpl explanation for the value source part. - /// @return an explanation for the custom score - virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl); - }; -} - -#endif diff --git a/include/CustomScoreQuery.h b/include/CustomScoreQuery.h deleted file mode 100644 index 7b108912..00000000 --- a/include/CustomScoreQuery.h +++ /dev/null @@ -1,132 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef CUSTOMSCOREQUERY_H -#define CUSTOMSCOREQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// Query that sets document score as a programmatic function of several (sub) scores: - ///
    - ///
  1. the score of its subQuery (any query) - ///
  2. (optional) the score of its ValueSourceQuery (or queries). For most simple/convenient use cases - /// this query is likely to be a {@link FieldScoreQuery} - ///
- /// Subclasses can modify the computation by overriding {@link #getCustomScoreProvider}. - class LPPAPI CustomScoreQuery : public Query - { - public: - /// Create a CustomScoreQuery over input subQuery. - /// @param subQuery the sub query whose scored is being customed. Must not be null. - CustomScoreQuery(QueryPtr subQuery); - - /// Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. - /// @param subQuery the sub query whose score is being customized. Must not be null. - /// @param valSrcQuery a value source query whose scores are used in the custom score computation. For - /// most simple/convenient use case this would be a {@link FieldScoreQuery}. This parameter is - /// optional - it can be null. - CustomScoreQuery(QueryPtr subQuery, ValueSourceQueryPtr valSrcQuery); - - /// Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. - /// @param subQuery the sub query whose score is being customized. Must not be null. - /// @param valSrcQueries value source queries whose scores are used in the custom score computation. - /// For most simple/convenient use case these would be {@link FieldScoreQueries}. This parameter is - /// optional - it can be null or even an empty array. - CustomScoreQuery(QueryPtr subQuery, Collection valSrcQueries); - - virtual ~CustomScoreQuery(); - - LUCENE_CLASS(CustomScoreQuery); - - protected: - QueryPtr subQuery; - Collection valSrcQueries; // never null (empty array if there are no valSrcQueries). - bool strict; // if true, valueSource part of query does not take part in weights normalization. 
- - public: - using Query::toString; - - virtual QueryPtr rewrite(IndexReaderPtr reader); - virtual void extractTerms(SetTerm terms); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual String toString(const String& field); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - /// Compute a custom score by the subQuery score and a number of ValueSourceQuery scores. - /// - /// Deprecated: Will be removed in Lucene 3.1. - /// - /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment - /// search (since Lucene 2.9). - /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} - /// for the given {@link IndexReader}. - virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); - - /// Compute a custom score by the subQuery score and the ValueSourceQuery score. - /// - /// Deprecated: Will be removed in Lucene 3.1. - /// - /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment - /// search (since Lucene 2.9). - /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} - /// for the given {@link IndexReader}. - virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); - - /// Explain the custom score. - /// - /// Deprecated: Will be removed in Lucene 3.1. - /// - /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment - /// search (since Lucene 2.9). - /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} - /// for the given {@link IndexReader}. - virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls); - - /// Explain the custom score. - /// - /// Deprecated Will be removed in Lucene 3.1. 
- /// - /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment - /// search (since Lucene 2.9). - /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} - /// for the given {@link IndexReader}. - virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl); - - virtual WeightPtr createWeight(SearcherPtr searcher); - - /// Checks if this is strict custom scoring. In strict custom scoring, the ValueSource part does not - /// participate in weight normalization. This may be useful when one wants full control over how scores - /// are modified, and does not care about normalizing by the ValueSource part. One particular case where - /// this is useful if for testing this query. - /// - /// Note: only has effect when the ValueSource part is not null. - virtual bool isStrict(); - - /// Set the strict mode of this query. - /// @param strict The strict mode to set. - /// @see #isStrict() - virtual void setStrict(bool strict); - - /// A short name of this query, used in {@link #toString(String)}. - virtual String name(); - - protected: - void ConstructQuery(QueryPtr subQuery, Collection valSrcQueries); - - /// Returns a {@link CustomScoreProvider} that calculates the custom scores for the given {@link - /// IndexReader}. The default implementation returns a default implementation as specified in - /// the docs of {@link CustomScoreProvider}. - virtual CustomScoreProviderPtr getCustomScoreProvider(IndexReaderPtr reader); - - friend class CustomWeight; - friend class CustomScorer; - }; -} - -#endif diff --git a/include/CycleCheck.h b/include/CycleCheck.h deleted file mode 100644 index 8f3df4bb..00000000 --- a/include/CycleCheck.h +++ /dev/null @@ -1,54 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef CYCLECHECK_H -#define CYCLECHECK_H - -#include "Lucene.h" - -namespace Lucene -{ - /// Debug utility to track shared_ptr utilization. - class LPPAPI CycleCheck - { - public: - virtual ~CycleCheck(); - - protected: - static MapStringInt cycleMap; - static Set staticRefs; - - protected: - void addRef(const String& className, int32_t ref); - static void addStatic(LuceneObjectPtr* staticRef); - - public: - template - static void addStatic(TYPE& staticRef) - { - addStatic(reinterpret_cast(&staticRef)); - } - - static void dumpRefs(); - }; - - template - class CycleCheckT : public CycleCheck - { - public: - CycleCheckT() - { - addRef(TYPE::_getClassName(), 1); - } - - virtual ~CycleCheckT() - { - addRef(TYPE::_getClassName(), -1); - } - }; -} - -#endif diff --git a/include/DateField.h b/include/DateField.h deleted file mode 100644 index bdf6ecd4..00000000 --- a/include/DateField.h +++ /dev/null @@ -1,56 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DATEFIELD_H -#define DATEFIELD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Provides support for converting dates to strings and vice-versa. The strings are structured so that - /// lexicographic sorting orders by date, which makes them suitable for use as field values and search terms. 
- /// - /// Note that this class saves dates with millisecond granularity, which is bad for {@link TermRangeQuery} and - /// {@link PrefixQuery}, as those queries are expanded to a BooleanQuery with a potentially large number of terms - /// when searching. Thus you might want to use {@link DateTools} instead. - /// - /// Note: dates before 1970 cannot be used, and therefore cannot be indexed when using this class. See {@link - /// DateTools} for an alternative without such a limitation. - /// - /// Another approach is {@link NumericUtils}, which provides a sortable binary representation (prefix encoded) - /// of numeric values, which date/time are. For indexing a {@link Date} or {@link Calendar}, just get the unix - /// timestamp as long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and index this as a numeric - /// value with {@link NumericField} and use {@link NumericRangeQuery} to query it. - /// - /// @deprecated If you build a new index, use {@link DateTools} or {@link NumericField} instead. This class is - /// included for use with existing indices and will be removed in a future release (possibly Lucene 4.0). - class LPPAPI DateField : public LuceneObject - { - public: - virtual ~DateField(); - - LUCENE_CLASS(DateField); - - protected: - static int32_t DATE_LEN(); - - public: - static const String& MIN_DATE_STRING(); - static const String& MAX_DATE_STRING(); - - /// Converts a Date to a string suitable for indexing. - static String dateToString(const boost::posix_time::ptime& date); - - /// Converts a millisecond time to a string suitable for indexing. - static String timeToString(int64_t time); - - /// Converts a string-encoded date into a millisecond time. 
- static int64_t stringToTime(const String& s); - }; -} - -#endif diff --git a/include/DateTools.h b/include/DateTools.h deleted file mode 100644 index f93f5832..00000000 --- a/include/DateTools.h +++ /dev/null @@ -1,110 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DATETOOLS_H -#define DATETOOLS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Provides support for converting dates to strings and vice-versa. The strings are structured so that - /// lexicographic sorting orders them by date, which makes them suitable for use as field values and search - /// terms. - /// - /// This class also helps you to limit the resolution of your dates. Do not save dates with a finer resolution - /// than you really need, as then RangeQuery and PrefixQuery will require more memory and become slower. - /// - /// Compared to {@link DateField} the strings generated by the methods in this class take slightly more space, - /// unless your selected resolution is set to Resolution.DAY or lower. - /// - /// Another approach is {@link NumericUtils}, which provides a sortable binary representation (prefix encoded) - /// of numeric values, which date/time are. For indexing a {@link Date} or {@link Calendar}, just get the unix - /// timestamp as long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and index this as a numeric - /// value with {@link NumericField} and use {@link NumericRangeQuery} to query it. 
- class LPPAPI DateTools : public LuceneObject - { - public: - virtual ~DateTools(); - - LUCENE_CLASS(DateTools); - - public: - enum Resolution - { - RESOLUTION_NULL, - RESOLUTION_YEAR, - RESOLUTION_MONTH, - RESOLUTION_DAY, - RESOLUTION_HOUR, - RESOLUTION_MINUTE, - RESOLUTION_SECOND, - RESOLUTION_MILLISECOND - }; - - enum DateOrder - { - DATEORDER_LOCALE, - DATEORDER_YMD, - DATEORDER_DMY, - DATEORDER_MDY - }; - - protected: - static DateOrder dateOrder; - - public: - /// Converts a Date to a string suitable for indexing. - /// @param date the date to be converted - /// @param resolution the desired resolution - /// @return a string in format yyyyMMddHHmmssSSS or shorter, depending on resolution; using GMT as timezone - static String dateToString(const boost::posix_time::ptime& date, Resolution resolution); - - /// Converts a millisecond time to a string suitable for indexing. - /// @param time the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT - /// @param resolution the desired resolution - /// @return a string in format yyyyMMddHHmmssSSS or shorter, depending on resolution; using GMT as timezone - static String timeToString(int64_t time, Resolution resolution); - - /// Converts a string produced by timeToString or dateToString back to a time, represented as the number of - /// milliseconds since January 1, 1970, 00:00:00 GMT. - /// @param dateString the date string to be converted - /// @return the number of milliseconds since January 1, 1970, 00:00:00 GMT - static int64_t stringToTime(const String& dateString); - - /// Converts a string produced by timeToString or dateToString back to a time, represented as a ptime object. - /// @param dateString the date string to be converted - /// @return the parsed time as a ptime object - static boost::posix_time::ptime stringToDate(const String& dateString); - - /// Limit a date's resolution. 
For example, the date 2004-09-21 13:50:11 will be changed to 2004-09-01 00:00:00 - /// when using Resolution.MONTH. - /// @param resolution The desired resolution of the date to be returned - /// @return the date with all values more precise than resolution set to 0 or 1 - static boost::posix_time::ptime round(const boost::posix_time::ptime& date, Resolution resolution); - - /// Limit a date's resolution. For example, the date 1095767411000 (which represents 2004-09-21 13:50:11) will - /// be changed to 1093989600000 (2004-09-01 00:00:00) when using Resolution.MONTH. - /// @param resolution The desired resolution of the date to be returned - /// @return the date with all values more precise than resolution set to 0 or 1, expressed as milliseconds - /// since January 1, 1970, 00:00:00 GMT - static int64_t round(int64_t time, Resolution resolution); - - /// Allow overriding of date ordering. - static void setDateOrder(DateTools::DateOrder order); - - /// Return date ordering based on given locale (or overridden in {@link #setDateOrder(DateTools::DateOrder)}). - static DateTools::DateOrder getDateOrder(std::locale locale = std::locale()); - - /// Parse a given date using locale date format - /// @param dateString the date string to be converted - /// @param locale the locale to use for parsing - /// @return the parsed time as a ptime object - static boost::posix_time::ptime parseDate(const String& dateString, std::locale locale = std::locale()); - }; -} - -#endif diff --git a/include/DefaultSimilarity.h b/include/DefaultSimilarity.h deleted file mode 100644 index 8e0b63ba..00000000 --- a/include/DefaultSimilarity.h +++ /dev/null @@ -1,60 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef DEFAULTSIMILARITY_H -#define DEFAULTSIMILARITY_H - -#include "Similarity.h" - -namespace Lucene -{ - /// Default scoring implementation. - class LPPAPI DefaultSimilarity : public Similarity - { - public: - DefaultSimilarity(); - virtual ~DefaultSimilarity(); - - LUCENE_CLASS(DefaultSimilarity); - - protected: - bool discountOverlaps; // Default false - - public: - /// Implemented as state->getBoost() * lengthNorm(numTerms), where numTerms is {@link - /// FieldInvertState#getLength()} if {@link #setDiscountOverlaps} is false, else it's {@link - /// FieldInvertState#getLength()} - {@link FieldInvertState#getNumOverlap()}. - virtual double computeNorm(const String& fieldName, FieldInvertStatePtr state); - - /// Implemented as 1 / sqrt(numTerms). - virtual double lengthNorm(const String& fieldName, int32_t numTokens); - - /// Implemented as 1 / sqrt(sumOfSquaredWeights). - virtual double queryNorm(double sumOfSquaredWeights); - - /// Implemented as sqrt(freq). - virtual double tf(double freq); - - /// Implemented as 1 / (distance + 1). - virtual double sloppyFreq(int32_t distance); - - /// Implemented as log(numDocs / (docFreq + 1)) + 1. - virtual double idf(int32_t docFreq, int32_t numDocs); - - /// Implemented as overlap / maxOverlap. - virtual double coord(int32_t overlap, int32_t maxOverlap); - - /// Determines whether overlap tokens (Tokens with 0 position increment) are ignored when computing - /// norm. By default this is false, meaning overlap tokens are counted just like non-overlap tokens. 
- /// @see #computeNorm - void setDiscountOverlaps(bool v); - - /// @see #setDiscountOverlaps - bool getDiscountOverlaps(); - }; -} - -#endif diff --git a/include/DefaultSkipListReader.h b/include/DefaultSkipListReader.h deleted file mode 100644 index 811ef343..00000000 --- a/include/DefaultSkipListReader.h +++ /dev/null @@ -1,60 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DEFAULTSKIPLISTREADER_H -#define DEFAULTSKIPLISTREADER_H - -#include "MultiLevelSkipListReader.h" - -namespace Lucene -{ - /// Implements the skip list reader for the default posting list format that stores positions and payloads. - class DefaultSkipListReader : public MultiLevelSkipListReader - { - public: - DefaultSkipListReader(IndexInputPtr skipStream, int32_t maxSkipLevels, int32_t skipInterval); - virtual ~DefaultSkipListReader(); - - LUCENE_CLASS(DefaultSkipListReader); - - protected: - bool currentFieldStoresPayloads; - Collection freqPointer; - Collection proxPointer; - Collection payloadLength; - - int64_t lastFreqPointer; - int64_t lastProxPointer; - int32_t lastPayloadLength; - - public: - void init(int64_t skipPointer, int64_t freqBasePointer, int64_t proxBasePointer, int32_t df, bool storesPayloads); - - /// Returns the freq pointer of the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} - /// has skipped. - int64_t getFreqPointer(); - - /// Returns the prox pointer of the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} - /// has skipped. 
- int64_t getProxPointer(); - - /// Returns the payload length of the payload stored just before the doc to which the last call of {@link - /// MultiLevelSkipListReader#skipTo(int)} has skipped. - int32_t getPayloadLength(); - - protected: - /// Seeks the skip entry on the given level - virtual void seekChild(int32_t level); - - /// Copies the values of the last read skip entry on this level - virtual void setLastSkipData(int32_t level); - - /// Subclasses must implement the actual skip data encoding in this method. - virtual int32_t readSkipData(int32_t level, IndexInputPtr skipStream); - }; -} - -#endif diff --git a/include/DefaultSkipListWriter.h b/include/DefaultSkipListWriter.h deleted file mode 100644 index ae627fff..00000000 --- a/include/DefaultSkipListWriter.h +++ /dev/null @@ -1,53 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DEFAULTSKIPLISTWRITER_H -#define DEFAULTSKIPLISTWRITER_H - -#include "MultiLevelSkipListWriter.h" - -namespace Lucene -{ - /// Implements the skip list writer for the default posting list format that stores positions and payloads. 
- class DefaultSkipListWriter : public MultiLevelSkipListWriter - { - public: - DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, IndexOutputPtr freqOutput, IndexOutputPtr proxOutput); - virtual ~DefaultSkipListWriter(); - - LUCENE_CLASS(DefaultSkipListWriter); - - protected: - Collection lastSkipDoc; - Collection lastSkipPayloadLength; - Collection lastSkipFreqPointer; - Collection lastSkipProxPointer; - - IndexOutputPtr freqOutput; - IndexOutputPtr proxOutput; - - int32_t curDoc; - bool curStorePayloads; - int32_t curPayloadLength; - int64_t curFreqPointer; - int64_t curProxPointer; - - public: - void setFreqOutput(IndexOutputPtr freqOutput); - void setProxOutput(IndexOutputPtr proxOutput); - - /// Sets the values for the current skip data. - void setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength); - - protected: - virtual void resetSkip(); - virtual void writeSkipData(int32_t level, IndexOutputPtr skipBuffer); - - friend class FormatPostingsTermsWriter; - }; -} - -#endif diff --git a/include/Directory.h b/include/Directory.h deleted file mode 100644 index a0c32f12..00000000 --- a/include/Directory.h +++ /dev/null @@ -1,110 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DIRECTORY_H -#define DIRECTORY_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A Directory is a flat list of files. Files may be written once, when they are created. Once a file - /// is created it may only be opened for read, or deleted. Random access is permitted both when reading - /// and writing. 
Directory locking is implemented by an instance of {@link LockFactory}, and can be changed - /// for each Directory instance using {@link #setLockFactory}. - class LPPAPI Directory : public LuceneObject - { - public: - Directory(); - virtual ~Directory(); - - LUCENE_CLASS(Directory); - - protected: - bool isOpen; - - /// Holds the LockFactory instance (implements locking for this Directory instance). - LockFactoryPtr lockFactory; - - public: - /// Returns an array of strings, one for each file in the directory. - virtual HashSet listAll() = 0; - - /// Returns true if a file with the given name exists. - virtual bool fileExists(const String& name) = 0; - - /// Returns the time the named file was last modified. - virtual uint64_t fileModified(const String& name) = 0; - - /// Set the modified time of an existing file to now. - virtual void touchFile(const String& name) = 0; - - /// Removes an existing file in the directory. - virtual void deleteFile(const String& name) = 0; - - /// Returns the length of a file in the directory. - virtual int64_t fileLength(const String& name) = 0; - - /// Creates a new, empty file in the directory with the given name. - /// Returns a stream writing this file. - virtual IndexOutputPtr createOutput(const String& name) = 0; - - /// Returns a stream reading an existing file. - virtual IndexInputPtr openInput(const String& name) = 0; - - /// Closes the store. - virtual void close() = 0; - - /// Ensure that any writes to this file are moved to stable storage. Lucene uses this to properly commit - /// changes to the index, to prevent a machine/OS crash from corrupting the index. - virtual void sync(const String& name); - - /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory - /// implementation may ignore the buffer size. Currently the only Directory implementations that respect - /// this parameter are {@link FSDirectory} and {@link CompoundFileReader}. 
- virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); - - /// Construct a {@link Lock}. - /// @param name the name of the lock file. - virtual LockPtr makeLock(const String& name); - - /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you - /// are certain this lock is no longer in use. - /// @param name name of the lock to be cleared. - void clearLock(const String& name); - - /// Set the LockFactory that this Directory instance should use for its locking implementation. Each * instance - /// of LockFactory should only be used for one directory (ie, do not share a single instance across multiple - /// Directories). - /// @param lockFactory instance of {@link LockFactory}. - void setLockFactory(LockFactoryPtr lockFactory); - - /// Get the LockFactory that this Directory instance is using for its locking implementation. Note that this - /// may be null for Directory implementations that provide their own locking implementation. - LockFactoryPtr getLockFactory(); - - /// Return a string identifier that uniquely differentiates this Directory instance from other Directory - /// instances. This ID should be the same if two Directory instances are considered "the same index". - /// This is how locking "scopes" to the right index. - virtual String getLockID(); - - virtual String toString(); - - /// Copy contents of a directory src to a directory dest. If a file in src already exists in dest then the one - /// in dest will be blindly overwritten. NOTE: the source directory cannot change while this method is running. - /// Otherwise the results are undefined. - /// @param src source directory. - /// @param dest destination directory. - /// @param closeDirSrc if true, call {@link #close()} method on source directory. - static void copy(DirectoryPtr src, DirectoryPtr dest, bool closeDirSrc); - - protected: - /// @throws AlreadyClosed if this Directory is closed. 
- void ensureOpen(); - }; -} - -#endif diff --git a/include/DirectoryReader.h b/include/DirectoryReader.h deleted file mode 100644 index bfd3cc26..00000000 --- a/include/DirectoryReader.h +++ /dev/null @@ -1,353 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DIRECTORYREADER_H -#define DIRECTORYREADER_H - -#include "IndexReader.h" -#include "TermEnum.h" -#include "TermPositions.h" -#include "IndexCommit.h" -#include "SegmentMergeQueue.h" - -namespace Lucene -{ - /// An IndexReader which reads indexes with multiple segments. - class DirectoryReader : public IndexReader - { - public: - /// Construct reading the named set of readers. - DirectoryReader(DirectoryPtr directory, SegmentInfosPtr sis, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); - - /// Used by near real-time search. 
- DirectoryReader(IndexWriterPtr writer, SegmentInfosPtr infos, int32_t termInfosIndexDivisor); - - /// This constructor is only used for {@link #reopen()} - DirectoryReader(DirectoryPtr directory, SegmentInfosPtr infos, Collection oldReaders, - Collection oldStarts, MapStringByteArray oldNormsCache, bool readOnly, - bool doClone, int32_t termInfosIndexDivisor); - - virtual ~DirectoryReader(); - - LUCENE_CLASS(DirectoryReader); - - protected: - DirectoryPtr _directory; - bool readOnly; - IndexWriterWeakPtr _writer; - IndexDeletionPolicyPtr deletionPolicy; - HashSet synced; - LockPtr writeLock; - SegmentInfosPtr segmentInfos; - SegmentInfosPtr segmentInfosStart; - bool stale; - int32_t termInfosIndexDivisor; - - bool rollbackHasChanges; - - Collection subReaders; - Collection starts; // 1st docno for each segment - MapStringByteArray normsCache; - int32_t _maxDoc; - int32_t _numDocs; - bool _hasDeletions; - - // Max version in index as of when we opened; this can be > our current segmentInfos version - // in case we were opened on a past IndexCommit - int64_t maxIndexVersion; - - public: - void _initialize(Collection subReaders); - - static IndexReaderPtr open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, IndexCommitPtr commit, bool readOnly, int32_t termInfosIndexDivisor); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual LuceneObjectPtr clone(bool openReadOnly, LuceneObjectPtr other = LuceneObjectPtr()); - - virtual IndexReaderPtr reopen(); - virtual IndexReaderPtr reopen(bool openReadOnly); - virtual IndexReaderPtr reopen(IndexCommitPtr commit); - - /// Version number when this IndexReader was opened. - virtual int64_t getVersion(); - - /// Return an array of term frequency vectors for the specified document. - virtual Collection getTermFreqVectors(int32_t docNumber); - - /// Return a term frequency vector for the specified document and field. 
- virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); - - /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of the {@link TermFreqVector}. - virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); - - /// Map all the term vectors for all fields in a Document - virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); - - /// Checks is the index is optimized (if it has a single segment and no deletions). Not implemented in the IndexReader base class. - /// @return true if the index is optimized; false otherwise - virtual bool isOptimized(); - - /// Returns the number of documents in this index. - virtual int32_t numDocs(); - - /// Returns one greater than the largest possible document number. - virtual int32_t maxDoc(); - - /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine what {@link Field}s to load and how they should be loaded. - virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); - - /// Returns true if document n has been deleted - virtual bool isDeleted(int32_t n); - - /// Returns true if any documents have been deleted - virtual bool hasDeletions(); - - /// Find reader for doc n - static int32_t readerIndex(int32_t n, Collection starts, int32_t numSubReaders); - - /// Returns true if there are norms stored for this field. - virtual bool hasNorms(const String& field); - - /// Returns the byte-encoded normalization factor for the named field of every document. - virtual ByteArray norms(const String& field); - - /// Reads the byte-encoded normalization factor for the named field of every document. - virtual void norms(const String& field, ByteArray norms, int32_t offset); - - /// Returns an enumeration of all the terms in the index. - virtual TermEnumPtr terms(); - - /// Returns an enumeration of all terms starting at a given term. 
- virtual TermEnumPtr terms(TermPtr t); - - /// Returns the number of documents containing the term t. - virtual int32_t docFreq(TermPtr t); - - /// Returns an unpositioned {@link TermDocs} enumerator. - virtual TermDocsPtr termDocs(); - - /// Returns an unpositioned {@link TermPositions} enumerator. - virtual TermPositionsPtr termPositions(); - - /// Tries to acquire the WriteLock on this directory. this method is only valid if this - /// IndexReader is directory owner. - virtual void acquireWriteLock(); - - void startCommit(); - void rollbackCommit(); - - /// Retrieve the String userData optionally passed to IndexWriter#commit. - virtual MapStringString getCommitUserData(); - - /// Check whether any new changes have occurred to the index since this reader was opened. - virtual bool isCurrent(); - - /// Get a list of unique field names that exist in this index and have the specified field - /// option information. - virtual HashSet getFieldNames(FieldOption fieldOption); - - static HashSet getFieldNames(FieldOption fieldOption, Collection subReaders); - - /// Returns the sequential sub readers that this reader is logically composed of. - virtual Collection getSequentialSubReaders(); - - /// Returns the directory this index resides in. - virtual DirectoryPtr directory(); - - virtual int32_t getTermInfosIndexDivisor(); - - /// Return the IndexCommit that this reader has opened. - virtual IndexCommitPtr getIndexCommit(); - - /// Returns all commit points that exist in the Directory. - static Collection listCommits(DirectoryPtr dir); - - protected: - IndexReaderPtr doReopenFromWriter(bool openReadOnly, IndexCommitPtr commit); - IndexReaderPtr doReopen(bool openReadOnly, IndexCommitPtr commit); - IndexReaderPtr doReopenNoWriter(bool openReadOnly, IndexCommitPtr commit); - DirectoryReaderPtr doReopen(SegmentInfosPtr infos, bool doClone, bool openReadOnly); - - /// Implements deletion of the document numbered docNum. 
- virtual void doDelete(int32_t docNum); - - /// Implements actual undeleteAll() in subclass. - virtual void doUndeleteAll(); - - int32_t readerIndex(int32_t n); - - /// Implements setNorm in subclass. - virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); - - /// Commit changes resulting from delete, undeleteAll, or setNorm operations - /// - /// If an exception is hit, then either no changes or all changes will have been committed to the index (transactional semantics). - virtual void doCommit(MapStringString commitUserData); - - /// Implements close. - virtual void doClose(); - - friend class FindSegmentsReopen; - }; - - class MultiTermEnum : public TermEnum - { - public: - MultiTermEnum(IndexReaderPtr topReader, Collection readers, Collection starts, TermPtr t); - virtual ~MultiTermEnum(); - - LUCENE_CLASS(MultiTermEnum); - - protected: - SegmentMergeQueuePtr queue; - TermPtr _term; - int32_t _docFreq; - - public: - IndexReaderWeakPtr _topReader; - Collection matchingSegments; // null terminated array of matching segments - - public: - /// Increments the enumeration to the next element. True if one exists. - virtual bool next(); - - /// Returns the current Term in the enumeration. - virtual TermPtr term(); - - /// Returns the docFreq of the current Term in the enumeration. - virtual int32_t docFreq(); - - /// Closes the enumeration to further activity, freeing resources. 
- virtual void close(); - }; - - class MultiTermDocs : public TermPositions, public LuceneObject - { - public: - MultiTermDocs(IndexReaderPtr topReader, Collection r, Collection s); - virtual ~MultiTermDocs(); - - LUCENE_CLASS(MultiTermDocs); - - protected: - IndexReaderWeakPtr _topReader; // used for matching TermEnum to TermDocs - Collection readers; - Collection starts; - TermPtr term; - - int32_t base; - int32_t pointer; - - Collection readerTermDocs; - TermDocsPtr current; - MultiTermEnumPtr tenum; // the term enum used for seeking - int32_t matchingSegmentPos; // position into the matching segments from tenum - SegmentMergeInfoPtr smi; // current segment mere info - - public: - /// Returns the current document number. - virtual int32_t doc(); - - /// Returns the frequency of the term within the current document. - virtual int32_t freq(); - - /// Sets this to the data for a term. - virtual void seek(TermPtr term); - - /// Sets this to the data for the current term in a {@link TermEnum}. - virtual void seek(TermEnumPtr termEnum); - - /// Moves to the next pair in the enumeration. - virtual bool next(); - - /// Attempts to read multiple entries from the enumeration, up to length of docs. - /// Optimized implementation. - virtual int32_t read(Collection docs, Collection freqs); - - /// Skips entries to the first beyond the current whose document number is greater than or equal to target. - virtual bool skipTo(int32_t target); - - /// Frees associated resources. - virtual void close(); - - protected: - virtual TermDocsPtr termDocs(int32_t i); - virtual TermDocsPtr termDocs(IndexReaderPtr reader); - }; - - class MultiTermPositions : public MultiTermDocs - { - public: - MultiTermPositions(IndexReaderPtr topReader, Collection r, Collection s); - virtual ~MultiTermPositions(); - - LUCENE_CLASS(MultiTermPositions); - - public: - /// Returns next position in the current document. 
- virtual int32_t nextPosition(); - - /// Returns the length of the payload at the current term position. - virtual int32_t getPayloadLength(); - - /// Returns the payload data at the current term position. - virtual ByteArray getPayload(ByteArray data, int32_t offset); - - /// Checks if a payload can be loaded at this position. - virtual bool isPayloadAvailable(); - - protected: - virtual TermDocsPtr termDocs(IndexReaderPtr reader); - }; - - class ReaderCommit : public IndexCommit - { - public: - ReaderCommit(SegmentInfosPtr infos, DirectoryPtr dir); - virtual ~ReaderCommit(); - - LUCENE_CLASS(ReaderCommit); - - protected: - String segmentsFileName; - HashSet files; - DirectoryPtr dir; - int64_t generation; - int64_t version; - bool _isOptimized; - MapStringString userData; - - public: - virtual String toString(); - - /// Returns true if this commit is an optimized index. - virtual bool isOptimized(); - - /// Two IndexCommits are equal if both their Directory and versions are equal. - virtual String getSegmentsFileName(); - - /// Returns all index files referenced by this commit point. - virtual HashSet getFileNames(); - - /// Returns the {@link Directory} for the index. - virtual DirectoryPtr getDirectory(); - - /// Returns the version for this IndexCommit. - virtual int64_t getVersion(); - - /// Returns the generation (the _N in segments_N) for this IndexCommit. - virtual int64_t getGeneration(); - - virtual bool isDeleted(); - - /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. - virtual MapStringString getUserData(); - - virtual void deleteCommit(); - }; -} - -#endif diff --git a/include/DisjunctionMaxQuery.h b/include/DisjunctionMaxQuery.h deleted file mode 100644 index 5645ee75..00000000 --- a/include/DisjunctionMaxQuery.h +++ /dev/null @@ -1,98 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DISJUNCTIONMAXQUERY_H -#define DISJUNCTIONMAXQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// A query that generates the union of documents produced by its subqueries, and that scores each - /// document with the maximum score for that document as produced by any subquery, plus a tie breaking - /// increment for any additional matching subqueries. This is useful when searching for a word in - /// multiple fields with different boost factors (so that the fields cannot be combined equivalently - /// into a single search field). We want the primary score to be the one associated with the highest - /// boost, not the sum of the field scores (as BooleanQuery would give). If the query is "albino - /// elephant" this ensures that "albino" matching one field and "elephant" matching another gets a - /// higher score than "albino" matching both fields. To get this result, use both BooleanQuery and - /// DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in each field, while the - /// set of these DisjunctionMaxQuery's is combined into a BooleanQuery. The tie breaker capability - /// allows results that include the same term in multiple fields to be judged better than results that - /// include this term in only the best of those multiple fields, without confusing this with the better - /// case of two different terms in the multiple fields. - class LPPAPI DisjunctionMaxQuery : public Query - { - public: - /// Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. - /// @param tieBreakerMultiplier the score of each non-maximum disjunct for a document is multiplied - /// by this weight and added into the final score. 
If non-zero, the value should be small, on the - /// order of 0.1, which says that 10 occurrences of word in a lower-scored field that is also in a - /// higher scored field is just as good as a unique word in the lower scored field (ie., one that is - /// not in any higher scored field. - DisjunctionMaxQuery(double tieBreakerMultiplier = 0.0); - - /// Creates a new DisjunctionMaxQuery - /// @param disjuncts A Collection of all the disjuncts to add - /// @param tieBreakerMultiplier The weight to give to each matching non-maximum disjunct - DisjunctionMaxQuery(Collection disjuncts, double tieBreakerMultiplier); - - virtual ~DisjunctionMaxQuery(); - - LUCENE_CLASS(DisjunctionMaxQuery); - - protected: - /// The subqueries - Collection disjuncts; - - /// Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. - double tieBreakerMultiplier; - - public: - using Query::toString; - - /// Add a subquery to this disjunction - /// @param query the disjunct added - void add(QueryPtr query); - - /// Add a collection of disjuncts to this disjunction - void add(Collection disjuncts); - - /// An iterator over the disjuncts - Collection::iterator begin(); - Collection::iterator end(); - - /// Create the Weight used to score us - virtual WeightPtr createWeight(SearcherPtr searcher); - - /// Optimize our representation and our subqueries representations - /// @param reader the IndexReader we query - /// @return an optimized copy of us (which may not be a copy if there is nothing to optimize) - virtual QueryPtr rewrite(IndexReaderPtr reader); - - /// Create a shallow copy of us - used in rewriting if necessary - /// @return a copy of us (but reuse, don't copy, our subqueries) - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Adds all terms occurring in this query to the terms set. - virtual void extractTerms(SetTerm terms); - - /// Pretty print us. 
- /// @param field the field to which we are applied - /// @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" - virtual String toString(const String& field); - - /// @return true if other is a DisjunctionMaxQuery with the same boost and the same subqueries, in the - /// same order, as us - virtual bool equals(LuceneObjectPtr other); - - virtual int32_t hashCode(); - - friend class DisjunctionMaxWeight; - }; -} - -#endif diff --git a/include/DisjunctionMaxScorer.h b/include/DisjunctionMaxScorer.h deleted file mode 100644 index 905727b7..00000000 --- a/include/DisjunctionMaxScorer.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DISJUNCTIONMAXSCORER_H -#define DISJUNCTIONMAXSCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// The Scorer for DisjunctionMaxQuery. The union of all documents generated by the the subquery scorers - /// is generated in document number order. The score for each document is the maximum of the scores computed - /// by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores - /// for the other subqueries that generate the document. - class DisjunctionMaxScorer : public Scorer - { - public: - DisjunctionMaxScorer(double tieBreakerMultiplier, SimilarityPtr similarity, Collection subScorers, int32_t numScorers); - virtual ~DisjunctionMaxScorer(); - - LUCENE_CLASS(DisjunctionMaxScorer); - - protected: - /// The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. 
- Collection subScorers; - int32_t numScorers; - - /// Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. - double tieBreakerMultiplier; - - int32_t doc; - - public: - virtual int32_t nextDoc(); - virtual int32_t docID(); - - /// Determine the current document score. Initially invalid, until {@link #next()} is called the first time. - /// @return the score of the current generated document - virtual double score(); - - virtual int32_t advance(int32_t target); - - protected: - /// Recursively iterate all subScorers that generated last doc computing sum and max - void scoreAll(int32_t root, int32_t size, int32_t doc, Collection sum, Collection max); - - /// Organize subScorers into a min heap with scorers generating the earliest document on top. - void heapify(); - - /// The subtree of subScorers at root is a min heap except possibly for its root element. Bubble the root - /// down as required to make the subtree a heap. - void heapAdjust(int32_t root); - - /// Remove the root Scorer from subScorers and re-establish it as a heap - void heapRemoveRoot(); - }; -} - -#endif diff --git a/include/DisjunctionSumScorer.h b/include/DisjunctionSumScorer.h deleted file mode 100644 index 1bcbbec3..00000000 --- a/include/DisjunctionSumScorer.h +++ /dev/null @@ -1,95 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DISJUNCTIONSUMSCORER_H -#define DISJUNCTIONSUMSCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// A Scorer for OR like queries, counterpart of ConjunctionScorer. This Scorer implements {@link - /// Scorer#skipTo(int32_t)} and uses skipTo() on the given Scorers. 
- class DisjunctionSumScorer : public Scorer - { - public: - DisjunctionSumScorer(Collection subScorers, int32_t minimumNrMatchers = 1); - virtual ~DisjunctionSumScorer(); - - LUCENE_CLASS(DisjunctionSumScorer); - - protected: - /// The number of subscorers. - int32_t nrScorers; - - /// The subscorers. - Collection subScorers; - - /// The minimum number of scorers that should match. - int32_t minimumNrMatchers; - - /// The scorerDocQueue contains all subscorers ordered by their current doc(), with the minimum at - /// the top. The scorerDocQueue is initialized the first time next() or skipTo() is called. An exhausted - /// scorer is immediately removed from the scorerDocQueue. If less than the minimumNrMatchers scorers - /// remain in the scorerDocQueue next() and skipTo() return false. - /// - /// After each to call to next() or skipTo() currentSumScore is the total score of the current matching doc, - /// nrMatchers is the number of matching scorers, and all scorers are after the matching doc, or are exhausted. - ScorerDocQueuePtr scorerDocQueue; - - /// The document number of the current match. - int32_t currentDoc; - - /// The number of subscorers that provide the current match. - int32_t _nrMatchers; - - double currentScore; - - public: - virtual void initialize(); - - virtual void score(CollectorPtr collector); - virtual int32_t nextDoc(); - - /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} - /// is called the first time. - virtual double score(); - - virtual int32_t docID(); - - /// Returns the number of subscorers matching the current document. Initially invalid, until {@link #next()} - /// is called the first time. - int32_t nrMatchers(); - - /// Advances to the first match beyond the current whose document number is greater than or equal to a given - /// target. The implementation uses the skipTo() method on the subscorers. - /// - /// @param target The target document number. 
- /// @return the document whose number is greater than or equal to the given target, or -1 if none exist. - virtual int32_t advance(int32_t target); - - protected: - /// Called the first time next() or skipTo() is called to initialize scorerDocQueue. - void initScorerDocQueue(); - - /// Collects matching documents in a range. Hook for optimization. Note that {@link #next()} must be - /// called once before this method is called for the first time. - /// @param collector The collector to which all matching documents are passed through. - /// @param max Do not score documents past this. - /// @return true if more matching documents may remain. - virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); - - /// Advance all subscorers after the current document determined by the top of the scorerDocQueue. Repeat - /// until at least the minimum number of subscorers match on the same document and all subscorers are after - /// that document or are exhausted. On entry the scorerDocQueue has at least minimumNrMatchers available. - /// At least the scorer with the minimum document number will be advanced. - /// @return true if there is a match. In case there is a match, currentDoc, currentSumScore and nrMatchers - /// describe the match. - bool advanceAfterCurrent(); - }; -} - -#endif diff --git a/include/DocConsumer.h b/include/DocConsumer.h deleted file mode 100644 index e0c5e768..00000000 --- a/include/DocConsumer.h +++ /dev/null @@ -1,30 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCCONSUMER_H -#define DOCCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class DocConsumer : public LuceneObject - { - public: - virtual ~DocConsumer(); - - LUCENE_CLASS(DocConsumer); - - public: - virtual DocConsumerPerThreadPtr addThread(DocumentsWriterThreadStatePtr perThread) = 0; - virtual void flush(Collection threads, SegmentWriteStatePtr state) = 0; - virtual void closeDocStore(SegmentWriteStatePtr state) = 0; - virtual void abort() = 0; - virtual bool freeRAM() = 0; - }; -} - -#endif diff --git a/include/DocConsumerPerThread.h b/include/DocConsumerPerThread.h deleted file mode 100644 index 7988c2d5..00000000 --- a/include/DocConsumerPerThread.h +++ /dev/null @@ -1,31 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCCONSUMERPERTHREAD_H -#define DOCCONSUMERPERTHREAD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class DocConsumerPerThread : public LuceneObject - { - public: - virtual ~DocConsumerPerThread(); - - LUCENE_CLASS(DocConsumerPerThread); - - public: - /// Process the document. If there is something for this document to be done in docID order, - /// you should encapsulate that as a DocWriter and return it. - /// DocumentsWriter then calls finish() on this object when it's its turn. 
- virtual DocWriterPtr processDocument() = 0; - - virtual void abort() = 0; - }; -} - -#endif diff --git a/include/DocFieldConsumer.h b/include/DocFieldConsumer.h deleted file mode 100644 index cd14243e..00000000 --- a/include/DocFieldConsumer.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDCONSUMER_H -#define DOCFIELDCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class DocFieldConsumer : public LuceneObject - { - public: - virtual ~DocFieldConsumer(); - - LUCENE_CLASS(DocFieldConsumer); - - protected: - FieldInfosPtr fieldInfos; - - public: - /// Called when DocumentsWriter decides to create a new segment - virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state) = 0; - - /// Called when DocumentsWriter decides to close the doc stores - virtual void closeDocStore(SegmentWriteStatePtr state) = 0; - - /// Called when an aborting exception is hit - virtual void abort() = 0; - - /// Add a new thread - virtual DocFieldConsumerPerThreadPtr addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread) = 0; - - /// Called when DocumentsWriter is using too much RAM. The consumer should free RAM, if possible, returning - /// true if any RAM was in fact freed. 
- virtual bool freeRAM() = 0; - - virtual void setFieldInfos(FieldInfosPtr fieldInfos); - }; -} - -#endif diff --git a/include/DocFieldConsumerPerField.h b/include/DocFieldConsumerPerField.h deleted file mode 100644 index 2e8866a9..00000000 --- a/include/DocFieldConsumerPerField.h +++ /dev/null @@ -1,29 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDCONSUMERPERFIELD_H -#define DOCFIELDCONSUMERPERFIELD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class DocFieldConsumerPerField : public LuceneObject - { - public: - virtual ~DocFieldConsumerPerField(); - - LUCENE_CLASS(DocFieldConsumerPerField); - - public: - /// Processes all occurrences of a single field - virtual void processFields(Collection fields, int32_t count) = 0; - - virtual void abort() = 0; - }; -} - -#endif diff --git a/include/DocFieldConsumerPerThread.h b/include/DocFieldConsumerPerThread.h deleted file mode 100644 index ab701262..00000000 --- a/include/DocFieldConsumerPerThread.h +++ /dev/null @@ -1,29 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDCONSUMERPERTHREAD_H -#define DOCFIELDCONSUMERPERTHREAD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class DocFieldConsumerPerThread : public LuceneObject - { - public: - virtual ~DocFieldConsumerPerThread(); - - LUCENE_CLASS(DocFieldConsumerPerThread); - - public: - virtual void startDocument() = 0; - virtual DocWriterPtr finishDocument() = 0; - virtual DocFieldConsumerPerFieldPtr addField(FieldInfoPtr fi) = 0; - virtual void abort() = 0; - }; -} - -#endif diff --git a/include/DocFieldConsumers.h b/include/DocFieldConsumers.h deleted file mode 100644 index e6949ac6..00000000 --- a/include/DocFieldConsumers.h +++ /dev/null @@ -1,73 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDCONSUMERS_H -#define DOCFIELDCONSUMERS_H - -#include "DocFieldConsumer.h" -#include "DocumentsWriter.h" - -namespace Lucene -{ - /// This is just a "splitter" class: it lets you wrap two DocFieldConsumer instances as a single consumer. 
- class DocFieldConsumers : public DocFieldConsumer - { - public: - DocFieldConsumers(DocFieldConsumerPtr one, DocFieldConsumerPtr two); - virtual ~DocFieldConsumers(); - - LUCENE_CLASS(DocFieldConsumers); - - public: - DocFieldConsumerPtr one; - DocFieldConsumerPtr two; - - Collection docFreeList; - int32_t freeCount; - int32_t allocCount; - - public: - virtual void setFieldInfos(FieldInfosPtr fieldInfos); - - /// Called when DocumentsWriter decides to create a new segment - virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state); - - /// Called when DocumentsWriter decides to close the doc stores - virtual void closeDocStore(SegmentWriteStatePtr state); - - /// Called when DocumentsWriter is using too much RAM. - virtual bool freeRAM(); - - /// Add a new thread - virtual DocFieldConsumerPerThreadPtr addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread); - - DocFieldConsumersPerDocPtr getPerDoc(); - void freePerDoc(DocFieldConsumersPerDocPtr perDoc); - }; - - class DocFieldConsumersPerDoc : public DocWriter - { - public: - DocFieldConsumersPerDoc(DocFieldConsumersPtr fieldConsumers); - virtual ~DocFieldConsumersPerDoc(); - - LUCENE_CLASS(DocFieldConsumersPerDoc); - - protected: - DocFieldConsumersWeakPtr _fieldConsumers; - - public: - DocWriterPtr one; - DocWriterPtr two; - - public: - virtual int64_t sizeInBytes(); - virtual void finish(); - virtual void abort(); - }; -} - -#endif diff --git a/include/DocFieldConsumersPerField.h b/include/DocFieldConsumersPerField.h deleted file mode 100644 index e01bb10d..00000000 --- a/include/DocFieldConsumersPerField.h +++ /dev/null @@ -1,35 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDCONSUMERSPERFIELD_H -#define DOCFIELDCONSUMERSPERFIELD_H - -#include "DocFieldConsumerPerField.h" - -namespace Lucene -{ - class DocFieldConsumersPerField : public DocFieldConsumerPerField - { - public: - DocFieldConsumersPerField(DocFieldConsumersPerThreadPtr perThread, DocFieldConsumerPerFieldPtr one, DocFieldConsumerPerFieldPtr two); - virtual ~DocFieldConsumersPerField(); - - LUCENE_CLASS(DocFieldConsumersPerField); - - public: - DocFieldConsumerPerFieldPtr one; - DocFieldConsumerPerFieldPtr two; - DocFieldConsumersPerThreadWeakPtr _perThread; - - public: - /// Processes all occurrences of a single field - virtual void processFields(Collection fields, int32_t count); - - virtual void abort(); - }; -} - -#endif diff --git a/include/DocFieldConsumersPerThread.h b/include/DocFieldConsumersPerThread.h deleted file mode 100644 index e8c86a8f..00000000 --- a/include/DocFieldConsumersPerThread.h +++ /dev/null @@ -1,37 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDCONSUMERSPERTHREAD_H -#define DOCFIELDCONSUMERSPERTHREAD_H - -#include "DocFieldConsumerPerThread.h" - -namespace Lucene -{ - class DocFieldConsumersPerThread : public DocFieldConsumerPerThread - { - public: - DocFieldConsumersPerThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread, DocFieldConsumersPtr parent, - DocFieldConsumerPerThreadPtr one, DocFieldConsumerPerThreadPtr two); - virtual ~DocFieldConsumersPerThread(); - - LUCENE_CLASS(DocFieldConsumersPerThread); - - public: - DocFieldConsumerPerThreadPtr one; - DocFieldConsumerPerThreadPtr two; - DocFieldConsumersWeakPtr _parent; - DocStatePtr docState; - - public: - virtual void startDocument(); - virtual void abort(); - virtual DocWriterPtr finishDocument(); - virtual DocFieldConsumerPerFieldPtr addField(FieldInfoPtr fi); - }; -} - -#endif diff --git a/include/DocFieldProcessor.h b/include/DocFieldProcessor.h deleted file mode 100644 index 3bfcd05d..00000000 --- a/include/DocFieldProcessor.h +++ /dev/null @@ -1,40 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDPROCESSOR_H -#define DOCFIELDPROCESSOR_H - -#include "DocConsumer.h" - -namespace Lucene -{ - /// This is a DocConsumer that gathers all fields under the same name, and calls per-field consumers to process - /// field by field. This class doesn't doesn't do any "real" work of its own: it just forwards the fields to a - /// DocFieldConsumer. 
- class DocFieldProcessor : public DocConsumer - { - public: - DocFieldProcessor(DocumentsWriterPtr docWriter, DocFieldConsumerPtr consumer); - virtual ~DocFieldProcessor(); - - LUCENE_CLASS(DocFieldProcessor); - - public: - DocumentsWriterWeakPtr _docWriter; - FieldInfosPtr fieldInfos; - DocFieldConsumerPtr consumer; - StoredFieldsWriterPtr fieldsWriter; - - public: - virtual void closeDocStore(SegmentWriteStatePtr state); - virtual void flush(Collection threads, SegmentWriteStatePtr state); - virtual void abort(); - virtual bool freeRAM(); - virtual DocConsumerPerThreadPtr addThread(DocumentsWriterThreadStatePtr perThread); - }; -} - -#endif diff --git a/include/DocFieldProcessorPerField.h b/include/DocFieldProcessorPerField.h deleted file mode 100644 index 08f6f50f..00000000 --- a/include/DocFieldProcessorPerField.h +++ /dev/null @@ -1,38 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDPROCESSORPERFIELD_H -#define DOCFIELDPROCESSORPERFIELD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Holds all per thread, per field state. 
- class DocFieldProcessorPerField : public LuceneObject - { - public: - DocFieldProcessorPerField(DocFieldProcessorPerThreadPtr perThread, FieldInfoPtr fieldInfo); - virtual ~DocFieldProcessorPerField(); - - LUCENE_CLASS(DocFieldProcessorPerField); - - public: - DocFieldConsumerPerFieldPtr consumer; - FieldInfoPtr fieldInfo; - - DocFieldProcessorPerFieldPtr next; - int32_t lastGen; - - int32_t fieldCount; - Collection fields; - - public: - virtual void abort(); - }; -} - -#endif diff --git a/include/DocFieldProcessorPerThread.h b/include/DocFieldProcessorPerThread.h deleted file mode 100644 index 2b3f9067..00000000 --- a/include/DocFieldProcessorPerThread.h +++ /dev/null @@ -1,86 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCFIELDPROCESSORPERTHREAD_H -#define DOCFIELDPROCESSORPERTHREAD_H - -#include "DocConsumerPerThread.h" -#include "DocumentsWriter.h" - -namespace Lucene -{ - /// Gathers all Fieldables for a document under the same name, updates FieldInfos, and calls per-field - /// consumers to process field by field. - /// - /// Currently, only a single thread visits the fields, sequentially, for processing. 
- class DocFieldProcessorPerThread : public DocConsumerPerThread - { - public: - DocFieldProcessorPerThread(DocumentsWriterThreadStatePtr threadState, DocFieldProcessorPtr docFieldProcessor); - virtual ~DocFieldProcessorPerThread(); - - LUCENE_CLASS(DocFieldProcessorPerThread); - - public: - double docBoost; - int32_t fieldGen; - DocFieldProcessorWeakPtr _docFieldProcessor; - FieldInfosPtr fieldInfos; - DocFieldConsumerPerThreadPtr consumer; - Collection _fields; // Holds all fields seen in current doc - int32_t fieldCount; - - Collection fieldHash; // Hash table for all fields ever seen - int32_t hashMask; - int32_t totalFieldCount; - - StoredFieldsWriterPerThreadPtr fieldsWriter; - DocStatePtr docState; - - Collection docFreeList; - int32_t freeCount; - int32_t allocCount; - - public: - virtual void initialize(); - virtual void abort(); - Collection fields(); - - // If there are fields we've seen but did not see again in the last run, then free them up. - void trimFields(SegmentWriteStatePtr state); - - virtual DocWriterPtr processDocument(); - - DocFieldProcessorPerThreadPerDocPtr getPerDoc(); - void freePerDoc(DocFieldProcessorPerThreadPerDocPtr perDoc); - - protected: - void rehash(); - }; - - class DocFieldProcessorPerThreadPerDoc : public DocWriter - { - public: - DocFieldProcessorPerThreadPerDoc(DocFieldProcessorPerThreadPtr docProcessor); - virtual ~DocFieldProcessorPerThreadPerDoc(); - - LUCENE_CLASS(DocFieldProcessorPerThreadPerDoc); - - public: - DocWriterPtr one; - DocWriterPtr two; - - protected: - DocFieldProcessorPerThreadWeakPtr _docProcessor; - - public: - virtual int64_t sizeInBytes(); - virtual void finish(); - virtual void abort(); - }; -} - -#endif diff --git a/include/DocIdBitSet.h b/include/DocIdBitSet.h deleted file mode 100644 index f6a1140a..00000000 --- a/include/DocIdBitSet.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCIDBITSET_H -#define DOCIDBITSET_H - -#include "DocIdSet.h" - -namespace Lucene -{ - /// Simple DocIdSet and DocIdSetIterator backed by a BitSet - class LPPAPI DocIdBitSet : public DocIdSet - { - public: - DocIdBitSet(); - DocIdBitSet(BitSetPtr bitSet); - - virtual ~DocIdBitSet(); - - LUCENE_CLASS(DocIdBitSet); - - protected: - BitSetPtr bitSet; - - public: - virtual DocIdSetIteratorPtr iterator(); - - /// This DocIdSet implementation is cacheable. - virtual bool isCacheable(); - - /// Returns the underlying BitSet. - BitSetPtr getBitSet(); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/DocIdSet.h b/include/DocIdSet.h deleted file mode 100644 index c4022cca..00000000 --- a/include/DocIdSet.h +++ /dev/null @@ -1,38 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCIDSET_H -#define DOCIDSET_H - -#include "DocIdSetIterator.h" - -namespace Lucene -{ - /// A DocIdSet contains a set of doc ids. Implementing classes must only implement {@link #iterator} to - /// provide access to the set. - class LPPAPI DocIdSet : public LuceneObject - { - public: - virtual ~DocIdSet(); - LUCENE_CLASS(DocIdSet); - - public: - /// Provides a {@link DocIdSetIterator} to access the set. This implementation can return null or - /// {@link #EmptyDocIdSet}.iterator() if there are no docs that match. 
- virtual DocIdSetIteratorPtr iterator() = 0; - - /// This method is a hint for {@link CachingWrapperFilter}, if this DocIdSet should be cached without - /// copying it into a BitSet. The default is to return false. If you have an own DocIdSet implementation - /// that does its iteration very effective and fast without doing disk I/O, override this method and - /// return true. - virtual bool isCacheable(); - - /// An empty {@code DocIdSet} instance for easy use, eg. in Filters that hit no documents. - static DocIdSetPtr EMPTY_DOCIDSET(); - }; -} - -#endif diff --git a/include/DocIdSetIterator.h b/include/DocIdSetIterator.h deleted file mode 100644 index 69fac733..00000000 --- a/include/DocIdSetIterator.h +++ /dev/null @@ -1,75 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCIDSETITERATOR_H -#define DOCIDSETITERATOR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// This abstract class defines methods to iterate over a set of non-decreasing doc ids. Note that this class - /// assumes it iterates on doc Ids, and therefore {@link #NO_MORE_DOCS} is set to {@value #NO_MORE_DOCS} in order to - /// be used as a sentinel object. Implementations of this class are expected to consider INT_MAX as an invalid value. - class LPPAPI DocIdSetIterator : public LuceneObject - { - public: - virtual ~DocIdSetIterator(); - - LUCENE_CLASS(DocIdSetIterator); - - public: - /// When returned by {@link #nextDoc()}, {@link #advance(int)} and {@link #docID()} it means there are no more - /// docs in the iterator. - static const int32_t NO_MORE_DOCS; - - public: - /// Returns the following: - ///
    - ///
  • -1 or {@link #NO_MORE_DOCS} if {@link #nextDoc()} or {@link #advance(int)} were not called yet. - ///
  • {@link #NO_MORE_DOCS} if the iterator has exhausted. - ///
  • Otherwise it should return the doc ID it is currently on. - ///
- virtual int32_t docID() = 0; - - /// Advances to the next document in the set and returns the doc it is currently on, or {@link #NO_MORE_DOCS} - /// if there are no more docs in the set. - /// - /// NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted - /// behaviour. - virtual int32_t nextDoc() = 0; - - /// Advances to the first beyond the current whose document number is greater than or equal to target. Returns - /// the current document number or {@link #NO_MORE_DOCS} if there are no more docs in the set. - /// - /// Behaves as if written: - /// - ///
-        /// int32_t advance(int32_t target)
-        /// {
-        ///     int32_t doc;
-        ///     while ((doc = nextDoc()) < target)
-        ///     { }
-        ///     return doc;
-        /// }
-        /// 
- /// - /// Some implementations are considerably more efficient than that. - /// - /// NOTE: certain implementations may return a different value (each time) if called several times in a row - /// with the same target. - /// - /// NOTE: this method may be called with {@value #NO_MORE_DOCS} for efficiency by some Scorers. If your - /// implementation cannot efficiently determine that it should exhaust, it is recommended that you check for - /// that value in each call to this method. - /// - /// NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted - /// behaviour. - virtual int32_t advance(int32_t target) = 0; - }; -} - -#endif diff --git a/include/DocInverter.h b/include/DocInverter.h deleted file mode 100644 index 321bd85a..00000000 --- a/include/DocInverter.h +++ /dev/null @@ -1,48 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCINVERTER_H -#define DOCINVERTER_H - -#include "DocFieldConsumer.h" - -namespace Lucene -{ - /// This is a DocFieldConsumer that inverts each field, separately, from a Document, and accepts a - /// InvertedTermsConsumer to process those terms. 
- class DocInverter : public DocFieldConsumer - { - public: - DocInverter(InvertedDocConsumerPtr consumer, InvertedDocEndConsumerPtr endConsumer); - virtual ~DocInverter(); - - LUCENE_CLASS(DocInverter); - - public: - InvertedDocConsumerPtr consumer; - InvertedDocEndConsumerPtr endConsumer; - - public: - virtual void setFieldInfos(FieldInfosPtr fieldInfos); - - /// Called when DocumentsWriter decides to create a new segment - virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state); - - /// Called when DocumentsWriter decides to close the doc stores - virtual void closeDocStore(SegmentWriteStatePtr state); - - /// Called when an aborting exception is hit - virtual void abort(); - - /// Called when DocumentsWriter is using too much RAM. - virtual bool freeRAM(); - - /// Add a new thread - virtual DocFieldConsumerPerThreadPtr addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread); - }; -} - -#endif diff --git a/include/DocInverterPerField.h b/include/DocInverterPerField.h deleted file mode 100644 index bdb0c92e..00000000 --- a/include/DocInverterPerField.h +++ /dev/null @@ -1,44 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCINVERTERPERFIELD_H -#define DOCINVERTERPERFIELD_H - -#include "DocFieldConsumerPerField.h" - -namespace Lucene -{ - /// Holds state for inverting all occurrences of a single field in the document. This class doesn't do - /// anything itself; instead, it forwards the tokens produced by analysis to its own consumer - /// (InvertedDocConsumerPerField). It also interacts with an endConsumer (InvertedDocEndConsumerPerField). 
- class DocInverterPerField : public DocFieldConsumerPerField - { - public: - DocInverterPerField(DocInverterPerThreadPtr perThread, FieldInfoPtr fieldInfo); - virtual ~DocInverterPerField(); - - LUCENE_CLASS(DocInverterPerField); - - protected: - DocInverterPerThreadWeakPtr _perThread; - FieldInfoPtr fieldInfo; - - public: - InvertedDocConsumerPerFieldPtr consumer; - InvertedDocEndConsumerPerFieldPtr endConsumer; - DocStatePtr docState; - FieldInvertStatePtr fieldState; - - public: - virtual void initialize(); - virtual void abort(); - - /// Processes all occurrences of a single field - virtual void processFields(Collection fields, int32_t count); - }; -} - -#endif diff --git a/include/DocInverterPerThread.h b/include/DocInverterPerThread.h deleted file mode 100644 index 76886009..00000000 --- a/include/DocInverterPerThread.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCINVERTERPERTHREAD_H -#define DOCINVERTERPERTHREAD_H - -#include "DocFieldConsumerPerThread.h" -#include "AttributeSource.h" - -namespace Lucene -{ - /// This is a DocFieldConsumer that inverts each field, separately, from a Document, and accepts a - /// InvertedTermsConsumer to process those terms. 
- class DocInverterPerThread : public DocFieldConsumerPerThread - { - public: - DocInverterPerThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread, DocInverterPtr docInverter); - virtual ~DocInverterPerThread(); - - LUCENE_CLASS(DocInverterPerThread); - - public: - DocInverterWeakPtr _docInverter; - InvertedDocConsumerPerThreadPtr consumer; - InvertedDocEndConsumerPerThreadPtr endConsumer; - SingleTokenAttributeSourcePtr singleToken; - - DocStatePtr docState; - FieldInvertStatePtr fieldState; - - /// Used to read a string value for a field - ReusableStringReaderPtr stringReader; - - public: - virtual void initialize(); - virtual void startDocument(); - virtual DocWriterPtr finishDocument(); - virtual void abort(); - virtual DocFieldConsumerPerFieldPtr addField(FieldInfoPtr fi); - }; - - class SingleTokenAttributeSource : public AttributeSource - { - public: - SingleTokenAttributeSource(); - virtual ~SingleTokenAttributeSource(); - - LUCENE_CLASS(SingleTokenAttributeSource); - - public: - TermAttributePtr termAttribute; - OffsetAttributePtr offsetAttribute; - - public: - void reinit(const String& stringValue, int32_t startOffset, int32_t endOffset); - }; -} - -#endif diff --git a/include/DocValues.h b/include/DocValues.h deleted file mode 100644 index fd82aa26..00000000 --- a/include/DocValues.h +++ /dev/null @@ -1,98 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCVALUES_H -#define DOCVALUES_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Represents field values as different types. Normally created via a {@link ValueSuorce} for a - /// particular field and reader. 
- /// - /// DocValues is distinct from ValueSource because there needs to be an object created at query - /// evaluation time that is not referenced by the query itself because: - /// - Query objects should be MT safe - /// - For caching, Query objects are often used as keys... you don't want the Query carrying around - /// big objects - class LPPAPI DocValues : public LuceneObject - { - public: - DocValues(); - virtual ~DocValues(); - - LUCENE_CLASS(DocValues); - - protected: - double minVal; - double maxVal; - double avgVal; - bool computed; - - public: - using LuceneObject::toString; - - /// Return doc value as a double. - /// Mandatory: every DocValues implementation must implement at least this method. - /// @param doc document whose double value is requested. - virtual double doubleVal(int32_t doc) = 0; - - /// Return doc value as an int. - /// Optional: DocValues implementation can (but don't have to) override this method. - /// @param doc document whose int value is requested. - virtual int32_t intVal(int32_t doc); - - /// Return doc value as a long. - /// Optional: DocValues implementation can (but don't have to) override this method. - /// @param doc document whose long value is requested. - virtual int64_t longVal(int32_t doc); - - /// Return doc value as a string. - /// Optional: DocValues implementation can (but don't have to) override this method. - /// @param doc document whose string value is requested. - virtual String strVal(int32_t doc); - - /// Return a string representation of a doc value, as required for Explanations. - virtual String toString(int32_t doc) = 0; - - /// Explain the scoring value for the input doc. - virtual ExplanationPtr explain(int32_t doc); - - /// For test purposes only, return the inner array of values, or null if not applicable. - /// - /// Allows tests to verify that loaded values are: - ///
    - ///
  1. indeed cached/reused. - ///
  2. stored in the expected size/type (byte/short/int/float). - ///
- /// - /// Note: implementations of DocValues must override this method for these test elements to be tested, - /// Otherwise the test would not fail, just print a warning. - virtual CollectionValue getInnerArray(); - - /// Returns the minimum of all values or NaN if this DocValues instance does not contain any value. - /// This operation is optional - /// @return the minimum of all values or NaN if this DocValues instance does not contain any value. - virtual double getMinValue(); - - /// Returns the maximum of all values or NaN if this DocValues instance does not contain any value. - /// This operation is optional - /// @return the maximum of all values or NaN if this DocValues instance does not contain any value. - virtual double getMaxValue(); - - /// Returns the average of all values or NaN if this DocValues instance does not contain any value. - /// This operation is optional - /// @return the average of all values or NaN if this DocValues instance does not contain any value. - virtual double getAverageValue(); - - protected: - /// Compute optional values - void compute(); - }; -} - -#endif diff --git a/include/Document.h b/include/Document.h deleted file mode 100644 index 328ed61a..00000000 --- a/include/Document.h +++ /dev/null @@ -1,142 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCUMENT_H -#define DOCUMENT_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Documents are the unit of indexing and search. - /// - /// A Document is a set of fields. Each field has a name and a textual value. A field may be {@link - /// Fieldable#isStored() stored} with the document, in which case it is returned with search hits on the - /// document. 
Thus each document should typically contain one or more stored fields which uniquely - /// identify it. - /// - /// Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents - /// retrieved from the index, eg. with {@link ScoreDoc#doc}, {@link Searcher#doc(int)} or {@link - /// IndexReader#document(int)}. - class LPPAPI Document : public LuceneObject - { - public: - /// Constructs a new document with no fields. - Document(); - - virtual ~Document(); - - LUCENE_CLASS(Document); - - protected: - Collection fields; - double boost; - - public: - /// Sets a boost factor for hits on any field of this document. This value will be multiplied into the - /// score of all hits on this document. - /// - /// The default value is 1.0. - /// - /// Values are multiplied into the value of {@link Fieldable#getBoost()} of each field in this document. - /// Thus, this method in effect sets a default boost for the fields of this document. - /// - /// @see Fieldable#setBoost(double) - void setBoost(double boost); - - /// Returns, at indexing time, the boost factor as set by {@link #setBoost(double)}. - /// - /// Note that once a document is indexed this value is no longer available from the index. At search time, - /// for retrieved documents, this method always returns 1. This however does not mean that the boost value - /// set at indexing time was ignored - it was just combined with other indexing time factors and stored - /// elsewhere, for better indexing and search performance. (For more information see the "norm(t,d)" part - /// of the scoring formula in {@link Similarity}.) - /// - /// @see #setBoost(double) - double getBoost(); - - /// Adds a field to a document. Several fields may be added with the same name. In this case, if the fields - /// are indexed, their text is treated as though appended for the purposes of search. - /// - /// Note that add like the removeField(s) methods only makes sense prior to adding a document to an index. 
- /// These methods cannot be used to change the content of an existing index! In order to achieve this, a - /// document has to be deleted from an index and a new changed version of that document has to be added. - void add(FieldablePtr field); - - /// Removes field with the specified name from the document. If multiple fields exist with this name, this - /// method removes the first field that has been added. If there is no field with the specified name, the - /// document remains unchanged. - /// - /// Note that the removeField(s) methods like the add method only make sense prior to adding a document to - /// an index. These methods cannot be used to change the content of an existing index! In order to achieve - /// this, a document has to be deleted from an index and a new changed version of that document has to be added. - void removeField(const String& name); - - /// Removes all fields with the given name from the document. If there is no field with the specified name, - /// the document remains unchanged. - /// - /// Note that the removeField(s) methods like the add method only make sense prior to adding a document to an - /// index. These methods cannot be used to change the content of an existing index! In order to achieve this, - /// a document has to be deleted from an index and a new changed version of that document has to be added. - void removeFields(const String& name); - - /// Returns a field with the given name if any exist in this document, or null. If multiple fields exists with - /// this name, this method returns the first value added. - /// Do not use this method with lazy loaded fields. - FieldPtr getField(const String& name); - - /// Returns a field with the given name if any exist in this document, or null. If multiple fields exists with - /// this name, this method returns the first value added. 
- FieldablePtr getFieldable(const String& name); - - /// Returns the string value of the field with the given name if any exist in this document, or null. If multiple - /// fields exist with this name, this method returns the first value added. If only binary fields with this name - /// exist, returns null. - String get(const String& name); - - /// Returns a List of all the fields in a document. - /// - /// Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents - /// retrieved from the index, eg. {@link Searcher#doc(int)} or {@link IndexReader#document(int)}. - Collection getFields(); - - /// Returns an array of {@link Field}s with the given name. Do not use with lazy loaded fields. This method - /// returns an empty array when there are no matching fields. It never returns null. - /// @param name the name of the field - /// @return a Field[] array - Collection getFields(const String& name); - - /// Returns an array of {@link Fieldable}s with the given name. - /// This method returns an empty array when there are no matching fields. It never returns null. - /// @param name the name of the field - /// @return a Fieldable[] array - Collection getFieldables(const String& name); - - /// Returns an array of values of the field specified as the method parameter. - /// This method returns an empty array when there are no matching fields. It never returns null. - /// @param name the name of the field - /// @return a String[] of field values - Collection getValues(const String& name); - - /// Returns an array of byte arrays for of the fields that have the name specified as the method parameter. - /// This method returns an empty array when there are no matching fields. It never returns null. 
- /// @param name the name of the field - /// @return a byte[][] of binary field values - Collection getBinaryValues(const String& name); - - /// Returns an array of bytes for the first (or only) field that has the name specified as the method parameter. - /// This method will return null if no binary fields with the specified name are available. There may be - /// non-binary fields with the same name. - /// @param name the name of the field. - /// @return a byte[] containing the binary field value or null - ByteArray getBinaryValue(const String& name); - - /// Returns a string representation of the object - virtual String toString(); - }; -} - -#endif diff --git a/include/DocumentsWriter.h b/include/DocumentsWriter.h deleted file mode 100644 index 1f2d8de5..00000000 --- a/include/DocumentsWriter.h +++ /dev/null @@ -1,530 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCUMENTSWRITER_H -#define DOCUMENTSWRITER_H - -#include "ByteBlockPool.h" -#include "RAMFile.h" - -namespace Lucene -{ - /// This class accepts multiple added documents and directly writes a single segment file. It does this more - /// efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on - /// those segments. - /// - /// Each added document is passed to the {@link DocConsumer}, which in turn processes the document and interacts - /// with other consumers in the indexing chain. Certain consumers, like {@link StoredFieldsWriter} and {@link - /// TermVectorsTermsWriter}, digest a document and immediately write bytes to the "doc store" files (ie, - /// they do not consume RAM per document, except while they are processing the document). 
- /// - /// Other consumers, eg {@link FreqProxTermsWriter} and {@link NormsWriter}, buffer bytes in RAM and flush only - /// when a new segment is produced. - /// - /// Once we have used our allowed RAM buffer, or the number of added docs is large enough (in the case we are - /// flushing by doc count instead of RAM usage), we create a real segment and flush it to the Directory. - /// - /// Threads: - /// Multiple threads are allowed into addDocument at once. There is an initial synchronized call to - /// getThreadState which allocates a ThreadState for this thread. The same thread will get the same ThreadState - /// over time (thread affinity) so that if there are consistent patterns (for example each thread is indexing a - /// different content source) then we make better use of RAM. Then processDocument is called on that ThreadState - /// without synchronization (most of the "heavy lifting" is in this call). Finally the synchronized - /// "finishDocument" is called to flush changes to the directory. - /// - /// When flush is called by IndexWriter we forcefully idle all threads and flush only once they are all idle. - /// This means you can call flush with a given thread even while other threads are actively adding/deleting - /// documents. - /// - /// Exceptions: - /// Because this class directly updates in-memory posting lists, and flushes stored fields and term vectors - /// directly to files in the directory, there are certain limited times when an exception can corrupt this state. - /// For example, a disk full while flushing stored fields leaves this file in a corrupt state. Or, an - /// std::bad_alloc exception while appending to the in-memory posting lists can corrupt that posting list. - /// We call such exceptions "aborting exceptions". In these cases we must call abort() to discard all docs added - /// since the last flush. - /// - /// All other exceptions ("non-aborting exceptions") can still partially update the index structures. 
These - /// updates are consistent, but, they represent only a part of the document seen up until the exception was hit. - /// When this happens, we immediately mark the document as deleted so that the document is always atomically - /// ("all or none") added to the index. - class DocumentsWriter : public LuceneObject - { - public: - DocumentsWriter(DirectoryPtr directory, IndexWriterPtr writer, IndexingChainPtr indexingChain); - virtual ~DocumentsWriter(); - - LUCENE_CLASS(DocumentsWriter); - - protected: - String docStoreSegment; // Current doc-store segment we are writing - int32_t docStoreOffset; // Current starting doc-store offset of current segment - - int32_t nextDocID; // Next docID to be added - int32_t numDocsInRAM; // # docs buffered in RAM - - /// Max # ThreadState instances; if there are more threads than this they share ThreadStates - static const int32_t MAX_THREAD_STATE; - Collection threadStates; - MapThreadDocumentsWriterThreadState threadBindings; - - int32_t pauseThreads; // Non-zero when we need all threads to pause (eg to flush) - bool aborting; // True if an abort is pending - - DocFieldProcessorPtr docFieldProcessor; - - /// Deletes done after the last flush; these are discarded on abort - BufferedDeletesPtr deletesInRAM; - - /// Deletes done before the last flush; these are still kept on abort - BufferedDeletesPtr deletesFlushed; - - /// The max number of delete terms that can be buffered before they must be flushed to disk. - int32_t maxBufferedDeleteTerms; - - /// How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead. - int64_t ramBufferSize; - int64_t waitQueuePauseBytes; - int64_t waitQueueResumeBytes; - - /// If we've allocated 5% over our RAM budget, we then free down to 95% - int64_t freeTrigger; - int64_t freeLevel; - - /// Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead. 
- int32_t maxBufferedDocs; - - /// How many docs already flushed to index - int32_t flushedDocCount; - - bool closed; - - /// List of files that were written before last abort() - HashSet _abortedFiles; - SegmentWriteStatePtr flushState; - - Collection freeIntBlocks; - Collection freeCharBlocks; - - public: - /// Coarse estimates used to measure RAM usage of buffered deletes - static const int32_t OBJECT_HEADER_BYTES; - static const int32_t POINTER_NUM_BYTE; - static const int32_t INT_NUM_BYTE; - static const int32_t CHAR_NUM_BYTE; - - /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object - /// with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is - /// object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since - /// it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). - /// BufferedDeletes.num is OBJ_HEADER + INT. - static const int32_t BYTES_PER_DEL_TERM; - - /// Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). Integer is - /// OBJ_HEADER + int - static const int32_t BYTES_PER_DEL_DOCID; - - /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object - /// with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount - /// (say 24 bytes). Integer is OBJ_HEADER + INT. 
- static const int32_t BYTES_PER_DEL_QUERY; - - /// Initial chunks size of the shared byte[] blocks used to store postings data - static const int32_t BYTE_BLOCK_SHIFT; - static const int32_t BYTE_BLOCK_SIZE; - static const int32_t BYTE_BLOCK_MASK; - static const int32_t BYTE_BLOCK_NOT_MASK; - - /// Initial chunk size of the shared char[] blocks used to store term text - static const int32_t CHAR_BLOCK_SHIFT; - static const int32_t CHAR_BLOCK_SIZE; - static const int32_t CHAR_BLOCK_MASK; - - static const int32_t MAX_TERM_LENGTH; - - /// Initial chunks size of the shared int[] blocks used to store postings data - static const int32_t INT_BLOCK_SHIFT; - static const int32_t INT_BLOCK_SIZE; - static const int32_t INT_BLOCK_MASK; - - static const int32_t PER_DOC_BLOCK_SIZE; - - INTERNAL: - IndexWriterWeakPtr _writer; - DirectoryPtr directory; - IndexingChainPtr indexingChain; - String segment; // Current segment we are working on - - int32_t numDocsInStore; // # docs written to doc stores - - bool flushPending; // True when a thread has decided to flush - bool bufferIsFull; // True when it's time to write segment - - InfoStreamPtr infoStream; - int32_t maxFieldLength; - SimilarityPtr similarity; - - DocConsumerPtr consumer; - - HashSet _openFiles; - HashSet _closedFiles; - - WaitQueuePtr waitQueue; - SkipDocWriterPtr skipDocWriter; - - ByteBlockAllocatorPtr byteBlockAllocator; - ByteBlockAllocatorPtr perDocAllocator; - - int64_t numBytesAlloc; - int64_t numBytesUsed; - - // used only by assert - TermPtr lastDeleteTerm; - - public: - virtual void initialize(); - - /// Create and return a new DocWriterBuffer. 
- PerDocBufferPtr newPerDocBuffer(); - - static IndexingChainPtr getDefaultIndexingChain(); - - void updateFlushedDocCount(int32_t n); - int32_t getFlushedDocCount(); - void setFlushedDocCount(int32_t n); - - /// Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false - bool hasProx(); - - /// If non-null, various details of indexing are printed here. - void setInfoStream(InfoStreamPtr infoStream); - - void setMaxFieldLength(int32_t maxFieldLength); - void setSimilarity(SimilarityPtr similarity); - - /// Set how much RAM we can use before flushing. - void setRAMBufferSizeMB(double mb); - double getRAMBufferSizeMB(); - - /// Set max buffered docs, which means we will flush by doc count instead of by RAM usage. - void setMaxBufferedDocs(int32_t count); - int32_t getMaxBufferedDocs(); - - /// Get current segment name we are writing. - String getSegment(); - - /// Returns how many docs are currently buffered in RAM. - int32_t getNumDocsInRAM(); - - /// Returns the current doc store segment we are writing to. - String getDocStoreSegment(); - - /// Returns the doc offset into the shared doc store for the current buffered docs. - int32_t getDocStoreOffset(); - - /// Closes the current open doc stores an returns the doc store segment name. This returns null if there - /// are no buffered documents. - String closeDocStore(); - - HashSet abortedFiles(); - - void message(const String& message); - - /// Returns Collection of files in use by this instance, including any flushed segments. - HashSet openFiles(); - HashSet closedFiles(); - - void addOpenFile(const String& name); - void removeOpenFile(const String& name); - - void setAborting(); - - /// Called if we hit an exception at a bad time (when updating the index files) and must discard all - /// currently buffered docs. This resets our state, discarding any docs added since last flush. 
- void abort(); - - /// Returns true if an abort is in progress - bool pauseAllThreads(); - void resumeAllThreads(); - - bool anyChanges(); - - void initFlushState(bool onlyDocStore); - - /// Flush all pending docs to a new segment - int32_t flush(bool _closeDocStore); - - HashSet getFlushedFiles(); - - /// Build compound file for the segment we just flushed - void createCompoundFile(const String& segment); - - /// Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter - /// to trigger a single flush even when multiple threads are trying to do so. - bool setFlushPending(); - void clearFlushPending(); - - void pushDeletes(); - - void close(); - - void initSegmentName(bool onlyDocStore); - - /// Returns a free (idle) ThreadState that may be used for indexing this one document. This call also - /// pauses if a flush is pending. If delTerm is non-null then we buffer this deleted term after the - /// thread state has been acquired. - DocumentsWriterThreadStatePtr getThreadState(DocumentPtr doc, TermPtr delTerm); - - /// Returns true if the caller (IndexWriter) should now flush. 
- bool addDocument(DocumentPtr doc, AnalyzerPtr analyzer); - - bool updateDocument(TermPtr t, DocumentPtr doc, AnalyzerPtr analyzer); - bool updateDocument(DocumentPtr doc, AnalyzerPtr analyzer, TermPtr delTerm); - - int32_t getNumBufferedDeleteTerms(); // for testing - MapTermNum getBufferedDeleteTerms(); // for testing - - /// Called whenever a merge has completed and the merged segments had deletions - void remapDeletes(SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergeDocCount); - - bool bufferDeleteTerms(Collection terms); - bool bufferDeleteTerm(TermPtr term); - bool bufferDeleteQueries(Collection queries); - bool bufferDeleteQuery(QueryPtr query); - bool deletesFull(); - bool doApplyDeletes(); - - void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms); - int32_t getMaxBufferedDeleteTerms(); - - bool hasDeletes(); - bool applyDeletes(SegmentInfosPtr infos); - bool doBalanceRAM(); - - void waitForWaitQueue(); - - int64_t getRAMUsed(); - - IntArray getIntBlock(bool trackAllocations); - void bytesAllocated(int64_t numBytes); - void bytesUsed(int64_t numBytes); - void recycleIntBlocks(Collection blocks, int32_t start, int32_t end); - - CharArray getCharBlock(); - void recycleCharBlocks(Collection blocks, int32_t numBlocks); - - String toMB(int64_t v); - - /// We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds - /// characters in the term) and per-doc buffers (stored fields/term vectors). Different docs require - /// varying amount of storage from these four classes. - /// - /// For example, docs with many unique single-occurrence short terms will use up the Postings - /// RAM and hardly any of the other two. Whereas docs with very large terms will use alot of char blocks - /// RAM and relatively less of the other two. 
This method just frees allocations from the pools once we - /// are over-budget, which balances the pools to match the current docs. - void balanceRAM(); - - protected: - /// Reset after a flush - void doAfterFlush(); - - bool allThreadsIdle(); - - void waitReady(DocumentsWriterThreadStatePtr state); - - bool timeToFlushDeletes(); - - // used only by assert - bool checkDeleteTerm(TermPtr term); - - bool applyDeletes(IndexReaderPtr reader, int32_t docIDStart); - void addDeleteTerm(TermPtr term, int32_t docCount); - - /// Buffer a specific docID for deletion. Currently only used when we hit a exception when adding a document - void addDeleteDocID(int32_t docID); - void addDeleteQuery(QueryPtr query, int32_t docID); - - /// Does the synchronized work to finish/flush the inverted document. - void finishDocument(DocumentsWriterThreadStatePtr perThread, DocWriterPtr docWriter); - - friend class WaitQueue; - }; - - class DocState : public LuceneObject - { - public: - DocState(); - virtual ~DocState(); - - LUCENE_CLASS(DocState); - - public: - DocumentsWriterWeakPtr _docWriter; - AnalyzerPtr analyzer; - int32_t maxFieldLength; - InfoStreamPtr infoStream; - SimilarityPtr similarity; - int32_t docID; - DocumentPtr doc; - String maxTermPrefix; - - public: - /// Only called by asserts - virtual bool testPoint(const String& name); - - void clear(); - }; - - /// RAMFile buffer for DocWriters. - class PerDocBuffer : public RAMFile - { - public: - PerDocBuffer(DocumentsWriterPtr docWriter); - virtual ~PerDocBuffer(); - - LUCENE_CLASS(PerDocBuffer); - - protected: - DocumentsWriterWeakPtr _docWriter; - - public: - /// Recycle the bytes used. - void recycle(); - - protected: - /// Allocate bytes used from shared pool. - virtual ByteArray newBuffer(int32_t size); - }; - - /// Consumer returns this on each doc. This holds any state that must be flushed synchronized - /// "in docID order". We gather these and flush them in order. 
- class DocWriter : public LuceneObject - { - public: - DocWriter(); - virtual ~DocWriter(); - - LUCENE_CLASS(DocWriter); - - public: - DocWriterPtr next; - int32_t docID; - - public: - virtual void finish() = 0; - virtual void abort() = 0; - virtual int64_t sizeInBytes() = 0; - - virtual void setNext(DocWriterPtr next); - }; - - /// The IndexingChain must define the {@link #getChain(DocumentsWriter)} method which returns the DocConsumer - /// that the DocumentsWriter calls to process the documents. - class IndexingChain : public LuceneObject - { - public: - virtual ~IndexingChain(); - - LUCENE_CLASS(IndexingChain); - - public: - virtual DocConsumerPtr getChain(DocumentsWriterPtr documentsWriter) = 0; - }; - - /// This is the current indexing chain: - /// DocConsumer / DocConsumerPerThread - /// --> code: DocFieldProcessor / DocFieldProcessorPerThread - /// --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField - /// --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField - /// --> code: DocInverter / DocInverterPerThread / DocInverterPerField - /// --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - /// --> code: TermsHash / TermsHashPerThread / TermsHashPerField - /// --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField - /// --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField - /// --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField - /// --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - /// --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField - /// --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField - class DefaultIndexingChain : public IndexingChain - { - public: - virtual ~DefaultIndexingChain(); - - LUCENE_CLASS(DefaultIndexingChain); - - public: - virtual DocConsumerPtr 
getChain(DocumentsWriterPtr documentsWriter); - }; - - class SkipDocWriter : public DocWriter - { - public: - virtual ~SkipDocWriter(); - - LUCENE_CLASS(SkipDocWriter); - - public: - virtual void finish(); - virtual void abort(); - virtual int64_t sizeInBytes(); - }; - - class WaitQueue : public LuceneObject - { - public: - WaitQueue(DocumentsWriterPtr docWriter); - virtual ~WaitQueue(); - - LUCENE_CLASS(WaitQueue); - - protected: - DocumentsWriterWeakPtr _docWriter; - - public: - Collection waiting; - int32_t nextWriteDocID; - int32_t nextWriteLoc; - int32_t numWaiting; - int64_t waitingBytes; - - public: - void reset(); - bool doResume(); - bool doPause(); - void abort(); - bool add(DocWriterPtr doc); - - protected: - void writeDocument(DocWriterPtr doc); - }; - - class ByteBlockAllocator : public ByteBlockPoolAllocatorBase - { - public: - ByteBlockAllocator(DocumentsWriterPtr docWriter, int32_t blockSize); - virtual ~ByteBlockAllocator(); - - LUCENE_CLASS(ByteBlockAllocator); - - protected: - DocumentsWriterWeakPtr _docWriter; - - public: - int32_t blockSize; - Collection freeByteBlocks; - - public: - /// Allocate another byte[] from the shared pool - virtual ByteArray getByteBlock(bool trackAllocations); - - /// Return byte[]'s to the pool - virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end); - virtual void recycleByteBlocks(Collection blocks); - }; -} - -#endif diff --git a/include/DocumentsWriterThreadState.h b/include/DocumentsWriterThreadState.h deleted file mode 100644 index 5cec49bd..00000000 --- a/include/DocumentsWriterThreadState.h +++ /dev/null @@ -1,39 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef DOCUMENTSWRITERTHREADSTATE_H -#define DOCUMENTSWRITERTHREADSTATE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Used by DocumentsWriter to maintain per-thread state. - /// We keep a separate Posting hash and other state for each thread and then merge postings - /// hashes from all threads when writing the segment. - class DocumentsWriterThreadState : public LuceneObject - { - public: - DocumentsWriterThreadState(DocumentsWriterPtr docWriter); - virtual ~DocumentsWriterThreadState(); - - LUCENE_CLASS(DocumentsWriterThreadState); - - public: - bool isIdle; // false if this is currently in use by a thread - int32_t numThreads; // Number of threads that share this instance - bool doFlushAfter; // true if we should flush after processing current doc - DocConsumerPerThreadPtr consumer; - DocStatePtr docState; - DocumentsWriterWeakPtr _docWriter; - - public: - virtual void initialize(); - void doAfterFlush(); - }; -} - -#endif diff --git a/include/DoubleFieldSource.h b/include/DoubleFieldSource.h deleted file mode 100644 index e7e80591..00000000 --- a/include/DoubleFieldSource.h +++ /dev/null @@ -1,63 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef DOUBLEFIELDSOURCE_H -#define DOUBLEFIELDSOURCE_H - -#include "FieldCacheSource.h" -#include "DocValues.h" - -namespace Lucene -{ - /// Obtains double field values from the {@link FieldCache} using getDoubles() and makes those values available - /// as other numeric types, casting as needed. - /// - /// @see FieldCacheSource for requirements on the field. 
- /// - /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite - /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's - /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, - /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU - /// per lookup but will not consume double the FieldCache RAM. - class DoubleFieldSource : public FieldCacheSource - { - public: - /// Create a cached double field source with a specific string-to-double parser. - DoubleFieldSource(const String& field, DoubleParserPtr parser = DoubleParserPtr()); - virtual ~DoubleFieldSource(); - - LUCENE_CLASS(DoubleFieldSource); - - protected: - DoubleParserPtr parser; - - public: - virtual String description(); - virtual DocValuesPtr getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader); - virtual bool cachedFieldSourceEquals(FieldCacheSourcePtr other); - virtual int32_t cachedFieldSourceHashCode(); - }; - - class DoubleDocValues : public DocValues - { - public: - DoubleDocValues(DoubleFieldSourcePtr source, Collection arr); - virtual ~DoubleDocValues(); - - LUCENE_CLASS(DoubleDocValues); - - protected: - DoubleFieldSourceWeakPtr _source; - Collection arr; - - public: - virtual double doubleVal(int32_t doc); - virtual String toString(int32_t doc); - virtual CollectionValue getInnerArray(); - }; -} - -#endif diff --git a/include/ExactPhraseScorer.h b/include/ExactPhraseScorer.h deleted file mode 100644 index 31e6c48e..00000000 --- a/include/ExactPhraseScorer.h +++ /dev/null @@ -1,27 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef EXACTPHRASESCORER_H -#define EXACTPHRASESCORER_H - -#include "PhraseScorer.h" - -namespace Lucene -{ - class ExactPhraseScorer : public PhraseScorer - { - public: - ExactPhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, ByteArray norms); - virtual ~ExactPhraseScorer(); - - LUCENE_CLASS(ExactPhraseScorer); - - protected: - virtual double phraseFreq(); - }; -} - -#endif diff --git a/include/Explanation.h b/include/Explanation.h deleted file mode 100644 index fc2a2779..00000000 --- a/include/Explanation.h +++ /dev/null @@ -1,87 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef EXPLANATION_H -#define EXPLANATION_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Describes the score computation for document and query. - class LPPAPI Explanation : public LuceneObject - { - public: - Explanation(double value = 0, const String& description = EmptyString); - virtual ~Explanation(); - - LUCENE_CLASS(Explanation); - - protected: - double value; // the value of this node - String description; // what it represents - Collection details; // sub-explanations - - public: - /// Indicates whether or not this Explanation models a good match. - /// - /// By default, an Explanation represents a "match" if the value is positive. - /// - /// @see #getValue - virtual bool isMatch(); - - /// The value assigned to this explanation node. - virtual double getValue(); - - /// Sets the value assigned to this explanation node. - virtual void setValue(double value); - - /// A description of this explanation node. 
- virtual String getDescription(); - - /// Sets the description of this explanation node. - virtual void setDescription(const String& description); - - /// The sub-nodes of this explanation node. - virtual Collection getDetails(); - - /// Adds a sub-node to this explanation node. - virtual void addDetail(ExplanationPtr detail); - - /// Render an explanation as text. - virtual String toString(); - - /// Render an explanation as HTML. - virtual String toHtml(); - - protected: - /// A short one line summary which should contain all high level information about this Explanation, - /// without the "Details" - virtual String getSummary(); - - virtual String toString(int32_t depth); - }; - - /// Small Util class used to pass both an idf factor as well as an explanation for that factor. - /// - /// This class will likely be held on a {@link Weight}, so be aware before storing any large fields. - class LPPAPI IDFExplanation : public LuceneObject - { - public: - virtual ~IDFExplanation(); - LUCENE_CLASS(IDFExplanation); - - public: - /// @return the idf factor - virtual double getIdf() = 0; - - /// This should be calculated lazily if possible. - /// @return the explanation for the idf factor. - virtual String explain() = 0; - }; -} - -#endif diff --git a/include/FSDirectory.h b/include/FSDirectory.h deleted file mode 100644 index 84824ff9..00000000 --- a/include/FSDirectory.h +++ /dev/null @@ -1,133 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FSDIRECTORY_H -#define FSDIRECTORY_H - -#include "Directory.h" - -namespace Lucene -{ - /// Base class for Directory implementations that store index files in the file system. 
There are currently three - /// core subclasses: - /// - /// {@link SimpleFSDirectory} is a straightforward implementation using std::ofstream and std::ifstream. - /// - /// {@link MMapDirectory} uses memory-mapped IO when reading. This is a good choice if you have plenty of virtual - /// memory relative to your index size, eg if you are running on a 64 bit operating system, oryour index sizes are - /// small enough to fit into the virtual memory space. - /// - /// For users who have no reason to prefer a specific implementation, it's best to simply use {@link #open}. For - /// all others, you should instantiate the desired implementation directly. - /// - /// The locking implementation is by default {@link NativeFSLockFactory}, but can be changed by passing in a custom - /// {@link LockFactory} instance. - /// @see Directory - class LPPAPI FSDirectory : public Directory - { - protected: - /// Create a new FSDirectory for the named location (ctor for subclasses). - /// @param path the path of the directory. - /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) - FSDirectory(const String& path, LockFactoryPtr lockFactory); - - public: - virtual ~FSDirectory(); - - LUCENE_CLASS(FSDirectory); - - public: - /// Default read chunk size. This is a conditional default based on operating system. - /// @see #setReadChunkSize - static const int32_t DEFAULT_READ_CHUNK_SIZE; - - protected: - bool checked; - - /// The underlying filesystem directory. - String directory; - - /// @see #DEFAULT_READ_CHUNK_SIZE - int32_t chunkSize; - - public: - /// Creates an FSDirectory instance. - static FSDirectoryPtr open(const String& path); - - /// Just like {@link #open(File)}, but allows you to also specify a custom {@link LockFactory}. - static FSDirectoryPtr open(const String& path, LockFactoryPtr lockFactory); - - /// Lists all files (not subdirectories) in the directory. 
- /// @throws NoSuchDirectoryException if the directory does not exist, or does exist but is not a directory. - static HashSet listAll(const String& dir); - - /// Returns the time the named file was last modified. - static uint64_t fileModified(const String& directory, const String& name); - - /// Create file system directory. - void createDir(); - - /// Return file system directory. - String getFile(); - - /// Sets the maximum number of bytes read at once from the underlying file during {@link IndexInput#readBytes}. - /// The default value is {@link #DEFAULT_READ_CHUNK_SIZE}. Changes to this value will not impact any already-opened - /// {@link IndexInput}s. You should call this before attempting to open an index on the directory. This value should - /// be as large as possible to reduce any possible performance impact. - void setReadChunkSize(int32_t chunkSize); - - /// The maximum number of bytes to read at once from the underlying file during {@link IndexInput#readBytes}. - /// @see #setReadChunkSize - int32_t getReadChunkSize(); - - /// Lists all files (not subdirectories) in the directory. - /// @see #listAll(const String&) - virtual HashSet listAll(); - - /// Returns true if a file with the given name exists. - virtual bool fileExists(const String& name); - - /// Returns the time the named file was last modified. - virtual uint64_t fileModified(const String& name); - - /// Set the modified time of an existing file to now. - virtual void touchFile(const String& name); - - /// Removes an existing file in the directory. - virtual void deleteFile(const String& name); - - /// Returns the length in bytes of a file in the directory. - virtual int64_t fileLength(const String& name); - - /// Ensure that any writes to this file are moved to stable storage. Lucene uses this to properly commit changes to - /// the index, to prevent a machine/OS crash from corrupting the index. 
- virtual void sync(const String& name); - - /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory - /// implementation may ignore the buffer size. - virtual IndexInputPtr openInput(const String& name); - - /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory - /// implementation may ignore the buffer size. Currently the only Directory implementations that respect this parameter - /// are {@link FSDirectory} and {@link CompoundFileReader}. - virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); - - /// Return a string identifier that uniquely differentiates this Directory instance from other Directory instances. - virtual String getLockID(); - - /// Closes the store to future operations. - virtual void close(); - - /// For debug output. - virtual String toString(); - - protected: - /// Initializes the directory to create a new file with the given name. This method should be used in {@link #createOutput}. - void initOutput(const String& name); - }; -} - -#endif diff --git a/include/FSLockFactory.h b/include/FSLockFactory.h deleted file mode 100644 index f91c0d0a..00000000 --- a/include/FSLockFactory.h +++ /dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FSLOCKFACTORY_H -#define FSLOCKFACTORY_H - -#include "LockFactory.h" - -namespace Lucene -{ - /// Base class for file system based locking implementation. - class LPPAPI FSLockFactory : public LockFactory - { - protected: - FSLockFactory(); - - public: - virtual ~FSLockFactory(); - - LUCENE_CLASS(FSLockFactory); - - protected: - /// Directory for the lock files. 
- String lockDir; - - public: - /// Set the lock directory. This method can be only called once to - /// initialize the lock directory. It is used by {@link FSDirectory} - /// to set the lock directory to itself. Subclasses can also use - /// this method to set the directory in the constructor. - void setLockDir(const String& lockDir); - - /// Retrieve the lock directory. - String getLockDir(); - }; -} - -#endif diff --git a/include/FastCharStream.h b/include/FastCharStream.h deleted file mode 100644 index a962d265..00000000 --- a/include/FastCharStream.h +++ /dev/null @@ -1,57 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FASTCHARSTREAM_H -#define FASTCHARSTREAM_H - -#include "QueryParserCharStream.h" - -namespace Lucene -{ - /// An efficient implementation of QueryParserCharStream interface. - /// - /// Note that this does not do line-number counting, but instead keeps track of the character position of - /// the token in the input, as required by Lucene's {@link Token} API. - class LPPAPI FastCharStream : public QueryParserCharStream, public LuceneObject - { - public: - /// Constructs from a Reader. 
- FastCharStream(ReaderPtr reader); - virtual ~FastCharStream(); - - LUCENE_CLASS(FastCharStream); - - public: - CharArray buffer; - - int32_t bufferLength; // end of valid chars - int32_t bufferPosition; // next char to read - - int32_t tokenStart; // offset in buffer - int32_t bufferStart; // position in file of buffer - - ReaderPtr input; // source of chars - - public: - virtual wchar_t readChar(); - virtual wchar_t BeginToken(); - virtual void backup(int32_t amount); - virtual String GetImage(); - virtual CharArray GetSuffix(int32_t length); - virtual void Done(); - virtual int32_t getColumn(); - virtual int32_t getLine(); - virtual int32_t getEndColumn(); - virtual int32_t getEndLine(); - virtual int32_t getBeginColumn(); - virtual int32_t getBeginLine(); - - protected: - void refill(); - }; -} - -#endif diff --git a/include/Field.h b/include/Field.h deleted file mode 100644 index 46f5deb7..00000000 --- a/include/Field.h +++ /dev/null @@ -1,155 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELD_H -#define FIELD_H - -#include "AbstractField.h" - -namespace Lucene -{ - class LPPAPI Field : public AbstractField - { - public: - /// Create a field by specifying its name, value and how it will be saved in the index. Term vectors - /// will not be stored in the index. 
- /// - /// @param name The name of the field - /// @param value The string to process - /// @param store Whether value should be stored in the index - /// @param index Whether the field should be indexed, and if so, if it should be tokenized before indexing - Field(const String& name, const String& value, Store store, Index index); - - /// Create a field by specifying its name, value and how it will be saved in the index. - /// - /// @param name The name of the field - /// @param value The string to process - /// @param store Whether value should be stored in the index - /// @param index Whether the field should be indexed, and if so, if it should be tokenized before indexing - /// @param termVector Whether term vector should be stored - Field(const String& name, const String& value, Store store, Index index, TermVector termVector); - - /// Create a tokenized and indexed field that is not stored. Term vectors will not be stored. The Reader is - /// read only when the Document is added to the index, ie. you may not close the Reader until {@link - /// IndexWriter#addDocument(Document)} has been called. - /// - /// @param name The name of the field - /// @param reader The reader with the content - Field(const String& name, ReaderPtr reader); - - /// Create a tokenized and indexed field that is not stored, optionally with storing term vectors. The - /// Reader is read only when the Document is added to the index, ie. you may not close the Reader until - /// {@link IndexWriter#addDocument(Document)} has been called. - /// - /// @param name The name of the field - /// @param reader The reader with the content - /// @param termVector Whether term vector should be stored - Field(const String& name, ReaderPtr reader, TermVector termVector); - - /// Create a tokenized and indexed field that is not stored. Term vectors will not be stored. This is useful - /// for pre-analyzed fields. The TokenStream is read only when the Document is added to the index, ie. 
you - /// may not close the TokenStream until {@link IndexWriter#addDocument(Document)} has been called. - /// - /// @param name The name of the field - /// @param tokenStream The TokenStream with the content - Field(const String& name, TokenStreamPtr tokenStream); - - /// Create a tokenized and indexed field that is not stored, optionally with storing term vectors. This is - /// useful for pre-analyzed fields. The TokenStream is read only when the Document is added to the index, - /// ie. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)} has been called. - /// - /// @param name The name of the field - /// @param tokenStream The TokenStream with the content - /// @param termVector Whether term vector should be stored - Field(const String& name, TokenStreamPtr tokenStream, TermVector termVector); - - /// Create a stored field with binary value. Optionally the value may be compressed. - /// - /// @param name The name of the field - /// @param value The binary value - /// @param store How value should be stored (compressed or not) - Field(const String& name, ByteArray value, Store store); - - /// Create a stored field with binary value. Optionally the value may be compressed. - /// - /// @param name The name of the field - /// @param value The binary value - /// @param offset Starting offset in value where this Field's bytes are - /// @param length Number of bytes to use for this Field, starting at offset - /// @param store How value should be stored (compressed or not) - Field(const String& name, ByteArray value, int32_t offset, int32_t length, Store store); - - virtual ~Field(); - - LUCENE_CLASS(Field); - - public: - using AbstractField::isStored; - using AbstractField::isIndexed; - - /// Specifies whether and how a field should be stored. - static bool isStored(Store store); - - /// Specifies whether and how a field should be indexed. 
- static bool isIndexed(Index index); - static bool isAnalyzed(Index index); - static bool omitNorms(Index index); - - /// Get the best representation of the index given the flags. - static Field::Index toIndex(bool indexed, bool analyzed); - - /// Get the best representation of the index given the flags. - static Field::Index toIndex(bool indexed, bool analyzed, bool omitNorms); - - /// Specifies whether and how a field should have term vectors. - static bool isStored(TermVector termVector); - static bool withPositions(TermVector termVector); - static bool withOffsets(TermVector termVector); - - /// Get the best representation of the index given the flags. - static Field::TermVector toTermVector(bool stored, bool withOffsets, bool withPositions); - - /// The value of the field as a String, or null. If null, the Reader value or binary value is used. - /// Exactly one of stringValue(), readerValue(), and getBinaryValue() must be set. - virtual String stringValue(); - - /// The value of the field as a Reader, or null. If null, the String value or binary value is used. - /// Exactly one of stringValue(), readerValue(), and getBinaryValue() must be set. - virtual ReaderPtr readerValue(); - - /// The value of the field as a TokesStream, or null. If null, the Reader value or String value is - /// analyzed to produce the indexed tokens. - virtual TokenStreamPtr tokenStreamValue(); - - /// Change the value of this field. This can be used during indexing to re-use a single Field instance - /// to improve indexing speed. Typically a single {@link Document} instance is re-used as well. This - /// helps most on small documents. - /// - /// Each Field instance should only be used once within a single {@link Document} instance. - virtual void setValue(const String& value); - - /// Change the value of this field. - virtual void setValue(ReaderPtr value); - - /// Change the value of this field. - virtual void setValue(ByteArray value); - - /// Change the value of this field. 
- virtual void setValue(ByteArray value, int32_t offset, int32_t length); - - /// Sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return - /// true. May be combined with stored values from stringValue() or getBinaryValue() - virtual void setTokenStream(TokenStreamPtr tokenStream); - - protected: - void ConstructField(const String& name, const String& value, Store store, Index index, TermVector termVector); - void ConstructField(const String& name, ReaderPtr reader, TermVector termVector); - void ConstructField(const String& name, TokenStreamPtr tokenStream, TermVector termVector); - void ConstructField(const String& name, ByteArray value, int32_t offset, int32_t length, Store store); - }; -} - -#endif diff --git a/include/FieldCache.h b/include/FieldCache.h deleted file mode 100644 index a61eea09..00000000 --- a/include/FieldCache.h +++ /dev/null @@ -1,281 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDCACHE_H -#define FIELDCACHE_H - -#include -#include "LuceneObject.h" - -namespace Lucene -{ - /// Maintains caches of term values. - /// @see FieldCacheSanityChecker - class LPPAPI FieldCache - { - public: - virtual ~FieldCache(); - LUCENE_INTERFACE(FieldCache); - - public: - /// Specifies whether and how a field should be stored. - enum CacheType - { - CACHE_BYTE = 1, - CACHE_INT, - CACHE_LONG, - CACHE_DOUBLE, - CACHE_STRING, - CACHE_STRING_INDEX - }; - - /// Indicator for StringIndex values in the cache. 
- /// NOTE: the value assigned to this constant must not be the same as any of those in SortField - static const int32_t STRING_INDEX; - - public: - /// The cache used internally by sorting and range query classes. - static FieldCachePtr DEFAULT(); - - /// The default parser for byte values, which are encoded by StringUtils::toInt - static ByteParserPtr DEFAULT_BYTE_PARSER(); - - /// The default parser for int values, which are encoded by StringUtils::toInt - static IntParserPtr DEFAULT_INT_PARSER(); - - /// The default parser for int values, which are encoded by StringUtils::toLong - static LongParserPtr DEFAULT_LONG_PARSER(); - - /// The default parser for double values, which are encoded by StringUtils::toDouble - static DoubleParserPtr DEFAULT_DOUBLE_PARSER(); - - /// A parser instance for int values encoded by {@link NumericUtils#prefixCodedToInt(String)}, - /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. - static IntParserPtr NUMERIC_UTILS_INT_PARSER(); - - /// A parser instance for long values encoded by {@link NumericUtils#prefixCodedToLong(String)}, - /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. - static LongParserPtr NUMERIC_UTILS_LONG_PARSER(); - - /// A parser instance for double values encoded by {@link NumericUtils}, - /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. - static DoubleParserPtr NUMERIC_UTILS_DOUBLE_PARSER(); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in - /// field as a single byte and returns an array of size reader.maxDoc() of the value each document - /// has in the given field. - /// @param reader Used to get field values. - /// @param field Which field contains the single byte values. - /// @return The values in the given field for each document. 
- virtual Collection getBytes(IndexReaderPtr reader, const String& field); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in - /// field as bytes and returns an array of size reader.maxDoc() of the value each document has in - /// the given field. - /// @param reader Used to get field values. - /// @param field Which field contains the bytes. - /// @param parser Computes byte for string values. - /// @return The values in the given field for each document. - virtual Collection getBytes(IndexReaderPtr reader, const String& field, ByteParserPtr parser); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in - /// field as integers and returns an array of size reader.maxDoc() of the value each document has in - /// the given field. - /// @param reader Used to get field values. - /// @param field Which field contains the integers. - /// @return The values in the given field for each document. - virtual Collection getInts(IndexReaderPtr reader, const String& field); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in - /// field as integers and returns an array of size reader.maxDoc() of the value each document has in - /// the given field. - /// @param reader Used to get field values. - /// @param field Which field contains the integers. - /// @param parser Computes integer for string values. - /// @return The values in the given field for each document. - virtual Collection getInts(IndexReaderPtr reader, const String& field, IntParserPtr parser); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in - /// field as longs and returns an array of size reader.maxDoc() of the value each document has in - /// the given field. - /// @param reader Used to get field values. - /// @param field Which field contains the longs. 
- /// @return The values in the given field for each document. - virtual Collection getLongs(IndexReaderPtr reader, const String& field); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in - /// field as longs and returns an array of size reader.maxDoc() of the value each document has in - /// the given field. - /// @param reader Used to get field values. - /// @param field Which field contains the longs. - /// @param parser Computes long for string values. - /// @return The values in the given field for each document. - virtual Collection getLongs(IndexReaderPtr reader, const String& field, LongParserPtr parser); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in - /// field as integers and returns an array of size reader.maxDoc() of the value each document has in - /// the given field. - /// @param reader Used to get field values. - /// @param field Which field contains the doubles. - /// @return The values in the given field for each document. - virtual Collection getDoubles(IndexReaderPtr reader, const String& field); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in - /// field as doubles and returns an array of size reader.maxDoc() of the value each document has in - /// the given field. - /// @param reader Used to get field values. - /// @param field Which field contains the doubles. - /// @param parser Computes double for string values. - /// @return The values in the given field for each document. - virtual Collection getDoubles(IndexReaderPtr reader, const String& field, DoubleParserPtr parser); - - /// Checks the internal cache for an appropriate entry, and if none are found, reads the term values in - /// field and returns an array of size reader.maxDoc() containing the value each document has in - /// the given field. - /// @param reader Used to get field values. 
- /// @param field Which field contains the strings. - /// @return The values in the given field for each document. - virtual Collection getStrings(IndexReaderPtr reader, const String& field); - - /// Checks the internal cache for an appropriate entry, and if none are found reads the term values in - /// field and returns an array of them in natural order, along with an array telling which element in - /// the term array each document uses. - /// @param reader Used to get field values. - /// @param field Which field contains the strings. - /// @return Array of terms and index into the array for each document. - virtual StringIndexPtr getStringIndex(IndexReaderPtr reader, const String& field); - - /// Generates an array of CacheEntry objects representing all items currently in the FieldCache. - virtual Collection getCacheEntries() = 0; - - /// Instructs the FieldCache to forcibly expunge all entries from the underlying caches. This is intended - /// only to be used for test methods as a way to ensure a known base state of the Cache. It should not be - /// relied on for "Cache maintenance" in general application code. - virtual void purgeAllCaches() = 0; - - /// Drops all cache entries associated with this reader. NOTE: this reader must precisely match the reader - /// that the cache entry is keyed on. If you pass a top-level reader, it usually will have no effect as - /// Lucene now caches at the segment reader level. - virtual void purge(IndexReaderPtr r) = 0; - - /// If non-null, FieldCacheImpl will warn whenever entries are created that are not sane according to - /// {@link FieldCacheSanityChecker}. - virtual void setInfoStream(InfoStreamPtr stream); - - /// @see #setInfoStream - virtual InfoStreamPtr getInfoStream(); - }; - - class LPPAPI CreationPlaceholder : public LuceneObject - { - public: - virtual ~CreationPlaceholder(); - LUCENE_CLASS(CreationPlaceholder); - - public: - boost::any value; - }; - - /// Stores term text values and document ordering data. 
- class LPPAPI StringIndex : public LuceneObject - { - public: - StringIndex(Collection values, Collection lookup); - virtual ~StringIndex(); - - LUCENE_CLASS(StringIndex); - - public: - /// All the term values, in natural order. - Collection lookup; - - /// For each document, an index into the lookup array. - Collection order; - - public: - int32_t binarySearchLookup(const String& key); - }; - - /// Marker interface as super-interface to all parsers. It is used to specify a custom parser to {@link - /// SortField#SortField(String, Parser)}. - class LPPAPI Parser : public LuceneObject - { - public: - virtual ~Parser(); - LUCENE_CLASS(Parser); - }; - - /// Interface to parse bytes from document fields. - /// @see FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr) - class LPPAPI ByteParser : public Parser - { - public: - virtual ~ByteParser(); - LUCENE_CLASS(ByteParser); - - public: - /// Return a single Byte representation of this field's value. - virtual uint8_t parseByte(const String& string); - }; - - /// Interface to parse ints from document fields. - /// @see FieldCache#getInts(IndexReaderPtr, String, IntParserPtr) - class LPPAPI IntParser : public Parser - { - public: - virtual ~IntParser(); - LUCENE_CLASS(IntParser); - - public: - /// Return a integer representation of this field's value. - virtual int32_t parseInt(const String& string); - }; - - /// Interface to parse longs from document fields. - /// @see FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr) - class LPPAPI LongParser : public Parser - { - public: - virtual ~LongParser(); - LUCENE_CLASS(LongParser); - - public: - /// Return a long representation of this field's value. - virtual int64_t parseLong(const String& string); - }; - - /// Interface to parse doubles from document fields. 
- /// @see FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr) - class LPPAPI DoubleParser : public Parser - { - public: - virtual ~DoubleParser(); - LUCENE_CLASS(DoubleParser); - - public: - /// Return a double representation of this field's value. - virtual double parseDouble(const String& string); - }; - - /// A unique Identifier/Description for each item in the FieldCache. Can be useful for logging/debugging. - class LPPAPI FieldCacheEntry : public LuceneObject - { - public: - virtual ~FieldCacheEntry(); - LUCENE_CLASS(FieldCacheEntry); - - public: - virtual LuceneObjectPtr getReaderKey() = 0; - virtual String getFieldName() = 0; - virtual int32_t getCacheType() = 0; - virtual boost::any getCustom() = 0; - virtual boost::any getValue() = 0; - - virtual String toString(); - }; -} - -#endif diff --git a/include/FieldCacheImpl.h b/include/FieldCacheImpl.h deleted file mode 100644 index 0689c63d..00000000 --- a/include/FieldCacheImpl.h +++ /dev/null @@ -1,193 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDCACHEIMPL_H -#define FIELDCACHEIMPL_H - -#include "FieldCache.h" - -namespace Lucene -{ - /// The default cache implementation, storing all values in memory. A WeakHashMap is used for storage. 
- class FieldCacheImpl : public FieldCache, public LuceneObject - { - public: - FieldCacheImpl(); - virtual ~FieldCacheImpl(); - - LUCENE_CLASS(FieldCacheImpl); - - protected: - MapStringCache caches; - InfoStreamPtr infoStream; - - public: - virtual void initialize(); - virtual void purgeAllCaches(); - virtual void purge(IndexReaderPtr r); - virtual Collection getCacheEntries(); - - virtual Collection getBytes(IndexReaderPtr reader, const String& field); - virtual Collection getBytes(IndexReaderPtr reader, const String& field, ByteParserPtr parser); - - virtual Collection getInts(IndexReaderPtr reader, const String& field); - virtual Collection getInts(IndexReaderPtr reader, const String& field, IntParserPtr parser); - - virtual Collection getLongs(IndexReaderPtr reader, const String& field); - virtual Collection getLongs(IndexReaderPtr reader, const String& field, LongParserPtr parser); - - virtual Collection getDoubles(IndexReaderPtr reader, const String& field); - virtual Collection getDoubles(IndexReaderPtr reader, const String& field, DoubleParserPtr parser); - - virtual Collection getStrings(IndexReaderPtr reader, const String& field); - virtual StringIndexPtr getStringIndex(IndexReaderPtr reader, const String& field); - - virtual void setInfoStream(InfoStreamPtr stream); - virtual InfoStreamPtr getInfoStream(); - }; - - class Entry : public LuceneObject - { - public: - /// Creates one of these objects for a custom comparator/parser. - Entry(const String& field, boost::any custom); - virtual ~Entry(); - - LUCENE_CLASS(Entry); - - public: - String field; // which Fieldable - boost::any custom; // which custom comparator or parser - - public: - /// Two of these are equal if they reference the same field and type. - virtual bool equals(LuceneObjectPtr other); - - /// Composes a hashcode based on the field and type. - virtual int32_t hashCode(); - }; - - /// Internal cache. 
- class Cache : public LuceneObject - { - public: - Cache(FieldCachePtr wrapper = FieldCachePtr()); - virtual ~Cache(); - - LUCENE_CLASS(Cache); - - public: - FieldCacheWeakPtr _wrapper; - WeakMapLuceneObjectMapEntryAny readerCache; - - protected: - virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key) = 0; - - public: - /// Remove this reader from the cache, if present. - virtual void purge(IndexReaderPtr r); - - virtual boost::any get(IndexReaderPtr reader, EntryPtr key); - virtual void printNewInsanity(InfoStreamPtr infoStream, boost::any value); - }; - - class ByteCache : public Cache - { - public: - ByteCache(FieldCachePtr wrapper = FieldCachePtr()); - virtual ~ByteCache(); - - LUCENE_CLASS(ByteCache); - - protected: - virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); - }; - - class IntCache : public Cache - { - public: - IntCache(FieldCachePtr wrapper = FieldCachePtr()); - virtual ~IntCache(); - - LUCENE_CLASS(IntCache); - - protected: - virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); - }; - - class LongCache : public Cache - { - public: - LongCache(FieldCachePtr wrapper = FieldCachePtr()); - virtual ~LongCache(); - - LUCENE_CLASS(LongCache); - - protected: - virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); - }; - - class DoubleCache : public Cache - { - public: - DoubleCache(FieldCachePtr wrapper = FieldCachePtr()); - virtual ~DoubleCache(); - - LUCENE_CLASS(DoubleCache); - - protected: - virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); - }; - - class StringCache : public Cache - { - public: - StringCache(FieldCachePtr wrapper = FieldCachePtr()); - virtual ~StringCache(); - - LUCENE_CLASS(StringCache); - - protected: - virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); - }; - - class StringIndexCache : public Cache - { - public: - StringIndexCache(FieldCachePtr wrapper = FieldCachePtr()); - virtual ~StringIndexCache(); - - 
LUCENE_CLASS(StringIndexCache); - - protected: - virtual boost::any createValue(IndexReaderPtr reader, EntryPtr key); - }; - - class FieldCacheEntryImpl : public FieldCacheEntry - { - public: - FieldCacheEntryImpl(LuceneObjectPtr readerKey, const String& fieldName, int32_t cacheType, boost::any custom, boost::any value); - virtual ~FieldCacheEntryImpl(); - - LUCENE_CLASS(FieldCacheEntryImpl); - - protected: - LuceneObjectPtr readerKey; - String fieldName; - int32_t cacheType; - boost::any custom; - boost::any value; - - public: - virtual LuceneObjectPtr getReaderKey(); - virtual String getFieldName(); - virtual int32_t getCacheType(); - virtual boost::any getCustom(); - virtual boost::any getValue(); - }; -} - -#endif diff --git a/include/FieldCacheRangeFilter.h b/include/FieldCacheRangeFilter.h deleted file mode 100644 index 8e589752..00000000 --- a/include/FieldCacheRangeFilter.h +++ /dev/null @@ -1,115 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDCACHERANGEFILTER_H -#define FIELDCACHERANGEFILTER_H - -#include "Filter.h" -#include "FieldCache.h" - -namespace Lucene -{ - /// A range filter built on top of a cached single term field (in {@link FieldCache}). - /// - /// FieldCacheRangeFilter builds a single cache for the field the first time it is used. Each subsequent - /// FieldCacheRangeFilter on the same field then reuses this cache, even if the range itself changes. - /// - /// This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast) as building a - /// {@link TermRangeFilter}, if using a {@link #newStringRange}. 
However, if the range never changes it is - /// slower (around 2x as slow) than building a CachingWrapperFilter on top of a single {@link TermRangeFilter}. - /// - /// For numeric data types, this filter may be significantly faster than {@link NumericRangeFilter}. - /// Furthermore, it does not need the numeric values encoded by {@link NumericField}. But it has the problem - /// that it only works with exact one value/document (see below). - /// - /// As with all {@link FieldCache} based functionality, FieldCacheRangeFilter is only valid for fields which - /// exact one term for each document (except for {@link #newStringRange} where 0 terms are also allowed). Due - /// to a restriction of {@link FieldCache}, for numeric ranges all terms that do not have a numeric value, 0 - /// is assumed. - /// - /// Thus it works on dates, prices and other single value fields but will not work on regular text fields. It - /// is preferable to use a NOT_ANALYZED field to ensure that there is only a single term. - /// - /// Do not instantiate this template directly, use one of the static factory methods available, that create a - /// correct instance for different data types supported by {@link FieldCache}. - class LPPAPI FieldCacheRangeFilter : public Filter - { - public: - FieldCacheRangeFilter(const String& field, ParserPtr parser, bool includeLower, bool includeUpper); - virtual ~FieldCacheRangeFilter(); - - LUCENE_CLASS(FieldCacheRangeFilter); - - INTERNAL: - String field; - ParserPtr parser; - bool includeLower; - bool includeUpper; - - public: - /// Creates a string range filter using {@link FieldCache#getStringIndex}. This works with all fields containing - /// zero or one term in the field. The range can be half-open by setting one of the values to null. 
- static FieldCacheRangeFilterPtr newStringRange(const String& field, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper); - - /// Creates a numeric range filter using {@link FieldCache#getBytes(IndexReaderPtr, String)}. This works with all - /// byte fields containing exactly one numeric term in the field. The range can be half-open by setting one of the - /// values to null. - static FieldCacheRangeFilterPtr newByteRange(const String& field, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); - - /// Creates a numeric range filter using {@link FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr)}. This - /// works with all byte fields containing exactly one numeric term in the field. The range can be half-open by - /// setting one of the values to null. - static FieldCacheRangeFilterPtr newByteRange(const String& field, ByteParserPtr parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); - - /// Creates a numeric range filter using {@link FieldCache#getInts(IndexReaderPtr, String)}. This works with all - /// int fields containing exactly one numeric term in the field. The range can be half-open by setting one of the - /// values to null. - static FieldCacheRangeFilterPtr newIntRange(const String& field, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); - - /// Creates a numeric range filter using {@link FieldCache#getInts(IndexReaderPtr, String, IntParserPtr)}. This - /// works with all int fields containing exactly one numeric term in the field. The range can be half-open by - /// setting one of the values to null. - static FieldCacheRangeFilterPtr newIntRange(const String& field, IntParserPtr parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); - - /// Creates a numeric range filter using {@link FieldCache#getLongs(IndexReaderPtr, String)}. 
This works with all - /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one of the - /// values to null. - static FieldCacheRangeFilterPtr newLongRange(const String& field, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); - - /// Creates a numeric range filter using {@link FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr)}. This - /// works with all long fields containing exactly one numeric term in the field. The range can be half-open by - /// setting one of the values to null. - static FieldCacheRangeFilterPtr newLongRange(const String& field, LongParserPtr parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); - - /// Creates a numeric range filter using {@link FieldCache#getDoubles(IndexReaderPtr, String)}. This works with all - /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one of the - /// values to null. - static FieldCacheRangeFilterPtr newDoubleRange(const String& field, double lowerVal, double upperVal, bool includeLower, bool includeUpper); - - /// Creates a numeric range filter using {@link FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr)}. This - /// works with all long fields containing exactly one numeric term in the field. The range can be half-open by - /// setting one of the values to null. 
- static FieldCacheRangeFilterPtr newDoubleRange(const String& field, DoubleParserPtr parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper); - - virtual String toString() = 0; - virtual bool equals(LuceneObjectPtr other) = 0; - virtual int32_t hashCode() = 0; - - /// Returns the field name for this filter - virtual String getField(); - - /// Returns true if the lower endpoint is inclusive - virtual bool includesLower(); - - /// Returns true if the upper endpoint is inclusive - virtual bool includesUpper(); - - /// Returns the current numeric parser - virtual ParserPtr getParser(); - }; -} - -#endif diff --git a/include/FieldCacheSanityChecker.h b/include/FieldCacheSanityChecker.h deleted file mode 100644 index d046d731..00000000 --- a/include/FieldCacheSanityChecker.h +++ /dev/null @@ -1,126 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDCACHESANITYCHECKER_H -#define FIELDCACHESANITYCHECKER_H - -#include "LuceneObject.h" -#include "MapOfSets.h" - -namespace Lucene -{ - /// Provides methods for sanity checking that entries in the FieldCache are not wasteful or inconsistent. - /// - /// Lucene 2.9 Introduced numerous enhancements into how the FieldCache is used by the low levels of Lucene - /// searching (for Sorting and ValueSourceQueries) to improve both the speed for Sorting, as well as reopening - /// of IndexReaders. But these changes have shifted the usage of FieldCache from "top level" IndexReaders - /// (frequently a MultiReader or DirectoryReader) down to the leaf level SegmentReaders. 
As a result, - /// existing applications that directly access the FieldCache may find RAM usage increase significantly when - /// upgrading to 2.9 or later. This class provides an API for these applications (or their Unit tests) to - /// check at run time if the FieldCache contains "insane" usages of the FieldCache. - /// - /// @see FieldCache - /// @see FieldCacheSanityChecker.Insanity - /// @see FieldCacheSanityChecker.InsanityType - class LPPAPI FieldCacheSanityChecker : public LuceneObject - { - public: - FieldCacheSanityChecker(); - virtual ~FieldCacheSanityChecker(); - - LUCENE_CLASS(FieldCacheSanityChecker); - - public: - typedef MapOfSets< int32_t, boost::hash, std::equal_to, FieldCacheEntryPtr, luceneHash, luceneEquals > MapSetIntFieldCacheEntry; - typedef MapOfSets< ReaderFieldPtr, luceneHash, luceneEquals, int32_t, boost::hash, std::equal_to > MapSetReaderFieldInt; - typedef MapOfSets< ReaderFieldPtr, luceneHash, luceneEquals, ReaderFieldPtr, luceneHash, luceneEquals > MapSetReaderFieldReaderField; - - /// An Enumeration of the different types of "insane" behaviour that may be detected in a FieldCache. - enum InsanityType - { - /// Indicates an overlap in cache usage on a given field in sub/super readers. - SUBREADER, - - /// Indicates entries have the same reader+fieldname but different cached values. This can happen - /// if different datatypes, or parsers are used -- and while it's not necessarily a bug it's - /// typically an indication of a possible problem. - /// - /// NOTE: Only the reader, fieldname, and cached value are actually tested -- if two cache entries - /// have different parsers or datatypes but the cached values are the same Object (== not just equal()) - /// this method does not consider that a red flag. This allows for subtle variations in the way a - /// Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) - VALUEMISMATCH, - - /// Indicates an expected bit of "insanity". 
This may be useful for clients that wish to preserve/log - /// information about insane usage but indicate that it was expected. - EXPECTED - }; - - /// Quick and dirty convenience method - /// @see #check - static Collection checkSanity(FieldCachePtr cache); - - /// Quick and dirty convenience method that instantiates an instance with "good defaults" and uses it to - /// test the CacheEntrys. - /// @see #check - static Collection checkSanity(Collection cacheEntries); - - /// Tests a CacheEntry[] for indication of "insane" cache usage. - /// NOTE: FieldCache CreationPlaceholder objects are ignored. - Collection check(Collection cacheEntries); - - protected: - /// Internal helper method used by check that iterates over valMismatchKeys and generates a Collection of - /// Insanity instances accordingly. The MapOfSets are used to populate the Insanity objects. - /// @see InsanityType#VALUEMISMATCH - Collection checkValueMismatch(MapSetIntFieldCacheEntry valIdToItems, - MapSetReaderFieldInt readerFieldToValIds, - SetReaderField valMismatchKeys); - - /// Internal helper method used by check that iterates over the keys of readerFieldToValIds and generates a - /// Collection of Insanity instances whenever two (or more) ReaderField instances are found that have an - /// ancestry relationships. - /// @see InsanityType#SUBREADER - Collection checkSubreaders(MapSetIntFieldCacheEntry valIdToItems, - MapSetReaderFieldInt readerFieldToValIds); - - /// Checks if the seed is an IndexReader, and if so will walk the hierarchy of subReaders building up a - /// list of the objects returned by obj.getFieldCacheKey() - Collection getAllDecendentReaderKeys(LuceneObjectPtr seed); - }; - - /// Simple container for a collection of related CacheEntry objects that in conjunction with each other - /// represent some "insane" usage of the FieldCache. 
- class LPPAPI Insanity : public LuceneObject - { - public: - Insanity(FieldCacheSanityChecker::InsanityType type, const String& msg, Collection entries); - virtual ~Insanity(); - - LUCENE_CLASS(Insanity); - - protected: - FieldCacheSanityChecker::InsanityType type; - String msg; - Collection entries; - - public: - /// Type of insane behavior this object represents - FieldCacheSanityChecker::InsanityType getType(); - - /// Description of the insane behaviour - String getMsg(); - - /// CacheEntry objects which suggest a problem - Collection getCacheEntries(); - - /// Multi-Line representation of this Insanity object, starting with the Type and Msg, followed by each - /// CacheEntry.toString() on it's own line prefaced by a tab character - virtual String toString(); - }; -} - -#endif diff --git a/include/FieldCacheSource.h b/include/FieldCacheSource.h deleted file mode 100644 index b5860e7a..00000000 --- a/include/FieldCacheSource.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDCACHESOURCE_H -#define FIELDCACHESOURCE_H - -#include "ValueSource.h" - -namespace Lucene -{ - /// A base class for ValueSource implementations that retrieve values for a single field from the - /// {@link FieldCache}. - /// - /// Fields used herein must be indexed (doesn't matter if these fields are stored or not). - /// - /// It is assumed that each such indexed field is untokenized, or at least has a single token in a document. - /// For documents with multiple tokens of the same field, behavior is undefined (It is likely that current - /// code would use the value of one of these tokens, but this is not guaranteed). 
- /// - /// Document with no tokens in this field are assigned the Zero value. - /// - /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite - /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's - /// best to switch your application to pass only atomic (single segment) readers to this API. - class LPPAPI FieldCacheSource : public ValueSource - { - public: - /// Create a cached field source for the input field. - FieldCacheSource(const String& field); - virtual ~FieldCacheSource(); - - LUCENE_CLASS(FieldCacheSource); - - protected: - String field; - - public: - virtual DocValuesPtr getValues(IndexReaderPtr reader); - virtual String description(); - - /// Return cached DocValues for input field and reader. - /// @param cache FieldCache so that values of a field are loaded once per reader (RAM allowing) - /// @param field Field for which values are required. - /// @see ValueSource - virtual DocValuesPtr getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader) = 0; - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - /// Check if equals to another {@link FieldCacheSource}, already knowing that cache and field are equal. - virtual bool cachedFieldSourceEquals(FieldCacheSourcePtr other) = 0; - - /// Return a hash code of a {@link FieldCacheSource}, without the hash-codes of the field and the cache - /// (those are taken care of elsewhere). - virtual int32_t cachedFieldSourceHashCode() = 0; - }; -} - -#endif diff --git a/include/FieldCacheTermsFilter.h b/include/FieldCacheTermsFilter.h deleted file mode 100644 index 6a561fb5..00000000 --- a/include/FieldCacheTermsFilter.h +++ /dev/null @@ -1,65 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDCACHETERMSFILTER_H -#define FIELDCACHETERMSFILTER_H - -#include "Filter.h" - -namespace Lucene -{ - /// A {@link Filter} that only accepts documents whose single term value in the specified field is contained - /// in the provided set of allowed terms. - /// - /// This is the same functionality as TermsFilter (from contrib/queries), except this filter requires that the - /// field contains only a single term for all documents. Because of drastically different implementations, - /// they also have different performance characteristics, as described below. - /// - /// The first invocation of this filter on a given field will be slower, since a {@link StringIndex} must be - /// created. Subsequent invocations using the same field will re-use this cache. However, as with all - /// functionality based on {@link FieldCache}, persistent RAM is consumed to hold the cache, and is not freed - /// until the {@link IndexReader} is closed. In contrast, TermsFilter has no persistent RAM consumption. - /// - /// With each search, this filter translates the specified set of Terms into a private {@link OpenBitSet} keyed - /// by term number per unique {@link IndexReader} (normally one reader per segment). Then, during matching, - /// the term number for each docID is retrieved from the cache and then checked for inclusion using the {@link - /// OpenBitSet}. Since all testing is done using RAM resident data structures, performance should be very fast, - /// most likely fast enough to not require further caching of the DocIdSet for each possible combination of - /// terms. However, because docIDs are simply scanned linearly, an index with a great many small documents may - /// find this linear scan too costly. 
- /// - /// In contrast, TermsFilter builds up an {@link OpenBitSet}, keyed by docID, every time it's created, by - /// enumerating through all matching docs using {@link TermDocs} to seek and scan through each term's docID list. - /// While there is no linear scan of all docIDs, besides the allocation of the underlying array in the {@link - /// OpenBitSet}, this approach requires a number of "disk seeks" in proportion to the number of terms, which can - /// be exceptionally costly when there are cache misses in the OS's IO cache. - /// - /// Generally, this filter will be slower on the first invocation for a given field, but subsequent invocations, - /// even if you change the allowed set of Terms, should be faster than TermsFilter, especially as the number of - /// Terms being matched increases. If you are matching only a very small number of terms, and those terms in - /// turn match a very small number of documents, TermsFilter may perform faster. - /// - /// Which filter is best is very application dependent. - class LPPAPI FieldCacheTermsFilter : public Filter - { - public: - FieldCacheTermsFilter(const String& field, Collection terms); - virtual ~FieldCacheTermsFilter(); - - LUCENE_CLASS(FieldCacheTermsFilter); - - protected: - String field; - Collection terms; - - public: - FieldCachePtr getFieldCache(); - - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); - }; -} - -#endif diff --git a/include/FieldComparator.h b/include/FieldComparator.h deleted file mode 100644 index 0c61fb44..00000000 --- a/include/FieldComparator.h +++ /dev/null @@ -1,348 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDCOMPARATOR_H -#define FIELDCOMPARATOR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A FieldComparator compares hits so as to determine their sort order when collecting the top results with - /// {@link TopFieldCollector}. The concrete public FieldComparator classes here correspond to the SortField types. - /// - /// This API is designed to achieve high performance sorting, by exposing a tight interaction with {@link - /// FieldValueHitQueue} as it visits hits. Whenever a hit is competitive, it's enrolled into a virtual slot, - /// which is an int ranging from 0 to numHits-1. The {@link FieldComparator} is made aware of segment transitions - /// during searching in case any internal state it's tracking needs to be recomputed during these transitions. - /// - /// A comparator must define these functions: - ///
    - ///
  • {@link #compare} Compare a hit at 'slot a' with hit 'slot b'. - /// - ///
  • {@link #setBottom} This method is called by {@link FieldValueHitQueue} to notify the FieldComparator of - /// the current weakest ("bottom") slot. Note that this slot may not hold the weakest value according to your - /// comparator, in cases where your comparator is not the primary one (ie, is only used to break ties from the - /// comparators before it). - /// - ///
  • {@link #compareBottom} Compare a new hit (docID) against the "weakest" (bottom) entry in the queue. - /// - ///
  • {@link #copy} Installs a new hit into the priority queue. The {@link FieldValueHitQueue} calls this - /// method when a new hit is competitive. - /// - ///
  • {@link #setNextReader} Invoked when the search is switching to the next segment. You may need to update - /// internal state of the comparator, for example retrieving new values from the {@link FieldCache}. - /// - ///
  • {@link #value} Return the sort value stored in the specified slot. This is only called at the end of - /// the search, in order to populate {@link FieldDoc#fields} when returning the top results. - ///
- class LPPAPI FieldComparator : public LuceneObject - { - public: - virtual ~FieldComparator(); - LUCENE_CLASS(FieldComparator); - - public: - /// Compare hit at slot1 with hit at slot2. - /// @param slot1 first slot to compare - /// @param slot2 second slot to compare - /// @return any N < 0 if slot2's value is sorted after slot1, any N > 0 if the slot2's value is sorted - /// before slot1 and 0 if they are equal - virtual int32_t compare(int32_t slot1, int32_t slot2) = 0; - - /// Set the bottom slot, ie the "weakest" (sorted last) entry in the queue. When {@link #compareBottom} - /// is called, you should compare against this slot. This will always be called before {@link #compareBottom}. - /// @param slot the currently weakest (sorted last) slot in the queue - virtual void setBottom(int32_t slot) = 0; - - /// Compare the bottom of the queue with doc. This will only invoked after setBottom has been called. - /// This should return the same result as {@link #compare(int,int)}} as if bottom were slot1 and the new - /// document were slot 2. - /// - /// For a search that hits many results, this method will be the hotspot (invoked by far the most frequently). - /// - /// @param doc that was hit - /// @return any N < 0 if the doc's value is sorted after the bottom entry (not competitive), any N > 0 if - /// the doc's value is sorted before the bottom entry and 0 if they are equal. - virtual int32_t compareBottom(int32_t doc) = 0; - - /// This method is called when a new hit is competitive. You should copy any state associated with this - /// document that will be required for future comparisons, into the specified slot. - /// @param slot which slot to copy the hit to - /// @param doc docID relative to current reader - virtual void copy(int32_t slot, int32_t doc) = 0; - - /// Set a new Reader. All doc correspond to the current Reader. 
- /// - /// @param reader current reader - /// @param docBase docBase of this reader - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) = 0; - - /// Sets the Scorer to use in case a document's score is needed. - /// @param scorer Scorer instance that you should use to obtain the current hit's score, if necessary. - virtual void setScorer(ScorerPtr scorer); - - /// Return the actual value in the slot. - /// @param slot the value - /// @return value in this slot upgraded to ComparableValue - virtual ComparableValue value(int32_t slot) = 0; - }; - - template - class NumericComparator : public FieldComparator - { - public: - NumericComparator(int32_t numHits, const String& field = EmptyString) - { - this->values = Collection::newInstance(numHits); - this->field = field; - this->bottom = 0; - } - - virtual ~NumericComparator() - { - } - - protected: - Collection values; - Collection currentReaderValues; - String field; - TYPE bottom; - - public: - virtual int32_t compare(int32_t slot1, int32_t slot2) - { - return (int32_t)(values[slot1] - values[slot2]); - } - - virtual int32_t compareBottom(int32_t doc) - { - return (int32_t)(bottom - currentReaderValues[doc]); - } - - virtual void copy(int32_t slot, int32_t doc) - { - values[slot] = currentReaderValues[doc]; - } - - virtual void setBottom(int32_t slot) - { - bottom = values[slot]; - } - - virtual ComparableValue value(int32_t slot) - { - return ComparableValue(values[slot]); - } - }; - - /// Parses field's values as byte (using {@link FieldCache#getBytes} and sorts by ascending value. 
- class LPPAPI ByteComparator : public NumericComparator - { - public: - ByteComparator(int32_t numHits, const String& field, ParserPtr parser); - virtual ~ByteComparator(); - - LUCENE_CLASS(ByteComparator); - - protected: - ByteParserPtr parser; - - public: - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - }; - - /// Sorts by ascending docID - class LPPAPI DocComparator : public NumericComparator - { - public: - DocComparator(int32_t numHits); - virtual ~DocComparator(); - - LUCENE_CLASS(DocComparator); - - protected: - int32_t docBase; - - public: - virtual int32_t compareBottom(int32_t doc); - virtual void copy(int32_t slot, int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - }; - - /// Parses field's values as double (using {@link FieldCache#getDoubles} and sorts by ascending value - class LPPAPI DoubleComparator : public NumericComparator - { - public: - DoubleComparator(int32_t numHits, const String& field, ParserPtr parser); - virtual ~DoubleComparator(); - - LUCENE_CLASS(DoubleComparator); - - protected: - DoubleParserPtr parser; - - public: - virtual int32_t compare(int32_t slot1, int32_t slot2); - virtual int32_t compareBottom(int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - }; - - /// Parses field's values as int (using {@link FieldCache#getInts} and sorts by ascending value - class LPPAPI IntComparator : public NumericComparator - { - public: - IntComparator(int32_t numHits, const String& field, ParserPtr parser); - virtual ~IntComparator(); - - LUCENE_CLASS(IntComparator); - - protected: - IntParserPtr parser; - - public: - virtual int32_t compare(int32_t slot1, int32_t slot2); - virtual int32_t compareBottom(int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - }; - - /// Parses field's values as long (using {@link FieldCache#getLongs} and sorts by ascending value - class LPPAPI LongComparator : public NumericComparator - { - 
public: - LongComparator(int32_t numHits, const String& field, ParserPtr parser); - virtual ~LongComparator(); - - LUCENE_CLASS(LongComparator); - - protected: - LongParserPtr parser; - - public: - virtual int32_t compare(int32_t slot1, int32_t slot2); - virtual int32_t compareBottom(int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - }; - - /// Sorts by descending relevance. NOTE: if you are sorting only by descending relevance and then secondarily - /// by ascending docID, performance is faster using {@link TopScoreDocCollector} directly (which {@link - /// IndexSearcher#search} uses when no {@link Sort} is specified). - class LPPAPI RelevanceComparator : public NumericComparator - { - public: - RelevanceComparator(int32_t numHits); - virtual ~RelevanceComparator(); - - LUCENE_CLASS(RelevanceComparator); - - protected: - ScorerPtr scorer; - - public: - virtual int32_t compare(int32_t slot1, int32_t slot2); - virtual int32_t compareBottom(int32_t doc); - virtual void copy(int32_t slot, int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setScorer(ScorerPtr scorer); - }; - - /// Sorts by a field's value using the Collator for a given Locale. 
- class LPPAPI StringComparatorLocale : public FieldComparator - { - public: - StringComparatorLocale(int32_t numHits, const String& field, const std::locale& locale); - virtual ~StringComparatorLocale(); - - LUCENE_CLASS(StringComparatorLocale); - - protected: - Collection values; - Collection currentReaderValues; - String field; - CollatorPtr collator; - String bottom; - - public: - virtual int32_t compare(int32_t slot1, int32_t slot2); - virtual int32_t compareBottom(int32_t doc); - virtual void copy(int32_t slot, int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setBottom(int32_t slot); - virtual ComparableValue value(int32_t slot); - }; - - /// Sorts by field's natural String sort order, using ordinals. This is functionally equivalent to {@link - /// StringValComparator}, but it first resolves the string to their relative ordinal positions (using the - /// index returned by {@link FieldCache#getStringIndex}), and does most comparisons using the ordinals. - /// For medium to large results, this comparator will be much faster than {@link StringValComparator}. For - /// very small result sets it may be slower. 
- class LPPAPI StringOrdValComparator : public FieldComparator - { - public: - StringOrdValComparator(int32_t numHits, const String& field, int32_t sortPos, bool reversed); - virtual ~StringOrdValComparator(); - - LUCENE_CLASS(StringOrdValComparator); - - protected: - Collection ords; - Collection values; - Collection readerGen; - - int32_t currentReaderGen; - Collection lookup; - Collection order; - String field; - - int32_t bottomSlot; - int32_t bottomOrd; - String bottomValue; - bool reversed; - int32_t sortPos; - - public: - virtual int32_t compare(int32_t slot1, int32_t slot2); - virtual int32_t compareBottom(int32_t doc); - virtual void copy(int32_t slot, int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setBottom(int32_t slot); - virtual ComparableValue value(int32_t slot); - virtual Collection getValues(); - virtual int32_t getBottomSlot(); - virtual String getField(); - - protected: - void convert(int32_t slot); - int32_t binarySearch(Collection lookup, const String& key, int32_t low, int32_t high); - }; - - /// Sorts by field's natural String sort order. All comparisons are done using String.compare, which is - /// slow for medium to large result sets but possibly very fast for very small results sets. 
- class LPPAPI StringValComparator : public FieldComparator - { - public: - StringValComparator(int32_t numHits, const String& field); - virtual ~StringValComparator(); - - LUCENE_CLASS(StringOrdValComparator); - - protected: - Collection values; - Collection currentReaderValues; - String field; - String bottom; - - public: - virtual int32_t compare(int32_t slot1, int32_t slot2); - virtual int32_t compareBottom(int32_t doc); - virtual void copy(int32_t slot, int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setBottom(int32_t slot); - virtual ComparableValue value(int32_t slot); - }; -} - -#endif diff --git a/include/FieldComparatorSource.h b/include/FieldComparatorSource.h deleted file mode 100644 index aeade930..00000000 --- a/include/FieldComparatorSource.h +++ /dev/null @@ -1,29 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDCOMPARATORSOURCE_H -#define FIELDCOMPARATORSOURCE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Provides a {@link FieldComparator} for custom field sorting. - class LPPAPI FieldComparatorSource : public LuceneObject - { - public: - virtual ~FieldComparatorSource(); - LUCENE_CLASS(FieldComparatorSource); - - public: - /// Creates a comparator for the field in the given index. - /// @param fieldname Name of the field to create comparator for. - /// @return FieldComparator. 
- virtual FieldComparatorPtr newComparator(const String& fieldname, int32_t numHits, int32_t sortPos, bool reversed) = 0; - }; -} - -#endif diff --git a/include/FieldDoc.h b/include/FieldDoc.h deleted file mode 100644 index 1e3c7b1a..00000000 --- a/include/FieldDoc.h +++ /dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDDOC_H -#define FIELDDOC_H - -#include "ScoreDoc.h" - -namespace Lucene -{ - /// A ScoreDoc which also contains information about how to sort the referenced document. In addition to the - /// document number and score, this object contains an array of values for the document from the field(s) used - /// to sort. For example, if the sort criteria was to sort by fields "a", "b" then "c", the fields object array - /// will have three elements, corresponding respectively to the term values for the document in fields "a", "b" - /// and "c". The class of each element in the array will be either Integer, Double or String depending on the - /// type of values in the terms of each field. - class LPPAPI FieldDoc : public ScoreDoc - { - public: - FieldDoc(int32_t doc, double score, Collection fields = Collection()); - virtual ~FieldDoc(); - - LUCENE_CLASS(FieldDoc); - - public: - /// The values which are used to sort the referenced document. The order of these will match the original - /// sort criteria given by a Sort object. Each Object will be either an Integer, Double or String, depending - /// on the type of values in the terms of the original field. 
- /// @see Sort - /// @see Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr) - Collection fields; - - public: - virtual String toString(); - }; -} - -#endif diff --git a/include/FieldDocSortedHitQueue.h b/include/FieldDocSortedHitQueue.h deleted file mode 100644 index cb1b4c0f..00000000 --- a/include/FieldDocSortedHitQueue.h +++ /dev/null @@ -1,52 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDDOCSORTEDHITQUEUE_H -#define FIELDDOCSORTEDHITQUEUE_H - -#include "PriorityQueue.h" - -namespace Lucene -{ - /// Collects sorted results from Searchable's and collates them. - /// The elements put into this queue must be of type FieldDoc. - class FieldDocSortedHitQueue : public PriorityQueue - { - public: - FieldDocSortedHitQueue(int32_t size); - virtual ~FieldDocSortedHitQueue(); - - LUCENE_CLASS(FieldDocSortedHitQueue); - - public: - Collection fields; - - // used in the case where the fields are sorted by locale based strings - Collection collators; - - public: - /// Allows redefinition of sort fields if they are null. This is to handle the case using - /// ParallelMultiSearcher where the original list contains AUTO and we don't know the actual sort - /// type until the values come back. The fields can only be set once. This method should be - /// synchronized external like all other PQ methods. - void setFields(Collection fields); - - /// Returns the fields being used to sort. - Collection getFields(); - - protected: - /// Returns an array of collators, possibly null. The collators correspond to any SortFields which - /// were given a specific locale. - /// @param fields Array of sort fields. - /// @return Array, possibly null. 
- Collection hasCollators(Collection fields); - - /// Returns whether first is less relevant than second. - virtual bool lessThan(const FieldDocPtr& first, const FieldDocPtr& second); - }; -} - -#endif diff --git a/include/FieldInfo.h b/include/FieldInfo.h deleted file mode 100644 index 012cc664..00000000 --- a/include/FieldInfo.h +++ /dev/null @@ -1,46 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDINFO_H -#define FIELDINFO_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class FieldInfo : public LuceneObject - { - public: - FieldInfo(const String& na, bool tk, int32_t nu, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); - virtual ~FieldInfo(); - - LUCENE_CLASS(FieldInfo); - - public: - String name; - bool isIndexed; - int32_t number; - - // true if term vector for this field should be stored - bool storeTermVector; - bool storeOffsetWithTermVector; - bool storePositionWithTermVector; - - bool omitNorms; // omit norms associated with indexed fields - bool omitTermFreqAndPositions; - - bool storePayloads; // whether this field stores payloads together with term positions - - public: - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - void update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, - bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); - }; -} - -#endif diff --git a/include/FieldInfos.h b/include/FieldInfos.h deleted file mode 100644 index 475eb561..00000000 --- a/include/FieldInfos.h +++ /dev/null @@ -1,151 +0,0 @@ 
-///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDINFOS_H -#define FIELDINFOS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Access to the Fieldable Info file that describes document fields and whether or not they are indexed. - /// Each segment has a separate Fieldable Info file. Objects of this class are thread-safe for multiple - /// readers, but only one thread can be adding documents at a time, with no other reader or writer threads - /// accessing this object. - class FieldInfos : public LuceneObject - { - public: - FieldInfos(); - - /// Construct a FieldInfos object using the directory and the name of the file IndexInput - /// @param d The directory to open the IndexInput from - /// @param name The name of the file to open the IndexInput from in the Directory - FieldInfos(DirectoryPtr d, const String& name); - - virtual ~FieldInfos(); - - LUCENE_CLASS(FieldInfos); - - public: - // Used internally (ie not written to *.fnm files) for pre-2.9 files - static const int32_t FORMAT_PRE; - - // First used in 2.9; prior to 2.9 there was no format header - static const int32_t FORMAT_START; - - static const int32_t CURRENT_FORMAT; - - static const uint8_t IS_INDEXED; - static const uint8_t STORE_TERMVECTOR; - static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR; - static const uint8_t STORE_OFFSET_WITH_TERMVECTOR; - static const uint8_t OMIT_NORMS; - static const uint8_t STORE_PAYLOADS; - static const uint8_t OMIT_TERM_FREQ_AND_POSITIONS; - - protected: - Collection byNumber; - MapStringFieldInfo byName; - int32_t format; - - public: - /// Returns a deep clone of this FieldInfos instance. 
- virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Adds field info for a Document. - void add(DocumentPtr doc); - - /// Returns true if any fields do not omitTermFreqAndPositions - bool hasProx(); - - /// Add fields that are indexed. Whether they have termvectors has to be specified. - /// @param names The names of the fields - /// @param storeTermVectors Whether the fields store term vectors or not - /// @param storePositionWithTermVector true if positions should be stored. - /// @param storeOffsetWithTermVector true if offsets should be stored - void addIndexed(HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector); - - /// Assumes the fields are not storing term vectors. - /// @param names The names of the fields - /// @param isIndexed Whether the fields are indexed or not - /// @see #add(const String&, bool) - void add(HashSet names, bool isIndexed); - - /// Calls 5 parameter add with false for all TermVector parameters. - /// @param name The name of the Fieldable - /// @param isIndexed true if the field is indexed - /// @see #add(const String&, bool, bool, bool, bool) - void add(const String& name, bool isIndexed); - - /// Calls 5 parameter add with false for term vector positions and offsets. - /// @param name The name of the field - /// @param isIndexed true if the field is indexed - /// @param storeTermVector true if the term vector should be stored - void add(const String& name, bool isIndexed, bool storeTermVector); - - /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed flag - /// is the same as was given previously for this field. If not - marks it as being indexed. Same goes - /// for the TermVector parameters. 
- /// @param name The name of the field - /// @param isIndexed true if the field is indexed - /// @param storeTermVector true if the term vector should be stored - /// @param storePositionWithTermVector true if the term vector with positions should be stored - /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored - void add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector); - - /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed flag - /// is the same as was given previously for this field. If not - marks it as being indexed. Same goes - /// for the TermVector parameters. - /// @param name The name of the field - /// @param isIndexed true if the field is indexed - /// @param storeTermVector true if the term vector should be stored - /// @param storePositionWithTermVector true if the term vector with positions should be stored - /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored - /// @param omitNorms true if the norms for the indexed field should be omitted - void add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms); - - /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed - /// flag is the same as was given previously for this field. If not - marks it as being indexed. - /// Same goes for the TermVector parameters. 
- /// @param name The name of the field - /// @param isIndexed true if the field is indexed - /// @param storeTermVector true if the term vector should be stored - /// @param storePositionWithTermVector true if the term vector with positions should be stored - /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored - /// @param omitNorms true if the norms for the indexed field should be omitted - /// @param storePayloads true if payloads should be stored for this field - /// @param omitTermFreqAndPositions true if term freqs should be omitted for this field - FieldInfoPtr add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); - - int32_t fieldNumber(const String& fieldName); - FieldInfoPtr fieldInfo(const String& fieldName); - - /// Return the fieldName identified by its number. - /// @return the fieldName or an empty string when the field with the given number doesn't exist. - String fieldName(int32_t fieldNumber); - - /// Return the fieldinfo object referenced by the fieldNumber. - /// @return the FieldInfo object or null when the given fieldNumber doesn't exist. 
- FieldInfoPtr fieldInfo(int32_t fieldNumber); - - int32_t size(); - - bool hasVectors(); - - void write(DirectoryPtr d, const String& name); - void write(IndexOutputPtr output); - - protected: - FieldInfoPtr addInternal(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); - - void read(IndexInputPtr input, const String& fileName); - }; -} - -#endif diff --git a/include/FieldInvertState.h b/include/FieldInvertState.h deleted file mode 100644 index b74283ed..00000000 --- a/include/FieldInvertState.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDINVERTSTATE_H -#define FIELDINVERTSTATE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// This class tracks the number and position / offset parameters of terms being added to the index. - /// The information collected in this class is also used to calculate the normalization factor for a field. - class LPPAPI FieldInvertState : public LuceneObject - { - public: - FieldInvertState(int32_t position = 0, int32_t length = 0, int32_t numOverlap = 0, int32_t offset = 0, double boost = 0); - virtual ~FieldInvertState(); - - LUCENE_CLASS(FieldInvertState); - - INTERNAL: - int32_t position; - int32_t length; - int32_t numOverlap; - int32_t offset; - double boost; - AttributeSourcePtr attributeSource; - - public: - /// Re-initialize the state, using this boost value. - /// @param docBoost boost value to use. - void reset(double docBoost); - - /// Get the last processed term position. 
- /// @return the position - int32_t getPosition(); - - /// Get total number of terms in this field. - /// @return the length - int32_t getLength(); - - /// Get the number of terms with positionIncrement == 0. - /// @return the numOverlap - int32_t getNumOverlap(); - - /// Get end offset of the last processed term. - /// @return the offset - int32_t getOffset(); - - /// Get boost value. This is the cumulative product of document boost and field boost for all field - /// instances sharing the same field name. - /// @return the boost - double getBoost(); - - AttributeSourcePtr getAttributeSource(); - }; -} - -#endif diff --git a/include/FieldMaskingSpanQuery.h b/include/FieldMaskingSpanQuery.h deleted file mode 100644 index 5dd97fcd..00000000 --- a/include/FieldMaskingSpanQuery.h +++ /dev/null @@ -1,84 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDMASKINGSPANQUERY_H -#define FIELDMASKINGSPANQUERY_H - -#include "SpanQuery.h" - -namespace Lucene -{ - /// Wrapper to allow {@link SpanQuery} objects participate in composite single-field SpanQueries by - /// 'lying' about their search field. That is, the masked SpanQuery will function as normal, but - /// {@link SpanQuery#getField()} simply hands back the value supplied in this class's constructor. - /// - /// This can be used to support Queries like {@link SpanNearQuery} or {@link SpanOrQuery} across - /// different fields, which is not ordinarily permitted. - /// - /// This can be useful for denormalized relational data: for example, when indexing a document with - /// conceptually many 'children': - /// - ///
-    /// teacherid: 1
-    /// studentfirstname: james
-    /// studentsurname: jones
-    ///
-    /// teacherid: 2
-    /// studenfirstname: james
-    /// studentsurname: smith
-    /// studentfirstname: sally
-    /// studentsurname: jones
-    /// 
- /// - /// A SpanNearQuery with a slop of 0 can be applied across two {@link SpanTermQuery} objects as follows: - /// - ///
-    /// SpanQueryPtr q1 = newLucene(newLucene(L"studentfirstname", L"james"));
-    /// SpanQueryPtr q2 = newLucene(newLucene(L"studentsurname", L"jones"));
-    /// SpanQueryPtr q2m = newLucene(q2, L"studentfirstname");
-    ///
-    /// Collection span = newCollection(q1, q1);
-    ///
-    /// QueryPtr q = newLucene(span, -1, false);
-    /// 
- /// to search for 'studentfirstname:james studentsurname:jones' and find teacherid 1 without matching - /// teacherid 2 (which has a 'james' in position 0 and 'jones' in position 1). - /// - /// Note: as {@link #getField()} returns the masked field, scoring will be done using the norms of the - /// field name supplied. This may lead to unexpected scoring behaviour. - class LPPAPI FieldMaskingSpanQuery : public SpanQuery - { - public: - FieldMaskingSpanQuery(SpanQueryPtr maskedQuery, const String& maskedField); - virtual ~FieldMaskingSpanQuery(); - - LUCENE_CLASS(FieldMaskingSpanQuery); - - protected: - SpanQueryPtr maskedQuery; - String field; - - public: - using SpanQuery::toString; - - virtual String getField(); - SpanQueryPtr getMaskedQuery(); - virtual SpansPtr getSpans(IndexReaderPtr reader); - virtual void extractTerms(SetTerm terms); - virtual WeightPtr createWeight(SearcherPtr searcher); - virtual SimilarityPtr getSimilarity(SearcherPtr searcher); - virtual QueryPtr rewrite(IndexReaderPtr reader); - - virtual String toString(const String& field); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - /// Returns a clone of this query. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/FieldScoreQuery.h b/include/FieldScoreQuery.h deleted file mode 100644 index 8c7cb9d4..00000000 --- a/include/FieldScoreQuery.h +++ /dev/null @@ -1,75 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDSCOREQUERY_H -#define FIELDSCOREQUERY_H - -#include "ValueSourceQuery.h" - -namespace Lucene -{ - /// A query that scores each document as the value of the numeric input field. 
- /// - /// The query matches all documents, and scores each document according to the numeric value of that field. - /// - /// It is assumed, and expected, that: - ///
    - ///
  • The field used here is indexed, and has exactly one token in every scored document. - ///
  • Best if this field is un_tokenized. - ///
  • That token is parseable to the selected type. - ///
- /// - /// Combining this query in a FunctionQuery allows much freedom in affecting document scores. Note, that - /// with this freedom comes responsibility: it is more than likely that the default Lucene scoring is superior - /// in quality to scoring modified as explained here. However, in some cases, and certainly for research - /// experiments, this capability may turn useful. - /// - /// When constructing this query, select the appropriate type. That type should match the data stored in the - /// field. So in fact the "right" type should be selected before indexing. Type selection has effect on the - /// RAM usage: - ///
    - ///
  • Byte consumes 1 * maxDocs bytes. - ///
  • Int consumes 4 * maxDocs bytes. - ///
  • Double consumes 8 * maxDocs bytes. - ///
- /// - /// Caching: Values for the numeric field are loaded once and cached in memory for further use with the same - /// IndexReader. To take advantage of this, it is extremely important to reuse index-readers or index- - /// searchers, otherwise, for instance if for each query a new index reader is opened, large penalties would - /// be paid for loading the field values into memory over and over again. - class LPPAPI FieldScoreQuery : public ValueSourceQuery - { - public: - /// Type of score field, indicating how field values are interpreted/parsed. - enum Type - { - /// Field values are interpreted as numeric byte values. - BYTE, - - /// Field values are interpreted as numeric integer values. - INT, - - /// Field values are interpreted as numeric double values. - DOUBLE - }; - - /// Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field. - /// The type param tells how to parse the field string values into a numeric score value. - /// @param field the numeric field to be used. - /// @param type the type of the field. - FieldScoreQuery(const String& field, Type type); - - virtual ~FieldScoreQuery(); - - LUCENE_CLASS(FieldScoreQuery); - - public: - /// Create the appropriate (cached) field value source. - static ValueSourcePtr getValueSource(const String& field, Type type); - }; -} - -#endif diff --git a/include/FieldSelector.h b/include/FieldSelector.h deleted file mode 100644 index 628cc51c..00000000 --- a/include/FieldSelector.h +++ /dev/null @@ -1,74 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDSELECTOR_H -#define FIELDSELECTOR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// The FieldSelector allows one to make decisions about what Fields get loaded on a {@link Document} by - /// {@link IndexReader#document(int32_t, FieldSelector)} - class LPPAPI FieldSelector : public LuceneObject - { - protected: - FieldSelector(); - - public: - virtual ~FieldSelector(); - - LUCENE_CLASS(FieldSelector); - - public: - /// Provides information about what should be done with this Field - enum FieldSelectorResult - { - /// Null value - SELECTOR_NULL, - - /// Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is - /// encountered. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should - /// not return null. - /// {@link Document#add(Fieldable)} should be called by the Reader. - SELECTOR_LOAD, - - /// Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually - /// contain its data until invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link - /// Document#getFieldable(String)} is safe to use and should return a valid instance of a {@link - /// Fieldable}. - /// {@link Document#add(Fieldable)} should be called by the Reader. - SELECTOR_LAZY_LOAD, - - /// Do not load the {@link Field}. {@link Document#getField(String)} and {@link - /// Document#getFieldable(String)} should return null. {@link Document#add(Fieldable)} is not called. - /// {@link Document#add(Fieldable)} should not be called by the Reader. - SELECTOR_NO_LOAD, - - /// Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading - /// for the {@link Document}. Thus, the Document may not have its complete set of Fields. 
{@link - /// Document#getField(String)} and {@link Document#getFieldable(String)} should both be valid for - /// this {@link Field} - /// {@link Document#add(Fieldable)} should be called by the Reader. - SELECTOR_LOAD_AND_BREAK, - - /// Load the size of this {@link Field} rather than its value. Size is measured as number of bytes - /// required to store the field == bytes for a binary or any compressed value, and 2*chars for a String - /// value. The size is stored as a binary value, represented as an int in a byte[], with the higher - /// order byte first in [0] - SELECTOR_SIZE, - - /// Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further - /// fields, after the size is loaded - SELECTOR_SIZE_AND_BREAK - }; - - public: - virtual FieldSelectorResult accept(const String& fieldName) = 0; - }; -} - -#endif diff --git a/include/FieldSortedTermVectorMapper.h b/include/FieldSortedTermVectorMapper.h deleted file mode 100644 index 7e00c42f..00000000 --- a/include/FieldSortedTermVectorMapper.h +++ /dev/null @@ -1,51 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDSORTEDTERMVECTORMAPPER_H -#define FIELDSORTEDTERMVECTORMAPPER_H - -#include -#include "TermVectorMapper.h" - -namespace Lucene -{ - /// For each Field, store a sorted collection of {@link TermVectorEntry}s - /// This is not thread-safe. 
- class LPPAPI FieldSortedTermVectorMapper : public TermVectorMapper - { - public: - /// @param comparator A Comparator for sorting {@link TermVectorEntry}s - FieldSortedTermVectorMapper(TermVectorEntryComparator comparator); - - FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator); - - virtual ~FieldSortedTermVectorMapper(); - - LUCENE_CLASS(FieldSortedTermVectorMapper); - - protected: - MapStringCollectionTermVectorEntry fieldToTerms; - Collection currentSet; - String currentField; - TermVectorEntryComparator comparator; - - public: - /// Map the Term Vector information into your own structure - virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); - - /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. - virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); - - /// Get the mapping between fields and terms, sorted by the comparator - /// @return A map between field names and {@link java.util.SortedSet}s per field. SortedSet entries are - /// {@link TermVectorEntry} - MapStringCollectionTermVectorEntry getFieldToTerms(); - - TermVectorEntryComparator getComparator(); - }; -} - -#endif diff --git a/include/FieldValueHitQueue.h b/include/FieldValueHitQueue.h deleted file mode 100644 index c7331aaa..00000000 --- a/include/FieldValueHitQueue.h +++ /dev/null @@ -1,73 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDVALUEHITQUEUE_H -#define FIELDVALUEHITQUEUE_H - -#include "HitQueueBase.h" -#include "ScoreDoc.h" - -namespace Lucene -{ - /// A hit queue for sorting by hits by terms in more than one field. Uses FieldCache::DEFAULT for maintaining - /// internal term lookup tables. - /// @see Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr) - /// @see FieldCache - class LPPAPI FieldValueHitQueue : public HitQueueBase - { - protected: - FieldValueHitQueue(Collection fields, int32_t size); - - public: - virtual ~FieldValueHitQueue(); - - LUCENE_CLASS(FieldValueHitQueue); - - protected: - /// Stores the sort criteria being used. - Collection fields; - Collection comparators; - Collection reverseMul; - - public: - /// Creates a hit queue sorted by the given list of fields. - /// @param fields SortField array we are sorting by in priority order (highest priority first); cannot - /// be null or empty. - /// @param size The number of hits to retain. Must be greater than zero. - static FieldValueHitQueuePtr create(Collection fields, int32_t size); - - Collection getComparators(); - Collection getReverseMul(); - - /// Given a queue Entry, creates a corresponding FieldDoc that contains the values used to sort the given - /// document. These values are not the raw values out of the index, but the internal representation of - /// them. This is so the given search hit can be collated by a MultiSearcher with other search hits. - /// @param entry The Entry used to create a FieldDoc - /// @return The newly created FieldDoc - /// @see Searchable#search(WeightPtr, FilterPtr, int32_t, SortPtr) - FieldDocPtr fillFields(FieldValueHitQueueEntryPtr entry); - - /// Returns the SortFields being used by this hit queue. 
- Collection getFields(); - }; - - class LPPAPI FieldValueHitQueueEntry : public ScoreDoc - { - public: - FieldValueHitQueueEntry(int32_t slot, int32_t doc, double score); - virtual ~FieldValueHitQueueEntry(); - - LUCENE_CLASS(FieldValueHitQueueEntry); - - public: - int32_t slot; - - public: - virtual String toString(); - }; -} - -#endif diff --git a/include/Fieldable.h b/include/Fieldable.h deleted file mode 100644 index 8fa9b59b..00000000 --- a/include/Fieldable.h +++ /dev/null @@ -1,151 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDABLE_H -#define FIELDABLE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Synonymous with {@link Field}. - /// - /// WARNING: This interface may change within minor versions, despite Lucene's backward compatibility - /// requirements. This means new methods may be added from version to version. This change only - /// affects the Fieldable API; other backwards compatibility promises remain intact. For example, Lucene - /// can still read and write indices created within the same major version. - class LPPAPI Fieldable - { - public: - LUCENE_INTERFACE(Fieldable); - - public: - /// Sets the boost factor hits on this field. This value will be multiplied into the score of all - /// hits on this this field of this document. - /// - /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field. - /// If a document has multiple fields with the same name, all such values are multiplied together. - /// This product is then used to compute the norm factor for the field. 
By default, in the {@link - /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the - /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)} - /// before it is stored in the index. One should attempt to ensure that this product does not overflow - /// the range of that encoding. - /// - /// @see Document#setBoost(double) - /// @see Similarity#computeNorm(String, FieldInvertState) - /// @see Similarity#encodeNorm(double) - virtual void setBoost(double boost) = 0; - - /// Returns the boost factor for hits for this field. - /// - /// The default value is 1.0. - /// - /// Note: this value is not stored directly with the document in the index. Documents returned from - /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value - /// present as when this field was indexed. - virtual double getBoost() = 0; - - /// Returns the name of the field as an interned string. For example "date", "title", "body", ... - virtual String name() = 0; - - /// The value of the field as a String, or empty. - /// - /// For indexing, if isStored()==true, the stringValue() will be used as the stored field value - /// unless isBinary()==true, in which case getBinaryValue() will be used. - /// - /// If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. - /// If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate - /// indexed tokens if not null, else readerValue() will be used to generate indexed tokens if not null, - /// else stringValue() will be used to generate tokens. - virtual String stringValue() = 0; - - /// The value of the field as a Reader, which can be used at index time to generate indexed tokens. - /// @see #stringValue() - virtual ReaderPtr readerValue() = 0; - - /// The TokenStream for this field to be used when indexing, or null. 
- /// @see #stringValue() - virtual TokenStreamPtr tokenStreamValue() = 0; - - /// True if the value of the field is to be stored in the index for return with search hits. - virtual bool isStored() = 0; - - /// True if the value of the field is to be indexed, so that it may be searched on. - virtual bool isIndexed() = 0; - - /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields - /// are indexed as a single word and may not be Reader-valued. - virtual bool isTokenized() = 0; - - /// True if the term or terms used to index this field are stored as a term vector, available from - /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the - /// original content of the field, only to terms used to index it. If the original content must be - /// preserved, use the stored attribute instead. - virtual bool isTermVectorStored() = 0; - - /// True if terms are stored as term vector together with their offsets (start and end position in - /// source text). - virtual bool isStoreOffsetWithTermVector() = 0; - - /// True if terms are stored as term vector together with their token positions. - virtual bool isStorePositionWithTermVector() = 0; - - /// True if the value of the field is stored as binary. - virtual bool isBinary() = 0; - - /// True if norms are omitted for this indexed field. - virtual bool getOmitNorms() = 0; - - /// If set, omit normalization factors associated with this indexed field. - /// This effectively disables indexing boosts and length normalization for this field. - virtual void setOmitNorms(bool omitNorms) = 0; - - /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field - /// is lazily loaded, retrieving it's values via {@link #stringValue()} or {@link #getBinaryValue()} - /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open. 
- /// - /// @return true if this field can be loaded lazily - virtual bool isLazy() = 0; - - /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is - /// undefined. - /// @return index of the first character in byte[] segment that represents this Field value. - virtual int32_t getBinaryOffset() = 0; - - /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is - /// undefined. - /// @return length of byte[] segment that represents this Field value. - virtual int32_t getBinaryLength() = 0; - - /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} - /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. - /// @return reference to the Field value as byte[]. - virtual ByteArray getBinaryValue() = 0; - - /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} - /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. - /// - /// About reuse: if you pass in the result byte[] and it is used, likely the underlying implementation will - /// hold onto this byte[] and return it in future calls to {@link #getBinaryValue()}. So if you subsequently - /// re-use the same byte[] elsewhere it will alter this Fieldable's value. - /// @param result User defined buffer that will be used if possible. If this is null or not large enough, - /// a new buffer is allocated - /// @return reference to the Field value as byte[]. - virtual ByteArray getBinaryValue(ByteArray result) = 0; - - /// @see #setOmitTermFreqAndPositions - virtual bool getOmitTermFreqAndPositions() = 0; - - /// If set, omit term freq, positions and payloads from postings for this field. 
- /// - /// NOTE: While this option reduces storage space required in the index, it also means any query requiring - /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail - /// to find results. - virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) = 0; - }; -} - -#endif diff --git a/include/FieldsReader.h b/include/FieldsReader.h deleted file mode 100644 index 50e3048e..00000000 --- a/include/FieldsReader.h +++ /dev/null @@ -1,141 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDSREADER_H -#define FIELDSREADER_H - -#include "AbstractField.h" -#include "CloseableThreadLocal.h" - -namespace Lucene -{ - /// Class responsible for access to stored document fields. It uses .fdt and .fdx; files. - class FieldsReader : public LuceneObject - { - public: - /// Used only by clone - FieldsReader(FieldInfosPtr fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, int32_t formatSize, - int32_t docStoreOffset, IndexInputPtr cloneableFieldsStream, IndexInputPtr cloneableIndexStream); - FieldsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn); - FieldsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn, int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); - - virtual ~FieldsReader(); - - LUCENE_CLASS(FieldsReader); - - protected: - FieldInfosPtr fieldInfos; - - // The main fieldStream, used only for cloning. - IndexInputPtr cloneableFieldsStream; - - // This is a clone of cloneableFieldsStream used for reading documents. It should not be cloned outside of a - // synchronized context. 
- IndexInputPtr fieldsStream; - - IndexInputPtr cloneableIndexStream; - IndexInputPtr indexStream; - int32_t numTotalDocs; - int32_t _size; - bool closed; - int32_t format; - int32_t formatSize; - - // The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. - int32_t docStoreOffset; - - CloseableThreadLocal fieldsStreamTL; - bool isOriginal; - - public: - /// Returns a cloned FieldsReader that shares open IndexInputs with the original one. It is the caller's job not to - /// close the original FieldsReader until all clones are called (eg, currently SegmentReader manages this logic). - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Closes the underlying {@link IndexInput} streams, including any ones associated with a lazy implementation of a - /// Field. This means that the Fields values will not be accessible. - void close(); - - int32_t size(); - - bool canReadRawDocs(); - - DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector); - - /// Returns the length in bytes of each raw document in a contiguous range of length numDocs starting with startDocID. - /// Returns the IndexInput (the fieldStream), already seeked to the starting point for startDocID. - IndexInputPtr rawDocs(Collection lengths, int32_t startDocID, int32_t numDocs); - - protected: - void ConstructReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); - - void ensureOpen(); - - void seekIndex(int32_t docID); - - /// Skip the field. We still have to read some of the information about the field, but can skip past the actual content. - /// This will have the most payoff on large fields. 
- void skipField(bool binary, bool compressed); - void skipField(bool binary, bool compressed, int32_t toRead); - - void addFieldLazy(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed, bool tokenize); - void addField(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed, bool tokenize); - - /// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes). - /// Read just the size - caller must skip the field content to continue reading fields. Return the size in bytes or chars, - /// depending on field type. - int32_t addFieldSize(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed); - - ByteArray uncompress(ByteArray b); - String uncompressString(ByteArray b); - - friend class LazyField; - }; - - class LazyField : public AbstractField - { - public: - LazyField(FieldsReaderPtr reader, const String& name, Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); - LazyField(FieldsReaderPtr reader, const String& name, Store store, Index index, TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); - virtual ~LazyField(); - - LUCENE_CLASS(LazyField); - - protected: - FieldsReaderWeakPtr _reader; - int32_t toRead; - int64_t pointer; - - /// @deprecated Only kept for backward-compatibility with <3.0 indexes. - bool isCompressed; - - public: - /// The value of the field as a Reader, or null. If null, the String value, binary value, or TokenStream value is used. - /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. - ReaderPtr readerValue(); - - /// The value of the field as a TokenStream, or null. If null, the Reader value, String value, or binary value is used. - /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. - TokenStreamPtr tokenStreamValue(); - - /// The value of the field as a String, or null. 
If null, the Reader value, binary value, or TokenStream value is used. - /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. - String stringValue(); - - int64_t getPointer(); - void setPointer(int64_t pointer); - int32_t getToRead(); - void setToRead(int32_t toRead); - - /// Return the raw byte[] for the binary field. - virtual ByteArray getBinaryValue(ByteArray result); - - protected: - IndexInputPtr getFieldStream(); - }; -} - -#endif diff --git a/include/FieldsWriter.h b/include/FieldsWriter.h deleted file mode 100644 index f503123d..00000000 --- a/include/FieldsWriter.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FIELDSWRITER_H -#define FIELDSWRITER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class FieldsWriter : public LuceneObject - { - public: - FieldsWriter(DirectoryPtr d, const String& segment, FieldInfosPtr fn); - FieldsWriter(IndexOutputPtr fdx, IndexOutputPtr fdt, FieldInfosPtr fn); - virtual ~FieldsWriter(); - - LUCENE_CLASS(FieldsWriter); - - protected: - FieldInfosPtr fieldInfos; - IndexOutputPtr fieldsStream; - IndexOutputPtr indexStream; - bool doClose; - - public: - static const uint8_t FIELD_IS_TOKENIZED; - static const uint8_t FIELD_IS_BINARY; - static const uint8_t FIELD_IS_COMPRESSED; - - static const int32_t FORMAT; // Original format - static const int32_t FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; // Changed strings to UTF8 - static const int32_t FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; // Lucene 3.0: Removal of compressed fields - - // NOTE: if you introduce a new format, make it 1 higher than the current one, and always change this - // if you 
switch to a new format! - static const int32_t FORMAT_CURRENT; - - public: - void setFieldsStream(IndexOutputPtr stream); - - /// Writes the contents of buffer into the fields stream and adds a new entry for this document into the index - /// stream. This assumes the buffer was already written in the correct fields format. - void flushDocument(int32_t numStoredFields, RAMOutputStreamPtr buffer); - - void skipDocument(); - void flush(); - void close(); - void writeField(FieldInfoPtr fi, FieldablePtr field); - - /// Bulk write a contiguous series of documents. The lengths array is the length (in bytes) of each raw document. - /// The stream IndexInput is the fieldsStream from which we should bulk-copy all bytes. - void addRawDocuments(IndexInputPtr stream, Collection lengths, int32_t numDocs); - - void addDocument(DocumentPtr doc); - }; -} - -#endif diff --git a/include/FileReader.h b/include/FileReader.h deleted file mode 100644 index 7e6f8259..00000000 --- a/include/FileReader.h +++ /dev/null @@ -1,54 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FILEREADER_H -#define FILEREADER_H - -#include "Reader.h" - -namespace Lucene -{ - /// Convenience class for reading character files. - class LPPAPI FileReader : public Reader - { - public: - /// Creates a new FileReader, given the file name to read from. - FileReader(const String& fileName); - virtual ~FileReader(); - - LUCENE_CLASS(FileReader); - - protected: - ifstreamPtr file; - int64_t _length; - ByteArray fileBuffer; - - public: - static const int32_t FILE_EOF; - static const int32_t FILE_ERROR; - - public: - /// Read a single character. 
- virtual int32_t read(); - - /// Read characters into a portion of an array. - virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); - - /// Close the stream. - virtual void close(); - - /// Tell whether this stream supports the mark() operation - virtual bool markSupported(); - - /// Reset the stream. - virtual void reset(); - - /// The number of bytes in the file. - virtual int64_t length(); - }; -} - -#endif diff --git a/include/FileSwitchDirectory.h b/include/FileSwitchDirectory.h deleted file mode 100644 index 05282c90..00000000 --- a/include/FileSwitchDirectory.h +++ /dev/null @@ -1,85 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FILESWITCHDIRECTORY_H -#define FILESWITCHDIRECTORY_H - -#include "Directory.h" - -namespace Lucene -{ - /// A Directory instance that switches files between two other - /// Directory instances. - /// - /// Files with the specified extensions are placed in the primary - /// directory; others are placed in the secondary directory. The - /// provided Set must not change once passed to this class, and - /// must allow multiple threads to call contains at once. - class LPPAPI FileSwitchDirectory : public Directory - { - public: - FileSwitchDirectory(HashSet primaryExtensions, DirectoryPtr primaryDir, DirectoryPtr secondaryDir, bool doClose); - virtual ~FileSwitchDirectory(); - - LUCENE_CLASS(FileSwitchDirectory); - - protected: - HashSet primaryExtensions; - DirectoryPtr primaryDir; - DirectoryPtr secondaryDir; - bool doClose; - - public: - /// Return the primary directory. - DirectoryPtr getPrimaryDir(); - - /// Return the secondary directory. - DirectoryPtr getSecondaryDir(); - - /// Closes the store. 
- virtual void close(); - - /// Returns an array of strings, one for each file in the directory. - virtual HashSet listAll(); - - /// Utility method to return a file's extension. - static String getExtension(const String& name); - - /// Returns true if a file with the given name exists. - virtual bool fileExists(const String& name); - - /// Returns the time the named file was last modified. - virtual uint64_t fileModified(const String& name); - - /// Set the modified time of an existing file to now. - virtual void touchFile(const String& name); - - /// Removes an existing file in the directory. - virtual void deleteFile(const String& name); - - /// Returns the length of a file in the directory. - virtual int64_t fileLength(const String& name); - - /// Creates a new, empty file in the directory with the given name. - /// Returns a stream writing this file. - virtual IndexOutputPtr createOutput(const String& name); - - /// Ensure that any writes to this file are moved to stable storage. - /// Lucene uses this to properly commit changes to the index, to - /// prevent a machine/OS crash from corrupting the index. - virtual void sync(const String& name); - - /// Returns a stream reading an existing file, with the specified - /// read buffer size. The particular Directory implementation may - /// ignore the buffer size. - virtual IndexInputPtr openInput(const String& name); - - protected: - DirectoryPtr getDirectory(const String& name); - }; -} - -#endif diff --git a/include/FileUtils.h b/include/FileUtils.h deleted file mode 100644 index 1a6944ee..00000000 --- a/include/FileUtils.h +++ /dev/null @@ -1,66 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FILEUTILS_H -#define FILEUTILS_H - -#include "Lucene.h" - -namespace Lucene -{ - namespace FileUtils - { - /// Return true if given file or directory exists. - LPPAPI bool fileExists(const String& path); - - /// Return file last modified date and time. - LPPAPI uint64_t fileModified(const String& path); - - /// Set file last modified date and time to now. - LPPAPI bool touchFile(const String& path); - - /// Return file length in bytes. - LPPAPI int64_t fileLength(const String& path); - - /// Set new file length, truncating or expanding as required. - LPPAPI bool setFileLength(const String& path, int64_t length); - - /// Delete file from file system. - LPPAPI bool removeFile(const String& path); - - /// Copy a file to/from file system. - LPPAPI bool copyFile(const String& source, const String& dest); - - /// Create new directory under given location. - LPPAPI bool createDirectory(const String& path); - - /// Delete directory from file system. - LPPAPI bool removeDirectory(const String& path); - - /// Return true if given path points to a directory. - LPPAPI bool isDirectory(const String& path); - - /// Return list of files (and/or directories) under given directory. - /// @param path path to list directory. - /// @param filesOnly if true the exclude sub-directories. - /// @param dirList list of files to return. - LPPAPI bool listDirectory(const String& path, bool filesOnly, HashSet dirList); - - /// Copy a directory to/from file system. - LPPAPI bool copyDirectory(const String& source, const String& dest); - - /// Return complete path after joining given directory and file name. - LPPAPI String joinPath(const String& path, const String& file); - - /// Extract parent path from given path. - LPPAPI String extractPath(const String& path); - - /// Extract file name from given path. 
- LPPAPI String extractFile(const String& path); - } -} - -#endif diff --git a/include/Filter.h b/include/Filter.h deleted file mode 100644 index afee41f0..00000000 --- a/include/Filter.h +++ /dev/null @@ -1,40 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FILTER_H -#define FILTER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Abstract base class for restricting which documents may be returned during searching. - class LPPAPI Filter : public LuceneObject - { - public: - virtual ~Filter(); - LUCENE_CLASS(Filter); - - public: - /// Creates a {@link DocIdSet} enumerating the documents that should be permitted in search results. - /// - /// Note: null can be returned if no documents are accepted by this Filter. - /// - /// Note: This method will be called once per segment in the index during searching. The returned - /// {@link DocIdSet} must refer to document IDs for that segment, not for the top-level reader. - /// - /// @param reader a {@link IndexReader} instance opened on the index currently searched on. Note, - /// it is likely that the provided reader does not represent the whole underlying index ie. if the - /// index has more than one segment the given reader only represents a single segment. - /// @return a DocIdSet that provides the documents which should be permitted or prohibited in search - /// results. NOTE: null can be returned if no documents will be accepted by this Filter. 
- /// - /// @see DocIdBitSet - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) = 0; - }; -} - -#endif diff --git a/include/FilterIndexReader.h b/include/FilterIndexReader.h deleted file mode 100644 index 05b4bdbc..00000000 --- a/include/FilterIndexReader.h +++ /dev/null @@ -1,139 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FILTERINDEXREADER_H -#define FILTERINDEXREADER_H - -#include "IndexReader.h" -#include "TermPositions.h" -#include "TermEnum.h" - -namespace Lucene -{ - /// A FilterIndexReader contains another IndexReader, which it uses as its basic source of data, possibly - /// transforming the data along the way or providing additional functionality. The class FilterIndexReader - /// itself simply implements all abstract methods of IndexReader with versions that pass all requests to - /// the contained index reader. Subclasses of FilterIndexReader may further override some of these methods - /// and may also provide additional methods and fields. - class LPPAPI FilterIndexReader : public IndexReader - { - public: - /// Construct a FilterIndexReader based on the specified base reader. Directory locking for delete, - /// undeleteAll, and setNorm operations is left to the base reader. - /// - /// Note that base reader is closed if this FilterIndexReader is closed. - /// @param in specified base reader. 
- FilterIndexReader(IndexReaderPtr in); - - virtual ~FilterIndexReader(); - - LUCENE_CLASS(FilterIndexReader); - - protected: - IndexReaderPtr in; - - public: - virtual DirectoryPtr directory(); - virtual Collection getTermFreqVectors(int32_t docNumber); - virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); - virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); - virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); - virtual int32_t numDocs(); - virtual int32_t maxDoc(); - virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); - virtual bool isDeleted(int32_t n); - virtual bool hasDeletions(); - virtual bool hasNorms(const String& field); - virtual ByteArray norms(const String& field); - virtual void norms(const String& field, ByteArray norms, int32_t offset); - virtual TermEnumPtr terms(); - virtual TermEnumPtr terms(TermPtr t); - virtual int32_t docFreq(TermPtr t); - virtual TermDocsPtr termDocs(); - virtual TermDocsPtr termDocs(TermPtr term); - virtual TermPositionsPtr termPositions(); - virtual HashSet getFieldNames(FieldOption fieldOption); - virtual int64_t getVersion(); - virtual bool isCurrent(); - virtual bool isOptimized(); - virtual Collection getSequentialSubReaders(); - - /// If the subclass of FilteredIndexReader modifies the contents of the FieldCache, you must - /// override this method to provide a different key - virtual LuceneObjectPtr getFieldCacheKey(); - - /// If the subclass of FilteredIndexReader modifies the deleted docs, you must override this - /// method to provide a different key - virtual LuceneObjectPtr getDeletesCacheKey(); - - protected: - virtual void doUndeleteAll(); - virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); - virtual void doDelete(int32_t docNum); - virtual void doCommit(MapStringString commitUserData); - virtual void doClose(); - }; - - /// Base class for filtering {@link 
TermDocs} implementations. - class LPPAPI FilterTermDocs : public TermPositions, public LuceneObject - { - public: - FilterTermDocs(TermDocsPtr in); - virtual ~FilterTermDocs(); - - LUCENE_CLASS(FilterTermDocs); - - protected: - TermDocsPtr in; - - public: - virtual void seek(TermPtr term); - virtual void seek(TermEnumPtr termEnum); - virtual int32_t doc(); - virtual int32_t freq(); - virtual bool next(); - virtual int32_t read(Collection docs, Collection freqs); - virtual bool skipTo(int32_t target); - virtual void close(); - }; - - /// Base class for filtering {@link TermPositions} implementations. - class LPPAPI FilterTermPositions : public FilterTermDocs - { - public: - FilterTermPositions(TermPositionsPtr in); - virtual ~FilterTermPositions(); - - LUCENE_CLASS(FilterTermPositions); - - public: - virtual int32_t nextPosition(); - virtual int32_t getPayloadLength(); - virtual ByteArray getPayload(ByteArray data, int32_t offset); - virtual bool isPayloadAvailable(); - }; - - /// Base class for filtering {@link TermEnum} implementations. - class LPPAPI FilterTermEnum : public TermEnum - { - public: - FilterTermEnum(TermEnumPtr in); - virtual ~FilterTermEnum(); - - LUCENE_CLASS(FilterTermEnum); - - protected: - TermEnumPtr in; - - public: - virtual bool next(); - virtual TermPtr term(); - virtual int32_t docFreq(); - virtual void close(); - }; -} - -#endif diff --git a/include/FilterManager.h b/include/FilterManager.h deleted file mode 100644 index 5f6b6736..00000000 --- a/include/FilterManager.h +++ /dev/null @@ -1,71 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FILTERMANAGER_H -#define FILTERMANAGER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Filter caching singleton. It can be used to save filters locally for reuse. Also could be used as a - /// persistent storage for any filter as long as the filter provides a proper hashCode(), as that is used - /// as the key in the cache. - /// - /// The cache is periodically cleaned up from a separate thread to ensure the cache doesn't exceed the - /// maximum size. - class LPPAPI FilterManager : public LuceneObject - { - public: - /// Sets up the FilterManager singleton. - FilterManager(); - virtual ~FilterManager(); - - LUCENE_CLASS(FilterManager); - - protected: - /// The default maximum number of Filters in the cache - static const int32_t DEFAULT_CACHE_CLEAN_SIZE; - - /// The default frequency of cache cleanup - static const int64_t DEFAULT_CACHE_SLEEP_TIME; - - /// The cache itself - MapIntFilterItem cache; - - /// Maximum allowed cache size - int32_t cacheCleanSize; - - /// Cache cleaning frequency - int64_t cleanSleepTime; - - /// Cache cleaner that runs in a separate thread - FilterCleanerPtr filterCleaner; - - public: - virtual void initialize(); - - static FilterManagerPtr getInstance(); - - /// Sets the max size that cache should reach before it is cleaned up - /// @param cacheCleanSize maximum allowed cache size - void setCacheSize(int32_t cacheCleanSize); - - /// Sets the cache cleaning frequency in milliseconds. - /// @param cleanSleepTime cleaning frequency in milliseconds - void setCleanThreadSleepTime(int64_t cleanSleepTime); - - /// Returns the cached version of the filter. Allows the caller to pass up a small filter but this will - /// keep a persistent version around and allow the caching filter to do its job. 
- /// @param filter The input filter - /// @return The cached version of the filter - FilterPtr getFilter(FilterPtr filter); - - friend class FilterCleaner; - }; -} - -#endif diff --git a/include/FilteredDocIdSet.h b/include/FilteredDocIdSet.h deleted file mode 100644 index c95bdd9d..00000000 --- a/include/FilteredDocIdSet.h +++ /dev/null @@ -1,54 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FILTEREDDOCIDSET_H -#define FILTEREDDOCIDSET_H - -#include "DocIdSet.h" - -namespace Lucene -{ - /// Abstract decorator class for a DocIdSet implementation that provides on-demand filtering/validation - /// mechanism on a given DocIdSet. - /// - /// Technically, this same functionality could be achieved with ChainedFilter (under contrib/misc), however - /// the benefit of this class is it never materializes the full bitset for the filter. Instead, the {@link - /// #match} method is invoked on-demand, per docID visited during searching. If you know few docIDs will - /// be visited, and the logic behind {@link #match} is relatively costly, this may be a better way to filter - /// than ChainedFilter. - /// @see DocIdSet - class LPPAPI FilteredDocIdSet : public DocIdSet - { - public: - /// @param innerSet Underlying DocIdSet - FilteredDocIdSet(DocIdSetPtr innerSet); - virtual ~FilteredDocIdSet(); - - LUCENE_CLASS(FilteredDocIdSet); - - protected: - DocIdSetPtr innerSet; - - public: - /// This DocIdSet implementation is cacheable if the inner set is cacheable. - virtual bool isCacheable(); - - /// Implementation of the contract to build a DocIdSetIterator. 
- /// @see DocIdSetIterator - /// @see FilteredDocIdSetIterator - virtual DocIdSetIteratorPtr iterator(); - - protected: - /// Validation method to determine whether a docid should be in the result set. - /// @param docid docid to be tested - /// @return true if input docid should be in the result set, false otherwise. - virtual bool match(int32_t docid) = 0; - - friend class DefaultFilteredDocIdSetIterator; - }; -} - -#endif diff --git a/include/FilteredDocIdSetIterator.h b/include/FilteredDocIdSetIterator.h deleted file mode 100644 index 98d2daa7..00000000 --- a/include/FilteredDocIdSetIterator.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FILTEREDDOCIDSETITERATOR_H -#define FILTEREDDOCIDSETITERATOR_H - -#include "DocIdSetIterator.h" - -namespace Lucene -{ - /// Abstract decorator class of a DocIdSetIterator implementation that provides on-demand filter/validation - /// mechanism on an underlying DocIdSetIterator. See {@link FilteredDocIdSet}. - class LPPAPI FilteredDocIdSetIterator : public DocIdSetIterator - { - public: - /// @param innerIter Underlying DocIdSetIterator. - FilteredDocIdSetIterator(DocIdSetIteratorPtr innerIter); - virtual ~FilteredDocIdSetIterator(); - - LUCENE_CLASS(FilteredDocIdSetIterator); - - protected: - DocIdSetIteratorPtr innerIter; - int32_t doc; - - public: - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - - protected: - /// Validation method to determine whether a docid should be in the result set. - /// @param doc docid to be tested - /// @return true if input docid should be in the result set, false otherwise. 
- /// @see #FilteredDocIdSetIterator(DocIdSetIterator). - virtual bool match(int32_t docid) = 0; - }; -} - -#endif diff --git a/include/FilteredQuery.h b/include/FilteredQuery.h deleted file mode 100644 index 75a66f4b..00000000 --- a/include/FilteredQuery.h +++ /dev/null @@ -1,63 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FILTEREDQUERY_H -#define FILTEREDQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// A query that applies a filter to the results of another query. - /// - /// Note: the bits are retrieved from the filter each time this query is used in a search - use a - /// CachingWrapperFilter to avoid regenerating the bits every time. - /// - /// @see CachingWrapperFilter - class LPPAPI FilteredQuery : public Query - { - public: - /// Constructs a new query which applies a filter to the results of the original query. - /// Filter::getDocIdSet() will be called every time this query is used in a search. - /// @param query Query to be filtered, cannot be null. - /// @param filter Filter to apply to query results, cannot be null. - FilteredQuery(QueryPtr query, FilterPtr filter); - - virtual ~FilteredQuery(); - - LUCENE_CLASS(FilteredQuery); - - private: - QueryPtr query; - FilterPtr filter; - - public: - using Query::toString; - - /// Returns a Weight that applies the filter to the enclosed query's Weight. - /// This is accomplished by overriding the Scorer returned by the Weight. - virtual WeightPtr createWeight(SearcherPtr searcher); - - /// Rewrites the wrapped query. 
- virtual QueryPtr rewrite(IndexReaderPtr reader); - - QueryPtr getQuery(); - FilterPtr getFilter(); - - virtual void extractTerms(SetTerm terms); - - /// Prints a user-readable version of this query. - virtual String toString(const String& field); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - friend class FilteredQueryWeight; - }; -} - -#endif diff --git a/include/FilteredTermEnum.h b/include/FilteredTermEnum.h deleted file mode 100644 index b10fd217..00000000 --- a/include/FilteredTermEnum.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FILTEREDTERMENUM_H -#define FILTEREDTERMENUM_H - -#include "TermEnum.h" - -namespace Lucene -{ - /// Abstract class for enumerating a subset of all terms. - /// - /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than - /// all that precede it. - class LPPAPI FilteredTermEnum : public TermEnum - { - public: - virtual ~FilteredTermEnum(); - LUCENE_CLASS(FilteredTermEnum); - - protected: - /// The current term - TermPtr currentTerm; - - /// The delegate enum - to set this member use {@link #setEnum} - TermEnumPtr actualEnum; - - public: - /// Equality measure on the term - virtual double difference() = 0; - - /// Returns the docFreq of the current Term in the enumeration. - /// Returns -1 if no Term matches or all terms have been enumerated. - virtual int32_t docFreq(); - - /// Increments the enumeration to the next element. True if one exists. - virtual bool next(); - - /// Returns the current Term in the enumeration. 
- /// Returns null if no Term matches or all terms have been enumerated. - virtual TermPtr term(); - - /// Closes the enumeration to further activity, freeing resources. - virtual void close(); - - protected: - /// Equality compare on the term - virtual bool termCompare(TermPtr term) = 0; - - /// Indicates the end of the enumeration has been reached - virtual bool endEnum() = 0; - - /// Use this method to set the actual TermEnum (eg. in ctor), it will be automatically positioned - /// on the first matching term. - virtual void setEnum(TermEnumPtr actualEnum); - }; -} - -#endif diff --git a/include/FlagsAttribute.h b/include/FlagsAttribute.h deleted file mode 100644 index be7478e3..00000000 --- a/include/FlagsAttribute.h +++ /dev/null @@ -1,47 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FLAGSATTRIBUTE_H -#define FLAGSATTRIBUTE_H - -#include "Attribute.h" - -namespace Lucene -{ - /// This attribute can be used to pass different flags down the tokenizer chain, eg from one TokenFilter - /// to another one. - class LPPAPI FlagsAttribute : public Attribute - { - public: - FlagsAttribute(); - virtual ~FlagsAttribute(); - - LUCENE_CLASS(FlagsAttribute); - - protected: - int32_t flags; - - public: - virtual String toString(); - - /// Get the bitset for any bits that have been set. This is completely distinct from {@link - /// TypeAttribute#type()}, although they do share similar purposes. The flags can be used to encode - /// information about the token for use by other {@link TokenFilter}s. 
- virtual int32_t getFlags(); - - /// @see #getFlags() - virtual void setFlags(int32_t flags); - - virtual void clear(); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual void copyTo(AttributePtr target); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/FormatPostingsDocsConsumer.h b/include/FormatPostingsDocsConsumer.h deleted file mode 100644 index 111931a0..00000000 --- a/include/FormatPostingsDocsConsumer.h +++ /dev/null @@ -1,30 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FORMATPOSTINGSDOCSCONSUMER_H -#define FORMATPOSTINGSDOCSCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class FormatPostingsDocsConsumer : public LuceneObject - { - public: - virtual ~FormatPostingsDocsConsumer(); - - LUCENE_CLASS(FormatPostingsDocsConsumer); - - public: - /// Adds a new doc in this term. If this returns null then we just skip consuming positions/payloads. - virtual FormatPostingsPositionsConsumerPtr addDoc(int32_t docID, int32_t termDocFreq) = 0; - - /// Called when we are done adding docs to this term - virtual void finish() = 0; - }; -} - -#endif diff --git a/include/FormatPostingsDocsWriter.h b/include/FormatPostingsDocsWriter.h deleted file mode 100644 index ea02d983..00000000 --- a/include/FormatPostingsDocsWriter.h +++ /dev/null @@ -1,58 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FORMATPOSTINGSDOCSWRITER_H -#define FORMATPOSTINGSDOCSWRITER_H - -#include "FormatPostingsDocsConsumer.h" - -namespace Lucene -{ - /// Consumes doc & freq, writing them using the current index file format - class FormatPostingsDocsWriter : public FormatPostingsDocsConsumer - { - public: - FormatPostingsDocsWriter(SegmentWriteStatePtr state, FormatPostingsTermsWriterPtr parent); - virtual ~FormatPostingsDocsWriter(); - - LUCENE_CLASS(FormatPostingsDocsWriter); - - public: - IndexOutputPtr out; - FormatPostingsTermsWriterWeakPtr _parent; - SegmentWriteStatePtr state; - FormatPostingsPositionsWriterPtr posWriter; - DefaultSkipListWriterPtr skipListWriter; - int32_t skipInterval; - int32_t totalNumDocs; - - bool omitTermFreqAndPositions; - bool storePayloads; - int64_t freqStart; - FieldInfoPtr fieldInfo; - - int32_t lastDocID; - int32_t df; - - TermInfoPtr termInfo; // minimize consing - UTF8ResultPtr utf8; - - public: - virtual void initialize(); - - void setField(FieldInfoPtr fieldInfo); - - /// Adds a new doc in this term. If this returns null then we just skip consuming positions/payloads. - virtual FormatPostingsPositionsConsumerPtr addDoc(int32_t docID, int32_t termDocFreq); - - /// Called when we are done adding docs to this term - virtual void finish(); - - void close(); - }; -} - -#endif diff --git a/include/FormatPostingsFieldsConsumer.h b/include/FormatPostingsFieldsConsumer.h deleted file mode 100644 index 416fa7b1..00000000 --- a/include/FormatPostingsFieldsConsumer.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FORMATPOSTINGSFIELDSCONSUMER_H -#define FORMATPOSTINGSFIELDSCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Abstract API that consumes terms, doc, freq, prox and payloads postings. Concrete implementations of this - /// actually do "something" with the postings (write it into the index in a specific format). - class FormatPostingsFieldsConsumer : public LuceneObject - { - public: - virtual ~FormatPostingsFieldsConsumer(); - - LUCENE_CLASS(FormatPostingsFieldsConsumer); - - public: - /// Add a new field. - virtual FormatPostingsTermsConsumerPtr addField(FieldInfoPtr field) = 0; - - /// Called when we are done adding everything. - virtual void finish() = 0; - }; -} - -#endif diff --git a/include/FormatPostingsFieldsWriter.h b/include/FormatPostingsFieldsWriter.h deleted file mode 100644 index e61d686c..00000000 --- a/include/FormatPostingsFieldsWriter.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FORMATPOSTINGSFIELDSWRITER_H -#define FORMATPOSTINGSFIELDSWRITER_H - -#include "FormatPostingsFieldsConsumer.h" - -namespace Lucene -{ - class FormatPostingsFieldsWriter : public FormatPostingsFieldsConsumer - { - public: - FormatPostingsFieldsWriter(SegmentWriteStatePtr state, FieldInfosPtr fieldInfos); - virtual ~FormatPostingsFieldsWriter(); - - LUCENE_CLASS(FormatPostingsFieldsWriter); - - public: - DirectoryPtr dir; - String segment; - TermInfosWriterPtr termsOut; - SegmentWriteStatePtr state; - FieldInfosPtr fieldInfos; - FormatPostingsTermsWriterPtr termsWriter; - DefaultSkipListWriterPtr skipListWriter; - int32_t totalNumDocs; - - public: - virtual void initialize(); - - /// Add a new field. - virtual FormatPostingsTermsConsumerPtr addField(FieldInfoPtr field); - - /// Called when we are done adding everything. - virtual void finish(); - }; -} - -#endif diff --git a/include/FormatPostingsPositionsConsumer.h b/include/FormatPostingsPositionsConsumer.h deleted file mode 100644 index 35d3c43d..00000000 --- a/include/FormatPostingsPositionsConsumer.h +++ /dev/null @@ -1,30 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FORMATPOSTINGSPOSITIONSCONSUMER_H -#define FORMATPOSTINGSPOSITIONSCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class FormatPostingsPositionsConsumer : public LuceneObject - { - public: - virtual ~FormatPostingsPositionsConsumer(); - - LUCENE_CLASS(FormatPostingsPositionsConsumer); - - public: - /// Add a new position & payload. If payloadLength > 0 you must read those bytes from the IndexInput. 
- virtual void addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength) = 0; - - /// Called when we are done adding positions & payloads. - virtual void finish() = 0; - }; -} - -#endif diff --git a/include/FormatPostingsPositionsWriter.h b/include/FormatPostingsPositionsWriter.h deleted file mode 100644 index 7a0e9add..00000000 --- a/include/FormatPostingsPositionsWriter.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FORMATPOSTINGSPOSITIONSWRITER_H -#define FORMATPOSTINGSPOSITIONSWRITER_H - -#include "FormatPostingsPositionsConsumer.h" - -namespace Lucene -{ - class FormatPostingsPositionsWriter : public FormatPostingsPositionsConsumer - { - public: - FormatPostingsPositionsWriter(SegmentWriteStatePtr state, FormatPostingsDocsWriterPtr parent); - virtual ~FormatPostingsPositionsWriter(); - - LUCENE_CLASS(FormatPostingsPositionsWriter); - - public: - FormatPostingsDocsWriterWeakPtr _parent; - IndexOutputPtr out; - - bool omitTermFreqAndPositions; - bool storePayloads; - int32_t lastPayloadLength; - - int32_t lastPosition; - - public: - /// Add a new position & payload - virtual void addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength); - - void setField(FieldInfoPtr fieldInfo); - - /// Called when we are done adding positions & payloads - virtual void finish(); - - void close(); - }; -} - -#endif diff --git a/include/FormatPostingsTermsConsumer.h b/include/FormatPostingsTermsConsumer.h deleted file mode 100644 index cfe5f513..00000000 --- a/include/FormatPostingsTermsConsumer.h +++ /dev/null @@ -1,34 +0,0 @@ 
-///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FORMATPOSTINGSTERMSCONSUMER_H -#define FORMATPOSTINGSTERMSCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class FormatPostingsTermsConsumer : public LuceneObject - { - public: - virtual ~FormatPostingsTermsConsumer(); - - LUCENE_CLASS(FormatPostingsTermsConsumer); - - public: - CharArray termBuffer; - - public: - /// Adds a new term in this field - virtual FormatPostingsDocsConsumerPtr addTerm(CharArray text, int32_t start) = 0; - virtual FormatPostingsDocsConsumerPtr addTerm(const String& text); - - /// Called when we are done adding terms to this field - virtual void finish() = 0; - }; -} - -#endif diff --git a/include/FormatPostingsTermsWriter.h b/include/FormatPostingsTermsWriter.h deleted file mode 100644 index 4b5aee7f..00000000 --- a/include/FormatPostingsTermsWriter.h +++ /dev/null @@ -1,50 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FORMATPOSTINGSTERMSWRITER_H -#define FORMATPOSTINGSTERMSWRITER_H - -#include "FormatPostingsTermsConsumer.h" - -namespace Lucene -{ - class FormatPostingsTermsWriter : public FormatPostingsTermsConsumer - { - public: - FormatPostingsTermsWriter(SegmentWriteStatePtr state, FormatPostingsFieldsWriterPtr parent); - virtual ~FormatPostingsTermsWriter(); - - LUCENE_CLASS(FormatPostingsTermsWriter); - - public: - FormatPostingsFieldsWriterWeakPtr _parent; - SegmentWriteStatePtr state; - FormatPostingsDocsWriterPtr docsWriter; - TermInfosWriterPtr termsOut; - FieldInfoPtr fieldInfo; - - CharArray currentTerm; - int32_t currentTermStart; - - int64_t freqStart; - int64_t proxStart; - - public: - virtual void initialize(); - - void setField(FieldInfoPtr fieldInfo); - - /// Adds a new term in this field - virtual FormatPostingsDocsConsumerPtr addTerm(CharArray text, int32_t start); - - /// Called when we are done adding terms to this field - virtual void finish(); - - void close(); - }; -} - -#endif diff --git a/include/FreqProxFieldMergeState.h b/include/FreqProxFieldMergeState.h deleted file mode 100644 index c430cafd..00000000 --- a/include/FreqProxFieldMergeState.h +++ /dev/null @@ -1,48 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FREQPROXFIELDMERGESTATE_H -#define FREQPROXFIELDMERGESTATE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Used by DocumentsWriter to merge the postings from multiple ThreadStates when creating a segment - class FreqProxFieldMergeState : public LuceneObject - { - public: - FreqProxFieldMergeState(FreqProxTermsWriterPerFieldPtr field); - virtual ~FreqProxFieldMergeState(); - - LUCENE_CLASS(FreqProxFieldMergeState); - - public: - FreqProxTermsWriterPerFieldPtr field; - int32_t numPostings; - CharBlockPoolPtr charPool; - Collection postings; - - FreqProxTermsWriterPostingListPtr p; - CharArray text; - int32_t textOffset; - - ByteSliceReaderPtr freq; - ByteSliceReaderPtr prox; - - int32_t docID; - int32_t termFreq; - - protected: - int32_t postingUpto; - - public: - bool nextTerm(); - bool nextDoc(); - }; -} - -#endif diff --git a/include/FreqProxTermsWriter.h b/include/FreqProxTermsWriter.h deleted file mode 100644 index 95e96965..00000000 --- a/include/FreqProxTermsWriter.h +++ /dev/null @@ -1,58 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FREQPROXTERMSWRITER_H -#define FREQPROXTERMSWRITER_H - -#include "TermsHashConsumer.h" -#include "RawPostingList.h" - -namespace Lucene -{ - class FreqProxTermsWriter : public TermsHashConsumer - { - public: - virtual ~FreqProxTermsWriter(); - - LUCENE_CLASS(FreqProxTermsWriter); - - protected: - ByteArray payloadBuffer; - - public: - virtual TermsHashConsumerPerThreadPtr addThread(TermsHashPerThreadPtr perThread); - virtual void createPostings(Collection postings, int32_t start, int32_t count); - virtual void closeDocStore(SegmentWriteStatePtr state); - virtual void abort(); - virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state); - - /// Walk through all unique text tokens (Posting instances) found in this field and serialize them - /// into a single RAM segment. - void appendPostings(Collection fields, FormatPostingsFieldsConsumerPtr consumer); - - virtual int32_t bytesPerPosting(); - - protected: - static int32_t compareText(const wchar_t* text1, int32_t pos1, const wchar_t* text2, int32_t pos2); - }; - - class FreqProxTermsWriterPostingList : public RawPostingList - { - public: - FreqProxTermsWriterPostingList(); - virtual ~FreqProxTermsWriterPostingList(); - - LUCENE_CLASS(FreqProxTermsWriterPostingList); - - public: - int32_t docFreq; // # times this term occurs in the current doc - int32_t lastDocID; // Last docID where this term occurred - int32_t lastDocCode; // Code for prior doc - int32_t lastPosition; // Last position where this term occurred - }; -} - -#endif diff --git a/include/FreqProxTermsWriterPerField.h b/include/FreqProxTermsWriterPerField.h deleted file mode 100644 index d7f00c68..00000000 --- a/include/FreqProxTermsWriterPerField.h +++ /dev/null @@ -1,47 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FREQPROXTERMSWRITERPERFIELD_H -#define FREQPROXTERMSWRITERPERFIELD_H - -#include "TermsHashConsumerPerField.h" - -namespace Lucene -{ - class FreqProxTermsWriterPerField : public TermsHashConsumerPerField - { - public: - FreqProxTermsWriterPerField(TermsHashPerFieldPtr termsHashPerField, FreqProxTermsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo); - virtual ~FreqProxTermsWriterPerField(); - - LUCENE_CLASS(FreqProxTermsWriterPerField); - - public: - FreqProxTermsWriterPerThreadWeakPtr _perThread; - TermsHashPerFieldWeakPtr _termsHashPerField; - FieldInfoPtr fieldInfo; - DocStatePtr docState; - FieldInvertStatePtr fieldState; - bool omitTermFreqAndPositions; - PayloadAttributePtr payloadAttribute; - bool hasPayloads; - - public: - virtual int32_t getStreamCount(); - virtual void finish(); - virtual void skippingLongTerm(); - virtual int32_t compareTo(LuceneObjectPtr other); - void reset(); - virtual bool start(Collection fields, int32_t count); - virtual void start(FieldablePtr field); - void writeProx(FreqProxTermsWriterPostingListPtr p, int32_t proxCode); - virtual void newTerm(RawPostingListPtr p); - virtual void addTerm(RawPostingListPtr p); - void abort(); - }; -} - -#endif diff --git a/include/FreqProxTermsWriterPerThread.h b/include/FreqProxTermsWriterPerThread.h deleted file mode 100644 index 46fcb648..00000000 --- a/include/FreqProxTermsWriterPerThread.h +++ /dev/null @@ -1,34 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef FREQPROXTERMSWRITERPERTHREAD_H -#define FREQPROXTERMSWRITERPERTHREAD_H - -#include "TermsHashConsumerPerThread.h" - -namespace Lucene -{ - class FreqProxTermsWriterPerThread : public TermsHashConsumerPerThread - { - public: - FreqProxTermsWriterPerThread(TermsHashPerThreadPtr perThread); - virtual ~FreqProxTermsWriterPerThread(); - - LUCENE_CLASS(FreqProxTermsWriterPerThread); - - public: - TermsHashPerThreadWeakPtr _termsHashPerThread; - DocStatePtr docState; - - public: - virtual TermsHashConsumerPerFieldPtr addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo); - virtual void startDocument(); - virtual DocWriterPtr finishDocument(); - virtual void abort(); - }; -} - -#endif diff --git a/include/FuzzyQuery.h b/include/FuzzyQuery.h deleted file mode 100644 index b12376c7..00000000 --- a/include/FuzzyQuery.h +++ /dev/null @@ -1,78 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FUZZYQUERY_H -#define FUZZYQUERY_H - -#include "MultiTermQuery.h" - -namespace Lucene -{ - /// Implements the fuzzy search query. The similarity measurement is based on the Levenshtein (edit - /// distance) algorithm. - /// - /// Warning: this query is not very scalable with its default prefix length of 0 - in this case, *every* - /// term will be enumerated and cause an edit score calculation. - class LPPAPI FuzzyQuery : public MultiTermQuery - { - public: - /// Create a new FuzzyQuery that will match terms with a similarity of at least minimumSimilarity - /// to term. If a prefixLength > 0 is specified, a common prefix of that length is also required. 
- /// @param term The term to search for - /// @param minimumSimilarity A value between 0 and 1 to set the required similarity between the query - /// term and the matching terms. For example, for a minimumSimilarity of 0.5 a term of the same - /// length as the query term is considered similar to the query term if the edit distance between - /// both terms is less than length(term) * 0.5 - /// @param prefixLength Length of common (non-fuzzy) prefix - FuzzyQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength); - FuzzyQuery(TermPtr term, double minimumSimilarity); - FuzzyQuery(TermPtr term); - - virtual ~FuzzyQuery(); - - LUCENE_CLASS(FuzzyQuery); - - protected: - double minimumSimilarity; - int32_t prefixLength; - bool termLongEnough; - - TermPtr term; - - public: - static double defaultMinSimilarity(); - static const int32_t defaultPrefixLength; - - public: - using MultiTermQuery::toString; - - /// Returns the minimum similarity that is required for this query to match. - /// @return float value between 0.0 and 1.0 - double getMinSimilarity(); - - /// Returns the non-fuzzy prefix length. This is the number of characters at the start of a term that - /// must be identical (not fuzzy) to the query term if the query is to match that term. - int32_t getPrefixLength(); - - /// Returns the pattern term. 
- TermPtr getTerm(); - - virtual void setRewriteMethod(RewriteMethodPtr method); - virtual QueryPtr rewrite(IndexReaderPtr reader); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual String toString(const String& field); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - - protected: - void ConstructQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength); - - virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); - }; -} - -#endif diff --git a/include/FuzzyTermEnum.h b/include/FuzzyTermEnum.h deleted file mode 100644 index 29f14c84..00000000 --- a/include/FuzzyTermEnum.h +++ /dev/null @@ -1,116 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef FUZZYTERMENUM_H -#define FUZZYTERMENUM_H - -#include "FilteredTermEnum.h" - -namespace Lucene -{ - /// Subclass of FilteredTermEnum for enumerating all terms that are similar to the specified filter term. - /// - /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater - /// than all that precede it. - class LPPAPI FuzzyTermEnum : public FilteredTermEnum - { - public: - /// Constructor for enumeration of all terms from specified reader which share a prefix of length - /// prefixLength with term and which have a fuzzy similarity > minSimilarity. - /// - /// After calling the constructor the enumeration is already pointing to the first valid term if - /// such a term exists. - /// @param reader Delivers terms. - /// @param term Pattern term. - /// @param minSimilarity Minimum required similarity for terms from the reader. Default value is 0.5. 
- /// @param prefixLength Length of required common prefix. Default value is 0. - FuzzyTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity, int32_t prefixLength); - FuzzyTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity); - FuzzyTermEnum(IndexReaderPtr reader, TermPtr term); - - virtual ~FuzzyTermEnum(); - - LUCENE_CLASS(FuzzyTermEnum); - - protected: - /// Allows us save time required to create a new array every time similarity is called. - Collection p; - Collection d; - - double _similarity; - bool _endEnum; - - TermPtr searchTerm; - String field; - String text; - String prefix; - - double minimumSimilarity; - double scale_factor; - - public: - virtual double difference(); - virtual bool endEnum(); - virtual void close(); - - protected: - void ConstructTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity, int32_t prefixLength); - - /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to calculate the distance between - /// the given term and the comparing term. - virtual bool termCompare(TermPtr term); - - /// - /// Compute Levenshtein distance - /// - /// Similarity returns a number that is 1.0f or less (including negative numbers) based on how similar the - /// Term is compared to a target term. It returns exactly 0.0 when - ///
-        /// editDistance > maximumEditDistance
-        /// 
- /// - /// Otherwise it returns: - ///
-        /// 1 - (editDistance / length)
-        /// 
- /// where length is the length of the shortest term (text or target) including a prefix that are identical - /// and editDistance is the Levenshtein distance for the two words. - /// - /// Embedded within this algorithm is a fail-fast Levenshtein distance algorithm. The fail-fast algorithm - /// differs from the standard Levenshtein distance algorithm in that it is aborted if it is discovered that - /// the minimum distance between the words is greater than some threshold. - /// - /// To calculate the maximum distance threshold we use the following formula: - ///
-        /// (1 - minimumSimilarity) * length
-        /// 
- /// where length is the shortest term including any prefix that is not part of the similarity comparison. - /// This formula was derived by solving for what maximum value of distance returns false for the following - /// statements: - ///
-        /// similarity = 1 - ((double)distance / (double)(prefixLength + std::min(textlen, targetlen)));
-        /// return (similarity > minimumSimilarity);
-        /// 
- /// where distance is the Levenshtein distance for the two words. - /// - /// Levenshtein distance (also known as edit distance) is a measure of similarity between two strings where - /// the distance is measured as the number of character deletions, insertions or substitutions required to - /// transform one string to the other string. - /// - /// @param target The target word or phrase. - /// @return the similarity, 0.0 or less indicates that it matches less than the required threshold and 1.0 - /// indicates that the text and target are identical. - double similarity(const String& target); - - /// The max Distance is the maximum Levenshtein distance for the text compared to some other value that - /// results in score that is better than the minimum similarity. - /// @param m The length of the "other value" - /// @return The maximum Levenshtein distance that we care about - int32_t calculateMaxDistance(int32_t m); - }; -} - -#endif diff --git a/include/HashMap.h b/include/HashMap.h deleted file mode 100644 index a01fb551..00000000 --- a/include/HashMap.h +++ /dev/null @@ -1,191 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef HASHMAP_H -#define HASHMAP_H - -#include -#include "LuceneSync.h" - -namespace Lucene -{ - /// Utility template class to handle hash maps that can be safely copied and shared - template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > - class HashMap : public LuceneSync - { - public: - typedef HashMap this_type; - typedef std::pair key_value; - typedef boost::unordered_map< KEY, VALUE, HASH, EQUAL, Allocator > map_type; - typedef typename map_type::iterator iterator; - typedef typename map_type::const_iterator const_iterator; - typedef KEY key_type; - typedef VALUE value_type; - - virtual ~HashMap() - { - } - - protected: - boost::shared_ptr mapContainer; - - public: - static this_type newInstance() - { - this_type instance; - instance.mapContainer = Lucene::newInstance(); - return instance; - } - - void reset() - { - mapContainer.reset(); - } - - int32_t size() const - { - return (int32_t)mapContainer->size(); - } - - bool empty() const - { - return mapContainer->empty(); - } - - void clear() - { - mapContainer->clear(); - } - - iterator begin() - { - return mapContainer->begin(); - } - - iterator end() - { - return mapContainer->end(); - } - - const_iterator begin() const - { - return mapContainer->begin(); - } - - const_iterator end() const - { - return mapContainer->end(); - } - - operator bool() const - { - return mapContainer; - } - - bool operator! 
() const - { - return !mapContainer; - } - - map_type& operator= (const map_type& other) - { - mapContainer = other.mapContainer; - return *this; - } - - void put(const KEY& key, const VALUE& value) - { - (*mapContainer)[key] = value; - } - - template - void putAll(ITER first, ITER last) - { - for (iterator current = first; current != last; ++current) - (*mapContainer)[current->first] = current->second; - } - - template - void remove(ITER pos) - { - mapContainer->erase(pos); - } - - template - ITER remove(ITER first, ITER last) - { - return mapContainer->erase(first, last); - } - - bool remove(const KEY& key) - { - return (mapContainer->erase(key) > 0); - } - - iterator find(const KEY& key) - { - return mapContainer->find(key); - } - - VALUE get(const KEY& key) const - { - iterator findValue = mapContainer->find(key); - return findValue == mapContainer->end() ? VALUE() : findValue->second; - } - - bool contains(const KEY& key) const - { - return (mapContainer->find(key) != mapContainer->end()); - } - - VALUE& operator[] (const KEY& key) - { - return (*mapContainer)[key]; - } - }; - - /// Utility template class to handle weak keyed maps - template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > - class WeakHashMap : public HashMap - { - public: - typedef WeakHashMap this_type; - typedef std::pair key_value; - typedef typename boost::unordered_map< KEY, VALUE, HASH, EQUAL, Allocator > map_type; - typedef typename map_type::iterator iterator; - - static this_type newInstance() - { - this_type instance; - instance.mapContainer = Lucene::newInstance(); - return instance; - } - - void removeWeak() - { - if (!this->mapContainer || this->mapContainer->empty()) - return; - map_type clearCopy; - for (iterator key = this->mapContainer->begin(); key != this->mapContainer->end(); ++key) - { - if (!key->first.expired()) - clearCopy.insert(*key); - } - this->mapContainer->swap(clearCopy); - } - - VALUE get(const KEY& key) - { - iterator findValue 
= this->mapContainer->find(key); - if (findValue != this->mapContainer->end()) - return findValue->second; - removeWeak(); - return VALUE(); - } - }; -} - -#endif diff --git a/include/HashSet.h b/include/HashSet.h deleted file mode 100644 index d65979a8..00000000 --- a/include/HashSet.h +++ /dev/null @@ -1,133 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef HASHSET_H -#define HASHSET_H - -#include -#include "LuceneSync.h" - -namespace Lucene -{ - /// Utility template class to handle hash set collections that can be safely copied and shared - template < class TYPE, class HASH = boost::hash, class EQUAL = std::equal_to > - class HashSet : public LuceneSync - { - public: - typedef HashSet this_type; - typedef boost::unordered_set< TYPE, HASH, EQUAL, Allocator > set_type; - typedef typename set_type::iterator iterator; - typedef typename set_type::const_iterator const_iterator; - typedef TYPE value_type; - - virtual ~HashSet() - { - } - - protected: - boost::shared_ptr setContainer; - - public: - static this_type newInstance() - { - this_type instance; - instance.setContainer = Lucene::newInstance(); - return instance; - } - - template - static this_type newInstance(ITER first, ITER last) - { - this_type instance; - instance.setContainer = Lucene::newInstance(first, last); - return instance; - } - - void reset() - { - setContainer.reset(); - } - - int32_t size() const - { - return (int32_t)setContainer->size(); - } - - bool empty() const - { - return setContainer->empty(); - } - - void clear() - { - setContainer->clear(); - } - - iterator begin() - { - return setContainer->begin(); - } - - iterator end() - { - return setContainer->end(); - } - - const_iterator 
begin() const - { - return setContainer->begin(); - } - - const_iterator end() const - { - return setContainer->end(); - } - - operator bool() const - { - return setContainer; - } - - bool operator! () const - { - return !setContainer; - } - - set_type& operator= (const set_type& other) - { - setContainer = other.setContainer; - return *this; - } - - bool add(const TYPE& type) - { - return setContainer->insert(type).second; - } - - template - void addAll(ITER first, ITER last) - { - setContainer->insert(first, last); - } - - bool remove(const TYPE& type) - { - return (setContainer->erase(type) > 0); - } - - iterator find(const TYPE& type) - { - return setContainer->find(type); - } - - bool contains(const TYPE& type) const - { - return (setContainer->find(type) != setContainer->end()); - } - }; -} - -#endif diff --git a/include/HitQueue.h b/include/HitQueue.h deleted file mode 100644 index 19cb44a6..00000000 --- a/include/HitQueue.h +++ /dev/null @@ -1,34 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef HITQUEUE_H -#define HITQUEUE_H - -#include "HitQueueBase.h" - -namespace Lucene -{ - class HitQueue : public HitQueueBase - { - public: - /// Creates a new instance with size elements. - HitQueue(int32_t size, bool prePopulate); - virtual ~HitQueue(); - - LUCENE_CLASS(HitQueue); - - protected: - bool prePopulate; - - protected: - virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); - - /// Returns null if prePopulate is false. 
- virtual ScoreDocPtr getSentinelObject(); - }; -} - -#endif diff --git a/include/HitQueueBase.h b/include/HitQueueBase.h deleted file mode 100644 index 487fa6c1..00000000 --- a/include/HitQueueBase.h +++ /dev/null @@ -1,63 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef HITQUEUEBASE_H -#define HITQUEUEBASE_H - -#include "PriorityQueue.h" - -namespace Lucene -{ - class LPPAPI HitQueueBase : public LuceneObject - { - public: - HitQueueBase(int32_t size); - virtual ~HitQueueBase(); - - LUCENE_CLASS(HitQueueBase); - - public: - virtual ScoreDocPtr add(ScoreDocPtr scoreDoc); - virtual ScoreDocPtr addOverflow(ScoreDocPtr scoreDoc); - virtual ScoreDocPtr top(); - virtual ScoreDocPtr pop(); - virtual ScoreDocPtr updateTop(); - virtual int32_t size(); - virtual bool empty(); - virtual void clear(); - - protected: - PriorityQueueScoreDocsPtr queue; - int32_t queueSize; - - public: - virtual void initialize(); - - protected: - virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) = 0; - virtual ScoreDocPtr getSentinelObject(); - - friend class PriorityQueueScoreDocs; - }; - - class LPPAPI PriorityQueueScoreDocs : public PriorityQueue - { - public: - PriorityQueueScoreDocs(HitQueueBasePtr hitQueue, int32_t size); - virtual ~PriorityQueueScoreDocs(); - - LUCENE_CLASS(PriorityQueueScoreDocs); - - protected: - HitQueueBaseWeakPtr _hitQueue; - - protected: - virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); - virtual ScoreDocPtr getSentinelObject(); - }; -} - -#endif diff --git a/include/ISOLatin1AccentFilter.h b/include/ISOLatin1AccentFilter.h deleted file mode 100644 index 282396d5..00000000 --- 
a/include/ISOLatin1AccentFilter.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ISOLATIN1ACCENTFILTER_H -#define ISOLATIN1ACCENTFILTER_H - -#include "TokenFilter.h" - -namespace Lucene -{ - /// A filter that replaces accented characters in the ISO Latin 1 character set (ISO-8859-1) by their unaccented - /// equivalent. The case will not be altered. - /// - /// For instance, 'à' will be replaced by 'a'. - /// - /// @deprecated If you build a new index, use {@link ASCIIFoldingFilter} which covers a superset of Latin 1. - /// This class is included for use with existing indexes and will be removed in a future release (possibly Lucene 4.0). - class LPPAPI ISOLatin1AccentFilter : public TokenFilter - { - public: - ISOLatin1AccentFilter(TokenStreamPtr input); - virtual ~ISOLatin1AccentFilter(); - - LUCENE_CLASS(ISOLatin1AccentFilter); - - protected: - CharArray output; - int32_t outputPos; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - - /// To replace accented characters in a String by unaccented equivalents. - void removeAccents(const wchar_t* input, int32_t length); - }; -} - -#endif diff --git a/include/IndexCommit.h b/include/IndexCommit.h deleted file mode 100644 index e0eea7d7..00000000 --- a/include/IndexCommit.h +++ /dev/null @@ -1,76 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXCOMMIT_H -#define INDEXCOMMIT_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Represents a single commit into an index as seen by the {@link IndexDeletionPolicy} or {@link IndexReader}. - /// - /// Changes to the content of an index are made visible only after the writer who made that change commits by - /// writing a new segments file (segments_N). This point in time, when the action of writing of a new segments - /// file to the directory is completed, is an index commit. - /// - /// Each index commit point has a unique segments file associated with it. The segments file associated with a - /// later index commit point would have a larger N. - class LPPAPI IndexCommit : public LuceneObject - { - public: - virtual ~IndexCommit(); - - LUCENE_CLASS(IndexCommit); - - public: - /// Get the segments file (segments_N) associated with this commit point. - virtual String getSegmentsFileName() = 0; - - /// Returns all index files referenced by this commit point. - virtual HashSet getFileNames() = 0; - - /// Returns the {@link Directory} for the index. - virtual DirectoryPtr getDirectory() = 0; - - /// Delete this commit point. This only applies when using the commit point in the context of IndexWriter's - /// IndexDeletionPolicy. - /// - /// Upon calling this, the writer is notified that this commit point should be deleted. - /// - /// Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect - /// and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or - /// {@link IndexDeletionPolicy#onCommit onCommit()} methods. - virtual void deleteCommit() = 0; - - virtual bool isDeleted() = 0; - - /// Returns true if this commit is an optimized index. - virtual bool isOptimized() = 0; - - /// Two IndexCommits are equal if both their Directory and versions are equal. 
- virtual bool equals(LuceneObjectPtr other); - - virtual int32_t hashCode(); - - /// Returns the version for this IndexCommit. This is the same value that {@link IndexReader#getVersion} - /// would return if it were opened on this commit. - virtual int64_t getVersion() = 0; - - /// Returns the generation (the _N in segments_N) for this IndexCommit. - virtual int64_t getGeneration() = 0; - - /// Convenience method that returns the last modified time of the segments_N file corresponding to this - /// index commit, equivalent to getDirectory()->fileModified(getSegmentsFileName()). - virtual int64_t getTimestamp(); - - /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. Map is - /// String -> String. - virtual MapStringString getUserData() = 0; - }; -} - -#endif diff --git a/include/IndexDeletionPolicy.h b/include/IndexDeletionPolicy.h deleted file mode 100644 index c2927629..00000000 --- a/include/IndexDeletionPolicy.h +++ /dev/null @@ -1,71 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXDELETIONPOLICY_H -#define INDEXDELETIONPOLICY_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Policy for deletion of stale {@link IndexCommit index commits}. - /// Implement this interface, and pass - /// it to one of the {@link IndexWriter} or {@link IndexReader} constructors, to customize when older - /// {@link IndexCommit point-in-time commits} are deleted from the index directory. The default deletion - /// policy is {@link KeepOnlyLastCommitDeletionPolicy}, which always removes old commits as soon as a new - /// commit is done (this matches the behavior before 2.2). 
- /// - /// One expected use case for this (and the reason why it was first created) is to work around problems - /// with an index directory accessed via filesystems like NFS because NFS does not provide the "delete on - /// last close" semantics that Lucene's "point in time" search normally relies on. By implementing a - /// custom deletion policy, such as "a commit is only removed once it has been stale for more than X - /// minutes", you can give your readers time to refresh to the new commit before {@link IndexWriter} - /// removes the old commits. Note that doing so will increase the storage requirements of the index. - class LPPAPI IndexDeletionPolicy : public LuceneObject - { - protected: - IndexDeletionPolicy(); - - public: - virtual ~IndexDeletionPolicy(); - - LUCENE_CLASS(IndexDeletionPolicy); - - public: - /// This is called once when a writer is first instantiated to give the policy a chance to remove old - /// commit points. - /// - /// The writer locates all index commits present in the index directory and calls this method. The - /// policy may choose to delete some of the commit points, doing so by calling method {@link - /// IndexCommit#delete delete()} of {@link IndexCommit}. - /// - /// Note: the last CommitPoint is the most recent one, ie. the "front index state". Be careful not to - /// delete it, unless you know for sure what you are doing, and unless you can afford to lose the - /// index content while doing that. - /// - /// @param commits List of current {@link IndexCommit point-in-time commits}, sorted by age (the 0th - /// one is the oldest commit). - virtual void onInit(Collection commits) = 0; - - /// This is called each time the writer completed a commit. This gives the policy a chance to remove - /// old commit points with each commit. - /// - /// The policy may now choose to delete old commit points by calling method {@link - /// IndexCommit#delete delete()} of {@link IndexCommit}. 
- /// - /// This method is only called when {@link IndexWriter#commit} or {@link IndexWriter#close} is called, - /// or possibly not at all if the {@link IndexWriter#rollback} is called. - /// - /// Note: the last CommitPoint is the most recent one, ie. the "front index state". Be careful not to - /// delete it, unless you know for sure what you are doing, and unless you can afford to lose the - /// index content while doing that. - /// - /// @param commits List of {@link IndexCommit}, sorted by age (the 0th one is the oldest commit). - virtual void onCommit(Collection commits) = 0; - }; -} - -#endif diff --git a/include/IndexFileDeleter.h b/include/IndexFileDeleter.h deleted file mode 100644 index fb300a77..00000000 --- a/include/IndexFileDeleter.h +++ /dev/null @@ -1,204 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXFILEDELETER_H -#define INDEXFILEDELETER_H - -#include "IndexCommit.h" - -namespace Lucene -{ - /// This class keeps track of each SegmentInfos instance that is still "live", either because it corresponds to a - /// segments_N file in the Directory (a "commit", ie. a committed SegmentInfos) or because it's an in-memory - /// SegmentInfos that a writer is actively updating but has not yet committed. This class uses simple reference - /// counting to map the live SegmentInfos instances to individual files in the Directory. - /// - /// The same directory file may be referenced by more than one IndexCommit, i.e. more than one SegmentInfos. - /// Therefore we count how many commits reference each file. When all the commits referencing a certain file have - /// been deleted, the refcount for that file becomes zero, and the file is deleted. 
- /// - /// A separate deletion policy interface (IndexDeletionPolicy) is consulted on creation (onInit) and once per - /// commit (onCommit), to decide when a commit should be removed. - /// - /// It is the business of the IndexDeletionPolicy to choose when to delete commit points. The actual mechanics of - /// file deletion, retrying, etc, derived from the deletion of commit points is the business of the IndexFileDeleter. - /// - /// The current default deletion policy is {@link KeepOnlyLastCommitDeletionPolicy}, which removes all prior commits - /// when a new commit has completed. This matches the behavior before 2.2. - /// - /// Note that you must hold the write.lock before instantiating this class. It opens segments_N file(s) directly - /// with no retry logic. - class IndexFileDeleter : public LuceneObject - { - public: - /// Initialize the deleter: find all previous commits in the Directory, incref the files they reference, call - /// the policy to let it delete commits. This will remove any files not referenced by any of the commits. - IndexFileDeleter(DirectoryPtr directory, IndexDeletionPolicyPtr policy, SegmentInfosPtr segmentInfos, InfoStreamPtr infoStream, DocumentsWriterPtr docWriter, HashSet synced); - virtual ~IndexFileDeleter(); - - LUCENE_CLASS(IndexFileDeleter); - - protected: - /// Files that we tried to delete but failed (likely because they are open and we are running on Windows), - /// so we will retry them again later - HashSet deletable; - - /// Reference count for all files in the index. Counts how many existing commits reference a file. - MapStringRefCount refCounts; - - /// Holds all commits (segments_N) currently in the index. This will have just 1 commit if you are using the - /// default delete policy (KeepOnlyLastCommitDeletionPolicy). 
Other policies may leave commit points live for - /// longer in which case this list would be longer than 1 - Collection commits; - - /// Holds files we had incref'd from the previous non-commit checkpoint - Collection< HashSet > lastFiles; - - /// Commits that the IndexDeletionPolicy have decided to delete - Collection commitsToDelete; - - InfoStreamPtr infoStream; - DirectoryPtr directory; - IndexDeletionPolicyPtr policy; - DocumentsWriterPtr docWriter; - - SegmentInfosPtr lastSegmentInfos; - HashSet synced; - - /// Change to true to see details of reference counts when infoStream != null - static bool VERBOSE_REF_COUNTS; - - public: - bool startingCommitDeleted; - - protected: - void message(const String& message); - - /// Remove the CommitPoints in the commitsToDelete List by DecRef'ing all files from each SegmentInfos. - void deleteCommits(); - - void deletePendingFiles(); - - RefCountPtr getRefCount(const String& fileName); - - public: - void setInfoStream(InfoStreamPtr infoStream); - - SegmentInfosPtr getLastSegmentInfos(); - - /// Writer calls this when it has hit an error and had to roll back, to tell us that there may now be - /// unreferenced files in the filesystem. So we re-list the filesystem and delete such files. If - /// segmentName is non-null, we will only delete files corresponding to that segment. - void refresh(const String& segmentName); - void refresh(); - - void close(); - - /// For definition of "check point" see IndexWriter comments: "Clarification: Check Points (and commits)". - /// Writer calls this when it has made a "consistent change" to the index, meaning new files are written to - /// the index and the in-memory SegmentInfos have been modified to point to those files. - /// - /// This may or may not be a commit (segments_N may or may not have been written). - /// - /// We simply incref the files referenced by the new SegmentInfos and decref the files we had previously - /// seen (if any). 
- /// - /// If this is a commit, we also call the policy to give it a chance to remove other commits. If any - /// commits are removed, we decref their files as well. - void checkpoint(SegmentInfosPtr segmentInfos, bool isCommit); - - void incRef(SegmentInfosPtr segmentInfos, bool isCommit); - void incRef(HashSet files); - void incRef(const String& fileName); - void decRef(HashSet files); - void decRef(const String& fileName); - void decRef(SegmentInfosPtr segmentInfos); - - bool exists(const String& fileName); - - void deleteFiles(HashSet files); - - /// Deletes the specified files, but only if they are new (have not yet been incref'd). - void deleteNewFiles(HashSet files); - - void deleteFile(const String& fileName); - }; - - /// Tracks the reference count for a single index file - class RefCount : public LuceneObject - { - public: - RefCount(const String& fileName); - virtual ~RefCount(); - - LUCENE_CLASS(RefCount); - - public: - String fileName; // fileName used only for better assert error messages - bool initDone; - int32_t count; - - public: - int32_t IncRef(); - int32_t DecRef(); - }; - - /// Holds details for each commit point. This class is also passed to the deletion policy. Note: this class - /// has a natural ordering that is inconsistent with equals. - class CommitPoint : public IndexCommit - { - public: - CommitPoint(Collection commitsToDelete, DirectoryPtr directory, SegmentInfosPtr segmentInfos); - virtual ~CommitPoint(); - - LUCENE_CLASS(CommitPoint); - - public: - int64_t gen; - HashSet files; - String segmentsFileName; - bool deleted; - DirectoryPtr directory; - Collection commitsToDelete; - int64_t version; - int64_t generation; - bool _isOptimized; - MapStringString userData; - - public: - virtual String toString(); - - /// Returns true if this commit is an optimized index. - virtual bool isOptimized(); - - /// Get the segments file (segments_N) associated with this commit point. 
- virtual String getSegmentsFileName(); - - /// Returns all index files referenced by this commit point. - virtual HashSet getFileNames(); - - /// Returns the {@link Directory} for the index. - virtual DirectoryPtr getDirectory(); - - /// Returns the version for this IndexCommit. - virtual int64_t getVersion(); - - /// Returns the generation (the _N in segments_N) for this IndexCommit. - virtual int64_t getGeneration(); - - /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. - virtual MapStringString getUserData(); - - /// Called only be the deletion policy, to remove this commit point from the index. - virtual void deleteCommit(); - - virtual bool isDeleted(); - - virtual int32_t compareTo(LuceneObjectPtr other); - }; -} - -#endif diff --git a/include/IndexFileNameFilter.h b/include/IndexFileNameFilter.h deleted file mode 100644 index f80a27d9..00000000 --- a/include/IndexFileNameFilter.h +++ /dev/null @@ -1,31 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXFILENAMEFILTER_H -#define INDEXFILENAMEFILTER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Filename filter that accept filenames and extensions only created by Lucene. - class LPPAPI IndexFileNameFilter : public LuceneObject - { - public: - /// Returns true if this is a file known to be a Lucene index file. - static bool accept(const String& directory, const String& name); - - /// Returns true if this is a file that would be contained in a CFS file. - /// This function should only be called on files that pass the - /// {@link #accept} (ie, are already known to be a Lucene index file). 
- static bool isCFSFile(const String& name); - - /// Return singleton IndexFileNameFilter - static IndexFileNameFilterPtr getFilter(); - }; -} - -#endif diff --git a/include/IndexFileNames.h b/include/IndexFileNames.h deleted file mode 100644 index 990a224f..00000000 --- a/include/IndexFileNames.h +++ /dev/null @@ -1,116 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXFILENAMES_H -#define INDEXFILENAMES_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Constants representing filenames and extensions used by Lucene. - class IndexFileNames : public LuceneObject - { - public: - virtual ~IndexFileNames(); - LUCENE_CLASS(IndexFileNames); - - public: - /// Name of the index segment file. - static const String& SEGMENTS(); - - /// Name of the generation reference file name. - static const String& SEGMENTS_GEN(); - - /// Name of the index deletable file (only used in pre-lockless indices). - static const String& DELETABLE(); - - /// Extension of norms file. - static const String& NORMS_EXTENSION(); - - /// Extension of freq postings file. - static const String& FREQ_EXTENSION(); - - /// Extension of prox postings file. - static const String& PROX_EXTENSION(); - - /// Extension of terms file. - static const String& TERMS_EXTENSION(); - - /// Extension of terms index file. - static const String& TERMS_INDEX_EXTENSION(); - - /// Extension of stored fields index file. - static const String& FIELDS_INDEX_EXTENSION(); - - /// Extension of stored fields file. - static const String& FIELDS_EXTENSION(); - - /// Extension of vectors fields file. - static const String& VECTORS_FIELDS_EXTENSION(); - - /// Extension of vectors documents file. 
- static const String& VECTORS_DOCUMENTS_EXTENSION(); - - /// Extension of vectors index file. - static const String& VECTORS_INDEX_EXTENSION(); - - /// Extension of compound file. - static const String& COMPOUND_FILE_EXTENSION(); - - /// Extension of compound file for doc store files. - static const String& COMPOUND_FILE_STORE_EXTENSION(); - - /// Extension of deletes. - static const String& DELETES_EXTENSION(); - - /// Extension of field infos. - static const String& FIELD_INFOS_EXTENSION(); - - /// Extension of plain norms. - static const String& PLAIN_NORMS_EXTENSION(); - - /// Extension of separate norms. - static const String& SEPARATE_NORMS_EXTENSION(); - - /// Extension of gen file. - static const String& GEN_EXTENSION(); - - /// This array contains all filename extensions used by Lucene's index - /// files, with two exceptions, namely the extension made up from - /// ".f" + number and from ".s" + number. Also note that Lucene's - /// "segments_N" files do not have any filename extension. - static const HashSet INDEX_EXTENSIONS(); - - /// File extensions that are added to a compound file (same as - /// {@link #INDEX_EXTENSIONS}, minus "del", "gen", "cfs"). - static const HashSet INDEX_EXTENSIONS_IN_COMPOUND_FILE(); - - static const HashSet STORE_INDEX_EXTENSIONS(); - static const HashSet NON_STORE_INDEX_EXTENSIONS(); - - /// File extensions of old-style index files. - static const HashSet COMPOUND_EXTENSIONS(); - - /// File extensions for term vector support. - static const HashSet VECTOR_EXTENSIONS(); - - /// Computes the full file name from base, extension and generation. - /// If the generation is {@link SegmentInfo#NO}, the file name is null. - /// If it's {@link SegmentInfo#WITHOUT_GEN} the file name is base+extension. - /// If it's > 0, the file name is base_generation+extension. 
- static String fileNameFromGeneration(const String& base, const String& extension, int64_t gen); - - /// Returns true if the provided filename is one of the doc store files - /// (ends with an extension in STORE_INDEX_EXTENSIONS). - static bool isDocStoreFile(const String& fileName); - - /// Return segment file name. - static String segmentFileName(const String& segmentName, const String& ext); - }; -} - -#endif diff --git a/include/IndexInput.h b/include/IndexInput.h deleted file mode 100644 index 33c7902a..00000000 --- a/include/IndexInput.h +++ /dev/null @@ -1,124 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXINPUT_H -#define INDEXINPUT_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Abstract base class for input from a file in a {@link Directory}. - /// A random-access input stream. Used for all Lucene index input operations. - /// @see Directory - class LPPAPI IndexInput : public LuceneObject - { - public: - IndexInput(); - virtual ~IndexInput(); - - LUCENE_CLASS(IndexInput); - - protected: - bool preUTF8Strings; // true if we are reading old (modified UTF8) string format - - public: - /// Reads and returns a single byte. - /// @see IndexOutput#writeByte(uint8_t) - virtual uint8_t readByte() = 0; - - /// Reads a specified number of bytes into an array at the specified offset. - /// @param b the array to read bytes into. - /// @param offset the offset in the array to start storing bytes. - /// @param length the number of bytes to read. 
- /// @see IndexOutput#writeBytes(const uint8_t*, int) - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) = 0; - - /// Reads a specified number of bytes into an array at the specified offset - /// with control over whether the read should be buffered (callers who have - /// their own buffer should pass in "false" for useBuffer). Currently only - /// {@link BufferedIndexInput} respects this parameter. - /// @param b the array to read bytes into. - /// @param offset the offset in the array to start storing bytes. - /// @param length the number of bytes to read. - /// @param useBuffer set to false if the caller will handle buffering. - /// @see IndexOutput#writeBytes(const uint8_t*,int) - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer); - - /// Reads four bytes and returns an int. - /// @see IndexOutput#writeInt(int32_t) - virtual int32_t readInt(); - - /// Reads an int stored in variable-length format. Reads between one and five - /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. - /// @see IndexOutput#writeVInt(int32_t) - virtual int32_t readVInt(); - - /// Reads eight bytes and returns a int64. - /// @see IndexOutput#writeLong(int64_t) - virtual int64_t readLong(); - - /// Reads a int64 stored in variable-length format. Reads between one and nine - /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. - virtual int64_t readVLong(); - - /// Call this if readString should read characters stored in the old modified - /// UTF8 format. This is used for indices written pre-2.4. - virtual void setModifiedUTF8StringsMode(); - - /// Reads a string. - /// @see IndexOutput#writeString(const String&) - virtual String readString(); - - /// Reads a modified UTF8 format string. - virtual String readModifiedUTF8String(); - - /// Reads Lucene's old "modified UTF-8" encoded characters into an array. - /// @param buffer the array to read characters into. 
- /// @param start the offset in the array to start storing characters. - /// @param length the number of characters to read. - /// @see IndexOutput#writeChars(const String& s, int32_t, int32_t) - virtual int32_t readChars(wchar_t* buffer, int32_t start, int32_t length); - - /// Similar to {@link #readChars(wchar_t*, int32_t, int32_t)} but does not - /// do any conversion operations on the bytes it is reading in. It still - /// has to invoke {@link #readByte()} just as {@link #readChars(wchar_t*, int32_t, int32_t)} - /// does, but it does not need a buffer to store anything and it does not have - /// to do any of the bitwise operations, since we don't actually care what is - /// in the byte except to determine how many more bytes to read. - /// @param length The number of chars to read. - /// @deprecated this method operates on old "modified utf8" encoded strings. - virtual void skipChars(int32_t length); - - /// Closes the stream to further operations. - virtual void close() = 0; - - /// Returns the current position in this file, where the next read will occur. - /// @see #seek(int64_t) - virtual int64_t getFilePointer() = 0; - - /// Sets current position in this file, where the next read will occur. - /// @see #getFilePointer() - virtual void seek(int64_t pos) = 0; - - /// The number of bytes in the file. - virtual int64_t length() = 0; - - /// Returns a clone of this stream. - /// - /// Clones of a stream access the same data, and are positioned at the same - /// point as the stream they were cloned from. - /// - /// Subclasses must ensure that clones may be positioned at different points - /// in the input from each other and from the stream they were cloned from. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Read string map as a series of key/value pairs. 
- virtual MapStringString readStringStringMap(); - }; -} - -#endif diff --git a/include/IndexOutput.h b/include/IndexOutput.h deleted file mode 100644 index 29f1700f..00000000 --- a/include/IndexOutput.h +++ /dev/null @@ -1,108 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXOUTPUT_H -#define INDEXOUTPUT_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Abstract base class for output to a file in a Directory. A random-access output stream. Used for all - /// Lucene index output operations. - /// @see Directory - /// @see IndexInput - class LPPAPI IndexOutput : public LuceneObject - { - public: - virtual ~IndexOutput(); - - LUCENE_CLASS(IndexOutput); - - protected: - static const int32_t COPY_BUFFER_SIZE; - ByteArray copyBuffer; - - public: - /// Writes a single byte. - /// @see IndexInput#readByte() - virtual void writeByte(uint8_t b) = 0; - - /// Writes an array of bytes. - /// @param b the bytes to write. - /// @param length the number of bytes to write. - /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) - virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length) = 0; - - /// Forces any buffered output to be written. - virtual void flush() = 0; - - /// Closes this stream to further operations. - virtual void close() = 0; - - /// Returns the current position in this file, where the next write will occur. - virtual int64_t getFilePointer() = 0; - - /// Sets current position in this file, where the next write will occur. - /// @see #getFilePointer() - virtual void seek(int64_t pos) = 0; - - /// The number of bytes in the file. - virtual int64_t length() = 0; - - public: - /// Writes an array of bytes. 
- /// @param b the bytes to write. - /// @param length the number of bytes to write. - /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) - void writeBytes(const uint8_t* b, int32_t length); - - /// Writes an int as four bytes. - /// @see IndexInput#readInt() - void writeInt(int32_t i); - - /// Writes an int in a variable-length format. Writes between one and five bytes. Smaller values take fewer bytes. - /// Negative numbers are not supported. - /// @see IndexInput#readVInt() - void writeVInt(int32_t i); - - /// Writes a int64 as eight bytes. - /// @see IndexInput#readLong() - void writeLong(int64_t i); - - /// Writes an int64 in a variable-length format. Writes between one and five bytes. Smaller values take fewer bytes. - /// Negative numbers are not supported. - /// @see IndexInput#readVLong() - void writeVLong(int64_t i); - - /// Writes a string. - /// @see IndexInput#readString() - void writeString(const String& s); - - /// Writes a sub sequence of characters from s as the old format (modified UTF-8 encoded bytes). - /// @param s the source of the characters. - /// @param start the first character in the sequence. - /// @param length the number of characters in the sequence. - /// @deprecated -- please use {@link #writeString} - void writeChars(const String& s, int32_t start, int32_t length); - - /// Copy numBytes bytes from input to ourself. - void copyBytes(IndexInputPtr input, int64_t numBytes); - - /// Set the file length. By default, this method does nothing (it's optional for a Directory to implement it). - /// But, certain Directory implementations (for example @see FSDirectory) can use this to inform the underlying IO - /// system to pre-allocate the file to the specified size. If the length is longer than the current file length, - /// the bytes added to the file are undefined. Otherwise the file is truncated. - /// @param length file length. - void setLength(int64_t length); - - /// Write string map as a series of key/value pairs. 
- /// @param map map of string-string key-values. - void writeStringStringMap(MapStringString map); - }; -} - -#endif diff --git a/include/IndexReader.h b/include/IndexReader.h deleted file mode 100644 index ef515267..00000000 --- a/include/IndexReader.h +++ /dev/null @@ -1,558 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXREADER_H -#define INDEXREADER_H - -#include "SegmentInfos.h" - -namespace Lucene -{ - /// IndexReader is an abstract class, providing an interface for accessing an index. Search of an index is done - /// entirely through this abstract interface, so that any subclass which implements it is searchable. - /// - /// Concrete subclasses of IndexReader are usually constructed with a call to one of the static open methods, - /// eg. {@link #open(DirectoryPtr, bool)}. - /// - /// For efficiency, in this API documents are often referred to via document numbers, non-negative integers which - /// each name a unique document in the index. These document numbers are ephemeral -they may change as documents - /// are added to and deleted from an index. Clients should thus not rely on a given document having the same number - /// between sessions. - /// - /// An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used - /// to delete documents from the index then. - /// - /// NOTE: for backwards API compatibility, several methods are not listed as abstract, but have no useful implementations - /// in this base class and instead always throw UnsupportedOperation exception. Subclasses are strongly encouraged to - /// override these methods, but in many cases may not need to. 
- /// - /// NOTE: as of 2.4, it's possible to open a read-only IndexReader using the static open methods that accept the bool - /// readOnly parameter. Such a reader has better concurrency as it's not necessary to synchronize on the isDeleted - /// method. You must specify false if you want to make changes with the resulting IndexReader. - /// - /// NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, - /// concurrently. If your application requires external synchronization, you should not synchronize on the IndexReader - /// instance; use your own (non-Lucene) objects instead. - class LPPAPI IndexReader : public LuceneObject - { - public: - IndexReader(); - virtual ~IndexReader(); - - LUCENE_CLASS(IndexReader); - - public: - /// Constants describing field properties, for example used for {@link IndexReader#getFieldNames(FieldOption)}. - enum FieldOption - { - /// All fields - FIELD_OPTION_ALL, - /// All indexed fields - FIELD_OPTION_INDEXED, - /// All fields that store payloads - FIELD_OPTION_STORES_PAYLOADS, - /// All fields that omit tf - FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS, - /// All fields which are not indexed - FIELD_OPTION_UNINDEXED, - /// All fields which are indexed with termvectors enabled - FIELD_OPTION_INDEXED_WITH_TERMVECTOR, - /// All fields which are indexed but don't have termvectors enabled - FIELD_OPTION_INDEXED_NO_TERMVECTOR, - /// All fields with termvectors enabled. 
Please note that only standard termvector fields are returned - FIELD_OPTION_TERMVECTOR, - /// All fields with termvectors with position values enabled - FIELD_OPTION_TERMVECTOR_WITH_POSITION, - /// All fields with termvectors with offset values enabled - FIELD_OPTION_TERMVECTOR_WITH_OFFSET, - /// All fields with termvectors with offset values and position values enabled - FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET - }; - - static const int32_t DEFAULT_TERMS_INDEX_DIVISOR; - - protected: - bool closed; - bool _hasChanges; - int32_t refCount; - - public: - /// Returns the current refCount for this reader - int32_t getRefCount(); - - /// Increments the refCount of this IndexReader instance. RefCounts are used to determine when a reader can be - /// closed safely, i.e. as soon as there are no more references. Be sure to always call a corresponding {@link - /// #decRef}, in a finally clause; otherwise the reader may never be closed. Note that {@link #close} simply - /// calls decRef(), which means that the IndexReader will not really be closed until {@link #decRef} has been - /// called for all outstanding references. - /// @see #decRef - void incRef(); - - /// Decreases the refCount of this IndexReader instance. If the refCount drops to 0, then pending changes - /// (if any) are committed to the index and this reader is closed. - /// @see #incRef - void decRef(); - - /// Returns a IndexReader reading the index in the given Directory, with readOnly = true. - /// @param directory the index directory - static IndexReaderPtr open(DirectoryPtr directory); - - /// Returns an IndexReader reading the index in the given Directory. You should pass readOnly = true, since it - /// gives much better concurrent performance, unless you intend to do write operations (delete documents or change - /// norms) with the reader. 
- /// @param directory the index directory - /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - static IndexReaderPtr open(DirectoryPtr directory, bool readOnly); - - /// Returns an IndexReader reading the index in the given {@link IndexCommit}. You should pass readOnly = true, - /// since it gives much better concurrent performance, unless you intend to do write operations (delete documents - /// or change norms) with the reader. - /// @param commit the commit point to open - /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - static IndexReaderPtr open(IndexCommitPtr commit, bool readOnly); - - /// Returns an IndexReader reading the index in the given Directory, with a custom {@link IndexDeletionPolicy}. - /// You should pass readOnly=true, since it gives much better concurrent performance, unless you intend to do write - /// operations (delete documents or change norms) with the reader. - /// @param directory the index directory - /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform - /// deletes or to set norms); see {@link IndexWriter} for details. - /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - static IndexReaderPtr open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, bool readOnly); - - /// Returns an IndexReader reading the index in the given Directory, with a custom {@link IndexDeletionPolicy}. - /// You should pass readOnly=true, since it gives much better concurrent performance, unless you intend to do write - /// operations (delete documents or change norms) with the reader. - /// @param directory the index directory - /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform - /// deletes or to set norms); see {@link IndexWriter} for details. 
- /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the - /// same effect as {@link IndexWriter#setTermIndexInterval} except that setting must be done at - /// indexing time while this setting can be set per reader. When set to N, then one in every - /// N*termIndexInterval terms in the index is loaded into memory. By setting this to a value > 1 - /// you can reduce memory usage, at the expense of higher latency when loading a TermInfo. The - /// default value is 1. Set this to -1 to skip loading the terms index entirely. - static IndexReaderPtr open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); - - /// Returns an IndexReader reading the index in the given Directory, using a specific commit and with a custom - /// {@link IndexDeletionPolicy}. You should pass readOnly=true, since it gives much better concurrent performance, - /// unless you intend to do write operations (delete documents or change norms) with the reader. - /// @param commit the specific {@link IndexCommit} to open; see {@link IndexReader#listCommits} to list all - /// commits in a directory - /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform - /// deletes or to set norms); see {@link IndexWriter} for details. - /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - static IndexReaderPtr open(IndexCommitPtr commit, IndexDeletionPolicyPtr deletionPolicy, bool readOnly); - - /// Returns an IndexReader reading the index in the given Directory, using a specific commit and with a custom {@link - /// IndexDeletionPolicy}. You should pass readOnly=true, since it gives much better concurrent performance, unless - /// you intend to do write operations (delete documents or change norms) with the reader. 
- /// @param commit the specific {@link IndexCommit} to open; see {@link IndexReader#listCommits} to - /// list all commits in a directory - /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform deletes - /// or to set norms); see {@link IndexWriter} for details. - /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the same effect as - /// {@link IndexWriter#setTermIndexInterval} except that setting must be done at indexing time while this setting can - /// be set per reader. When set to N, then one in every N * termIndexInterval terms in the index is loaded into - /// memory. By setting this to a value > 1 you can reduce memory usage, at the expense of higher latency when loading - /// a TermInfo. The default value is 1. Set this to -1 to skip loading the terms index entirely. - static IndexReaderPtr open(IndexCommitPtr commit, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); - - /// Refreshes an IndexReader if the index has changed since this instance was (re)opened. - /// - /// Opening an IndexReader is an expensive operation. This method can be used to refresh an existing IndexReader to - /// reduce these costs. This method tries to only load segments that have changed or were created after the - /// IndexReader was (re)opened. - /// - /// If the index has not changed since this instance was (re)opened, then this call is a NOOP and returns this - /// instance. Otherwise, a new instance is returned. The old instance is not closed and remains usable. - /// - /// If the reader is reopened, even though they share resources internally, it's safe to make changes (deletions, - /// norms) with the new reader. All shared mutable state obeys "copy on write" semantics to ensure the changes are - /// not seen by other readers. 
- /// - /// You can determine whether a reader was actually reopened by comparing the old instance with the - /// instance returned by this method: - /// - ///
-        /// IndexReaderPtr reader = ... 
-        /// ...
-        /// IndexReaderPtr newReader = r.reopen();
-        /// if (newReader != reader)
-        /// {
-        ///     ... // reader was reopened
-        ///     reader->close();
-        /// }
-        /// reader = newReader;
-        /// ...
-        /// 
- /// - /// Be sure to synchronize that code so that other threads, if present, can never use reader after it has been - /// closed and before it's switched to newReader. If this reader is a near real-time reader (obtained from - /// {@link IndexWriter#getReader()}, reopen() will simply call writer.getReader() again for you, though this - /// may change in the future. - virtual IndexReaderPtr reopen(); - - /// Just like {@link #reopen()}, except you can change the readOnly of the original reader. If the index is - /// unchanged but readOnly is different then a new reader will be returned. - virtual IndexReaderPtr reopen(bool openReadOnly); - - /// Reopen this reader on a specific commit point. This always returns a readOnly reader. If the specified commit - /// point matches what this reader is already on, and this reader is already readOnly, then this same instance is - /// returned; if it is not already readOnly, a readOnly clone is returned. - virtual IndexReaderPtr reopen(IndexCommitPtr commit); - - /// Efficiently clones the IndexReader (sharing most internal state). - /// - /// On cloning a reader with pending changes (deletions, norms), the original reader transfers its write lock to the - /// cloned reader. This means only the cloned reader may make further changes to the index, and commit the changes - /// to the index on close, but the old reader still reflects all changes made up until it was cloned. - /// - /// Like {@link #reopen()}, it's safe to make changes to either the original or the cloned reader: all shared mutable - /// state obeys "copy on write" semantics to ensure the changes are not seen by other readers. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Clones the IndexReader and optionally changes readOnly. A readOnly reader cannot open a writable reader. - virtual LuceneObjectPtr clone(bool openReadOnly, LuceneObjectPtr other = LuceneObjectPtr()); - - /// Returns the directory associated with this index. 
The default implementation returns the directory specified by - /// subclasses when delegating to the IndexReader(Directory) constructor, or throws an UnsupportedOperation exception - /// if one was not specified. - virtual DirectoryPtr directory(); - - /// Returns the time the index in the named directory was last modified. Do not use this to check - /// whether the reader is still up-to-date, use {@link #isCurrent()} instead. - static int64_t lastModified(DirectoryPtr directory2); - - /// Reads version number from segments files. The version number is initialized with a timestamp - /// and then increased by one for each change of the index. - /// @param directory where the index resides. - /// @return version number. - static int64_t getCurrentVersion(DirectoryPtr directory); - - /// Reads commitUserData, previously passed to {@link IndexWriter#commit(MapStringString)}, from - /// current index segments file. This will return null if {@link IndexWriter#commit(MapStringString)} - /// has never been called for this index. - static MapStringString getCommitUserData(DirectoryPtr directory); - - /// Version number when this IndexReader was opened. Not implemented in the IndexReader base class. - /// - /// If this reader is based on a Directory (ie, was created by calling {@link #open}, or {@link - /// #reopen} on a reader based on a Directory), then this method returns the version recorded in the - /// commit that the reader opened. This version is advanced every time {@link IndexWriter#commit} - /// is called. - /// - /// If instead this reader is a near real-time reader (ie, obtained by a call to {@link - /// IndexWriter#getReader}, or by calling {@link #reopen} on a near real-time reader), then this - /// method returns the version of the last commit done by the writer. Note that even as further - /// changes are made with the writer, the version will not changed until a commit is completed. 
- /// Thus, you should not rely on this method to determine when a near real-time reader should be - /// opened. Use {@link #isCurrent} instead. - virtual int64_t getVersion(); - - /// Retrieve the String userData optionally passed to IndexWriter#commit. This will return null if - /// {@link IndexWriter#commit(MapStringString)} has never been called for this index. - virtual MapStringString getCommitUserData(); - - /// Check whether any new changes have occurred to the index since this reader was opened. - /// - /// If this reader is based on a Directory (ie, was created by calling {@link #open}, or {@link - /// #reopen} on a reader based on a Directory), then this method checks if any further commits (see - /// {@link IndexWriter#commit} have occurred in that directory). - /// - /// If instead this reader is a near real-time reader (ie, obtained by a call to {@link - /// IndexWriter#getReader}, or by calling {@link #reopen} on a near real-time reader), then this - /// method checks if either a new commit has occurred, or any new uncommitted changes have taken - /// place via the writer. Note that even if the writer has only performed merging, this method - /// will still return false. - /// - /// In any event, if this returns false, you should call {@link #reopen} to get a new reader that - /// sees the changes. - virtual bool isCurrent(); - - /// Checks is the index is optimized (if it has a single segment and no deletions). Not implemented - /// in the IndexReader base class. - /// @return true if the index is optimized; false otherwise - virtual bool isOptimized(); - - /// Return an array of term frequency vectors for the specified document. The array contains a - /// vector for each vectorized field in the document. Each vector contains terms and frequencies - /// for all terms in a given vectorized field. If no such fields existed, the method returns null. 
- /// The term vectors that are returned may either be of type {@link TermFreqVector} or of type - /// {@link TermPositionVector} if positions or offsets have been stored. - /// - /// @param docNumber document for which term frequency vectors are returned - /// @return array of term frequency vectors. May be null if no term vectors have been stored for the - /// specified document. - virtual Collection getTermFreqVectors(int32_t docNumber) = 0; - - /// Return a term frequency vector for the specified document and field. The returned vector contains - /// terms and frequencies for the terms in the specified field of this document, if the field had the - /// storeTermVector flag set. If termvectors had been stored with positions or offsets, a - /// {@link TermPositionVector} is returned. - /// - /// @param docNumber document for which the term frequency vector is returned. - /// @param field field for which the term frequency vector is returned. - /// @return term frequency vector May be null if field does not exist in the specified document or - /// term vector was not stored. - virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field) = 0; - - /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays - /// of the {@link TermFreqVector}. - /// @param docNumber The number of the document to load the vector for - /// @param field The name of the field to load - /// @param mapper The {@link TermVectorMapper} to process the vector. Must not be null. - virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) = 0; - - /// Map all the term vectors for all fields in a Document - /// @param docNumber The number of the document to load the vector for - /// @param mapper The {@link TermVectorMapper} to process the vector. Must not be null. 
- virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) = 0; - - /// Returns true if an index exists at the specified directory. If the directory does not exist or - /// if there is no index in it. - /// @param directory the directory to check for an index - /// @return true if an index exists; false otherwise - static bool indexExists(DirectoryPtr directory); - - /// Returns the number of documents in this index. - virtual int32_t numDocs() = 0; - - /// Returns one greater than the largest possible document number. This may be used to, eg., determine - /// how big to allocate an array which will have an element for every document number in an index. - virtual int32_t maxDoc() = 0; - - /// Returns the number of deleted documents. - int32_t numDeletedDocs(); - - /// Returns the stored fields of the n'th Document in this index. - /// - /// NOTE: for performance reasons, this method does not check if the requested document is deleted, and - /// therefore asking for a deleted document may yield unspecified results. Usually this is not required, - /// however you can call {@link #isDeleted(int)} with the requested document ID to verify the document - /// is not deleted. - virtual DocumentPtr document(int32_t n); - - /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine - /// what {@link Field}s to load and how they should be loaded. - /// NOTE: If this Reader (more specifically, the underlying FieldsReader) is closed before the lazy - /// {@link Field} is loaded an exception may be thrown. If you want the value of a lazy {@link Field} - /// to be available after closing you must explicitly load it or fetch the Document again with a new - /// loader. - /// - /// NOTE: for performance reasons, this method does not check if the requested document is deleted, - /// and therefore asking for a deleted document may yield unspecified results. 
Usually this is not - /// required, however you can call {@link #isDeleted(int32_t)} with the requested document ID to verify - /// the document is not deleted. - /// - /// @param n Get the document at the n'th position - /// @param fieldSelector The {@link FieldSelector} to use to determine what Fields should be loaded on - /// the Document. May be null, in which case all Fields will be loaded. - /// @return The stored fields of the {@link Document} at the n'th position - /// @see Fieldable - /// @see FieldSelector - /// @see SetBasedFieldSelector - /// @see LoadFirstFieldSelector - virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector) = 0; - - /// Returns true if document n has been deleted - virtual bool isDeleted(int32_t n) = 0; - - /// Returns true if any documents have been deleted - virtual bool hasDeletions() = 0; - - /// Used for testing - virtual bool hasChanges(); - - /// Returns true if there are norms stored for this field. - virtual bool hasNorms(const String& field); - - /// Returns the byte-encoded normalization factor for the named field of every document. This is used - /// by the search code to score documents. - /// @see Field#setBoost(double) - virtual ByteArray norms(const String& field) = 0; - - /// Reads the byte-encoded normalization factor for the named field of every document. This is used - /// by the search code to score documents. - /// @see Field#setBoost(double) - virtual void norms(const String& field, ByteArray norms, int32_t offset) = 0; - - /// Resets the normalization factor for the named field of the named document. The norm represents - /// the product of the field's {@link Fieldable#setBoost(double) boost} and its {@link - /// Similarity#lengthNorm(String, int) length normalization}. Thus, to preserve the length normalization - /// values when resetting this, one should base the new value upon the old. - /// - /// NOTE: If this field does not store norms, then this method call will silently do nothing. 
- /// - /// @see #norms(String) - /// @see Similarity#decodeNorm(byte) - virtual void setNorm(int32_t doc, const String& field, uint8_t value); - - /// Resets the normalization factor for the named field of the named document. - /// - /// @see #norms(String) - /// @see Similarity#decodeNorm(byte) - virtual void setNorm(int32_t doc, const String& field, double value); - - /// Returns an enumeration of all the terms in the index. The enumeration is ordered by - /// Term::compareTo(). Each term is greater than all that precede it in the enumeration. - /// Note that after calling terms(), {@link TermEnum#next()} must be called on the resulting - /// enumeration before calling other methods such as {@link TermEnum#term()}. - virtual TermEnumPtr terms() = 0; - - /// Returns an enumeration of all terms starting at a given term. If the given term does not - /// exist, the enumeration is positioned at the first term greater than the supplied term. - /// The enumeration is ordered by Term::compareTo(). Each term is greater than all that precede - /// it in the enumeration. - virtual TermEnumPtr terms(TermPtr t) = 0; - - /// Returns the number of documents containing the term t. - virtual int32_t docFreq(TermPtr t) = 0; - - /// Returns an enumeration of all the documents which contain term. For each document, the - /// document number, the frequency of the term in that document is also provided, for use in - /// search scoring. If term is null, then all non-deleted docs are returned with freq=1. - /// The enumeration is ordered by document number. Each document number is greater than all - /// that precede it in the enumeration. - virtual TermDocsPtr termDocs(TermPtr term); - - /// Returns an unpositioned {@link TermDocs} enumerator. - virtual TermDocsPtr termDocs() = 0; - - /// Returns an enumeration of all the documents which contain term. 
For each document, in - /// addition to the document number and frequency of the term in that document, a list of all - /// of the ordinal positions of the term in the document is available. Thus, this method - /// positions of the term in the document is available. - /// This positional information facilitates phrase and proximity searching. - /// The enumeration is ordered by document number. Each document number is greater than all - /// that precede it in the enumeration. - virtual TermPositionsPtr termPositions(TermPtr term); - - /// Returns an unpositioned {@link TermPositions} enumerator. - virtual TermPositionsPtr termPositions() = 0; - - /// Deletes the document numbered docNum. Once a document is deleted it will not appear in - /// TermDocs or TermPostitions enumerations. Attempts to read its field with the {@link - /// #document} method will result in an error. The presence of this document may still be - /// reflected in the {@link #docFreq} statistic, though this will be corrected eventually as - /// the index is further modified. - virtual void deleteDocument(int32_t docNum); - - /// Deletes all documents that have a given term indexed. This is useful if one uses a - /// document field to hold a unique ID string for the document. Then to delete such a - /// document, one merely constructs a term with the appropriate field and the unique ID string - /// as its text and passes it to this method. See {@link #deleteDocument(int)} for information - /// about when this deletion will become effective. - /// @return the number of documents deleted - virtual int32_t deleteDocuments(TermPtr term); - - /// Undeletes all documents currently marked as deleted in this index. - virtual void undeleteAll(); - - void flush(); - - /// @param commitUserData Opaque Map (String -> String) that's recorded into the segments file - /// in the index, and retrievable by {@link IndexReader#getCommitUserData}. 
- void flush(MapStringString commitUserData); - - /// Commit changes resulting from delete, undeleteAll, or setNorm operations. - /// If an exception is hit, then either no changes or all changes will have been committed to - /// the index (transactional semantics). - void commit(MapStringString commitUserData); - - /// Closes files associated with this index. Also saves any new deletions to disk. - /// No other methods should be called after this has been called. - void close(); - - /// Get a list of unique field names that exist in this index and have the specified field option information. - /// @param fieldOption specifies which field option should be available for the returned fields - /// @return Collection of Strings indicating the names of the fields. - virtual HashSet getFieldNames(FieldOption fieldOption) = 0; - - /// Return the IndexCommit that this reader has opened. This method is only implemented by those - /// readers that correspond to a Directory with its own segments_N file. - virtual IndexCommitPtr getIndexCommit(); - - /// Prints the filename and size of each file within a given compound file. Add the -extract flag - /// to extract files to the current working directory. In order to make the extracted version of - /// the index work, you have to copy the segments file from the compound index into the directory - /// where the extracted files are stored. - /// @param args Usage: IndexReader [-extract] - static void main(Collection args); - - /// Returns all commit points that exist in the Directory. Normally, because the default is {@link - /// KeepOnlyLastCommitDeletionPolicy}, there would be only one commit point. But if you're using a - /// custom {@link IndexDeletionPolicy} then there could be many commits. Once you have a given - /// commit, you can open a reader on it by calling {@link IndexReader#open(IndexCommit,bool)}. - /// There must be at least one commit in the Directory, else this method throws an exception. 
- /// Note that if a commit is in progress while this method is running, that commit may or may not - /// be returned array. - static Collection listCommits(DirectoryPtr dir); - - /// Returns the sequential sub readers that this reader is logically composed of. For example, - /// IndexSearcher uses this API to drive searching by one sub reader at a time. If this reader is - /// not composed of sequential child readers, it should return null. If this method returns an empty - /// array, that means this reader is a null reader (for example a MultiReader that has no sub readers). - /// - /// NOTE: You should not try using sub-readers returned by this method to make any changes (setNorm, - /// deleteDocument, etc.). While this might succeed for one composite reader (like MultiReader), it - /// will most likely lead to index corruption for other readers (like DirectoryReader obtained - /// through {@link #open}. Use the parent reader directly. - virtual Collection getSequentialSubReaders(); - - virtual LuceneObjectPtr getFieldCacheKey(); - - /// This returns null if the reader has no deletions. - virtual LuceneObjectPtr getDeletesCacheKey(); - - /// Returns the number of unique terms (across all fields) in this reader. - /// - /// This method returns int64_t, even though internally Lucene cannot handle more than 2^31 unique - /// terms, for a possible future when this limitation is removed. - virtual int64_t getUniqueTermCount(); - - /// For IndexReader implementations that use TermInfosReader to read terms, this returns the current - /// indexDivisor as specified when the reader was opened. - virtual int32_t getTermInfosIndexDivisor(); - - protected: - void ensureOpen(); - - static IndexReaderPtr open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, IndexCommitPtr commit, bool readOnly, int32_t termInfosIndexDivisor); - - /// Implements setNorm in subclass. 
- virtual void doSetNorm(int32_t doc, const String& field, uint8_t value) = 0; - - /// Implements deletion of the document numbered docNum. - /// Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. - virtual void doDelete(int32_t docNum) = 0; - - /// Implements actual undeleteAll() in subclass. - virtual void doUndeleteAll() = 0; - - /// Does nothing by default. Subclasses that require a write lock for index modifications must - /// implement this method. - virtual void acquireWriteLock(); - - /// Commit changes resulting from delete, undeleteAll, or setNorm operations. - /// If an exception is hit, then either no changes or all changes will have been committed to - /// the index (transactional semantics). - void commit(); - - /// Implements commit. - virtual void doCommit(MapStringString commitUserData) = 0; - - /// Implements close. - virtual void doClose() = 0; - - friend class DirectoryReader; - friend class ParallelReader; - }; -} - -#endif diff --git a/include/IndexSearcher.h b/include/IndexSearcher.h deleted file mode 100644 index 7282d45c..00000000 --- a/include/IndexSearcher.h +++ /dev/null @@ -1,102 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXSEARCHER_H -#define INDEXSEARCHER_H - -#include "Searcher.h" - -namespace Lucene -{ - /// Implements search over a single IndexReader. - /// - /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or {@link - /// #search(QueryPtr, FilterPtr, int32_t)} methods. For performance reasons it is recommended to open only - /// one IndexSearcher and use it for all of your searches. 
- /// - /// NOTE: {@link IndexSearcher} instances are completely thread safe, meaning multiple threads can call any - /// of its methods, concurrently. If your application requires external synchronization, you should not - /// synchronize on the IndexSearcher instance; use your own (non-Lucene) objects instead. - class LPPAPI IndexSearcher : public Searcher - { - public: - /// Creates a searcher searching the index in the named directory. You should pass readOnly = true, - /// since it gives much better concurrent performance, unless you intend to do write operations (delete - /// documents or change norms) with the underlying IndexReader. - /// @param path Directory where IndexReader will be opened - /// @param readOnly If true, the underlying IndexReader will be opened readOnly - IndexSearcher(DirectoryPtr path, bool readOnly = true); - - /// Creates a searcher searching the provided index. - IndexSearcher(IndexReaderPtr reader); - - /// Directly specify the reader, subReaders and their docID starts. - IndexSearcher(IndexReaderPtr reader, Collection subReaders, Collection docStarts); - - virtual ~IndexSearcher(); - - LUCENE_CLASS(IndexSearcher); - - public: - IndexReaderPtr reader; - - protected: - bool closeReader; - - Collection subReaders; - Collection docStarts; - - bool fieldSortDoTrackScores; - bool fieldSortDoMaxScore; - - public: - /// Return the {@link IndexReader} this searches. - IndexReaderPtr getIndexReader(); - - /// Note that the underlying IndexReader is not closed, if IndexSearcher was constructed with - /// IndexSearcher(IndexReaderPtr reader). If the IndexReader was supplied implicitly by specifying a - /// directory, then the IndexReader gets closed. 
- virtual void close(); - - virtual int32_t docFreq(TermPtr term); - virtual DocumentPtr doc(int32_t n); - virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector); - virtual int32_t maxDoc(); - - using Searcher::search; - using Searcher::explain; - - virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n); - virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort); - - /// Just like {@link #search(WeightPtr, FilterPtr, int32_t, SortPtr)}, but you choose whether or not the - /// fields in the returned {@link FieldDoc} instances should be set by specifying fillFields. - /// - /// NOTE: this does not compute scores by default. If you need scores, create a {@link TopFieldCollector} - /// instance by calling {@link TopFieldCollector#create} and then pass that to {@link #search(WeightPtr, - /// FilterPtr, CollectorPtr)}. - virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort, bool fillFields); - - virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr results); - virtual QueryPtr rewrite(QueryPtr query); - virtual ExplanationPtr explain(WeightPtr weight, int32_t doc); - - /// By default, no scores are computed when sorting by field (using {@link #search(QueryPtr, FilterPtr, - /// int32_t, SortPtr)}). You can change that, per IndexSearcher instance, by calling this method. Note - /// that this will incur a CPU cost. - /// - /// @param doTrackScores If true, then scores are returned for every matching document in {@link TopFieldDocs}. - /// @param doMaxScore If true, then the max score for all matching docs is computed. 
- virtual void setDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore); - - protected: - void ConstructSearcher(IndexReaderPtr reader, bool closeReader); - void gatherSubReaders(Collection allSubReaders, IndexReaderPtr reader); - void searchWithFilter(IndexReaderPtr reader, WeightPtr weight, FilterPtr filter, CollectorPtr collector); - }; -} - -#endif diff --git a/include/IndexWriter.h b/include/IndexWriter.h deleted file mode 100644 index cd820abc..00000000 --- a/include/IndexWriter.h +++ /dev/null @@ -1,1113 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INDEXWRITER_H -#define INDEXWRITER_H - -#include "MergePolicy.h" - -namespace Lucene -{ - /// An IndexWriter creates and maintains an index. - /// - /// The create argument to the {@link #IndexWriter(DirectoryPtr, AnalyzerPtr, bool, int32_t) constructor} - /// determines whether a new index is created, or whether an existing index is opened. Note that you can - /// open an index with create=true even while readers are using the index. The old readers will continue - /// to search the "point in time" snapshot they had opened, and won't see the newly created index until - /// they re-open. There are also {@link #IndexWriter(DirectoryPtr, AnalyzerPtr, int32_t) constructors} - /// with no create argument which will create a new index if there is not already an index at the provided - /// path and otherwise open the existing index. - /// - /// In either case, documents are added with {@link #addDocument(DocumentPtr) addDocument} and removed - /// with {@link #deleteDocuments(TermPtr)} or {@link #deleteDocuments(QueryPtr)}. 
A document can be updated - /// with {@link #updateDocument(TermPtr, DocumentPtr) updateDocument} (which just deletes and then adds - /// the entire document). When finished adding, deleting and updating documents, {@link #close() close} - /// should be called. - /// - /// These changes are buffered in memory and periodically flushed to the {@link Directory} (during the - /// above method calls). A flush is triggered when there are enough buffered deletes (see - /// {@link #setMaxBufferedDeleteTerms}) or enough added documents since the last flush, whichever is - /// sooner. For the added documents, flushing is triggered either by RAM usage of the documents (see - /// {@link #setRAMBufferSizeMB}) or the number of added documents. The default is to flush when RAM usage - /// hits 16 MB. For best indexing speed you should flush by RAM usage with a large RAM buffer. Note that - /// flushing just moves the internal buffered state in IndexWriter into the index, but these changes are - /// not visible to IndexReader until either {@link #commit()} or {@link #close} is called. A flush may - /// also trigger one or more segment merges which by default run with a background thread so as not to - /// block the addDocument calls (see mergePolicy below for changing the {@link MergeScheduler}). - /// - /// If an index will not have more documents added for a while and optimal search performance is desired, - /// then either the full {@link #optimize() optimize} method or partial {@link #optimize(int32_t)} method - /// should be called before the index is closed. - /// - /// Opening an IndexWriter creates a lock file for the directory in use. Trying to open another IndexWriter - /// on the same directory will lead to a LockObtainFailed exception. The LockObtainFailed exception is also - /// thrown if an IndexReader on the same directory is used to delete documents from the index. 
- /// - /// IndexWriter allows an optional {@link IndexDeletionPolicy} implementation to be specified. You can use - /// this to control when prior commits are deleted from the index. The default policy is {@link - /// KeepOnlyLastCommitDeletionPolicy} which removes all prior commits as soon as a new commit is done (this - /// matches behavior before 2.2). Creating your own policy can allow you to explicitly keep previous - /// "point in time" commits alive in the index for some time, to allow readers to refresh to the new commit - /// without having the old commit deleted out from under them. This is necessary on file systems like NFS - /// that do not support "delete on last close" semantics, which Lucene's "point in time" search normally - /// relies on. - /// - /// IndexWriter allows you to separately change the {@link MergePolicy} and the {@link MergeScheduler}. - /// The {@link MergePolicy} is invoked whenever there are changes to the segments in the index. Its role - /// is to select which merges to do, if any, and return a {@link MergePolicy.MergeSpecification} describing - /// the merges. It also selects merges to do for optimize(). (The default is {@link LogByteSizeMergePolicy}. - /// Then, the {@link MergeScheduler} is invoked with the requested merges and it decides when and how to run - /// the merges. The default is {@link ConcurrentMergeScheduler}. - /// - /// NOTE: if you hit an std::bad_alloc then IndexWriter will quietly record this fact and block all future - /// segment commits. This is a defensive measure in case any internal state (buffered documents and - /// deletions) were corrupted. Any subsequent calls to {@link #commit()} will throw an IllegalState - /// exception. The only course of action is to call {@link #close()}, which internally will call {@link - /// #rollback()}, to undo any changes to the index since the last commit. You can also just call {@link - /// #rollback()} directly. 
- /// - /// NOTE: {@link IndexWriter} instances are completely thread safe, meaning multiple threads can call any of - /// its methods, concurrently. If your application requires external synchronization, you should not - /// synchronize on the IndexWriter instance as this may cause deadlock; use your own (non-Lucene) objects - /// instead. - /// - /// Clarification: Check Points (and commits) - /// IndexWriter writes new index files to the directory without writing a new segments_N file which - /// references these new files. It also means that the state of the in memory SegmentInfos object is different - /// than the most recent segments_N file written to the directory. - /// - /// Each time the SegmentInfos is changed, and matches the (possibly modified) directory files, we have a new - /// "check point". If the modified/new SegmentInfos is written to disk - as a new (generation of) segments_N - /// file - this check point is also an IndexCommit. - /// - /// A new checkpoint always replaces the previous checkpoint and becomes the new "front" of the index. This - /// allows the IndexFileDeleter to delete files that are referenced only by stale checkpoints (files that were - /// created since the last commit, but are no longer referenced by the "front" of the index). For this, - /// IndexFileDeleter keeps track of the last non commit checkpoint. 
- class LPPAPI IndexWriter : public LuceneObject - { - protected: - IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl, IndexingChainPtr indexingChain, IndexCommitPtr commit); - - public: - IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl); - IndexWriter(DirectoryPtr d, AnalyzerPtr a, int32_t mfl); - IndexWriter(DirectoryPtr d, AnalyzerPtr a, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl); - IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl); - IndexWriter(DirectoryPtr d, AnalyzerPtr a, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl, IndexCommitPtr commit); - virtual ~IndexWriter(); - - LUCENE_CLASS(IndexWriter); - - protected: - int64_t writeLockTimeout; - - /// The normal read buffer size defaults to 1024, but increasing this during merging seems to - /// yield performance gains. However we don't want to increase it too much because there are - /// quite a few BufferedIndexInputs created during merging. 
- static const int32_t MERGE_READ_BUFFER_SIZE; - - SynchronizePtr messageIDLock; - static int32_t MESSAGE_ID; - int32_t messageID; - bool hitOOM; - - DirectoryPtr directory; // where this index resides - AnalyzerPtr analyzer; // how to analyze text - - bool create; - IndexDeletionPolicyPtr deletionPolicy; - IndexingChainPtr indexingChain; - IndexCommitPtr indexCommit; - - SimilarityPtr similarity; // how to normalize - - int64_t changeCount; // increments every time a change is completed - int64_t lastCommitChangeCount; // last changeCount that was committed - - SegmentInfosPtr rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails - MapSegmentInfoInt rollbackSegments; - - SegmentInfosPtr localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails - int32_t localFlushedDocCount; - - SegmentInfosPtr segmentInfos; // the segments - - DocumentsWriterPtr docWriter; - IndexFileDeleterPtr deleter; - - SetSegmentInfo segmentsToOptimize; // used by optimize to note those needing optimization - int32_t optimizeMaxNumSegments; - - LockPtr writeLock; - - int32_t termIndexInterval; - - bool closed; - bool closing; - - SetSegmentInfo mergingSegments; - MergePolicyPtr mergePolicy; - MergeSchedulerPtr mergeScheduler; - Collection pendingMerges; - SetOneMerge runningMerges; - Collection mergeExceptions; - int64_t mergeGen; - bool stopMerges; - - int32_t flushCount; - int32_t flushDeletesCount; - - /// Used to only allow one addIndexes to proceed at once - int32_t readCount; // count of how many threads are holding read lock - int64_t writeThread; // non-null if any thread holds write lock - int32_t upgradeCount; - - int32_t readerTermsIndexDivisor; - - // This is a "write once" variable (like the organic dye on a DVD-R that may or may not - // be heated by a laser and then cooled to permanently record the event): it's false, - // until getReader() is called for the first time, at which point it's switched to true - // and never changes 
back to false. Once this is true, we hold open and reuse SegmentReader - // instances internally for applying deletes, doing merges, and reopening near real-time readers. - bool poolReaders; - - /// The maximum number of terms that will be indexed for a single field in a document. This - /// limits the amount of memory required for indexing, so that collections with very large files - /// will not crash the indexing process by running out of memory. - /// Note that this effectively truncates large documents, excluding from the index terms that - /// occur further in the document. If you know your source documents are large, be sure to set - /// this value high enough to accommodate the expected size. If you set it to INT_MAX, then the - /// only limit is your memory, but you should anticipate an std::bad_alloc. By default, no more - /// than 10,000 terms will be indexed for a field. - /// - /// @see #setMaxFieldLength(int32_t) - int32_t maxFieldLength; - - InfoStreamPtr infoStream; - static InfoStreamPtr defaultInfoStream; - - HashSet synced; // files that have been sync'd already - HashSet syncing; // files that are now being sync'd - - IndexReaderWarmerPtr mergedSegmentWarmer; - - /// Used only by commit; lock order is commitLock -> IW - SynchronizePtr commitLock; - - INTERNAL: - SegmentInfosPtr pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) - int64_t pendingCommitChangeCount; - - ReaderPoolPtr readerPool; - - public: - /// Default value for the write lock timeout (1,000). - /// @see #setDefaultWriteLockTimeout - static int64_t WRITE_LOCK_TIMEOUT; - - static const String WRITE_LOCK_NAME; - - /// Value to denote a flush trigger is disabled. - static const int32_t DISABLE_AUTO_FLUSH; - - /// Disabled by default (because IndexWriter flushes by RAM usage by default). Change using - /// {@link #setMaxBufferedDocs(int32_t)}. 
- static const int32_t DEFAULT_MAX_BUFFERED_DOCS; - - /// Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). - /// Change using {@link #setRAMBufferSizeMB}. - static const double DEFAULT_RAM_BUFFER_SIZE_MB; - - /// Disabled by default (because IndexWriter flushes by RAM usage by default). Change using - /// {@link #setMaxBufferedDeleteTerms(int32_t)}. - static const int32_t DEFAULT_MAX_BUFFERED_DELETE_TERMS; - - /// Default value is 10,000. Change using {@link #setMaxFieldLength(int32_t)}. - static const int32_t DEFAULT_MAX_FIELD_LENGTH; - - /// Default value is 128. Change using {@link #setTermIndexInterval(int32_t)}. - static const int32_t DEFAULT_TERM_INDEX_INTERVAL; - - /// Absolute hard maximum length for a term. If a term arrives from the analyzer longer than - /// this length, it is skipped and a message is printed to infoStream, if set (see {@link - /// #setInfoStream}). - static int32_t MAX_TERM_LENGTH(); - - /// Sets the maximum field length to INT_MAX - static const int32_t MaxFieldLengthUNLIMITED; - - /// Sets the maximum field length to {@link #DEFAULT_MAX_FIELD_LENGTH} - static const int32_t MaxFieldLengthLIMITED; - - public: - virtual void initialize(); - - /// Returns a read-only reader, covering all committed as well as un-committed changes to the - /// index. This provides "near real-time" searching, in that changes made during an IndexWriter - /// session can be quickly made available for searching without closing the writer nor calling - /// {@link #commit}. - /// - /// Note that this is functionally equivalent to calling {#commit} and then using {@link - /// IndexReader#open} to open a new reader. But the turnaround time of this method should be - /// faster since it avoids the potentially costly {@link #commit}. - /// - /// You must close the {@link IndexReader} returned by this method once you are done using it. 
- /// - /// It's near real-time because there is no hard guarantee on how quickly you can get a new - /// reader after making changes with IndexWriter. You'll have to experiment in your situation - /// to determine if it's fast enough. As this is a new and experimental feature, please report - /// back on your findings so we can learn, improve and iterate. - /// - /// The resulting reader supports {@link IndexReader#reopen}, but that call will simply forward - /// back to this method (though this may change in the future). - /// - /// The very first time this method is called, this writer instance will make every effort to - /// pool the readers that it opens for doing merges, applying deletes, etc. This means additional - /// resources (RAM, file descriptors, CPU time) will be consumed. - /// - /// For lower latency on reopening a reader, you should call {@link #setMergedSegmentWarmer} to - /// pre-warm a newly merged segment before it's committed to the index. This is important for - /// minimizing index-to-search delay after a large merge. - /// - /// If an addIndexes* call is running in another thread, then this reader will only search those - /// segments from the foreign index that have been successfully copied over, so far. - /// - /// NOTE: Once the writer is closed, any outstanding readers may continue to be used. However, - /// if you attempt to reopen any of those readers, you'll hit an AlreadyClosed exception. - /// - /// NOTE: This API is experimental and might change in incompatible ways in the next release. - /// - /// @return IndexReader that covers entire index plus all changes made so far by this IndexWriter - /// instance - virtual IndexReaderPtr getReader(); - - /// Like {@link #getReader}, except you can specify which termInfosIndexDivisor should be used for - /// any newly opened readers. - /// - /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. 
This has the - /// same effect as {@link IndexWriter#setTermIndexInterval} except that setting must be done at - /// indexing time while this setting can be set per reader. When set to N, then one in every - /// N*termIndexInterval terms in the index is loaded into memory. By setting this to a value > 1 - /// you can reduce memory usage, at the expense of higher latency when loading a TermInfo. - /// The default value is 1. Set this to -1 to skip loading the terms index entirely. - virtual IndexReaderPtr getReader(int32_t termInfosIndexDivisor); - - /// Obtain the number of deleted docs for a pooled reader. If the reader isn't being pooled, - /// the segmentInfo's delCount is returned. - virtual int32_t numDeletedDocs(SegmentInfoPtr info); - - virtual void acquireWrite(); - virtual void releaseWrite(); - virtual void acquireRead(); - - /// Allows one readLock to upgrade to a writeLock even if there are other readLocks as long - /// as all other readLocks are also blocked in this method - virtual void upgradeReadToWrite(); - - virtual void releaseRead(); - virtual bool isOpen(bool includePendingClose); - virtual void message(const String& message); - - /// Get the current setting of whether newly flushed segments will use the compound file format. - /// Note that this just returns the value previously set with setUseCompoundFile(bool), or the - /// default value (true). You cannot use this to query the status of previously flushed segments. - /// - /// Note that this method is a convenience method: it just calls mergePolicy.getUseCompoundFile - /// as long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument - /// exception is thrown. - /// @see #setUseCompoundFile(bool) - virtual bool getUseCompoundFile(); - - /// Setting to turn on usage of a compound file. When on, multiple files for each segment are - /// merged into a single file when a new segment is flushed. 
- /// - /// Note that this method is a convenience method: it just calls mergePolicy.setUseCompoundFile - /// as long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument - /// exception is thrown. - virtual void setUseCompoundFile(bool value); - - /// Set the Similarity implementation used by this IndexWriter. - virtual void setSimilarity(SimilarityPtr similarity); - - /// Return the Similarity implementation used by this IndexWriter. - /// This defaults to the current value of {@link Similarity#getDefault()}. - virtual SimilarityPtr getSimilarity(); - - /// Set the interval between indexed terms. Large values cause less memory to be used by - /// IndexReader, but slow random-access to terms. Small values cause more memory to be used by - /// an IndexReader, and speed random-access to terms. - /// - /// This parameter determines the amount of computation required per query term, regardless of - /// the number of documents that contain that term. In particular, it is the maximum number of - /// other terms that must be scanned before a term is located and its frequency and position - /// information may be processed. In a large index with user-entered query terms, query - /// processing time is likely to be dominated not by term lookup but rather by the processing of - /// frequency and positional data. In a small index or when many uncommon query terms are - /// generated (eg., by wildcard queries) term lookup may become a dominant cost. - /// - /// In particular, numUniqueTerms/interval terms are read into memory by an IndexReader, and on - /// average, interval/2 terms must be scanned for each random term access. - /// - /// @see #DEFAULT_TERM_INDEX_INTERVAL - virtual void setTermIndexInterval(int32_t interval); - - /// Return the interval between indexed terms. - /// @see #setTermIndexInterval(int32_t) - virtual int32_t getTermIndexInterval(); - - /// Set the merge policy used by this writer. 
- virtual void setMergePolicy(MergePolicyPtr mp); - - /// Returns the current MergePolicy in use by this writer. - /// @see #setMergePolicy - virtual MergePolicyPtr getMergePolicy(); - - /// Set the merge scheduler used by this writer. - virtual void setMergeScheduler(MergeSchedulerPtr mergeScheduler); - - /// Returns the current MergePolicy in use by this writer. - /// @see #setMergePolicy - virtual MergeSchedulerPtr getMergeScheduler(); - - /// Determines the largest segment (measured by document count) that may be merged with other - /// segments. Small values (eg., less than 10,000) are best for interactive indexing, as this - /// limits the length of pauses while indexing to a few seconds. Larger values are best for - /// batched indexing and speedier searches. - /// - /// The default value is INT_MAX. - /// - /// Note that this method is a convenience method: it just calls mergePolicy.setMaxMergeDocs as - /// long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument - /// exception is thrown. - /// - /// The default merge policy ({@link LogByteSizeMergePolicy}) also allows you to set this limit - /// by net size (in MB) of the segment, using {@link LogByteSizeMergePolicy#setMaxMergeMB}. - virtual void setMaxMergeDocs(int32_t maxMergeDocs); - - /// Returns the largest segment (measured by document count) that may be merged with other - /// segments. - /// - /// Note that this method is a convenience method: it just calls mergePolicy.getMaxMergeDocs as - /// long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument - /// exception is thrown. - /// - /// @see #setMaxMergeDocs - virtual int32_t getMaxMergeDocs(); - - /// The maximum number of terms that will be indexed for a single field in a document. This - /// limits the amount of memory required for indexing, so that collections with very large files - /// will not crash the indexing process by running out of memory. 
This setting refers to the - /// number of running terms, not to the number of different terms. - /// Note: this silently truncates large documents, excluding from the index all terms that occur - /// further in the document. If you know your source documents are large, be sure to set this - /// value high enough to accommodate the expected size. If you set it to INT_MAX, then the only - /// limit is your memory, but you should anticipate an std::bad_alloc. - /// By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be indexed for a field. - virtual void setMaxFieldLength(int32_t maxFieldLength); - - /// Returns the maximum number of terms that will be indexed for a single field in a document. - /// @see #setMaxFieldLength - virtual int32_t getMaxFieldLength(); - - /// Sets the termsIndexDivisor passed to any readers that IndexWriter opens, for example when - /// applying deletes or creating a near-real-time reader in {@link IndexWriter#getReader}. - /// Default value is {@link IndexReader#DEFAULT_TERMS_INDEX_DIVISOR}. - virtual void setReaderTermsIndexDivisor(int32_t divisor); - - /// @see #setReaderTermsIndexDivisor() - virtual int32_t getReaderTermsIndexDivisor(); - - /// Determines the minimal number of documents required before the buffered in-memory documents - /// are flushed as a new Segment. Large values generally gives faster indexing. - /// - /// When this is set, the writer will flush every maxBufferedDocs added documents. Pass in - /// {@link #DISABLE_AUTO_FLUSH} to prevent triggering a flush due to number of buffered - /// documents. Note that if flushing by RAM usage is also enabled, then the flush will be - /// triggered by whichever comes first. - /// - /// Disabled by default (writer flushes by RAM usage). - /// - /// @see #setRAMBufferSizeMB - virtual void setMaxBufferedDocs(int32_t maxBufferedDocs); - - /// Returns the number of buffered added documents that will trigger a flush if enabled. 
- /// @see #setMaxBufferedDocs - virtual int32_t getMaxBufferedDocs(); - - /// Determines the amount of RAM that may be used for buffering added documents and deletions - /// before they are flushed to the Directory. Generally for faster indexing performance it's - /// best to flush by RAM usage instead of document count and use as large a RAM buffer as you can. - /// - /// When this is set, the writer will flush whenever buffered documents and deletions use this - /// much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a flush due to RAM usage. - /// Note that if flushing by document count is also enabled, then the flush will be triggered by - /// whichever comes first. - /// - /// Note: the account of RAM usage for pending deletions is only approximate. Specifically, if - /// you delete by Query, Lucene currently has no way to measure the RAM usage if individual - /// Queries so the accounting will under-estimate and you should compensate by either calling - /// commit() periodically yourself, or by using {@link #setMaxBufferedDeleteTerms} to flush by - /// count instead of RAM usage (each buffered delete Query counts as one). - /// - /// Note: because IndexWriter uses int32_t when managing its internal storage, the absolute - /// maximum value for this setting is somewhat less than 2048 MB. The precise limit depends on - /// various factors, such as how large your documents are, how many fields have norms, etc., so - /// it's best to set this value comfortably under 2048. - /// - /// The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. - virtual void setRAMBufferSizeMB(double mb); - - /// Returns the value set by {@link #setRAMBufferSizeMB} if enabled. - virtual double getRAMBufferSizeMB(); - - /// Determines the minimal number of delete terms required before the buffered in-memory delete - /// terms are applied and flushed. If there are documents buffered in memory at the time, they - /// are merged and a new segment is created. 
- /// - /// Disabled by default (writer flushes by RAM usage). - /// @see #setRAMBufferSizeMB - virtual void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms); - - /// Returns the number of buffered deleted terms that will trigger a flush if enabled. - /// @see #setMaxBufferedDeleteTerms - virtual int32_t getMaxBufferedDeleteTerms(); - - /// Determines how often segment indices are merged by addDocument(). With smaller values, less - /// RAM is used while indexing, and searches on unoptimized indices are faster, but indexing - /// speed is slower. With larger values, more RAM is used during indexing, and while searches - /// on unoptimized indices are slower, indexing is faster. Thus larger values (> 10) are best - /// for batch index creation, and smaller values (< 10) for indices that are interactively maintained. - /// - /// Note that this method is a convenience method: it just calls mergePolicy.setMergeFactor as long - /// as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument exception - /// is thrown. This must never be less than 2. The default value is 10. - virtual void setMergeFactor(int32_t mergeFactor); - - /// Returns the number of segments that are merged at once and also controls the total number of - /// segments allowed to accumulate in the index. - /// - /// Note that this method is a convenience method: it just calls mergePolicy.getMergeFactor as long - /// as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument exception - /// is thrown. - /// @see #setMergeFactor - virtual int32_t getMergeFactor(); - - /// If non-null, this will be the default infoStream used by a newly instantiated IndexWriter. - /// @see #setInfoStream - static void setDefaultInfoStream(InfoStreamPtr infoStream); - - /// Returns the current default infoStream for newly instantiated IndexWriters. 
- /// @see #setDefaultInfoStream - static InfoStreamPtr getDefaultInfoStream(); - - /// If non-null, information about merges, deletes and a message when maxFieldLength is reached - /// will be printed to this. - virtual void setInfoStream(InfoStreamPtr infoStream); - - /// Returns the current infoStream in use by this writer. - /// @see #setInfoStream - virtual InfoStreamPtr getInfoStream(); - - /// Returns true if verbosing is enabled (i.e., infoStream != null). - virtual bool verbose(); - - /// Sets the maximum time to wait for a write lock (in milliseconds) for this instance of - /// IndexWriter. @see #setDefaultWriteLockTimeout to change the default value for all instances - /// of IndexWriter. - virtual void setWriteLockTimeout(int64_t writeLockTimeout); - - /// Returns allowed timeout when acquiring the write lock. - /// @see #setWriteLockTimeout - virtual int64_t getWriteLockTimeout(); - - /// Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock - /// (in milliseconds). - static void setDefaultWriteLockTimeout(int64_t writeLockTimeout); - - /// Returns default write lock timeout for newly instantiated IndexWriters. - /// @see #setDefaultWriteLockTimeout - static int64_t getDefaultWriteLockTimeout(); - - /// Commits all changes to an index and closes all associated files. Note that this may be - /// a costly operation, so try to re-use a single writer instead of closing and opening a - /// new one. See {@link #commit()} for caveats about write caching done by some IO devices. - /// - /// If an Exception is hit during close, eg due to disk full or some other reason, then both - /// the on-disk index and the internal state of the IndexWriter instance will be consistent. - /// However, the close will not be complete even though part of it (flushing buffered documents) - /// may have succeeded, so the write lock will still be held. 
- /// - /// If you can correct the underlying cause (eg free up some disk space) then you can call - /// close() again. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer, again. - virtual void close(); - - /// Closes the index with or without waiting for currently running merges to finish. This is - /// only meaningful when using a MergeScheduler that runs merges in background threads. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer, again. - /// - /// NOTE: it is dangerous to always call close(false), especially when IndexWriter is not open - /// for very long, because this can result in "merge starvation" whereby long merges will never - /// have a chance to finish. This will cause too many segments in your index over time. - /// - /// @param waitForMerges if true, this call will block until all merges complete; else, it will - /// ask all running merges to abort, wait until those merges have finished (which should be at - /// most a few seconds), and then return. - virtual void close(bool waitForMerges); - - /// Returns the Directory used by this index. - virtual DirectoryPtr getDirectory(); - - /// Returns the analyzer used by this index. - virtual AnalyzerPtr getAnalyzer(); - - /// Returns total number of docs in this index, including docs not yet flushed (still in the - /// RAM buffer), not counting deletions. - /// @see #numDocs - virtual int32_t maxDoc(); - - /// Returns total number of docs in this index, including docs not yet flushed (still in the - /// RAM buffer), and including deletions. - /// NOTE: buffered deletions are not counted. If you really need these to be counted you should - /// call {@link #commit()} first. - virtual int32_t numDocs(); - - virtual bool hasDeletions(); - - /// Adds a document to this index. If the document contains more than {@link - /// #setMaxFieldLength(int32_t)} terms for a given field, the remainder are discarded. 
- /// - /// Note that if an Exception is hit (for example disk full) then the index will be consistent, - /// but this document may not have been added. Furthermore, it's possible the index will have - /// one segment in non-compound format even when using compound files (when a merge has partially - /// succeeded). - /// - /// This method periodically flushes pending documents to the Directory, and also periodically - /// triggers segment merges in the index according to the {@link MergePolicy} in use. - /// - /// Merges temporarily consume space in the directory. The amount of space required is up to 1X - /// the size of all segments being merged, when no size of all segments being merged, when no - /// 2X the size of all segments being merged when readers/searchers are open against the index - /// (see {@link #optimize()} for details). The sequence of primitive merge operations performed - /// is governed by the merge policy. - /// - /// Note that each term in the document can be no longer than 16383 characters, otherwise an - /// IllegalArgument exception will be thrown. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void addDocument(DocumentPtr doc); - - /// Adds a document to this index, using the provided analyzer instead of the value of {@link - /// #getAnalyzer()}. If the document contains more than {@link #setMaxFieldLength(int32_t)} terms - /// for a given field, the remainder are discarded. - /// - /// See {@link #addDocument(DocumentPtr)} for details on index and IndexWriter state after an - /// exception, and flushing/merging temporary free space requirements. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void addDocument(DocumentPtr doc, AnalyzerPtr analyzer); - - /// Deletes the document(s) containing term. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
- /// - /// @param term the term to identify the documents to be deleted - virtual void deleteDocuments(TermPtr term); - - /// Deletes the document(s) containing any of the terms. All deletes are flushed at the same time. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// - /// @param terms array of terms to identify the documents to be deleted - virtual void deleteDocuments(Collection terms); - - /// Deletes the document(s) matching the provided query. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// - /// @param query the query to identify the documents to be deleted - virtual void deleteDocuments(QueryPtr query); - - /// Deletes the document(s) matching any of the provided queries. All deletes are flushed at - /// the same time. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// - /// @param queries array of queries to identify the documents to be deleted - virtual void deleteDocuments(Collection queries); - - /// Updates a document by first deleting the document(s) containing term and then adding the new - /// document. The delete and then add are atomic as seen by a reader on the same index (flush - /// may happen only after the add). - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// - /// @param term the term to identify the document(s) to be deleted - /// @param doc the document to be added - virtual void updateDocument(TermPtr term, DocumentPtr doc); - - /// Updates a document by first deleting the document(s) containing term and then adding the new - /// document. The delete and then add are atomic as seen by a reader on the same index (flush - /// may happen only after the add). - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
- /// - /// @param term the term to identify the document(s) to be deleted - /// @param doc the document to be added - /// @param analyzer the analyzer to use when analyzing the document - virtual void updateDocument(TermPtr term, DocumentPtr doc, AnalyzerPtr analyzer); - - virtual int32_t getSegmentCount(); - virtual int32_t getNumBufferedDocuments(); - virtual int32_t getDocCount(int32_t i); - virtual int32_t getFlushCount(); - virtual int32_t getFlushDeletesCount(); - - virtual String newSegmentName(); - - /// Requests an "optimize" operation on an index, priming the index for the fastest available - /// search. Traditionally this has meant merging all segments into a single segment as is done in - /// the default merge policy, but individual merge policies may implement optimize in different ways. - /// - /// It is recommended that this method be called upon completion of indexing. In environments with - /// frequent updates, optimize is best done during low volume times, if at all. - /// - /// Note that optimize requires 2X the index size free space in your Directory (3X if you're using - /// compound file format). For example, if your index size is 10 MB then you need 20 MB free for - /// optimize to complete (30 MB if you're using compound file format). - /// - /// If some but not all readers re-open while an optimize is underway, this will cause > 2X temporary - /// space to be consumed as those new readers will then hold open the partially optimized segments at - /// that time. It is best not to re-open readers while optimize is running. - /// - /// The actual temporary usage could be much less than these figures (it depends on many factors). - /// - /// In general, once the optimize completes, the total size of the index will be less than the size - /// of the starting index. It could be quite a bit smaller (if there were many pending deletes) or - /// just slightly smaller. 
- /// - /// If an Exception is hit during optimize(), for example due to disk full, the index will not be - /// corrupt and no documents will have been lost. However, it may have been partially optimized - /// (some segments were merged but not all), and it's possible that one of the segments in the index - /// will be in non-compound format even when using compound file format. This will occur when the - /// exception is hit during conversion of the segment into compound format. - /// - /// This call will optimize those segments present in the index when the call started. If other - /// threads are still adding documents and flushing segments, those newly created segments will not - /// be optimized unless you call optimize again. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// - /// @see LogMergePolicy#findMergesForOptimize - virtual void optimize(); - - /// Optimize the index down to <= maxNumSegments. If maxNumSegments==1 then this is the same as - /// {@link #optimize()}. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// - /// @param maxNumSegments maximum number of segments left in the index after optimization finishes - virtual void optimize(int32_t maxNumSegments); - - /// Just like {@link #optimize()}, except you can specify whether the call should block until the - /// optimize completes. This is only meaningful with a {@link MergeScheduler} that is able to run - /// merges in background threads. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void optimize(bool doWait); - - /// Just like {@link #optimize(int32_t)}, except you can specify whether the call should block - /// until the optimize completes. This is only meaningful with a {@link MergeScheduler} that is - /// able to run merges in background threads. 
- /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void optimize(int32_t maxNumSegments, bool doWait); - - /// Just like {@link #expungeDeletes()}, except you can specify whether the call should block - /// until the operation completes. This is only meaningful with a {@link MergeScheduler} that - /// is able to run merges in background threads. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void expungeDeletes(bool doWait); - - /// Expunges all deletes from the index. When an index has many document deletions (or updates - /// to existing documents), it's best to either call optimize or expungeDeletes to remove all - /// unused data in the index associated with the deleted documents. To see how many deletions - /// you have pending in your index, call {@link IndexReader#numDeletedDocs}. This saves disk - /// space and memory usage while searching. expungeDeletes should be somewhat faster than - /// optimize since it does not insist on reducing the index to a single segment (though, this - /// depends on the {@link MergePolicy}; see {@link MergePolicy#findMergesToExpungeDeletes}.). - /// Note that this call does not first commit any buffered documents, so you must do so yourself - /// if necessary. See also {@link #expungeDeletes(bool)} - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void expungeDeletes(); - - /// Asks the mergePolicy whether any merges are necessary now and if so, runs the requested - /// merges and then iterate (test again if merges are needed) until no more merges are returned - /// by the mergePolicy. - /// - /// Explicit calls to maybeMerge() are usually not necessary. The most common case is when merge - /// policy parameters have changed. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
- virtual void maybeMerge(); - - /// The {@link MergeScheduler} calls this method to retrieve the next merge requested by the - /// MergePolicy. - virtual OneMergePtr getNextMerge(); - - /// Close the IndexWriter without committing any changes that have occurred since the last commit - /// (or since it was opened, if commit hasn't been called). This removes any temporary files that - /// had been created, after which the state of the index will be the same as it was when commit() - /// was last called or when this writer was first opened. This also clears a previous call to - /// {@link #prepareCommit}. - virtual void rollback(); - - /// Delete all documents in the index. - /// - /// This method will drop all buffered documents and will remove all segments from the index. This - /// change will not be visible until a {@link #commit()} has been called. This method can be rolled - /// back using {@link #rollback()}. - /// - /// NOTE: this method is much faster than using {@link #deleteDocuments()}. - /// - /// NOTE: this method will forcefully abort all merges in progress. If other threads are running - /// {@link #optimize()} or any of the addIndexes methods, they will receive {@link - /// MergePolicy.MergeAbortedException} - virtual void deleteAll(); - - /// Wait for any currently outstanding merges to finish. - /// - /// It is guaranteed that any merges started prior to calling this method will have completed once - /// this method completes. - virtual void waitForMerges(); - - /// Merges all segments from an array of indexes into this index. - /// - /// This may be used to parallelize batch indexing. A large document collection can be broken into - /// sub-collections. Each sub-collection can be indexed in parallel, on a different thread, process - /// or machine. The complete index can then be created by merging sub-collection indexes with this - /// method. 
- /// - /// NOTE: the index in each Directory must not be changed (opened by a writer) while this method is - /// running. This method does not acquire a write lock in each input Directory, so it is up to the - /// caller to enforce this. - /// - /// NOTE: while this is running, any attempts to add or delete documents (with another thread) will - /// be paused until this method completes. - /// - /// This method is transactional in how exceptions are handled: it does not commit a new segments_N - /// file until all indexes are added. This means if an exception occurs (for example disk full), - /// then either no indexes will have been added or they all will have been. - /// - /// Note that this requires temporary free space in the Directory up to 2X the sum of all input - /// indexes (including the starting index). If readers/searchers are open against the starting index, - /// then temporary free space required will be higher by the size of the starting index (see - /// {@link #optimize()} for details). - /// - /// Once this completes, the final size of the index will be less than the sum of all input index - /// sizes (including the starting index). It could be quite a bit smaller (if there were many pending - /// deletes) or just slightly smaller. - /// - /// This requires this index not be among those to be added. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void addIndexesNoOptimize(Collection dirs); - - /// Merges the provided indexes into this index. - /// After this completes, the index is optimized. The provided IndexReaders are not closed. - /// - /// NOTE: while this is running, any attempts to add or delete documents (with another thread) will - /// be paused until this method completes. - /// - /// See {@link #addIndexesNoOptimize} for details on transactional semantics, temporary free space - /// required in the Directory, and non-CFS segments on an exception. 
- /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void addIndexes(Collection readers); - - /// Prepare for commit. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// @see #prepareCommit(MapStringString) - virtual void prepareCommit(); - - /// Prepare for commit, specifying commitUserData Map (String -> String). This does the first phase - /// of 2-phase commit. This method does all steps necessary to commit changes since this writer was - /// opened: flushes pending added and deleted docs, syncs the index files, writes most of next - /// segments_N file. After calling this you must call either {@link #commit()} to finish the commit, - /// or {@link #rollback()} to revert the commit and undo all changes done since the writer was opened. - /// - /// You can also just call {@link #commit(Map)} directly without prepareCommit first in which case - /// that method will internally call prepareCommit. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// - /// @param commitUserData Opaque Map (String->String) that's recorded into the segments file in the - /// index, and retrievable by {@link IndexReader#getCommitUserData}. Note that when IndexWriter - /// commits itself during {@link #close}, the commitUserData is unchanged (just carried over from the - /// prior commit). If this is null then the previous commitUserData is kept. Also, the commitUserData - // will only "stick" if there are actually changes in the index to commit. - virtual void prepareCommit(MapStringString commitUserData); - - /// Commits all pending changes (added & deleted documents, optimizations, segment merges, added - /// indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the - /// changes and the index updates will survive an OS or machine crash or power loss. 
Note that this - /// does not wait for any running background merges to finish. This may be a costly operation, so you - /// should test the cost in your application and do it only when really necessary. - /// - /// Note that this operation calls Directory.sync on the index files. That call should not return until - /// the file contents & metadata are on stable storage. For FSDirectory, this calls the OS's fsync. - /// But, beware: some hardware devices may in fact cache writes even during fsync, and return before the - /// bits are actually on stable storage, to give the appearance of faster performance. If you have such - /// a device, and it does not have a battery backup (for example) then on power loss it may still lose - /// data. Lucene cannot guarantee consistency on such devices. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - /// - /// @see #prepareCommit - /// @see #commit(MapStringString) - virtual void commit(); - - /// Commits all changes to the index, specifying a commitUserData Map (String -> String). This just - /// calls {@link #prepareCommit(MapStringString)} (if you didn't already call it) and then - /// {@link #finishCommit}. - /// - /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. - virtual void commit(MapStringString commitUserData); - - /// Return the total size of all index files currently cached in memory. Useful for size management - /// with flushRamDocs() - virtual int64_t ramSizeInBytes(); - - /// Return the number of documents currently buffered in RAM. - virtual int32_t numRamDocs(); - - /// Merges the indicated segments, replacing them in the stack with a single segment. - virtual void merge(OneMergePtr merge); - - /// Hook that's called when the specified merge is complete. - virtual void mergeSuccess(OneMergePtr merge); - - /// Checks whether this merge involves any segments already participating in a merge. 
If not, this - /// merge is "registered", meaning we record that its segments are now participating in a merge, - /// and true is returned. Else (the merge conflicts) false is returned. - virtual bool registerMerge(OneMergePtr merge); - - /// Does initial setup for a merge, which is fast but holds the synchronized lock on IndexWriter - /// instance. - virtual void mergeInit(OneMergePtr merge); - - /// Does finishing for a merge, which is fast but holds the synchronized lock on IndexWriter instance. - virtual void mergeFinish(OneMergePtr merge); - - virtual void addMergeException(OneMergePtr merge); - - /// For test purposes. - virtual int32_t getBufferedDeleteTermsSize(); - - /// For test purposes. - virtual int32_t getNumBufferedDeleteTerms(); - - /// Utility routines for tests - virtual SegmentInfoPtr newestSegment(); - - virtual String segString(); - - /// Returns true if the index in the named directory is currently locked. - /// @param directory the directory to check for a lock - static bool isLocked(DirectoryPtr directory); - - /// Forcibly unlocks the index in the named directory. - /// Caution: this should only be used by failure recovery code, when it is known that no other process - /// nor thread is in fact currently accessing this index. - static void unlock(DirectoryPtr directory); - - /// Set the merged segment warmer. See {@link IndexReaderWarmer}. - virtual void setMergedSegmentWarmer(IndexReaderWarmerPtr warmer); - - /// Returns the current merged segment warmer. See {@link IndexReaderWarmer}. - virtual IndexReaderWarmerPtr getMergedSegmentWarmer(); - - /// Used only by assert for testing. 
Current points: - /// startDoFlush - /// startCommitMerge - /// startStartCommit - /// midStartCommit - /// midStartCommit2 - /// midStartCommitSuccess - /// finishStartCommit - /// startCommitMergeDeletes - /// startMergeInit - /// startApplyDeletes - /// startMergeInit - /// startMergeInit - virtual bool testPoint(const String& name); - - virtual bool nrtIsCurrent(SegmentInfosPtr infos); - virtual bool isClosed(); - - protected: - virtual void ensureOpen(bool includePendingClose); - virtual void ensureOpen(); - virtual void setMessageID(InfoStreamPtr infoStream); - - /// Casts current mergePolicy to LogMergePolicy, and throws an exception if the - /// mergePolicy is not a LogMergePolicy. - virtual LogMergePolicyPtr getLogMergePolicy(); - - virtual void setRollbackSegmentInfos(SegmentInfosPtr infos); - - /// If we are flushing by doc count (not by RAM usage), and using LogDocMergePolicy then push - /// maxBufferedDocs down as its minMergeDocs, to keep backwards compatibility. - virtual void pushMaxBufferedDocs(); - - virtual void messageState(); - - /// Returns true if this thread should attempt to close, or false if IndexWriter is now closed; - /// else, waits until another thread finishes closing - virtual bool shouldClose(); - virtual void closeInternal(bool waitForMerges); - - /// Tells the docWriter to close its currently open shared doc stores (stored fields & vectors - /// files). Return value specifies whether new doc store files are compound or not. - virtual bool flushDocStores(); - - /// Returns true if any merges in pendingMerges or runningMerges are optimization merges. - virtual bool optimizeMergesPending(); - - virtual void maybeMerge(bool optimize); - virtual void maybeMerge(int32_t maxNumSegmentsOptimize, bool optimize); - virtual void updatePendingMerges(int32_t maxNumSegmentsOptimize, bool optimize); - - /// Like {@link #getNextMerge()} except only returns a merge if it's external. 
- virtual OneMergePtr getNextExternalMerge(); - - /// Begin a transaction. During a transaction, any segment merges that happen (or ram segments - /// flushed) will not write a new segments file and will not remove any files that were present - /// at the start of the transaction. You must make a matched call to commitTransaction() or - /// rollbackTransaction() to finish the transaction. - /// - /// Note that buffered documents and delete terms are not handled within the transactions, so - /// they must be flushed before the transaction is started. - virtual void startTransaction(bool haveReadLock); - - /// Rolls back the transaction and restores state to where we were at the start. - virtual void rollbackTransaction(); - - /// Commits the transaction. This will write the new segments file and remove and pending - /// deletions we have accumulated during the transaction. - virtual void commitTransaction(); - virtual void rollbackInternal(); - - virtual void finishMerges(bool waitForMerges); - - /// Called whenever the SegmentInfos has been updated and the index files referenced exist - /// (correctly) in the index directory. - virtual void checkpoint(); - - virtual void finishAddIndexes(); - virtual void blockAddIndexes(bool includePendingClose); - virtual void resumeAddIndexes(); - virtual void resetMergeExceptions(); - virtual void noDupDirs(Collection dirs); - - virtual bool hasExternalSegments(); - - /// If any of our segments are using a directory != ours then we have to either copy them over one - /// by one, merge them (if merge policy has chosen to) or wait until currently running merges (in - /// the background) complete. We don't return until the SegmentInfos has no more external segments. - /// Currently this is only used by addIndexesNoOptimize(). 
- virtual void resolveExternalSegments(); - - /// A hook for extending classes to execute operations after pending added and deleted documents have - /// been flushed to the Directory but before the change is committed (new segments_N file written). - virtual void doAfterFlush(); - - /// A hook for extending classes to execute operations before pending added and deleted documents are - /// flushed to the Directory. - virtual void doBeforeFlush(); - - virtual void commit(int64_t sizeInBytes); - virtual void finishCommit(); - - /// Flush all in-memory buffered updates (adds and deletes) to the Directory. - /// @param triggerMerge if true, we may merge segments (if deletes or docs were flushed) if necessary - /// @param flushDocStores if false we are allowed to keep doc stores open to share with the next segment - /// @param flushDeletes whether pending deletes should also be flushed - virtual void flush(bool triggerMerge, bool flushDocStores, bool flushDeletes); - virtual bool doFlush(bool flushDocStores, bool flushDeletes); - virtual bool doFlushInternal(bool flushDocStores, bool flushDeletes); - - virtual int32_t ensureContiguousMerge(OneMergePtr merge); - - /// Carefully merges deletes for the segments we just merged. This is tricky because, although merging - /// will clear all deletes (compacts the documents), new deletes may have been flushed to the segments - /// since the merge was started. This method "carries over" such new deletes onto the newly merged - /// segment, and saves the resulting deletes file (incrementing the delete generation for merge.info). - /// If no deletes were flushed, no new deletes file is saved. 
- virtual void commitMergedDeletes(OneMergePtr merge, SegmentReaderPtr mergeReader); - virtual bool commitMerge(OneMergePtr merge, SegmentMergerPtr merger, int32_t mergedDocCount, SegmentReaderPtr mergedReader); - - virtual LuceneException handleMergeException(const LuceneException& exc, OneMergePtr merge); - - virtual void _mergeInit(OneMergePtr merge); - - virtual void setDiagnostics(SegmentInfoPtr info, const String& source); - virtual void setDiagnostics(SegmentInfoPtr info, const String& source, MapStringString details); - - virtual void setMergeDocStoreIsCompoundFile(OneMergePtr merge); - virtual void closeMergeReaders(OneMergePtr merge, bool suppressExceptions); - - /// Does the actual (time-consuming) work of the merge, but without holding synchronized lock on - /// IndexWriter instance. - virtual int32_t mergeMiddle(OneMergePtr merge); - - /// Apply buffered deletes to all segments. - virtual bool applyDeletes(); - - virtual String segString(SegmentInfosPtr infos); - - virtual bool startSync(const String& fileName, HashSet pending); - virtual void finishSync(const String& fileName, bool success); - - /// Blocks until all files in syncing are sync'd - bool waitForAllSynced(HashSet syncing); - void doWait(); - - /// Walk through all files referenced by the current segmentInfos and ask the Directory to sync each - /// file, if it wasn't already. If that succeeds, then we prepare a new segments_N file but do not - /// fully commit it. - virtual void startCommit(int64_t sizeInBytes, MapStringString commitUserData); - - virtual LuceneException handleOOM(const std::bad_alloc& oom, const String& location); - - friend class ReaderPool; - }; - - /// If {@link #getReader} has been called (ie, this writer is in near real-time mode), then after - /// a merge completes, this class can be invoked to warm the reader on the newly merged segment, - /// before the merge commits. 
This is not required for near real-time search, but will reduce - /// search latency on opening a new near real-time reader after a merge completes. - /// - /// NOTE: warm is called before any deletes have been carried over to the merged segment. - class LPPAPI IndexReaderWarmer : public LuceneObject - { - public: - virtual ~IndexReaderWarmer(); - - LUCENE_CLASS(IndexReaderWarmer); - - public: - virtual void warm(IndexReaderPtr reader) = 0; - }; -} - -#endif diff --git a/include/InfoStream.h b/include/InfoStream.h deleted file mode 100644 index 7a32bf27..00000000 --- a/include/InfoStream.h +++ /dev/null @@ -1,68 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INFOSTREAM_H -#define INFOSTREAM_H - -#include "LuceneObject.h" -#include - -namespace Lucene -{ - /// Utility class to support streaming info messages. - class LPPAPI InfoStream : public LuceneObject - { - protected: - InfoStream(); - - public: - virtual ~InfoStream(); - LUCENE_CLASS(InfoStream); - - public: - virtual InfoStream& operator<< (const String& t) = 0; - }; - - /// Stream override to write messages to a file. - class LPPAPI InfoStreamFile : public InfoStream - { - public: - InfoStreamFile(const String& path); - virtual ~InfoStreamFile(); - - LUCENE_CLASS(InfoStreamFile); - - protected: - std::wofstream file; - - public: - virtual InfoStreamFile& operator<< (const String& t); - }; - - /// Stream override to write messages to a std::cout. - class LPPAPI InfoStreamOut : public InfoStream - { - public: - virtual ~InfoStreamOut(); - LUCENE_CLASS(InfoStreamOut); - - public: - virtual InfoStreamOut& operator<< (const String& t); - }; - - /// Null stream override to eat messages. 
- class LPPAPI InfoStreamNull : public InfoStream - { - public: - virtual ~InfoStreamNull(); - LUCENE_CLASS(InfoStreamNull); - - public: - virtual InfoStreamNull& operator<< (const String& t); - }; -} - -#endif diff --git a/include/InputStreamReader.h b/include/InputStreamReader.h deleted file mode 100644 index eaa3a70c..00000000 --- a/include/InputStreamReader.h +++ /dev/null @@ -1,46 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INPUTSTREAMREADER_H -#define INPUTSTREAMREADER_H - -#include "Reader.h" - -namespace Lucene -{ - /// An InputStreamReader is a bridge from byte streams to character streams. - class InputStreamReader : public Reader - { - public: - /// Create an InputStreamReader that uses the utf8 charset. - InputStreamReader(ReaderPtr reader); - virtual ~InputStreamReader(); - - LUCENE_CLASS(InputStreamReader); - - protected: - ReaderPtr reader; - UTF8DecoderStreamPtr decoder; - - public: - /// Read a single character. - virtual int32_t read(); - - /// Read characters into a portion of an array. - virtual int32_t read(wchar_t* b, int32_t offset, int32_t length); - - /// Close the stream. - virtual void close(); - - /// Tell whether this stream supports the mark() operation - virtual bool markSupported(); - - /// Reset the stream. - virtual void reset(); - }; -} - -#endif diff --git a/include/IntBlockPool.h b/include/IntBlockPool.h deleted file mode 100644 index 08926595..00000000 --- a/include/IntBlockPool.h +++ /dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INTBLOCKPOOL_H -#define INTBLOCKPOOL_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class IntBlockPool : public LuceneObject - { - public: - IntBlockPool(DocumentsWriterPtr docWriter, bool trackAllocations); - virtual ~IntBlockPool(); - - LUCENE_CLASS(IntBlockPool); - - public: - Collection buffers; - - int32_t bufferUpto; // Which buffer we are upto - int32_t intUpto; // Where we are in head buffer - - IntArray buffer; // Current head buffer - int32_t intOffset; // Current head offset - bool trackAllocations; - - protected: - DocumentsWriterWeakPtr _docWriter; - - public: - void reset(); - void nextBuffer(); - }; -} - -#endif diff --git a/include/IntFieldSource.h b/include/IntFieldSource.h deleted file mode 100644 index 22056433..00000000 --- a/include/IntFieldSource.h +++ /dev/null @@ -1,44 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INTFIELDSOURCE_H -#define INTFIELDSOURCE_H - -#include "FieldCacheSource.h" - -namespace Lucene -{ - /// Obtains int field values from the {@link FieldCache} using getInts() and makes those values available - /// as other numeric types, casting as needed. - /// - /// @see FieldCacheSource for requirements on the field. - /// - /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite - /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. 
It's - /// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, - /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU - /// per lookup but will not consume double the FieldCache RAM. - class LPPAPI IntFieldSource : public FieldCacheSource - { - public: - /// Create a cached int field source with a specific string-to-int parser. - IntFieldSource(const String& field, IntParserPtr parser = IntParserPtr()); - virtual ~IntFieldSource(); - - LUCENE_CLASS(IntFieldSource); - - protected: - IntParserPtr parser; - - public: - virtual String description(); - virtual DocValuesPtr getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader); - virtual bool cachedFieldSourceEquals(FieldCacheSourcePtr other); - virtual int32_t cachedFieldSourceHashCode(); - }; -} - -#endif diff --git a/include/InvertedDocConsumer.h b/include/InvertedDocConsumer.h deleted file mode 100644 index efc68d5a..00000000 --- a/include/InvertedDocConsumer.h +++ /dev/null @@ -1,44 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef INVERTEDDOCCONSUMER_H -#define INVERTEDDOCCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class InvertedDocConsumer : public LuceneObject - { - public: - virtual ~InvertedDocConsumer(); - - LUCENE_CLASS(InvertedDocConsumer); - - public: - FieldInfosPtr fieldInfos; - - public: - /// Add a new thread - virtual InvertedDocConsumerPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread) = 0; - - /// Abort (called after hitting AbortException) - virtual void abort() = 0; - - /// Flush a new segment - virtual void flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state) = 0; - - /// Close doc stores - virtual void closeDocStore(SegmentWriteStatePtr state) = 0; - - /// Attempt to free RAM, returning true if any RAM was freed - virtual bool freeRAM() = 0; - - virtual void setFieldInfos(FieldInfosPtr fieldInfos); - }; -} - -#endif diff --git a/include/InvertedDocConsumerPerField.h b/include/InvertedDocConsumerPerField.h deleted file mode 100644 index bf227d4e..00000000 --- a/include/InvertedDocConsumerPerField.h +++ /dev/null @@ -1,40 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INVERTEDDOCCONSUMERPERFIELD_H -#define INVERTEDDOCCONSUMERPERFIELD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class InvertedDocConsumerPerField : public LuceneObject - { - public: - virtual ~InvertedDocConsumerPerField(); - - LUCENE_CLASS(InvertedDocConsumerPerField); - - public: - /// Called once per field, and is given all Fieldable occurrences for this field in the document. 
- /// Return true if you wish to see inverted tokens for these fields - virtual bool start(Collection fields, int32_t count) = 0; - - /// Called before a field instance is being processed - virtual void start(FieldablePtr field) = 0; - - /// Called once per inverted token - virtual void add() = 0; - - /// Called once per field per document, after all Fieldable occurrences are inverted - virtual void finish() = 0; - - /// Called on hitting an aborting exception - virtual void abort() = 0; - }; -} - -#endif diff --git a/include/InvertedDocConsumerPerThread.h b/include/InvertedDocConsumerPerThread.h deleted file mode 100644 index 6b538851..00000000 --- a/include/InvertedDocConsumerPerThread.h +++ /dev/null @@ -1,29 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INVERTEDDOCCONSUMERPERTHREAD_H -#define INVERTEDDOCCONSUMERPERTHREAD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class InvertedDocConsumerPerThread : public LuceneObject - { - public: - virtual ~InvertedDocConsumerPerThread(); - - LUCENE_CLASS(InvertedDocConsumerPerThread); - - public: - virtual void startDocument() = 0; - virtual InvertedDocConsumerPerFieldPtr addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo) = 0; - virtual DocWriterPtr finishDocument() = 0; - virtual void abort() = 0; - }; -} - -#endif diff --git a/include/InvertedDocEndConsumer.h b/include/InvertedDocEndConsumer.h deleted file mode 100644 index 6a251d97..00000000 --- a/include/InvertedDocEndConsumer.h +++ /dev/null @@ -1,30 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INVERTEDDOCENDCONSUMER_H -#define INVERTEDDOCENDCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class InvertedDocEndConsumer : public LuceneObject - { - public: - virtual ~InvertedDocEndConsumer(); - - LUCENE_CLASS(InvertedDocEndConsumer); - - public: - virtual InvertedDocEndConsumerPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread) = 0; - virtual void flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, SegmentWriteStatePtr state) = 0; - virtual void closeDocStore(SegmentWriteStatePtr state) = 0; - virtual void abort() = 0; - virtual void setFieldInfos(FieldInfosPtr fieldInfos) = 0; - }; -} - -#endif diff --git a/include/InvertedDocEndConsumerPerField.h b/include/InvertedDocEndConsumerPerField.h deleted file mode 100644 index 3ce6e336..00000000 --- a/include/InvertedDocEndConsumerPerField.h +++ /dev/null @@ -1,27 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef INVERTEDDOCENDCONSUMERPERFIELD_H -#define INVERTEDDOCENDCONSUMERPERFIELD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class InvertedDocEndConsumerPerField : public LuceneObject - { - public: - virtual ~InvertedDocEndConsumerPerField(); - - LUCENE_CLASS(InvertedDocEndConsumerPerField); - - public: - virtual void finish() = 0; - virtual void abort() = 0; - }; -} - -#endif diff --git a/include/InvertedDocEndConsumerPerThread.h b/include/InvertedDocEndConsumerPerThread.h deleted file mode 100644 index d827b64c..00000000 --- a/include/InvertedDocEndConsumerPerThread.h +++ /dev/null @@ -1,29 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef INVERTEDDOCENDCONSUMERPERTHREAD_H -#define INVERTEDDOCENDCONSUMERPERTHREAD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class InvertedDocEndConsumerPerThread : public LuceneObject - { - public: - virtual ~InvertedDocEndConsumerPerThread(); - - LUCENE_CLASS(InvertedDocEndConsumerPerThread); - - public: - virtual void startDocument() = 0; - virtual InvertedDocEndConsumerPerFieldPtr addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo) = 0; - virtual void finishDocument() = 0; - virtual void abort() = 0; - }; -} - -#endif diff --git a/include/KeepOnlyLastCommitDeletionPolicy.h b/include/KeepOnlyLastCommitDeletionPolicy.h deleted file mode 100644 index ed808fcd..00000000 --- a/include/KeepOnlyLastCommitDeletionPolicy.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef KEEPONLYLASTCOMMITDELETIONPOLICY_H -#define KEEPONLYLASTCOMMITDELETIONPOLICY_H - -#include "IndexDeletionPolicy.h" - -namespace Lucene -{ - /// This {@link IndexDeletionPolicy} implementation that keeps only the most recent commit and immediately - /// removes all prior commits after a new commit is done. This is the default deletion policy. - class LPPAPI KeepOnlyLastCommitDeletionPolicy : public IndexDeletionPolicy - { - public: - virtual ~KeepOnlyLastCommitDeletionPolicy(); - - LUCENE_CLASS(KeepOnlyLastCommitDeletionPolicy); - - public: - /// Deletes all commits except the most recent one. - virtual void onInit(Collection commits); - - /// Deletes all commits except the most recent one. - virtual void onCommit(Collection commits); - }; -} - -#endif diff --git a/include/KeywordAnalyzer.h b/include/KeywordAnalyzer.h deleted file mode 100644 index e666f20b..00000000 --- a/include/KeywordAnalyzer.h +++ /dev/null @@ -1,29 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef KEYWORDANALYZER_H -#define KEYWORDANALYZER_H - -#include "Analyzer.h" - -namespace Lucene -{ - /// Tokenizes the entire stream as a single token. This is useful for data like zip codes, ids, and some - /// product names. 
- class LPPAPI KeywordAnalyzer : public Analyzer - { - public: - virtual ~KeywordAnalyzer(); - - LUCENE_CLASS(KeywordAnalyzer); - - public: - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; -} - -#endif diff --git a/include/KeywordTokenizer.h b/include/KeywordTokenizer.h deleted file mode 100644 index 03294871..00000000 --- a/include/KeywordTokenizer.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef KEYWORDTOKENIZER_H -#define KEYWORDTOKENIZER_H - -#include "Tokenizer.h" - -namespace Lucene -{ - /// Emits the entire input as a single token. 
- class LPPAPI KeywordTokenizer : public Tokenizer - { - public: - KeywordTokenizer(ReaderPtr input); - KeywordTokenizer(ReaderPtr input, int32_t bufferSize); - KeywordTokenizer(AttributeSourcePtr source, ReaderPtr input, int32_t bufferSize); - KeywordTokenizer(AttributeFactoryPtr factory, ReaderPtr input, int32_t bufferSize); - - virtual ~KeywordTokenizer(); - - LUCENE_CLASS(KeywordTokenizer); - - protected: - static const int32_t DEFAULT_BUFFER_SIZE; - - bool done; - int32_t finalOffset; - TermAttributePtr termAtt; - OffsetAttributePtr offsetAtt; - - protected: - void init(int32_t bufferSize); - - public: - virtual bool incrementToken(); - virtual void end(); - virtual void reset(); - }; -} - -#endif diff --git a/include/LengthFilter.h b/include/LengthFilter.h deleted file mode 100644 index e140d3c0..00000000 --- a/include/LengthFilter.h +++ /dev/null @@ -1,37 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LENGTHFILTER_H -#define LENGTHFILTER_H - -#include "TokenFilter.h" - -namespace Lucene -{ - /// Removes words that are too long or too short from the stream. - class LPPAPI LengthFilter : public TokenFilter - { - public: - /// Build a filter that removes words that are too long or too short from the text. 
- LengthFilter(TokenStreamPtr input, int32_t min, int32_t max); - virtual ~LengthFilter(); - - LUCENE_CLASS(LengthFilter); - - public: - int32_t min; - int32_t max; - - protected: - TermAttributePtr termAtt; - - public: - /// Returns the next input Token whose term() is the right len - virtual bool incrementToken(); - }; -} - -#endif diff --git a/include/LetterTokenizer.h b/include/LetterTokenizer.h deleted file mode 100644 index 2a78a565..00000000 --- a/include/LetterTokenizer.h +++ /dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LETTERTOKENIZER_H -#define LETTERTOKENIZER_H - -#include "CharTokenizer.h" - -namespace Lucene -{ - /// A LetterTokenizer is a tokenizer that divides text at non-letters. That's to say, it defines tokens as maximal - /// strings of adjacent letters, as defined UnicodeUtil::isAlpha(c) predicate. - /// - /// Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, where - /// words are not separated by spaces. - class LPPAPI LetterTokenizer : public CharTokenizer - { - public: - /// Construct a new LetterTokenizer. - LetterTokenizer(ReaderPtr input); - - /// Construct a new LetterTokenizer using a given {@link AttributeSource}. - LetterTokenizer(AttributeSourcePtr source, ReaderPtr input); - - /// Construct a new LetterTokenizer using a given {@link AttributeFactory}. - LetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input); - - virtual ~LetterTokenizer(); - - LUCENE_CLASS(LetterTokenizer); - - public: - /// Collects only characters which satisfy UnicodeUtil::isAlpha(c). 
- virtual bool isTokenChar(wchar_t c); - }; -} - -#endif diff --git a/include/LoadFirstFieldSelector.h b/include/LoadFirstFieldSelector.h deleted file mode 100644 index 62c21fd3..00000000 --- a/include/LoadFirstFieldSelector.h +++ /dev/null @@ -1,28 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LOADFIRSTFIELDSELECTOR_H -#define LOADFIRSTFIELDSELECTOR_H - -#include "FieldSelector.h" - -namespace Lucene -{ - /// Load the First field and break. - /// See {@link FieldSelectorResult#LOAD_AND_BREAK} - class LPPAPI LoadFirstFieldSelector : public FieldSelector - { - public: - virtual ~LoadFirstFieldSelector(); - - LUCENE_CLASS(LoadFirstFieldSelector); - - public: - virtual FieldSelectorResult accept(const String& fieldName); - }; -} - -#endif diff --git a/include/Lock.h b/include/Lock.h deleted file mode 100644 index 184daae2..00000000 --- a/include/Lock.h +++ /dev/null @@ -1,50 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LOCK_H -#define LOCK_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// An interprocess mutex lock. - /// @see Directory#makeLock(const String&) - class LPPAPI Lock : public LuceneObject - { - public: - virtual ~Lock(); - LUCENE_CLASS(Lock); - - public: - /// How long {@link #obtain(int64_t)} waits, in milliseconds, in between attempts to acquire the lock. 
- static const int32_t LOCK_OBTAIN_WAIT_FOREVER; - - /// Pass this value to {@link #obtain(int64_t)} to try forever to obtain the lock. - static const int32_t LOCK_POLL_INTERVAL; - - public: - /// Attempts to obtain exclusive access and immediately return upon success or failure. - /// @return true if exclusive access is obtained. - virtual bool obtain() = 0; - - /// Releases exclusive access. - virtual void release() = 0; - - /// Returns true if the resource is currently locked. Note that one must still call {@link #obtain()} - /// before using the resource. - virtual bool isLocked() = 0; - - /// Attempts to obtain an exclusive lock within amount of time given. Polls once per {@link #LOCK_POLL_INTERVAL} - /// (currently 1000) milliseconds until lockWaitTimeout is passed. - /// @param lockWaitTimeout length of time to wait in milliseconds or {@link #LOCK_OBTAIN_WAIT_FOREVER} - /// to retry forever. - /// @return true if lock was obtained. - bool obtain(int32_t lockWaitTimeout); - }; -} - -#endif diff --git a/include/LockFactory.h b/include/LockFactory.h deleted file mode 100644 index 387445fe..00000000 --- a/include/LockFactory.h +++ /dev/null @@ -1,54 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LOCKFACTORY_H -#define LOCKFACTORY_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Base class for Locking implementation. {@link Directory} uses - /// instances of this class to implement locking. - /// Note that there are some useful tools to verify that - /// your LockFactory is working correctly: {@link - /// VerifyingLockFactory}, {@link LockStressTest}, {@link - /// LockVerifyServer}. 
- /// @see LockVerifyServer - /// @see LockStressTest - /// @see VerifyingLockFactory - class LPPAPI LockFactory : public LuceneObject - { - public: - virtual ~LockFactory(); - - LUCENE_CLASS(LockFactory); - - protected: - String lockPrefix; - - public: - /// Set the prefix in use for all locks created in this LockFactory. This is normally called once, when a - /// Directory gets this LockFactory instance. However, you can also call this (after this instance is - /// assigned to a Directory) to override the prefix in use. This is helpful if you're running Lucene on - /// machines that have different mount points for the same shared directory. - virtual void setLockPrefix(const String& lockPrefix); - - /// Get the prefix in use for all locks created in this LockFactory. - virtual String getLockPrefix(); - - /// Return a new Lock instance identified by lockName. - /// @param lockName name of the lock to be created. - virtual LockPtr makeLock(const String& lockName) = 0; - - /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you - /// are certain this lock is no longer in use. - /// @param lockName name of the lock to be cleared. - virtual void clearLock(const String& lockName) = 0; - }; -} - -#endif diff --git a/include/LogByteSizeMergePolicy.h b/include/LogByteSizeMergePolicy.h deleted file mode 100644 index a600f83b..00000000 --- a/include/LogByteSizeMergePolicy.h +++ /dev/null @@ -1,61 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef LOGBYTESIZEMERGEPOLICY_H -#define LOGBYTESIZEMERGEPOLICY_H - -#include "LogMergePolicy.h" - -namespace Lucene -{ - /// This is a {@link LogMergePolicy} that measures size of a segment as the total byte size of the - /// segment's files. - class LPPAPI LogByteSizeMergePolicy : public LogMergePolicy - { - public: - LogByteSizeMergePolicy(IndexWriterPtr writer); - virtual ~LogByteSizeMergePolicy(); - - LUCENE_CLASS(LogByteSizeMergePolicy); - - public: - /// Default minimum segment size. @see setMinMergeMB. - static const double DEFAULT_MIN_MERGE_MB; - - /// Default maximum segment size. A segment of this size or larger will never be merged. - /// @see setMaxMergeMB - static const double DEFAULT_MAX_MERGE_MB; - - protected: - virtual int64_t size(SegmentInfoPtr info); - - public: - /// Determines the largest segment (measured by total byte size of the segment's files, in MB) - /// that may be merged with other segments. Small values (eg., less than 50 MB) are best for - /// interactive indexing, as this limits the length of pauses while indexing to a few seconds. - /// Larger values are best for batched indexing and speedier searches. - /// - /// Note that {@link #setMaxMergeDocs} is also used to check whether a segment is too large for - /// merging (it's either or). - void setMaxMergeMB(double mb); - - /// Returns the largest segment (measured by total byte size of the segment's files, in MB) that - /// may be merged with other segments. @see #setMaxMergeMB - double getMaxMergeMB(); - - /// Sets the minimum size for the lowest level segments. Any segments below this size are - /// considered to be on the same level (even if they vary drastically in size) and will be merged - /// whenever there are mergeFactor of them. This effectively truncates the "long tail" of small - /// segments that would otherwise be created into a single level. 
If you set this too large, it - /// could greatly increase the merging cost during indexing (if you flush many small segments). - void setMinMergeMB(double mb); - - /// Get the minimum size for a segment to remain un-merged. @see #setMinMergeMB - double getMinMergeMB(); - }; -} - -#endif diff --git a/include/LogDocMergePolicy.h b/include/LogDocMergePolicy.h deleted file mode 100644 index f40bb2b3..00000000 --- a/include/LogDocMergePolicy.h +++ /dev/null @@ -1,44 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LOGDOCMERGEPOLICY_H -#define LOGDOCMERGEPOLICY_H - -#include "LogMergePolicy.h" - -namespace Lucene -{ - /// This is a {@link LogMergePolicy} that measures size of a segment as the number of documents - /// (not taking deletions into account). - class LPPAPI LogDocMergePolicy : public LogMergePolicy - { - public: - LogDocMergePolicy(IndexWriterPtr writer); - virtual ~LogDocMergePolicy(); - - LUCENE_CLASS(LogDocMergePolicy); - - public: - /// Default minimum segment size. @see setMinMergeDocs - static const int32_t DEFAULT_MIN_MERGE_DOCS; - - protected: - virtual int64_t size(SegmentInfoPtr info); - - public: - /// Sets the minimum size for the lowest level segments. Any segments below this size are considered - /// to be on the same level (even if they vary drastically in size) and will be merged whenever there - /// are mergeFactor of them. This effectively truncates the "long tail" of small segments that would - /// otherwise be created into a single level. If you set this too large, it could greatly increase the - /// merging cost during indexing (if you flush many small segments). 
- void setMinMergeDocs(int32_t minMergeDocs); - - /// Get the minimum size for a segment to remain un-merged. @see #setMinMergeDocs - int32_t getMinMergeDocs(); - }; -} - -#endif diff --git a/include/LogMergePolicy.h b/include/LogMergePolicy.h deleted file mode 100644 index 9b268901..00000000 --- a/include/LogMergePolicy.h +++ /dev/null @@ -1,161 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LOGMERGEPOLICY_H -#define LOGMERGEPOLICY_H - -#include "MergePolicy.h" - -namespace Lucene -{ - /// This class implements a {@link MergePolicy} that tries to merge segments into levels of exponentially - /// increasing size, where each level has fewer segments than the value of the merge factor. Whenever extra - /// segments (beyond the merge factor upper bound) are encountered, all segments within the level are merged. - /// You can get or set the merge factor using {@link #getMergeFactor()} and {@link #setMergeFactor(int)} - /// respectively. - /// - /// This class is abstract and requires a subclass to define the {@link #size} method which specifies how a - /// segment's size is determined. {@link LogDocMergePolicy} is one subclass that measures size by document - /// count in the segment. {@link LogByteSizeMergePolicy} is another subclass that measures size as the total - /// byte size of the file(s) for the segment. 
- class LPPAPI LogMergePolicy : public MergePolicy - { - public: - LogMergePolicy(IndexWriterPtr writer); - virtual ~LogMergePolicy(); - - LUCENE_CLASS(LogMergePolicy); - - protected: - int32_t mergeFactor; - - double noCFSRatio; - - bool calibrateSizeByDeletes; - bool _useCompoundFile; - bool _useCompoundDocStore; - - public: - /// Defines the allowed range of log(size) for each level. A level is computed by taking the max segment - /// log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range. - static const double LEVEL_LOG_SPAN; - - /// Default merge factor, which is how many segments are merged at a time. - static const int32_t DEFAULT_MERGE_FACTOR; - - /// Default maximum segment size. A segment of this size or larger will never be merged. - /// @see setMaxMergeDocs - static const int32_t DEFAULT_MAX_MERGE_DOCS; - - /// Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it. - /// @see #setNoCFSRatio - static const double DEFAULT_NO_CFS_RATIO; - - int64_t minMergeSize; - int64_t maxMergeSize; - int32_t maxMergeDocs; - - public: - /// @see #setNoCFSRatio - double getNoCFSRatio(); - - /// If a merged segment will be more than this percentage of the total size of the index, leave the segment as - /// non-compound file even if compound file is enabled. Set to 1.0 to always use CFS regardless of merge size. - void setNoCFSRatio(double noCFSRatio); - - /// Returns the number of segments that are merged at once and also controls the total number of segments - /// allowed to accumulate in the index. - int32_t getMergeFactor(); - - /// Determines how often segment indices are merged by addDocument(). With smaller values, less RAM is - /// used while indexing, and searches on unoptimized indices are faster, but indexing speed is slower. - /// With larger values, more RAM is used during indexing, and while searches on unoptimized indices are - /// slower, indexing is faster. 
Thus larger values (> 10) are best for batch index creation, and smaller - /// values (< 10) for indices that are interactively maintained. - void setMergeFactor(int32_t mergeFactor); - - /// Returns true if a newly flushed (not from merge) segment should use the compound file format. - virtual bool useCompoundFile(SegmentInfosPtr segments, SegmentInfoPtr newSegment); - - /// Sets whether compound file format should be used for newly flushed and newly merged segments. - void setUseCompoundFile(bool useCompoundFile); - - /// Returns true if newly flushed and newly merge segments are written in compound file format. - /// @see #setUseCompoundFile - bool getUseCompoundFile(); - - /// Returns true if the doc store files should use the compound file format. - virtual bool useCompoundDocStore(SegmentInfosPtr segments); - - /// Sets whether compound file format should be used for newly flushed and newly merged doc store - /// segment files (term vectors and stored fields). - void setUseCompoundDocStore(bool useCompoundDocStore); - - /// Returns true if newly flushed and newly merge doc store segment files (term vectors and stored fields) - /// are written in compound file format. @see #setUseCompoundDocStore - bool getUseCompoundDocStore(); - - /// Sets whether the segment size should be calibrated by the number of deletes when choosing segments - /// for merge. - void setCalibrateSizeByDeletes(bool calibrateSizeByDeletes); - - /// Returns true if the segment size should be calibrated by the number of deletes when choosing segments - /// for merge. - bool getCalibrateSizeByDeletes(); - - /// Release all resources for the policy. - virtual void close(); - - /// Returns the merges necessary to optimize the index. This merge policy defines "optimized" to mean only - /// one segment in the index, where that segment has no deletions pending nor separate norms, and it is in - /// compound file format if the current useCompoundFile setting is true. 
This method returns multiple merges - /// (mergeFactor at a time) so the {@link MergeScheduler} in use may make use of concurrency. - virtual MergeSpecificationPtr findMergesForOptimize(SegmentInfosPtr segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize); - - /// Finds merges necessary to expunge all deletes from the index. We simply merge adjacent segments that have - /// deletes, up to mergeFactor at a time. - virtual MergeSpecificationPtr findMergesToExpungeDeletes(SegmentInfosPtr segmentInfos); - - /// Checks if any merges are now necessary and returns a {@link MergePolicy.MergeSpecification} if so. A merge - /// is necessary when there are more than {@link #setMergeFactor} segments at a given level. When multiple - /// levels have too many segments, this method will return multiple merges, allowing the {@link MergeScheduler} - /// to use concurrency. - virtual MergeSpecificationPtr findMerges(SegmentInfosPtr segmentInfos); - - /// Determines the largest segment (measured by document count) that may be merged with other segments. - /// Small values (eg., less than 10,000) are best for interactive indexing, as this limits the length of - /// pauses while indexing to a few seconds. Larger values are best for batched indexing and speedier searches. - /// - /// The default value is INT_MAX. - /// - /// The default merge policy ({@link LogByteSizeMergePolicy}) also allows you to set this limit by net size - /// (in MB) of the segment, using {@link LogByteSizeMergePolicy#setMaxMergeMB}. - void setMaxMergeDocs(int32_t maxMergeDocs); - - /// Returns the largest segment (measured by document count) that may be merged with other segments. 
- /// @see #setMaxMergeDocs - int32_t getMaxMergeDocs(); - - protected: - bool verbose(); - void message(const String& message); - - virtual int64_t size(SegmentInfoPtr info) = 0; - - int64_t sizeDocs(SegmentInfoPtr info); - int64_t sizeBytes(SegmentInfoPtr info); - - bool isOptimized(SegmentInfosPtr infos, int32_t maxNumSegments, SetSegmentInfo segmentsToOptimize); - - /// Returns true if this single info is optimized (has no pending norms or deletes, is in the same dir as the - /// writer, and matches the current compound file setting - bool isOptimized(SegmentInfoPtr info); - - OneMergePtr makeOneMerge(SegmentInfosPtr infos, SegmentInfosPtr infosToMerge); - }; -} - -#endif diff --git a/include/LowerCaseFilter.h b/include/LowerCaseFilter.h deleted file mode 100644 index fae528e4..00000000 --- a/include/LowerCaseFilter.h +++ /dev/null @@ -1,31 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LOWERCASEFILTER_H -#define LOWERCASEFILTER_H - -#include "TokenFilter.h" - -namespace Lucene -{ - /// Normalizes token text to lower case. - class LPPAPI LowerCaseFilter : public TokenFilter - { - public: - LowerCaseFilter(TokenStreamPtr input); - virtual ~LowerCaseFilter(); - - LUCENE_CLASS(LowerCaseFilter); - - protected: - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; -} - -#endif diff --git a/include/LowerCaseTokenizer.h b/include/LowerCaseTokenizer.h deleted file mode 100644 index 909bec71..00000000 --- a/include/LowerCaseTokenizer.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LOWERCASETOKENIZER_H -#define LOWERCASETOKENIZER_H - -#include "LetterTokenizer.h" - -namespace Lucene -{ - /// LowerCaseTokenizer performs the function of LetterTokenizer and LowerCaseFilter together. It divides text at - /// non-letters and converts them to lower case. While it is functionally equivalent to the combination of - /// LetterTokenizer and LowerCaseFilter, there is a performance advantage to doing the two tasks at once, hence - /// this (redundant) implementation. - /// - /// Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, - /// where words are not separated by spaces. - class LPPAPI LowerCaseTokenizer : public LetterTokenizer - { - public: - /// Construct a new LowerCaseTokenizer. - LowerCaseTokenizer(ReaderPtr input); - - /// Construct a new LowerCaseTokenizer using a given {@link AttributeSource}. - LowerCaseTokenizer(AttributeSourcePtr source, ReaderPtr input); - - /// Construct a new LowerCaseTokenizer using a given {@link AttributeFactory}. - LowerCaseTokenizer(AttributeFactoryPtr factory, ReaderPtr input); - - virtual ~LowerCaseTokenizer(); - - LUCENE_CLASS(LowerCaseTokenizer); - - public: - /// Converts char to lower case CharFolder::toLower. - virtual wchar_t normalize(wchar_t c); - }; -} - -#endif diff --git a/include/Lucene.h b/include/Lucene.h deleted file mode 100644 index 5303cd46..00000000 --- a/include/Lucene.h +++ /dev/null @@ -1,227 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef LUCENE_H -#define LUCENE_H - -#include "Config.h" - -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -using boost::int8_t; -using boost::uint8_t; -using boost::int16_t; -using boost::uint16_t; -using boost::int32_t; -using boost::uint32_t; -using boost::int64_t; -using boost::uint64_t; - -#define SIZEOF_ARRAY(arr) (sizeof(arr) / sizeof((arr)[0])) - -#include "LuceneTypes.h" -#include "Allocator.h" - -namespace boost -{ - struct blank; - class thread; - class any; - template < typename Signature > class function; - namespace interprocess - { - class file_lock; - } - namespace posix_time - { - class ptime; - } -} - -namespace Lucene -{ - typedef std::basic_string< char, std::char_traits, Allocator > SingleString; - typedef std::basic_ostringstream< char, std::char_traits, Allocator > SingleStringStream; - typedef std::basic_string< wchar_t, std::char_traits, Allocator > String; - typedef std::basic_ostringstream< wchar_t, std::char_traits, Allocator > StringStream; - - const std::basic_string< wchar_t, std::char_traits, Allocator > EmptyString; - - typedef boost::shared_ptr filelockPtr; - typedef boost::shared_ptr threadPtr; - - typedef boost::shared_ptr ofstreamPtr; - typedef boost::shared_ptr ifstreamPtr; - typedef boost::shared_ptr localePtr; -} - -#include "LuceneFactory.h" -#include "LuceneException.h" -#include "Array.h" -#include "Collection.h" -#include "Map.h" -#include "Set.h" -#include "HashMap.h" -#include "HashSet.h" -#include "Constants.h" - -namespace Lucene -{ - typedef Array ByteArray; - typedef Array IntArray; - typedef Array LongArray; - typedef Array CharArray; - typedef Array DoubleArray; - - template - struct luceneEquals - { - inline bool operator()(const TYPE& first, const TYPE& second) const - { - return first ? 
first->equals(second) : (!first && !second); - } - }; - - template - struct luceneEqualTo - { - luceneEqualTo(const TYPE& type) : equalType(type) {} - inline bool operator()(const TYPE& other) const - { - return equalType->equals(other); - } - const TYPE& equalType; - }; - - template - struct luceneWeakEquals - { - inline bool operator()(const TYPE& first, const TYPE& second) const - { - if (first.expired() || second.expired()) - return (first.expired() && second.expired()); - return first.lock()->equals(second.lock()); - } - }; - - template - struct luceneHash : std::unary_function - { - std::size_t operator()(const TYPE& type) const - { - return type ? type->hashCode() : 0; - } - }; - - template - struct luceneWeakHash : std::unary_function - { - std::size_t operator()(const TYPE& type) const - { - return type.expired() ? 0 : type.lock()->hashCode(); - } - }; - - template - struct luceneCompare - { - inline bool operator()(const TYPE& first, const TYPE& second) const - { - if (!second) - return false; - if (!first) - return true; - return (first->compareTo(second) < 0); - } - }; - - typedef boost::blank VariantNull; - typedef boost::variant FieldsData; - typedef boost::variant ComparableValue; - typedef boost::variant NumericValue; - typedef boost::variant StringValue; - typedef boost::variant, Collection, Collection, VariantNull> CollectionValue; - - typedef HashSet< SegmentInfoPtr, luceneHash, luceneEquals > SetSegmentInfo; - typedef HashSet< MergeThreadPtr, luceneHash, luceneEquals > SetMergeThread; - typedef HashSet< OneMergePtr, luceneHash, luceneEquals > SetOneMerge; - typedef HashSet< QueryPtr, luceneHash, luceneEquals > SetQuery; - typedef HashSet< TermPtr, luceneHash, luceneEquals > SetTerm; - typedef HashSet< BooleanClausePtr, luceneHash, luceneEquals > SetBooleanClause; - typedef HashSet< ReaderFieldPtr, luceneHash, luceneEquals > SetReaderField; - typedef HashSet SetByteArray; - - typedef HashMap< String, String > MapStringString; - typedef HashMap< 
wchar_t, NormalizeCharMapPtr > MapCharNormalizeCharMap; - typedef HashMap< String, AnalyzerPtr > MapStringAnalyzer; - typedef HashMap< String, ByteArray > MapStringByteArray; - typedef HashMap< String, int32_t > MapStringInt; - typedef HashMap< String, FieldInfoPtr > MapStringFieldInfo; - typedef HashMap< String, Collection > MapStringCollectionTermVectorEntry; - typedef HashMap< String, RefCountPtr > MapStringRefCount; - typedef HashMap< int32_t, TermVectorsPositionInfoPtr > MapIntTermVectorsPositionInfo; - typedef HashMap< String, MapIntTermVectorsPositionInfo > MapStringMapIntTermVectorsPositionInfo; - typedef HashMap< String, NormPtr > MapStringNorm; - typedef HashMap< String, TermVectorEntryPtr > MapStringTermVectorEntry; - typedef HashMap< String, RAMFilePtr > MapStringRAMFile; - typedef HashMap< int32_t, ByteArray > MapIntByteArray; - typedef HashMap< int32_t, FilterItemPtr > MapIntFilterItem; - typedef HashMap< int32_t, double > MapIntDouble; - typedef HashMap< int64_t, int32_t > MapLongInt; - typedef HashMap< String, double > MapStringDouble; - typedef HashMap< int32_t, CachePtr > MapStringCache; - typedef HashMap< String, LockPtr > MapStringLock; - - typedef HashMap< SegmentInfoPtr, SegmentReaderPtr, luceneHash, luceneEquals > MapSegmentInfoSegmentReader; - typedef HashMap< SegmentInfoPtr, int32_t, luceneHash, luceneEquals > MapSegmentInfoInt; - typedef HashMap< DocFieldConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField; - typedef HashMap< InvertedDocConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField; - typedef HashMap< InvertedDocEndConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField; - typedef HashMap< TermsHashConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > 
MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField; - typedef HashMap< FieldInfoPtr, Collection, luceneHash, luceneEquals > MapFieldInfoCollectionNormsWriterPerField; - typedef HashMap< IndexReaderPtr, HashSet, luceneHash, luceneEquals > MapIndexReaderSetString; - typedef HashMap< TermPtr, int32_t, luceneHash, luceneEquals > MapTermInt; - typedef HashMap< QueryPtr, int32_t, luceneHash, luceneEquals > MapQueryInt; - typedef HashMap< EntryPtr, boost::any, luceneHash, luceneEquals > MapEntryAny; - typedef HashMap< PhrasePositionsPtr, LuceneObjectPtr, luceneHash, luceneEquals > MapPhrasePositionsLuceneObject; - typedef HashMap< ReaderFieldPtr, SetReaderField, luceneHash, luceneEquals > MapReaderFieldSetReaderField; - - typedef WeakHashMap< LuceneObjectWeakPtr, LuceneObjectPtr, luceneWeakHash, luceneWeakEquals > WeakMapObjectObject; - typedef WeakHashMap< LuceneObjectWeakPtr, MapEntryAny, luceneWeakHash, luceneWeakEquals > WeakMapLuceneObjectMapEntryAny; - - typedef Map< String, AttributePtr > MapStringAttribute; - typedef Map< int64_t, DocumentsWriterThreadStatePtr > MapThreadDocumentsWriterThreadState; - typedef Map< String, IndexReaderPtr > MapStringIndexReader; - typedef Map< TermPtr, NumPtr, luceneCompare > MapTermNum; - - typedef boost::function TermVectorEntryComparator; - - template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > class SimpleLRUCache; - typedef SimpleLRUCache< TermPtr, TermInfoPtr, luceneHash, luceneEquals > TermInfoCache; - typedef boost::shared_ptr TermInfoCachePtr; -} - -#include "Synchronize.h" -#include "CycleCheck.h" -#if defined(LPP_BUILDING_LIB) || defined(LPP_EXPOSE_INTERNAL) -#define INTERNAL public -#else -#define INTERNAL protected -#endif - -#endif diff --git a/include/LuceneException.h b/include/LuceneException.h deleted file mode 100644 index 073bfe0e..00000000 --- a/include/LuceneException.h +++ /dev/null @@ -1,102 +0,0 @@ 
-///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LUCENEEXCEPTION_H -#define LUCENEEXCEPTION_H - -#include "Lucene.h" - -namespace Lucene -{ - /// Lucene exception container. - class LPPAPI LuceneException : public std::exception - { - public: - enum ExceptionType - { - Null, - AlreadyClosed, - Compression, - CorruptIndex, - FieldReader, - FileNotFound, - IllegalArgument, - IllegalState, - IndexOutOfBounds, - IO, - LockObtainFailed, - LockReleaseFailed, - Lookahead, - MergeAborted, - Merge, - NoSuchDirectory, - NullPointer, - NumberFormat, - OutOfMemory, - Parse, - QueryParser, - Runtime, - StaleReader, - StopFillCache, - Temporary, - TimeExceeded, - TooManyClauses, - UnsupportedOperation - }; - - LuceneException(const String& error = EmptyString, LuceneException::ExceptionType type = Null) throw(); - ~LuceneException() throw(); - - protected: - ExceptionType type; - String error; - - public: - ExceptionType getType() const; - String getError() const; - bool isNull() const; - void throwException(); - }; - - template - class ExceptionTemplate : public ParentException - { - public: - ExceptionTemplate(const String& error = EmptyString, LuceneException::ExceptionType type = Type) : ParentException(error, type) - { - } - }; - - typedef ExceptionTemplate RuntimeException; - typedef ExceptionTemplate OutOfMemoryError; - typedef ExceptionTemplate TemporaryException; - typedef ExceptionTemplate IllegalStateException; - typedef ExceptionTemplate IllegalArgumentException; - typedef ExceptionTemplate IndexOutOfBoundsException; - typedef ExceptionTemplate NullPointerException; - typedef ExceptionTemplate FieldReaderException; - typedef ExceptionTemplate MergeException; - typedef 
ExceptionTemplate StopFillCacheException; - typedef ExceptionTemplate TimeExceededException; - typedef ExceptionTemplate TooManyClausesException; - typedef ExceptionTemplate UnsupportedOperationException; - typedef ExceptionTemplate NumberFormatException; - typedef ExceptionTemplate AlreadyClosedException; - typedef ExceptionTemplate IOException; - typedef ExceptionTemplate CorruptIndexException; - typedef ExceptionTemplate FileNotFoundException; - typedef ExceptionTemplate LockObtainFailedException; - typedef ExceptionTemplate LockReleaseFailedException; - typedef ExceptionTemplate MergeAbortedException; - typedef ExceptionTemplate StaleReaderException; - typedef ExceptionTemplate NoSuchDirectoryException; - typedef ExceptionTemplate LookaheadSuccess; - typedef ExceptionTemplate ParseException; - typedef ExceptionTemplate QueryParserError; - typedef ExceptionTemplate CompressionException; -} - -#endif diff --git a/include/LuceneFactory.h b/include/LuceneFactory.h deleted file mode 100644 index 30d497b2..00000000 --- a/include/LuceneFactory.h +++ /dev/null @@ -1,195 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef LUCENEFACTORY_H -#define LUCENEFACTORY_H - -#include - -namespace Lucene -{ - template - boost::shared_ptr newInstance() - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T); - #else - return boost::allocate_shared(Allocator()); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1)); - #else - return boost::allocate_shared(Allocator(), a1); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1, A2 const& a2) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1, a2)); - #else - return boost::allocate_shared(Allocator(), a1, a2); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1, a2, a3)); - #else - return boost::allocate_shared(Allocator(), a1, a2, a3); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1, a2, a3, a4)); - #else - return boost::allocate_shared(Allocator(), a1, a2, a3, a4); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1, a2, a3, a4, a5)); - #else - return boost::allocate_shared(Allocator(), a1, a2, a3, a4, a5); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6)); - #else - return boost::allocate_shared(Allocator(), a1, a2, a3, a4, a5, a6); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 
const& a5, A6 const& a6, A7 const& a7) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7)); - #else - return boost::allocate_shared(Allocator(), a1, a2, a3, a4, a5, a6, a7); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8)); - #else - return boost::allocate_shared(Allocator(), a1, a2, a3, a4, a5, a6, a7, a8); - #endif - } - - template - boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8, A9 const& a9) - { - #if BOOST_VERSION <= 103800 - return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9)); - #else - return boost::allocate_shared(Allocator(), a1, a2, a3, a4, a5, a6, a7, a8, a9); - #endif - } - - template - boost::shared_ptr newLucene() - { - boost::shared_ptr instance(newInstance()); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1) - { - boost::shared_ptr instance(newInstance(a1)); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1, A2 const& a2) - { - boost::shared_ptr instance(newInstance(a1, a2)); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3) - { - boost::shared_ptr instance(newInstance(a1, a2, a3)); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4) - { - boost::shared_ptr instance(newInstance(a1, a2, a3, a4)); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5) - { - boost::shared_ptr 
instance(newInstance(a1, a2, a3, a4, a5)); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6) - { - boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6)); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7) - { - boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7)); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8) - { - boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7, a8)); - instance->initialize(); - return instance; - } - - template - boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8, A9 const& a9) - { - boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7, a8, a9)); - instance->initialize(); - return instance; - } -} - -#endif diff --git a/include/LuceneObject.h b/include/LuceneObject.h deleted file mode 100644 index af10f7f9..00000000 --- a/include/LuceneObject.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef LUCENEOBJECT_H -#define LUCENEOBJECT_H - -#include -#include "LuceneSync.h" - -#ifdef LPP_USE_CYCLIC_CHECK -#define LUCENE_INTERFACE(Name) \ - static String _getClassName() { return L###Name; } \ - virtual String getClassName() { return L###Name; } \ - CycleCheckT cycleCheck; -#else -#define LUCENE_INTERFACE(Name) \ - static String _getClassName() { return L###Name; } \ - virtual String getClassName() { return L###Name; } -#endif - -#define LUCENE_CLASS(Name) \ - LUCENE_INTERFACE(Name); \ - boost::shared_ptr shared_from_this() { return boost::static_pointer_cast(LuceneObject::shared_from_this()); } \ - -namespace Lucene -{ - /// Base class for all Lucene classes - class LPPAPI LuceneObject : public LuceneSync, public boost::enable_shared_from_this - { - public: - virtual ~LuceneObject(); - - protected: - LuceneObject(); - - public: - /// Called directly after instantiation to create objects that depend on this object being - /// fully constructed. - virtual void initialize(); - - /// Return clone of this object - /// @param other clone reference - null when called initially, then set in top virtual override. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Return hash code for this object. - virtual int32_t hashCode(); - - /// Return whether two objects are equal - virtual bool equals(LuceneObjectPtr other); - - /// Compare two objects - virtual int32_t compareTo(LuceneObjectPtr other); - - /// Returns a string representation of the object - virtual String toString(); - }; -} - -#endif diff --git a/include/LuceneSignal.h b/include/LuceneSignal.h deleted file mode 100644 index 11559e32..00000000 --- a/include/LuceneSignal.h +++ /dev/null @@ -1,40 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LUCENESIGNAL_H -#define LUCENESIGNAL_H - -#include -#include "Lucene.h" - -namespace Lucene -{ - /// Utility class to support signaling notifications. - class LPPAPI LuceneSignal - { - public: - LuceneSignal(SynchronizePtr objectLock = SynchronizePtr()); - virtual ~LuceneSignal(); - - protected: - boost::mutex waitMutex; - boost::condition signalCondition; - SynchronizePtr objectLock; - - public: - /// create a new LuceneSignal instance atomically. - static void createSignal(LuceneSignalPtr& signal, SynchronizePtr objectLock); - - /// Wait for signal using an optional timeout. - void wait(int32_t timeout = 0); - - /// Notify all threads waiting for signal. - void notifyAll(); - }; -} - - -#endif diff --git a/include/LuceneSync.h b/include/LuceneSync.h deleted file mode 100644 index 0d2826bc..00000000 --- a/include/LuceneSync.h +++ /dev/null @@ -1,48 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LUCENESYNC_H -#define LUCENESYNC_H - -#include "Lucene.h" - -namespace Lucene -{ - /// Base class for all Lucene synchronised classes - class LPPAPI LuceneSync - { - public: - virtual ~LuceneSync(); - - protected: - SynchronizePtr objectLock; - LuceneSignalPtr objectSignal; - - public: - /// Return this object synchronize lock. - virtual SynchronizePtr getSync(); - - /// Return this object signal. - virtual LuceneSignalPtr getSignal(); - - /// Lock this object using an optional timeout. - virtual void lock(int32_t timeout = 0); - - /// Unlock this object. 
- virtual void unlock(); - - /// Returns true if this object is currently locked by current thread. - virtual bool holdsLock(); - - /// Wait for signal using an optional timeout. - virtual void wait(int32_t timeout = 0); - - /// Notify all threads waiting for signal. - virtual void notifyAll(); - }; -} - -#endif diff --git a/include/LuceneThread.h b/include/LuceneThread.h deleted file mode 100644 index 985e0598..00000000 --- a/include/LuceneThread.h +++ /dev/null @@ -1,86 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef LUCENETHREAD_H -#define LUCENETHREAD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Lucene thread container. - /// - /// It seems there are major issues with using boost::thread::id under Windows. - /// After many hours of debugging and trying various strategies, I was unable to fix an - /// occasional crash whereby boost::thread::thread_data was being deleted prematurely. - /// - /// This problem is most visible when running the AtomicUpdateTest test suite. - /// - /// Therefore, I now uniquely identify threads by their native id. - class LPPAPI LuceneThread : public LuceneObject - { - public: - LuceneThread(); - virtual ~LuceneThread(); - - LUCENE_CLASS(LuceneThread); - - public: - static const int32_t MAX_PRIORITY; - static const int32_t NORM_PRIORITY; - static const int32_t MIN_PRIORITY; - - protected: - threadPtr thread; - - /// Flag to indicate running thread. - /// @see #isAlive - bool running; - - public: - /// start thread see {@link #run}. - virtual void start(); - - /// return whether thread is current running. - virtual bool isAlive(); - - /// set running thread priority. 
- virtual void setPriority(int32_t priority); - - /// return running thread priority. - virtual int32_t getPriority(); - - /// wait for thread to finish using an optional timeout. - virtual bool join(int32_t timeout = 0); - - /// causes the currently executing thread object to temporarily pause and allow other threads to execute. - virtual void yield(); - - /// override to provide the body of the thread. - virtual void run() = 0; - - /// Return representation of current execution thread. - static int64_t currentId(); - - /// Suspends current execution thread for a given time. - static void threadSleep(int32_t time); - - /// Yield current execution thread. - static void threadYield(); - - protected: - /// set thread running state. - void setRunning(bool running); - - /// return thread running state. - bool isRunning(); - - /// function that controls the lifetime of the running thread. - static void runThread(LuceneThread* thread); - }; -} - -#endif diff --git a/include/LuceneTypes.h b/include/LuceneTypes.h deleted file mode 100644 index 71c93f32..00000000 --- a/include/LuceneTypes.h +++ /dev/null @@ -1,563 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef LUCENETYPES_H -#define LUCENETYPES_H - -#include "Lucene.h" - -#define DECLARE_SHARED_PTR(Type) \ - class Type; \ - typedef boost::shared_ptr Type##Ptr; \ - typedef boost::weak_ptr Type##WeakPtr; - -namespace Lucene -{ - // analysis - DECLARE_SHARED_PTR(Analyzer) - DECLARE_SHARED_PTR(ASCIIFoldingFilter) - DECLARE_SHARED_PTR(BaseCharFilter) - DECLARE_SHARED_PTR(CachingTokenFilter) - DECLARE_SHARED_PTR(CharArraySet) - DECLARE_SHARED_PTR(CharFilter) - DECLARE_SHARED_PTR(CharReader) - DECLARE_SHARED_PTR(CharStream) - DECLARE_SHARED_PTR(CharTokenizer) - DECLARE_SHARED_PTR(FlagsAttribute) - DECLARE_SHARED_PTR(ISOLatin1AccentFilter) - DECLARE_SHARED_PTR(KeywordAnalyzer) - DECLARE_SHARED_PTR(KeywordTokenizer) - DECLARE_SHARED_PTR(LengthFilter) - DECLARE_SHARED_PTR(LetterTokenizer) - DECLARE_SHARED_PTR(LowerCaseFilter) - DECLARE_SHARED_PTR(LowerCaseTokenizer) - DECLARE_SHARED_PTR(MappingCharFilter) - DECLARE_SHARED_PTR(NormalizeCharMap) - DECLARE_SHARED_PTR(NumericTokenStream) - DECLARE_SHARED_PTR(OffsetAttribute) - DECLARE_SHARED_PTR(PayloadAttribute) - DECLARE_SHARED_PTR(PerFieldAnalyzerWrapper) - DECLARE_SHARED_PTR(PorterStemFilter) - DECLARE_SHARED_PTR(PorterStemmer) - DECLARE_SHARED_PTR(PositionIncrementAttribute) - DECLARE_SHARED_PTR(SimpleAnalyzer) - DECLARE_SHARED_PTR(SinkFilter) - DECLARE_SHARED_PTR(SinkTokenStream) - DECLARE_SHARED_PTR(StandardAnalyzer) - DECLARE_SHARED_PTR(StandardAnalyzerSavedStreams) - DECLARE_SHARED_PTR(StandardFilter) - DECLARE_SHARED_PTR(StandardTokenizer) - DECLARE_SHARED_PTR(StandardTokenizerImpl) - DECLARE_SHARED_PTR(StopAnalyzer) - DECLARE_SHARED_PTR(StopAnalyzerSavedStreams) - DECLARE_SHARED_PTR(StopFilter) - DECLARE_SHARED_PTR(TeeSinkTokenFilter) - DECLARE_SHARED_PTR(TermAttribute) - DECLARE_SHARED_PTR(Token) - DECLARE_SHARED_PTR(TokenAttributeFactory) - DECLARE_SHARED_PTR(TokenFilter) - DECLARE_SHARED_PTR(Tokenizer) - 
DECLARE_SHARED_PTR(TokenStream) - DECLARE_SHARED_PTR(TypeAttribute) - DECLARE_SHARED_PTR(WhitespaceAnalyzer) - DECLARE_SHARED_PTR(WhitespaceTokenizer) - DECLARE_SHARED_PTR(WordlistLoader) - - // document - DECLARE_SHARED_PTR(AbstractField) - DECLARE_SHARED_PTR(CompressionTools) - DECLARE_SHARED_PTR(DateField) - DECLARE_SHARED_PTR(DateTools) - DECLARE_SHARED_PTR(Document) - DECLARE_SHARED_PTR(Field) - DECLARE_SHARED_PTR(Fieldable) - DECLARE_SHARED_PTR(FieldSelector) - DECLARE_SHARED_PTR(LoadFirstFieldSelector) - DECLARE_SHARED_PTR(MapFieldSelector) - DECLARE_SHARED_PTR(NumberTools) - DECLARE_SHARED_PTR(NumericField) - DECLARE_SHARED_PTR(SetBasedFieldSelector) - - // index - DECLARE_SHARED_PTR(AbstractAllTermDocs) - DECLARE_SHARED_PTR(AllTermDocs) - DECLARE_SHARED_PTR(BufferedDeletes) - DECLARE_SHARED_PTR(ByteBlockAllocator) - DECLARE_SHARED_PTR(ByteBlockPool) - DECLARE_SHARED_PTR(ByteBlockPoolAllocatorBase) - DECLARE_SHARED_PTR(ByteSliceReader) - DECLARE_SHARED_PTR(ByteSliceWriter) - DECLARE_SHARED_PTR(CharBlockPool) - DECLARE_SHARED_PTR(CheckAbort) - DECLARE_SHARED_PTR(CheckIndex) - DECLARE_SHARED_PTR(CommitPoint) - DECLARE_SHARED_PTR(CompoundFileReader) - DECLARE_SHARED_PTR(CompoundFileWriter) - DECLARE_SHARED_PTR(ConcurrentMergeScheduler) - DECLARE_SHARED_PTR(CoreReaders) - DECLARE_SHARED_PTR(CSIndexInput) - DECLARE_SHARED_PTR(DefaultIndexingChain) - DECLARE_SHARED_PTR(DefaultSkipListReader) - DECLARE_SHARED_PTR(DefaultSkipListWriter) - DECLARE_SHARED_PTR(DirectoryReader) - DECLARE_SHARED_PTR(DocConsumer) - DECLARE_SHARED_PTR(DocConsumerPerThread) - DECLARE_SHARED_PTR(DocFieldConsumer) - DECLARE_SHARED_PTR(DocFieldConsumerPerField) - DECLARE_SHARED_PTR(DocFieldConsumerPerThread) - DECLARE_SHARED_PTR(DocFieldConsumers) - DECLARE_SHARED_PTR(DocFieldConsumersPerDoc) - DECLARE_SHARED_PTR(DocFieldConsumersPerField) - DECLARE_SHARED_PTR(DocFieldConsumersPerThread) - DECLARE_SHARED_PTR(DocFieldProcessor) - DECLARE_SHARED_PTR(DocFieldProcessorPerField) - 
DECLARE_SHARED_PTR(DocFieldProcessorPerThread) - DECLARE_SHARED_PTR(DocFieldProcessorPerThreadPerDoc) - DECLARE_SHARED_PTR(DocInverter) - DECLARE_SHARED_PTR(DocInverterPerField) - DECLARE_SHARED_PTR(DocInverterPerThread) - DECLARE_SHARED_PTR(DocState) - DECLARE_SHARED_PTR(DocumentsWriter) - DECLARE_SHARED_PTR(DocumentsWriterThreadState) - DECLARE_SHARED_PTR(DocWriter) - DECLARE_SHARED_PTR(FieldInfo) - DECLARE_SHARED_PTR(FieldInfos) - DECLARE_SHARED_PTR(FieldInvertState) - DECLARE_SHARED_PTR(FieldNormStatus) - DECLARE_SHARED_PTR(FieldSortedTermVectorMapper) - DECLARE_SHARED_PTR(FieldsReader) - DECLARE_SHARED_PTR(FieldsReaderLocal) - DECLARE_SHARED_PTR(FieldsWriter) - DECLARE_SHARED_PTR(FilterIndexReader) - DECLARE_SHARED_PTR(FindSegmentsModified) - DECLARE_SHARED_PTR(FindSegmentsOpen) - DECLARE_SHARED_PTR(FindSegmentsRead) - DECLARE_SHARED_PTR(FindSegmentsReopen) - DECLARE_SHARED_PTR(FormatPostingsDocsConsumer) - DECLARE_SHARED_PTR(FormatPostingsDocsWriter) - DECLARE_SHARED_PTR(FormatPostingsFieldsConsumer) - DECLARE_SHARED_PTR(FormatPostingsFieldsWriter) - DECLARE_SHARED_PTR(FormatPostingsPositionsConsumer) - DECLARE_SHARED_PTR(FormatPostingsPositionsWriter) - DECLARE_SHARED_PTR(FormatPostingsTermsConsumer) - DECLARE_SHARED_PTR(FormatPostingsTermsWriter) - DECLARE_SHARED_PTR(FreqProxFieldMergeState) - DECLARE_SHARED_PTR(FreqProxTermsWriter) - DECLARE_SHARED_PTR(FreqProxTermsWriterPerField) - DECLARE_SHARED_PTR(FreqProxTermsWriterPerThread) - DECLARE_SHARED_PTR(FreqProxTermsWriterPostingList) - DECLARE_SHARED_PTR(IndexCommit) - DECLARE_SHARED_PTR(IndexDeletionPolicy) - DECLARE_SHARED_PTR(IndexFileDeleter) - DECLARE_SHARED_PTR(IndexFileNameFilter) - DECLARE_SHARED_PTR(IndexingChain) - DECLARE_SHARED_PTR(IndexReader) - DECLARE_SHARED_PTR(IndexReaderWarmer) - DECLARE_SHARED_PTR(IndexStatus) - DECLARE_SHARED_PTR(IndexWriter) - DECLARE_SHARED_PTR(IntBlockPool) - DECLARE_SHARED_PTR(IntQueue) - DECLARE_SHARED_PTR(InvertedDocConsumer) - 
DECLARE_SHARED_PTR(InvertedDocConsumerPerField) - DECLARE_SHARED_PTR(InvertedDocConsumerPerThread) - DECLARE_SHARED_PTR(InvertedDocEndConsumer) - DECLARE_SHARED_PTR(InvertedDocEndConsumerPerField) - DECLARE_SHARED_PTR(InvertedDocEndConsumerPerThread) - DECLARE_SHARED_PTR(KeepOnlyLastCommitDeletionPolicy) - DECLARE_SHARED_PTR(LogByteSizeMergePolicy) - DECLARE_SHARED_PTR(LogDocMergePolicy) - DECLARE_SHARED_PTR(LogMergePolicy) - DECLARE_SHARED_PTR(MergeDocIDRemapper) - DECLARE_SHARED_PTR(MergePolicy) - DECLARE_SHARED_PTR(MergeScheduler) - DECLARE_SHARED_PTR(MergeSpecification) - DECLARE_SHARED_PTR(MergeThread) - DECLARE_SHARED_PTR(MultiLevelSkipListReader) - DECLARE_SHARED_PTR(MultiLevelSkipListWriter) - DECLARE_SHARED_PTR(MultipleTermPositions) - DECLARE_SHARED_PTR(MultiReader) - DECLARE_SHARED_PTR(MultiTermDocs) - DECLARE_SHARED_PTR(MultiTermEnum) - DECLARE_SHARED_PTR(MultiTermPositions) - DECLARE_SHARED_PTR(MyCommitPoint) - DECLARE_SHARED_PTR(MySegmentTermDocs) - DECLARE_SHARED_PTR(Norm) - DECLARE_SHARED_PTR(NormsWriter) - DECLARE_SHARED_PTR(NormsWriterPerField) - DECLARE_SHARED_PTR(NormsWriterPerThread) - DECLARE_SHARED_PTR(Num) - DECLARE_SHARED_PTR(OneMerge) - DECLARE_SHARED_PTR(ParallelArrayTermVectorMapper) - DECLARE_SHARED_PTR(ParallelReader) - DECLARE_SHARED_PTR(ParallelTermEnum) - DECLARE_SHARED_PTR(ParallelTermDocs) - DECLARE_SHARED_PTR(ParallelTermPositions) - DECLARE_SHARED_PTR(Payload) - DECLARE_SHARED_PTR(PerDocBuffer) - DECLARE_SHARED_PTR(PositionBasedTermVectorMapper) - DECLARE_SHARED_PTR(RawPostingList) - DECLARE_SHARED_PTR(ReaderCommit) - DECLARE_SHARED_PTR(ReaderPool) - DECLARE_SHARED_PTR(ReadOnlyDirectoryReader) - DECLARE_SHARED_PTR(ReadOnlySegmentReader) - DECLARE_SHARED_PTR(RefCount) - DECLARE_SHARED_PTR(ReusableStringReader) - DECLARE_SHARED_PTR(SegmentInfo) - DECLARE_SHARED_PTR(SegmentInfoCollection) - DECLARE_SHARED_PTR(SegmentInfos) - DECLARE_SHARED_PTR(SegmentInfoStatus) - DECLARE_SHARED_PTR(SegmentMergeInfo) - 
DECLARE_SHARED_PTR(SegmentMergeQueue) - DECLARE_SHARED_PTR(SegmentMerger) - DECLARE_SHARED_PTR(SegmentReader) - DECLARE_SHARED_PTR(SegmentReaderRef) - DECLARE_SHARED_PTR(SegmentTermDocs) - DECLARE_SHARED_PTR(SegmentTermEnum) - DECLARE_SHARED_PTR(SegmentTermPositions) - DECLARE_SHARED_PTR(SegmentTermPositionVector) - DECLARE_SHARED_PTR(SegmentTermVector) - DECLARE_SHARED_PTR(SegmentWriteState) - DECLARE_SHARED_PTR(SerialMergeScheduler) - DECLARE_SHARED_PTR(SingleTokenAttributeSource) - DECLARE_SHARED_PTR(SkipBuffer) - DECLARE_SHARED_PTR(SkipDocWriter) - DECLARE_SHARED_PTR(SnapshotDeletionPolicy) - DECLARE_SHARED_PTR(SortedTermVectorMapper) - DECLARE_SHARED_PTR(StoredFieldStatus) - DECLARE_SHARED_PTR(StoredFieldsWriter) - DECLARE_SHARED_PTR(StoredFieldsWriterPerDoc) - DECLARE_SHARED_PTR(StoredFieldsWriterPerThread) - DECLARE_SHARED_PTR(Term) - DECLARE_SHARED_PTR(TermBuffer) - DECLARE_SHARED_PTR(TermEnum) - DECLARE_SHARED_PTR(TermDocs) - DECLARE_SHARED_PTR(TermFreqVector) - DECLARE_SHARED_PTR(TermIndexStatus) - DECLARE_SHARED_PTR(TermInfo) - DECLARE_SHARED_PTR(TermInfosReader) - DECLARE_SHARED_PTR(TermInfosReaderThreadResources) - DECLARE_SHARED_PTR(TermInfosWriter) - DECLARE_SHARED_PTR(TermPositions) - DECLARE_SHARED_PTR(TermPositionsQueue) - DECLARE_SHARED_PTR(TermPositionVector) - DECLARE_SHARED_PTR(TermsHash) - DECLARE_SHARED_PTR(TermsHashConsumer) - DECLARE_SHARED_PTR(TermsHashConsumerPerField) - DECLARE_SHARED_PTR(TermsHashConsumerPerThread) - DECLARE_SHARED_PTR(TermsHashPerField) - DECLARE_SHARED_PTR(TermsHashPerThread) - DECLARE_SHARED_PTR(TermVectorEntry) - DECLARE_SHARED_PTR(TermVectorEntryFreqSortedComparator) - DECLARE_SHARED_PTR(TermVectorMapper) - DECLARE_SHARED_PTR(TermVectorOffsetInfo) - DECLARE_SHARED_PTR(TermVectorsReader) - DECLARE_SHARED_PTR(TermVectorStatus) - DECLARE_SHARED_PTR(TermVectorsTermsWriter) - DECLARE_SHARED_PTR(TermVectorsTermsWriterPerDoc) - DECLARE_SHARED_PTR(TermVectorsTermsWriterPerField) - 
DECLARE_SHARED_PTR(TermVectorsTermsWriterPerThread) - DECLARE_SHARED_PTR(TermVectorsTermsWriterPostingList) - DECLARE_SHARED_PTR(TermVectorsWriter) - DECLARE_SHARED_PTR(TermVectorsPositionInfo) - DECLARE_SHARED_PTR(WaitQueue) - - // query parser - DECLARE_SHARED_PTR(FastCharStream) - DECLARE_SHARED_PTR(MultiFieldQueryParser) - DECLARE_SHARED_PTR(QueryParser) - DECLARE_SHARED_PTR(QueryParserCharStream) - DECLARE_SHARED_PTR(QueryParserConstants) - DECLARE_SHARED_PTR(QueryParserToken) - DECLARE_SHARED_PTR(QueryParserTokenManager) - - // search - DECLARE_SHARED_PTR(AveragePayloadFunction) - DECLARE_SHARED_PTR(BooleanClause) - DECLARE_SHARED_PTR(BooleanQuery) - DECLARE_SHARED_PTR(BooleanScorer) - DECLARE_SHARED_PTR(BooleanScorerCollector) - DECLARE_SHARED_PTR(BooleanScorer2) - DECLARE_SHARED_PTR(BooleanWeight) - DECLARE_SHARED_PTR(Bucket) - DECLARE_SHARED_PTR(BucketScorer) - DECLARE_SHARED_PTR(BucketTable) - DECLARE_SHARED_PTR(ByteCache) - DECLARE_SHARED_PTR(ByteFieldSource) - DECLARE_SHARED_PTR(ByteParser) - DECLARE_SHARED_PTR(Cache) - DECLARE_SHARED_PTR(CachedDfSource) - DECLARE_SHARED_PTR(CachingSpanFilter) - DECLARE_SHARED_PTR(CachingWrapperFilter) - DECLARE_SHARED_PTR(CellQueue) - DECLARE_SHARED_PTR(Collector) - DECLARE_SHARED_PTR(ComplexExplanation) - DECLARE_SHARED_PTR(ConjunctionScorer) - DECLARE_SHARED_PTR(ConstantScoreAutoRewrite) - DECLARE_SHARED_PTR(ConstantScoreAutoRewriteDefault) - DECLARE_SHARED_PTR(ConstantScoreBooleanQueryRewrite) - DECLARE_SHARED_PTR(ConstantScoreFilterRewrite) - DECLARE_SHARED_PTR(ConstantScoreQuery) - DECLARE_SHARED_PTR(ConstantScorer) - DECLARE_SHARED_PTR(ConstantWeight) - DECLARE_SHARED_PTR(Coordinator) - DECLARE_SHARED_PTR(CountingConjunctionSumScorer) - DECLARE_SHARED_PTR(CountingDisjunctionSumScorer) - DECLARE_SHARED_PTR(CreationPlaceholder) - DECLARE_SHARED_PTR(CustomScoreProvider) - DECLARE_SHARED_PTR(CustomScoreQuery) - DECLARE_SHARED_PTR(CustomWeight) - DECLARE_SHARED_PTR(CustomScorer) - DECLARE_SHARED_PTR(DefaultByteParser) 
- DECLARE_SHARED_PTR(DefaultCustomScoreProvider) - DECLARE_SHARED_PTR(DefaultDoubleParser) - DECLARE_SHARED_PTR(DefaultIntParser) - DECLARE_SHARED_PTR(DefaultLongParser) - DECLARE_SHARED_PTR(DefaultSimilarity) - DECLARE_SHARED_PTR(DisjunctionMaxQuery) - DECLARE_SHARED_PTR(DisjunctionMaxScorer) - DECLARE_SHARED_PTR(DisjunctionMaxWeight) - DECLARE_SHARED_PTR(DisjunctionSumScorer) - DECLARE_SHARED_PTR(DocIdSet) - DECLARE_SHARED_PTR(DocIdSetIterator) - DECLARE_SHARED_PTR(DocValues) - DECLARE_SHARED_PTR(DoubleCache) - DECLARE_SHARED_PTR(DoubleFieldSource) - DECLARE_SHARED_PTR(DoubleParser) - DECLARE_SHARED_PTR(EmptyDocIdSet) - DECLARE_SHARED_PTR(EmptyDocIdSetIterator) - DECLARE_SHARED_PTR(Entry) - DECLARE_SHARED_PTR(ExactPhraseScorer) - DECLARE_SHARED_PTR(Explanation) - DECLARE_SHARED_PTR(FieldCache) - DECLARE_SHARED_PTR(FieldCacheDocIdSet) - DECLARE_SHARED_PTR(FieldCacheEntry) - DECLARE_SHARED_PTR(FieldCacheEntryImpl) - DECLARE_SHARED_PTR(FieldCacheImpl) - DECLARE_SHARED_PTR(FieldCacheRangeFilter) - DECLARE_SHARED_PTR(FieldCacheRangeFilterByte) - DECLARE_SHARED_PTR(FieldCacheRangeFilterDouble) - DECLARE_SHARED_PTR(FieldCacheRangeFilterInt) - DECLARE_SHARED_PTR(FieldCacheRangeFilterLong) - DECLARE_SHARED_PTR(FieldCacheRangeFilterString) - DECLARE_SHARED_PTR(FieldCacheSource) - DECLARE_SHARED_PTR(FieldCacheTermsFilter) - DECLARE_SHARED_PTR(FieldCacheTermsFilterDocIdSet) - DECLARE_SHARED_PTR(FieldComparator) - DECLARE_SHARED_PTR(FieldComparatorSource) - DECLARE_SHARED_PTR(FieldDoc) - DECLARE_SHARED_PTR(FieldDocIdSetIteratorIncrement) - DECLARE_SHARED_PTR(FieldDocIdSetIteratorTermDocs) - DECLARE_SHARED_PTR(FieldDocSortedHitQueue) - DECLARE_SHARED_PTR(FieldMaskingSpanQuery) - DECLARE_SHARED_PTR(FieldScoreQuery) - DECLARE_SHARED_PTR(FieldValueHitQueue) - DECLARE_SHARED_PTR(FieldValueHitQueueEntry) - DECLARE_SHARED_PTR(Filter) - DECLARE_SHARED_PTR(FilterCache) - DECLARE_SHARED_PTR(FilterCleaner) - DECLARE_SHARED_PTR(FilteredDocIdSet) - 
DECLARE_SHARED_PTR(FilteredDocIdSetIterator) - DECLARE_SHARED_PTR(FilteredQuery) - DECLARE_SHARED_PTR(FilteredQueryWeight) - DECLARE_SHARED_PTR(FilteredTermEnum) - DECLARE_SHARED_PTR(FilterItem) - DECLARE_SHARED_PTR(FilterManager) - DECLARE_SHARED_PTR(FuzzyQuery) - DECLARE_SHARED_PTR(FuzzyTermEnum) - DECLARE_SHARED_PTR(HitQueue) - DECLARE_SHARED_PTR(HitQueueBase) - DECLARE_SHARED_PTR(IDFExplanation) - DECLARE_SHARED_PTR(IndexSearcher) - DECLARE_SHARED_PTR(IntCache) - DECLARE_SHARED_PTR(IntFieldSource) - DECLARE_SHARED_PTR(IntParser) - DECLARE_SHARED_PTR(LongCache) - DECLARE_SHARED_PTR(LongParser) - DECLARE_SHARED_PTR(MatchAllDocsQuery) - DECLARE_SHARED_PTR(MatchAllDocsWeight) - DECLARE_SHARED_PTR(MatchAllScorer) - DECLARE_SHARED_PTR(MaxPayloadFunction) - DECLARE_SHARED_PTR(MinPayloadFunction) - DECLARE_SHARED_PTR(MultiComparatorsFieldValueHitQueue) - DECLARE_SHARED_PTR(MultiPhraseQuery) - DECLARE_SHARED_PTR(MultiSearcher) - DECLARE_SHARED_PTR(MultiSearcherCallableNoSort) - DECLARE_SHARED_PTR(MultiSearcherCallableWithSort) - DECLARE_SHARED_PTR(MultiTermQuery) - DECLARE_SHARED_PTR(MultiTermQueryWrapperFilter) - DECLARE_SHARED_PTR(NearSpansOrdered) - DECLARE_SHARED_PTR(NearSpansUnordered) - DECLARE_SHARED_PTR(NumericRangeFilter) - DECLARE_SHARED_PTR(NumericRangeQuery) - DECLARE_SHARED_PTR(NumericUtilsDoubleParser) - DECLARE_SHARED_PTR(NumericUtilsIntParser) - DECLARE_SHARED_PTR(NumericUtilsLongParser) - DECLARE_SHARED_PTR(OneComparatorFieldValueHitQueue) - DECLARE_SHARED_PTR(OrdFieldSource) - DECLARE_SHARED_PTR(ParallelMultiSearcher) - DECLARE_SHARED_PTR(Parser) - DECLARE_SHARED_PTR(PayloadFunction) - DECLARE_SHARED_PTR(PayloadNearQuery) - DECLARE_SHARED_PTR(PayloadNearSpanScorer) - DECLARE_SHARED_PTR(PayloadNearSpanWeight) - DECLARE_SHARED_PTR(PayloadSpanUtil) - DECLARE_SHARED_PTR(PayloadTermQuery) - DECLARE_SHARED_PTR(PayloadTermSpanScorer) - DECLARE_SHARED_PTR(PayloadTermWeight) - DECLARE_SHARED_PTR(PhrasePositions) - DECLARE_SHARED_PTR(PhraseQuery) - 
DECLARE_SHARED_PTR(PhraseQueue) - DECLARE_SHARED_PTR(PhraseScorer) - DECLARE_SHARED_PTR(PositionInfo) - DECLARE_SHARED_PTR(PositiveScoresOnlyCollector) - DECLARE_SHARED_PTR(PrefixFilter) - DECLARE_SHARED_PTR(PrefixQuery) - DECLARE_SHARED_PTR(PrefixTermEnum) - DECLARE_SHARED_PTR(PriorityQueueScoreDocs) - DECLARE_SHARED_PTR(Query) - DECLARE_SHARED_PTR(QueryTermVector) - DECLARE_SHARED_PTR(QueryWrapperFilter) - DECLARE_SHARED_PTR(ReqExclScorer) - DECLARE_SHARED_PTR(ReqOptSumScorer) - DECLARE_SHARED_PTR(RewriteMethod) - DECLARE_SHARED_PTR(ReverseOrdFieldSource) - DECLARE_SHARED_PTR(ScoreCachingWrappingScorer) - DECLARE_SHARED_PTR(ScoreDoc) - DECLARE_SHARED_PTR(Scorer) - DECLARE_SHARED_PTR(ScoreTerm) - DECLARE_SHARED_PTR(ScoreTermQueue) - DECLARE_SHARED_PTR(ScoringBooleanQueryRewrite) - DECLARE_SHARED_PTR(Searchable) - DECLARE_SHARED_PTR(Searcher) - DECLARE_SHARED_PTR(Similarity) - DECLARE_SHARED_PTR(SimilarityDisableCoord) - DECLARE_SHARED_PTR(SimilarityDelegator) - DECLARE_SHARED_PTR(SimilarityIDFExplanation) - DECLARE_SHARED_PTR(SingleMatchScorer) - DECLARE_SHARED_PTR(SingleTermEnum) - DECLARE_SHARED_PTR(SloppyPhraseScorer) - DECLARE_SHARED_PTR(Sort) - DECLARE_SHARED_PTR(SortField) - DECLARE_SHARED_PTR(SpanFilter) - DECLARE_SHARED_PTR(SpanFilterResult) - DECLARE_SHARED_PTR(SpanFirstQuery) - DECLARE_SHARED_PTR(SpanNearQuery) - DECLARE_SHARED_PTR(SpanNotQuery) - DECLARE_SHARED_PTR(SpanOrQuery) - DECLARE_SHARED_PTR(SpanQuery) - DECLARE_SHARED_PTR(SpanQueryFilter) - DECLARE_SHARED_PTR(SpanQueue) - DECLARE_SHARED_PTR(Spans) - DECLARE_SHARED_PTR(SpansCell) - DECLARE_SHARED_PTR(SpanScorer) - DECLARE_SHARED_PTR(SpanTermQuery) - DECLARE_SHARED_PTR(SpanWeight) - DECLARE_SHARED_PTR(StartEnd) - DECLARE_SHARED_PTR(StringCache) - DECLARE_SHARED_PTR(StringIndex) - DECLARE_SHARED_PTR(StringIndexCache) - DECLARE_SHARED_PTR(SubScorer) - DECLARE_SHARED_PTR(TermQuery) - DECLARE_SHARED_PTR(TermRangeFilter) - DECLARE_SHARED_PTR(TermRangeQuery) - DECLARE_SHARED_PTR(TermRangeTermEnum) - 
DECLARE_SHARED_PTR(TermScorer) - DECLARE_SHARED_PTR(TermSpans) - DECLARE_SHARED_PTR(TimeLimitingCollector) - DECLARE_SHARED_PTR(TimerThread) - DECLARE_SHARED_PTR(TopDocs) - DECLARE_SHARED_PTR(TopDocsCollector) - DECLARE_SHARED_PTR(TopFieldCollector) - DECLARE_SHARED_PTR(TopFieldDocs) - DECLARE_SHARED_PTR(TopScoreDocCollector) - DECLARE_SHARED_PTR(ValueSource) - DECLARE_SHARED_PTR(ValueSourceQuery) - DECLARE_SHARED_PTR(ValueSourceScorer) - DECLARE_SHARED_PTR(ValueSourceWeight) - DECLARE_SHARED_PTR(Weight) - DECLARE_SHARED_PTR(WildcardQuery) - DECLARE_SHARED_PTR(WildcardTermEnum) - - // store - DECLARE_SHARED_PTR(BufferedIndexInput) - DECLARE_SHARED_PTR(BufferedIndexOutput) - DECLARE_SHARED_PTR(ChecksumIndexInput) - DECLARE_SHARED_PTR(ChecksumIndexOutput) - DECLARE_SHARED_PTR(Directory) - DECLARE_SHARED_PTR(FileSwitchDirectory) - DECLARE_SHARED_PTR(FSDirectory) - DECLARE_SHARED_PTR(FSLockFactory) - DECLARE_SHARED_PTR(IndexInput) - DECLARE_SHARED_PTR(IndexOutput) - DECLARE_SHARED_PTR(InputFile) - DECLARE_SHARED_PTR(Lock) - DECLARE_SHARED_PTR(LockFactory) - DECLARE_SHARED_PTR(MMapDirectory) - DECLARE_SHARED_PTR(MMapIndexInput) - DECLARE_SHARED_PTR(NativeFSLock) - DECLARE_SHARED_PTR(NativeFSLockFactory) - DECLARE_SHARED_PTR(NoLock) - DECLARE_SHARED_PTR(NoLockFactory) - DECLARE_SHARED_PTR(OutputFile) - DECLARE_SHARED_PTR(RAMDirectory) - DECLARE_SHARED_PTR(RAMFile) - DECLARE_SHARED_PTR(RAMInputStream) - DECLARE_SHARED_PTR(RAMOutputStream) - DECLARE_SHARED_PTR(SimpleFSDirectory) - DECLARE_SHARED_PTR(SimpleFSIndexInput) - DECLARE_SHARED_PTR(SimpleFSIndexOutput) - DECLARE_SHARED_PTR(SimpleFSLock) - DECLARE_SHARED_PTR(SimpleFSLockFactory) - DECLARE_SHARED_PTR(SingleInstanceLock) - DECLARE_SHARED_PTR(SingleInstanceLockFactory) - - // util - DECLARE_SHARED_PTR(Attribute) - DECLARE_SHARED_PTR(AttributeFactory) - DECLARE_SHARED_PTR(AttributeSource) - DECLARE_SHARED_PTR(AttributeSourceState) - DECLARE_SHARED_PTR(BitSet) - DECLARE_SHARED_PTR(BitVector) - 
DECLARE_SHARED_PTR(BufferedReader) - DECLARE_SHARED_PTR(Collator) - DECLARE_SHARED_PTR(DefaultAttributeFactory) - DECLARE_SHARED_PTR(DocIdBitSet) - DECLARE_SHARED_PTR(FieldCacheSanityChecker) - DECLARE_SHARED_PTR(FileReader) - DECLARE_SHARED_PTR(Future) - DECLARE_SHARED_PTR(HeapedScorerDoc) - DECLARE_SHARED_PTR(InfoStream) - DECLARE_SHARED_PTR(InfoStreamFile) - DECLARE_SHARED_PTR(InfoStreamOut) - DECLARE_SHARED_PTR(InputStreamReader) - DECLARE_SHARED_PTR(Insanity) - DECLARE_SHARED_PTR(IntRangeBuilder) - DECLARE_SHARED_PTR(LongRangeBuilder) - DECLARE_SHARED_PTR(LuceneObject) - DECLARE_SHARED_PTR(LuceneSignal) - DECLARE_SHARED_PTR(LuceneThread) - DECLARE_SHARED_PTR(NumericUtils) - DECLARE_SHARED_PTR(OpenBitSet) - DECLARE_SHARED_PTR(OpenBitSetDISI) - DECLARE_SHARED_PTR(OpenBitSetIterator) - DECLARE_SHARED_PTR(Random) - DECLARE_SHARED_PTR(Reader) - DECLARE_SHARED_PTR(ReaderField) - DECLARE_SHARED_PTR(ScorerDocQueue) - DECLARE_SHARED_PTR(SortedVIntList) - DECLARE_SHARED_PTR(StringReader) - DECLARE_SHARED_PTR(Synchronize) - DECLARE_SHARED_PTR(ThreadPool) - DECLARE_SHARED_PTR(UnicodeResult) - DECLARE_SHARED_PTR(UTF8Decoder) - DECLARE_SHARED_PTR(UTF8DecoderStream) - DECLARE_SHARED_PTR(UTF8Encoder) - DECLARE_SHARED_PTR(UTF8EncoderStream) - DECLARE_SHARED_PTR(UTF8Result) - DECLARE_SHARED_PTR(UTF16Decoder) -} - -#endif diff --git a/include/MMapDirectory.h b/include/MMapDirectory.h deleted file mode 100644 index e3104b1f..00000000 --- a/include/MMapDirectory.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef MMAPDIRECTORY_H -#define MMAPDIRECTORY_H - -#include "FSDirectory.h" - -namespace Lucene -{ - /// File-based {@link Directory} implementation that uses mmap for reading, and {@link SimpleFSIndexOutput} for writing. - /// - /// NOTE: memory mapping uses up a portion of the virtual memory address space in your process equal to the size of the - /// file being mapped. Before using this class, be sure your have plenty of virtual address space. - /// - /// NOTE: Accessing this class either directly or indirectly from a thread while it's interrupted can close the - /// underlying channel immediately if at the same time the thread is blocked on IO. The channel will remain closed and - /// subsequent access to {@link MMapDirectory} will throw an exception. - class LPPAPI MMapDirectory : public FSDirectory - { - public: - /// Create a new MMapDirectory for the named location. - /// @param path the path of the directory. - /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) - MMapDirectory(const String& path, LockFactoryPtr lockFactory = LockFactoryPtr()); - - virtual ~MMapDirectory(); - - LUCENE_CLASS(MMapDirectory); - - public: - using FSDirectory::openInput; - - /// Creates an IndexInput for the file with the given name. - virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); - - /// Creates an IndexOutput for the file with the given name. - virtual IndexOutputPtr createOutput(const String& name); - }; -} - -#endif diff --git a/include/Map.h b/include/Map.h deleted file mode 100644 index db466e5c..00000000 --- a/include/Map.h +++ /dev/null @@ -1,151 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef MAP_H -#define MAP_H - -#include -#include "LuceneSync.h" - -namespace Lucene -{ - /// Utility template class to handle maps that can be safely copied and shared - template < class KEY, class VALUE, class LESS = std::less > - class Map : public LuceneSync - { - public: - typedef Map this_type; - typedef std::pair key_value; - typedef std::map< KEY, VALUE, LESS, Allocator > map_type; - typedef typename map_type::iterator iterator; - typedef typename map_type::const_iterator const_iterator; - typedef KEY key_type; - typedef VALUE value_type; - - virtual ~Map() - { - } - - protected: - boost::shared_ptr mapContainer; - - public: - static this_type newInstance() - { - this_type instance; - instance.mapContainer = Lucene::newInstance(); - return instance; - } - - void reset() - { - mapContainer.reset(); - } - - int32_t size() const - { - return (int32_t)mapContainer->size(); - } - - bool empty() const - { - return mapContainer->empty(); - } - - void clear() - { - mapContainer->clear(); - } - - iterator begin() - { - return mapContainer->begin(); - } - - iterator end() - { - return mapContainer->end(); - } - - const_iterator begin() const - { - return mapContainer->begin(); - } - - const_iterator end() const - { - return mapContainer->end(); - } - - operator bool() const - { - return mapContainer; - } - - bool operator! 
() const - { - return !mapContainer; - } - - map_type& operator= (const map_type& other) - { - mapContainer = other.mapContainer; - return *this; - } - - void put(const KEY& key, const VALUE& value) - { - (*mapContainer)[key] = value; - } - - template - void putAll(ITER first, ITER last) - { - for (iterator current = first; current != last; ++current) - (*mapContainer)[current->first] = current->second; - } - - template - void remove(ITER pos) - { - mapContainer->erase(pos); - } - - template - ITER remove(ITER first, ITER last) - { - return mapContainer->erase(first, last); - } - - bool remove(const KEY& key) - { - return (mapContainer->erase(key) > 0); - } - - iterator find(const KEY& key) - { - return mapContainer->find(key); - } - - VALUE get(const KEY& key) const - { - iterator findValue = mapContainer->find(key); - return findValue == mapContainer->end() ? VALUE() : findValue->second; - } - - bool contains(const KEY& key) const - { - return (mapContainer->find(key) != mapContainer->end()); - } - - VALUE& operator[] (const KEY& key) - { - return (*mapContainer)[key]; - } - }; -} - -#endif diff --git a/include/MapFieldSelector.h b/include/MapFieldSelector.h deleted file mode 100644 index f633c207..00000000 --- a/include/MapFieldSelector.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef MAPFIELDSELECTOR_H -#define MAPFIELDSELECTOR_H - -#include "FieldSelector.h" - -namespace Lucene -{ - typedef HashMap MapStringFieldSelectorResult; - - /// A {@link FieldSelector} based on a Map of field names to {@link FieldSelectorResult}s - class LPPAPI MapFieldSelector : public FieldSelector - { - public: - /// Create a MapFieldSelector - /// @param fieldSelections maps from field names (String) to {@link FieldSelectorResult}s - MapFieldSelector(MapStringFieldSelectorResult fieldSelections); - - /// Create a MapFieldSelector - /// @param fields fields to LOAD. List of Strings. All other fields are NO_LOAD. - MapFieldSelector(Collection fields); - - virtual ~MapFieldSelector(); - - LUCENE_CLASS(MapFieldSelector); - - public: - MapStringFieldSelectorResult fieldSelections; - - public: - /// Load field according to its associated value in fieldSelections - /// @param field a field name - /// @return the fieldSelections value that field maps to or NO_LOAD if none. - virtual FieldSelectorResult accept(const String& fieldName); - }; -} - -#endif diff --git a/include/MapOfSets.h b/include/MapOfSets.h deleted file mode 100644 index 768671f4..00000000 --- a/include/MapOfSets.h +++ /dev/null @@ -1,78 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MAPOFSETS_H -#define MAPOFSETS_H - -#include "Lucene.h" - -namespace Lucene -{ - /// Helper class for keeping Lists of Objects associated with keys. 
- template - class MapOfSets - { - public: - typedef HashSet set_type; - typedef HashMap map_type; - - MapOfSets(map_type m) - { - theMap = m; - } - - protected: - map_type theMap; - - public: - /// @return direct access to the map backing this object. - map_type getMap() - { - return theMap; - } - - /// Adds val to the HashSet associated with key in the HashMap. If key is not already in the map, - /// a new HashSet will first be created. - /// @return the size of the HashSet associated with key once val is added to it. - int32_t put(MAPKEY key, SETVALUE val) - { - typename map_type::iterator entry = theMap.find(key); - if (entry != theMap.end()) - { - entry->second.add(val); - return entry->second.size(); - } - else - { - set_type theSet(set_type::newInstance()); - theSet.add(val); - theMap.put(key, theSet); - return 1; - } - } - - /// Adds multiple vals to the HashSet associated with key in the HashMap. If key is not already in - /// the map, a new HashSet will first be created. - /// @return the size of the HashSet associated with key once val is added to it. - int32_t putAll(MAPKEY key, set_type vals) - { - typename map_type::iterator entry = theMap.find(key); - if (entry != theMap.end()) - { - entry->second.addAll(vals.begin(), vals.end()); - return entry->second.size(); - } - else - { - set_type theSet(set_type::newInstance(vals.begin(), vals.end())); - theMap.put(key, theSet); - return theSet.size(); - } - } - }; -} - -#endif diff --git a/include/MappingCharFilter.h b/include/MappingCharFilter.h deleted file mode 100644 index b65093e4..00000000 --- a/include/MappingCharFilter.h +++ /dev/null @@ -1,48 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef MAPPINGCHARFILTER_H -#define MAPPINGCHARFILTER_H - -#include "BaseCharFilter.h" - -namespace Lucene -{ - /// Simplistic {@link CharFilter} that applies the mappings contained in a {@link NormalizeCharMap} to the character - /// stream, and correcting the resulting changes to the offsets. - class LPPAPI MappingCharFilter : public BaseCharFilter - { - public: - /// Default constructor that takes a {@link CharStream}. - MappingCharFilter(NormalizeCharMapPtr normMap, CharStreamPtr in); - - /// Easy-use constructor that takes a {@link Reader}. - MappingCharFilter(NormalizeCharMapPtr normMap, ReaderPtr in); - - virtual ~MappingCharFilter(); - - LUCENE_CLASS(MappingCharFilter); - - protected: - NormalizeCharMapPtr normMap; - Collection buffer; - String replacement; - int32_t charPointer; - int32_t nextCharCounter; - - public: - virtual int32_t read(); - virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); - - protected: - int32_t nextChar(); - void pushChar(int32_t c); - void pushLastChar(int32_t c); - NormalizeCharMapPtr match(NormalizeCharMapPtr map); - }; -} - -#endif diff --git a/include/MatchAllDocsQuery.h b/include/MatchAllDocsQuery.h deleted file mode 100644 index 63df02c8..00000000 --- a/include/MatchAllDocsQuery.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MATCHALLDOCSQUERY_H -#define MATCHALLDOCSQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// A query that matches all documents. 
- class LPPAPI MatchAllDocsQuery : public Query - { - public: - /// @param normsField Field used for normalization factor (document boost). Null if nothing. - MatchAllDocsQuery(const String& normsField = EmptyString); - - virtual ~MatchAllDocsQuery(); - - LUCENE_CLASS(MatchAllDocsQuery); - - protected: - String normsField; - - public: - using Query::toString; - - virtual WeightPtr createWeight(SearcherPtr searcher); - virtual void extractTerms(SetTerm terms); - virtual String toString(const String& field); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - friend class MatchAllDocsWeight; - }; -} - -#endif diff --git a/include/MaxPayloadFunction.h b/include/MaxPayloadFunction.h deleted file mode 100644 index a69aa479..00000000 --- a/include/MaxPayloadFunction.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MAXPAYLOADFUNCTION_H -#define MAXPAYLOADFUNCTION_H - -#include "PayloadFunction.h" - -namespace Lucene -{ - /// Returns the maximum payload score seen, else 1 if there are no payloads on the doc. - /// - /// Is thread safe and completely reusable. 
- class LPPAPI MaxPayloadFunction : public PayloadFunction - { - public: - virtual ~MaxPayloadFunction(); - LUCENE_CLASS(MaxPayloadFunction); - - public: - virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, - double currentScore, double currentPayloadScore); - virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - }; -} - -#endif diff --git a/include/MergeDocIDRemapper.h b/include/MergeDocIDRemapper.h deleted file mode 100644 index 3f0e1f3a..00000000 --- a/include/MergeDocIDRemapper.h +++ /dev/null @@ -1,38 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MERGEDOCIDREMAPPER_H -#define MERGEDOCIDREMAPPER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Remaps docIDs after a merge has completed, where the merged segments had at least one deletion. - /// This is used to renumber the buffered deletes in IndexWriter when a merge of segments with deletions - /// commits. 
- class MergeDocIDRemapper : public LuceneObject - { - public: - MergeDocIDRemapper(SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergedDocCount); - virtual ~MergeDocIDRemapper(); - - LUCENE_CLASS(MergeDocIDRemapper); - - public: - Collection starts; // used for binary search of mapped docID - Collection newStarts; // starts, minus the deletes - Collection< Collection > docMaps; // maps docIDs in the merged set - int32_t minDocID; // minimum docID that needs renumbering - int32_t maxDocID; // 1+ the max docID that needs renumbering - int32_t docShift; // total # deleted docs that were compacted by this merge - - public: - int32_t remap(int32_t oldDocID); - }; -} - -#endif diff --git a/include/MergePolicy.h b/include/MergePolicy.h deleted file mode 100644 index 5ba5beb6..00000000 --- a/include/MergePolicy.h +++ /dev/null @@ -1,138 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MERGEPOLICY_H -#define MERGEPOLICY_H - -#include "SegmentInfos.h" - -namespace Lucene -{ - /// A MergePolicy determines the sequence of primitive merge operations to be used for overall merge - /// and optimize operations. - /// - /// Whenever the segments in an index have been altered by {@link IndexWriter}, either the addition of - /// a newly flushed segment, addition of many segments from addIndexes* calls, or a previous merge that - /// may now need to cascade, {@link IndexWriter} invokes {@link #findMerges} to give the MergePolicy a - /// chance to pick merges that are now required. 
This method returns a {@link MergeSpecification} - /// instance describing the set of merges that should be done, or null if no merges are necessary. - /// When IndexWriter.optimize is called, it calls {@link #findMergesForOptimize} and the MergePolicy - /// should then return the necessary merges. - /// - /// Note that the policy can return more than one merge at a time. In this case, if the writer is using - /// {@link SerialMergeScheduler}, the merges will be run sequentially but if it is using {@link - /// ConcurrentMergeScheduler} they will be run concurrently. - /// - /// The default MergePolicy is {@link LogByteSizeMergePolicy}. - /// - /// NOTE: This API is new and still experimental (subject to change suddenly in the next release) - class LPPAPI MergePolicy : public LuceneObject - { - public: - MergePolicy(IndexWriterPtr writer); - virtual ~MergePolicy(); - - LUCENE_CLASS(MergePolicy); - - protected: - IndexWriterWeakPtr _writer; - - public: - /// Determine what set of merge operations are now necessary on the index. {@link IndexWriter} calls - /// this whenever there is a change to the segments. This call is always synchronized on the {@link - /// IndexWriter} instance so only one thread at a time will call this method. - /// @param segmentInfos the total set of segments in the index - virtual MergeSpecificationPtr findMerges(SegmentInfosPtr segmentInfos) = 0; - - /// Determine what set of merge operations is necessary in order to optimize the index. {@link - /// IndexWriter} calls this when its {@link IndexWriter#optimize()} method is called. This call is - /// always synchronized on the {@link IndexWriter} instance so only one thread at a time will call - /// this method. - /// @param segmentInfos the total set of segments in the index - /// @param maxSegmentCount requested maximum number of segments in the index (currently this is always 1) - /// @param segmentsToOptimize contains the specific SegmentInfo instances that must be merged away. 
- /// This may be a subset of all SegmentInfos. - virtual MergeSpecificationPtr findMergesForOptimize(SegmentInfosPtr segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize) = 0; - - /// Determine what set of merge operations is necessary in order to expunge all deletes from the index. - /// @param segmentInfos the total set of segments in the index - virtual MergeSpecificationPtr findMergesToExpungeDeletes(SegmentInfosPtr segmentInfos) = 0; - - /// Release all resources for the policy. - virtual void close() = 0; - - /// Returns true if a newly flushed (not from merge) segment should use the compound file format. - virtual bool useCompoundFile(SegmentInfosPtr segments, SegmentInfoPtr newSegment) = 0; - - /// Returns true if the doc store files should use the compound file format. - virtual bool useCompoundDocStore(SegmentInfosPtr segments) = 0; - }; - - /// OneMerge provides the information necessary to perform an individual primitive merge operation, - /// resulting in a single new segment. The merge spec includes the subset of segments to be merged - /// as well as whether the new segment should use the compound file format. 
- class LPPAPI OneMerge : public LuceneObject - { - public: - OneMerge(SegmentInfosPtr segments, bool useCompoundFile); - virtual ~OneMerge(); - - LUCENE_CLASS(OneMerge); - - public: - SegmentInfoPtr info; // used by IndexWriter - bool mergeDocStores; // used by IndexWriter - bool optimize; // used by IndexWriter - bool registerDone; // used by IndexWriter - int64_t mergeGen; // used by IndexWriter - bool isExternal; // used by IndexWriter - int32_t maxNumSegmentsOptimize; // used by IndexWriter - Collection readers; // used by IndexWriter - Collection readersClone; // used by IndexWriter - - SegmentInfosPtr segments; - bool useCompoundFile; - bool aborted; - LuceneException error; - - public: - /// Record that an exception occurred while executing this merge - void setException(const LuceneException& error); - - /// Retrieve previous exception set by {@link #setException}. - LuceneException getException(); - - /// Mark this merge as aborted. If this is called before the merge is committed then the merge will not be committed. - void abort(); - - /// Returns true if this merge was aborted. - bool isAborted(); - - void checkAborted(DirectoryPtr dir); - - String segString(DirectoryPtr dir); - }; - - /// A MergeSpecification instance provides the information necessary to perform multiple merges. - /// It simply contains a list of {@link OneMerge} instances. - class LPPAPI MergeSpecification : public LuceneObject - { - public: - MergeSpecification(); - virtual ~MergeSpecification(); - - LUCENE_CLASS(MergeSpecification); - - public: - Collection merges; - - public: - void add(OneMergePtr merge); - String segString(DirectoryPtr dir); - }; -} - -#endif diff --git a/include/MergeScheduler.h b/include/MergeScheduler.h deleted file mode 100644 index 25825dcd..00000000 --- a/include/MergeScheduler.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MERGESCHEDULER_H -#define MERGESCHEDULER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// {@link IndexWriter} uses an instance implementing this interface to execute the merges - /// selected by a {@link MergePolicy}. The default MergeScheduler is {@link ConcurrentMergeScheduler}. - class LPPAPI MergeScheduler : public LuceneObject - { - public: - virtual ~MergeScheduler(); - - LUCENE_CLASS(MergeScheduler); - - public: - /// Run the merges provided by {@link IndexWriter#getNextMerge()}. - virtual void merge(IndexWriterPtr writer) = 0; - - /// Close this MergeScheduler. - virtual void close() = 0; - }; -} - -#endif diff --git a/include/MinPayloadFunction.h b/include/MinPayloadFunction.h deleted file mode 100644 index c15c098a..00000000 --- a/include/MinPayloadFunction.h +++ /dev/null @@ -1,30 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef MINPAYLOADFUNCTION_H -#define MINPAYLOADFUNCTION_H - -#include "PayloadFunction.h" - -namespace Lucene -{ - /// Calculates the minimum payload seen - class LPPAPI MinPayloadFunction : public PayloadFunction - { - public: - virtual ~MinPayloadFunction(); - LUCENE_CLASS(MinPayloadFunction); - - public: - virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, - double currentScore, double currentPayloadScore); - virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - }; -} - -#endif diff --git a/include/MiscUtils.h b/include/MiscUtils.h deleted file mode 100644 index ed65f70c..00000000 --- a/include/MiscUtils.h +++ /dev/null @@ -1,142 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MISCUTILS_H -#define MISCUTILS_H - -#include "Lucene.h" - -namespace Lucene -{ - class LPPAPI MiscUtils - { - protected: - static const uint32_t SINGLE_EXPONENT_MASK; - static const uint32_t SINGLE_MANTISSA_MASK; - static const uint32_t SINGLE_NAN_BITS; - - static const uint64_t DOUBLE_SIGN_MASK; - static const uint64_t DOUBLE_EXPONENT_MASK; - static const uint64_t DOUBLE_MANTISSA_MASK; - static const uint64_t DOUBLE_NAN_BITS; - - public: - /// Return given time in milliseconds. - static uint64_t getTimeMillis(boost::posix_time::ptime time); - - /// Returns the current time in milliseconds. 
- static uint64_t currentTimeMillis(); - - /// This over-allocates proportional to the list size, making room for additional growth. - /// The over-allocation is mild, but is enough to give linear-time amortized behavior over a long - /// sequence of appends(). - /// The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... - static int32_t getNextSize(int32_t targetSize); - - /// Only reallocate if we are "substantially" smaller. This saves us from "running hot" (constantly - /// making a bit bigger then a bit smaller, over and over) - static int32_t getShrinkSize(int32_t currentSize, int32_t targetSize); - - /// Compares two byte[] arrays, element by element, and returns the number of elements common to - /// both arrays. - /// @param bytes1 The first byte[] to compare - /// @param bytes2 The second byte[] to compare - /// @return The number of common elements. - static int32_t bytesDifference(uint8_t* bytes1, int32_t len1, uint8_t* bytes2, int32_t len2); - - template - static int32_t hashLucene(TYPE type) - { - return type->hashCode(); - } - - template - static int32_t hashNumeric(TYPE type) - { - return type; - } - - template - static int32_t hashCode(ITER first, ITER last, PRED pred) - { - int32_t code = 0; - for (ITER hash = first; hash != last; ++hash) - code = code * 31 + pred(*hash); - return code; - } - - /// Returns hash of chars in range start (inclusive) to end (inclusive) - static int32_t hashCode(const wchar_t* array, int32_t start, int32_t end); - - /// Returns hash of bytes in range start (inclusive) to end (inclusive) - static int32_t hashCode(const uint8_t* array, int32_t start, int32_t end); - - /// Returns hash code of given boolean - static int32_t hashCode(bool value); - - /// Copy elements from on buffer to another - template - static void arrayCopy(SOURCE source, int32_t sourceOffset, DEST dest, int32_t destOffset, int32_t length) - { - std::copy(source + sourceOffset, source + sourceOffset + length, dest + destOffset); - } - - /// 
Fill buffer with given element - template - static void arrayFill(DEST dest, int32_t destFrom, int32_t destTo, FILL value) - { - std::fill(dest + destFrom, dest + destTo, value); - } - - /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point - /// "single format" bit layout. - static int32_t doubleToIntBits(double value); - - /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point - /// "single format" bit layout, preserving Not-a-Number (NaN) values. - static int32_t doubleToRawIntBits(double value); - - /// Returns the float value corresponding to a given bit representation. The argument is considered to be a - /// representation of a floating-point value according to the IEEE 754 floating-point "single format" bit layout. - static double intBitsToDouble(int32_t bits); - - /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point - /// "double format" bit layout. - static int64_t doubleToLongBits(double value); - - /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point - /// "double format" bit layout, preserving Not-a-Number (NaN) values. - static int64_t doubleToRawLongBits(double value); - - /// Returns the double value corresponding to a given bit representation. The argument is considered to be a - /// representation of a floating-point value according to the IEEE 754 floating-point "double format" bit layout. - static double longBitsToDouble(int64_t bits); - - /// Returns true if the specified number is infinitely large in magnitude, false otherwise. - static bool isInfinite(double value); - - /// Returns true if this Double value is a Not-a-Number (NaN), false otherwise. 
- static bool isNaN(double value); - - /// Return whether given Lucene object is of a specified type - template - static bool typeOf(LuceneObjectPtr object) - { - return boost::dynamic_pointer_cast(object); - } - - /// Return whether given Lucene objects are of equal type. - static bool equalTypes(LuceneObjectPtr first, LuceneObjectPtr second); - - /// Perform unsigned right-shift (left bits are zero filled) - static int64_t unsignedShift(int64_t num, int64_t shift); - - /// Perform unsigned right-shift (left bits are zero filled) - static int32_t unsignedShift(int32_t num, int32_t shift); - }; -} - -#endif diff --git a/include/MultiFieldQueryParser.h b/include/MultiFieldQueryParser.h deleted file mode 100644 index 252e75bb..00000000 --- a/include/MultiFieldQueryParser.h +++ /dev/null @@ -1,138 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTIFIELDQUERYPARSER_H -#define MULTIFIELDQUERYPARSER_H - -#include "QueryParser.h" -#include "BooleanClause.h" - -namespace Lucene -{ - /// A QueryParser which constructs queries to search multiple fields. - class LPPAPI MultiFieldQueryParser : public QueryParser - { - public: - /// Creates a MultiFieldQueryParser. Allows passing of a map with term to Boost, and the boost to - /// apply to each term. - /// - /// It will, when parse(String query) is called, construct a query like this (assuming the query - /// consists of two terms and you specify the two fields title and body): - ///
-        /// (title:term1 body:term1) (title:term2 body:term2)
-        /// 
- /// - /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: - ///
-        /// +(title:term1 body:term1) +(title:term2 body:term2)
-        /// 
- /// - /// When you pass a boost (title=>5 body=>10) you can get: - ///
-        /// +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
-        /// 
- /// - /// In other words, all the query's terms must appear, but it doesn't matter in what fields they - /// appear. - MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, AnalyzerPtr analyzer, MapStringDouble boosts); - - /// Creates a MultiFieldQueryParser. It will, when parse(String query) is called, construct a - /// query like this (assuming the query consists of two terms and you specify the two fields - /// title and body): - ///
-        /// (title:term1 body:term1) (title:term2 body:term2)
-        /// 
- /// - /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: - ///
-        /// +(title:term1 body:term1) +(title:term2 body:term2)
-        /// 
- /// - /// In other words, all the query's terms must appear, but it doesn't matter in what fields they - /// appear. - MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, AnalyzerPtr analyzer); - - virtual ~MultiFieldQueryParser(); - - LUCENE_CLASS(MultiFieldQueryParser); - - protected: - Collection fields; - MapStringDouble boosts; - - public: - using QueryParser::parse; - - /// Parses a query which searches on the fields specified. - /// - /// If x fields are specified, this effectively constructs: - ///
-        /// (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
-        /// 
- /// @param matchVersion Lucene version to match; this is passed through to QueryParser. - /// @param queries Queries strings to parse - /// @param fields Fields to search on - /// @param analyzer Analyzer to use - static QueryPtr parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, AnalyzerPtr analyzer); - - /// Parses a query, searching on the fields specified. Use this if you need to specify certain fields as - /// required, and others as prohibited. - /// - ///
-        /// Usage:
-        /// Collection fields = newCollection(L"filename", L"contents", L"description");
-        /// Collection flags = newCollection(BooleanClause::SHOULD, BooleanClause::MUST, BooleanClause::MUST_NOT);
-        /// MultiFieldQueryParser::parse(L"query", fields, flags, analyzer);
-        /// 
- /// - /// The code above would construct a query: - ///
-        /// (filename:query) +(contents:query) -(description:query)
-        /// 
- /// - /// @param matchVersion Lucene version to match; this is passed through to QueryParser. - /// @param query Query string to parse - /// @param fields Fields to search on - /// @param flags Flags describing the fields - /// @param analyzer Analyzer to use - static QueryPtr parse(LuceneVersion::Version matchVersion, const String& query, Collection fields, Collection flags, AnalyzerPtr analyzer); - - /// Parses a query, searching on the fields specified. Use this if you need to specify certain fields as - /// required, and others as prohibited. - /// - ///
-        /// Usage:
-        /// Collection query = newCollection(L"query1", L"query2", L"query3");
-        /// Collection fields = newCollection(L"filename", L"contents", L"description");
-        /// Collection flags = newCollection(BooleanClause::SHOULD, BooleanClause::MUST, BooleanClause::MUST_NOT);
-        /// MultiFieldQueryParser::parse(query, fields, flags, analyzer);
-        /// 
- /// - /// The code above would construct a query: - ///
-        /// (filename:query1) +(contents:query2) -(description:query3)
-        /// 
- /// - /// @param matchVersion Lucene version to match; this is passed through to QueryParser. - /// @param queries Queries string to parse - /// @param fields Fields to search on - /// @param flags Flags describing the fields - /// @param analyzer Analyzer to use - static QueryPtr parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, Collection flags, AnalyzerPtr analyzer); - - protected: - virtual QueryPtr getFieldQuery(const String& field, const String& queryText, int32_t slop); - virtual QueryPtr getFieldQuery(const String& field, const String& queryText); - void applySlop(QueryPtr query, int32_t slop); - - virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity); - virtual QueryPtr getPrefixQuery(const String& field, const String& termStr); - virtual QueryPtr getWildcardQuery(const String& field, const String& termStr); - virtual QueryPtr getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); - }; -} - -#endif diff --git a/include/MultiLevelSkipListReader.h b/include/MultiLevelSkipListReader.h deleted file mode 100644 index ae0f0ce8..00000000 --- a/include/MultiLevelSkipListReader.h +++ /dev/null @@ -1,123 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTILEVELSKIPLISTREADER_H -#define MULTILEVELSKIPLISTREADER_H - -#include "IndexInput.h" - -namespace Lucene -{ - /// This abstract class reads skip lists with multiple levels. - /// - /// See {@link MultiLevelSkipListWriter} for the information about the encoding of the multi level skip lists. 
- /// - /// Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)} which defines the - /// actual format of the skip data. - class MultiLevelSkipListReader : public LuceneObject - { - public: - MultiLevelSkipListReader(IndexInputPtr skipStream, int32_t maxSkipLevels, int32_t skipInterval); - virtual ~MultiLevelSkipListReader(); - - LUCENE_CLASS(MultiLevelSkipListReader); - - protected: - /// the maximum number of skip levels possible for this index - int32_t maxNumberOfSkipLevels; - - /// number of levels in this skip list - int32_t numberOfSkipLevels; - - /// Defines the number of top skip levels to buffer in memory. Reducing this number results in less - /// memory usage, but possibly slower performance due to more random I/Os. Please notice that the space - /// each level occupies is limited by the skipInterval. The top level can not contain more than - /// skipLevel entries, the second top level can not contain more than skipLevel^2 entries and so forth. - int32_t numberOfLevelsToBuffer; - - int32_t docCount; - bool haveSkipped; - - Collection skipStream; // skipStream for each level - Collection skipPointer; // the start pointer of each skip level - Collection skipInterval; // skipInterval of each level - Collection numSkipped; // number of docs skipped per level - - Collection skipDoc; // doc id of current skip entry per level - int32_t lastDoc; // doc id of last read skip entry with docId <= target - Collection childPointer; // child pointer of current skip entry per level - int64_t lastChildPointer; // childPointer of last read skip entry with docId <= target - - bool inputIsBuffered; - - public: - /// Returns the id of the doc to which the last call of {@link #skipTo(int)} has skipped. - virtual int32_t getDoc(); - - /// Skips entries to the first beyond the current whose document number is greater than or equal to - /// target. Returns the current doc count. 
- virtual int32_t skipTo(int32_t target); - - virtual void close(); - - /// Initializes the reader. - virtual void init(int64_t skipPointer, int32_t df); - - protected: - virtual bool loadNextSkip(int32_t level); - - /// Seeks the skip entry on the given level - virtual void seekChild(int32_t level); - - /// Loads the skip levels - virtual void loadSkipLevels(); - - /// Subclasses must implement the actual skip data encoding in this method. - /// - /// @param level the level skip data shall be read from - /// @param skipStream the skip stream to read from - virtual int32_t readSkipData(int32_t level, IndexInputPtr skipStream) = 0; - - /// Copies the values of the last read skip entry on this level - virtual void setLastSkipData(int32_t level); - }; - - /// Used to buffer the top skip levels - class SkipBuffer : public IndexInput - { - public: - SkipBuffer(IndexInputPtr input, int32_t length); - virtual ~SkipBuffer(); - - LUCENE_CLASS(SkipBuffer); - - protected: - ByteArray data; - int64_t pointer; - int32_t pos; - - public: - /// Closes the stream to further operations. - virtual void close(); - - /// Returns the current position in this file, where the next read will occur. - virtual int64_t getFilePointer(); - - /// The number of bytes in the file. - virtual int64_t length(); - - /// Reads and returns a single byte. - virtual uint8_t readByte(); - - /// Reads a specified number of bytes into an array at the specified offset. - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); - - /// Sets current position in this file, where the next read will occur. - virtual void seek(int64_t pos); - }; -} - -#endif diff --git a/include/MultiLevelSkipListWriter.h b/include/MultiLevelSkipListWriter.h deleted file mode 100644 index e1b3a501..00000000 --- a/include/MultiLevelSkipListWriter.h +++ /dev/null @@ -1,76 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTILEVELSKIPLISTWRITER_H -#define MULTILEVELSKIPLISTWRITER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// This abstract class writes skip lists with multiple levels. - /// - /// Example for skipInterval = 3: - /// - /// c (skip level 2) - /// c c c (skip level 1) - /// x x x x x x x x x x (skip level 0) - /// d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) - /// 3 6 9 12 15 18 21 24 27 30 (df) - /// - /// d - document - /// x - skip data - /// c - skip data with child pointer - /// - /// Skip level i contains every skipInterval-th entry from skip level i-1. - /// Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))). - /// - /// Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1. - /// This guarantees a logarithmic amount of skips to find the target document. - /// - /// While this class takes care of writing the different skip levels, subclasses must define the - /// actual format of the skip data. - class MultiLevelSkipListWriter : public LuceneObject - { - public: - MultiLevelSkipListWriter(int32_t skipInterval, int32_t maxSkipLevels, int32_t df); - virtual ~MultiLevelSkipListWriter(); - - LUCENE_CLASS(MultiLevelSkipListWriter); - - protected: - /// number of levels in this skip list - int32_t numberOfSkipLevels; - - /// the skip interval in the list with level = 0 - int32_t skipInterval; - - /// for every skip level a different buffer is used - Collection skipBuffer; - - public: - /// Writes the current skip data to the buffers. The current document frequency determines - /// the max level is skip data is to be written to. 
- /// @param df the current document frequency - void bufferSkip(int32_t df); - - /// Writes the buffered skip lists to the given output. - /// @param output the IndexOutput the skip lists shall be written to - /// @return the pointer the skip list starts - int64_t writeSkip(IndexOutputPtr output); - - protected: - void init(); - virtual void resetSkip(); - - /// Subclasses must implement the actual skip data encoding in this method. - /// @param level the level skip data shall be writing for - /// @param skipBuffer the skip buffer to write to - virtual void writeSkipData(int32_t level, IndexOutputPtr skipBuffer) = 0; - }; -} - -#endif diff --git a/include/MultiPhraseQuery.h b/include/MultiPhraseQuery.h deleted file mode 100644 index 52656288..00000000 --- a/include/MultiPhraseQuery.h +++ /dev/null @@ -1,80 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTIPHRASEQUERY_H -#define MULTIPHRASEQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// MultiPhraseQuery is a generalized version of PhraseQuery, with an added method {@link #add(Term[])}. - /// To use this class, to search for the phrase "Microsoft app*" first use add(Term) on the term "Microsoft", - /// then find all terms that have "app" as prefix using IndexReader.terms(Term), and use - /// MultiPhraseQuery.add(Term[] terms) to add them to the query. - class LPPAPI MultiPhraseQuery : public Query - { - public: - MultiPhraseQuery(); - virtual ~MultiPhraseQuery(); - - LUCENE_CLASS(MultiPhraseQuery); - - protected: - String field; - Collection< Collection > termArrays; - Collection positions; - int32_t slop; - - public: - using Query::toString; - - /// Sets the phrase slop for this query. 
- /// @see PhraseQuery#setSlop(int32_t) - void setSlop(int32_t s); - - /// Gets the phrase slop for this query. - /// @see PhraseQuery#getSlop() - int32_t getSlop(); - - /// Add a single term at the next position in the phrase. - /// @see PhraseQuery#add(Term) - void add(TermPtr term); - - /// Add multiple terms at the next position in the phrase. Any of the terms may match. - /// @see PhraseQuery#add(Term) - void add(Collection terms); - - /// Allows to specify the relative position of terms within the phrase. - /// @see PhraseQuery#add(Term, int) - void add(Collection terms, int32_t position); - - /// Returns a List of the terms in the multiphrase. Do not modify the List or its contents. - Collection< Collection > getTermArrays(); - - /// Returns the relative positions of terms in this phrase. - Collection getPositions(); - - virtual void extractTerms(SetTerm terms); - virtual QueryPtr rewrite(IndexReaderPtr reader); - virtual WeightPtr createWeight(SearcherPtr searcher); - - /// Prints a user-readable version of this query. - virtual String toString(const String& field); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - protected: - int32_t termArraysHashCode(); - bool termArraysEquals(Collection< Collection > first, Collection< Collection > second); - - friend class MultiPhraseWeight; - }; -} - -#endif diff --git a/include/MultiReader.h b/include/MultiReader.h deleted file mode 100644 index 138507bb..00000000 --- a/include/MultiReader.h +++ /dev/null @@ -1,142 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTIREADER_H -#define MULTIREADER_H - -#include "IndexReader.h" - -namespace Lucene -{ - /// An IndexReader which reads multiple indexes, appending their content. - class LPPAPI MultiReader : public IndexReader - { - public: - /// Construct a MultiReader aggregating the named set of (sub)readers. Directory locking for delete, - /// undeleteAll, and setNorm operations is left to the subreaders. - /// @param closeSubReaders indicates whether the subreaders should be closed when this MultiReader is closed - /// @param subReaders set of (sub)readers - MultiReader(Collection subReaders, bool closeSubReaders = true); - - virtual ~MultiReader(); - - LUCENE_CLASS(MultiReader); - - protected: - Collection subReaders; - Collection starts; // 1st docno for each segment - Collection decrefOnClose; // remember which subreaders to decRef on close - MapStringByteArray normsCache; - int32_t _maxDoc; - int32_t _numDocs; - bool _hasDeletions; - - public: - /// Tries to reopen the subreaders. - /// - /// If one or more subreaders could be re-opened (ie. subReader.reopen() returned a new instance != subReader), - /// then a new MultiReader instance is returned, otherwise this instance is returned. - /// - /// A re-opened instance might share one or more subreaders with the old instance. Index modification - /// operations result in undefined behavior when performed before the old instance is closed. (see {@link - /// IndexReader#reopen()}). - /// - /// If subreaders are shared, then the reference count of those readers is increased to ensure that the - /// subreaders remain open until the last referring reader is closed. - virtual IndexReaderPtr reopen(); - - /// Clones the subreaders. (see {@link IndexReader#clone()}). 
- /// - /// If subreaders are shared, then the reference count of those readers is increased to ensure that the - /// subreaders remain open until the last referring reader is closed. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - virtual Collection getTermFreqVectors(int32_t docNumber); - virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); - virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); - virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); - - virtual bool isOptimized(); - - /// Returns the number of documents in this index. - virtual int32_t numDocs(); - - /// Returns one greater than the largest possible document number. - virtual int32_t maxDoc(); - - /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine - /// what {@link Field}s to load and how they should be loaded. - virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); - - /// Returns true if document n has been deleted - virtual bool isDeleted(int32_t n); - - /// Returns true if any documents have been deleted - virtual bool hasDeletions(); - - /// Returns true if there are norms stored for this field. - virtual bool hasNorms(const String& field); - - /// Returns the byte-encoded normalization factor for the named field of every document. - virtual ByteArray norms(const String& field); - - /// Reads the byte-encoded normalization factor for the named field of every document. - virtual void norms(const String& field, ByteArray norms, int32_t offset); - - /// Returns an enumeration of all the terms in the index. - virtual TermEnumPtr terms(); - - /// Returns an enumeration of all terms starting at a given term. - virtual TermEnumPtr terms(TermPtr t); - - /// Returns the number of documents containing the term t. 
- virtual int32_t docFreq(TermPtr t); - - /// Returns an unpositioned {@link TermDocs} enumerator. - virtual TermDocsPtr termDocs(); - - /// Returns an unpositioned {@link TermPositions} enumerator. - virtual TermPositionsPtr termPositions(); - - /// Get a list of unique field names that exist in this index and have the specified field option - /// information. - virtual HashSet getFieldNames(FieldOption fieldOption); - - /// Checks recursively if all subreaders are up to date. - virtual bool isCurrent(); - - /// Not implemented. - virtual int64_t getVersion(); - - /// Returns the sequential sub readers that this reader is logically composed of. - virtual Collection getSequentialSubReaders(); - - protected: - /// If clone is true then we clone each of the subreaders - /// @param doClone - /// @return New IndexReader, or same one (this) if reopen/clone is not necessary - IndexReaderPtr doReopen(bool doClone); - - /// Implements deletion of the document numbered docNum. - virtual void doDelete(int32_t docNum); - - /// Implements actual undeleteAll() in subclass. - virtual void doUndeleteAll(); - - /// Find reader for doc n - int32_t readerIndex(int32_t n); - - /// Implements setNorm in subclass. - virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); - - virtual void doCommit(MapStringString commitUserData); - - /// Implements close. - virtual void doClose(); - }; -} - -#endif diff --git a/include/MultiSearcher.h b/include/MultiSearcher.h deleted file mode 100644 index 1a7e7b31..00000000 --- a/include/MultiSearcher.h +++ /dev/null @@ -1,78 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTISEARCHER_H -#define MULTISEARCHER_H - -#include "Searcher.h" -#include "Collector.h" - -namespace Lucene -{ - /// Implements search over a set of Searchables. - /// - /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or {@link - /// #search(QueryPtr, FilterPtr, int32_t)} methods. - class LPPAPI MultiSearcher : public Searcher - { - public: - /// Creates a searcher which searches searchers. - MultiSearcher(Collection searchables); - - virtual ~MultiSearcher(); - - LUCENE_CLASS(MultiSearcher); - - protected: - Collection searchables; - Collection starts; - int32_t _maxDoc; - - public: - using Searcher::search; - - /// Return the array of {@link Searchable}s this searches. - Collection getSearchables(); - - virtual void close(); - virtual int32_t docFreq(TermPtr term); - virtual DocumentPtr doc(int32_t n); - virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector); - - /// Returns index of the searcher for document n in the array used to construct this searcher. - int32_t subSearcher(int32_t n); - - /// Returns the document number of document n within its sub-index. - int32_t subDoc(int32_t n); - - virtual int32_t maxDoc(); - virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n); - virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort); - virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr results); - virtual QueryPtr rewrite(QueryPtr query); - virtual ExplanationPtr explain(WeightPtr weight, int32_t doc); - - protected: - Collection getStarts(); - - /// Create weight in multiple index scenario. - /// - /// Distributed query processing is done in the following steps: - /// 1. rewrite query. - /// 2. extract necessary terms. - /// 3. collect dfs for these terms from the Searchables. - /// 4. create query weight using aggregate dfs. - /// 5. 
distribute that weight to Searchables. - /// 6. merge results. - /// - /// Steps 1-4 are done here, 5+6 in the search() methods - /// - /// @return rewritten queries - virtual WeightPtr createWeight(QueryPtr query); - }; -} - -#endif diff --git a/include/MultiTermQuery.h b/include/MultiTermQuery.h deleted file mode 100644 index 1ffece5b..00000000 --- a/include/MultiTermQuery.h +++ /dev/null @@ -1,180 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTITERMQUERY_H -#define MULTITERMQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// An abstract {@link Query} that matches documents containing a subset of terms provided by a {@link - /// FilteredTermEnum} enumeration. - /// - /// This query cannot be used directly; you must subclass it and define {@link #getEnum} to provide a - /// {@link FilteredTermEnum} that iterates through the terms to be matched. - /// - /// NOTE: if {@link #setRewriteMethod} is either {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link - /// #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a {@link BooleanQuery.TooManyClauses} exception - /// during searching, which happens when the number of terms to be searched exceeds {@link - /// BooleanQuery#getMaxClauseCount()}. Setting {@link #setRewriteMethod} to {@link - /// #CONSTANT_SCORE_FILTER_REWRITE} prevents this. - /// - /// The recommended rewrite method is {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU - /// computing unhelpful scores, and it tries to pick the most performant rewrite method given the query. - /// - /// Note that {@link QueryParser} produces MultiTermQueries using {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} - /// by default. 
- class LPPAPI MultiTermQuery : public Query - { - public: - MultiTermQuery(); - virtual ~MultiTermQuery(); - - LUCENE_CLASS(MultiTermQuery); - - protected: - RewriteMethodPtr rewriteMethod; - int32_t numberOfTerms; - - public: - /// A rewrite method that first creates a private Filter, by visiting each term in sequence and marking - /// all docs for that term. Matching documents are assigned a constant score equal to the query's boost. - /// - /// This method is faster than the BooleanQuery rewrite methods when the number of matched terms or matched - /// documents is non-trivial. Also, it will never hit an errant TooManyClauses exception. - /// - /// @see #setRewriteMethod - static RewriteMethodPtr CONSTANT_SCORE_FILTER_REWRITE(); - - /// A rewrite method that first translates each term into {@link BooleanClause.Occur#SHOULD} clause in a - /// BooleanQuery, and keeps the scores as computed by the query. Note that typically such scores are - /// meaningless to the user, and require non-trivial CPU to compute, so it's almost always better to use - /// {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. - /// - /// NOTE: This rewrite method will hit {@link BooleanQuery.TooManyClauses} if the number of terms exceeds - /// {@link BooleanQuery#getMaxClauseCount}. - /// - /// @see #setRewriteMethod - static RewriteMethodPtr SCORING_BOOLEAN_QUERY_REWRITE(); - - /// Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except scores are not computed. Instead, each matching - /// document receives a constant score equal to the query's boost. - /// - /// NOTE: This rewrite method will hit TooManyClauses if the number of terms exceeds {@link - /// BooleanQuery#getMaxClauseCount}. 
- /// - /// @see #setRewriteMethod - static RewriteMethodPtr CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE(); - - /// Read-only default instance of {@link ConstantScoreAutoRewrite}, with {@link - /// ConstantScoreAutoRewrite#setTermCountCutoff} set to {@link ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF} - /// and {@link ConstantScoreAutoRewrite#setDocCountPercent} set to {@link - /// ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}. Note that you cannot alter the configuration of - /// this instance; you'll need to create a private instance instead. - static RewriteMethodPtr CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); - - /// Return the number of unique terms visited during execution of the query. If there are many of them, - /// you may consider using another query type or optimize your total term count in index. - /// - /// This method is not thread safe, be sure to only call it when no query is running! If you re-use the - /// same query instance for another search, be sure to first reset the term counter with {@link - /// #clearTotalNumberOfTerms}. - /// - /// On optimized indexes / no MultiReaders, you get the correct number of unique terms for the whole index. - /// Use this number to compare different queries. For non-optimized indexes this number can also be achieved - /// in non-constant-score mode. In constant-score mode you get the total number of terms seeked for all - /// segments / sub-readers. - /// @see #clearTotalNumberOfTerms - int32_t getTotalNumberOfTerms(); - - /// Resets the counting of unique terms. Do this before executing the query/filter. - /// @see #getTotalNumberOfTerms - void clearTotalNumberOfTerms(); - - virtual QueryPtr rewrite(IndexReaderPtr reader); - - /// @see #setRewriteMethod - virtual RewriteMethodPtr getRewriteMethod(); - - /// Sets the rewrite method to be used when executing the query. You can use one of the four core methods, - /// or implement your own subclass of {@link RewriteMethod}. 
- virtual void setRewriteMethod(RewriteMethodPtr method); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - - protected: - /// Construct the enumeration to be used, expanding the pattern term. - virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader) = 0; - - void incTotalNumberOfTerms(int32_t inc); - - friend class MultiTermQueryWrapperFilter; - friend class ScoringBooleanQueryRewrite; - friend class ConstantScoreAutoRewrite; - }; - - /// Abstract class that defines how the query is rewritten. - class LPPAPI RewriteMethod : public LuceneObject - { - public: - virtual ~RewriteMethod(); - LUCENE_CLASS(RewriteMethod); - - public: - virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) = 0; - }; - - /// A rewrite method that tries to pick the best constant-score rewrite method based on term and document - /// counts from the query. If both the number of terms and documents is small enough, then {@link - /// #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used. Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is - /// used. - class LPPAPI ConstantScoreAutoRewrite : public RewriteMethod - { - public: - ConstantScoreAutoRewrite(); - virtual ~ConstantScoreAutoRewrite(); - - LUCENE_CLASS(ConstantScoreAutoRewrite); - - public: - // Defaults derived from rough tests with a 20.0 million doc Wikipedia index. With more than 350 terms - // in the query, the filter method is fastest - static const int32_t DEFAULT_TERM_COUNT_CUTOFF; - - // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest - static const double DEFAULT_DOC_COUNT_PERCENT; - - protected: - int32_t termCountCutoff; - double docCountPercent; - - public: - /// If the number of terms in this query is equal to or larger than this setting then {@link - /// #CONSTANT_SCORE_FILTER_REWRITE} is used. 
- virtual void setTermCountCutoff(int32_t count); - - /// @see #setTermCountCutoff - virtual int32_t getTermCountCutoff(); - - /// If the number of documents to be visited in the postings exceeds this specified percentage of the - /// maxDoc() for the index, then {@link #CONSTANT_SCORE_FILTER_REWRITE} is used. - /// @param percent 0.0 to 100.0 - virtual void setDocCountPercent(double percent); - - /// @see #setDocCountPercent - virtual double getDocCountPercent(); - - virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query); - - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - }; -} - -#endif diff --git a/include/MultiTermQueryWrapperFilter.h b/include/MultiTermQueryWrapperFilter.h deleted file mode 100644 index 8026a3aa..00000000 --- a/include/MultiTermQueryWrapperFilter.h +++ /dev/null @@ -1,59 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTITERMQUERYWRAPPERFILTER_H -#define MULTITERMQUERYWRAPPERFILTER_H - -#include "Filter.h" - -namespace Lucene -{ - /// A wrapper for {@link MultiTermQuery}, that exposes its functionality as a {@link Filter}. - /// - /// MultiTermQueryWrapperFilter is not designed to be used by itself. Normally you subclass it to - /// provide a Filter counterpart for a {@link MultiTermQuery} subclass. - /// - /// For example, {@link TermRangeFilter} and {@link PrefixFilter} extend MultiTermQueryWrapperFilter. - /// This class also provides the functionality behind {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}; - /// this is why it is not abstract. - class LPPAPI MultiTermQueryWrapperFilter : public Filter - { - INTERNAL: - /// Wrap a {@link MultiTermQuery} as a Filter. 
- MultiTermQueryWrapperFilter(MultiTermQueryPtr query); - - public: - virtual ~MultiTermQueryWrapperFilter(); - - LUCENE_CLASS(MultiTermQueryWrapperFilter); - - protected: - MultiTermQueryPtr query; - - public: - virtual String toString(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - /// Return the number of unique terms visited during execution of the filter. If there are many of them, - /// you may consider using another filter type or optimize your total term count in index. - /// - /// This method is not thread safe, be sure to only call it when no filter is running! If you re-use the - /// same filter instance for another search, be sure to first reset the term counter with {@link - /// #clearTotalNumberOfTerms}. - /// @see #clearTotalNumberOfTerms - int32_t getTotalNumberOfTerms(); - - /// Resets the counting of unique terms. Do this before executing the filter. - /// @see #getTotalNumberOfTerms - void clearTotalNumberOfTerms(); - - /// Returns a DocIdSet with documents that should be permitted in search results. - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); - }; -} - -#endif diff --git a/include/MultipleTermPositions.h b/include/MultipleTermPositions.h deleted file mode 100644 index 18a44427..00000000 --- a/include/MultipleTermPositions.h +++ /dev/null @@ -1,55 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef MULTIPLETERMPOSITIONS_H -#define MULTIPLETERMPOSITIONS_H - -#include "TermPositions.h" - -namespace Lucene -{ - /// Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as a single - /// {@link TermPositions}. 
- class LPPAPI MultipleTermPositions : public TermPositions, public LuceneObject - { - public: - MultipleTermPositions(IndexReaderPtr indexReader, Collection terms); - virtual ~MultipleTermPositions(); - - LUCENE_CLASS(MultipleTermPositions); - - protected: - int32_t _doc; - int32_t _freq; - TermPositionsQueuePtr termPositionsQueue; - IntQueuePtr posList; - - public: - virtual bool next(); - virtual int32_t nextPosition(); - virtual bool skipTo(int32_t target); - virtual int32_t doc(); - virtual int32_t freq(); - virtual void close(); - - /// Not implemented. - virtual void seek(TermPtr term); - - /// Not implemented. - virtual void seek(TermEnumPtr termEnum); - - /// Not implemented. - virtual int32_t read(Collection docs, Collection freqs); - - /// Not implemented. - virtual ByteArray getPayload(ByteArray data, int32_t offset); - - /// @return false - virtual bool isPayloadAvailable(); - }; -} - -#endif diff --git a/include/NativeFSLockFactory.h b/include/NativeFSLockFactory.h deleted file mode 100644 index 75ccb305..00000000 --- a/include/NativeFSLockFactory.h +++ /dev/null @@ -1,47 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NATIVEFSLOCKFACTORY_H -#define NATIVEFSLOCKFACTORY_H - -#include "FSLockFactory.h" - -namespace Lucene -{ - /// Implements {@link LockFactory} using native file lock. - /// @see LockFactory - class LPPAPI NativeFSLockFactory : public FSLockFactory - { - public: - /// Create a NativeFSLockFactory instance, storing lock files into - /// the specified lockDirName. - /// @param lockDirName where lock files are created. 
- NativeFSLockFactory(const String& lockDirName = EmptyString); - virtual ~NativeFSLockFactory(); - - LUCENE_CLASS(NativeFSLockFactory); - - public: - /// Return a new Lock instance identified by lockName. - /// @param lockName name of the lock to be created. - virtual LockPtr makeLock(const String& lockName); - - /// Attempt to clear (forcefully unlock and remove) the - /// specified lock. Only call this at a time when you are - /// certain this lock is no longer in use. - /// @param lockName name of the lock to be cleared. - virtual void clearLock(const String& lockName); - - protected: - /// Simple test to verify locking system is "working". On NFS, if - /// it's mis-configured, you can hit long (35 second) timeouts which - /// cause Lock.obtain to take far too long (it assumes the obtain() - /// call takes zero time). - void acquireTestLock(); - }; -} - -#endif diff --git a/include/NearSpansOrdered.h b/include/NearSpansOrdered.h deleted file mode 100644 index 1345fc76..00000000 --- a/include/NearSpansOrdered.h +++ /dev/null @@ -1,99 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NEARSPANSORDERED_H -#define NEARSPANSORDERED_H - -#include "Spans.h" - -namespace Lucene -{ - /// A Spans that is formed from the ordered subspans of a SpanNearQuery where the subspans do not overlap - /// and have a maximum slop between them. - /// - /// The formed spans only contains minimum slop matches. The matching slop is computed from the distance(s) - /// between the non overlapping matching Spans. - /// - /// Successive matches are always formed from the successive Spans of the SpanNearQuery. 
- /// - /// The formed spans may contain overlaps when the slop is at least 1. For example, when querying using - ///
t1 t2 t3
- /// with slop at least 1, the fragment: - ///
t1 t2 t1 t3 t2 t3
- /// matches twice: - ///
t1 t2 .. t3      
- ///
      t1 .. t2 t3
- /// - /// Note: Only public for subclassing. Most implementations should not need this class - class LPPAPI NearSpansOrdered : public Spans - { - public: - NearSpansOrdered(SpanNearQueryPtr spanNearQuery, IndexReaderPtr reader, bool collectPayloads = true); - virtual ~NearSpansOrdered(); - - LUCENE_CLASS(NearSpansOrdered); - - protected: - int32_t allowedSlop; - bool firstTime; - bool more; - - /// The spans in the same order as the SpanNearQuery - Collection subSpans; - - /// Indicates that all subSpans have same doc() - bool inSameDoc; - - int32_t matchDoc; - int32_t matchStart; - int32_t matchEnd; - Collection matchPayload; - - Collection subSpansByDoc; - SpanNearQueryPtr query; - bool collectPayloads; - - public: - virtual int32_t doc(); - virtual int32_t start(); - virtual int32_t end(); - - Collection getSubSpans(); - - virtual Collection getPayload(); - virtual bool isPayloadAvailable(); - virtual bool next(); - virtual bool skipTo(int32_t target); - - /// Check whether two Spans in the same document are ordered. - /// @return true if spans1 starts before spans2 or the spans start at the same position, and - /// spans1 ends before spans2. - static bool docSpansOrdered(SpansPtr spans1, SpansPtr spans2); - - virtual String toString(); - - protected: - /// Advances the subSpans to just after an ordered match with a minimum slop that is smaller than the - /// slop allowed by the SpanNearQuery. - /// @return true if there is such a match. - bool advanceAfterOrdered(); - - /// Advance the subSpans to the same document. - bool toSameDoc(); - - // Like {@link #docSpansOrdered(SpansPtr, SpansPtr)}, but use the spans starts and ends as parameters. - static bool docSpansOrdered(int32_t start1, int32_t end1, int32_t start2, int32_t end2); - - /// Order the subSpans within the same document by advancing all later spans after the previous one. - bool stretchToOrder(); - - /// The subSpans are ordered in the same doc, so there is a possible match. 
Compute the slop while - /// making the match as short as possible by advancing all subSpans except the last one in reverse order. - bool shrinkToAfterShortestMatch(); - }; -} - -#endif diff --git a/include/NearSpansUnordered.h b/include/NearSpansUnordered.h deleted file mode 100644 index e1ca899e..00000000 --- a/include/NearSpansUnordered.h +++ /dev/null @@ -1,71 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NEARSPANSUNORDERED_H -#define NEARSPANSUNORDERED_H - -#include "Spans.h" - -namespace Lucene -{ - /// Similar to {@link NearSpansOrdered}, but for the unordered case. - /// - /// Only public for subclassing. Most implementations should not need this class - class LPPAPI NearSpansUnordered : public Spans - { - public: - NearSpansUnordered(SpanNearQueryPtr query, IndexReaderPtr reader); - virtual ~NearSpansUnordered(); - - LUCENE_CLASS(NearSpansUnordered); - - protected: - SpanNearQueryPtr query; - IndexReaderPtr reader; - - Collection ordered; // spans in query order - Collection subSpans; - int32_t slop; // from query - - SpansCellPtr first; // linked list of spans - SpansCellPtr last; // sorted by doc only - - int32_t totalLength; // sum of current lengths - - CellQueuePtr queue; // sorted queue of spans - SpansCellPtr max; // max element in queue - - bool more; // true if not done - bool firstTime; // true before first next() - - public: - virtual void initialize(); - - Collection getSubSpans(); - - virtual bool next(); - virtual bool skipTo(int32_t target); - virtual int32_t doc(); - virtual int32_t start(); - virtual int32_t end(); - virtual Collection getPayload(); - virtual bool isPayloadAvailable(); - virtual String toString(); - - protected: 
- SpansCellPtr min(); - void initList(bool next); - void addToList(SpansCellPtr cell); - void firstToLast(); - void queueToList(); - void listToQueue(); - bool atMatch(); - - friend class SpansCell; - }; -} - -#endif diff --git a/include/NoLockFactory.h b/include/NoLockFactory.h deleted file mode 100644 index 96a0c0f5..00000000 --- a/include/NoLockFactory.h +++ /dev/null @@ -1,40 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NOLOCKFACTORY_H -#define NOLOCKFACTORY_H - -#include "LockFactory.h" - -namespace Lucene -{ - /// Use this {@link LockFactory} to disable locking entirely. Only one instance of this lock is created. - /// You should call {@link #getNoLockFactory()} to get the instance. - /// - /// @see LockFactory - class LPPAPI NoLockFactory : public LockFactory - { - public: - virtual ~NoLockFactory(); - - LUCENE_CLASS(NoLockFactory); - - private: - static NoLockPtr getSingletonLock(); - - public: - static NoLockFactoryPtr getNoLockFactory(); - - /// Return a new Lock instance identified by lockName. - virtual LockPtr makeLock(const String& lockName); - - /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you - /// are certain this lock is no longer in use. - virtual void clearLock(const String& lockName); - }; -} - -#endif diff --git a/include/NormalizeCharMap.h b/include/NormalizeCharMap.h deleted file mode 100644 index dfe0e968..00000000 --- a/include/NormalizeCharMap.h +++ /dev/null @@ -1,38 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NORMALIZECHARMAP_H -#define NORMALIZECHARMAP_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Holds a map of String input to String output, to be used with {@link MappingCharFilter}. - class LPPAPI NormalizeCharMap : public LuceneObject - { - public: - NormalizeCharMap(); - virtual ~NormalizeCharMap(); - - LUCENE_CLASS(NormalizeCharMap); - - public: - MapCharNormalizeCharMap submap; - String normStr; - int32_t diff; - - public: - /// Records a replacement to be applied to the inputs stream. Whenever singleMatch occurs in the input, it - /// will be replaced with replacement. - /// - /// @param singleMatch input String to be replaced - /// @param replacement output String - void add(const String& singleMatch, const String& replacement); - }; -} - -#endif diff --git a/include/NormsWriter.h b/include/NormsWriter.h deleted file mode 100644 index 5dce5a1d..00000000 --- a/include/NormsWriter.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NORMSWRITER_H -#define NORMSWRITER_H - -#include "InvertedDocEndConsumer.h" - -namespace Lucene -{ - /// Writes norms. Each thread X field accumulates the norms for the doc/fields it saw, then the flush method - /// below merges all of these together into a single _X.nrm file. 
- class NormsWriter : public InvertedDocEndConsumer - { - public: - NormsWriter(); - virtual ~NormsWriter(); - - LUCENE_CLASS(NormsWriter); - - protected: - FieldInfosPtr fieldInfos; - - public: - virtual InvertedDocEndConsumerPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread); - virtual void abort(); - - // We only write the _X.nrm file at flush - virtual void files(HashSet files); - - virtual void setFieldInfos(FieldInfosPtr fieldInfos); - - /// Produce _X.nrm if any document had a field with norms not disabled - virtual void flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, SegmentWriteStatePtr state); - virtual void closeDocStore(SegmentWriteStatePtr state); - - protected: - static uint8_t getDefaultNorm(); - }; -} - -#endif diff --git a/include/NormsWriterPerField.h b/include/NormsWriterPerField.h deleted file mode 100644 index 15cbe40c..00000000 --- a/include/NormsWriterPerField.h +++ /dev/null @@ -1,47 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NORMSWRITERPERFIELD_H -#define NORMSWRITERPERFIELD_H - -#include "InvertedDocEndConsumerPerField.h" - -namespace Lucene -{ - /// Taps into DocInverter, as an InvertedDocEndConsumer, which is called at the end of inverting each field. - /// We just look at the length for the field (docState.length) and record the norm. 
- class NormsWriterPerField : public InvertedDocEndConsumerPerField - { - public: - NormsWriterPerField(DocInverterPerFieldPtr docInverterPerField, NormsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo); - virtual ~NormsWriterPerField(); - - LUCENE_CLASS(NormsWriterPerField); - - public: - NormsWriterPerThreadWeakPtr _perThread; - FieldInfoPtr fieldInfo; - DocStatePtr docState; - - // Holds all docID/norm pairs we've seen - Collection docIDs; - ByteArray norms; - int32_t upto; - - FieldInvertStatePtr fieldState; - - public: - void reset(); - virtual void abort(); - - /// Compare two objects - virtual int32_t compareTo(LuceneObjectPtr other); - - virtual void finish(); - }; -} - -#endif diff --git a/include/NormsWriterPerThread.h b/include/NormsWriterPerThread.h deleted file mode 100644 index eb80ea57..00000000 --- a/include/NormsWriterPerThread.h +++ /dev/null @@ -1,36 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef NORMSWRITERPERTHREAD_H -#define NORMSWRITERPERTHREAD_H - -#include "InvertedDocEndConsumerPerThread.h" - -namespace Lucene -{ - class NormsWriterPerThread : public InvertedDocEndConsumerPerThread - { - public: - NormsWriterPerThread(DocInverterPerThreadPtr docInverterPerThread, NormsWriterPtr normsWriter); - virtual ~NormsWriterPerThread(); - - LUCENE_CLASS(NormsWriterPerThread); - - public: - NormsWriterWeakPtr _normsWriter; - DocStatePtr docState; - - public: - virtual InvertedDocEndConsumerPerFieldPtr addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo); - virtual void abort(); - virtual void startDocument(); - virtual void finishDocument(); - - bool freeRAM(); - }; -} - -#endif diff --git a/include/NumberTools.h b/include/NumberTools.h deleted file mode 100644 index 7c871030..00000000 --- a/include/NumberTools.h +++ /dev/null @@ -1,59 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUMBERTOOLS_H -#define NUMBERTOOLS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Provides support for converting longs to Strings, and back again. The strings are structured so that - /// lexicographic sorting order is preserved. - /// - /// That is, if l1 is less than l2 for any two longs l1 and l2, then NumberTools.longToString(l1) is - /// lexicographically less than NumberTools.longToString(l2). (Similarly for "greater than" and "equals".) - /// - /// This class handles all long values (unlike {@link DateField}). 
- /// - /// @deprecated For new indexes use {@link NumericUtils} instead, which provides a sortable binary representation - /// (prefix encoded) of numeric values. - /// To index and efficiently query numeric values use {@link NumericField} and {@link NumericRangeQuery}. This - /// class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0). - class LPPAPI NumberTools : public LuceneObject - { - public: - virtual ~NumberTools(); - - LUCENE_CLASS(NumberTools); - - protected: - static const int32_t RADIX; - - static const wchar_t NEGATIVE_PREFIX; - - // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX - static const wchar_t POSITIVE_PREFIX; - - public: - /// Equivalent to longToString(LLONG_MIN) - static const String& MIN_STRING_VALUE(); - - /// Equivalent to longToString(LLONG_MAX) - static const String& MAX_STRING_VALUE(); - - /// The length of (all) strings returned by {@link #longToString} - static int32_t STR_SIZE(); - - /// Converts a long to a String suitable for indexing. - static String longToString(int64_t l); - - /// Converts a String that was returned by {@link #longToString} back to a long. - static int64_t stringToLong(const String& str); - }; -} - -#endif diff --git a/include/NumericField.h b/include/NumericField.h deleted file mode 100644 index b7a07a3e..00000000 --- a/include/NumericField.h +++ /dev/null @@ -1,133 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUMERICFIELD_H -#define NUMERICFIELD_H - -#include "Field.h" - -namespace Lucene -{ - /// This class provides a {@link Field} that enables indexing of numeric values for efficient range filtering and - /// sorting. 
The native types int32_t, int64_t and double are directly supported. However, any value that can be - /// converted into these native types can also be indexed. For example, date/time values represented by a {@link - /// Date} can be translated into a int64_t value. If you don't need millisecond precision, you can quantize the - /// value, either by dividing the result or using the separate getters (for year, month, etc.) to construct an int32_t - /// or int64_t value. - /// - /// To perform range querying or filtering against a NumericField, use {@link NumericRangeQuery} or {@link - /// NumericRangeFilter}. To sort according to a NumericField, use the normal numeric sort types, eg {@link - /// SortField#INT}. NumericField values can also be loaded directly from {@link FieldCache}. - /// - /// By default, a NumericField's value is not stored but is indexed for range filtering and sorting. You can use the - /// {@link #NumericField(String,Field.Store,boolean)} constructor if you need to change these defaults. - /// - /// You may add the same field name as a NumericField to the same document more than once. Range querying and - /// filtering will be the logical OR of all values; so a range query will hit all documents that have at least one - /// value in the range. However sort behavior is not defined. If you need to sort, you should separately index a - /// single-valued NumericField. - /// - /// A NumericField will consume somewhat more disk space in the index than an ordinary single-valued field. However, - /// for a typical index that includes substantial textual content per document, this increase will likely be in the - /// noise. - /// - /// Within Lucene, each numeric value is indexed as a trie structure, where each term is logically assigned to larger - /// and larger pre-defined brackets (which are simply lower-precision representations of the value). The step size - /// between each successive bracket is called the precisionStep, measured in bits. 
Smaller precisionStep values - /// result in larger number of brackets, which consumes more disk space in the index but may result in faster range - /// search performance. The default value 4 was selected for a reasonable trade off of disk space consumption versus - /// performance. You can use the expert constructor {@link #NumericField(String,int,Field.Store,boolean)} if you'd - /// like to change the value. Note that you must also specify a congruent value when creating {@link NumericRangeQuery} - /// or {@link NumericRangeFilter}. For low cardinality fields larger precision steps are good. If the cardinality - /// is < 100, it is fair to use {@link INT_MAX}, which produces one term per value. - /// - /// For more information on the internals of numeric trie indexing, including the precisionStep configuration, see - /// {@link NumericRangeQuery}. The format of indexed values is described in {@link NumericUtils}. - /// - /// If you only need to sort by numeric value, and never run range querying/filtering, you can index using a - /// precisionStep of {@link MAX_INT}. This will minimize disk space consumed. - /// - /// More advanced users can instead use {@link NumericTokenStream} directly, when indexing numbers. This class is a - /// wrapper around this token stream type for easier, more intuitive usage. - /// - /// NOTE: This class is only used during indexing. When retrieving the stored field value from a {@link Document} - /// instance after search, you will get a conventional {@link Fieldable} instance where the numeric values are - /// returned as strings (according to toString(value) of the used data type). - class LPPAPI NumericField : public AbstractField - { - public: - /// Creates a field for numeric values using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} - /// (4). 
The instance is not yet initialized with a numeric value, before indexing a document containing this field, - /// set a value using the various set???Value() methods. - /// This constructor creates an indexed, but not stored field. - /// @param name the field name - NumericField(const String& name); - - /// Creates a field for numeric values using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} - /// (4). The instance is not yet initialized with a numeric value, before indexing a document containing this field, - /// set a value using the various set???Value() methods. - /// This constructor creates an indexed, but not stored field. - /// @param name the field name - /// @param store if the field should be stored in plain text form (according to toString(value) of the used - /// data type) - /// @param index if the field should be indexed using {@link NumericTokenStream} - NumericField(const String& name, Field::Store store, bool index); - - /// Creates a field for numeric values with the specified precisionStep. The instance is not yet initialized with - /// a numeric value, before indexing a document containing this field, set a value using the various set???Value() - /// methods. This constructor creates an indexed, but not stored field. - /// @param name the field name - /// @param precisionStep the used precision step - NumericField(const String& name, int32_t precisionStep); - - /// Creates a field for numeric values with the specified precisionStep. The instance is not yet initialized with - /// a numeric value, before indexing a document containing this field, set a value using the various set???Value() - /// methods. This constructor creates an indexed, but not stored field. 
- /// @param name the field name - /// @param precisionStep the used precision step - /// @param store if the field should be stored in plain text form (according to toString(value) of the used - /// data type) - /// @param index if the field should be indexed using {@link NumericTokenStream} - NumericField(const String& name, int32_t precisionStep, Field::Store store, bool index); - - virtual ~NumericField(); - - LUCENE_CLASS(NumericField); - - protected: - NumericTokenStreamPtr tokenStream; - - public: - /// Returns a {@link NumericTokenStream} for indexing the numeric value. - virtual TokenStreamPtr tokenStreamValue(); - - /// Returns always null for numeric fields - virtual ByteArray getBinaryValue(ByteArray result); - - /// Returns always null for numeric fields - virtual ReaderPtr readerValue(); - - /// Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). - virtual String stringValue(); - - /// Returns the current numeric value. - virtual int64_t getNumericValue(); - - /// Initializes the field with the supplied long value. - /// @param value the numeric value - virtual NumericFieldPtr setLongValue(int64_t value); - - /// Initializes the field with the supplied int value. - /// @param value the numeric value - virtual NumericFieldPtr setIntValue(int32_t value); - - /// Initializes the field with the supplied double value. - /// @param value the numeric value - virtual NumericFieldPtr setDoubleValue(double value); - }; -} - -#endif diff --git a/include/NumericRangeFilter.h b/include/NumericRangeFilter.h deleted file mode 100644 index 6f99077a..00000000 --- a/include/NumericRangeFilter.h +++ /dev/null @@ -1,83 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef NUMERICRANGEFILTER_H -#define NUMERICRANGEFILTER_H - -#include "MultiTermQueryWrapperFilter.h" - -namespace Lucene -{ - /// A {@link Filter} that only accepts numeric values within a specified range. To use this, you must first - /// index the numeric values using {@link NumericField} ({@link NumericTokenStream}). - /// - /// You create a new NumericRangeFilter with the static factory methods, eg: - ///
-    /// FilterPtr f = NumericRangeFilter::newDoubleRange(L"weight", 0.3, 0.10, true, true);
-    /// 
- /// accepts all documents whose double valued "weight" field ranges from 0.3 to 0.10, inclusive. - /// - /// See {@link NumericRangeQuery} for details on how Lucene indexes and searches numeric valued fields. - class LPPAPI NumericRangeFilter : public MultiTermQueryWrapperFilter - { - public: - NumericRangeFilter(NumericRangeQueryPtr query); - virtual ~NumericRangeFilter(); - - LUCENE_CLASS(NumericRangeFilter); - - public: - /// Factory that creates a NumericRangeFilter, that filters a long range using the given precisionStep. - static NumericRangeFilterPtr newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a long range using the default precisionStep - /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). - static NumericRangeFilterPtr newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a int range using the given precisionStep. - static NumericRangeFilterPtr newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a int range using the default precisionStep - /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). - static NumericRangeFilterPtr newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a double range using the given precisionStep. - static NumericRangeFilterPtr newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a double range using the default precisionStep - /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). 
- static NumericRangeFilterPtr newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a int, long or double range using the given - /// precisionStep. You can have half-open ranges (which are in fact <= or >= queries) by setting the min - /// or max value to VariantUtils::null(). By setting inclusive to false it will match all documents - /// excluding the bounds, with inclusive on the boundaries are hits, too. - static NumericRangeFilterPtr newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a int, long or double range range using the default - /// precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). You can have half-open ranges (which are in - /// fact <= or >= queries) by setting the min or max value to VariantUtils::null(). By setting inclusive to false - /// it will match all documents excluding the bounds, with inclusive on the boundaries are hits, too. - static NumericRangeFilterPtr newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); - - /// Returns the field name for this filter - String getField(); - - /// Returns true if the lower endpoint is inclusive - bool includesMin(); - - /// Returns true if the upper endpoint is inclusive - bool includesMax(); - - /// Returns the lower value of this range filter - NumericValue getMin(); - - /// Returns the upper value of this range filter - NumericValue getMax(); - }; -} - -#endif diff --git a/include/NumericRangeQuery.h b/include/NumericRangeQuery.h deleted file mode 100644 index b66826e7..00000000 --- a/include/NumericRangeQuery.h +++ /dev/null @@ -1,189 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUMERICRANGEQUERY_H -#define NUMERICRANGEQUERY_H - -#include "MultiTermQuery.h" -#include "FilteredTermEnum.h" -#include "NumericUtils.h" - -namespace Lucene -{ - /// A {@link Query} that matches numeric values within a specified range. To use this, you must first - /// index the numeric values using {@link NumericField} (expert: {@link NumericTokenStream}). If your - /// terms are instead textual, you should use {@link TermRangeQuery}. {@link NumericRangeFilter} is the - /// filter equivalent of this query. - /// - /// You create a new NumericRangeQuery with the static factory methods, eg: - ///
-    /// QueryPtr q = NumericRangeQuery::newDoubleRange("weight", 0.3, 0.10, true, true);
-    /// 
- /// matches all documents whose double valued "weight" field ranges from 0.3 to 0.10, inclusive. - /// - /// The performance of NumericRangeQuery is much better than the corresponding {@link TermRangeQuery} - /// because the number of terms that must be searched is usually far fewer, thanks to trie indexing, - /// described below. - /// - /// You can optionally specify a precisionStep when creating this query. This is necessary if you've - /// changed this configuration from its default (4) during indexing. Lower values consume more disk - /// space but speed up searching. Suitable values are between 1 and 8. A good starting point to test - /// is 4, which is the default value for all Numeric* classes. See below for details. - /// - /// This query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} for 32 bit - /// integer ranges with precisionStep <=8 and 64 bit (long/double) ranges with precisionStep <=6. - /// Otherwise it uses {@linkplain MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the number of terms - /// is likely to be high. With precision steps of <=4, this query can be run with one of the BooleanQuery - /// rewrite methods without changing BooleanQuery's default max clause count. - /// - /// How it works - /// - /// See the publication about panFMP, where this - /// algorithm was described (referred to as TrieRangeQuery): - ///
Schindler, U, Diepenbroek, M, 2008. - /// Generic XML-based Framework for Metadata Portals. - /// Computers & Geosciences 34 (12), 1947-1955. - /// doi:10.1016/j.cageo.2008.02.023
- /// - /// A quote from this paper: Because Apache Lucene is a full-text search engine and not a conventional - /// database, it cannot handle numerical ranges (eg., field value is inside user defined bounds, even - /// dates are numerical values). We have developed an extension to Apache Lucene that stores the - /// numerical values in a special string-encoded format with variable precision (all numerical values like - /// doubles, longs, and ints are converted to lexicographic sortable string representations and stored - /// with different precisions (for a more detailed description of how the values are stored, see {@link - /// NumericUtils}). A range is then divided recursively into multiple intervals for searching: - /// The center of the range is searched only with the lowest possible precision in the trie, while the - /// boundaries are matched more exactly. This reduces the number of terms dramatically. - /// - /// For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that uses a - /// lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the lowest - /// precision. Overall, a range could consist of a theoretical maximum of 7*255*2 + 255 = 3825 distinct - /// terms (when there is a term for every distinct value of an 8-byte-number in the index and the range - /// covers almost all of them; a maximum of 255 distinct values is used because it would always be possible - /// to reduce the full 256 values to one term with degraded precision). In practice, we have seen up to - /// 300 terms in most cases (index with 500,000 metadata records and a uniform value distribution). - /// - /// Precision Step: - /// You can choose any precisionStep when encoding values. Lower step values mean more precisions and so - /// more terms in index (and index gets larger). On the other hand, the maximum number of terms to match - /// reduces, which optimized query speed. 
The formula to calculate the maximum term count is: - ///
-    /// n = [ (bitsPerValue/precisionStep - 1) * (2 ^ precisionStep - 1 ) * 2 ] + (2 ^ precisionStep - 1 )
-    /// 
- /// - /// (this formula is only correct, when bitsPerValue/precisionStep is an integer; in other cases, the value - /// must be rounded up and the last summand must contain the modulo of the division as precision step). - /// For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465, and for a precision step of 2, - /// n = 31*3*2 + 3 = 189. But the faster search speed is reduced by more seeking in the term enum of the - /// index. Because of this, the ideal precisionStep value can only be found out by testing. Important: You - /// can index with a lower precision step value and test search speed using a multiple of the original step - /// value. - /// - /// Good values for precisionStep are depending on usage and data type: - ///
    - ///
  • The default for all data types is 4, which is used, when no precisionStep is given. - ///
  • Ideal value in most cases for 64 bit data types (long, double) is 6 or 8. - ///
  • Ideal value in most cases for 32 bit data types (int) is 4. - ///
  • For low cardinality fields larger precision steps are good. If the cardinality is < 100, it is - /// fair to use {@link Integer#MAX_VALUE} (see below). - ///
  • Steps >=64 for long/double and >=32 for int/float produces one token per value in the index and - /// querying is as slow as a conventional {@link TermRangeQuery}. But it can be used to produce fields, - /// that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as precisionStep). - /// Using {@link NumericField NumericFields} for sorting is ideal, because building the field cache is much - /// faster than with text-only numbers. These fields have one term per value and therefore also work with - /// term enumeration for building distinct lists (eg. facets / preselected values to search for). - /// Sorting is also possible with range query optimized fields using one of the above precisionSteps. - ///
- /// - /// Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed that - /// {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count) took - /// about 30-40 secs to complete, {@link TermRangeQuery} in constant score filter rewrite mode took 5 secs - /// and executing this class took <100ms to complete (on an Opteron64 machine, 8 bit precision step). This - /// query type was developed for a geographic portal, where the performance for eg. bounding boxes or exact - /// date/time stamps is important. - class LPPAPI NumericRangeQuery : public MultiTermQuery - { - public: - NumericRangeQuery(const String& field, int32_t precisionStep, int32_t valSize, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); - virtual ~NumericRangeQuery(); - - LUCENE_CLASS(NumericRangeQuery); - - INTERNAL: - String field; - int32_t precisionStep; - int32_t valSize; - NumericValue min; - NumericValue max; - bool minInclusive; - bool maxInclusive; - - public: - using MultiTermQuery::toString; - - /// Factory that creates a NumericRangeFilter, that filters a long range using the given precisionStep. - static NumericRangeQueryPtr newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a long range using the default precisionStep - /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). - static NumericRangeQueryPtr newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a int range using the given precisionStep. 
- static NumericRangeQueryPtr newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a int range using the default precisionStep - /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). - static NumericRangeQueryPtr newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a double range using the given precisionStep. - static NumericRangeQueryPtr newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeFilter, that filters a double range using the default precisionStep - /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). - static NumericRangeQueryPtr newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeQuery, that queries a int, long or double range using the given - /// precisionStep. You can have half-open ranges (which are in fact <= or >= queries) by setting the min - /// or max value to VariantUtils::null(). By setting inclusive to false it will match all documents - /// excluding the bounds, with inclusive on the boundaries are hits, too. - static NumericRangeQueryPtr newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); - - /// Factory that creates a NumericRangeQuery, that queries a int, long or double range using the default - /// precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). You can have half-open ranges (which - /// are in fact <= or >= queries) by setting the min or max value to VariantUtils::null(). 
By setting - /// inclusive to false it will match all documents excluding the bounds, with inclusive on the boundaries - /// are hits, too. - static NumericRangeQueryPtr newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); - - /// Returns the field name for this query - String getField(); - - /// Returns true if the lower endpoint is inclusive - bool includesMin(); - - /// Returns true if the upper endpoint is inclusive - bool includesMax(); - - /// Returns the lower value of this range query - NumericValue getMin(); - - /// Returns the upper value of this range query - NumericValue getMax(); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual String toString(const String& field); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - protected: - virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); - - friend class NumericRangeTermEnum; - }; -} - -#endif diff --git a/include/NumericTokenStream.h b/include/NumericTokenStream.h deleted file mode 100644 index 0e1269d5..00000000 --- a/include/NumericTokenStream.h +++ /dev/null @@ -1,120 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUMERICTOKENSTREAM_H -#define NUMERICTOKENSTREAM_H - -#include "TokenStream.h" - -namespace Lucene -{ - /// This class provides a {@link TokenStream} for indexing numeric values that can be used by {@link NumericRangeQuery} - /// or {@link NumericRangeFilter}. - /// - /// Note that for simple usage, {@link NumericField} is recommended. {@link NumericField} disables norms and term freqs, - /// as they are not usually needed during searching. 
If you need to change these settings, you should use this class. - /// - /// See {@link NumericField} for capabilities of fields indexed numerically. - /// - /// Here's an example usage, for an int field: - /// - /// FieldPtr field = newLucene(name, newLucene(precisionStep)->setIntValue(value)); - /// field->setOmitNorms(true); - /// field->setOmitTermFreqAndPositions(true); - /// document->add(field); - /// - /// For optimal performance, re-use the TokenStream and Field instance for more than one document: - /// - /// NumericTokenStreamPtr stream = newLucene(precisionStep); - /// FieldPtr field = newLucene(name, stream); - /// field->setOmitNorms(true); - /// field->setOmitTermFreqAndPositions(true); - /// DocumentPtr document = newLucene(); - /// document->add(field); - /// - /// for (all documents) - /// { - /// stream->setIntValue(value); - /// writer->addDocument(document); - /// } - /// - /// This stream is not intended to be used in analyzers; it's more for iterating the different precisions during - /// indexing a specific numeric value. - /// - /// NOTE: as token streams are only consumed once the document is added to the index, if you index more than one - /// numeric field, use a separate NumericTokenStream * instance for each. - /// - /// See {@link NumericRangeQuery} for more details on the precisionStep - /// parameter as well as how numeric fields work under the hood. - class LPPAPI NumericTokenStream : public TokenStream - { - public: - /// Creates a token stream for numeric values using the default precisionStep {@link - /// NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, before using set a - /// value using the various setValue() methods. - NumericTokenStream(); - - /// Creates a token stream for numeric values with the specified precisionStep. The stream is not yet - /// initialized, before using set a value using the various setValue() methods. 
- NumericTokenStream(int32_t precisionStep); - - /// Creates a token stream for numeric values with the specified precisionStep using the given {@link - /// AttributeSource}. The stream is not yet initialized, before using set a value using the various - /// setValue() methods. - NumericTokenStream(AttributeSourcePtr source, int32_t precisionStep); - - /// Creates a token stream for numeric values with the specified precisionStep using the given {@link - /// AttributeFactory}. The stream is not yet initialized, before using set a value using the various - /// setValue() methods. - NumericTokenStream(AttributeFactoryPtr factory, int32_t precisionStep); - - virtual ~NumericTokenStream(); - - LUCENE_CLASS(NumericTokenStream); - - protected: - TermAttributePtr termAtt; - TypeAttributePtr typeAtt; - PositionIncrementAttributePtr posIncrAtt; - - int32_t shift; - int32_t valSize; // valSize == 0 means not initialized - int32_t precisionStep; - - int64_t value; - - public: - /// The full precision token gets this token type assigned. - static const String& TOKEN_TYPE_FULL_PREC(); - - /// The lower precision tokens gets this token type assigned. - static const String& TOKEN_TYPE_LOWER_PREC(); - - /// Initializes the token stream with the supplied long value. - /// @param value the value, for which this TokenStream should enumerate tokens. - /// @return this instance, because of this you can use it the following way: - /// newLucene(name, newLucene(precisionStep)->setLongValue(value)) - NumericTokenStreamPtr setLongValue(int64_t value); - - /// Initializes the token stream with the supplied int value. - /// @param value the value, for which this TokenStream should enumerate tokens. - /// @return this instance, because of this you can use it the following way: - /// newLucene(name, newLucene(precisionStep)->setIntValue(value)) - NumericTokenStreamPtr setIntValue(int32_t value); - - /// Initializes the token stream with the supplied double value. 
- /// @param value the value, for which this TokenStream should enumerate tokens. - /// @return this instance, because of this you can use it the following way: - /// newLucene(name, newLucene(precisionStep)->setDoubleValue(value)) - NumericTokenStreamPtr setDoubleValue(double value); - - virtual void reset(); - virtual bool incrementToken(); - virtual String toString(); - }; -} - -#endif diff --git a/include/NumericUtils.h b/include/NumericUtils.h deleted file mode 100644 index bc7763ca..00000000 --- a/include/NumericUtils.h +++ /dev/null @@ -1,180 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef NUMERICUTILS_H -#define NUMERICUTILS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// This is a helper class to generate prefix-encoded representations for numerical values and supplies converters - /// to represent double values as sortable integers/longs. - /// - /// To quickly execute range queries in Apache Lucene, a range is divided recursively into multiple intervals for - /// searching: The center of the range is searched only with the lowest possible precision in the trie, while the - /// boundaries are matched more exactly. This reduces the number of terms dramatically. - /// - /// This class generates terms to achieve this: First the numerical integer values need to be converted to strings. - /// For that integer values (32 bit or 64 bit) are made unsigned and the bits are converted to ASCII chars with each - /// 7 bit. The resulting string is sortable like the original integer value. Each value is also prefixed (in the - /// first char) by the shift value (number of bits removed) used during encoding. 
- /// - /// To also index floating point numbers, this class supplies two methods to convert them to integer values by - /// changing their bit layout: {@link #doubleToSortableLong}, {@link #doubleToSortableInt}. You will have no precision - /// loss by converting floating point numbers to integers and back (only that the integer form is not usable). Other - /// data types like dates can easily converted to longs or ints (eg. date to long). - /// - /// For easy usage, the trie algorithm is implemented for indexing inside {@link NumericTokenStream} that can index - /// int, long, and double. For querying, {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query - /// part for the same data types. - /// - /// This class can also be used, to generate lexicographically sortable (according {@link std::string#compare}) - /// representations of numeric data types for other usages (eg. sorting). - class LPPAPI NumericUtils : public LuceneObject - { - public: - virtual ~NumericUtils(); - - LUCENE_CLASS(NumericUtils); - - public: - /// The default precision step used by {@link NumericField}, {@link NumericTokenStream}, {@link NumericRangeQuery}, - /// and {@link NumericRangeFilter} as default. - static const int32_t PRECISION_STEP_DEFAULT; - - /// Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + - /// shift in the first character. - static const wchar_t SHIFT_START_LONG; - - /// The maximum term length (used for char[] buffer size) for encoding long values. - /// @see #longToPrefixCoded(long,int,char[]) - static const int32_t BUF_SIZE_LONG; - - /// Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + - /// shift in the first character. - static const wchar_t SHIFT_START_INT; - - /// The maximum term length (used for char[] buffer size) for encoding int values. 
- /// @see #intToPrefixCoded(int,int,char[]) - static const int32_t BUF_SIZE_INT; - - public: - /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by - /// {@link NumericTokenStream}. - /// @param val the numeric value - /// @param shift how many bits to strip from the right - /// @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG} length - /// @return number of chars written to buffer - static int32_t longToPrefixCoded(int64_t val, int32_t shift, CharArray buffer); - - /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by - /// {@link LongRangeBuilder}. - /// @param val the numeric value - /// @param shift how many bits to strip from the right - static String longToPrefixCoded(int64_t val, int32_t shift); - - /// This is a convenience method, that returns prefix coded bits of a long without reducing the precision. - /// It can be used to store the full precision value as a stored field in index. - /// To decode, use {@link #prefixCodedToLong}. - static String longToPrefixCoded(int64_t val); - - /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by {@link - /// NumericTokenStream}. - /// @param val the numeric value - /// @param shift how many bits to strip from the right - /// @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT} length - /// @return number of chars written to buffer - static int32_t intToPrefixCoded(int32_t val, int32_t shift, CharArray buffer); - - /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by {@link - /// IntRangeBuilder}. 
- /// @param val the numeric value - /// @param shift how many bits to strip from the right - static String intToPrefixCoded(int32_t val, int32_t shift); - - /// This is a convenience method, that returns prefix coded bits of an int without reducing the precision. - /// It can be used to store the full precision value as a stored field in index. - /// To decode, use {@link #prefixCodedToInt}. - static String intToPrefixCoded(int32_t val); - - /// Returns a long from prefixCoded characters. Rightmost bits will be zero for lower precision codes. - /// This method can be used to decode eg. a stored field. - /// @see #longToPrefixCoded(int64_t) - static int64_t prefixCodedToLong(const String& prefixCoded); - - /// Returns an int from prefixCoded characters. Rightmost bits will be zero for lower precision codes. - /// This method can be used to decode eg. a stored field. - /// @see #intToPrefixCoded(int32_t) - static int32_t prefixCodedToInt(const String& prefixCoded); - - /// Converts a double value to a sortable signed long. The value is converted by getting their IEEE 754 - /// floating-point "double format" bit layout and then some bits are swapped, to be able to compare the - /// result as int64_t. By this the precision is not reduced, but the value can easily used as a int64_t. - /// @see #sortableLongToDouble - static int64_t doubleToSortableLong(double val); - - /// Convenience method: this just returns: longToPrefixCoded(doubleToSortableLong(val)) - static String doubleToPrefixCoded(double val); - - /// Converts a sortable long back to a double. - /// @see #doubleToSortableLong - static double sortableLongToDouble(int64_t val); - - /// Convenience method: this just returns: sortableLongToDouble(prefixCodedToLong(val)) - static double prefixCodedToDouble(const String& val); - - /// Splits a int64_t range recursively. 
You may implement a builder that adds clauses to a {@link BooleanQuery} - /// for each call to its {@link LongRangeBuilder#addRange(String,String)} method. - /// This method is used by {@link NumericRangeQuery}. - static void splitLongRange(LongRangeBuilderPtr builder, int32_t precisionStep, int64_t minBound, int64_t maxBound); - - /// Splits an int32_t range recursively. You may implement a builder that adds clauses to a {@link BooleanQuery} - /// for each call to its {@link IntRangeBuilder#addRange(String,String)} method. - /// This method is used by {@link NumericRangeQuery}. - static void splitIntRange(IntRangeBuilderPtr builder, int32_t precisionStep, int32_t minBound, int32_t maxBound); - - /// This helper does the splitting for both 32 and 64 bit. - static void splitRange(LuceneObjectPtr builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound); - - /// Helper that delegates to correct range builder - static void addRange(LuceneObjectPtr builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift); - }; - - /// Callback for {@link #splitLongRange}. You need to overwrite only one of the methods. - /// NOTE: This is a very low-level interface, the method signatures may change in later versions. - class LPPAPI LongRangeBuilder : public LuceneObject - { - public: - virtual ~LongRangeBuilder(); - - public: - /// Overwrite this method, if you like to receive the already prefix encoded range bounds. You can directly build - /// classical (inclusive) range queries from them. - virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); - - /// Overwrite this method, if you like to receive the raw long range bounds. You can use this for eg. debugging - /// purposes (print out range bounds). 
- virtual void addRange(int64_t min, int64_t max, int32_t shift); - }; - - class LPPAPI IntRangeBuilder : public LuceneObject - { - public: - virtual ~IntRangeBuilder(); - - public: - /// Overwrite this method, if you like to receive the already prefix encoded range bounds. You can directly build - /// classical range (inclusive) queries from them. - virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); - - /// Overwrite this method, if you like to receive the raw int range bounds. You can use this for eg. debugging - /// purposes (print out range bounds). - virtual void addRange(int32_t min, int32_t max, int32_t shift); - }; -} - -#endif diff --git a/include/OffsetAttribute.h b/include/OffsetAttribute.h deleted file mode 100644 index a2e9c256..00000000 --- a/include/OffsetAttribute.h +++ /dev/null @@ -1,53 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef OFFSETATTRIBUTE_H -#define OFFSETATTRIBUTE_H - -#include "Attribute.h" - -namespace Lucene -{ - /// The start and end character offset of a Token. - class LPPAPI OffsetAttribute : public Attribute - { - public: - OffsetAttribute(); - virtual ~OffsetAttribute(); - - LUCENE_CLASS(OffsetAttribute); - - protected: - int32_t _startOffset; - int32_t _endOffset; - - public: - virtual String toString(); - - /// Returns this Token's starting offset, the position of the first character corresponding to this token - /// in the source text. - /// - /// Note that the difference between endOffset() and startOffset() may not be equal to termText.length(), - /// as the term text may have been altered by a stemmer or some other filter. 
- virtual int32_t startOffset(); - - /// Set the starting and ending offset. - /// @see #startOffset() and #endOffset() - virtual void setOffset(int32_t startOffset, int32_t endOffset); - - /// Returns this Token's ending offset, one greater than the position of the last character corresponding - /// to this token in the source text. The length of the token in the source text is (endOffset - startOffset). - virtual int32_t endOffset(); - - virtual void clear(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual void copyTo(AttributePtr target); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/OpenBitSet.h b/include/OpenBitSet.h deleted file mode 100644 index ce121c7b..00000000 --- a/include/OpenBitSet.h +++ /dev/null @@ -1,236 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef OPENBITSET_H -#define OPENBITSET_H - -#include "DocIdSet.h" - -namespace Lucene -{ - /// An "open" BitSet implementation that allows direct access to the array of words storing the bits. - /// - /// The goals of OpenBitSet are the fastest implementation possible, and maximum code reuse. Extra - /// safety and encapsulation may always be built on top, but if that's built in, the cost can never - /// be removed (and hence people re-implement their own version in order to get better performance). - class LPPAPI OpenBitSet : public DocIdSet - { - public: - /// Constructs an OpenBitSet large enough to hold numBits. - OpenBitSet(int64_t numBits = 64); - - /// Constructs an OpenBitSet from an existing LongArray. 
- /// - /// The first 64 bits are in long[0], with bit index 0 at the least significant bit, and bit - /// index 63 at the most significant. Given a bit index, the word containing it is long[index/64], - /// and it is at bit number index%64 within that word. - /// - /// numWords are the number of elements in the array that contain set bits (non-zero longs). - /// numWords should be <= bits.length(), and any existing words in the array at position >= - /// numWords should be zero. - OpenBitSet(LongArray bits, int32_t numWords); - - virtual ~OpenBitSet(); - - LUCENE_CLASS(OpenBitSet); - - protected: - LongArray bits; - int32_t wlen; // number of words (elements) used in the array - - public: - virtual DocIdSetIteratorPtr iterator(); - - /// This DocIdSet implementation is cacheable. - virtual bool isCacheable(); - - /// Returns the current capacity in bits (1 greater than the index of the last bit) - int64_t capacity(); - - /// Returns the current capacity of this set. Included for compatibility. This is *not* - /// equal to {@link #cardinality} - int64_t size(); - - /// Returns true if there are no set bits - bool isEmpty(); - - /// Returns the long[] storing the bits - LongArray getBits(); - - /// Sets a new long[] to use as the bit storage - void setBits(LongArray bits); - - /// Gets the number of longs in the array that are in use - int32_t getNumWords(); - - /// Sets the number of longs in the array that are in use - void setNumWords(int32_t numWords); - - /// Returns true or false for the specified bit index. - bool get(int32_t index); - - /// Returns true or false for the specified bit index. - /// The index should be less than the OpenBitSet size - bool fastGet(int32_t index); - - /// Returns true or false for the specified bit index - bool get(int64_t index); - - /// Returns true or false for the specified bit index. - /// The index should be less than the OpenBitSet size. - bool fastGet(int64_t index); - - /// Returns 1 if the bit is set, 0 if not. 
- /// The index should be less than the OpenBitSet size - int32_t getBit(int32_t index); - - /// Sets a bit, expanding the set size if necessary - void set(int64_t index); - - /// Sets the bit at the specified index. - /// The index should be less than the OpenBitSet size. - void fastSet(int32_t index); - - /// Sets the bit at the specified index. - /// The index should be less than the OpenBitSet size. - void fastSet(int64_t index); - - /// Sets a range of bits, expanding the set size if necessary - /// @param startIndex lower index - /// @param endIndex one-past the last bit to set - void set(int64_t startIndex, int64_t endIndex); - - /// Clears a bit. - /// The index should be less than the OpenBitSet size. - void fastClear(int32_t index); - - /// Clears a bit. - /// The index should be less than the OpenBitSet size. - void fastClear(int64_t index); - - /// Clears a bit, allowing access beyond the current set size without changing the size. - void clear(int64_t index); - - /// Clears a range of bits. Clearing past the end does not change the size of the set. - /// @param startIndex lower index - /// @param endIndex one-past the last bit to clear - void clear(int32_t startIndex, int32_t endIndex); - - /// Clears a range of bits. Clearing past the end does not change the size of the set. - /// @param startIndex lower index - /// @param endIndex one-past the last bit to clear - void clear(int64_t startIndex, int64_t endIndex); - - /// Sets a bit and returns the previous value. - /// The index should be less than the OpenBitSet size. - bool getAndSet(int32_t index); - - /// Sets a bit and returns the previous value. - /// The index should be less than the OpenBitSet size. - bool getAndSet(int64_t index); - - /// Flips a bit. - /// The index should be less than the OpenBitSet size. - void fastFlip(int32_t index); - - /// Flips a bit. - /// The index should be less than the OpenBitSet size. 
- void fastFlip(int64_t index); - - /// Flips a bit, expanding the set size if necessary - void flip(int64_t index); - - /// Flips a bit and returns the resulting bit value. - /// The index should be less than the OpenBitSet size. - bool flipAndGet(int32_t index); - - /// Flips a bit and returns the resulting bit value. - /// The index should be less than the OpenBitSet size. - bool flipAndGet(int64_t index); - - /// Flips a range of bits, expanding the set size if necessary - /// @param startIndex lower index - /// @param endIndex one-past the last bit to flip - void flip(int64_t startIndex, int64_t endIndex); - - /// @return the number of set bits - int64_t cardinality(); - - /// Returns the popcount or cardinality of the intersection of the two sets. - /// Neither set is modified. - static int64_t intersectionCount(OpenBitSetPtr a, OpenBitSetPtr b); - - /// Returns the popcount or cardinality of the union of the two sets. - /// Neither set is modified. - static int64_t unionCount(OpenBitSetPtr a, OpenBitSetPtr b); - - /// Returns the popcount or cardinality of "a and not b" or "intersection(a, not(b))". - /// Neither set is modified. - static int64_t andNotCount(OpenBitSetPtr a, OpenBitSetPtr b); - - /// Returns the popcount or cardinality of the exclusive-or of the two sets. - /// Neither set is modified. - static int64_t xorCount(OpenBitSetPtr a, OpenBitSetPtr b); - - /// Returns the index of the first set bit starting at the index specified. - /// -1 is returned if there are no more set bits. - int32_t nextSetBit(int32_t index); - - /// Returns the index of the first set bit starting at the index specified. - /// -1 is returned if there are no more set bits. - int64_t nextSetBit(int64_t index); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// this = this AND other - void intersect(OpenBitSetPtr other); - - /// this = this OR other - void _union(OpenBitSetPtr other); - - /// Remove all elements set in other. 
this = this AND_NOT other - void remove(OpenBitSetPtr other); - - /// this = this XOR other - void _xor(OpenBitSetPtr other); - - /// see {@link intersect} - void _and(OpenBitSetPtr other); - - /// see {@link union} - void _or(OpenBitSetPtr other); - - /// see {@link remove} - void andNot(OpenBitSetPtr other); - - /// Returns true if the sets have any elements in common - bool intersects(OpenBitSetPtr other); - - /// Expand the LongArray with the size given as a number of words (64 bit longs). - /// getNumWords() is unchanged by this call. - void ensureCapacityWords(int32_t numWords); - - /// Ensure that the LongArray is big enough to hold numBits, expanding it if necessary. - /// getNumWords() is unchanged by this call. - void ensureCapacity(int64_t numBits); - - /// Lowers numWords, the number of words in use, by checking for trailing zero words. - void trimTrailingZeros(); - - /// Returns the number of 64 bit words it would take to hold numBits. - static int32_t bits2words(int64_t numBits); - - /// Returns true if both sets have the same bits set - virtual bool equals(LuceneObjectPtr other); - - virtual int32_t hashCode(); - - protected: - int32_t expandingWordNum(int64_t index); - }; -} - -#endif diff --git a/include/OpenBitSetDISI.h b/include/OpenBitSetDISI.h deleted file mode 100644 index 6068d8e5..00000000 --- a/include/OpenBitSetDISI.h +++ /dev/null @@ -1,50 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef OPENBITSETDISI_H -#define OPENBITSETDISI_H - -#include "OpenBitSet.h" - -namespace Lucene -{ - class LPPAPI OpenBitSetDISI : public OpenBitSet - { - public: - /// Construct an OpenBitSetDISI with its bits set from the doc ids of the given DocIdSetIterator. - /// Also give a maximum size one larger than the largest doc id for which a bit may ever be set on - /// this OpenBitSetDISI. - OpenBitSetDISI(DocIdSetIteratorPtr disi, int32_t maxSize); - - /// Construct an OpenBitSetDISI with no bits set, and a given maximum size one larger than the largest - /// doc id for which a bit may ever be set on this OpenBitSetDISI. - OpenBitSetDISI(int32_t maxSize); - - virtual ~OpenBitSetDISI(); - - LUCENE_CLASS(OpenBitSetDISI); - - public: - /// Perform an in-place OR with the doc ids from a given DocIdSetIterator, setting the bit for each - /// such doc id. These doc ids should be smaller than the maximum size passed to the constructor. - void inPlaceOr(DocIdSetIteratorPtr disi); - - /// Perform an in-place AND with the doc ids from a given DocIdSetIterator, leaving only the bits set - /// for which the doc ids are in common. These doc ids should be smaller than the maximum size passed - /// to the constructor. - void inPlaceAnd(DocIdSetIteratorPtr disi); - - /// Perform an in-place NOT with the doc ids from a given DocIdSetIterator, clearing all the bits for - /// each such doc id. These doc ids should be smaller than the maximum size passed to the constructor. - void inPlaceNot(DocIdSetIteratorPtr disi); - - /// Perform an inplace XOR with the doc ids from a given DocIdSetIterator, flipping all the bits for - /// each such doc id. These doc ids should be smaller than the maximum size passed to the constructor. 
- void inPlaceXor(DocIdSetIteratorPtr disi); - }; -} - -#endif diff --git a/include/OpenBitSetIterator.h b/include/OpenBitSetIterator.h deleted file mode 100644 index e0a50693..00000000 --- a/include/OpenBitSetIterator.h +++ /dev/null @@ -1,52 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef OPENBITSETITERATOR_H -#define OPENBITSETITERATOR_H - -#include "DocIdSetIterator.h" - -namespace Lucene -{ - /// An iterator to iterate over set bits in an OpenBitSet. - /// This is faster than nextSetBit() for iterating over the complete set of bits, - /// especially when the density of the bits set is high. - class LPPAPI OpenBitSetIterator : public DocIdSetIterator - { - public: - OpenBitSetIterator(OpenBitSetPtr bitSet); - OpenBitSetIterator(LongArray bits, int32_t numWords); - virtual ~OpenBitSetIterator(); - - LUCENE_CLASS(OpenBitSetIterator); - - protected: - LongArray arr; - int32_t words; - int32_t i; - int64_t word; - int32_t wordShift; - int32_t indexArray; - int32_t curDocId; - - /// The General Idea: instead of having an array per byte that has the offsets of the - /// next set bit, that array could be packed inside a 32 bit integer (8 4 bit numbers). 
- /// That should be faster than accessing an array for each index, and the total array - /// size is kept smaller (256*sizeof(int32_t))=1K - static const int32_t bitlist[]; - - public: - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - virtual int32_t docID(); - - protected: - /// 64 bit shifts - void shift(); - }; -} - -#endif diff --git a/include/OrdFieldSource.h b/include/OrdFieldSource.h deleted file mode 100644 index c2b730b4..00000000 --- a/include/OrdFieldSource.h +++ /dev/null @@ -1,52 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef ORDFIELDSOURCE_H -#define ORDFIELDSOURCE_H - -#include "ValueSource.h" - -namespace Lucene -{ - /// Obtains the ordinal of the field value from the default Lucene {@link FieldCache} using getStringIndex(). - /// - /// The native lucene index order is used to assign an ordinal value for each field value. - /// - /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. - /// Example: - /// If there were only three field values: "apple","banana","pear" then ord("apple")=1, ord("banana")=2, - /// ord("pear")=3 - /// - /// WARNING: ord() depends on the position in an index and can thus change when other documents are inserted - /// or deleted, or if a MultiSearcher is used. - /// - /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite - /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's - /// best to switch your application to pass only atomic (single segment) readers to this API. 
Alternatively, - /// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU - /// per lookup but will not consume double the FieldCache RAM. - class LPPAPI OrdFieldSource : public ValueSource - { - public: - /// Constructor for a certain field. - ///@param field field whose values order is used. - OrdFieldSource(const String& field); - virtual ~OrdFieldSource(); - - LUCENE_CLASS(OrdFieldSource); - - protected: - String field; - - public: - virtual String description(); - virtual DocValuesPtr getValues(IndexReaderPtr reader); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/ParallelMultiSearcher.h b/include/ParallelMultiSearcher.h deleted file mode 100644 index 25471c13..00000000 --- a/include/ParallelMultiSearcher.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PARALLELMULTISEARCHER_H -#define PARALLELMULTISEARCHER_H - -#include "MultiSearcher.h" - -namespace Lucene -{ - /// Implements parallel search over a set of Searchables. - /// - /// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or - /// {@link #search(QueryPtr, FilterPtr, int32_t)} methods. - class LPPAPI ParallelMultiSearcher : public MultiSearcher - { - public: - /// Creates a {@link Searchable} which searches searchables. - ParallelMultiSearcher(Collection searchables); - virtual ~ParallelMultiSearcher(); - - LUCENE_CLASS(ParallelMultiSearcher); - - public: - /// Executes each {@link Searchable}'s docFreq() in its own thread and waits for each search to - /// complete and merge the results back together. 
- virtual int32_t docFreq(TermPtr term); - - /// A search implementation which executes each {@link Searchable} in its own thread and waits - /// for each search to complete and merge the results back together. - virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n); - - /// A search implementation allowing sorting which spans a new thread for each Searchable, waits - /// for each search to complete and merges the results back together. - virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort); - }; -} - -#endif diff --git a/include/ParallelReader.h b/include/ParallelReader.h deleted file mode 100644 index 0b3a173e..00000000 --- a/include/ParallelReader.h +++ /dev/null @@ -1,181 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PARALLELREADER_H -#define PARALLELREADER_H - -#include "IndexReader.h" - -namespace Lucene -{ - /// An IndexReader which reads multiple, parallel indexes. Each index added must have the same number of - /// documents, but typically each contains different fields. Each document contains the union of the fields - /// of all documents with the same document number. When searching, matches for a query term are from the - /// first index added that has the field. - /// - /// This is useful, eg., with collections that have large fields which change rarely and small fields that - /// change more frequently. The smaller fields may be re-indexed in a new index and both indexes may be - /// searched together. - /// - /// Warning: It is up to you to make sure all indexes are created and modified the same way. 
For example, - /// if you add documents to one index, you need to add the same documents in the same order to the other - /// indexes. Failure to do so will result in undefined behavior - class LPPAPI ParallelReader : public IndexReader - { - public: - /// Construct a ParallelReader. - /// @param closeSubReaders indicates whether the subreaders should be closed when this ParallelReader - /// is closed - ParallelReader(bool closeSubReaders = true); - - virtual ~ParallelReader(); - - LUCENE_CLASS(ParallelReader); - - protected: - Collection readers; - Collection decrefOnClose; // remember which subreaders to decRef on close - bool incRefReaders; - MapStringIndexReader fieldToReader; - MapIndexReaderSetString readerToFields; - Collection storedFieldReaders; - - int32_t _maxDoc; - int32_t _numDocs; - bool _hasDeletions; - - public: - /// Add an IndexReader. - void add(IndexReaderPtr reader); - - /// Add an IndexReader whose stored fields will not be returned. This can accelerate search when stored - /// fields are only needed from a subset of the IndexReaders. - void add(IndexReaderPtr reader, bool ignoreStoredFields); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Tries to reopen the subreaders. - /// - /// If one or more subreaders could be re-opened (ie. subReader.reopen() returned a new instance != subReader), - /// then a new ParallelReader instance is returned, otherwise this instance is returned. - /// - /// A re-opened instance might share one or more subreaders with the old instance. Index modification - /// operations result in undefined behavior when performed before the old instance is closed. - /// (see {@link IndexReader#reopen()}). - /// - /// If subreaders are shared, then the reference count of those readers is increased to ensure that the - /// subreaders remain open until the last referring reader is closed. - virtual IndexReaderPtr reopen(); - - /// Returns the number of documents in this index. 
- virtual int32_t numDocs(); - - /// Returns one greater than the largest possible document number. This may be used to, eg., determine - /// how big to allocate an array which will have an element for every document number in an index. - virtual int32_t maxDoc(); - - /// Returns true if any documents have been deleted - virtual bool hasDeletions(); - - /// Returns true if document n has been deleted - virtual bool isDeleted(int32_t n); - - /// Get the {@link Document} at the n'th position. - virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); - - /// Return an array of term frequency vectors for the specified document. - virtual Collection getTermFreqVectors(int32_t docNumber); - - /// Return a term frequency vector for the specified document and field. - virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); - - /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays - /// of the {@link TermFreqVector}. - virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); - - /// Map all the term vectors for all fields in a Document - virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); - - /// Returns true if there are norms stored for this field. - virtual bool hasNorms(const String& field); - - /// Returns the byte-encoded normalization factor for the named field of every document. - virtual ByteArray norms(const String& field); - - /// Reads the byte-encoded normalization factor for the named field of every document. - virtual void norms(const String& field, ByteArray norms, int32_t offset); - - /// Returns an enumeration of all the terms in the index. The enumeration is ordered by - /// Term::compareTo(). Each term is greater than all that precede it in the enumeration. 
- /// Note that after calling terms(), {@link TermEnum#next()} must be called on the resulting - /// enumeration before calling other methods such as {@link TermEnum#term()}. - virtual TermEnumPtr terms(); - - /// Returns an enumeration of all terms starting at a given term. If the given term does not - /// exist, the enumeration is positioned at the first term greater than the supplied term. - /// The enumeration is ordered by Term::compareTo(). Each term is greater than all that precede - /// it in the enumeration. - virtual TermEnumPtr terms(TermPtr t); - - /// Returns the number of documents containing the term t. - virtual int32_t docFreq(TermPtr t); - - /// Returns an enumeration of all the documents which contain term. For each document, the - /// document number, the frequency of the term in that document is also provided, for use in - /// search scoring. If term is null, then all non-deleted docs are returned with freq=1. - /// The enumeration is ordered by document number. Each document number is greater than all - /// that precede it in the enumeration. - virtual TermDocsPtr termDocs(TermPtr term); - - /// Returns an unpositioned {@link TermDocs} enumerator. - virtual TermDocsPtr termDocs(); - - /// Returns an enumeration of all the documents which contain term. - virtual TermPositionsPtr termPositions(TermPtr term); - - /// Returns an unpositioned {@link TermPositions} enumerator. - virtual TermPositionsPtr termPositions(); - - /// Checks recursively if all subreaders are up to date. - virtual bool isCurrent(); - - /// Checks recursively if all subindexes are optimized - virtual bool isOptimized(); - - /// Not implemented. - virtual int64_t getVersion(); - - Collection getSubReaders(); - - /// Get a list of unique field names that exist in this index and have the specified field option - /// information. 
- virtual HashSet getFieldNames(FieldOption fieldOption); - - protected: - IndexReaderPtr doReopen(bool doClone); - - /// Implements deletion of the document numbered docNum. - virtual void doDelete(int32_t docNum); - - /// Implements actual undeleteAll(). - virtual void doUndeleteAll(); - - /// Implements setNorm in subclass. - virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); - - /// Implements commit. - virtual void doCommit(MapStringString commitUserData); - - /// Implements close. - virtual void doClose(); - - friend class ParallelTermEnum; - friend class ParallelTermDocs; - friend class ParallelTermPositions; - }; -} - -#endif diff --git a/include/Payload.h b/include/Payload.h deleted file mode 100644 index a05ddf8a..00000000 --- a/include/Payload.h +++ /dev/null @@ -1,88 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PAYLOAD_H -#define PAYLOAD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A Payload is metadata that can be stored together with each occurrence of a term. This metadata is stored - /// inline in the posting list of the specific term. - /// - /// To store payloads in the index a {@link TokenStream} has to be used that produces payload data. - /// - /// Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} to retrieve - /// the payloads from the index. - class LPPAPI Payload : public LuceneObject - { - public: - /// Creates an empty payload and does not allocate a byte array. - Payload(); - - /// Creates a new payload with the the given array as data. A reference to the passed-in array is held, - /// ie. no copy is made. 
- /// @param data the data of this payload - Payload(ByteArray data); - - /// Creates a new payload with the the given array as data. A reference to the passed-in array is held, - /// ie. no copy is made. - /// @param data the data of this payload - /// @param offset the offset in the data byte array - /// @param length the length of the data - Payload(ByteArray data, int32_t offset, int32_t length); - - virtual ~Payload(); - - LUCENE_CLASS(Payload); - - protected: - /// the byte array containing the payload data - ByteArray data; - - /// the offset within the byte array - int32_t offset; - - /// the length of the payload data - int32_t _length; - - public: - /// Sets this payloads data. A reference to the passed-in array is held, ie. no copy is made. - void setData(ByteArray data); - - /// Sets this payloads data. A reference to the passed-in array is held, ie. no copy is made. - void setData(ByteArray data, int32_t offset, int32_t length); - - /// Returns a reference to the underlying byte array that holds this payloads data. - ByteArray getData(); - - /// Returns the offset in the underlying byte array - int32_t getOffset(); - - /// Returns the length of the payload data. - int32_t length(); - - /// Returns the byte at the given index. - uint8_t byteAt(int32_t index); - - /// Allocates a new byte array, copies the payload data into it and returns it. - ByteArray toByteArray(); - - /// Copies the payload data to a byte array. - /// @param target the target byte array - /// @param targetOffset the offset in the target byte array - void copyTo(ByteArray target, int32_t targetOffset); - - /// Clones this payload by creating a copy of the underlying byte array. 
- virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/PayloadAttribute.h b/include/PayloadAttribute.h deleted file mode 100644 index 92a567ff..00000000 --- a/include/PayloadAttribute.h +++ /dev/null @@ -1,48 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PAYLOADATTRIBUTE_H -#define PAYLOADATTRIBUTE_H - -#include "Attribute.h" - -namespace Lucene -{ - /// The start and end character offset of a Token. - class LPPAPI PayloadAttribute : public Attribute - { - public: - /// Initialize this attribute with no payload. - PayloadAttribute(); - - /// Initialize this attribute with the given payload. - PayloadAttribute(PayloadPtr payload); - - virtual ~PayloadAttribute(); - - LUCENE_CLASS(PayloadAttribute); - - protected: - PayloadPtr payload; - - public: - virtual String toString(); - - /// Returns this Token's payload. - virtual PayloadPtr getPayload(); - - /// Sets this Token's payload. - virtual void setPayload(PayloadPtr payload); - - virtual void clear(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual void copyTo(AttributePtr target); - }; -} - -#endif diff --git a/include/PayloadFunction.h b/include/PayloadFunction.h deleted file mode 100644 index d5243597..00000000 --- a/include/PayloadFunction.h +++ /dev/null @@ -1,58 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PAYLOADFUNCTION_H -#define PAYLOADFUNCTION_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// An abstract class that defines a way for Payload*Query instances to transform the cumulative - /// effects of payload scores for a document. - /// - /// @see PayloadTermQuery for more information - class LPPAPI PayloadFunction : public LuceneObject - { - protected: - PayloadFunction(); - - public: - virtual ~PayloadFunction(); - LUCENE_CLASS(PayloadFunction); - - public: - /// Calculate the score up to this point for this doc and field - /// @param docId The current doc - /// @param field The field - /// @param start The start position of the matching Span - /// @param end The end position of the matching Span - /// @param numPayloadsSeen The number of payloads seen so far - /// @param currentScore The current score so far - /// @param currentPayloadScore The score for the current payload - /// @return The new current Score - /// - /// @see Spans - virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, - double currentScore, double currentPayloadScore) = 0; - - /// Calculate the final score for all the payloads seen so far for this doc/field - /// @param docId The current doc - /// @param field The current field - /// @param numPayloadsSeen The total number of payloads seen on this document - /// @param payloadScore The raw score for those payloads - /// @return The final score for the payloads - virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) = 0; - - /// Return hash code for this object. 
- virtual int32_t hashCode() = 0; - - /// Return whether two objects are equal - virtual bool equals(LuceneObjectPtr other) = 0; - }; -} - -#endif diff --git a/include/PayloadNearQuery.h b/include/PayloadNearQuery.h deleted file mode 100644 index baef4cbd..00000000 --- a/include/PayloadNearQuery.h +++ /dev/null @@ -1,101 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PAYLOADNEARQUERY_H -#define PAYLOADNEARQUERY_H - -#include "SpanNearQuery.h" -#include "SpanWeight.h" -#include "SpanScorer.h" - -namespace Lucene -{ - /// This class is very similar to {@link SpanNearQuery} except that it factors in the value of the payloads - /// located at each of the positions where the {@link TermSpans} occurs. - /// - /// In order to take advantage of this, you must override {@link Similarity#scorePayload} which returns 1 - /// by default. - /// - /// Payload scores are aggregated using a pluggable {@link PayloadFunction}. 
- /// - /// @see Similarity#scorePayload - class LPPAPI PayloadNearQuery : public SpanNearQuery - { - public: - PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder); - PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder, PayloadFunctionPtr function); - - virtual ~PayloadNearQuery(); - - LUCENE_CLASS(PayloadNearQuery); - - protected: - String fieldName; - PayloadFunctionPtr function; - - public: - using SpanNearQuery::toString; - - virtual WeightPtr createWeight(SearcherPtr searcher); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual String toString(const String& field); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - friend class PayloadNearSpanWeight; - friend class PayloadNearSpanScorer; - }; - - class LPPAPI PayloadNearSpanWeight : public SpanWeight - { - public: - PayloadNearSpanWeight(SpanQueryPtr query, SearcherPtr searcher); - virtual ~PayloadNearSpanWeight(); - - LUCENE_CLASS(PayloadNearSpanWeight); - - public: - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - }; - - class LPPAPI PayloadNearSpanScorer : public SpanScorer - { - public: - PayloadNearSpanScorer(SpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms); - virtual ~PayloadNearSpanScorer(); - - LUCENE_CLASS(PayloadNearSpanScorer); - - public: - SpansPtr spans; - SimilarityPtr similarity; - - protected: - double payloadScore; - int32_t payloadsSeen; - - public: - /// Get the payloads associated with all underlying subspans - void getPayloads(Collection subSpans); - - virtual double score(); - - protected: - /// By default, uses the {@link PayloadFunction} to score the payloads, but can be overridden to do - /// other things. 
- /// @param payLoads The payloads - /// @param start The start position of the span being scored - /// @param end The end position of the span being scored - /// @see Spans - void processPayloads(Collection payLoads, int32_t start, int32_t end); - - virtual bool setFreqCurrentDoc(); - virtual ExplanationPtr explain(int32_t doc); - }; -} - -#endif diff --git a/include/PayloadSpanUtil.h b/include/PayloadSpanUtil.h deleted file mode 100644 index 2817c382..00000000 --- a/include/PayloadSpanUtil.h +++ /dev/null @@ -1,40 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PAYLOADSPANUTIL_H -#define PAYLOADSPANUTIL_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Experimental class to get set of payloads for most standard Lucene queries. Operates like Highlighter - - /// IndexReader should only contain doc of interest, best to use MemoryIndex. - class LPPAPI PayloadSpanUtil : public LuceneObject - { - public: - /// @param reader That contains doc with payloads to extract - PayloadSpanUtil(IndexReaderPtr reader); - - virtual ~PayloadSpanUtil(); - - LUCENE_CLASS(PayloadSpanUtil); - - protected: - IndexReaderPtr reader; - - public: - /// Query should be rewritten for wild/fuzzy support. 
- /// @return payloads Collection - Collection getPayloadsForQuery(QueryPtr query); - - protected: - void queryToSpanQuery(QueryPtr query, Collection payloads); - void getPayloads(Collection payloads, SpanQueryPtr query); - }; -} - -#endif diff --git a/include/PayloadTermQuery.h b/include/PayloadTermQuery.h deleted file mode 100644 index 7a9f16a1..00000000 --- a/include/PayloadTermQuery.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PAYLOADTERMQUERY_H -#define PAYLOADTERMQUERY_H - -#include "SpanTermQuery.h" - -namespace Lucene -{ - /// This class is very similar to {@link SpanTermQuery} except that it factors in the value of the payload - /// located at each of the positions where the {@link Term} occurs. - /// - /// In order to take advantage of this, you must override {@link Similarity#scorePayload(int32_t, const String&, - /// int32_t, int32_t, ByteArray, int32_t, int32_t)} which returns 1 by default. - /// - /// Payload scores are aggregated using a pluggable {@link PayloadFunction}. 
- class LPPAPI PayloadTermQuery : public SpanTermQuery - { - public: - PayloadTermQuery(TermPtr term, PayloadFunctionPtr function, bool includeSpanScore = true); - virtual ~PayloadTermQuery(); - - LUCENE_CLASS(PayloadTermQuery); - - protected: - PayloadFunctionPtr function; - bool includeSpanScore; - - public: - virtual WeightPtr createWeight(SearcherPtr searcher); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - friend class PayloadTermWeight; - friend class PayloadTermSpanScorer; - }; -} - -#endif diff --git a/include/PerFieldAnalyzerWrapper.h b/include/PerFieldAnalyzerWrapper.h deleted file mode 100644 index 71f67248..00000000 --- a/include/PerFieldAnalyzerWrapper.h +++ /dev/null @@ -1,69 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PERFIELDANALYZERWRAPPER_H -#define PERFIELDANALYZERWRAPPER_H - -#include "Analyzer.h" - -namespace Lucene -{ - /// This analyzer is used to facilitate scenarios where different fields require different analysis techniques. - /// Use {@link #addAnalyzer} to add a non-default analyzer on a field name basis. - /// - /// Example usage: - /// - ///
-    /// PerFieldAnalyzerWrapperPtr aWrapper = newLucene(newLucene());
-    /// aWrapper->addAnalyzer(L"firstname", newLucene());
-    /// aWrapper->addAnalyzer(L"lastname", newLucene());
-    /// 
- /// - /// In this example, StandardAnalyzer will be used for all fields except "firstname" and "lastname", for which - /// KeywordAnalyzer will be used. - /// - /// A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing and query parsing. - class LPPAPI PerFieldAnalyzerWrapper : public Analyzer - { - public: - /// Constructs with default analyzer. - /// @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use the - /// one provided here. - PerFieldAnalyzerWrapper(AnalyzerPtr defaultAnalyzer); - - /// Constructs with default analyzer and a map of analyzers to use for specific fields. - /// @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use the one provided here. - /// @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields - PerFieldAnalyzerWrapper(AnalyzerPtr defaultAnalyzer, MapStringAnalyzer fieldAnalyzers); - - virtual ~PerFieldAnalyzerWrapper(); - - LUCENE_CLASS(PerFieldAnalyzerWrapper); - - protected: - AnalyzerPtr defaultAnalyzer; - MapStringAnalyzer analyzerMap; - - public: - /// Defines an analyzer to use for the specified field. - /// @param fieldName field name requiring a non-default analyzer - /// @param analyzer non-default analyzer to use for field - void addAnalyzer(const String& fieldName, AnalyzerPtr analyzer); - - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - - /// Return the positionIncrementGap from the analyzer assigned to fieldName. 
- virtual int32_t getPositionIncrementGap(const String& fieldName); - - /// Return the offsetGap from the analyzer assigned to field - virtual int32_t getOffsetGap(FieldablePtr field); - - virtual String toString(); - }; -} - -#endif diff --git a/include/PhrasePositions.h b/include/PhrasePositions.h deleted file mode 100644 index df7b5d1e..00000000 --- a/include/PhrasePositions.h +++ /dev/null @@ -1,40 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PHRASEPOSITIONS_H -#define PHRASEPOSITIONS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Position of a term in a document that takes into account the term offset within the phrase. - class PhrasePositions : public LuceneObject - { - public: - PhrasePositions(TermPositionsPtr t, int32_t o); - virtual ~PhrasePositions(); - - LUCENE_CLASS(PhrasePositions); - - public: - int32_t doc; // current doc - int32_t position; // position in doc - int32_t count; // remaining pos in this doc - int32_t offset; // position in phrase - TermPositionsPtr tp; // stream of positions - PhrasePositionsPtr _next; // used to make lists - bool repeats; // there's other pp for same term (eg. query="1st word 2nd word"~1) - - public: - bool next(); - bool skipTo(int32_t target); - void firstPosition(); - bool nextPosition(); - }; -} - -#endif diff --git a/include/PhraseQuery.h b/include/PhraseQuery.h deleted file mode 100644 index 097129fd..00000000 --- a/include/PhraseQuery.h +++ /dev/null @@ -1,82 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PHRASEQUERY_H -#define PHRASEQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// A Query that matches documents containing a particular sequence of terms. A PhraseQuery is built by - /// QueryParser for input like "new york". - /// - /// This query may be combined with other terms or queries with a {@link BooleanQuery}. - class LPPAPI PhraseQuery : public Query - { - public: - /// Constructs an empty phrase query. - PhraseQuery(); - virtual ~PhraseQuery(); - - LUCENE_CLASS(PhraseQuery); - - protected: - String field; - Collection terms; - Collection positions; - int32_t maxPosition; - int32_t slop; - - public: - using Query::toString; - - /// Sets the number of other words permitted between words in query phrase. If zero, then this is an - /// exact phrase search. For larger values this works like a WITHIN or NEAR operator. - /// - /// The slop is in fact an edit-distance, where the units correspond to moves of terms in the query phrase - /// out of position. For example, to switch the order of two words requires two moves (the first move - /// places the words atop one another), so to permit re-orderings of phrases, the slop must be at least two. - /// - /// More exact matches are scored higher than sloppier matches, thus search results are sorted by exactness. - /// - /// The slop is zero by default, requiring exact matches. - void setSlop(int32_t slop); - - /// Returns the slop. - /// @see #setSlop() - int32_t getSlop(); - - /// Adds a term to the end of the query phrase. - /// The relative position of the term is the one immediately after the last term added. - void add(TermPtr term); - - /// Adds a term to the end of the query phrase. - /// The relative position of the term within the phrase is specified explicitly. This allows eg. 
phrases - /// with more than one term at the same position or phrases with gaps (eg. in connection with stopwords). - void add(TermPtr term, int32_t position); - - /// Returns the set of terms in this phrase. - Collection getTerms(); - - /// Returns the relative positions of terms in this phrase. - Collection getPositions(); - - virtual WeightPtr createWeight(SearcherPtr searcher); - virtual void extractTerms(SetTerm terms); - - /// Prints a user-readable version of this query. - virtual String toString(const String& field); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - friend class PhraseWeight; - }; -} - -#endif diff --git a/include/PhraseQueue.h b/include/PhraseQueue.h deleted file mode 100644 index f1e2a6cd..00000000 --- a/include/PhraseQueue.h +++ /dev/null @@ -1,27 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PHRASEQUEUE_H -#define PHRASEQUEUE_H - -#include "PriorityQueue.h" - -namespace Lucene -{ - class PhraseQueue : public PriorityQueue - { - public: - PhraseQueue(int32_t size); - virtual ~PhraseQueue(); - - LUCENE_CLASS(PhraseQueue); - - protected: - virtual bool lessThan(const PhrasePositionsPtr& first, const PhrasePositionsPtr& second); - }; -} - -#endif diff --git a/include/PhraseScorer.h b/include/PhraseScorer.h deleted file mode 100644 index d4d21023..00000000 --- a/include/PhraseScorer.h +++ /dev/null @@ -1,71 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PHRASESCORER_H -#define PHRASESCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// Scoring functionality for phrase queries. A document is considered matching if it contains the - /// phrase-query terms at "valid" positions. What "valid positions" are depends on the type of the - /// phrase query: for an exact phrase query terms are required to appear in adjacent locations, while - /// for a sloppy phrase query some distance between the terms is allowed. The abstract method {@link - /// #phraseFreq()} of extending classes is invoked for each document containing all the phrase query - /// terms, in order to compute the frequency of the phrase query in that document. A non zero frequency - /// means a match. - class PhraseScorer : public Scorer - { - public: - PhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, ByteArray norms); - virtual ~PhraseScorer(); - - LUCENE_CLASS(PhraseScorer); - - protected: - WeightPtr weight; - ByteArray norms; - double value; - - bool firstTime; - bool more; - PhraseQueuePtr pq; - PhrasePositionsPtr first; - PhrasePositionsPtr last; - - double freq; // phrase frequency in current doc as computed by phraseFreq(). - - public: - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual double score(); - virtual int32_t advance(int32_t target); - - /// Phrase frequency in current doc as computed by phraseFreq(). - double currentFreq(); - - virtual String toString(); - - protected: - /// Next without initial increment - bool doNext(); - - /// For a document containing all the phrase query terms, compute the frequency of the phrase in - /// that document. A non zero frequency means a match. 
- /// Note, that containing all phrase terms does not guarantee a match - they have to be found in - /// matching locations. - /// @return frequency of the phrase in current doc, 0 if not found. - virtual double phraseFreq() = 0; - - void init(); - void sort(); - void pqToList(); - void firstToLast(); - }; -} - -#endif diff --git a/include/PorterStemFilter.h b/include/PorterStemFilter.h deleted file mode 100644 index 7807b5a9..00000000 --- a/include/PorterStemFilter.h +++ /dev/null @@ -1,48 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PORTERSTEMFILTER_H -#define PORTERSTEMFILTER_H - -#include "TokenFilter.h" - -namespace Lucene -{ - /// Transforms the token stream as per the Porter stemming algorithm. Note: the input to the stemming filter must - /// already be in lower case, so you will need to use LowerCaseFilter or LowerCaseTokenizer further down the Tokenizer - /// chain in order for this to work properly. - /// - /// To use this filter with other analyzers, you'll want to write an Analyzer class that sets up the TokenStream chain - /// as you want it. To use this with LowerCaseTokenizer, for example, you'd write an analyzer like this: - /// - ///
-    /// class MyAnalyzer : public Analyzer
-    /// {
-    /// public:
-    ///     virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader)
-    ///     {
-    ///         return newLucene(newLucene(reader));
-    ///     }
-    /// };
-    /// 
- class LPPAPI PorterStemFilter : public TokenFilter - { - public: - PorterStemFilter(TokenStreamPtr input); - virtual ~PorterStemFilter(); - - LUCENE_CLASS(PorterStemFilter); - - protected: - PorterStemmerPtr stemmer; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; -} - -#endif diff --git a/include/PorterStemmer.h b/include/PorterStemmer.h deleted file mode 100644 index fd619c5e..00000000 --- a/include/PorterStemmer.h +++ /dev/null @@ -1,126 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PORTERSTEMMER_H -#define PORTERSTEMMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// This is the Porter stemming algorithm, coded up as thread-safe ANSI C by the author. - /// - /// It may be be regarded as canonical, in that it follows the algorithm presented in Porter, 1980, An algorithm - /// for suffix stripping, Program, Vol. 14, no. 3, pp 130-137, only differing from it at the points marked DEPARTURE. - /// - /// See also http://www.tartarus.org/~martin/PorterStemmer - /// - /// The algorithm as described in the paper could be exactly replicated by adjusting the points of DEPARTURE, but - /// this is barely necessary, because (a) the points of DEPARTURE are definitely improvements, and (b) no encoding - /// of the Porter stemmer I have seen is anything like as exact as this version, even with the points of DEPARTURE! - /// - /// Release 2 (the more old-fashioned, non-thread-safe version may be regarded as release 1.) 
- class PorterStemmer : public LuceneObject - { - public: - PorterStemmer(); - virtual ~PorterStemmer(); - - LUCENE_CLASS(PorterStemmer); - - protected: - wchar_t* b; // buffer for word to be stemmed - int32_t k; // offset to the end of the string - int32_t j; // a general offset into the string - int32_t i; // initial length of word - bool dirty; - - public: - bool stem(CharArray word); - - /// In stem(b, k), b is a char pointer, and the string to be stemmed is from b[0] to b[k] inclusive. - /// Possibly b[k+1] == '\0', but it is not important. The stemmer adjusts the characters b[0] ... b[k] and - /// stores the new end-point of the string, k'. Stemming never increases word length, so 0 <= k' <= k. - bool stem(wchar_t* b, int32_t k); - - wchar_t* getResultBuffer(); - int32_t getResultLength(); - - protected: - /// Returns true if b[i] is a consonant. ('b' means 'z->b', but here and below we drop 'z->' in comments. - bool cons(int32_t i); - - /// Measures the number of consonant sequences between 0 and j. If c is a consonant sequence and v a vowel - /// sequence, and <..> indicates arbitrary presence, - /// - /// gives 0 - /// vc gives 1 - /// vcvc gives 2 - /// vcvcvc gives 3 - /// ... - int32_t m(); - - /// Return true if 0,...j contains a vowel - bool vowelinstem(); - - /// Return true if j,(j-1) contain a double consonant. - bool doublec(int32_t j); - - /// Return true if i-2,i-1,i has the form consonant - vowel - consonant and also if the second c is not w,x or y. - /// This is used when trying to restore an e at the end of a short word. - /// - /// eg. cav(e), lov(e), hop(e), crim(e), but - /// snow, box, tray. - bool cvc(int32_t i); - - /// Returns true if 0,...k ends with the string s. - bool ends(const wchar_t* s); - - /// Sets (j+1),...k to the characters in the string s, readjusting k. - void setto(const wchar_t* s); - - void r(const wchar_t* s); - - /// step1ab() gets rid of plurals and -ed or -ing. eg. 
- /// - /// caresses -> caress - /// ponies -> poni - /// ties -> ti - /// caress -> caress - /// cats -> cat - /// - /// feed -> feed - /// agreed -> agree - /// disabled -> disable - /// - /// matting -> mat - /// mating -> mate - /// meeting -> meet - /// milling -> mill - /// messing -> mess - /// - /// meetings -> meet - void step1ab(); - - /// Turns terminal y to i when there is another vowel in the stem. - void step1c(); - - /// Maps double suffices to single ones. so -ization ( = -ize plus -ation) maps to -ize etc. note that the - /// string before the suffix must give m() > 0. - void step2(); - - /// Deals with -ic-, -full, -ness etc. similar strategy to step2. - void step3(); - - /// Takes off -ant, -ence etc., in context vcvc. - void step4(); - - /// Removes a final -e if m() > 1, and changes -ll to -l if m() > 1. - void step5(); - }; -} - -#endif diff --git a/include/PositionBasedTermVectorMapper.h b/include/PositionBasedTermVectorMapper.h deleted file mode 100644 index 8be2aa7b..00000000 --- a/include/PositionBasedTermVectorMapper.h +++ /dev/null @@ -1,80 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef POSITIONBASEDTERMVECTORMAPPER_H -#define POSITIONBASEDTERMVECTORMAPPER_H - -#include "TermVectorMapper.h" - -namespace Lucene -{ - class LPPAPI PositionBasedTermVectorMapper : public TermVectorMapper - { - public: - PositionBasedTermVectorMapper(bool ignoringOffsets = false); - virtual ~PositionBasedTermVectorMapper(); - - LUCENE_CLASS(PositionBasedTermVectorMapper); - - protected: - MapStringMapIntTermVectorsPositionInfo fieldToTerms; - - String currentField; - - /// A Map of Integer and TermVectorsPositionInfo - MapIntTermVectorsPositionInfo currentPositions; - - bool storeOffsets; - - public: - /// Never ignores positions. This mapper doesn't make much sense unless there are positions. - /// @return false - virtual bool isIgnoringPositions(); - - /// Callback for the TermVectorReader. - virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); - - /// Callback mechanism used by the TermVectorReader. - virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); - - /// Get the mapping between fields and terms, sorted by the comparator - /// @return A map between field names and a Map. The sub-Map key is the position as the integer, the value is - /// {@link PositionBasedTermVectorMapper}. 
- MapStringMapIntTermVectorsPositionInfo getFieldToTerms(); - }; - - /// Container for a term at a position - class LPPAPI TermVectorsPositionInfo : public LuceneObject - { - public: - TermVectorsPositionInfo(int32_t position, bool storeOffsets); - virtual ~TermVectorsPositionInfo(); - - LUCENE_CLASS(TermVectorsPositionInfo); - - protected: - int32_t position; - Collection terms; - Collection offsets; - - public: - void addTerm(const String& term, TermVectorOffsetInfoPtr info); - - /// @return The position of the term - int32_t getPosition(); - - /// Note, there may be multiple terms at the same position - /// @return A List of Strings - Collection getTerms(); - - /// Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple - /// entries since there may be multiple terms at a position. - /// @return A List of TermVectorOffsetInfo objects, if offsets are stored. - Collection getOffsets(); - }; -} - -#endif diff --git a/include/PositionIncrementAttribute.h b/include/PositionIncrementAttribute.h deleted file mode 100644 index 37235746..00000000 --- a/include/PositionIncrementAttribute.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef POSITIONINCREMENTATTRIBUTE_H -#define POSITIONINCREMENTATTRIBUTE_H - -#include "Attribute.h" - -namespace Lucene -{ - /// The positionIncrement determines the position of this token relative to the previous Token in a - /// TokenStream, used in phrase searching. - /// - /// The default value is one. - /// - /// Some common uses for this are: - /// - /// Set it to zero to put multiple terms in the same position. This is useful if, eg., a word has multiple - /// stems. 
Searches for phrases including either stem will match. In this case, all but the first stem's - /// increment should be set to zero: the increment of the first instance should be one. Repeating a token - /// with an increment of zero can also be used to boost the scores of matches on that token. - /// - /// Set it to values greater than one to inhibit exact phrase matches. If, for example, one does not want - /// phrases to match across removed stop words, then one could build a stop word filter that removes stop - /// words and also sets the increment to the number of stop words removed before each non-stop word. Then - /// exact phrase queries will only match when the terms occur with no intervening stop words. - /// - /// @see TermPositions - class LPPAPI PositionIncrementAttribute : public Attribute - { - public: - PositionIncrementAttribute(); - virtual ~PositionIncrementAttribute(); - - LUCENE_CLASS(PositionIncrementAttribute); - - protected: - int32_t positionIncrement; - - public: - virtual String toString(); - - /// Set the position increment. The default value is one. - /// @param positionIncrement the distance from the prior term - virtual void setPositionIncrement(int32_t positionIncrement); - - /// Returns the position increment of this Token. - /// @see #setPositionIncrement - virtual int32_t getPositionIncrement(); - - virtual void clear(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual void copyTo(AttributePtr target); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/PositiveScoresOnlyCollector.h b/include/PositiveScoresOnlyCollector.h deleted file mode 100644 index 43f3efad..00000000 --- a/include/PositiveScoresOnlyCollector.h +++ /dev/null @@ -1,36 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef POSITIVESCORESONLYCOLLECTOR_H -#define POSITIVESCORESONLYCOLLECTOR_H - -#include "Collector.h" - -namespace Lucene -{ - /// A {@link Collector} implementation which wraps another {@link Collector} and makes sure only - /// documents with scores > 0 are collected. - class LPPAPI PositiveScoresOnlyCollector : public Collector - { - public: - PositiveScoresOnlyCollector(CollectorPtr c); - virtual ~PositiveScoresOnlyCollector(); - - LUCENE_CLASS(PositiveScoresOnlyCollector); - - protected: - CollectorPtr collector; - ScorerPtr scorer; - - public: - virtual void collect(int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setScorer(ScorerPtr scorer); - virtual bool acceptsDocsOutOfOrder(); - }; -} - -#endif diff --git a/include/PrefixFilter.h b/include/PrefixFilter.h deleted file mode 100644 index f9ebae72..00000000 --- a/include/PrefixFilter.h +++ /dev/null @@ -1,30 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PREFIXFILTER_H -#define PREFIXFILTER_H - -#include "MultiTermQueryWrapperFilter.h" - -namespace Lucene -{ - /// A Filter that restricts search results to values that have a matching prefix in a given field. 
- class LPPAPI PrefixFilter : public MultiTermQueryWrapperFilter - { - public: - PrefixFilter(TermPtr prefix); - virtual ~PrefixFilter(); - - LUCENE_CLASS(PrefixFilter); - - public: - TermPtr getPrefix(); - - virtual String toString(); - }; -} - -#endif diff --git a/include/PrefixQuery.h b/include/PrefixQuery.h deleted file mode 100644 index 0acf7e4e..00000000 --- a/include/PrefixQuery.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PREFIXQUERY_H -#define PREFIXQUERY_H - -#include "MultiTermQuery.h" - -namespace Lucene -{ - /// A Query that matches documents containing terms with a specified prefix. A PrefixQuery is built by - /// QueryParser for input like app*. - /// - /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. - class LPPAPI PrefixQuery : public MultiTermQuery - { - public: - /// Constructs a query for terms starting with prefix. - PrefixQuery(TermPtr prefix); - - virtual ~PrefixQuery(); - - LUCENE_CLASS(PrefixQuery); - - protected: - TermPtr prefix; - - public: - using MultiTermQuery::toString; - - /// Returns the prefix of this query. - TermPtr getPrefix(); - - /// Prints a user-readable version of this query. 
- virtual String toString(const String& field); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - - protected: - virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); - }; -} - -#endif diff --git a/include/PrefixTermEnum.h b/include/PrefixTermEnum.h deleted file mode 100644 index b639fa9d..00000000 --- a/include/PrefixTermEnum.h +++ /dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PREFIXTERMENUM_H -#define PREFIXTERMENUM_H - -#include "FilteredTermEnum.h" - -namespace Lucene -{ - /// Subclass of FilteredTermEnum for enumerating all terms that match the specified prefix filter term. - /// - /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than - /// all that precede it. - class LPPAPI PrefixTermEnum : public FilteredTermEnum - { - public: - PrefixTermEnum(IndexReaderPtr reader, TermPtr prefix); - virtual ~PrefixTermEnum(); - - LUCENE_CLASS(PrefixTermEnum); - - protected: - TermPtr prefix; - bool _endEnum; - - public: - virtual double difference(); - - protected: - virtual bool endEnum(); - virtual bool termCompare(TermPtr term); - - TermPtr getPrefixTerm(); - }; -} - -#endif diff --git a/include/PriorityQueue.h b/include/PriorityQueue.h deleted file mode 100644 index fb67bb03..00000000 --- a/include/PriorityQueue.h +++ /dev/null @@ -1,225 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef PRIORITYQUEUE_H -#define PRIORITYQUEUE_H - -#include "LuceneObject.h" -#include "MiscUtils.h" - -namespace Lucene -{ - /// A PriorityQueue maintains a partial ordering of its elements such that the least element can always - /// be found in constant time. Put()'s and pop()'s require log(size) time. - /// - /// NOTE: This class pre-allocates a full array of length maxSize + 1. - template - class PriorityQueue : public LuceneObject - { - public: - typedef typename std::vector< TYPE, Allocator > heap_type; - - PriorityQueue(int32_t maxSize) - { - this->_size = 0; - this->_maxSize = maxSize; - } - - virtual ~PriorityQueue() - { - } - - protected: - heap_type heap; - int32_t _size; - int32_t _maxSize; - - public: - virtual void initialize() - { - bool empty = heap.empty(); - - if (empty) - { - int32_t heapSize = 0; - if (_maxSize == 0) - { - // We allocate 1 extra to avoid if statement in top() - heapSize = 2; - } - else if (_maxSize == INT_MAX) - { - // Don't wrap heapSize to -1, in this case, which causes a confusing NegativeArraySizeException. - // Note that very likely this will simply then hit an OOME, but at least that's more indicative - // to caller that this values is too big. We don't +1 in this case, but it's very unlikely in - // practice one will actually insert this many objects into the PQ - heapSize = INT_MAX; - } - else - { - // NOTE: we add +1 because all access to heap is 1-based not 0-based. heap[0] is unused. 
- heapSize = _maxSize + 1; - } - this->heap.resize(heapSize); - } - - // If sentinel objects are supported, populate the queue with them - TYPE sentinel = getSentinelObject(); - if (empty && sentinel) - { - heap[1] = sentinel; - for (int32_t i = 2; i < (int32_t)heap.size(); ++i) - heap[i] = getSentinelObject(); - _size = _maxSize; - } - } - - /// Return maximum size of queue - int32_t maxSize() - { - return _maxSize; - } - - /// Adds an Object to a PriorityQueue in log(size) time. If one tries to add more objects - /// than maxSize from initialize an {@link IndexOutOfBoundsException} is thrown. - TYPE add(const TYPE& type) - { - ++_size; - if (_size < 0 || _size >= (int32_t)heap.size()) - boost::throw_exception(IndexOutOfBoundsException()); - heap[_size] = type; - upHeap(); - return heap[1]; - } - - /// Adds an Object to a PriorityQueue in log(size) time. It returns the object (if any) that was - /// dropped off the heap because it was full. This can be the given parameter (in case it is - /// smaller than the full heap's minimum, and couldn't be added), or another object that was - /// previously the smallest value in the heap and now has been replaced by a larger one, or null - /// if the queue wasn't yet full with maxSize elements. - TYPE addOverflow(const TYPE& type) - { - if (_size < _maxSize) - { - add(type); - return TYPE(); - } - else if (_size > 0 && !lessThan(type, heap[1])) - { - TYPE result = heap[1]; - heap[1] = type; - updateTop(); - return result; - } - else - return type; - } - - /// Returns the least element of the PriorityQueue. - TYPE top() - { - // We don't need to check size here: if maxSize is 0, then heap is length 2 array with both - // entries null. If size is 0 then heap[1] is already null. - return heap[1]; - } - - /// Removes and returns the least element of the PriorityQueue. 
- TYPE pop() - { - if (_size > 0) - { - TYPE result = heap[1]; // save first value - heap[1] = heap[_size]; // move last to first - heap[_size--] = TYPE(); - downHeap(); // adjust heap - return result; - } - else - return TYPE(); - } - - /// Should be called when the Object at top changes values. - TYPE updateTop() - { - downHeap(); - return heap[1]; - } - - /// Returns the number of elements currently stored in the PriorityQueue. - int32_t size() const - { - return _size; - } - - /// Returns whether PriorityQueue is currently empty. - bool empty() const - { - return (_size == 0); - } - - /// Removes all entries from the PriorityQueue. - void clear() - { - for (int32_t i = 0; i <= _size; ++i) - heap[i] = TYPE(); - _size = 0; - } - - protected: - void upHeap() - { - int32_t i = _size; - TYPE node = heap[i]; // save bottom node - int32_t j = MiscUtils::unsignedShift(i, 1); - while (j > 0 && lessThan(node, heap[j])) - { - heap[i] = heap[j]; // shift parents down - i = j; - j = MiscUtils::unsignedShift(j, 1); - } - heap[i] = node; // install saved node - } - - void downHeap() - { - int32_t i = 1; - TYPE node = heap[i]; // save top node - int32_t j = i << 1; // find smaller child - int32_t k = j + 1; - if (k <= _size && lessThan(heap[k], heap[j])) - j = k; - while (j <= _size && lessThan(heap[j], node)) - { - heap[i] = heap[j]; // shift up child - i = j; - j = i << 1; - k = j + 1; - if (k <= _size && lessThan(heap[k], heap[j])) - j = k; - } - heap[i] = node; // install saved node - } - - /// Determines the ordering of objects in this priority queue. Subclasses must define this one method. 
- virtual bool lessThan(const TYPE& first, const TYPE& second) - { - return std::less()(first, second); - } - - /// This method can be overridden by extending classes to return a sentinel object which will be used by - /// {@link #initialize} to fill the queue, so that the code which uses that queue can always assume it's - /// full and only change the top without attempting to insert any new object. - /// - /// Those sentinel values should always compare worse than any non-sentinel value (ie., {@link #lessThan} - /// should always favour the non-sentinel values). - virtual TYPE getSentinelObject() - { - return TYPE(); - } - }; -} - -#endif diff --git a/include/Query.h b/include/Query.h deleted file mode 100644 index ba992686..00000000 --- a/include/Query.h +++ /dev/null @@ -1,112 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERY_H -#define QUERY_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// The abstract base class for queries. - /// - /// Instantiable subclasses are: - /// - /// {@link TermQuery} - /// {@link MultiTermQuery} - /// {@link BooleanQuery} - /// {@link WildcardQuery} - /// {@link PhraseQuery} - /// {@link PrefixQuery} - /// {@link MultiPhraseQuery} - /// {@link FuzzyQuery} - /// {@link TermRangeQuery} - /// {@link NumericRangeQuery} - /// {@link org.apache.lucene.search.spans.SpanQuery} - /// - /// A parser for queries is contained in: {@link QueryParser} - class LPPAPI Query : public LuceneObject - { - public: - Query(); - virtual ~Query(); - - LUCENE_CLASS(Query); - - protected: - double boost; // query boost factor - - public: - /// Sets the boost for this query clause to b. 
Documents matching this clause will (in addition to - /// the normal weightings) have their score multiplied by b. - virtual void setBoost(double b); - - /// Gets the boost for this clause. Documents matching this clause will (in addition to the normal - /// weightings) have their score multiplied by b. The boost is 1.0 by default. - virtual double getBoost(); - - /// Prints a query to a string, with field assumed to be the default field and omitted. - /// - /// The representation used is one that is supposed to be readable by {@link QueryParser}. However, - /// there are the following limitations: - /// - /// If the query was created by the parser, the printed representation may not be exactly what was - /// parsed. For example, characters that need to be escaped will be represented without the required - /// backslash. - /// - /// Some of the more complicated queries (eg. span queries) don't have a representation that can be - /// parsed by QueryParser. - virtual String toString(const String& field); - - /// Prints a query to a string. - virtual String toString(); - - /// Constructs an appropriate Weight implementation for this query. - /// Only implemented by primitive queries, which re-write to themselves. - virtual WeightPtr createWeight(SearcherPtr searcher); - - /// Constructs and initializes a Weight for a top-level query. - virtual WeightPtr weight(SearcherPtr searcher); - - /// Called to re-write queries into primitive queries. For example, a PrefixQuery will be rewritten - /// into a BooleanQuery that consists of TermQuerys. - virtual QueryPtr rewrite(IndexReaderPtr reader); - - /// Called when re-writing queries under MultiSearcher. - /// - /// Create a single query suitable for use by all subsearchers (in 1-1 correspondence with queries). - /// This is an optimization of the OR of all queries. We handle the common optimization cases of equal - /// queries and overlapping clauses of boolean OR queries (as generated by MultiTermQuery.rewrite()). 
- /// Be careful overriding this method as queries[0] determines which method will be called and is not - /// necessarily of the same type as the other queries. - virtual QueryPtr combine(Collection queries); - - /// Adds all terms occurring in this query to the terms set. Only works if this query is in its - /// {@link #rewrite rewritten} form. - virtual void extractTerms(SetTerm terms); - - /// Merges the clauses of a set of BooleanQuery's into a single BooleanQuery. - /// - /// A utility for use by {@link #combine(Query[])} implementations. - static QueryPtr mergeBooleanQueries(Collection queries); - - /// Returns the Similarity implementation to be used for this query. Subclasses may override this method - /// to specify their own Similarity implementation, perhaps one that delegates through that of the Searcher. - /// By default the Searcher's Similarity implementation is returned. - virtual SimilarityPtr getSimilarity(SearcherPtr searcher); - - /// Returns a clone of this query. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - - /// Return given boost value as a string. - String boostString(); - }; -} - -#endif diff --git a/include/QueryParseError.h b/include/QueryParseError.h deleted file mode 100644 index 32bfa42d..00000000 --- a/include/QueryParseError.h +++ /dev/null @@ -1,53 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERYPARSEERROR_H -#define QUERYPARSEERROR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Utility class to handle query parse errors - class QueryParseError : public LuceneObject - { - public: - virtual ~QueryParseError(); - LUCENE_CLASS(QueryParseError); - - public: - /// Returns a detailed message for the Error when it is thrown by the token manager to indicate a - /// lexical error. - /// @param EOFSeen Indicates if EOF caused the lexical error - /// @param curLexState Lexical state in which this error occurred - /// @param errorLine Line number when the error occurred - /// @param errorColumn Column number when the error occurred - /// @param errorAfter Prefix that was seen before this error occurred - /// @param curChar The offending character - static String lexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, - const String& errorAfter, wchar_t curChar); - - /// Generate a parse error message and returns it. - /// @param currentToken This is the last token that has been consumed successfully. If this object - /// has been created due to a parse error, the token following this token will (therefore) be the first - /// error token. - /// @param expectedTokenSequences Each entry in this array is an array of integers. Each array of - /// integers represents a sequence of tokens (by their ordinal values) that is expected at this point - /// of the parse. - /// @param tokenImage This is a reference to the "tokenImage" array of the generated parser within - /// which the parse error occurred. 
- static String parseError(QueryParserTokenPtr currentToken, Collection< Collection > expectedTokenSequences, - Collection tokenImage); - - - protected: - /// Replaces unprintable characters by their escaped (or unicode escaped) equivalents in the - /// given string - static String addEscapes(const String& str); - }; -} - -#endif diff --git a/include/QueryParser.h b/include/QueryParser.h deleted file mode 100644 index 792be977..00000000 --- a/include/QueryParser.h +++ /dev/null @@ -1,475 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERYPARSER_H -#define QUERYPARSER_H - -#include "QueryParserConstants.h" -#include "DateTools.h" -#include "BooleanClause.h" - -namespace Lucene -{ - typedef HashMap MapStringResolution; - - /// The most important method is {@link #parse(const String&)}. - /// - /// The syntax for query strings is as follows: - /// A Query is a series of clauses. - /// A clause may be prefixed by: - ///
    - ///
  • a plus (+) or a minus (-) sign, indicating that the clause is required or prohibited respectively; or - ///
  • a term followed by a colon, indicating the field to be searched. This enables one to construct queries - /// which search multiple fields. - ///
- /// - /// A clause may be either: - ///
    - ///
  • a term, indicating all the documents that contain this term; or - ///
  • a nested query, enclosed in parentheses. Note that this may be used with a +/- prefix to require any - /// of a set of terms. - ///
- /// - /// Thus, in BNF, the query grammar is: - ///
-    /// Query  ::= ( Clause )*
-    /// Clause ::= ["+", "-"] [ ":"] (  | "(" Query ")" )
-    /// 
- /// - /// Examples of appropriately formatted queries can be found in the query syntax documentation. - /// - /// In {@link TermRangeQuery}s, QueryParser tries to detect date values, eg. - /// date:[6/1/2005 TO 6/4/2005] produces a range query that searches for "date" fields between - /// 2005-06-01 and 2005-06-04. Note that the format of the accepted input depends on {@link #setLocale(Locale) - /// the locale}. - /// - /// By default a date is converted into a search term using the deprecated {@link DateField} for compatibility - /// reasons. To use the new {@link DateTools} to convert dates, a {@link Resolution} has to be set. - /// - /// The date resolution that shall be used for RangeQueries can be set using {@link #setDateResolution(Resolution)} - /// or {@link #setDateResolution(const String&, Resolution)}. The former sets the default date resolution for - /// all fields, whereas the latter can be used to set field specific date resolutions. Field specific date - /// resolutions take, if set, precedence over the default date resolution. - /// - /// If you use neither {@link DateField} nor {@link DateTools} in your index, you can create your own query - /// parser that inherits QueryParser and overwrites {@link #getRangeQuery(const String&, const String&, - /// const String&, bool)} to use a different method for date conversion. - /// - /// Note that QueryParser is not thread-safe. - /// - /// NOTE: there is a new QueryParser in contrib, which matches the same syntax as this class, but is more modular, - /// enabling substantial customization to how a query is created. - /// - /// NOTE: You must specify the required {@link Version} compatibility when creating QueryParser: - ///
    - ///
  • As of 2.9, {@link #setEnablePositionIncrements} is true by default. - ///
- class LPPAPI QueryParser : public QueryParserConstants, public LuceneObject - { - public: - /// Constructs a query parser. - /// @param matchVersion Lucene version to match. - /// @param field The default field for query terms. - /// @param analyzer Used to find terms in the query text. - QueryParser(LuceneVersion::Version matchVersion, const String& field, AnalyzerPtr analyzer); - - /// Constructor with user supplied QueryParserCharStream. - QueryParser(QueryParserCharStreamPtr stream); - - /// Constructor with generated Token Manager. - QueryParser(QueryParserTokenManagerPtr tokenMgr); - - virtual ~QueryParser(); - - LUCENE_CLASS(QueryParser); - - /// The default operator for parsing queries. Use {@link QueryParser#setDefaultOperator} to change it. - enum Operator { OR_OPERATOR, AND_OPERATOR }; - - protected: - static const int32_t CONJ_NONE; - static const int32_t CONJ_AND; - static const int32_t CONJ_OR; - - static const int32_t MOD_NONE; - static const int32_t MOD_NOT; - static const int32_t MOD_REQ; - - /// The actual operator that parser uses to combine query terms - Operator _operator; - - /// Next token. 
- int32_t _jj_ntk; - QueryParserTokenPtr jj_scanpos; - QueryParserTokenPtr jj_lastpos; - - int32_t jj_la; - int32_t jj_gen; - Collection jj_la1; - - static const int32_t jj_la1_0[]; - static const int32_t jj_la1_1[]; - - struct JJCalls; - typedef boost::shared_ptr JJCallsPtr; - - struct JJCalls - { - JJCalls() - { - gen = 0; - arg = 0; - } - - int32_t gen; - QueryParserTokenPtr first; - int32_t arg; - JJCallsPtr next; - }; - - Collection jj_2_rtns; - bool jj_rescan; - int32_t jj_gc; - - Collection< Collection > jj_expentries; - Collection jj_expentry; - int32_t jj_kind; - Collection jj_lasttokens; - int32_t jj_endpos; - - public: - bool lowercaseExpandedTerms; - RewriteMethodPtr multiTermRewriteMethod; - bool allowLeadingWildcard; - bool enablePositionIncrements; - - AnalyzerPtr analyzer; - String field; - int32_t phraseSlop; - double fuzzyMinSim; - int32_t fuzzyPrefixLength; - std::locale locale; - - // the default date resolution - DateTools::Resolution dateResolution; - - // maps field names to date resolutions - MapStringResolution fieldToDateResolution; - - // The collator to use when determining range inclusion, for use when constructing RangeQuerys - CollatorPtr rangeCollator; - - /// Generated Token Manager. - QueryParserTokenManagerPtr token_source; - - /// Current token. - QueryParserTokenPtr token; - - /// Next token. - QueryParserTokenPtr jj_nt; - - public: - /// Parses a query string, returning a {@link Query}. - /// @param query The query string to be parsed. - QueryPtr parse(const String& query); - - /// @return Returns the analyzer. - AnalyzerPtr getAnalyzer(); - - /// @return Returns the field. - String getField(); - - /// Get the minimal similarity for fuzzy queries. - double getFuzzyMinSim(); - - /// Set the minimum similarity for fuzzy queries. Default is 0.5. - void setFuzzyMinSim(double fuzzyMinSim); - - /// Get the prefix length for fuzzy queries. - /// @return Returns the fuzzyPrefixLength. 
- int32_t getFuzzyPrefixLength(); - - /// Set the prefix length for fuzzy queries. Default is 0. - /// @param fuzzyPrefixLength The fuzzyPrefixLength to set. - void setFuzzyPrefixLength(int32_t fuzzyPrefixLength); - - /// Sets the default slop for phrases. If zero, then exact phrase matches are required. - /// Default value is zero. - void setPhraseSlop(int32_t phraseSlop); - - /// Gets the default slop for phrases. - int32_t getPhraseSlop(); - - /// Set to true to allow leading wildcard characters. - /// - /// When set, * or ? are allowed as the first character of a PrefixQuery and WildcardQuery. - /// Note that this can produce very slow queries on big indexes. Default: false. - void setAllowLeadingWildcard(bool allowLeadingWildcard); - - /// @see #setAllowLeadingWildcard(bool) - bool getAllowLeadingWildcard(); - - /// Set to true to enable position increments in result query. - /// - /// When set, result phrase and multi-phrase queries will be aware of position increments. - /// Useful when eg. a StopFilter increases the position increment of the token that follows an - /// omitted token. Default: false. - void setEnablePositionIncrements(bool enable); - - /// @see #setEnablePositionIncrements(bool) - bool getEnablePositionIncrements(); - - /// Sets the boolean operator of the QueryParser. In default mode (OR_OPERATOR) terms without - /// any modifiers are considered optional: for example capital of Hungary is equal to capital - /// OR of OR Hungary. - /// In AND_OPERATOR mode terms are considered to be in conjunction: the above mentioned query is - /// parsed as capital AND of AND Hungary - void setDefaultOperator(Operator op); - - /// Gets implicit operator setting, which will be either AND_OPERATOR or OR_OPERATOR. - Operator getDefaultOperator(); - - /// Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically lower-cased - /// or not. Default is true. 
- void setLowercaseExpandedTerms(bool lowercaseExpandedTerms); - - /// @see #setLowercaseExpandedTerms(bool) - bool getLowercaseExpandedTerms(); - - /// By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} when - /// creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable - /// because it a) Runs faster b) Does not have the scarcity of terms unduly influence score c) avoids - /// any "TooManyClauses" exception. However, if your application really needs to use the old- - /// fashioned BooleanQuery expansion rewriting and the above points are not relevant then use this - /// to change the rewrite method. - void setMultiTermRewriteMethod(RewriteMethodPtr method); - - /// @see #setMultiTermRewriteMethod - RewriteMethodPtr getMultiTermRewriteMethod(); - - /// Set locale used by date range parsing. - void setLocale(std::locale locale); - - /// Returns current locale, allowing access by subclasses. - std::locale getLocale(); - - /// Sets the default date resolution used by RangeQueries for fields for which no specific date - /// resolutions has been set. Field specific resolutions can be set with {@link - /// #setDateResolution(const String&, DateTools::Resolution)}. - /// @param dateResolution The default date resolution to set - void setDateResolution(DateTools::Resolution dateResolution); - - /// Sets the date resolution used by RangeQueries for a specific field. - /// @param fieldName Field for which the date resolution is to be set - /// @param dateResolution Date resolution to set - void setDateResolution(const String& fieldName, DateTools::Resolution dateResolution); - - /// Returns the date resolution that is used by RangeQueries for the given field. Returns null, if - /// no default or field specific date resolution has been set for the given field. 
- DateTools::Resolution getDateResolution(const String& fieldName); - - /// Sets the collator used to determine index term inclusion in ranges for RangeQuerys. - /// - /// WARNING: Setting the rangeCollator to a non-null collator using this method will cause every - /// single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending - /// on the number of index Terms in this Field, the operation could be very slow. - /// @param rc The collator to use when constructing RangeQuerys - void setRangeCollator(CollatorPtr rc); - - /// @return the collator used to determine index term inclusion in ranges for RangeQuerys. - CollatorPtr getRangeCollator(); - - /// Command line tool to test QueryParser, using {@link SimpleAnalyzer}. - static int main(Collection args); - - /// Query ::= ( Clause )* - /// Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) - int32_t Conjunction(); - int32_t Modifiers(); - - /// This makes sure that there is no garbage after the query string - virtual QueryPtr TopLevelQuery(const String& field); - - virtual QueryPtr ParseQuery(const String& field); - virtual QueryPtr ParseClause(const String& field); - virtual QueryPtr ParseTerm(const String& field); - - /// Reinitialise. - virtual void ReInit(QueryParserCharStreamPtr stream); - - /// Reinitialise. - virtual void ReInit(QueryParserTokenManagerPtr tokenMgr); - - /// Get the next Token. - virtual QueryParserTokenPtr getNextToken(); - - /// Get the specific Token. - virtual QueryParserTokenPtr getToken(int32_t index); - - /// Generate QueryParserError exception. - virtual void generateParseException(); - - /// Enable tracing. - virtual void enable_tracing(); - - /// Disable tracing. 
- virtual void disable_tracing(); - - protected: - /// Construct query parser with supplied QueryParserCharStream or TokenManager - void ConstructParser(QueryParserCharStreamPtr stream, QueryParserTokenManagerPtr tokenMgr); - - virtual void addClause(Collection clauses, int32_t conj, int32_t mods, QueryPtr q); - - /// Use the analyzer to get all the tokens, and then build a TermQuery, PhraseQuery, or nothing - /// based on the term count. - virtual QueryPtr getFieldQuery(const String& field, const String& queryText); - - /// Base implementation delegates to {@link #getFieldQuery(const String&, const String&)}. - /// This method may be overridden, for example, to return a SpanNearQuery instead of a PhraseQuery. - virtual QueryPtr getFieldQuery(const String& field, const String& queryText, int32_t slop); - - /// Builds a new TermRangeQuery instance for given min/max parts - virtual QueryPtr getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); - - /// Builds a new BooleanQuery instance - /// @param disableCoord disable coord - BooleanQueryPtr newBooleanQuery(bool disableCoord); - - /// Builds a new BooleanClause instance - /// @param q sub query - /// @param occur how this clause should occur when matching documents - /// @return new BooleanClause instance - BooleanClausePtr newBooleanClause(QueryPtr q, BooleanClause::Occur occur); - - /// Builds a new TermQuery instance - /// @param term term - /// @return new TermQuery instance - QueryPtr newTermQuery(TermPtr term); - - /// Builds a new PhraseQuery instance - /// @return new PhraseQuery instance - PhraseQueryPtr newPhraseQuery(); - - /// Builds a new MultiPhraseQuery instance - /// @return new MultiPhraseQuery instance - MultiPhraseQueryPtr newMultiPhraseQuery(); - - /// Builds a new PrefixQuery instance - /// @param prefix Prefix term - /// @return new PrefixQuery instance - QueryPtr newPrefixQuery(TermPtr prefix); - - /// Builds a new FuzzyQuery instance - /// @param term 
Term - /// @param minimumSimilarity minimum similarity - /// @param prefixLength prefix length - /// @return new FuzzyQuery Instance - QueryPtr newFuzzyQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength); - - /// Builds a new TermRangeQuery instance - /// @param field Field - /// @param part1 min - /// @param part2 max - /// @param inclusive true if range is inclusive - /// @return new TermRangeQuery instance - QueryPtr newRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); - - /// Builds a new MatchAllDocsQuery instance - /// @return new MatchAllDocsQuery instance - QueryPtr newMatchAllDocsQuery(); - - /// Builds a new WildcardQuery instance - /// @param t wildcard term - /// @return new WildcardQuery instance - QueryPtr newWildcardQuery(TermPtr term); - - /// Factory method for generating query, given a set of clauses. By default creates a boolean query - /// composed of clauses passed in. - /// - /// Can be overridden by extending classes, to modify query being returned. - /// - /// @param clauses List that contains {@link BooleanClause} instances to join. - /// @return Resulting {@link Query} object. - virtual QueryPtr getBooleanQuery(Collection clauses); - - /// Factory method for generating query, given a set of clauses. By default creates a boolean query - /// composed of clauses passed in. - /// - /// Can be overridden by extending classes, to modify query being returned. - /// - /// @param clauses List that contains {@link BooleanClause} instances to join. - /// @param disableCoord true if coord scoring should be disabled. - /// @return Resulting {@link Query} object. - virtual QueryPtr getBooleanQuery(Collection clauses, bool disableCoord); - - /// Factory method for generating a query. Called when parser parses an input term token that contains - /// one or more wildcard characters (? 
and *), but is not a prefix term token (one that has just a - /// single * character at the end) - /// - /// Depending on settings, prefix term may be lower-cased automatically. It will not go through the - /// default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard - /// templates. - /// - /// Can be overridden by extending classes, to provide custom handling for wildcard queries, which may - /// be necessary due to missing analyzer calls. - /// - /// @param field Name of the field query will use. - /// @param termStr Term token that contains one or more wild card characters (? or *), but is not simple - /// prefix term - /// @return Resulting {@link Query} built for the term - virtual QueryPtr getWildcardQuery(const String& field, const String& termStr); - - /// Factory method for generating a query (similar to {@link #getWildcardQuery}). Called when parser - /// parses an input term token that uses prefix notation; that is, contains a single '*' wildcard - /// character as its last character. Since this is a special case of generic wildcard term, and such - /// a query can be optimized easily, this usually results in a different query object. - /// - /// Depending on settings, a prefix term may be lower-cased automatically. It will not go through the - /// default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard templates. - /// - /// Can be overridden by extending classes, to provide custom handling for wild card queries, which may be - /// necessary due to missing analyzer calls. - /// - /// @param field Name of the field query will use. - /// @param termStr Term token to use for building term for the query (without trailing '*' character) - /// @return Resulting {@link Query} built for the term - virtual QueryPtr getPrefixQuery(const String& field, const String& termStr); - - /// Factory method for generating a query (similar to {@link #getWildcardQuery}). 
Called when parser - /// parses an input term token that has the fuzzy suffix (~) appended. - /// - /// @param field Name of the field query will use. - /// @param termStr Term token to use for building term for the query - /// @return Resulting {@link Query} built for the term - virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity); - - /// Returns a String where the escape char has been removed, or kept only once if there was a double - /// escape. Supports escaped unicode characters, eg. translates \\u0041 to A. - String discardEscapeChar(const String& input); - - /// Returns the numeric value of the hexadecimal character - static int32_t hexToInt(wchar_t c); - - /// Returns a String where those characters that QueryParser expects to be escaped are escaped by - /// a preceding \. - static String escape(const String& s); - - bool jj_2_1(int32_t xla); - bool jj_3R_2(); - bool jj_3_1(); - bool jj_3R_3(); - - QueryParserTokenPtr jj_consume_token(int32_t kind); - bool jj_scan_token(int32_t kind); - int32_t jj_ntk(); - void jj_add_error_token(int32_t kind, int32_t pos); - void jj_rescan_token(); - void jj_save(int32_t index, int32_t xla); - }; -} - -#endif diff --git a/include/QueryParserCharStream.h b/include/QueryParserCharStream.h deleted file mode 100644 index c442014b..00000000 --- a/include/QueryParserCharStream.h +++ /dev/null @@ -1,83 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERYPARSERCHARSTREAM_H -#define QUERYPARSERCHARSTREAM_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// This interface describes a character stream that maintains line and column number positions of - /// the characters. 
It also has the capability to backup the stream to some extent. An implementation - /// of this interface is used in the QueryParserTokenManager. - /// - /// All the methods except backup can be implemented in any fashion. backup needs to be implemented - /// correctly for the correct operation of the lexer. Rest of the methods are all used to get information - /// like line number, column number and the String that constitutes a token and are not used by the lexer. - /// Hence their implementation won't affect the generated lexer's operation. - class LPPAPI QueryParserCharStream - { - public: - LUCENE_INTERFACE(QueryParserCharStream); - - public: - /// Returns the next character from the selected input. The method of selecting the input is the - /// responsibility of the class implementing this interface. - virtual wchar_t readChar() = 0; - - /// Returns the column position of the character last read. - /// @deprecated - /// @see #getEndColumn - virtual int32_t getColumn() = 0; - - /// Returns the line number of the character last read. - /// @deprecated - /// @see #getEndLine - virtual int32_t getLine() = 0; - - /// Returns the column number of the last character for current token (being matched after the last - /// call to BeginToken). - virtual int32_t getEndColumn() = 0; - - /// Returns the line number of the last character for current token (being matched after the last call - /// to BeginToken). - virtual int32_t getEndLine() = 0; - - /// Returns the column number of the first character for current token (being matched after the last - /// call to BeginToken). - virtual int32_t getBeginColumn() = 0; - - /// Returns the line number of the first character for current token (being matched after the last call - /// to BeginToken). - virtual int32_t getBeginLine() = 0; - - /// Backs up the input stream by amount steps. Lexer calls this method if it had already read some - /// characters, but could not use them to match a (longer) token. 
So, they will be used again as the - /// prefix of the next token and it is the implementation's's responsibility to do this right. - virtual void backup(int32_t amount) = 0; - - /// Returns the next character that marks the beginning of the next token. All characters must remain - /// in the buffer between two successive calls to this method to implement backup correctly. - virtual wchar_t BeginToken() = 0; - - /// Returns a string made up of characters from the marked token beginning to the current buffer position. - /// Implementations have the choice of returning anything that they want to. For example, for efficiency, - /// one might decide to just return null, which is a valid implementation. - virtual String GetImage() = 0; - - /// Returns an array of characters that make up the suffix of length for the currently matched token. - /// This is used to build up the matched string for use in actions in the case of MORE. - virtual CharArray GetSuffix(int32_t length) = 0; - - /// The lexer calls this function to indicate that it is done with the stream and hence implementations - /// can free any resources held by this class. Again, the body of this function can be just empty and it - /// will not affect the lexer's operation. - virtual void Done() = 0; - }; -} - -#endif diff --git a/include/QueryParserConstants.h b/include/QueryParserConstants.h deleted file mode 100644 index f9bf3b42..00000000 --- a/include/QueryParserConstants.h +++ /dev/null @@ -1,79 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERYPARSERCONSTANTS_H -#define QUERYPARSERCONSTANTS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Token literal values and constants. 
- class LPPAPI QueryParserConstants - { - protected: - QueryParserConstants(); - - public: - virtual ~QueryParserConstants(); - LUCENE_INTERFACE(QueryParserConstants); - - public: - enum RegularExpressionId - { - _EOF = 0, - _NUM_CHAR = 1, - _ESCAPED_CHAR = 2, - _TERM_START_CHAR = 3, - _TERM_CHAR = 4, - _WHITESPACE = 5, - _QUOTED_CHAR = 6, - AND = 8, - OR = 9, - NOT = 10, - PLUS = 11, - MINUS = 12, - LPAREN = 13, - RPAREN = 14, - COLON = 15, - STAR = 16, - CARAT = 17, - QUOTED = 18, - TERM = 19, - FUZZY_SLOP = 20, - PREFIXTERM = 21, - WILDTERM = 22, - RANGEIN_START = 23, - RANGEEX_START = 24, - NUMBER = 25, - RANGEIN_TO = 26, - RANGEIN_END = 27, - RANGEIN_QUOTED = 28, - RANGEIN_GOOP = 29, - RANGEEX_TO = 30, - RANGEEX_END = 31, - RANGEEX_QUOTED = 32, - RANGEEX_GOOP = 33 - }; - - enum LexicalState - { - Boost = 0, - RangeEx = 1, - RangeIn = 2, - DEFAULT = 3 - }; - - /// Literal token values. - static Collection tokenImage; - - protected: - /// Literal token values. - static const wchar_t* _tokenImage[]; - }; -} - -#endif diff --git a/include/QueryParserToken.h b/include/QueryParserToken.h deleted file mode 100644 index 32705db6..00000000 --- a/include/QueryParserToken.h +++ /dev/null @@ -1,70 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERYPARSERTOKEN_H -#define QUERYPARSERTOKEN_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Describes the input token stream. - class LPPAPI QueryParserToken : public LuceneObject - { - public: - /// Constructs a new token for the specified Image and Kind. 
- QueryParserToken(int32_t kind = 0, const String& image = EmptyString); - - virtual ~QueryParserToken(); - - LUCENE_CLASS(QueryParserToken); - - public: - /// An integer that describes the kind of this token. - int32_t kind; - - /// The line number of the first character of this Token. - int32_t beginLine; - - /// The column number of the first character of this Token. - int32_t beginColumn; - - /// The line number of the last character of this Token. - int32_t endLine; - - /// The column number of the last character of this Token. - int32_t endColumn; - - /// The string image of the token. - String image; - - /// A reference to the next regular (non-special) token from the input stream. If this is the last - /// token from the input stream, or if the token manager has not read tokens beyond this one, this - /// field is set to null. This is true only if this token is also a regular token. Otherwise, see - /// below for a description of the contents of this field. - QueryParserTokenPtr next; - - /// This field is used to access special tokens that occur prior to this token, but after the - /// immediately preceding regular (non-special) token. If there are no such special tokens, this - /// field is set to null. When there are more than one such special token, this field refers to the - /// last of these special tokens, which in turn refers to the next previous special token through - /// its specialToken field, and so on until the first special token (whose specialToken field is - /// null). The next fields of special tokens refer to other special tokens that immediately follow - /// it (without an intervening regular token). If there is no such token, this field is null. - QueryParserTokenPtr specialToken; - - public: - /// Returns the image. - virtual String toString(); - - /// Returns a new Token object, by default. However, if you want, you can create and return subclass - /// objects based on the value of ofKind. 
Simply add the cases to the switch for all those special - /// cases. - static QueryParserTokenPtr newToken(int32_t ofKind, const String& image = EmptyString); - }; -} - -#endif diff --git a/include/QueryParserTokenManager.h b/include/QueryParserTokenManager.h deleted file mode 100644 index 884cd413..00000000 --- a/include/QueryParserTokenManager.h +++ /dev/null @@ -1,111 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERYPARSERTOKENMANAGER_H -#define QUERYPARSERTOKENMANAGER_H - -#include "QueryParserConstants.h" - -namespace Lucene -{ - /// Token Manager. - class LPPAPI QueryParserTokenManager : public QueryParserConstants, public LuceneObject - { - public: - QueryParserTokenManager(QueryParserCharStreamPtr stream); - QueryParserTokenManager(QueryParserCharStreamPtr stream, int32_t lexState); - - virtual ~QueryParserTokenManager(); - - LUCENE_CLASS(QueryParserTokenManager); - - public: - /// Debug output. - InfoStreamPtr debugStream; - - protected: - static const int64_t jjbitVec0[]; - static const int64_t jjbitVec1[]; - static const int64_t jjbitVec3[]; - static const int64_t jjbitVec4[]; - static const int32_t jjnextStates[]; - - /// Token literal values. - static const wchar_t* jjstrLiteralImages[]; - - /// Lexer state names. - static const wchar_t* lexStateNames[]; - - /// Lex State array. 
- static const int32_t jjnewLexState[]; - static const int64_t jjtoToken[]; - static const int64_t jjtoSkip[]; - - int32_t curLexState; - int32_t defaultLexState; - int32_t jjnewStateCnt; - int32_t jjround; - int32_t jjmatchedPos; - int32_t jjmatchedKind; - - QueryParserCharStreamPtr input_stream; - IntArray jjrounds; - IntArray jjstateSet; - wchar_t curChar; - - public: - /// Set debug output. - void setDebugStream(InfoStreamPtr debugStream); - - /// Reinitialise parser. - void ReInit(QueryParserCharStreamPtr stream); - - /// Reinitialise parser. - void ReInit(QueryParserCharStreamPtr stream, int32_t lexState); - - /// Switch to specified lex state. - void SwitchTo(int32_t lexState); - - /// Get the next Token. - QueryParserTokenPtr getNextToken(); - - protected: - int32_t jjStopStringLiteralDfa_3(int32_t pos, int64_t active0); - int32_t jjStartNfa_3(int32_t pos, int64_t active0); - int32_t jjStopAtPos(int32_t pos, int32_t kind); - int32_t jjMoveStringLiteralDfa0_3(); - int32_t jjStartNfaWithStates_3(int32_t pos, int32_t kind, int32_t state); - int32_t jjMoveNfa_3(int32_t startState, int32_t curPos); - int32_t jjStopStringLiteralDfa_1(int32_t pos, int64_t active0); - int32_t jjStartNfa_1(int32_t pos, int64_t active0); - int32_t jjMoveStringLiteralDfa0_1(); - int32_t jjMoveStringLiteralDfa1_1(int64_t active0); - int32_t jjStartNfaWithStates_1(int32_t pos, int32_t kind, int32_t state); - int32_t jjMoveNfa_1(int32_t startState, int32_t curPos); - int32_t jjMoveStringLiteralDfa0_0(); - int32_t jjMoveNfa_0(int32_t startState, int32_t curPos); - int32_t jjStopStringLiteralDfa_2(int32_t pos, int64_t active0); - int32_t jjStartNfa_2(int32_t pos, int64_t active0); - int32_t jjMoveStringLiteralDfa0_2(); - int32_t jjMoveStringLiteralDfa1_2(int64_t active0); - int32_t jjStartNfaWithStates_2(int32_t pos, int32_t kind, int32_t state); - int32_t jjMoveNfa_2(int32_t startState, int32_t curPos); - - static bool jjCanMove_0(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, 
int64_t l2); - static bool jjCanMove_1(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); - static bool jjCanMove_2(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); - - void ReInitRounds(); - QueryParserTokenPtr jjFillToken(); - - void jjCheckNAdd(int32_t state); - void jjAddStates(int32_t start, int32_t end); - void jjCheckNAddTwoStates(int32_t state1, int32_t state2); - void jjCheckNAddStates(int32_t start, int32_t end); - }; -} - -#endif diff --git a/include/QueryTermVector.h b/include/QueryTermVector.h deleted file mode 100644 index 21ff3a36..00000000 --- a/include/QueryTermVector.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERYTERMVECTOR_H -#define QUERYTERMVECTOR_H - -#include "TermFreqVector.h" - -namespace Lucene -{ - class LPPAPI QueryTermVector : public TermFreqVector, public LuceneObject - { - public: - /// @param queryTerms The original list of terms from the query, can contain duplicates - QueryTermVector(Collection queryTerms); - QueryTermVector(const String& queryString, AnalyzerPtr analyzer); - - virtual ~QueryTermVector(); - - LUCENE_CLASS(QueryTermVector); - - protected: - Collection terms; - Collection termFreqs; - - public: - virtual String toString(); - - int32_t size(); - Collection getTerms(); - Collection getTermFrequencies(); - int32_t indexOf(const String& term); - Collection indexesOf(Collection terms, int32_t start, int32_t length); - - protected: - void processTerms(Collection queryTerms); - }; -} - -#endif diff --git a/include/QueryWrapperFilter.h b/include/QueryWrapperFilter.h deleted file mode 100644 index 071b22ee..00000000 --- a/include/QueryWrapperFilter.h +++ 
/dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef QUERYWRAPPERFILTER_H -#define QUERYWRAPPERFILTER_H - -#include "Filter.h" - -namespace Lucene -{ - /// Constrains search results to only match those which also match a provided query. - /// - /// This could be used, for example, with a {@link TermRangeQuery} on a suitably formatted date field to - /// implement date filtering. One could re-use a single QueryFilter that matches, eg., only documents - /// modified within the last week. The QueryFilter and TermRangeQuery would only need to be reconstructed - /// once per day. - class LPPAPI QueryWrapperFilter : public Filter - { - public: - /// Constructs a filter which only matches documents matching query. - QueryWrapperFilter(QueryPtr query); - - virtual ~QueryWrapperFilter(); - - LUCENE_CLASS(QueryWrapperFilter); - - protected: - QueryPtr query; - - public: - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); - virtual String toString(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/RAMDirectory.h b/include/RAMDirectory.h deleted file mode 100644 index cd3648b0..00000000 --- a/include/RAMDirectory.h +++ /dev/null @@ -1,86 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef RAMDIRECTORY_H -#define RAMDIRECTORY_H - -#include "Directory.h" - -namespace Lucene -{ - /// A memory-resident {@link Directory} implementation. Locking implementation is by default the - /// {@link SingleInstanceLockFactory} but can be changed with {@link #setLockFactory}. - /// Lock acquisition sequence: RAMDirectory, then RAMFile - class LPPAPI RAMDirectory : public Directory - { - public: - /// Constructs an empty {@link Directory}. - RAMDirectory(); - - /// Creates a new RAMDirectory instance from a different Directory implementation. - /// This can be used to load a disk-based index into memory. - /// - /// This should be used only with indices that can fit into memory. - /// - /// Note that the resulting RAMDirectory instance is fully independent from the - /// original Directory (it is a complete copy). Any subsequent changes to the - /// original Directory will not be visible in the RAMDirectory instance. - /// @param dir a Directory value - RAMDirectory(DirectoryPtr dir); - - RAMDirectory(DirectoryPtr dir, bool closeDir); - - virtual ~RAMDirectory(); - - LUCENE_CLASS(RAMDirectory); - - INTERNAL: - int64_t _sizeInBytes; - MapStringRAMFile fileMap; - - protected: - DirectoryWeakPtr _dirSource; - bool copyDirectory; - bool closeDir; - - public: - virtual void initialize(); - - /// Returns an array of strings, one for each file in the directory. - virtual HashSet listAll(); - - /// Returns true if a file with the given name exists. - virtual bool fileExists(const String& name); - - /// Returns the time the named file was last modified. - virtual uint64_t fileModified(const String& name); - - /// Set the modified time of an existing file to now. - virtual void touchFile(const String& name); - - /// Returns the length of a file in the directory. - virtual int64_t fileLength(const String& name); - - /// Return total size in bytes of all files in this directory. 
- /// This is currently quantized to RAMOutputStream::BUFFER_SIZE. - int64_t sizeInBytes(); - - /// Removes an existing file in the directory. - virtual void deleteFile(const String& name); - - /// Creates a new, empty file in the directory with the given name. - /// Returns a stream writing this file. - virtual IndexOutputPtr createOutput(const String& name); - - /// Returns a stream reading an existing file. - virtual IndexInputPtr openInput(const String& name); - - /// Closes the store. - virtual void close(); - }; -} - -#endif diff --git a/include/RAMFile.h b/include/RAMFile.h deleted file mode 100644 index 59e6a6d7..00000000 --- a/include/RAMFile.h +++ /dev/null @@ -1,57 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef RAMFILE_H -#define RAMFILE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// File used as buffer in RAMDirectory - class LPPAPI RAMFile : public LuceneObject - { - public: - RAMFile(); // File used as buffer, in no RAMDirectory - RAMFile(RAMDirectoryPtr directory); - virtual ~RAMFile(); - - LUCENE_CLASS(RAMFile); - - INTERNAL: - int64_t length; - RAMDirectoryWeakPtr _directory; - - protected: - Collection buffers; - - int64_t sizeInBytes; - - /// This is publicly modifiable via Directory.touchFile(), so direct access not supported - int64_t lastModified; - - public: - /// For non-stream access from thread that might be concurrent with writing. 
- int64_t getLength(); - void setLength(int64_t length); - - /// For non-stream access from thread that might be concurrent with writing - int64_t getLastModified(); - void setLastModified(int64_t lastModified); - - int64_t getSizeInBytes(); - - ByteArray addBuffer(int32_t size); - ByteArray getBuffer(int32_t index); - int32_t numBuffers(); - - protected: - /// Allocate a new buffer. Subclasses can allocate differently. - virtual ByteArray newBuffer(int32_t size); - }; -} - -#endif diff --git a/include/RAMInputStream.h b/include/RAMInputStream.h deleted file mode 100644 index 049a52b2..00000000 --- a/include/RAMInputStream.h +++ /dev/null @@ -1,70 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef RAMINPUTSTREAM_H -#define RAMINPUTSTREAM_H - -#include "IndexInput.h" - -namespace Lucene -{ - /// A memory-resident {@link IndexInput} implementation. - class RAMInputStream : public IndexInput - { - public: - RAMInputStream(); - RAMInputStream(RAMFilePtr f); - virtual ~RAMInputStream(); - - LUCENE_CLASS(RAMInputStream); - - public: - static const int32_t BUFFER_SIZE; - - protected: - RAMFilePtr file; - int64_t _length; - ByteArray currentBuffer; - int32_t currentBufferIndex; - int32_t bufferPosition; - int64_t bufferStart; - int32_t bufferLength; - - public: - /// Closes the stream to further operations. - virtual void close(); - - /// The number of bytes in the file. - virtual int64_t length(); - - /// Reads and returns a single byte. - /// @see IndexOutput#writeByte(uint8_t) - virtual uint8_t readByte(); - - /// Reads a specified number of bytes into an array at the specified offset. - /// @param b the array to read bytes into. 
- /// @param offset the offset in the array to start storing bytes. - /// @param length the number of bytes to read. - /// @see IndexOutput#writeBytes(const uint8_t*,int) - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); - - /// Returns the current position in this file, where the next read will occur. - /// @see #seek(int64_t) - virtual int64_t getFilePointer(); - - /// Sets current position in this file, where the next read will occur. - /// @see #getFilePointer() - virtual void seek(int64_t pos); - - /// Returns a clone of this stream. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - protected: - void switchCurrentBuffer(bool enforceEOF); - }; -} - -#endif diff --git a/include/RAMOutputStream.h b/include/RAMOutputStream.h deleted file mode 100644 index 3dd68c96..00000000 --- a/include/RAMOutputStream.h +++ /dev/null @@ -1,78 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef RAMOUTPUTSTREAM_H -#define RAMOUTPUTSTREAM_H - -#include "IndexOutput.h" - -namespace Lucene -{ - /// A memory-resident {@link IndexOutput} implementation. - class RAMOutputStream : public IndexOutput - { - public: - /// Construct an empty output buffer. - RAMOutputStream(); - RAMOutputStream(RAMFilePtr f); - virtual ~RAMOutputStream(); - - LUCENE_CLASS(RAMOutputStream); - - public: - static const int32_t BUFFER_SIZE; - - protected: - RAMFilePtr file; - ByteArray currentBuffer; - int32_t currentBufferIndex; - int32_t bufferPosition; - int64_t bufferStart; - int32_t bufferLength; - - public: - /// Copy the current contents of this buffer to the named output. - void writeTo(IndexOutputPtr out); - - /// Resets this to an empty file. 
- void reset(); - - /// Closes this stream to further operations. - virtual void close(); - - /// Sets current position in this file, where the next write will occur. - /// @see #getFilePointer() - virtual void seek(int64_t pos); - - /// The number of bytes in the file. - virtual int64_t length(); - - /// Writes a single byte. - /// @see IndexInput#readByte() - virtual void writeByte(uint8_t b); - - /// Writes an array of bytes. - /// @param b the bytes to write. - /// @param length the number of bytes to write. - /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) - virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); - - /// Forces any buffered output to be written. - virtual void flush(); - - /// Returns the current position in this file, where the next write will occur. - virtual int64_t getFilePointer(); - - /// Returns byte usage of all buffers. - int64_t sizeInBytes(); - - protected: - void switchCurrentBuffer(); - void setFileLength(); - }; -} - -#endif diff --git a/include/Random.h b/include/Random.h deleted file mode 100644 index e36d7017..00000000 --- a/include/Random.h +++ /dev/null @@ -1,36 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef RANDOM_H -#define RANDOM_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Utility class to generate a stream of pseudorandom numbers. 
- class LPPAPI Random : public LuceneObject - { - public: - Random(); - Random(int64_t seed); - - virtual ~Random(); - - protected: - int64_t seed; - - public: - void setSeed(int64_t seed); - int32_t nextInt(int32_t limit = INT_MAX); - double nextDouble(); - - protected: - int32_t next(int32_t bits); - }; -} - -#endif diff --git a/include/RawPostingList.h b/include/RawPostingList.h deleted file mode 100644 index 10ae9f33..00000000 --- a/include/RawPostingList.h +++ /dev/null @@ -1,36 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef RAWPOSTINGLIST_H -#define RAWPOSTINGLIST_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// This is the base class for an in-memory posting list, keyed by a Token. {@link TermsHash} maintains a - /// hash table holding one instance of this per unique Token. Consumers of TermsHash ({@link TermsHashConsumer}) - /// must subclass this class with its own concrete class. FreqProxTermsWriterPostingList is a private inner - /// class used for the freq/prox postings, and TermVectorsTermsWriterPostingList is a private inner class used - /// to hold TermVectors postings. 
- class RawPostingList : public LuceneObject - { - public: - RawPostingList(); - virtual ~RawPostingList(); - - LUCENE_CLASS(RawPostingList); - - public: - static const int32_t BYTES_SIZE; - - int32_t textStart; - int32_t intStart; - int32_t byteStart; - }; -} - -#endif diff --git a/include/ReadOnlyDirectoryReader.h b/include/ReadOnlyDirectoryReader.h deleted file mode 100644 index 71e88728..00000000 --- a/include/ReadOnlyDirectoryReader.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef READONLYDIRECTORYREADER_H -#define READONLYDIRECTORYREADER_H - -#include "DirectoryReader.h" - -namespace Lucene -{ - class ReadOnlyDirectoryReader : public DirectoryReader - { - public: - ReadOnlyDirectoryReader(DirectoryPtr directory, SegmentInfosPtr sis, IndexDeletionPolicyPtr deletionPolicy, int32_t termInfosIndexDivisor); - ReadOnlyDirectoryReader(DirectoryPtr directory, SegmentInfosPtr infos, Collection oldReaders, - Collection oldStarts, MapStringByteArray oldNormsCache, bool doClone, int32_t termInfosIndexDivisor); - ReadOnlyDirectoryReader(IndexWriterPtr writer, SegmentInfosPtr infos, int32_t termInfosIndexDivisor); - virtual ~ReadOnlyDirectoryReader(); - - LUCENE_CLASS(ReadOnlyDirectoryReader); - - public: - /// Tries to acquire the WriteLock on this directory. this method is only valid if this - /// IndexReader is directory owner. 
- virtual void acquireWriteLock(); - }; -} - -#endif diff --git a/include/ReadOnlySegmentReader.h b/include/ReadOnlySegmentReader.h deleted file mode 100644 index e4c3c991..00000000 --- a/include/ReadOnlySegmentReader.h +++ /dev/null @@ -1,29 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef READONLYSEGMENTREADER_H -#define READONLYSEGMENTREADER_H - -#include "SegmentReader.h" - -namespace Lucene -{ - class ReadOnlySegmentReader : public SegmentReader - { - public: - virtual ~ReadOnlySegmentReader(); - - LUCENE_CLASS(ReadOnlySegmentReader); - - public: - static void noWrite(); - - virtual void acquireWriteLock(); - virtual bool isDeleted(int32_t n); - }; -} - -#endif diff --git a/include/Reader.h b/include/Reader.h deleted file mode 100644 index 3bc164e4..00000000 --- a/include/Reader.h +++ /dev/null @@ -1,56 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef READER_H -#define READER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Abstract class for reading character streams. - class LPPAPI Reader : public LuceneObject - { - protected: - Reader(); - - public: - virtual ~Reader(); - LUCENE_CLASS(Reader); - - public: - static const int32_t READER_EOF; - - /// Read a single character. - virtual int32_t read(); - - /// Read characters into a portion of an array. 
- virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length) = 0; - - /// Skip characters. - virtual int64_t skip(int64_t n); - - /// Close the stream. - virtual void close() = 0; - - /// Tell whether this stream supports the mark() operation - virtual bool markSupported(); - - /// Mark the present position in the stream. Subsequent calls to reset() will attempt to reposition the - /// stream to this point. - virtual void mark(int32_t readAheadLimit); - - /// Reset the stream. If the stream has been marked, then attempt to reposition it at the mark. If the stream - /// has not been marked, then attempt to reset it in some way appropriate to the particular stream, for example - /// by repositioning it to its starting point. - virtual void reset(); - - /// The number of bytes in the stream. - virtual int64_t length(); - }; -} - -#endif diff --git a/include/ReaderUtil.h b/include/ReaderUtil.h deleted file mode 100644 index 080eb315..00000000 --- a/include/ReaderUtil.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef READERUTIL_H -#define READERUTIL_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Common util methods for dealing with {@link IndexReader}s. - class LPPAPI ReaderUtil : public LuceneObject - { - public: - virtual ~ReaderUtil(); - LUCENE_CLASS(ReaderUtil); - - public: - /// Gathers sub-readers from reader into a List. - static void gatherSubReaders(Collection allSubReaders, IndexReaderPtr reader); - - /// Returns sub IndexReader that contains the given document id. 
- /// - /// @param doc Id of document - /// @param reader Parent reader - /// @return Sub reader of parent which contains the specified doc id - static IndexReaderPtr subReader(int32_t doc, IndexReaderPtr reader); - - /// Returns sub-reader subIndex from reader. - /// - /// @param reader Parent reader - /// @param subIndex Index of desired sub reader - /// @return The subreader at subIndex - static IndexReaderPtr subReader(IndexReaderPtr reader, int32_t subIndex); - - /// Returns index of the searcher/reader for document n in the array used to construct this - /// searcher/reader. - static int32_t subIndex(int32_t n, Collection docStarts); - }; -} - -#endif diff --git a/include/ReqExclScorer.h b/include/ReqExclScorer.h deleted file mode 100644 index 56ebee05..00000000 --- a/include/ReqExclScorer.h +++ /dev/null @@ -1,58 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef REQEXCLSCORER_H -#define REQEXCLSCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// A Scorer for queries with a required subscorer and an excluding (prohibited) sub DocIdSetIterator. - /// This Scorer implements {@link Scorer#skipTo(int32_t)}, and it uses the skipTo() on the given scorers. - class ReqExclScorer : public Scorer - { - public: - /// Construct a ReqExclScorer. - /// @param reqScorer The scorer that must match, except where - /// @param exclDisi indicates exclusion. 
- ReqExclScorer(ScorerPtr reqScorer, DocIdSetIteratorPtr exclDisi); - virtual ~ReqExclScorer(); - - LUCENE_CLASS(ReqExclScorer); - - protected: - ScorerPtr reqScorer; - DocIdSetIteratorPtr exclDisi; - int32_t doc; - - public: - virtual int32_t nextDoc(); - virtual int32_t docID(); - - /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} - /// is called the first time. - /// @return The score of the required scorer. - virtual double score(); - - virtual int32_t advance(int32_t target); - - protected: - /// Advance to non excluded doc. - /// - /// On entry: - ///
    - ///
  • reqScorer != null, - ///
  • exclScorer != null, - ///
  • reqScorer was advanced once via next() or skipTo() and reqScorer.doc() may still be excluded. - ///
- /// Advances reqScorer a non excluded required doc, if any. - /// @return true iff there is a non excluded required doc. - int32_t toNonExcluded(); - }; -} - -#endif diff --git a/include/ReqOptSumScorer.h b/include/ReqOptSumScorer.h deleted file mode 100644 index 6d1820b4..00000000 --- a/include/ReqOptSumScorer.h +++ /dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef REQOPTSUMSCORER_H -#define REQOPTSUMSCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// A Scorer for queries with a required part and an optional part. Delays skipTo() on the optional part - /// until a score() is needed. This Scorer implements {@link Scorer#skipTo(int32_t)}. - class ReqOptSumScorer : public Scorer - { - public: - ReqOptSumScorer(ScorerPtr reqScorer, ScorerPtr optScorer); - virtual ~ReqOptSumScorer(); - - LUCENE_CLASS(ReqOptSumScorer); - - protected: - ScorerPtr reqScorer; - ScorerPtr optScorer; - - public: - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - virtual int32_t docID(); - - /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} - /// is called the first time. - /// @return The score of the required scorer, eventually increased by the score of the optional scorer when - /// it also matches the current document. - virtual double score(); - }; -} - -#endif diff --git a/include/ReusableStringReader.h b/include/ReusableStringReader.h deleted file mode 100644 index 06daa073..00000000 --- a/include/ReusableStringReader.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef REUSABLESTRINGREADER_H -#define REUSABLESTRINGREADER_H - -#include "Reader.h" - -namespace Lucene -{ - /// Used by DocumentsWriter to implemented a StringReader that can be reset to a new string; we use this - /// when tokenizing the string value from a Field. - class ReusableStringReader : public Reader - { - public: - ReusableStringReader(); - virtual ~ReusableStringReader(); - - LUCENE_CLASS(ReusableStringReader); - - public: - int32_t upto; - int32_t left; - String s; - - public: - virtual void init(const String& s); - - using Reader::read; - - /// Read characters into a portion of an array. - virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); - - /// Close the stream. - virtual void close(); - }; -} - -#endif diff --git a/include/ReverseOrdFieldSource.h b/include/ReverseOrdFieldSource.h deleted file mode 100644 index 30f1f1ad..00000000 --- a/include/ReverseOrdFieldSource.h +++ /dev/null @@ -1,52 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef REVERSEORDFIELDSOURCE_H -#define REVERSEORDFIELDSOURCE_H - -#include "ValueSource.h" - -namespace Lucene -{ - /// Obtains the ordinal of the field value from the default Lucene {@link FieldCache} using getStringIndex() - /// and reverses the order. - /// - /// The native lucene index order is used to assign an ordinal value for each field value. - /// - /// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. 
Example - /// of reverse ordinal (rord): - /// - /// If there were only three field values: "apple","banana","pear" then rord("apple")=3, rord("banana")=2, - /// ord("pear")=1 - /// - /// WARNING: rord() depends on the position in an index and can thus change when other documents are inserted - /// or deleted, or if a MultiSearcher is used. - /// - /// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite - /// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's - /// best to switch your application to pass only atomic (single segment) readers to this API. - class LPPAPI ReverseOrdFieldSource : public ValueSource - { - public: - /// Constructor for a certain field. - /// @param field field whose values reverse order is used. - ReverseOrdFieldSource(const String& field); - virtual ~ReverseOrdFieldSource(); - - LUCENE_CLASS(ReverseOrdFieldSource); - - protected: - String field; - - public: - virtual String description(); - virtual DocValuesPtr getValues(IndexReaderPtr reader); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/ScoreCachingWrappingScorer.h b/include/ScoreCachingWrappingScorer.h deleted file mode 100644 index 4907d84f..00000000 --- a/include/ScoreCachingWrappingScorer.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SCORECACHINGWRAPPINGSCORER_H -#define SCORECACHINGWRAPPINGSCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// A {@link Scorer} which wraps another scorer and caches the score of the current document. 
Successive - /// calls to {@link #score()} will return the same result and will not invoke the wrapped Scorer's score() - /// method, unless the current document has changed. - /// - /// This class might be useful due to the changes done to the {@link Collector} interface, in which the - /// score is not computed for a document by default, only if the collector requests it. Some collectors - /// may need to use the score in several places, however all they have in hand is a {@link Scorer} object, - /// and might end up computing the score of a document more than once. - class LPPAPI ScoreCachingWrappingScorer : public Scorer - { - public: - /// Creates a new instance by wrapping the given scorer. - ScoreCachingWrappingScorer(ScorerPtr scorer); - virtual ~ScoreCachingWrappingScorer(); - - LUCENE_CLASS(ScoreCachingWrappingScorer); - - protected: - ScorerWeakPtr _scorer; - int32_t curDoc; - double curScore; - - public: - SimilarityPtr getSimilarity(); - virtual double score(); - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual void score(CollectorPtr collector); - virtual int32_t advance(int32_t target); - - protected: - virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); - }; -} - -#endif diff --git a/include/ScoreDoc.h b/include/ScoreDoc.h deleted file mode 100644 index e7516a87..00000000 --- a/include/ScoreDoc.h +++ /dev/null @@ -1,37 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SCOREDOC_H -#define SCOREDOC_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Expert: Returned by low-level search implementations. 
- /// @see TopDocs - class LPPAPI ScoreDoc : public LuceneObject - { - public: - ScoreDoc(int32_t doc, double score); - virtual ~ScoreDoc(); - - LUCENE_CLASS(ScoreDoc); - - public: - /// The score of this document for the query. - double score; - - /// A hit document's number. - /// @see Searcher#doc(int32_t) - int32_t doc; - - public: - virtual String toString(); - }; -} - -#endif diff --git a/include/Scorer.h b/include/Scorer.h deleted file mode 100644 index adc0c746..00000000 --- a/include/Scorer.h +++ /dev/null @@ -1,63 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SCORER_H -#define SCORER_H - -#include "DocIdSetIterator.h" - -namespace Lucene -{ - /// Common scoring functionality for different types of queries. - /// - /// A Scorer iterates over documents matching a query in increasing order of doc Id. - /// - /// Document scores are computed using a given Similarity implementation. - /// - /// NOTE: The values NEGATIVE_INFINITY and POSITIVE_INFINITY are not valid scores. Certain collectors - /// (eg {@link TopScoreDocCollector}) will not properly collect hits with these scores. - class LPPAPI Scorer : public DocIdSetIterator - { - public: - /// Constructs a Scorer. - /// @param similarity The Similarity implementation used by this scorer. - Scorer(SimilarityPtr similarity); - virtual ~Scorer(); - - LUCENE_CLASS(Scorer); - - protected: - SimilarityPtr similarity; - - public: - /// Returns the Similarity implementation used by this scorer. - SimilarityPtr getSimilarity(); - - /// Scores and collects all matching documents. - /// @param collector The collector to which all matching documents are passed. 
- virtual void score(CollectorPtr collector); - - /// Returns the score of the current document matching the query. Initially invalid, until {@link - /// #nextDoc()} or {@link #advance(int32_t)} is called the first time, or when called from within - /// {@link Collector#collect}. - virtual double score() = 0; - - protected: - /// Collects matching documents in a range. Hook for optimization. - /// Note, firstDocID is added to ensure that {@link #nextDoc()} was called before this method. - /// - /// @param collector The collector to which all matching documents are passed. - /// @param max Do not score documents past this. - /// @param firstDocID The first document ID (ensures {@link #nextDoc()} is called before this method. - /// @return true if more matching documents may remain. - virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); - - friend class BooleanScorer; - friend class ScoreCachingWrappingScorer; - }; -} - -#endif diff --git a/include/ScorerDocQueue.h b/include/ScorerDocQueue.h deleted file mode 100644 index b36c5140..00000000 --- a/include/ScorerDocQueue.h +++ /dev/null @@ -1,77 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SCORERDOCQUEUE_H -#define SCORERDOCQUEUE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A ScorerDocQueue maintains a partial ordering of its Scorers such that the least Scorer can always be - /// found in constant time. Put()'s and pop()'s require log(size) time. The ordering is by Scorer::doc(). 
- class LPPAPI ScorerDocQueue : public LuceneObject - { - public: - ScorerDocQueue(int32_t maxSize); - virtual ~ScorerDocQueue(); - - LUCENE_CLASS(ScorerDocQueue); - - protected: - Collection heap; - int32_t maxSize; - int32_t _size; - HeapedScorerDocPtr topHSD; // same as heap[1], only for speed - - public: - /// Adds a Scorer to a ScorerDocQueue in log(size) time. If one tries to add more Scorers than maxSize - /// ArrayIndexOutOfBound exception is thrown. - void put(ScorerPtr scorer); - - /// Adds a Scorer to the ScorerDocQueue in log(size) time if either the ScorerDocQueue is not full, or - /// not lessThan(scorer, top()). - /// @return true if scorer is added, false otherwise. - bool insert(ScorerPtr scorer); - - /// Returns the least Scorer of the ScorerDocQueue in constant time. Should not be used when the queue - /// is empty. - ScorerPtr top(); - - /// Returns document number of the least Scorer of the ScorerDocQueue in constant time. - /// Should not be used when the queue is empty. - int32_t topDoc(); - - double topScore(); - bool topNextAndAdjustElsePop(); - bool topSkipToAndAdjustElsePop(int32_t target); - - /// Removes and returns the least scorer of the ScorerDocQueue in log(size) time. Should not be used - /// when the queue is empty. - ScorerPtr pop(); - - /// Should be called when the scorer at top changes doc() value. - void adjustTop(); - - /// Returns the number of scorers currently stored in the ScorerDocQueue. - int32_t size(); - - /// Removes all entries from the ScorerDocQueue. - void clear(); - - protected: - bool checkAdjustElsePop(bool cond); - - /// Removes the least scorer of the ScorerDocQueue in log(size) time. Should not be used when the - /// queue is empty. 
- void popNoResult(); - - void upHeap(); - void downHeap(); - }; -} - -#endif diff --git a/include/Searchable.h b/include/Searchable.h deleted file mode 100644 index f987e58e..00000000 --- a/include/Searchable.h +++ /dev/null @@ -1,108 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEARCHABLE_H -#define SEARCHABLE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// The interface for search implementations. - /// - /// Searchable is the abstract network protocol for searching. Implementations provide search over a single - /// index, over multiple indices, and over indices on remote servers. - /// - /// Queries, filters and sort criteria are designed to be compact so that they may be efficiently passed to a - /// remote index, with only the top-scoring hits being returned, rather than every matching hit. - /// - /// NOTE: this interface is kept public for convenience. Since it is not expected to be implemented directly, - /// it may be changed unexpectedly between releases. - class LPPAPI Searchable - { - public: - LUCENE_INTERFACE(Searchable); - - public: - /// Lower-level search API. - /// - /// {@link Collector#collect(int32_t)} is called for every document. Collector-based access to remote - /// indexes is discouraged. - /// - /// Applications should only use this if they need all of the matching documents. The high-level search - /// API ({@link Searcher#search(QueryPtr, int32_t)}) is usually more efficient, as it skips non-high-scoring - /// hits. - /// - /// @param weight To match documents - /// @param filter If non-null, used to permit documents to be collected. 
- /// @param collector To receive hits - virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr collector) = 0; - - /// Frees resources associated with this Searcher. Be careful not to call this method while you are still - /// using objects that reference this Searchable. - virtual void close() = 0; - - /// Returns the number of documents containing term. - /// @see IndexReader#docFreq(TermPtr) - virtual int32_t docFreq(TermPtr term) = 0; - - /// For each term in the terms array, calculates the number of documents containing term. Returns an array - /// with these document frequencies. Used to minimize number of remote calls. - virtual Collection docFreqs(Collection terms) = 0; - - /// Returns one greater than the largest possible document number. - /// @see IndexReader#maxDoc() - virtual int32_t maxDoc() = 0; - - /// Low-level search implementation. Finds the top n hits for query, applying filter if non-null. - /// Applications should usually call {@link Searcher#search(QueryPtr, int32_t)} or {@link - /// Searcher#search(QueryPtr, FilterPtr, int32_t)} instead. - virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n) = 0; - - /// Returns the stored fields of document i. - /// @see IndexReader#document(int32_t) - virtual DocumentPtr doc(int32_t n) = 0; - - /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine what - /// {@link Field}s to load and how they should be loaded. - /// - /// NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the - /// lazy {@link Field} is loaded an exception may be thrown. If you want the value of a lazy {@link Field} - /// to be available after closing you must explicitly load it or fetch the Document again with a new loader. - /// - /// @param n Get the document at the n'th position - /// @param fieldSelector The {@link FieldSelector} to use to determine what Fields should be loaded on the - /// Document. 
May be null, in which case all Fields will be loaded. - /// @return The stored fields of the {@link Document} at the n'th position - /// - /// @see IndexReader#document(int32_t, FieldSelectorPtr) - /// @see Fieldable - /// @see FieldSelector - /// @see SetBasedFieldSelector - /// @see LoadFirstFieldSelector - virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector) = 0; - - /// Called to re-write queries into primitive queries. - virtual QueryPtr rewrite(QueryPtr query) = 0; - - /// Low-level implementation method. Returns an Explanation that describes how doc scored against weight. - /// - /// This is intended to be used in developing Similarity implementations, and for good performance, should - /// not be displayed with every hit. Computing an explanation is as expensive as executing the query over - /// the entire index. - /// - /// Applications should call {@link Searcher#explain(QueryPtr, int32_t)}. - virtual ExplanationPtr explain(WeightPtr weight, int32_t doc) = 0; - - /// Low-level search implementation with arbitrary sorting. Finds the top n hits for query, applying filter - /// if non-null, and sorting the hits by the criteria in sort. - /// - /// Applications should usually call {@link Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr)} instead. - virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) = 0; - }; -} - -#endif diff --git a/include/Searcher.h b/include/Searcher.h deleted file mode 100644 index a91b10c3..00000000 --- a/include/Searcher.h +++ /dev/null @@ -1,105 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SEARCHER_H -#define SEARCHER_H - -#include "Searchable.h" - -namespace Lucene -{ - /// An abstract base class for search implementations. Implements the main search methods. - /// - /// Note that you can only access hits from a Searcher as long as it is not yet closed, otherwise an IO - /// exception will be thrown. - class LPPAPI Searcher : public Searchable, public LuceneObject - { - public: - Searcher(); - virtual ~Searcher(); - - LUCENE_CLASS(Searcher); - - protected: - /// The Similarity implementation used by this searcher. - SimilarityPtr similarity; - - public: - /// Search implementation with arbitrary sorting. Finds the top n hits for query, applying filter if - /// non-null, and sorting the hits by the criteria in sort. - /// - /// NOTE: this does not compute scores by default; use {@link IndexSearcher#setDefaultFieldSortScoring} - /// to enable scoring. - virtual TopFieldDocsPtr search(QueryPtr query, FilterPtr filter, int32_t n, SortPtr sort); - - /// Lower-level search API. - /// - /// {@link Collector#collect(int32_t)} is called for every matching document. - /// - /// Applications should only use this if they need all of the matching documents. The high-level - /// search API ({@link Searcher#search(QueryPtr, int32_t)}) is usually more efficient, as it skips - /// non-high-scoring hits. - /// - /// Note: The score passed to this method is a raw score. In other words, the score will not necessarily - /// be a double whose value is between 0 and 1. - virtual void search(QueryPtr query, CollectorPtr results); - - /// Lower-level search API. - /// - /// {@link Collector#collect(int32_t)} is called for every matching document. Collector-based access to - /// remote indexes is discouraged. - /// - /// Applications should only use this if they need all of the matching documents. 
The high-level search - /// API ({@link Searcher#search(QueryPtr, FilterPtr, int32_t)}) is usually more efficient, as it skips - /// non-high-scoring hits. - /// - /// @param query To match documents - /// @param filter If non-null, used to permit documents to be collected. - /// @param results To receive hits - virtual void search(QueryPtr query, FilterPtr filter, CollectorPtr results); - - /// Finds the top n hits for query, applying filter if non-null. - virtual TopDocsPtr search(QueryPtr query, FilterPtr filter, int32_t n); - - /// Finds the top n hits for query. - virtual TopDocsPtr search(QueryPtr query, int32_t n); - - /// Returns an Explanation that describes how doc scored against query. - /// - /// This is intended to be used in developing Similarity implementations, and for good performance, - /// should not be displayed with every hit. Computing an explanation is as expensive as executing the - /// query over the entire index. - virtual ExplanationPtr explain(QueryPtr query, int32_t doc); - - /// Set the Similarity implementation used by this Searcher. - virtual void setSimilarity(SimilarityPtr similarity); - - /// Return the Similarity implementation used by this Searcher. - /// - /// This defaults to the current value of {@link Similarity#getDefault()}. 
- virtual SimilarityPtr getSimilarity(); - - virtual Collection docFreqs(Collection terms); - - virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr results) = 0; - virtual void close() = 0; - virtual int32_t docFreq(TermPtr term) = 0; - virtual int32_t maxDoc() = 0; - virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n) = 0; - virtual DocumentPtr doc(int32_t n) = 0; - virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector) = 0; - virtual QueryPtr rewrite(QueryPtr query) = 0; - virtual ExplanationPtr explain(WeightPtr weight, int32_t doc) = 0; - virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) = 0; - - protected: - /// Creates a weight for query. - /// @return New weight - virtual WeightPtr createWeight(QueryPtr query); - }; -} - -#endif diff --git a/include/SegmentInfo.h b/include/SegmentInfo.h deleted file mode 100644 index 86bb0313..00000000 --- a/include/SegmentInfo.h +++ /dev/null @@ -1,173 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTINFO_H -#define SEGMENTINFO_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Information about a segment such as it's name, directory, and files - /// related to the segment. 
- class LPPAPI SegmentInfo : public LuceneObject - { - public: - SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir); - - SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir, bool isCompoundFile, bool hasSingleNormFile); - - SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir, bool isCompoundFile, - bool hasSingleNormFile, int32_t docStoreOffset, const String& docStoreSegment, - bool docStoreIsCompoundFile, bool hasProx); - - /// Construct a new SegmentInfo instance by reading a previously saved SegmentInfo from input. - /// @param dir directory to load from. - /// @param format format of the segments info file. - /// @param input input handle to read segment info from. - SegmentInfo(DirectoryPtr dir, int32_t format, IndexInputPtr input); - - virtual ~SegmentInfo(); - - LUCENE_CLASS(SegmentInfo); - - public: - static const int32_t NO; // no norms; no deletes; - static const int32_t YES; // have norms; have deletes; - static const int32_t CHECK_DIR; // must check dir to see if there are norms/deletions - static const int32_t WITHOUT_GEN; // a file name that has no GEN in it. - - protected: - // true if this is a segments file written before lock-less commits (2.1) - bool preLockless; - - // current generation of del file; NO if there are no deletes; CHECK_DIR if it's a pre-2.1 segment - // (and we must check filesystem); YES or higher if there are deletes at generation N - int64_t delGen; - - // current generation of each field's norm file. If this array is null, for lockLess this means no - // separate norms. For preLockLess this means we must check filesystem. 
If this array is not null, - // its values mean: NO says this field has no separate norms; CHECK_DIR says it is a preLockLess - // segment and filesystem must be checked; >= YES says this field has separate norms with the - // specified generation - Collection normGen; - - // NO if it is not; YES if it is; CHECK_DIR if it's pre-2.1 (ie, must check file system to see if - // .cfs and .nrm exist) - uint8_t isCompoundFile; - - // true if this segment maintains norms in a single file; false otherwise this is currently false for - // segments populated by DocumentWriter and true for newly created merged segments (both compound and - // non compound). - bool hasSingleNormFile; - - // cached list of files that this segment uses in the Directory - HashSet _files; - - // total byte size of all of our files (computed on demand) - int64_t _sizeInBytes; - - // if this segment shares stored fields & vectors, this offset is where in that file this segment's - // docs begin - int32_t docStoreOffset; - - // name used to derive fields/vectors file we share with other segments - String docStoreSegment; - - // whether doc store files are stored in compound file (*.cfx) - bool docStoreIsCompoundFile; - - // How many deleted docs in this segment, or -1 if not yet known (if it's an older index) - int32_t delCount; - - // True if this segment has any fields with omitTermFreqAndPositions == false - bool hasProx; - - MapStringString diagnostics; - - public: - String name; // unique name in dir - int32_t docCount; // number of docs in seg - DirectoryPtr dir; // where segment resides - - public: - /// Copy everything from src SegmentInfo into our instance. - void reset(SegmentInfoPtr src); - - void setDiagnostics(MapStringString diagnostics); - MapStringString getDiagnostics(); - - void setNumFields(int32_t numFields); - - /// Returns total size in bytes of all of files used by this segment. 
- int64_t sizeInBytes(); - - bool hasDeletions(); - void advanceDelGen(); - void clearDelGen(); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - String getDelFileName(); - - /// Returns true if this field for this segment has saved a separate norms file (__N.sX). - /// @param fieldNumber the field index to check - bool hasSeparateNorms(int32_t fieldNumber); - - /// Returns true if any fields in this segment have separate norms. - bool hasSeparateNorms(); - - /// Increment the generation count for the norms file for this field. - /// @param fieldIndex field whose norm file will be rewritten - void advanceNormGen(int32_t fieldIndex); - - /// Get the file name for the norms file for this field. - /// @param number field index - String getNormFileName(int32_t number); - - /// Mark whether this segment is stored as a compound file. - /// @param isCompoundFile true if this is a compound file; else, false - void setUseCompoundFile(bool isCompoundFile); - - /// Returns true if this segment is stored as a compound file; else, false. - bool getUseCompoundFile(); - - int32_t getDelCount(); - void setDelCount(int32_t delCount); - int32_t getDocStoreOffset(); - bool getDocStoreIsCompoundFile(); - void setDocStoreIsCompoundFile(bool v); - String getDocStoreSegment(); - void setDocStoreOffset(int32_t offset); - void setDocStore(int32_t offset, const String& segment, bool isCompoundFile); - - /// Save this segment's info. - void write(IndexOutputPtr output); - - void setHasProx(bool hasProx); - bool getHasProx(); - - /// Return all files referenced by this SegmentInfo. The returns List is a locally cached List so - /// you should not modify it. - HashSet files(); - - /// Used for debugging. - String segString(DirectoryPtr dir); - - /// We consider another SegmentInfo instance equal if it has the same dir and same name. 
- virtual bool equals(LuceneObjectPtr other); - - virtual int32_t hashCode(); - - protected: - void addIfExists(HashSet files, const String& fileName); - - /// Called whenever any change is made that affects which files this segment has. - void clearFiles(); - }; -} - -#endif diff --git a/include/SegmentInfoCollection.h b/include/SegmentInfoCollection.h deleted file mode 100644 index 52a4785f..00000000 --- a/include/SegmentInfoCollection.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTINFOCOLLECTION_H -#define SEGMENTINFOCOLLECTION_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A collection of SegmentInfo objects to be used as a base class for {@link SegmentInfos} - class LPPAPI SegmentInfoCollection : public LuceneObject - { - public: - SegmentInfoCollection(); - virtual ~SegmentInfoCollection(); - - LUCENE_CLASS(SegmentInfoCollection); - - protected: - Collection segmentInfos; - - public: - int32_t size(); - bool empty(); - void clear(); - void add(SegmentInfoPtr info); - void add(int32_t pos, SegmentInfoPtr info); - void addAll(SegmentInfoCollectionPtr segmentInfos); - bool equals(SegmentInfoCollectionPtr other); - int32_t find(SegmentInfoPtr info); - bool contains(SegmentInfoPtr info); - void remove(int32_t pos); - void remove(int32_t start, int32_t end); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/SegmentInfos.h b/include/SegmentInfos.h deleted file mode 100644 index 68f9f31e..00000000 --- a/include/SegmentInfos.h +++ /dev/null @@ -1,183 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright 
(c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTINFOS_H -#define SEGMENTINFOS_H - -#include "SegmentInfoCollection.h" - -namespace Lucene -{ - /// A collection of SegmentInfo objects with methods for operating on those segments in relation to the file system. - class LPPAPI SegmentInfos : public SegmentInfoCollection - { - public: - SegmentInfos(); - virtual ~SegmentInfos(); - - LUCENE_CLASS(SegmentInfos); - - public: - /// The file format version, a negative number. Works since counter, the old 1st entry, is always >= 0 - static const int32_t FORMAT; - - /// This format adds details used for lockless commits. It differs slightly from the previous format in that file names - /// are never re-used (write once). Instead, each file is written to the next generation. For example, segments_1, - /// segments_2, etc. This allows us to not use a commit lock. - /// See fileformats for details. - static const int32_t FORMAT_LOCKLESS; - - /// This format adds a "hasSingleNormFile" flag into each segment info. - static const int32_t FORMAT_SINGLE_NORM_FILE; - - /// This format allows multiple segments to share a single vectors and stored fields file. - static const int32_t FORMAT_SHARED_DOC_STORE; - - /// This format adds a checksum at the end of the file to ensure all bytes were successfully written. - static const int32_t FORMAT_CHECKSUM; - - /// This format adds the deletion count for each segment. This way IndexWriter can efficiently report numDocs(). - static const int32_t FORMAT_DEL_COUNT; - - /// This format adds the boolean hasProx to record if any fields in the segment store prox information (ie, have - /// omitTermFreqAndPositions == false) - static const int32_t FORMAT_HAS_PROX; - - /// This format adds optional commit userData storage. 
- static const int32_t FORMAT_USER_DATA; - - /// This format adds optional per-segment string diagnostics storage, and switches userData to Map - static const int32_t FORMAT_DIAGNOSTICS; - - /// This must always point to the most recent file format. - static const int32_t CURRENT_FORMAT; - - int32_t counter; // used to name new segments - - private: - /// Advanced configuration of retry logic in loading segments_N file. - static int32_t defaultGenFileRetryCount; - static int32_t defaultGenFileRetryPauseMsec; - static int32_t defaultGenLookaheadCount; - - /// Counts how often the index has been changed by adding or deleting docs. - /// Starting with the current time in milliseconds forces to create unique version numbers. - int64_t version; - - int64_t generation; // generation of the "segments_N" for the next commit - - int64_t lastGeneration; // generation of the "segments_N" file we last successfully read - // or wrote; this is normally the same as generation except if - // there was an exception that had interrupted a commit - - MapStringString userData; // Opaque map that user can specify during IndexWriter::commit - - static MapStringString singletonUserData; - - static InfoStreamPtr infoStream; - ChecksumIndexOutputPtr pendingSegnOutput; - - public: - SegmentInfoPtr info(int32_t i); - String getCurrentSegmentFileName(); - String getNextSegmentFileName(); - - /// Read a particular segmentFileName. Note that this may throw an IOException if a commit is in process. - void read(DirectoryPtr directory, const String& segmentFileName); - - /// This version of read uses the retry logic (for lock-less commits) to find the right segments file to load. - void read(DirectoryPtr directory); - - /// Returns a copy of this instance, also copying each SegmentInfo. - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Version number when this SegmentInfos was generated. 
- int64_t getVersion(); - int64_t getGeneration(); - int64_t getLastGeneration(); - - /// Returns a new SegmentInfos containing the SegmentInfo instances in the specified range first (inclusive) to - /// last (exclusive), so total number of segments returned is last-first. - SegmentInfosPtr range(int32_t first, int32_t last); - - /// Carry over generation numbers from another SegmentInfos. - void updateGeneration(SegmentInfosPtr other); - - void rollbackCommit(DirectoryPtr dir); - - /// Call this to start a commit. This writes the new segments file, but writes an invalid checksum at the end, so - /// that it is not visible to readers. Once this is called you must call. - /// {@link #finishCommit} to complete the commit or - /// {@link #rollbackCommit} to abort it. - void prepareCommit(DirectoryPtr dir); - - /// Returns all file names referenced by SegmentInfo instances matching the provided Directory (ie files associated - /// with any "external" segments are skipped). The returned collection is recomputed on each invocation. - HashSet files(DirectoryPtr dir, bool includeSegmentsFile); - - void finishCommit(DirectoryPtr dir); - - /// Writes & syncs to the Directory dir, taking care to remove the segments file on exception. - void commit(DirectoryPtr dir); - - String segString(DirectoryPtr directory); - MapStringString getUserData(); - void setUserData(MapStringString data); - - /// Replaces all segments in this instance, but keeps generation, version, counter so that future commits remain - /// write once. 
- void replace(SegmentInfosPtr other); - - bool hasExternalSegments(DirectoryPtr dir); - - static int64_t getCurrentSegmentGeneration(HashSet files); - static int64_t getCurrentSegmentGeneration(DirectoryPtr directory); - static String getCurrentSegmentFileName(HashSet files); - static String getCurrentSegmentFileName(DirectoryPtr directory); - static int64_t generationFromSegmentsFileName(const String& fileName); - - /// Current version number from segments file. - static int64_t readCurrentVersion(DirectoryPtr directory); - - /// Returns userData from latest segments file. - static MapStringString readCurrentUserData(DirectoryPtr directory); - - /// If non-null, information about retries when loading the segments file will be printed to this. - static void setInfoStream(InfoStreamPtr infoStream); - - /// Set how many times to try loading the segments.gen file contents to determine current segment generation. This file - /// is only referenced when the primary method (listing the directory) fails. - static void setDefaultGenFileRetryCount(int32_t count); - - /// @see #setDefaultGenFileRetryCount - static int32_t getDefaultGenFileRetryCount(); - - /// Set how many milliseconds to pause in between attempts to load the segments.gen file. - static void setDefaultGenFileRetryPauseMsec(int32_t msec); - - /// @see #setDefaultGenFileRetryPauseMsec - static int32_t getDefaultGenFileRetryPauseMsec(); - - /// Set how many times to try incrementing the gen when loading the segments file. This only runs if the primary - /// (listing directory) and secondary (opening segments.gen file) methods fail to find the segments file. 
- static void setDefaultGenLookaheadCount(int32_t count); - - /// @see #setDefaultGenLookaheadCount - static int32_t getDefaultGenLookahedCount(); - - /// @see #setInfoStream - static InfoStreamPtr getInfoStream(); - - static void message(const String& message); - - protected: - void write(DirectoryPtr directory); - - friend class FindSegmentsFile; - }; -} - -#endif diff --git a/include/SegmentMergeInfo.h b/include/SegmentMergeInfo.h deleted file mode 100644 index b749ff52..00000000 --- a/include/SegmentMergeInfo.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTMERGEINFO_H -#define SEGMENTMERGEINFO_H - -#include "Term.h" - -namespace Lucene -{ - class SegmentMergeInfo : public LuceneObject - { - public: - SegmentMergeInfo(int32_t b, TermEnumPtr te, IndexReaderPtr r); - virtual ~SegmentMergeInfo(); - - LUCENE_CLASS(SegmentMergeInfo); - - protected: - TermPositionsPtr postings; // use getPositions() - Collection docMap; // use getDocMap() - - public: - TermPtr term; - int32_t base; - int32_t ord; // the position of the segment in a MultiReader - TermEnumPtr termEnum; - IndexReaderWeakPtr _reader; - int32_t delCount; - - public: - Collection getDocMap(); - TermPositionsPtr getPositions(); - bool next(); - void close(); - }; -} - -#endif diff --git a/include/SegmentMergeQueue.h b/include/SegmentMergeQueue.h deleted file mode 100644 index 52182256..00000000 --- a/include/SegmentMergeQueue.h +++ /dev/null @@ -1,30 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTMERGEQUEUE_H -#define SEGMENTMERGEQUEUE_H - -#include "PriorityQueue.h" - -namespace Lucene -{ - class SegmentMergeQueue : public PriorityQueue - { - public: - SegmentMergeQueue(int32_t size); - virtual ~SegmentMergeQueue(); - - LUCENE_CLASS(SegmentMergeQueue); - - public: - void close(); - - protected: - virtual bool lessThan(const SegmentMergeInfoPtr& first, const SegmentMergeInfoPtr& second); - }; -} - -#endif diff --git a/include/SegmentMerger.h b/include/SegmentMerger.h deleted file mode 100644 index 8ba3aa21..00000000 --- a/include/SegmentMerger.h +++ /dev/null @@ -1,157 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTMERGER_H -#define SEGMENTMERGER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, into a single - /// Segment. After adding the appropriate readers, call the merge method to combine the segments. - /// - /// If the compoundFile flag is set, then the segments will be merged into a compound file. 
- /// @see #merge - /// @see #add - class SegmentMerger : public LuceneObject - { - public: - SegmentMerger(DirectoryPtr dir, const String& name); - SegmentMerger(IndexWriterPtr writer, const String& name, OneMergePtr merge); - virtual ~SegmentMerger(); - - LUCENE_CLASS(SegmentMerger); - - protected: - DirectoryPtr directory; - String segment; - int32_t termIndexInterval; - - Collection readers; - FieldInfosPtr fieldInfos; - - int32_t mergedDocs; - CheckAbortPtr checkAbort; - - /// Whether we should merge doc stores (stored fields and vectors files). When all segments we - /// are merging already share the same doc store files, we don't need to merge the doc stores. - bool mergeDocStores; - - /// Maximum number of contiguous documents to bulk-copy when merging stored fields - static const int32_t MAX_RAW_MERGE_DOCS; - - Collection matchingSegmentReaders; - Collection rawDocLengths; - Collection rawDocLengths2; - - SegmentMergeQueuePtr queue; - bool omitTermFreqAndPositions; - - ByteArray payloadBuffer; - Collection< Collection > docMaps; - Collection delCounts; - - public: - /// norms header placeholder - static const uint8_t NORMS_HEADER[]; - static const int32_t NORMS_HEADER_LENGTH; - - public: - bool hasProx(); - - /// Add an IndexReader to the collection of readers that are to be merged - void add(IndexReaderPtr reader); - - /// @param i The index of the reader to return - /// @return The i'th reader to be merged - IndexReaderPtr segmentReader(int32_t i); - - /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor. - /// @return The number of documents that were merged - int32_t merge(); - - /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor. 
- /// @param mergeDocStores if false, we will not merge the stored fields nor vectors files - /// @return The number of documents that were merged - int32_t merge(bool mergeDocStores); - - /// close all IndexReaders that have been added. Should not be called before merge(). - void closeReaders(); - - HashSet getMergedFiles(); - HashSet createCompoundFile(const String& fileName); - - /// @return The number of documents in all of the readers - int32_t mergeFields(); - - Collection< Collection > getDocMaps(); - Collection getDelCounts(); - - protected: - void addIndexed(IndexReaderPtr reader, FieldInfosPtr fInfos, HashSet names, bool storeTermVectors, - bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, - bool omitTFAndPositions); - - void setMatchingSegmentReaders(); - int32_t copyFieldsWithDeletions(FieldsWriterPtr fieldsWriter, IndexReaderPtr reader, FieldsReaderPtr matchingFieldsReader); - int32_t copyFieldsNoDeletions(FieldsWriterPtr fieldsWriter, IndexReaderPtr reader, FieldsReaderPtr matchingFieldsReader); - - /// Merge the TermVectors from each of the segments into the new one. - void mergeVectors(); - - void copyVectorsWithDeletions(TermVectorsWriterPtr termVectorsWriter, TermVectorsReaderPtr matchingVectorsReader, IndexReaderPtr reader); - void copyVectorsNoDeletions(TermVectorsWriterPtr termVectorsWriter, TermVectorsReaderPtr matchingVectorsReader, IndexReaderPtr reader); - - void mergeTerms(); - - void mergeTermInfos(FormatPostingsFieldsConsumerPtr consumer); - - /// Process postings from multiple segments all positioned on the same term. Writes out merged entries - /// into freqOutput and the proxOutput streams. 
- /// @param smis array of segments - /// @param n number of cells in the array actually occupied - /// @return number of documents across all segments where this term was found - int32_t appendPostings(FormatPostingsTermsConsumerPtr termsConsumer, Collection smis, int32_t n); - - void mergeNorms(); - }; - - class CheckAbort : public LuceneObject - { - public: - CheckAbort(OneMergePtr merge, DirectoryPtr dir); - virtual ~CheckAbort(); - - LUCENE_CLASS(CheckAbort); - - protected: - double workCount; - OneMergePtr merge; - DirectoryWeakPtr _dir; - - public: - /// Records the fact that roughly units amount of work have been done since this method was last called. - /// When adding time-consuming code into SegmentMerger, you should test different values for units to - /// ensure that the time in between calls to merge.checkAborted is up to ~ 1 second. - virtual void work(double units); - }; - - class CheckAbortNull : public CheckAbort - { - public: - CheckAbortNull(); - virtual ~CheckAbortNull(); - - LUCENE_CLASS(CheckAbortNull); - - public: - /// do nothing - virtual void work(double units); - }; -} - -#endif diff --git a/include/SegmentReader.h b/include/SegmentReader.h deleted file mode 100644 index 7c79627a..00000000 --- a/include/SegmentReader.h +++ /dev/null @@ -1,221 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTREADER_H -#define SEGMENTREADER_H - -#include "IndexReader.h" -#include "CloseableThreadLocal.h" - -namespace Lucene -{ - class LPPAPI SegmentReader : public IndexReader - { - public: - SegmentReader(); - virtual ~SegmentReader(); - - LUCENE_CLASS(SegmentReader); - - protected: - bool readOnly; - - INTERNAL: - BitVectorPtr deletedDocs; - SegmentReaderRefPtr deletedDocsRef; - CoreReadersPtr core; - FieldsReaderLocalPtr fieldsReaderLocal; - SegmentInfoPtr rollbackSegmentInfo; - CloseableThreadLocal termVectorsLocal; - FieldInfosPtr fieldInfos(); - - /// Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. - /// @return TermVectorsReader - TermVectorsReaderPtr getTermVectorsReader(); - - TermVectorsReaderPtr getTermVectorsReaderOrig(); - FieldsReaderPtr getFieldsReader(); - MapStringNorm _norms; - - private: - SegmentInfoPtr si; - int32_t readBufferSize; - bool deletedDocsDirty; - bool normsDirty; - int32_t pendingDeleteCount; - - bool rollbackHasChanges; - bool rollbackDeletedDocsDirty; - bool rollbackNormsDirty; - int32_t rollbackPendingDeleteCount; - - // optionally used for the .nrm file shared by multiple norms - IndexInputPtr singleNormStream; - SegmentReaderRefPtr singleNormRef; - - public: - virtual void initialize(); - - using IndexReader::document; - using IndexReader::termPositions; - - static SegmentReaderPtr get(bool readOnly, SegmentInfoPtr si, int32_t termInfosIndexDivisor); - static SegmentReaderPtr get(bool readOnly, DirectoryPtr dir, SegmentInfoPtr si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor); - - void openDocStores(); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual LuceneObjectPtr clone(bool openReadOnly, LuceneObjectPtr other = LuceneObjectPtr()); - SegmentReaderPtr reopenSegment(SegmentInfoPtr si, bool doClone, bool openReadOnly); - - static bool 
hasDeletions(SegmentInfoPtr si); - - /// Returns true if any documents have been deleted - virtual bool hasDeletions(); - - static bool usesCompoundFile(SegmentInfoPtr si); - static bool hasSeparateNorms(SegmentInfoPtr si); - - HashSet files(); - - /// Returns an enumeration of all the terms in the index. - virtual TermEnumPtr terms(); - - /// Returns an enumeration of all terms starting at a given term. - virtual TermEnumPtr terms(TermPtr t); - - /// Get the {@link Document} at the n'th position. - virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); - - /// Returns true if document n has been deleted - virtual bool isDeleted(int32_t n); - - /// Returns an enumeration of all the documents which contain term. - virtual TermDocsPtr termDocs(TermPtr term); - - /// Returns an unpositioned {@link TermDocs} enumerator. - virtual TermDocsPtr termDocs(); - - /// Returns an unpositioned {@link TermPositions} enumerator. - virtual TermPositionsPtr termPositions(); - - /// Returns the number of documents containing the term t. - virtual int32_t docFreq(TermPtr t); - - /// Returns the number of documents in this index. - virtual int32_t numDocs(); - - /// Returns one greater than the largest possible document number. - virtual int32_t maxDoc(); - - /// Get a list of unique field names that exist in this index and have the specified field option information. - virtual HashSet getFieldNames(FieldOption fieldOption); - - /// Returns true if there are norms stored for this field. - virtual bool hasNorms(const String& field); - - /// Returns the byte-encoded normalization factor for the named field of every document. - virtual ByteArray norms(const String& field); - - /// Read norms into a pre-allocated array. 
- virtual void norms(const String& field, ByteArray norms, int32_t offset); - - bool termsIndexLoaded(); - - /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run, sharing a - /// segment that's still being merged. This method is not thread safe, and relies on the synchronization in IndexWriter - void loadTermsIndex(int32_t termsIndexDivisor); - - bool normsClosed(); // for testing only - bool normsClosed(const String& field); // for testing only - - /// Return a term frequency vector for the specified document and field. The vector returned contains term - /// numbers and frequencies for all terms in the specified field of this document, if the field had - /// storeTermVector flag set. If the flag was not set, the method returns null. - virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); - - /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays - /// of the {@link TermFreqVector}. - virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); - - /// Map all the term vectors for all fields in a Document - virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); - - /// Return an array of term frequency vectors for the specified document. The array contains a vector for - /// each vectorized field in the document. Each vector vector contains term numbers and frequencies for all - /// terms in a given vectorized field. If no such fields existed, the method returns null. - virtual Collection getTermFreqVectors(int32_t docNumber); - - /// Return the name of the segment this reader is reading. - String getSegmentName(); - - /// Return the SegmentInfo of the segment this reader is reading. - SegmentInfoPtr getSegmentInfo(); - void setSegmentInfo(SegmentInfoPtr info); - - void startCommit(); - void rollbackCommit(); - - /// Returns the directory this index resides in. 
- virtual DirectoryPtr directory(); - - /// This is necessary so that cloned SegmentReaders (which share the underlying postings data) - /// will map to the same entry in the FieldCache. - virtual LuceneObjectPtr getFieldCacheKey(); - virtual LuceneObjectPtr getDeletesCacheKey(); - - /// Returns the number of unique terms (across all fields) in this reader. - virtual int64_t getUniqueTermCount(); - - static SegmentReaderPtr getOnlySegmentReader(DirectoryPtr dir); - static SegmentReaderPtr getOnlySegmentReader(IndexReaderPtr reader); - - virtual int32_t getTermInfosIndexDivisor(); - - protected: - bool checkDeletedCounts(); - void loadDeletedDocs(); - - /// Clones the norm bytes. May be overridden by subclasses. - /// @param bytes Byte array to clone - /// @return New BitVector - virtual ByteArray cloneNormBytes(ByteArray bytes); - - /// Clones the deleteDocs BitVector. May be overridden by subclasses. - /// @param bv BitVector to clone - /// @return New BitVector - virtual BitVectorPtr cloneDeletedDocs(BitVectorPtr bv); - - /// Implements commit. - virtual void doCommit(MapStringString commitUserData); - - virtual void commitChanges(MapStringString commitUserData); - - /// Implements close. - virtual void doClose(); - - /// Implements deletion of the document numbered docNum. - /// Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. - virtual void doDelete(int32_t docNum); - - /// Implements actual undeleteAll() in subclass. - virtual void doUndeleteAll(); - - /// can return null if norms aren't stored - ByteArray getNorms(const String& field); - - /// Implements setNorm in subclass. 
- virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); - - void openNorms(DirectoryPtr cfsDir, int32_t readBufferSize); - - friend class ReaderPool; - friend class IndexWriter; - friend class Norm; - }; -} - -#endif diff --git a/include/SegmentTermDocs.h b/include/SegmentTermDocs.h deleted file mode 100644 index 4db8b26d..00000000 --- a/include/SegmentTermDocs.h +++ /dev/null @@ -1,83 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTTERMDOCS_H -#define SEGMENTTERMDOCS_H - -#include "TermPositions.h" - -namespace Lucene -{ - class SegmentTermDocs : public TermPositions, public LuceneObject - { - public: - SegmentTermDocs(SegmentReaderPtr parent); - virtual ~SegmentTermDocs(); - - LUCENE_CLASS(SegmentTermDocs); - - protected: - SegmentReaderWeakPtr _parent; - IndexInputPtr _freqStream; - int32_t count; - int32_t df; - BitVectorPtr deletedDocs; - int32_t _doc; - int32_t _freq; - - int32_t skipInterval; - int32_t maxSkipLevels; - DefaultSkipListReaderPtr skipListReader; - - int64_t freqBasePointer; - int64_t proxBasePointer; - - int64_t skipPointer; - bool haveSkipped; - - bool currentFieldStoresPayloads; - bool currentFieldOmitTermFreqAndPositions; - - public: - /// Sets this to the data for a term. - virtual void seek(TermPtr term); - - /// Sets this to the data for the current term in a {@link TermEnum}. - virtual void seek(TermEnumPtr termEnum); - - virtual void seek(TermInfoPtr ti, TermPtr term); - - virtual void close(); - - /// Returns the current document number. - virtual int32_t doc(); - - /// Returns the frequency of the term within the current document. 
- virtual int32_t freq(); - - /// Moves to the next pair in the enumeration. - virtual bool next(); - - /// Optimized implementation. - virtual int32_t read(Collection docs, Collection freqs); - - /// Optimized implementation. - virtual bool skipTo(int32_t target); - - /// Used for testing - virtual IndexInputPtr freqStream(); - virtual void freqStream(IndexInputPtr freqStream); - - protected: - virtual void skippingDoc(); - virtual int32_t readNoTf(Collection docs, Collection freqs, int32_t length); - - /// Overridden by SegmentTermPositions to skip in prox stream. - virtual void skipProx(int64_t proxPointer, int32_t payloadLength); - }; -} - -#endif diff --git a/include/SegmentTermEnum.h b/include/SegmentTermEnum.h deleted file mode 100644 index 5a40f4e8..00000000 --- a/include/SegmentTermEnum.h +++ /dev/null @@ -1,88 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTTERMENUM_H -#define SEGMENTTERMENUM_H - -#include "TermEnum.h" - -namespace Lucene -{ - class SegmentTermEnum : public TermEnum - { - public: - SegmentTermEnum(); - SegmentTermEnum(IndexInputPtr i, FieldInfosPtr fis, bool isi); - virtual ~SegmentTermEnum(); - - LUCENE_CLASS(SegmentTermEnum); - - protected: - IndexInputPtr input; - TermBufferPtr termBuffer; - TermBufferPtr prevBuffer; - TermBufferPtr scanBuffer; // used for scanning - - TermInfoPtr _termInfo; - - int32_t format; - bool isIndex; - int32_t formatM1SkipInterval; - - public: - FieldInfosPtr fieldInfos; - int64_t size; - int64_t position; - - int64_t indexPointer; - int32_t indexInterval; - int32_t skipInterval; - int32_t maxSkipLevels; - - public: - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - void seek(int64_t pointer, int64_t p, TermPtr t, TermInfoPtr ti); - - /// Increments the enumeration to the next element. True if one exists. - virtual bool next(); - - /// Optimized scan, without allocating new terms. Return number of invocations to next(). - int32_t scanTo(TermPtr term); - - /// Returns the current Term in the enumeration. - /// Initially invalid, valid after next() called for the first time. - virtual TermPtr term(); - - /// Returns the previous Term enumerated. Initially null. - TermPtr prev(); - - /// Returns the current TermInfo in the enumeration. - /// Initially invalid, valid after next() called for the first time. - TermInfoPtr termInfo(); - - /// Sets the argument to the current TermInfo in the enumeration. - /// Initially invalid, valid after next() called for the first time. - void termInfo(TermInfoPtr ti); - - /// Returns the docFreq of the current Term in the enumeration. - /// Initially invalid, valid after next() called for the first time. - virtual int32_t docFreq(); - - /// Returns the freqPointer from the current TermInfo in the enumeration. 
- /// Initially invalid, valid after next() called for the first time. - int64_t freqPointer(); - - /// Returns the proxPointer from the current TermInfo in the enumeration. - /// Initially invalid, valid after next() called for the first time. - int64_t proxPointer(); - - /// Closes the enumeration to further activity, freeing resources. - virtual void close(); - }; -} - -#endif diff --git a/include/SegmentTermPositionVector.h b/include/SegmentTermPositionVector.h deleted file mode 100644 index 31339e7b..00000000 --- a/include/SegmentTermPositionVector.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTTERMPOSITIONVECTOR_H -#define SEGMENTTERMPOSITIONVECTOR_H - -#include "SegmentTermVector.h" - -namespace Lucene -{ - class SegmentTermPositionVector : public SegmentTermVector - { - public: - SegmentTermPositionVector(const String& field, Collection terms, Collection termFreqs, - Collection< Collection > positions, Collection< Collection > offsets); - virtual ~SegmentTermPositionVector(); - - LUCENE_CLASS(SegmentTermPositionVector); - - protected: - Collection< Collection > positions; - Collection< Collection > offsets; - - protected: - static const Collection EMPTY_TERM_POS(); - - public: - /// Returns an array of TermVectorOffsetInfo in which the term is found. - /// @param index The position in the array to get the offsets from - /// @return An array of TermVectorOffsetInfo objects or the empty list - virtual Collection getOffsets(int32_t index); - - /// Returns an array of positions in which the term is found. 
- /// Terms are identified by the index at which its number appears in the term String array obtained from the indexOf method. - virtual Collection getTermPositions(int32_t index); - }; -} - -#endif diff --git a/include/SegmentTermPositions.h b/include/SegmentTermPositions.h deleted file mode 100644 index e9018001..00000000 --- a/include/SegmentTermPositions.h +++ /dev/null @@ -1,80 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTTERMPOSITIONS_H -#define SEGMENTTERMPOSITIONS_H - -#include "SegmentTermDocs.h" - -namespace Lucene -{ - class SegmentTermPositions : public SegmentTermDocs - { - public: - SegmentTermPositions(SegmentReaderPtr parent); - virtual ~SegmentTermPositions(); - - LUCENE_CLASS(SegmentTermPositions); - - protected: - IndexInputPtr proxStream; - int32_t proxCount; - int32_t position; - - /// The current payload length - int32_t payloadLength; - - /// Indicates whether the payload of the current position has been read from the proxStream yet - bool needToLoadPayload; - - // these variables are being used to remember information for a lazy skip - int64_t lazySkipPointer; - int32_t lazySkipProxCount; - - public: - using SegmentTermDocs::seek; - - virtual void seek(TermInfoPtr ti, TermPtr term); - virtual void close(); - - /// Returns next position in the current document. - virtual int32_t nextPosition(); - - /// Moves to the next pair in the enumeration. - virtual bool next(); - - /// Not supported - virtual int32_t read(Collection docs, Collection freqs); - - /// Returns the length of the payload at the current term position. - virtual int32_t getPayloadLength(); - - /// Returns the payload data at the current term position. 
- virtual ByteArray getPayload(ByteArray data, int32_t offset); - - /// Checks if a payload can be loaded at this position. - virtual bool isPayloadAvailable(); - - protected: - int32_t readDeltaPosition(); - - virtual void skippingDoc(); - - virtual void skipProx(int64_t proxPointer, int32_t payloadLength); - virtual void skipPositions(int32_t n); - virtual void skipPayload(); - - /// It is not always necessary to move the prox pointer to a new document after the freq pointer has - /// been moved. Consider for example a phrase query with two terms: the freq pointer for term 1 has to - /// move to document x to answer the question if the term occurs in that document. But only if term 2 - /// also matches document x, the positions have to be read to figure out if term 1 and term 2 appear next - /// to each other in document x and thus satisfy the query. So we move the prox pointer lazily to the - /// document as soon as positions are requested. - virtual void lazySkip(); - }; -} - -#endif diff --git a/include/SegmentTermVector.h b/include/SegmentTermVector.h deleted file mode 100644 index 814b9ecb..00000000 --- a/include/SegmentTermVector.h +++ /dev/null @@ -1,51 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTTERMVECTOR_H -#define SEGMENTTERMVECTOR_H - -#include "TermPositionVector.h" - -namespace Lucene -{ - class SegmentTermVector : public TermPositionVector, public LuceneObject - { - public: - SegmentTermVector(const String& field, Collection terms, Collection termFreqs); - virtual ~SegmentTermVector(); - - LUCENE_CLASS(SegmentTermVector); - - protected: - String field; - Collection terms; - Collection termFreqs; - - public: - /// @return The number of the field this vector is associated with - virtual String getField(); - - virtual String toString(); - - /// @return The number of terms in the term vector. - virtual int32_t size(); - - /// @return An Array of term texts in ascending order. - virtual Collection getTerms(); - - /// @return Array of term frequencies. - virtual Collection getTermFrequencies(); - - /// Return an index in the term numbers array returned from getTerms at which the term with the - /// specified term appears. - virtual int32_t indexOf(const String& term); - - /// Just like indexOf(int) but searches for a number of terms at the same time. - virtual Collection indexesOf(Collection termNumbers, int32_t start, int32_t length); - }; -} - -#endif diff --git a/include/SegmentWriteState.h b/include/SegmentWriteState.h deleted file mode 100644 index 0a037657..00000000 --- a/include/SegmentWriteState.h +++ /dev/null @@ -1,39 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SEGMENTWRITESTATE_H -#define SEGMENTWRITESTATE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class SegmentWriteState : public LuceneObject - { - public: - SegmentWriteState(DocumentsWriterPtr docWriter, DirectoryPtr directory, const String& segmentName, - const String& docStoreSegmentName, int32_t numDocs, int32_t numDocsInStore, - int32_t termIndexInterval); - virtual ~SegmentWriteState(); - - LUCENE_CLASS(SegmentWriteState); - - public: - DocumentsWriterWeakPtr _docWriter; - DirectoryPtr directory; - String segmentName; - String docStoreSegmentName; - int32_t numDocs; - int32_t termIndexInterval; - int32_t numDocsInStore; - HashSet flushedFiles; - - public: - String segmentFileName(const String& ext); - }; -} - -#endif diff --git a/include/SerialMergeScheduler.h b/include/SerialMergeScheduler.h deleted file mode 100644 index 28e21fd7..00000000 --- a/include/SerialMergeScheduler.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SERIALMERGESCHEDULER_H -#define SERIALMERGESCHEDULER_H - -#include "MergeScheduler.h" - -namespace Lucene -{ - /// A {@link MergeScheduler} that simply does each merge sequentially, using the current thread. - class LPPAPI SerialMergeScheduler : public MergeScheduler - { - public: - virtual ~SerialMergeScheduler(); - - LUCENE_CLASS(SerialMergeScheduler); - - public: - /// Just do the merges in sequence. We do this "synchronized" so that even if the application is using - /// multiple threads, only one merge may run at a time. - virtual void merge(IndexWriterPtr writer); - - /// Close this MergeScheduler. 
- virtual void close(); - }; -} - -#endif diff --git a/include/Set.h b/include/Set.h deleted file mode 100644 index 911c718a..00000000 --- a/include/Set.h +++ /dev/null @@ -1,155 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SET_H -#define SET_H - -#include -#include "LuceneSync.h" - -namespace Lucene -{ - /// Utility template class to handle set based collections that can be safely copied and shared - template < class TYPE, class LESS = std::less > - class Set : public LuceneSync - { - public: - typedef Set this_type; - typedef std::set< TYPE, LESS, Allocator > set_type; - typedef typename set_type::iterator iterator; - typedef typename set_type::const_iterator const_iterator; - typedef TYPE value_type; - - virtual ~Set() - { - } - - protected: - boost::shared_ptr setContainer; - - public: - static this_type newInstance() - { - this_type instance; - instance.setContainer = Lucene::newInstance(); - return instance; - } - - template - static this_type newInstance(ITER first, ITER last) - { - this_type instance; - instance.setContainer = Lucene::newInstance(first, last); - return instance; - } - - void reset() - { - setContainer.reset(); - } - - int32_t size() const - { - return (int32_t)setContainer->size(); - } - - bool empty() const - { - return setContainer->empty(); - } - - void clear() - { - setContainer->clear(); - } - - iterator begin() - { - return setContainer->begin(); - } - - iterator end() - { - return setContainer->end(); - } - - const_iterator begin() const - { - return setContainer->begin(); - } - - const_iterator end() const - { - return setContainer->end(); - } - - bool add(const TYPE& type) - { - return setContainer->insert(type).second; 
- } - - template - void addAll(ITER first, ITER last) - { - setContainer->insert(first, last); - } - - bool remove(const TYPE& type) - { - return (setContainer->erase(type) > 0); - } - - iterator find(const TYPE& type) - { - return setContainer->find(type); - } - - bool contains(const TYPE& type) const - { - return (setContainer->find(type) != setContainer->end()); - } - - bool equals(const this_type& other) const - { - return equals(other, std::equal_to()); - } - - template - bool equals(const this_type& other, PRED comp) const - { - if (setContainer->size() != other.setContainer->size()) - return false; - return std::equal(setContainer->begin(), setContainer->end(), other.setContainer->begin(), comp); - } - - void swap(this_type& other) - { - setContainer.swap(other->setContainer); - } - - operator bool() const - { - return setContainer; - } - - bool operator! () const - { - return !setContainer; - } - - bool operator== (const this_type& other) - { - return (setContainer == other.setContainer); - } - - bool operator!= (const this_type& other) - { - return (setContainer != other.setContainer); - } - }; -} - -#endif diff --git a/include/SetBasedFieldSelector.h b/include/SetBasedFieldSelector.h deleted file mode 100644 index 5e840c7e..00000000 --- a/include/SetBasedFieldSelector.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SETBASEDFIELDSELECTOR_H -#define SETBASEDFIELDSELECTOR_H - -#include "FieldSelector.h" - -namespace Lucene -{ - /// Declare what fields to load normally and what fields to load lazily - class LPPAPI SetBasedFieldSelector : public FieldSelector - { - public: - /// Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. - /// If both are null, the Document will not have any {@link Field} on it. - /// @param fieldsToLoad A Set of {@link String} field names to load. May be empty, but not null - /// @param lazyFieldsToLoad A Set of {@link String} field names to load lazily. May be empty, but not null - SetBasedFieldSelector(HashSet fieldsToLoad, HashSet lazyFieldsToLoad); - - virtual ~SetBasedFieldSelector(); - - LUCENE_CLASS(SetBasedFieldSelector); - - protected: - HashSet fieldsToLoad; - HashSet lazyFieldsToLoad; - - public: - /// Indicate whether to load the field with the given name or not. If the {@link Field#name()} is not in - /// either of the initializing Sets, then {@link FieldSelectorResult#NO_LOAD} is returned. If a Field name - /// is in both fieldsToLoad and lazyFieldsToLoad, lazy has precedence. - /// @param fieldName The {@link Field} name to check - /// @return The {@link FieldSelectorResult} - virtual FieldSelectorResult accept(const String& fieldName); - }; -} - -#endif diff --git a/include/Similarity.h b/include/Similarity.h deleted file mode 100644 index ddc74f01..00000000 --- a/include/Similarity.h +++ /dev/null @@ -1,616 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SIMILARITY_H -#define SIMILARITY_H - -#include "Explanation.h" - -namespace Lucene -{ - /// Scoring API. - /// - /// Similarity defines the components of Lucene scoring. Overriding computation of these components is - /// a convenient way to alter Lucene scoring. - /// - /// Suggested reading: - /// Introduction To Information Retrieval, Chapter 6. - /// - /// The following describes how Lucene scoring evolves from underlying information retrieval models to - /// (efficient) implementation. We first brief on VSM Score, then derive from it Lucene's Conceptual Scoring - /// Formula, from which, finally, evolves Lucene's Practical Scoring Function (the latter is connected directly - /// with Lucene classes and methods). - /// - /// Lucene combines Boolean model (BM) of - /// Information Retrieval with Vector Space Model - /// (VSM) of Information Retrieval - documents "approved" by BM are scored by VSM. - /// - /// In VSM, documents and queries are represented as weighted vectors in a multi-dimensional space, where each - /// distinct index term is a dimension, and weights are Tf-idf - /// values. - /// - /// VSM does not require weights to be Tf-idf values, but Tf-idf values are believed to produce search results - /// of high quality, and so Lucene is using Tf-idf. Tf and Idf are described in more detail below, but for now, - /// for completion, let's just say that for given term t and document (or query) x, Tf(t,x) varies with the - /// number of occurrences of term t in x (when one increases so does the other) and idf(t) similarly varies with - /// the inverse of the number of index documents containing term t. - /// - /// VSM score of document d for query q is the Cosine - /// Similarity of the weighted query vectors V(q) and V(d): - /// - ///
 
- /// - /// - /// - ///
- /// - /// - ///
- /// - /// - /// - /// - /// - ///
- /// cosine-similarity(q,d)   =   - /// - /// - /// - /// - /// - ///
V(q) · V(d)
–––––––––
|V(q)| |V(d)|
- ///
- ///
- ///
- ///
VSM Score
- ///
- ///
 
- /// - /// Where V(q) · V(d) is the dot product of the - /// weighted vectors, and |V(q)| and |V(d)| are their - /// Euclidean norms. - /// - /// Note: the above equation can be viewed as the dot product of the normalized weighted vectors, in the sense - /// that dividing V(q) by its euclidean norm is normalizing it to a unit vector. - /// - /// Lucene refines VSM score for both search quality and usability: - ///
    - ///
  • Normalizing V(d) to the unit vector is known to be problematic in that it removes all document length - /// information. For some documents removing this info is probably ok, eg. a document made by duplicating a - /// certain paragraph 10 times, especially if that paragraph is made of distinct terms. But for a document which - /// contains no duplicated paragraphs, this might be wrong. To avoid this problem, a different document length - /// normalization factor is used, which normalizes to a vector equal to or larger than the unit vector: - /// doc-len-norm(d). - ///
  • - ///
  • At indexing, users can specify that certain documents are more important than others, by assigning a - /// document boost. For this, the score of each document is also multiplied by its boost value doc-boost(d). - ///
  • - ///
  • Lucene is field based, hence each query term applies to a single field, document length normalization - /// is by the length of the certain field, and in addition to document boost there are also document fields - /// boosts. - ///
  • - ///
  • The same field can be added to a document during indexing several times, and so the boost of that field - /// is the multiplication of the boosts of the separate additions (or parts) of that field within the document. - ///
  • - ///
  • At search time users can specify boosts to each query, sub-query, and each query term, hence the - /// contribution of a query term to the score of a document is multiplied by the boost of that query term - /// query-boost(q). - ///
  • - ///
  • A document may match a multi term query without containing all the terms of that query (this is correct - /// for some of the queries), and users can further reward documents matching more query terms through a - /// coordination factor, which is usually larger when more terms are matched: coord-factor(q,d). - ///
  • - ///
- /// - /// Under the simplifying assumption of a single field in the index, we get Lucene's Conceptual scoring formula: - /// - ///
 
- /// - /// - /// - ///
- /// - /// - ///
- /// - /// - /// - /// - /// - /// - ///
- /// score(q,d)   =   - /// coord-factor(q,d) ·   - /// query-boost(q) ·   - /// - /// - /// - /// - /// - ///
V(q) · V(d)
–––––––––
|V(q)|
- ///
- ///   ·   doc-len-norm(d) - ///   ·   doc-boost(d) - ///
- ///
- ///
- ///
Lucene Conceptual Scoring Formula
- ///
- ///
 
- /// - /// The conceptual formula is a simplification in the sense that (1) terms and documents are fielded and (2) - /// boosts are usually per query term rather than per query. - /// - /// We now describe how Lucene implements this conceptual scoring formula, and derive from it Lucene's Practical - /// Scoring Function. - /// - /// For efficient score computation some scoring components are computed and aggregated in advance: - ///
    - ///
  • Query-boost for the query (actually for each query term) is known when search starts. - ///
  • - ///
  • Query Euclidean norm |V(q)| can be computed when search starts, as it is independent of the document - /// being scored. From search optimization perspective, it is a valid question why bother to normalize the - /// query at all, because all scored documents will be multiplied by the same |V(q)|, and hence documents ranks - /// (their order by score) will not be affected by this normalization. There are two good reasons to keep this - /// normalization: - ///
      - ///
    • Recall that Cosine Similarity can be used - /// find how similar two documents are. One can use Lucene for eg. clustering, and use a document as a query to - /// compute its similarity to other documents. In this use case it is important that the score of document d3 - /// for query d1 is comparable to the score of document d3 for query d2. In other words, scores of a document for - /// two distinct queries should be comparable. There are other applications that may require this. And this is - /// exactly what normalizing the query vector V(q) provides: comparability (to a certain extent) of two or more - /// queries. - ///
    • - ///
    • Applying query normalization on the scores helps to keep the scores around the unit vector, hence preventing - /// loss of score data because of floating point precision limitations. - ///
    • - ///
    - ///
  • - ///
  • Document length norm doc-len-norm(d) and document boost doc-boost(d) are known at indexing time. They are - /// computed in advance and their multiplication is saved as a single value in the index: norm(d). (In the equations - /// below, norm(t in d) means norm(field(t) in doc d) where field(t) is the field associated with term t.) - ///
  • - ///
- /// - /// Lucene's Practical Scoring Function is derived from the above. The color codes demonstrate how it relates to - /// those of the conceptual formula: - /// - /// - /// - /// - ///
- /// - /// - ///
- /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - ///
- /// score(q,d)   =   - /// coord(q,d)  ·  - /// queryNorm(q)  ·  - /// - /// - /// - /// ( - /// tf(t in d)  ·  - /// idf(t)2  ·  - /// t.getBoost() ·  - /// norm(t,d) - /// ) - ///
t in q
- ///
- ///
- ///
Lucene Practical Scoring Function
- ///
- /// - /// where - ///
    - ///
  1. - /// - /// tf(t in d) - /// correlates to the term's frequency, defined as the number of times term t appears in the currently - /// scored document d. Documents that have more occurrences of a given term receive a higher score. - /// Note that tf(t in q) is assumed to be 1 and therefore it does not appear in this equation, - /// However if a query contains twice the same term, there will be two term-queries with that same term - /// and hence the computation would still be correct (although not very efficient). - /// The default computation for tf(t in d) in {@link DefaultSimilarity#tf(float) DefaultSimilarity} is: - /// - ///
     
    - /// - /// - /// - /// - /// - ///
    - /// {@link DefaultSimilarity#tf(float) tf(t in d)}   =   - /// - /// frequency½ - ///
    - ///
     
    - ///
  2. - /// - ///
  3. - /// - /// idf(t) stands for Inverse Document Frequency. This value correlates to the inverse of docFreq - /// (the number of documents in which the term t appears). This means rarer terms give higher contribution - /// to the total score. idf(t) appears for t in both the query and the document, hence it is squared in - /// the equation. The default computation for idf(t) in {@link DefaultSimilarity#idf(int, int) DefaultSimilarity} is: - /// - ///
     
    - /// - /// - /// - /// - /// - /// - /// - ///
    - /// {@link DefaultSimilarity#idf(int, int) idf(t)}  =   - /// - /// 1 + log ( - /// - /// - /// - /// - /// - ///
    numDocs
    –––––––––
    docFreq+1
    - ///
    - /// ) - ///
    - ///
     
    - ///
  4. - /// - ///
  5. - /// - /// coord(q,d) - /// is a score factor based on how many of the query terms are found in the specified document. Typically, a - /// document that contains more of the query's terms will receive a higher score than another document with - /// fewer query terms. This is a search time factor computed in {@link #coord(int, int) coord(q,d)} by the - /// Similarity in effect at search time. - ///
     
    - ///
  6. - /// - ///
  7. - /// - /// queryNorm(q) - /// - /// is a normalizing factor used to make scores between queries comparable. This factor does not affect - /// document ranking (since all ranked documents are multiplied by the same factor), but rather just attempts - /// to make scores from different queries (or even different indexes) comparable. This is a search time - /// factor computed by the Similarity in effect at search time. - /// - /// The default computation in {@link DefaultSimilarity#queryNorm(float) DefaultSimilarity} - /// produces a Euclidean norm: - ///
     
    - /// - /// - /// - /// - /// - ///
    - /// queryNorm(q)   =   - /// {@link DefaultSimilarity#queryNorm(float) queryNorm(sumOfSquaredWeights)} - ///   =   - /// - /// - /// - /// - /// - ///
    1
    - /// –––––––––––––– - ///
    sumOfSquaredWeights½
    - ///
    - ///
     
    - /// - /// The sum of squared weights (of the query terms) is computed by the query {@link Weight} object. For example, - /// a {@link BooleanQuery boolean query} computes this value as: - /// - ///
     
    - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - ///
    - /// {@link Weight#sumOfSquaredWeights() sumOfSquaredWeights}   =   - /// {@link Query#getBoost() q.getBoost()} 2 - ///  ·  - /// - /// - /// - /// ( - /// idf(t)  ·  - /// t.getBoost() - /// ) 2 - ///
    t in q
    - ///
     
    - /// - ///
  8. - /// - ///
  9. - /// - /// t.getBoost() - /// is a search time boost of term t in the query q as specified in the query text or as set by application - /// calls to {@link Query#setBoost(float) setBoost()}. Notice that there is really no direct API for accessing - /// a boost of one term in a multi term query, but rather multi terms are represented in a query as multi - /// {@link TermQuery TermQuery} objects, and so the boost of a term in the query is accessible by calling - /// the sub-query {@link Query#getBoost() getBoost()}. - ///
     
    - ///
  10. - /// - ///
  11. - /// - /// norm(t,d) encapsulates a few (indexing time) boost and length factors: - /// - ///
      - ///
    • Document boost - set by calling - /// {@link Document#setBoost(float) doc.setBoost()} - /// before adding the document to the index. - ///
    • - ///
    • Field boost - set by calling - /// {@link Fieldable#setBoost(float) field.setBoost()} - /// before adding the field to a document. - ///
    • - ///
    • {@link #lengthNorm(String, int) lengthNorm(field)} - computed when the document is added to - /// the index in accordance with the number of tokens of this field in the document, so that shorter fields - /// contribute more to the score. LengthNorm is computed by the Similarity class in effect at indexing. - ///
    • - ///
    - /// - /// When a document is added to the index, all the above factors are multiplied. - /// If the document has multiple fields with the same name, all their boosts are multiplied together: - /// - ///
     
    - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - /// - ///
    - /// norm(t,d)   =   - /// {@link Document#getBoost() doc.getBoost()} - ///  ·  - /// {@link #lengthNorm(String, int) lengthNorm(field)} - ///  ·  - /// - /// - /// - /// {@link Fieldable#getBoost() f.getBoost}() - ///
    field f in d named as t
    - ///
     
    - /// However the resulted norm value is {@link #encodeNorm(float) encoded} as a single byte before being stored. - /// At search time, the norm byte value is read from the index {@link Directory directory} and {@link - /// #decodeNorm(byte) decoded} back to a float norm value. This encoding/decoding, while reducing index size, - /// comes with the price of precision loss - it is not guaranteed that decode(encode(x)) = x. For instance, - /// decode(encode(0.89)) = 0.75. - ///
     
    - /// Compression of norm values to a single byte saves memory at search time, because once a field is referenced - /// at search time, its norms - for all documents - are maintained in memory. - ///
     
    - /// The rationale supporting such lossy compression of norm values is that given the difficulty (and inaccuracy) - /// of users to express their true information need by a query, only big differences matter. - ///
     
    - /// Last, note that search time is too late to modify this norm part of scoring, eg. by using a different - /// {@link Similarity} for search. - ///
     
    - ///
  12. - ///
- /// - /// @see #setDefault(SimilarityPtr) - /// @see IndexWriter#setSimilarity(SimilarityPtr) - /// @see Searcher#setSimilarity(SimilarityPtr) - class LPPAPI Similarity : public LuceneObject - { - public: - Similarity(); - virtual ~Similarity(); - - LUCENE_CLASS(Similarity); - - protected: - static const int32_t NO_DOC_ID_PROVIDED; - - protected: - static const Collection NORM_TABLE(); - - public: - /// Return the default Similarity implementation used by indexing and search code. - /// This is initially an instance of {@link DefaultSimilarity}. - /// @see Searcher#setSimilarity(SimilarityPtr) - /// @see IndexWriter#setSimilarity(SimilarityPtr) - static SimilarityPtr getDefault(); - - /// Decodes a normalization factor stored in an index. - /// @see #encodeNorm(double) - static double decodeNorm(uint8_t b); - - /// Returns a table for decoding normalization bytes. - /// @see #encodeNorm(double) - static const Collection getNormDecoder(); - - /// Compute the normalization value for a field, given the accumulated state of term processing for this - /// field (see {@link FieldInvertState}). - /// - /// Implementations should calculate a float value based on the field state and then return that value. - /// - /// For backward compatibility this method by default calls {@link #lengthNorm(String, int32_t)} passing - /// {@link FieldInvertState#getLength()} as the second argument, and then multiplies this value by {@link - /// FieldInvertState#getBoost()}. - /// - /// @param field Field name - /// @param state Current processing state for this field - /// @return The calculated float norm - virtual double computeNorm(const String& fieldName, FieldInvertStatePtr state); - - /// Computes the normalization value for a field given the total number of terms contained in a field. - /// These values, together with field boosts, are stored in an index and multiplied into scores for hits - /// on each field by the search code. 
- /// - /// Matches in longer fields are less precise, so implementations of this method usually return smaller - /// values when numTokens is large, and larger values when numTokens is small. - /// - /// Note that the return values are computed under {@link IndexWriter#addDocument(DocumentPtr)} and then - /// stored using {@link #encodeNorm(double)}. Thus they have limited precision, and documents must be - /// re-indexed if this method is altered. - /// - /// @param fieldName The name of the field - /// @param numTokens The total number of tokens contained in fields named fieldName of doc. - /// @return A normalization factor for hits on this field of this document - /// @see Field#setBoost(double) - virtual double lengthNorm(const String& fieldName, int32_t numTokens) = 0; - - /// Computes the normalization value for a query given the sum of the squared weights of each of the query - /// terms. This value is multiplied into the weight of each query term. While the classic query - /// normalization factor is computed as 1/sqrt(sumOfSquaredWeights), other implementations might completely - /// ignore sumOfSquaredWeights (ie return 1). - /// - /// This does not affect ranking, but the default implementation does make scores from different queries - /// more comparable than they would be by eliminating the magnitude of the Query vector as a factor in the - /// score. - /// - /// @param sumOfSquaredWeights The sum of the squares of query term weights - /// @return a normalization factor for query weights - virtual double queryNorm(double sumOfSquaredWeights) = 0; - - /// Encodes a normalization factor for storage in an index. - /// - /// The encoding uses a three-bit mantissa, a five-bit exponent, and the zero-exponent point at 15, thus - /// representing values from around 7x10^9 to 2x10^-9 with about one significant decimal digit of accuracy. - /// Zero is also represented. Negative numbers are rounded up to zero. 
Values too large to represent - /// are rounded down to the largest representable value. Positive values too small to represent are rounded - /// up to the smallest positive representable value. - /// - /// @see Field#setBoost(double) - static uint8_t encodeNorm(double f); - - /// Computes a score factor based on a term or phrase's frequency in a document. This value is multiplied - /// by the {@link #idf(int32_t, int32_t)} factor for each term in the query and these products are then - /// summed to form the initial score for a document. - /// - /// Terms and phrases repeated in a document indicate the topic of the document, so implementations of this - /// method usually return larger values when freq is large, and smaller values when freq is small. - /// - /// The default implementation calls {@link #tf(double)}. - /// - /// @param freq The frequency of a term within a document - /// @return A score factor based on a term's within-document frequency - virtual double tf(int32_t freq); - - /// Computes the amount of a sloppy phrase match, based on an edit distance. This value is summed for - /// each sloppy phrase match in a document to form the frequency that is passed to {@link #tf(double)}. - /// - /// A phrase match with a small edit distance to a document passage more closely matches the document, so - /// implementations of this method usually return larger values when the edit distance is small and - /// smaller values when it is large. - /// - /// @see PhraseQuery#setSlop(int32_t) - /// @param distance The edit distance of this sloppy phrase match - /// @return The frequency increment for this match - virtual double sloppyFreq(int32_t distance) = 0; - - /// Computes a score factor based on a term or phrase's frequency in a document. This value is multiplied - /// by the {@link #idf(int32_t, int32_t)} factor for each term in the query and these products are then - /// summed to form the initial score for a document. 
- /// - /// Terms and phrases repeated in a document indicate the topic of the document, so implementations of this - /// method usually return larger values when freq is large, and smaller values when freq is small. - /// - /// @param freq The frequency of a term within a document - /// @return A score factor based on a term's within-document frequency - virtual double tf(double freq) = 0; - - /// Computes a score factor for a simple term and returns an explanation for that score factor. - /// - /// The default implementation uses: - ///
-        /// idf(searcher->docFreq(term), searcher->maxDoc());
-        /// 
- /// - /// Note that {@link Searcher#maxDoc()} is used instead of {@link IndexReader#numDocs() IndexReader#numDocs()} - /// because also {@link Searcher#docFreq(TermPtr)} is used, and when the latter is inaccurate, so is {@link - /// Searcher#maxDoc()}, and in the same direction. In addition, {@link Searcher#maxDoc()} is more efficient - /// to compute. - /// - /// @param term The term in question - /// @param searcher The document collection being searched - /// @return An IDFExplain object that includes both an idf score factor and an explanation for the term. - virtual IDFExplanationPtr idfExplain(TermPtr term, SearcherPtr searcher); - - /// Computes a score factor for a phrase. - /// - /// The default implementation sums the idf factor for each term in the phrase. - /// - /// @param terms The terms in the phrase - /// @param searcher The document collection being searched - /// @return An IDFExplain object that includes both an idf score factor for the phrase and an explanation - /// for each term. - virtual IDFExplanationPtr idfExplain(Collection terms, SearcherPtr searcher); - - /// Computes a score factor based on a term's document frequency (the number of documents which contain the - /// term). This value is multiplied by the {@link #tf(int32_t)} factor for each term in the query and these - /// products are then summed to form the initial score for a document. - /// - /// Terms that occur in fewer documents are better indicators of topic, so implementations of this method - /// usually return larger values for rare terms, and smaller values for common terms. - /// - /// @param docFreq The number of documents which contain the term - /// @param numDocs The total number of documents in the collection - /// @return A score factor based on the term's document frequency - virtual double idf(int32_t docFreq, int32_t numDocs) = 0; - - /// Computes a score factor based on the fraction of all query terms that a document contains. 
This value - /// is multiplied into scores. - /// - /// The presence of a large portion of the query terms indicates a better match with the query, so - /// implementations of this method usually return larger values when the ratio between these parameters is - /// large and smaller values when the ratio between them is small. - /// - /// @param overlap The number of query terms matched in the document - /// @param maxOverlap The total number of terms in the query - /// @return A score factor based on term overlap with the query - virtual double coord(int32_t overlap, int32_t maxOverlap) = 0; - - /// Calculate a scoring factor based on the data in the payload. Overriding implementations are responsible - /// for interpreting what is in the payload. Lucene makes no assumptions about what is in the byte array. - /// - /// The default implementation returns 1. - /// - /// @param docId The docId currently being scored. If this value is {@link #NO_DOC_ID_PROVIDED}, then it - /// should be assumed that the PayloadQuery implementation does not provide document information - /// @param fieldName The fieldName of the term this payload belongs to - /// @param start The start position of the payload - /// @param end The end position of the payload - /// @param payload The payload byte array to be scored - /// @param offset The offset into the payload array - /// @param length The length in the array - /// @return An implementation dependent float to be used as a scoring factor - virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length); - }; -} - -#endif diff --git a/include/SimilarityDelegator.h b/include/SimilarityDelegator.h deleted file mode 100644 index a5e6f51d..00000000 --- a/include/SimilarityDelegator.h +++ /dev/null @@ -1,39 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SIMILARITYDELEGATOR_H -#define SIMILARITYDELEGATOR_H - -#include "Similarity.h" - -namespace Lucene -{ - /// Delegating scoring implementation. Useful in {@link Query#getSimilarity(Searcher)} implementations, - /// to override only certain methods of a Searcher's Similarity implementation. - class LPPAPI SimilarityDelegator : public Similarity - { - public: - SimilarityDelegator(SimilarityPtr delegee); - virtual ~SimilarityDelegator(); - - LUCENE_CLASS(SimilarityDelegator); - - protected: - SimilarityPtr delegee; - - public: - virtual double computeNorm(const String& field, FieldInvertStatePtr state); - virtual double lengthNorm(const String& fieldName, int32_t numTokens); - virtual double queryNorm(double sumOfSquaredWeights); - virtual double tf(double freq); - virtual double sloppyFreq(int32_t distance); - virtual double idf(int32_t docFreq, int32_t numDocs); - virtual double coord(int32_t overlap, int32_t maxOverlap); - virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length); - }; -} - -#endif diff --git a/include/SimpleAnalyzer.h b/include/SimpleAnalyzer.h deleted file mode 100644 index 46ad2eec..00000000 --- a/include/SimpleAnalyzer.h +++ /dev/null @@ -1,28 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SIMPLEANALYZER_H -#define SIMPLEANALYZER_H - -#include "Analyzer.h" - -namespace Lucene -{ - /// An {@link Analyzer} that filters {@link LetterTokenizer} with {@link LowerCaseFilter} - class LPPAPI SimpleAnalyzer : public Analyzer - { - public: - virtual ~SimpleAnalyzer(); - - LUCENE_CLASS(SimpleAnalyzer); - - public: - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; -} - -#endif diff --git a/include/SimpleFSDirectory.h b/include/SimpleFSDirectory.h deleted file mode 100644 index ac224555..00000000 --- a/include/SimpleFSDirectory.h +++ /dev/null @@ -1,38 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SIMPLEFSDIRECTORY_H -#define SIMPLEFSDIRECTORY_H - -#include "FSDirectory.h" - -namespace Lucene -{ - /// A straightforward implementation of {@link FSDirectory} using std::ofstream and std::ifstream. - class LPPAPI SimpleFSDirectory : public FSDirectory - { - public: - /// Create a new SimpleFSDirectory for the named location and {@link NativeFSLockFactory}. - /// @param path the path of the directory. - /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) - SimpleFSDirectory(const String& path, LockFactoryPtr lockFactory = LockFactoryPtr()); - virtual ~SimpleFSDirectory(); - - LUCENE_CLASS(SimpleFSDirectory); - - public: - /// Creates an IndexOutput for the file with the given name. 
- virtual IndexOutputPtr createOutput(const String& name); - - /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory implementation may ignore the buffer size. - virtual IndexInputPtr openInput(const String& name); - - /// Creates an IndexInput for the file with the given name. - virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); - }; -} - -#endif diff --git a/include/SimpleFSLockFactory.h b/include/SimpleFSLockFactory.h deleted file mode 100644 index aa0b9799..00000000 --- a/include/SimpleFSLockFactory.h +++ /dev/null @@ -1,42 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SIMPLEFSLOCKFACTORY_H -#define SIMPLEFSLOCKFACTORY_H - -#include "FSLockFactory.h" -#include "Lock.h" - -namespace Lucene -{ - /// Implements {@link LockFactory} using {@link File#createNewFile()}. - /// @see LockFactory - class LPPAPI SimpleFSLockFactory : public FSLockFactory - { - public: - /// Create a SimpleFSLockFactory instance, with null (unset) lock directory. When you pass this factory - /// to a {@link FSDirectory} subclass, the lock directory is automatically set to the directory itself. - /// Be sure to create one instance for each directory your create! - SimpleFSLockFactory(); - - /// Instantiate using the provided directory name. - /// @param lockDir where lock files should be created. - SimpleFSLockFactory(const String& lockDir); - - virtual ~SimpleFSLockFactory(); - - LUCENE_CLASS(SimpleFSLockFactory); - - public: - /// Return a new Lock instance identified by lockName. 
- virtual LockPtr makeLock(const String& lockName); - - /// Attempt to clear (forcefully unlock and remove) the specified lock. - virtual void clearLock(const String& lockName); - }; -} - -#endif diff --git a/include/SimpleLRUCache.h b/include/SimpleLRUCache.h deleted file mode 100644 index 00542819..00000000 --- a/include/SimpleLRUCache.h +++ /dev/null @@ -1,91 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SIMPLELRUCACHE_H -#define SIMPLELRUCACHE_H - -#include -#include "LuceneObject.h" - -namespace Lucene -{ - /// General purpose LRU cache map. - /// Accessing an entry will keep the entry cached. {@link #get(const KEY&)} and - /// {@link #put(const KEY&, const VALUE&)} results in an access to the corresponding entry. 
- template - class SimpleLRUCache : public LuceneObject - { - public: - typedef std::pair key_value; - typedef std::list< key_value > key_list; - typedef typename key_list::const_iterator const_iterator; - typedef boost::unordered_map< KEY, typename key_list::iterator, HASH, EQUAL, Allocator< std::pair > > map_type; - typedef typename map_type::const_iterator map_iterator; - - SimpleLRUCache(int32_t cacheSize) - { - this->cacheSize = cacheSize; - } - - virtual ~SimpleLRUCache() - { - } - - protected: - int32_t cacheSize; - key_list cacheList; - map_type cacheMap; - - public: - void put(const KEY& key, const VALUE& value) - { - cacheList.push_front(std::make_pair(key, value)); - cacheMap[key] = cacheList.begin(); - - if ((int32_t)cacheList.size() > cacheSize) - { - cacheMap.erase(cacheList.back().first); - cacheList.pop_back(); - } - } - - VALUE get(const KEY& key) - { - map_iterator find = cacheMap.find(key); - if (find == cacheMap.end()) - return VALUE(); - - VALUE value(find->second->second); - cacheList.erase(find->second); - cacheList.push_front(std::make_pair(key, value)); - cacheMap[key] = cacheList.begin(); - - return value; - } - - bool contains(const KEY& key) const - { - return (cacheMap.find(key) != cacheMap.end()); - } - - int32_t size() const - { - return (int32_t)cacheList.size(); - } - - const_iterator begin() const - { - return cacheList.begin(); - } - - const_iterator end() const - { - return cacheList.end(); - } - }; -}; - -#endif diff --git a/include/SingleInstanceLockFactory.h b/include/SingleInstanceLockFactory.h deleted file mode 100644 index 0eb1ed92..00000000 --- a/include/SingleInstanceLockFactory.h +++ /dev/null @@ -1,44 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SINGLEINSTANCELOCKFACTORY_H -#define SINGLEINSTANCELOCKFACTORY_H - -#include "LockFactory.h" - -namespace Lucene -{ - /// Implements {@link LockFactory} for a single in-process instance, meaning all - /// locking will take place through this one instance. Only use this {@link LockFactory} - /// when you are certain all IndexReaders and IndexWriters for a given index are running - /// against a single shared in-process Directory instance. This is currently the - /// default locking for RAMDirectory. - /// @see LockFactory - class LPPAPI SingleInstanceLockFactory : public LockFactory - { - public: - SingleInstanceLockFactory(); - virtual ~SingleInstanceLockFactory(); - - LUCENE_CLASS(SingleInstanceLockFactory); - - protected: - HashSet locks; - - public: - /// Return a new Lock instance identified by lockName. - /// @param lockName name of the lock to be created. - virtual LockPtr makeLock(const String& lockName); - - /// Attempt to clear (forcefully unlock and remove) the - /// specified lock. Only call this at a time when you are - /// certain this lock is no longer in use. - /// @param lockName name of the lock to be cleared. - virtual void clearLock(const String& lockName); - }; -} - -#endif diff --git a/include/SingleTermEnum.h b/include/SingleTermEnum.h deleted file mode 100644 index ba366de5..00000000 --- a/include/SingleTermEnum.h +++ /dev/null @@ -1,39 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SINGLETERMENUM_H -#define SINGLETERMENUM_H - -#include "FilteredTermEnum.h" - -namespace Lucene -{ - /// Subclass of FilteredTermEnum for enumerating a single term. 
- /// - /// This can be used by {@link MultiTermQuery}s that need only visit one term, but want to preserve - /// MultiTermQuery semantics such as {@link MultiTermQuery#rewriteMethod}. - class LPPAPI SingleTermEnum : public FilteredTermEnum - { - public: - SingleTermEnum(IndexReaderPtr reader, TermPtr singleTerm); - virtual ~SingleTermEnum(); - - LUCENE_CLASS(SingleTermEnum); - - protected: - TermPtr singleTerm; - bool _endEnum; - - public: - virtual double difference(); - - protected: - virtual bool endEnum(); - virtual bool termCompare(TermPtr term); - }; -} - -#endif diff --git a/include/SloppyPhraseScorer.h b/include/SloppyPhraseScorer.h deleted file mode 100644 index b8cdb571..00000000 --- a/include/SloppyPhraseScorer.h +++ /dev/null @@ -1,69 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SLOPPYPHRASESCORER_H -#define SLOPPYPHRASESCORER_H - -#include "PhraseScorer.h" - -namespace Lucene -{ - class SloppyPhraseScorer : public PhraseScorer - { - public: - SloppyPhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, int32_t slop, ByteArray norms); - virtual ~SloppyPhraseScorer(); - - LUCENE_CLASS(SloppyPhraseScorer); - - protected: - int32_t slop; - Collection repeats; - Collection tmpPos; // for flipping repeating pps - bool checkedRepeats; - - public: - /// Score a candidate doc for all slop-valid position-combinations (matches) encountered while - /// traversing/hopping the PhrasePositions. The score contribution of a match depends on the distance: - /// - highest score for distance=0 (exact match). - /// - score gets lower as distance gets higher. 
- /// Example: for query "a b"~2, a document "x a b a y" can be scored twice: once for "a b" (distance=0), - /// and once for "b a" (distance=2). - /// Possibly not all valid combinations are encountered, because for efficiency we always propagate the - /// least PhrasePosition. This allows to base on PriorityQueue and move forward faster. - /// As result, for example, document "a b c b a" would score differently for queries "a b c"~4 and - /// "c b a"~4, although they really are equivalent. Similarly, for doc "a b c b a f g", query "c b"~2 - /// would get same score as "g f"~2, although "c b"~2 could be matched twice. We may want to fix this - /// in the future (currently not, for performance reasons). - virtual double phraseFreq(); - - protected: - /// Flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back. - /// Assumes: pp!=pp2, pp2 in pq, pp not in pq. Called only when there are repeating pps. - PhrasePositionsPtr flip(PhrasePositionsPtr pp, PhrasePositionsPtr pp2); - - /// Init PhrasePositions in place. - /// There is a one time initialization for this scorer: - /// - Put in repeats[] each pp that has another pp with same position in the doc. - /// - Also mark each such pp by pp.repeats = true. - /// Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient. - /// In particular, this allows to score queries with no repetitions with no overhead due to this computation. - /// - Example 1 - query with no repetitions: "ho my"~2 - /// - Example 2 - query with repetitions: "ho my my"~2 - /// - Example 3 - query with repetitions: "my ho my"~2 - /// Init per doc with repeats in query, includes propagating some repeating pp's to avoid false phrase detection. - /// @return end (max position), or -1 if any term ran out (ie. 
done) - int32_t initPhrasePositions(); - - /// We disallow two pp's to have the same TermPosition, thereby verifying multiple occurrences in the query - /// of the same word would go elsewhere in the matched doc. - /// @return null if differ (i.e. valid) otherwise return the higher offset PhrasePositions out of the first - /// two PPs found to not differ. - PhrasePositionsPtr termPositionsDiffer(PhrasePositionsPtr pp); - }; -} - -#endif diff --git a/include/SmallDouble.h b/include/SmallDouble.h deleted file mode 100644 index 5956f8e6..00000000 --- a/include/SmallDouble.h +++ /dev/null @@ -1,33 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SMALLDOUBLE_H -#define SMALLDOUBLE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Floating point numbers smaller than 32 bits. - class SmallDouble : public LuceneObject - { - public: - virtual ~SmallDouble(); - LUCENE_CLASS(SmallDouble); - - public: - /// Converts a floating point number to an 8 bit float. - /// Values less than zero are all mapped to zero. - /// Values are truncated (rounded down) to the nearest 8 bit value. - /// Values between zero and the smallest representable value are rounded up. - static uint8_t doubleToByte(double f); - - /// Converts an 8 bit floating point number to a double. - static double byteToDouble(uint8_t b); - }; -} - -#endif diff --git a/include/SnapshotDeletionPolicy.h b/include/SnapshotDeletionPolicy.h deleted file mode 100644 index 35030dbe..00000000 --- a/include/SnapshotDeletionPolicy.h +++ /dev/null @@ -1,53 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SNAPSHOTDELETIONPOLICY_H -#define SNAPSHOTDELETIONPOLICY_H - -#include "IndexDeletionPolicy.h" - -namespace Lucene -{ - class LPPAPI SnapshotDeletionPolicy : public IndexDeletionPolicy - { - public: - SnapshotDeletionPolicy(IndexDeletionPolicyPtr primary); - virtual ~SnapshotDeletionPolicy(); - - LUCENE_CLASS(SnapshotDeletionPolicy); - - protected: - IndexCommitPtr lastCommit; - IndexDeletionPolicyPtr primary; - String _snapshot; - - public: - /// This is called once when a writer is first instantiated to give the policy a chance to remove old - /// commit points. - virtual void onInit(Collection commits); - - /// This is called each time the writer completed a commit. This gives the policy a chance to remove - /// old commit points with each commit. - virtual void onCommit(Collection commits); - - /// Take a snapshot of the most recent commit to the index. You must call release() to free this snapshot. - /// Note that while the snapshot is held, the files it references will not be deleted, which will consume - /// additional disk space in your index. If you take a snapshot at a particularly bad time (say just before - /// you call optimize()) then in the worst case this could consume an extra 1X of your total index size, - /// until you release the snapshot. - virtual IndexCommitPtr snapshot(); - - /// Release the currently held snapshot. - virtual void release(); - - protected: - Collection wrapCommits(Collection commits); - - friend class MyCommitPoint; - }; -} - -#endif diff --git a/include/Sort.h b/include/Sort.h deleted file mode 100644 index b4e3bc23..00000000 --- a/include/Sort.h +++ /dev/null @@ -1,115 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SORT_H -#define SORT_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Encapsulates sort criteria for returned hits. - /// - /// The fields used to determine sort order must be carefully chosen. Documents must contain a single term - /// in such a field, and the value of the term should indicate the document's relative position in a given - /// sort order. The field must be indexed, but should not be tokenized, and does not need to be stored - /// (unless you happen to want it back with the rest of your document data). In other words: - /// - ///
-    /// document->add(newLucene(L"byNumber", StringUtils::toString(x), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
-    /// 
- /// - /// Valid Types of Values - /// - /// There are four possible kinds of term values which may be put into sorting fields: Integers, Longs, Doubles, - /// or Strings. Unless {@link SortField SortField} objects are specified, the type of value in the field is - /// determined by parsing the first term in the field. - /// - /// Integer term values should contain only digits and an optional preceding negative sign. Values must be base - /// 10 and in the range INT_MIN and INT_MAX inclusive. Documents which should appear first in the sort should - /// have low value integers, later documents high values (ie. the documents should be numbered 1..n where 1 is - /// the first and n the last). - /// - /// Long term values should contain only digits and an optional preceding negative sign. Values must be base 10 - /// and in the range LLONG_MIN and LLONG_MAX inclusive. Documents which should appear first in the sort should - /// have low value integers, later documents high values. - /// - /// Double term values should conform to values accepted by Double (except that NaN and Infinity are not - /// supported). Documents which should appear first in the sort should have low values, later documents high - /// values. - /// - /// String term values can contain any valid String, but should not be tokenized. The values are sorted according - /// to their comparable natural order. Note that using this type of term value has higher memory requirements - /// than the other two types. - /// - /// Object Reuse - /// - /// One of these objects can be used multiple times and the sort order changed between usages. - /// This class is thread safe. - /// - /// Memory Usage - /// - /// Sorting uses of caches of term values maintained by the internal HitQueue(s). The cache is static and - /// contains an integer or double array of length IndexReader::maxDoc() for each field name for which a sort is - /// performed. In other words, the size of the cache in bytes is: - /// - ///
-    /// 4 * IndexReader::maxDoc() * (# of different fields actually used to sort)
-    /// 
- /// - /// For String fields, the cache is larger: in addition to the above array, the value of every term in the - /// field is kept in memory. If there are many unique terms in the field, this could be quite large. - /// - /// Note that the size of the cache is not affected by how many fields are in the index and might be used to - /// sort - only by the ones actually used to sort a result set. - class LPPAPI Sort : public LuceneObject - { - public: - /// Sorts by computed relevance. This is the same sort criteria as calling {@link - /// Searcher#search(QueryPtr, int32_t) Searcher#search()} without a sort criteria, only with slightly more - /// overhead. - Sort(); - - /// Sorts by the criteria in the given SortField. - Sort(SortFieldPtr field); - - /// Sorts in succession by the criteria in each SortField. - Sort(Collection fields); - - virtual ~Sort(); - - LUCENE_CLASS(Sort); - - public: - /// Internal representation of the sort criteria - Collection fields; - - public: - /// Represents sorting by computed relevance. Using this sort criteria returns the same results as calling - /// {@link Searcher#search(QueryPtr, int32_t) Searcher#search()} without a sort criteria, only with slightly - /// more overhead. - static SortPtr RELEVANCE(); - - /// Represents sorting by index order. - static SortPtr INDEXORDER(); - - /// Sets the sort to the given criteria. - void setSort(SortFieldPtr field); - - /// Sets the sort to the given criteria in succession. - void setSort(Collection fields); - - /// Representation of the sort criteria. 
- /// @return Array of SortField objects used in this sort criteria - Collection getSort(); - - virtual String toString(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/SortField.h b/include/SortField.h deleted file mode 100644 index c63bb97d..00000000 --- a/include/SortField.h +++ /dev/null @@ -1,148 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SORTFIELD_H -#define SORTFIELD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Stores information about how to sort documents by terms in an individual field. Fields must be indexed - /// in order to sort by them. - class LPPAPI SortField : public LuceneObject - { - public: - /// Creates a sort by terms in the given field with the type of term values explicitly given. - /// @param field Name of field to sort by. Can be null if type is SCORE or DOC. - /// @param type Type of values in the terms. - /// @param reverse True if natural order should be reversed. - SortField(const String& field, int32_t type, bool reverse = false); - - /// Creates a sort, possibly in reverse, by terms in the given field, parsed to numeric values using a - /// custom {@link Parser}. - /// @param field Name of field to sort by - /// @param parser Instance of a {@link Parser}, which must subclass one of the existing numeric parsers from - /// {@link FieldCache}. Sort type is inferred by testing which numeric parser the parser subclasses. - /// @param reverse True if natural order should be reversed. 
- SortField(const String& field, ParserPtr parser, bool reverse = false); - - /// Creates a sort, possibly in reverse, by terms in the given field sorted according to the given locale. - /// @param field Name of field to sort by, cannot be null. - /// @param locale Locale of values in the field. - /// @param reverse True if natural order should be reversed. - SortField(const String& field, const std::locale& locale, bool reverse = false); - - /// Creates a sort, possibly in reverse, with a custom comparison function. - /// @param field Name of field to sort by; cannot be null. - /// @param comparator Returns a comparator for sorting hits. - /// @param reverse True if natural order should be reversed. - SortField(const String& field, FieldComparatorSourcePtr comparator, bool reverse = false); - - virtual ~SortField(); - - LUCENE_CLASS(SortField); - - public: - /// Sort by document score (relevancy). Sort values are Double and higher values are at the front. - static const int32_t SCORE; - - /// Sort by document number (index order). Sort values are Integer and lower values are at the front. - static const int32_t DOC; - - /// Sort using term values as Strings. Sort values are String and lower values are at the front. - static const int32_t STRING; - - /// Sort using term values as Integers. Sort values are Integer and lower values are at the front. - static const int32_t INT; - - /// Sort using term values as Floats. Sort values are Float and lower values are at the front. - static const int32_t FLOAT; - - /// Sort using term values as Longs. Sort values are Long and lower values are at the front. - static const int32_t LONG; - - /// Sort using term values as Doubles. Sort values are Double and lower values are at the front. - static const int32_t DOUBLE; - - /// Sort using term values as Shorts. Sort values are Short and lower values are at the front. - static const int32_t SHORT; - - /// Sort using a custom Comparator. 
Sort values are any ComparableValue and sorting is done according - /// to natural order. - static const int32_t CUSTOM; - - /// Sort using term values as Bytes. Sort values are Byte and lower values are at the front. - static const int32_t BYTE; - - /// Sort using term values as Strings, but comparing by value (using String::compare) for all comparisons. - /// This is typically slower than {@link #STRING}, which uses ordinals to do the sorting. - static const int32_t STRING_VAL; - - INTERNAL: - bool reverse; // defaults to natural order - - String field; - int32_t type; // defaults to determining type dynamically - localePtr locale; // defaults to "natural order" (no Locale) - ParserPtr parser; - - private: - /// Used for CUSTOM sort - FieldComparatorSourcePtr comparatorSource; - - public: - /// Represents sorting by document score (relevancy). - static SortFieldPtr FIELD_SCORE(); - - /// Represents sorting by document number (index order). - static SortFieldPtr FIELD_DOC(); - - /// Returns the name of the field. Could return null if the sort is by SCORE or DOC. - /// @return Name of field, possibly null. - String getField(); - - /// Returns the type of contents in the field. - /// @return One of the constants SCORE, DOC, STRING, INT or DOUBLE. - int32_t getType(); - - /// Returns the Locale by which term values are interpreted. - localePtr getLocale(); - - /// Returns the instance of a {@link FieldCache} parser that fits to the given sort type. May return null - /// if no parser was specified. Sorting is using the default parser then. - /// @return An instance of a parser, or null. - ParserPtr getParser(); - - /// Returns whether the sort should be reversed. - /// @return True if natural order should be reversed. - bool getReverse(); - - /// Returns the {@link FieldComparatorSource} used for custom sorting - FieldComparatorSourcePtr getComparatorSource(); - - virtual String toString(); - - /// Returns true if other is equal to this. 
If a {@link FieldComparatorSource} or {@link Parser} was provided, - /// it must properly implement equals (unless a singleton is always used). - virtual bool equals(LuceneObjectPtr other); - - virtual int32_t hashCode(); - - /// Returns the {@link FieldComparator} to use for sorting. - /// @param numHits number of top hits the queue will store - /// @param sortPos position of this SortField within {@link Sort}. The comparator is primary if sortPos == 0, - /// secondary if sortPos == 1, etc. Some comparators can optimize themselves when they are the primary sort. - /// @return {@link FieldComparator} to use when sorting - FieldComparatorPtr getComparator(int32_t numHits, int32_t sortPos); - - protected: - /// Sets field and type, and ensures field is not NULL unless type is SCORE or DOC - void initFieldType(const String& field, int32_t type); - }; -} - -#endif diff --git a/include/SortedTermVectorMapper.h b/include/SortedTermVectorMapper.h deleted file mode 100644 index 9ade70d9..00000000 --- a/include/SortedTermVectorMapper.h +++ /dev/null @@ -1,61 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SORTEDTERMVECTORMAPPER_H -#define SORTEDTERMVECTORMAPPER_H - -#include -#include "TermVectorMapper.h" - -namespace Lucene -{ - /// Store a sorted collection of {@link TermVectorEntry}s. Collects all term information into a single, - /// sorted set. - /// - /// NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/ - /// positions you will not know what Fields they correlate with. 
- /// - /// This is not thread-safe - class LPPAPI SortedTermVectorMapper : public TermVectorMapper - { - public: - /// @param comparator A Comparator for sorting {@link TermVectorEntry}s - SortedTermVectorMapper(TermVectorEntryComparator comparator); - - SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator); - - virtual ~SortedTermVectorMapper(); - - LUCENE_CLASS(SortedTermVectorMapper); - - protected: - Collection currentSet; - MapStringTermVectorEntry termToTVE; - bool storeOffsets; - bool storePositions; - TermVectorEntryComparator comparator; - - public: - static const wchar_t* ALL; - - public: - /// Map the Term Vector information into your own structure - virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); - - /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. - virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); - - /// The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed - /// into the constructor. - /// - /// This set will be empty until after the mapping process takes place. - /// - /// @return The sorted set of {@link TermVectorEntry}. - Collection getTermVectorEntrySet(); - }; -} - -#endif diff --git a/include/SortedVIntList.h b/include/SortedVIntList.h deleted file mode 100644 index 2aff6bac..00000000 --- a/include/SortedVIntList.h +++ /dev/null @@ -1,88 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SORTEDVINTLIST_H -#define SORTEDVINTLIST_H - -#include "DocIdSet.h" - -namespace Lucene -{ - /// Stores and iterate on sorted integers in compressed form in RAM. - /// - /// The code for compressing the differences between ascending integers was borrowed from {@link IndexInput} - /// and {@link IndexOutput}. - /// - /// NOTE: this class assumes the stored integers are doc Ids (hence why it extends {@link DocIdSet}). Therefore - /// its {@link #iterator()} assumes {@link DocIdSetIterator#NO_MORE_DOCS} can be used as sentinel. If you - /// intend to use this value, then make sure it's not used during search flow. - class LPPAPI SortedVIntList : public DocIdSet - { - public: - /// Create a SortedVIntList from all elements of an array of integers. - /// @param sortedInts A sorted array of non negative integers. - SortedVIntList(Collection sortedInts); - - /// Create a SortedVIntList from an array of integers. - /// @param sortedInts A sorted array of non negative integers. - /// @param inputSize The number of integers to be used from the array. - SortedVIntList(Collection sortedInts, int32_t inputSize); - - /// Create a SortedVIntList from a BitSet. - /// @param bits A bit set representing a set of integers. - SortedVIntList(BitSetPtr bits); - - /// Create a SortedVIntList from an OpenBitSet. - /// @param bits A bit set representing a set of integers. - SortedVIntList(OpenBitSetPtr bits); - - /// Create a SortedVIntList. - /// @param docIdSetIterator An iterator providing document numbers as a set of integers. - /// This DocIdSetIterator is iterated completely when this constructor is called and it must provide the - /// integers in non decreasing order. 
- SortedVIntList(DocIdSetIteratorPtr docIdSetIterator); - - virtual ~SortedVIntList(); - - LUCENE_CLASS(SortedVIntList); - - public: - /// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, a SortedVIntList representing the - /// index numbers of the set bits will be smaller than that BitSet. - static const int32_t BITS2VINTLIST_SIZE; - - protected: - static const int32_t VB1; - static const int32_t BIT_SHIFT; - static const int32_t MAX_BYTES_PER_INT; - - int32_t _size; - ByteArray bytes; - int32_t lastBytePos; - int32_t lastInt; - - public: - /// @return The total number of sorted integers. - int32_t size(); - - /// @return The size of the byte array storing the compressed sorted integers. - int32_t getByteSize(); - - /// This DocIdSet implementation is cacheable. - virtual bool isCacheable(); - - /// @return An iterator over the sorted integers. - virtual DocIdSetIteratorPtr iterator(); - - protected: - void initBytes(); - void addInt(int32_t nextInt); - - friend class SortedDocIdSetIterator; - }; -} - -#endif diff --git a/include/SpanFilter.h b/include/SpanFilter.h deleted file mode 100644 index d05236e1..00000000 --- a/include/SpanFilter.h +++ /dev/null @@ -1,36 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANFILTER_H -#define SPANFILTER_H - -#include "Filter.h" - -namespace Lucene -{ - /// Abstract base class providing a mechanism to restrict searches to a subset of an index and also maintains - /// and returns position information. - /// - /// This is useful if you want to compare the positions from a SpanQuery with the positions of items in a filter. 
- /// For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, and - /// then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could - /// then compare position information for post processing. - class LPPAPI SpanFilter : public Filter - { - public: - virtual ~SpanFilter(); - LUCENE_CLASS(SpanFilter); - - public: - /// Returns a SpanFilterResult with true for documents which should be permitted in search results, and - /// false for those that should not and Spans for where the true docs match. - /// @param reader The {@link IndexReader} to load position and DocIdSet information from - /// @return A {@link SpanFilterResult} - virtual SpanFilterResultPtr bitSpans(IndexReaderPtr reader) = 0; - }; -} - -#endif diff --git a/include/SpanFilterResult.h b/include/SpanFilterResult.h deleted file mode 100644 index c42ef9d3..00000000 --- a/include/SpanFilterResult.h +++ /dev/null @@ -1,79 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANFILTERRESULT_H -#define SPANFILTERRESULT_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// The results of a SpanQueryFilter. 
Wraps the BitSet and the position information from the SpanQuery - class LPPAPI SpanFilterResult : public LuceneObject - { - public: - /// @param docIdSet The DocIdSet for the Filter - /// @param positions A List of {@link PositionInfo} objects - SpanFilterResult(DocIdSetPtr docIdSet, Collection positions); - - virtual ~SpanFilterResult(); - - LUCENE_CLASS(SpanFilterResult); - - protected: - DocIdSetPtr docIdSet; - Collection positions; // Spans spans - - public: - /// The first entry in the array corresponds to the first "on" bit. Entries are increasing by - /// document order. - /// @return A List of PositionInfo objects - Collection getPositions(); - - /// Returns the docIdSet - DocIdSetPtr getDocIdSet(); - }; - - class LPPAPI PositionInfo : public LuceneObject - { - public: - PositionInfo(int32_t doc); - virtual ~PositionInfo(); - - LUCENE_CLASS(PositionInfo); - - protected: - int32_t doc; - Collection positions; - - public: - void addPosition(int32_t start, int32_t end); - int32_t getDoc(); - Collection getPositions(); - }; - - class LPPAPI StartEnd : public LuceneObject - { - public: - StartEnd(int32_t start, int32_t end); - virtual ~StartEnd(); - - LUCENE_CLASS(StartEnd); - - protected: - int32_t start; - int32_t end; - - public: - /// @return The end position of this match - int32_t getEnd(); - - /// @return The start position of this match - int32_t getStart(); - }; -} - -#endif diff --git a/include/SpanFirstQuery.h b/include/SpanFirstQuery.h deleted file mode 100644 index 02f2cfd9..00000000 --- a/include/SpanFirstQuery.h +++ /dev/null @@ -1,52 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANFIRSTQUERY_H -#define SPANFIRSTQUERY_H - -#include "SpanQuery.h" -#include "Spans.h" - -namespace Lucene -{ - /// Matches spans near the beginning of a field. - class LPPAPI SpanFirstQuery : public SpanQuery - { - public: - /// Construct a SpanFirstQuery matching spans in match whose end position is less than or equal to end. - SpanFirstQuery(SpanQueryPtr match, int32_t end); - virtual ~SpanFirstQuery(); - - LUCENE_CLASS(SpanFirstQuery); - - protected: - SpanQueryPtr match; - int32_t end; - - public: - using SpanQuery::toString; - - /// Return the SpanQuery whose matches are filtered. - SpanQueryPtr getMatch(); - - /// Return the maximum end position permitted in a match. - int32_t getEnd(); - - virtual String getField(); - virtual String toString(const String& field); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual void extractTerms(SetTerm terms); - virtual SpansPtr getSpans(IndexReaderPtr reader); - virtual QueryPtr rewrite(IndexReaderPtr reader); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - friend class FirstSpans; - }; -} - -#endif diff --git a/include/SpanNearQuery.h b/include/SpanNearQuery.h deleted file mode 100644 index bc4810f5..00000000 --- a/include/SpanNearQuery.h +++ /dev/null @@ -1,58 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANNEARQUERY_H -#define SPANNEARQUERY_H - -#include "SpanQuery.h" - -namespace Lucene -{ - /// Matches spans which are near one another. 
One can specify slop, the maximum number of intervening - /// unmatched positions, as well as whether matches are required to be in-order. - class LPPAPI SpanNearQuery : public SpanQuery - { - public: - /// Construct a SpanNearQuery. Matches spans matching a span from each clause, with up to slop total - /// unmatched positions between them. * When inOrder is true, the spans from each clause must be - /// ordered as in clauses. - SpanNearQuery(Collection clauses, int32_t slop, bool inOrder, bool collectPayloads = true); - virtual ~SpanNearQuery(); - - LUCENE_CLASS(SpanNearQuery); - - protected: - Collection clauses; - int32_t slop; - bool inOrder; - - String field; - bool collectPayloads; - - public: - using SpanQuery::toString; - - /// Return the clauses whose spans are matched. - Collection getClauses(); - - /// Return the maximum number of intervening unmatched positions permitted. - int32_t getSlop(); - - /// Return true if matches are required to be in-order. - bool isInOrder(); - - virtual String getField(); - virtual void extractTerms(SetTerm terms); - virtual String toString(const String& field); - virtual SpansPtr getSpans(IndexReaderPtr reader); - virtual QueryPtr rewrite(IndexReaderPtr reader); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/SpanNotQuery.h b/include/SpanNotQuery.h deleted file mode 100644 index cb7ef203..00000000 --- a/include/SpanNotQuery.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANNOTQUERY_H -#define SPANNOTQUERY_H - -#include "SpanQuery.h" - -namespace Lucene -{ - /// Removes matches which overlap with another SpanQuery. - class LPPAPI SpanNotQuery : public SpanQuery - { - public: - /// Construct a SpanNotQuery matching spans from include which have no overlap with spans from exclude. - SpanNotQuery(SpanQueryPtr include, SpanQueryPtr exclude); - virtual ~SpanNotQuery(); - - LUCENE_CLASS(SpanNotQuery); - - protected: - SpanQueryPtr include; - SpanQueryPtr exclude; - - public: - using SpanQuery::toString; - - /// Return the SpanQuery whose matches are filtered. - SpanQueryPtr getInclude(); - - /// Return the SpanQuery whose matches must not overlap those returned. - SpanQueryPtr getExclude(); - - virtual String getField(); - virtual void extractTerms(SetTerm terms); - virtual String toString(const String& field); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual SpansPtr getSpans(IndexReaderPtr reader); - virtual QueryPtr rewrite(IndexReaderPtr reader); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/SpanOrQuery.h b/include/SpanOrQuery.h deleted file mode 100644 index 178f5be1..00000000 --- a/include/SpanOrQuery.h +++ /dev/null @@ -1,47 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANORQUERY_H -#define SPANORQUERY_H - -#include "SpanQuery.h" - -namespace Lucene -{ - /// Matches the union of its clauses. - class LPPAPI SpanOrQuery : public SpanQuery - { - public: - /// Construct a SpanOrQuery merging the provided clauses. 
- SpanOrQuery(Collection clauses); - virtual ~SpanOrQuery(); - - LUCENE_CLASS(SpanOrQuery); - - protected: - Collection clauses; - String field; - - public: - using SpanQuery::toString; - - /// Return the clauses whose spans are matched. - Collection getClauses(); - - virtual String getField(); - virtual void extractTerms(SetTerm terms); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual QueryPtr rewrite(IndexReaderPtr reader); - virtual String toString(const String& field); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual SpansPtr getSpans(IndexReaderPtr reader); - - friend class OrSpans; - }; -} - -#endif diff --git a/include/SpanQuery.h b/include/SpanQuery.h deleted file mode 100644 index 4106d37b..00000000 --- a/include/SpanQuery.h +++ /dev/null @@ -1,32 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANQUERY_H -#define SPANQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// Base class for span-based queries. - class LPPAPI SpanQuery : public Query - { - public: - virtual ~SpanQuery(); - LUCENE_CLASS(SpanQuery); - - public: - /// Returns the matches for this query in an index. Used internally to search for spans. - virtual SpansPtr getSpans(IndexReaderPtr reader) = 0; - - /// Returns the name of the field matched by this query. 
- virtual String getField() = 0; - - virtual WeightPtr createWeight(SearcherPtr searcher); - }; -} - -#endif diff --git a/include/SpanQueryFilter.h b/include/SpanQueryFilter.h deleted file mode 100644 index aee8a45d..00000000 --- a/include/SpanQueryFilter.h +++ /dev/null @@ -1,46 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANQUERYFILTER_H -#define SPANQUERYFILTER_H - -#include "SpanFilter.h" - -namespace Lucene -{ - /// Constrains search results to only match those which also match a provided query. Also provides position - /// information about where each document matches at the cost of extra space compared with the - /// QueryWrapperFilter. There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. - /// Namely, the position information for each matching document is stored. - /// - /// This filter does not cache. See the {@link CachingSpanFilter} for a wrapper that caches. - class LPPAPI SpanQueryFilter : public SpanFilter - { - public: - /// Constructs a filter which only matches documents matching query. - /// @param query The {@link SpanQuery} to use as the basis for the Filter. 
- SpanQueryFilter(SpanQueryPtr query = SpanQueryPtr()); - - virtual ~SpanQueryFilter(); - - LUCENE_CLASS(SpanQueryFilter); - - protected: - SpanQueryPtr query; - - public: - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); - virtual SpanFilterResultPtr bitSpans(IndexReaderPtr reader); - - SpanQueryPtr getQuery(); - - virtual String toString(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/SpanScorer.h b/include/SpanScorer.h deleted file mode 100644 index f0269aff..00000000 --- a/include/SpanScorer.h +++ /dev/null @@ -1,50 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANSCORER_H -#define SPANSCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// Public for extension only. - class LPPAPI SpanScorer : public Scorer - { - public: - SpanScorer(SpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms); - virtual ~SpanScorer(); - - LUCENE_CLASS(SpanScorer); - - protected: - SpansPtr spans; - WeightPtr weight; - ByteArray norms; - double value; - bool more; - int32_t doc; - double freq; - - public: - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - virtual int32_t docID(); - virtual double score(); - - protected: - virtual bool setFreqCurrentDoc(); - - /// This method is no longer an official member of {@link Scorer}, but it is needed by SpanWeight - /// to build an explanation. 
- virtual ExplanationPtr explain(int32_t doc); - - friend class SpanWeight; - friend class PayloadNearSpanWeight; - }; -} - -#endif diff --git a/include/SpanTermQuery.h b/include/SpanTermQuery.h deleted file mode 100644 index cb8a2afc..00000000 --- a/include/SpanTermQuery.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANTERMQUERY_H -#define SPANTERMQUERY_H - -#include "SpanQuery.h" - -namespace Lucene -{ - /// Matches spans containing a term. - class LPPAPI SpanTermQuery : public SpanQuery - { - public: - /// Construct a SpanTermQuery matching the named term's spans. - SpanTermQuery(TermPtr term); - virtual ~SpanTermQuery(); - - LUCENE_CLASS(SpanTermQuery); - - protected: - TermPtr term; - - public: - using SpanQuery::toString; - - /// Return the term whose spans are matched. - TermPtr getTerm(); - - virtual String getField(); - virtual void extractTerms(SetTerm terms); - virtual String toString(const String& field); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual SpansPtr getSpans(IndexReaderPtr reader); - }; -} - -#endif diff --git a/include/SpanWeight.h b/include/SpanWeight.h deleted file mode 100644 index 58496930..00000000 --- a/include/SpanWeight.h +++ /dev/null @@ -1,47 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANWEIGHT_H -#define SPANWEIGHT_H - -#include "Weight.h" - -namespace Lucene -{ - /// Public for use by other weight implementations - class LPPAPI SpanWeight : public Weight - { - public: - SpanWeight(SpanQueryPtr query, SearcherPtr searcher); - virtual ~SpanWeight(); - - LUCENE_CLASS(SpanWeight); - - protected: - SimilarityPtr similarity; - double value; - double idf; - double queryNorm; - double queryWeight; - - SetTerm terms; - SpanQueryPtr query; - IDFExplanationPtr idfExp; - - public: - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - - friend class PayloadNearSpanScorer; - friend class PayloadTermSpanScorer; - }; -} - -#endif diff --git a/include/Spans.h b/include/Spans.h deleted file mode 100644 index 9a4794b8..00000000 --- a/include/Spans.h +++ /dev/null @@ -1,78 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef SPANS_H -#define SPANS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// An enumeration of span matches. Used to implement span searching. Each span represents a range of term - /// positions within a document. Matches are enumerated in order, by increasing document number, within that - /// by increasing start position and finally by increasing end position. 
- class LPPAPI Spans : public LuceneObject - { - public: - virtual ~Spans(); - LUCENE_CLASS(Spans); - - public: - /// Move to the next match, returning true if any such exists. - virtual bool next() = 0; - - /// Skips to the first match beyond the current, whose document number is greater than or equal to target. - /// - /// Returns true if there is such a match. - /// - /// Behaves as if written: - ///
-        /// bool skipTo(int32_t target)
-        /// {
-        ///     do
-        ///     {
-        ///         if (!next())
-        ///             return false;
-        ///     }
-        ///     while (target > doc());
-        ///     return true;
-        /// }
-        /// 
- /// Most implementations are considerably more efficient than that. - virtual bool skipTo(int32_t target) = 0; - - /// Returns the document number of the current match. Initially invalid. - virtual int32_t doc() = 0; - - /// Returns the start position of the current match. Initially invalid. - virtual int32_t start() = 0; - - /// Returns the end position of the current match. Initially invalid. - virtual int32_t end() = 0; - - /// Returns the payload data for the current span. This is invalid until {@link #next()} is called for the - /// first time. This method must not be called more than once after each call of {@link #next()}. However, - /// most payloads are loaded lazily, so if the payload data for the current position is not needed, this - /// method may not be called at all for performance reasons. An ordered SpanQuery does not lazy load, so - /// if you have payloads in your index and you do not want ordered SpanNearQuerys to collect payloads, you - /// can disable collection with a constructor option. - /// - /// Note that the return type is a collection, thus the ordering should not be relied upon. - /// - /// @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable - /// is false - virtual Collection getPayload() = 0; - - /// Checks if a payload can be loaded at this position. - /// - /// Payloads can only be loaded once per call to {@link #next()}. - /// - /// @return true if there is a payload available at this position that can be loaded - virtual bool isPayloadAvailable() = 0; - }; -} - -#endif diff --git a/include/StandardAnalyzer.h b/include/StandardAnalyzer.h deleted file mode 100644 index 8cd53c42..00000000 --- a/include/StandardAnalyzer.h +++ /dev/null @@ -1,86 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef STANDARDANALYZER_H -#define STANDARDANALYZER_H - -#include "Analyzer.h" - -namespace Lucene -{ - /// Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link LowerCaseFilter} and {@link StopFilter}, using - /// a list of English stop words. - /// - /// You must specify the required {@link Version} compatibility when creating StandardAnalyzer: - /// - ///
    - ///
  • As of 2.9, StopFilter preserves position increments - ///
  • As of 2.4, Tokens incorrectly identified as acronyms are corrected - ///
- class LPPAPI StandardAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}). - /// @param matchVersion Lucene version to match. - StandardAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - /// @param matchVersion Lucene version to match. - /// @param stopWords stop words - StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords); - - /// Builds an analyzer with the stop words from the given file. - /// @see WordlistLoader#getWordSet(const String&, const String&) - /// @param matchVersion Lucene version to match. - /// @param stopwords File to read stop words from. - StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords); - - /// Builds an analyzer with the stop words from the given reader. - /// @see WordlistLoader#getWordSet(ReaderPtr, const String&) - /// @param matchVersion Lucene version to match. - /// @param stopwords Reader to read stop words from. - StandardAnalyzer(LuceneVersion::Version matchVersion, ReaderPtr stopwords); - - virtual ~StandardAnalyzer(); - - LUCENE_CLASS(StandardAnalyzer); - - public: - /// Default maximum allowed token length - static const int32_t DEFAULT_MAX_TOKEN_LENGTH; - - protected: - HashSet stopSet; - - /// Specifies whether deprecated acronyms should be replaced with HOST type. - bool replaceInvalidAcronym; - bool enableStopPositionIncrements; - - LuceneVersion::Version matchVersion; - - int32_t maxTokenLength; - - protected: - /// Construct an analyzer with the given stop words. - void ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet stopWords); - - public: - /// Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} - /// and a {@link StopFilter}. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Set maximum allowed token length. 
If a token is seen that exceeds this length then it is discarded. This setting - /// only takes effect the next time tokenStream or reusableTokenStream is called. - void setMaxTokenLength(int32_t length); - - /// @see #setMaxTokenLength - int32_t getMaxTokenLength(); - - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; -} - -#endif diff --git a/include/StandardFilter.h b/include/StandardFilter.h deleted file mode 100644 index ab1eee4a..00000000 --- a/include/StandardFilter.h +++ /dev/null @@ -1,41 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef STANDARDFILTER_H -#define STANDARDFILTER_H - -#include "TokenFilter.h" - -namespace Lucene -{ - /// Normalizes tokens extracted with {@link StandardTokenizer}. - class LPPAPI StandardFilter : public TokenFilter - { - public: - /// Construct filtering input. - StandardFilter(TokenStreamPtr input); - virtual ~StandardFilter(); - - LUCENE_CLASS(StandardFilter); - - protected: - TypeAttributePtr typeAtt; - TermAttributePtr termAtt; - - protected: - static const String& APOSTROPHE_TYPE(); - static const String& ACRONYM_TYPE(); - - public: - /// Returns the next token in the stream, or null at EOS. - /// - /// Removes 's from the end of words. - /// Removes dots from acronyms. - virtual bool incrementToken(); - }; -} - -#endif diff --git a/include/StandardTokenizer.h b/include/StandardTokenizer.h deleted file mode 100644 index 7f01c77b..00000000 --- a/include/StandardTokenizer.h +++ /dev/null @@ -1,107 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef STANDARDTOKENIZER_H -#define STANDARDTOKENIZER_H - -#include "Tokenizer.h" - -namespace Lucene -{ - /// A grammar-based tokenizer - /// - /// This should be a good tokenizer for most European-language documents: - /// - ///
    - ///
  • Splits words at punctuation characters, removing punctuation. However, a dot that's not followed by - /// whitespace is considered part of a token. - ///
  • Splits words at hyphens, unless there's a number in the token, in which case the whole token is interpreted - /// as a product number and is not split. - ///
  • Recognizes email addresses and internet hostnames as one token. - ///
- /// - /// Many applications have specific tokenizer needs. If this tokenizer does not suit your application, please consider - /// copying this source code directory to your project and maintaining your own grammar-based tokenizer. - /// - /// You must specify the required {@link Version} compatibility when creating StandardAnalyzer: - /// - ///
    - ///
  • As of 2.4, Tokens incorrectly identified as acronyms are corrected - ///
- class LPPAPI StandardTokenizer : public Tokenizer - { - public: - /// Creates a new instance of the {@link StandardTokenizer}. Attaches the input to the newly created scanner. - /// @param input The input reader - StandardTokenizer(LuceneVersion::Version matchVersion, ReaderPtr input); - - /// Creates a new StandardTokenizer with a given {@link AttributeSource}. - StandardTokenizer(LuceneVersion::Version matchVersion, AttributeSourcePtr source, ReaderPtr input); - - /// Creates a new StandardTokenizer with a given {@link AttributeSource.AttributeFactory} - StandardTokenizer(LuceneVersion::Version matchVersion, AttributeFactoryPtr factory, ReaderPtr input); - - virtual ~StandardTokenizer(); - - LUCENE_CLASS(StandardTokenizer); - - protected: - /// A private instance of the scanner - StandardTokenizerImplPtr scanner; - - bool replaceInvalidAcronym; - int32_t maxTokenLength; - - // this tokenizer generates three attributes: offset, positionIncrement and type - TermAttributePtr termAtt; - OffsetAttributePtr offsetAtt; - PositionIncrementAttributePtr posIncrAtt; - TypeAttributePtr typeAtt; - - public: - static const int32_t ALPHANUM; - static const int32_t APOSTROPHE; - static const int32_t ACRONYM; - static const int32_t COMPANY; - static const int32_t EMAIL; - static const int32_t HOST; - static const int32_t NUM; - static const int32_t CJ; - - /// @deprecated this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. - static const int32_t ACRONYM_DEP; - - /// String token types that correspond to token type int constants - static const Collection TOKEN_TYPES(); - - protected: - void init(ReaderPtr input, LuceneVersion::Version matchVersion); - - public: - /// Set the max allowed token length. Any token longer than this is skipped. 
- void setMaxTokenLength(int32_t length); - - /// @see #setMaxTokenLength - int32_t getMaxTokenLength(); - - /// @see TokenStream#next() - virtual bool incrementToken(); - - virtual void end(); - - virtual void reset(ReaderPtr input); - - /// @return true if StandardTokenizer now returns these tokens as Hosts, otherwise false - /// @deprecated Remove in 3.X and make true the only valid value - bool isReplaceInvalidAcronym(); - - /// @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST. - /// @deprecated Remove in 3.X and make true the only valid value - void setReplaceInvalidAcronym(bool replaceInvalidAcronym); - }; -} - -#endif diff --git a/include/StandardTokenizerImpl.h b/include/StandardTokenizerImpl.h deleted file mode 100644 index 0998c9a4..00000000 --- a/include/StandardTokenizerImpl.h +++ /dev/null @@ -1,195 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef STANDARDTOKENIZERIMPL_H -#define STANDARDTOKENIZERIMPL_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class StandardTokenizerImpl : public LuceneObject - { - public: - /// Creates a new scanner - /// @param in the Reader to read input from. 
- StandardTokenizerImpl(ReaderPtr in); - - virtual ~StandardTokenizerImpl(); - - LUCENE_CLASS(StandardTokenizerImpl); - - protected: - /// Initial size of the lookahead buffer - static const int32_t ZZ_BUFFERSIZE; - - /// Translates characters to character classes - static const wchar_t ZZ_CMAP_PACKED[]; - static const int32_t ZZ_CMAP_LENGTH; - static const int32_t ZZ_CMAP_PACKED_LENGTH; - - /// Translates characters to character classes - static const wchar_t* ZZ_CMAP(); - - /// Translates DFA states to action switch labels. - static const wchar_t ZZ_ACTION_PACKED_0[]; - static const int32_t ZZ_ACTION_LENGTH; - static const int32_t ZZ_ACTION_PACKED_LENGTH; - - /// Translates DFA states to action switch labels. - static const int32_t* ZZ_ACTION(); - - /// Translates a state to a row index in the transition table - static const wchar_t ZZ_ROWMAP_PACKED_0[]; - static const int32_t ZZ_ROWMAP_LENGTH; - static const int32_t ZZ_ROWMAP_PACKED_LENGTH; - - /// Translates a state to a row index in the transition table - static const int32_t* ZZ_ROWMAP(); - - /// The transition table of the DFA - static const wchar_t ZZ_TRANS_PACKED_0[]; - static const int32_t ZZ_TRANS_LENGTH; - static const int32_t ZZ_TRANS_PACKED_LENGTH; - - /// The transition table of the DFA - static const int32_t* ZZ_TRANS(); - - // error codes - static const int32_t ZZ_UNKNOWN_ERROR; - static const int32_t ZZ_NO_MATCH; - static const int32_t ZZ_PUSHBACK_2BIG; - - static const wchar_t* ZZ_ERROR_MSG[]; - - /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState - static const wchar_t ZZ_ATTRIBUTE_PACKED_0[]; - static const int32_t ZZ_ATTRIBUTE_LENGTH; - static const int32_t ZZ_ATTRIBUTE_PACKED_LENGTH; - - /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState - static const int32_t* ZZ_ATTRIBUTE(); - - /// The input device - ReaderPtr zzReader; - - /// The current state of the DFA - int32_t zzState; - - /// The current lexical state - int32_t zzLexicalState; - - /// This buffer contains 
the current text to be matched and is the source of the yytext() string - CharArray zzBuffer; - - /// The text position at the last accepting state - int32_t zzMarkedPos; - - /// The text position at the last state to be included in yytext - int32_t zzPushbackPos; - - /// The current text position in the buffer - int32_t zzCurrentPos; - - /// StartRead marks the beginning of the yytext() string in the buffer - int32_t zzStartRead; - - /// EndRead marks the last character in the buffer, that has been read from input - int32_t zzEndRead; - - /// Number of newlines encountered up to the start of the matched text - int32_t yyline; - - /// The number of characters up to the start of the matched text - int32_t _yychar; - - /// The number of characters from the last newline up to the start of the matched text - int32_t yycolumn; - - /// zzAtBOL == true if the scanner is currently at the beginning of a line - bool zzAtBOL; - - /// zzAtEOF == true if the scanner is at the EOF - bool zzAtEOF; - - public: - /// This character denotes the end of file - static const int32_t YYEOF; - - /// Lexical states - static const int32_t YYINITIAL; - - public: - int32_t yychar(); - - /// Resets the Tokenizer to a new Reader. - void reset(ReaderPtr r); - - /// Fills Lucene token with the current token text. - void getText(TokenPtr t); - - /// Fills TermAttribute with the current token text. - void getText(TermAttributePtr t); - - /// Closes the input stream. - void yyclose(); - - /// Resets the scanner to read from a new input stream. Does not close the old reader. - /// - /// All internal variables are reset, the old input stream cannot be reused (internal buffer is discarded and lost). - /// Lexical state is set to ZZ_INITIAL. - /// - /// @param reader the new input stream. - void yyreset(ReaderPtr reader); - - /// Returns the current lexical state. - int32_t yystate(); - - /// Enters a new lexical state - /// @param newState the new lexical state. 
- void yybegin(int32_t newState); - - /// Returns the text matched by the current regular expression. - String yytext(); - - /// Returns the character at position pos from the matched text. - /// - /// It is equivalent to yytext()[pos], but faster - /// @param pos the position of the character to fetch. A value from 0 to yylength() - 1. - /// @return the character at position pos. - wchar_t yycharat(int32_t pos); - - /// Returns the length of the matched text region. - int32_t yylength(); - - /// Pushes the specified amount of characters back into the input stream. - /// - /// They will be read again by then next call of the scanning method - /// @param number the number of characters to be read again. This number must not be greater than yylength() - void yypushback(int32_t number); - - /// Resumes scanning until the next regular expression is matched, the end of input is encountered or an I/O- - /// Error occurs. - int32_t getNextToken(); - - protected: - /// Refills the input buffer. - bool zzRefill(); - - /// Reports an error that occurred while scanning. - /// - /// In a well-formed scanner (no or only correct usage of yypushback(int32_t) and a match-all fallback rule) - /// this method will only be called with things that "Can't Possibly Happen". If this method is called, - /// something is seriously wrong. - /// - /// Usual syntax/scanner level error handling should be done in error fallback rules. - /// - /// @param errorCode The code of the errormessage to display. - void zzScanError(int32_t errorCode); - }; -} - -#endif diff --git a/include/StopAnalyzer.h b/include/StopAnalyzer.h deleted file mode 100644 index 4ae74e2e..00000000 --- a/include/StopAnalyzer.h +++ /dev/null @@ -1,52 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef STOPANALYZER_H -#define STOPANALYZER_H - -#include "Analyzer.h" - -namespace Lucene -{ - /// Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. - /// - /// You must specify the required {@link Version} compatibility when creating StopAnalyzer: As of 2.9, position - /// increments are preserved - class LPPAPI StopAnalyzer : public Analyzer - { - public: - /// Builds an analyzer which removes words in {@link #ENGLISH_STOP_WORDS_SET}. - StopAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the stop words from the given set. - StopAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords); - - /// Builds an analyzer with the stop words from the given file. - StopAnalyzer(LuceneVersion::Version matchVersion, const String& stopwordsFile); - - /// Builds an analyzer with the stop words from the given reader. - StopAnalyzer(LuceneVersion::Version matchVersion, ReaderPtr stopwords); - - virtual ~StopAnalyzer(); - - LUCENE_CLASS(StopAnalyzer); - - protected: - HashSet stopWords; - bool enablePositionIncrements; - - static const wchar_t* _ENGLISH_STOP_WORDS_SET[]; - - public: - /// An unmodifiable set containing some common English words that are usually not useful for searching. - static const HashSet ENGLISH_STOP_WORDS_SET(); - - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; -} - -#endif diff --git a/include/StopFilter.h b/include/StopFilter.h deleted file mode 100644 index b16eabed..00000000 --- a/include/StopFilter.h +++ /dev/null @@ -1,68 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef STOPFILTER_H -#define STOPFILTER_H - -#include "TokenFilter.h" - -namespace Lucene -{ - /// Removes stop words from a token stream. - class LPPAPI StopFilter : public TokenFilter - { - public: - /// Construct a token stream filtering the given input. If stopWords is an instance of {@link CharArraySet} - /// (true if makeStopSet() was used to construct the set) it will be directly used and ignoreCase will be - /// ignored since CharArraySet directly controls case sensitivity. - /// - /// If stopWords is not an instance of {@link CharArraySet}, a new CharArraySet will be constructed and - /// ignoreCase will be used to specify the case sensitivity of that set. - /// - /// @param enablePositionIncrements true if token positions should record the removed stop words - /// @param input Input TokenStream - /// @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords - /// @param ignoreCase if true, all words are lower cased first - StopFilter(bool enablePositionIncrements, TokenStreamPtr input, HashSet stopWords, bool ignoreCase = false); - StopFilter(bool enablePositionIncrements, TokenStreamPtr input, CharArraySetPtr stopWords, bool ignoreCase = false); - - virtual ~StopFilter(); - - LUCENE_CLASS(StopFilter); - - protected: - CharArraySetPtr stopWords; - bool enablePositionIncrements; - - TermAttributePtr termAtt; - PositionIncrementAttributePtr posIncrAtt; - - public: - /// Builds a Set from an array of stop words, appropriate for passing into the StopFilter constructor. - static HashSet makeStopSet(Collection stopWords); - - /// Returns the next input Token whose term() is not a stop word. - virtual bool incrementToken(); - - /// Returns version-dependent default for enablePositionIncrements. 
Analyzers that embed StopFilter use this - /// method when creating the StopFilter. Prior to 2.9, this returns false. On 2.9 or later, it returns true. - static bool getEnablePositionIncrementsVersionDefault(LuceneVersion::Version matchVersion); - - /// @see #setEnablePositionIncrements(bool). - bool getEnablePositionIncrements(); - - /// If true, this StopFilter will preserve positions of the incoming tokens (ie, accumulate and set position - /// increments of the removed stop tokens). Generally, true is best as it does not lose information (positions - /// of the original tokens) during indexing. - /// - /// When set, when a token is stopped (omitted), the position increment of the following token is incremented. - /// - /// NOTE: be sure to also set {@link QueryParser#setEnablePositionIncrements} if you use QueryParser to create queries. - void setEnablePositionIncrements(bool enable); - }; -} - -#endif diff --git a/include/StoredFieldsWriter.h b/include/StoredFieldsWriter.h deleted file mode 100644 index 87f74c6b..00000000 --- a/include/StoredFieldsWriter.h +++ /dev/null @@ -1,75 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef STOREDFIELDSWRITER_H -#define STOREDFIELDSWRITER_H - -#include "DocumentsWriter.h" - -namespace Lucene -{ - /// This is a DocFieldConsumer that writes stored fields. 
- class StoredFieldsWriter : public LuceneObject - { - public: - StoredFieldsWriter(DocumentsWriterPtr docWriter, FieldInfosPtr fieldInfos); - virtual ~StoredFieldsWriter(); - - LUCENE_CLASS(StoredFieldsWriter); - - public: - FieldsWriterPtr fieldsWriter; - DocumentsWriterWeakPtr _docWriter; - FieldInfosPtr fieldInfos; - int32_t lastDocID; - - Collection docFreeList; - int32_t freeCount; - int32_t allocCount; - - public: - StoredFieldsWriterPerThreadPtr addThread(DocStatePtr docState); - void flush(SegmentWriteStatePtr state); - void closeDocStore(SegmentWriteStatePtr state); - StoredFieldsWriterPerDocPtr getPerDoc(); - void abort(); - - /// Fills in any hole in the docIDs - void fill(int32_t docID); - - void finishDocument(StoredFieldsWriterPerDocPtr perDoc); - bool freeRAM(); - void free(StoredFieldsWriterPerDocPtr perDoc); - - protected: - void initFieldsWriter(); - }; - - class StoredFieldsWriterPerDoc : public DocWriter - { - public: - StoredFieldsWriterPerDoc(StoredFieldsWriterPtr fieldsWriter); - virtual ~StoredFieldsWriterPerDoc(); - - LUCENE_CLASS(StoredFieldsWriterPerDoc); - - protected: - StoredFieldsWriterWeakPtr _fieldsWriter; - - public: - PerDocBufferPtr buffer; - RAMOutputStreamPtr fdt; - int32_t numStoredFields; - - public: - void reset(); - virtual void abort(); - virtual int64_t sizeInBytes(); - virtual void finish(); - }; -} - -#endif diff --git a/include/StoredFieldsWriterPerThread.h b/include/StoredFieldsWriterPerThread.h deleted file mode 100644 index 8a569532..00000000 --- a/include/StoredFieldsWriterPerThread.h +++ /dev/null @@ -1,37 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef STOREDFIELDSWRITERPERTHREAD_H -#define STOREDFIELDSWRITERPERTHREAD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class StoredFieldsWriterPerThread : public LuceneObject - { - public: - StoredFieldsWriterPerThread(DocStatePtr docState, StoredFieldsWriterPtr storedFieldsWriter); - virtual ~StoredFieldsWriterPerThread(); - - LUCENE_CLASS(StoredFieldsWriterPerThread); - - public: - FieldsWriterPtr localFieldsWriter; - StoredFieldsWriterWeakPtr _storedFieldsWriter; - DocStatePtr docState; - - StoredFieldsWriterPerDocPtr doc; - - public: - void startDocument(); - void addField(FieldablePtr field, FieldInfoPtr fieldInfo); - DocWriterPtr finishDocument(); - void abort(); - }; -} - -#endif diff --git a/include/StringReader.h b/include/StringReader.h deleted file mode 100644 index dbfcc275..00000000 --- a/include/StringReader.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef STRINGREADER_H -#define STRINGREADER_H - -#include "Reader.h" - -namespace Lucene -{ - /// Convenience class for reading strings. - class LPPAPI StringReader : public Reader - { - public: - /// Creates a new StringReader, given the String to read from. - StringReader(const String& str); - virtual ~StringReader(); - - LUCENE_CLASS(StringReader); - - protected: - String str; - int32_t position; - - public: - /// Read a single character. - virtual int32_t read(); - - /// Read characters into a portion of an array. - virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); - - /// Close the stream. 
- virtual void close(); - - /// Tell whether this stream supports the mark() operation - virtual bool markSupported(); - - /// Reset the stream. - virtual void reset(); - - /// The number of bytes in the stream. - virtual int64_t length(); - }; -} - -#endif diff --git a/include/StringUtils.h b/include/StringUtils.h deleted file mode 100644 index 2468b0cc..00000000 --- a/include/StringUtils.h +++ /dev/null @@ -1,97 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef STRINGUTILS_H -#define STRINGUTILS_H - -#include "Lucene.h" - -namespace Lucene -{ - class LPPAPI StringUtils - { - public: - /// Maximum length of UTF encoding. - static const int32_t MAX_ENCODING_UTF8_SIZE; - - /// Default character radix. - static const int32_t CHARACTER_MAX_RADIX; - - public: - /// Convert uft8 buffer into unicode. - static int32_t toUnicode(const uint8_t* utf8, int32_t length, CharArray unicode); - - /// Convert uft8 buffer into unicode. - static int32_t toUnicode(const uint8_t* utf8, int32_t length, UnicodeResultPtr unicodeResult); - - /// Convert uft8 buffer into unicode. - static String toUnicode(const uint8_t* utf8, int32_t length); - - /// Convert uft8 string into unicode. - static String toUnicode(const SingleString& s); - - /// Convert unicode buffer into uft8. - static int32_t toUTF8(const wchar_t* unicode, int32_t length, ByteArray utf8); - - /// Convert unicode buffer into uft8. - static int32_t toUTF8(const wchar_t* unicode, int32_t length, UTF8ResultPtr utf8Result); - - /// Convert unicode buffer into uft8. - static SingleString toUTF8(const wchar_t* unicode, int32_t length); - - /// Convert unicode string into uft8. 
- static SingleString toUTF8(const String& s); - - /// Convert given string to lower case using current locale - static void toLower(String& str); - - /// Convert given string to lower case using current locale - static String toLower(const String& str); - - /// Convert given string to upper case using current locale - static void toUpper(String& str); - - /// Convert given string to upper case using current locale - static String toUpper(const String& str); - - /// Compare two strings ignoring case differences - static int32_t compareCase(const String& first, const String& second); - - /// Splits string using given delimiters - static Collection split(const String& str, const String& delim); - - /// Convert the given string to int32_t. - static int32_t toInt(const String& value); - - /// Convert the given string to int64_t. - static int64_t toLong(const String& value); - - /// Return given value as a long integer using base unit. - static int64_t toLong(const String& value, int32_t base); - - /// Convert the given string to double. - static double toDouble(const String& value); - - /// Compute the hash code from string. - static int32_t hashCode(const String& value); - - /// Return given value as a string using base unit. - static String toString(int64_t value, int32_t base); - - /// Convert any given type to a {@link String}. - template - static String toString(const TYPE& value) - { - StringStream os; - os << value; - return os.str(); - } - }; - - #define UTF8_TO_STRING(utf8) StringUtils::toUnicode(utf8, SIZEOF_ARRAY(utf8)) -} - -#endif diff --git a/include/Synchronize.h b/include/Synchronize.h deleted file mode 100644 index 0d3f0afd..00000000 --- a/include/Synchronize.h +++ /dev/null @@ -1,67 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef SYNCHRONIZE_H -#define SYNCHRONIZE_H - -#include -#include "Lucene.h" - -namespace Lucene -{ - /// Utility class to support locking via a mutex. - class LPPAPI Synchronize - { - public: - Synchronize(); - virtual ~Synchronize(); - - protected: - boost::recursive_timed_mutex mutexSynchronize; - int64_t lockThread; - int32_t recursionCount; - - public: - /// create a new Synchronize instance atomically. - static void createSync(SynchronizePtr& sync); - - /// Lock mutex using an optional timeout. - void lock(int32_t timeout = 0); - - /// Unlock mutex. - void unlock(); - - /// Unlock all recursive mutex. - int32_t unlockAll(); - - /// Returns true if mutex is currently locked by current thread. - bool holdsLock(); - }; - - /// Utility class to support scope locking. - class LPPAPI SyncLock - { - public: - SyncLock(SynchronizePtr sync, int32_t timeout = 0); - - template - SyncLock(OBJECT object, int32_t timeout = 0) - { - this->sync = object->getSync(); - lock(timeout); - } - - virtual ~SyncLock(); - - protected: - SynchronizePtr sync; - - protected: - void lock(int32_t timeout); - }; -} - -#endif diff --git a/include/TeeSinkTokenFilter.h b/include/TeeSinkTokenFilter.h deleted file mode 100644 index c22653b7..00000000 --- a/include/TeeSinkTokenFilter.h +++ /dev/null @@ -1,152 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TEESINKTOKENFILTER_H -#define TEESINKTOKENFILTER_H - -#include "TokenFilter.h" -#include "TokenStream.h" - -namespace Lucene -{ - /// This TokenFilter provides the ability to set aside attribute states that have already been analyzed. 
This is - /// useful in situations where multiple fields share many common analysis steps and then go their separate ways. - /// - /// It is also useful for doing things like entity extraction or proper noun analysis as part of the analysis workflow - /// and saving off those tokens for use in another field. - /// - ///
-    /// TeeSinkTokenFilterPtr source1 = newLucene(newLucene(reader1));
-    /// SinkTokenStreamPtr sink1 = source1->newSinkTokenStream();
-    /// SinkTokenStreamPtr sink2 = source1->newSinkTokenStream();
-    ///
-    /// TeeSinkTokenFilterPtr source2 = newLucene(newLucene(reader2));
-    /// source2->addSinkTokenStream(sink1);
-    /// source2->addSinkTokenStream(sink2);
-    ///
-    /// TokenStreamPtr final1 = newLucene(source1);
-    /// TokenStreamPtr final2 = source2;
-    /// TokenStreamPtr final3 = newLucene(sink1);
-    /// TokenStreamPtr final4 = newLucene(sink2);
-    ///
-    /// d->add(newLucene(L"f1", final1));
-    /// d->add(newLucene(L"f2", final2));
-    /// d->add(newLucene(L"f3", final3));
-    /// d->add(newLucene(L"f4", final4));
-    /// 
- /// - /// In this example, sink1 and sink2 will both get tokens from both reader1 and reader2 after whitespace tokenizer - /// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired. - /// It is important, that tees are consumed before sinks (in the above example, the field names must be less the - /// sink's field names). If you are not sure, which stream is consumed first, you can simply add another sink and - /// then pass all tokens to the sinks at once using {@link #consumeAllTokens}. - /// - /// This TokenFilter is exhausted after this. In the above example, change the example above to: - /// - ///
-    /// ...
-    /// TokenStreamPtr final1 = newLucene(source1->newSinkTokenStream());
-    /// TokenStreamPtr final2 = source2->newSinkTokenStream();
-    /// sink1->consumeAllTokens();
-    /// sink2->consumeAllTokens();
-    /// ...
-    /// 
- /// - /// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are - /// ready. - /// - /// Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. - class LPPAPI TeeSinkTokenFilter : public TokenFilter - { - public: - /// Instantiates a new TeeSinkTokenFilter. - TeeSinkTokenFilter(TokenStreamPtr input); - virtual ~TeeSinkTokenFilter(); - - LUCENE_CLASS(TeeSinkTokenFilter); - - protected: - Collection sinks; - - public: - /// Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream. - SinkTokenStreamPtr newSinkTokenStream(); - - /// Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream that pass - /// the supplied filter. - /// @see SinkFilter - SinkTokenStreamPtr newSinkTokenStream(SinkFilterPtr filter); - - /// Adds a {@link SinkTokenStream} created by another TeeSinkTokenFilter to this one. The supplied stream will - /// also receive all consumed tokens. This method can be used to pass tokens from two different tees to one sink. - void addSinkTokenStream(SinkTokenStreamPtr sink); - - /// TeeSinkTokenFilter passes all tokens to the added sinks when itself is consumed. To be sure, that all tokens - /// from the input stream are passed to the sinks, you can call this methods. This instance is exhausted after this, - /// but all sinks are instant available. - void consumeAllTokens(); - - virtual bool incrementToken(); - virtual void end(); - }; - - class LPPAPI SinkFilter : public LuceneObject - { - public: - virtual ~SinkFilter(); - - LUCENE_CLASS(SinkFilter); - - public: - /// Returns true, if the current state of the passed-in {@link AttributeSource} shall be stored in the sink. - virtual bool accept(AttributeSourcePtr source) = 0; - - /// Called by {@link SinkTokenStream#reset()}. This method does nothing by default and can optionally be overridden. 
- virtual void reset(); - }; - - class LPPAPI AcceptAllSinkFilter : public SinkFilter - { - public: - virtual ~AcceptAllSinkFilter(); - - LUCENE_CLASS(AcceptAllSinkFilter); - - public: - virtual bool accept(AttributeSourcePtr source); - }; - - /// A filter that decides which {@link AttributeSource} states to store in the sink. - class LPPAPI SinkTokenStream : public TokenStream - { - public: - SinkTokenStream(AttributeSourcePtr source, SinkFilterPtr filter); - virtual ~SinkTokenStream(); - - LUCENE_CLASS(SinkTokenStream); - - protected: - Collection cachedStates; - AttributeSourceStatePtr finalState; - bool initIterator; - Collection::iterator it; - SinkFilterPtr filter; - - protected: - bool accept(AttributeSourcePtr source); - void addState(AttributeSourceStatePtr state); - void setFinalState(AttributeSourceStatePtr finalState); - - public: - virtual bool incrementToken(); - virtual void end(); - virtual void reset(); - - friend class TeeSinkTokenFilter; - }; -} - -#endif diff --git a/include/Term.h b/include/Term.h deleted file mode 100644 index 09bc1450..00000000 --- a/include/Term.h +++ /dev/null @@ -1,62 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERM_H -#define TERM_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A Term represents a word from text. This is the unit of search. It is composed of two elements, - /// the text of the word, as a string, and the name of the field that the text occurred in, an interned - /// string. - /// - /// Note that terms may represent more than words from text fields, but also things like dates, email - /// addresses, urls, etc. 
- class LPPAPI Term : public LuceneObject - { - public: - /// Constructs a Term with the given field and text. - Term(const String& fld, const String& txt = EmptyString); - virtual ~Term(); - - LUCENE_CLASS(Term); - - public: - String _field; - String _text; - - public: - /// Returns the field of this term, an interned string. The field indicates the part of a document - /// which this term came from. - String field(); - - /// Returns the text of this term. In the case of words, this is simply the text of the word. In - /// the case of dates and other types, this is an encoding of the object as a string. - String text(); - - /// Optimized construction of new Terms by reusing same field as this Term - /// @param text The text of the new term (field is implicitly same as this Term instance) - /// @return A new Term - TermPtr createTerm(const String& text); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - /// Compares two terms, returning a negative integer if this term belongs before the argument, zero - /// if this term is equal to the argument, and a positive integer if this term belongs after the argument. - /// - /// The ordering of terms is first by field, then by text. - virtual int32_t compareTo(LuceneObjectPtr other); - - void set(const String& fld, const String& txt); - - virtual String toString(); - }; -} - -#endif diff --git a/include/TermAttribute.h b/include/TermAttribute.h deleted file mode 100644 index 2453fef4..00000000 --- a/include/TermAttribute.h +++ /dev/null @@ -1,92 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMATTRIBUTE_H -#define TERMATTRIBUTE_H - -#include "Attribute.h" - -namespace Lucene -{ - /// The term text of a Token. - class LPPAPI TermAttribute : public Attribute - { - public: - TermAttribute(); - virtual ~TermAttribute(); - - LUCENE_CLASS(TermAttribute); - - protected: - static const int32_t MIN_BUFFER_SIZE; - - CharArray _termBuffer; - int32_t _termLength; - - public: - virtual String toString(); - - /// Returns the Token's term text. - /// - /// This method has a performance penalty because the text is stored internally in a char[]. If possible, - /// use {@link #termBuffer()} and {@link #termLength()} directly instead. If you really need a String, use - /// this method, which is nothing more than a convenience call to new String(token.termBuffer(), 0, - /// token.termLength()) - virtual String term(); - - /// Copies the contents of buffer, starting at offset for length characters, into the termBuffer array. - /// @param buffer the buffer to copy - /// @param offset the index in the buffer of the first character to copy - /// @param length the number of characters to copy - virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length); - - /// Copies the contents of buffer into the termBuffer array. - /// @param buffer the buffer to copy - virtual void setTermBuffer(const String& buffer); - - /// Returns the internal termBuffer character array which you can then directly alter. If the array is - /// too small for your token, use {@link #resizeTermBuffer(int)} to increase it. After altering the buffer - /// be sure to call {@link #setTermLength} to record the number of valid characters that were placed into - /// the termBuffer. - virtual CharArray termBuffer(); - - /// Optimized implementation of termBuffer. - virtual wchar_t* termBufferArray(); - - /// Grows the termBuffer to at least size newSize, preserving the existing content. 
Note: If the next - /// operation is to change the contents of the term buffer use {@link #setTermBuffer(char[], int, int)}, - /// {@link #setTermBuffer(String)}, or {@link #setTermBuffer(String, int, int)} to optimally combine the - /// resize with the setting of the termBuffer. - /// @param newSize minimum size of the new termBuffer - /// @return newly created termBuffer with length >= newSize - virtual CharArray resizeTermBuffer(int32_t newSize); - - /// Return number of valid characters (length of the term) in the termBuffer array. - virtual int32_t termLength(); - - /// Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the - /// termBuffer or to synchronize with external manipulation of the termBuffer. Note: to grow the size of - /// the array, use {@link #resizeTermBuffer(int)} first. - /// @param length the truncated length - virtual void setTermLength(int32_t length); - - virtual int32_t hashCode(); - virtual void clear(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual bool equals(LuceneObjectPtr other); - virtual void copyTo(AttributePtr target); - - protected: - /// Allocates a buffer char[] of at least newSize, without preserving the existing content. Its always - /// used in places that set the content. - /// @param newSize minimum size of the buffer - void growTermBuffer(int32_t newSize); - - void initTermBuffer(); - }; -} - -#endif diff --git a/include/TermBuffer.h b/include/TermBuffer.h deleted file mode 100644 index 2766c70a..00000000 --- a/include/TermBuffer.h +++ /dev/null @@ -1,51 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMBUFFER_H -#define TERMBUFFER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class TermBuffer : public LuceneObject - { - public: - TermBuffer(); - virtual ~TermBuffer(); - - LUCENE_CLASS(TermBuffer); - - protected: - String field; - TermPtr term; // cached - bool preUTF8Strings; // true if strings are stored in modified UTF8 encoding - - UnicodeResultPtr text; - UTF8ResultPtr bytes; - - public: - virtual int32_t compareTo(LuceneObjectPtr other); - - /// Call this if the IndexInput passed to {@link #read} stores terms in the "modified UTF8" format. - void setPreUTF8Strings(); - - void read(IndexInputPtr input, FieldInfosPtr fieldInfos); - - void set(TermPtr term); - void set(TermBufferPtr other); - void reset(); - - TermPtr toTerm(); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - protected: - int32_t compareChars(wchar_t* chars1, int32_t len1, wchar_t* chars2, int32_t len2); - }; -} - -#endif diff --git a/include/TermDocs.h b/include/TermDocs.h deleted file mode 100644 index d0d65589..00000000 --- a/include/TermDocs.h +++ /dev/null @@ -1,59 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMDOCS_H -#define TERMDOCS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// TermDocs provides an interface for enumerating ; pairs for a term. The document - /// portion names each document containing the term. Documents are indicated by number. The frequency - /// portion gives the number of times the term occurred in each document. The pairs are ordered by document - /// number. 
- /// @see IndexReader#termDocs() - class LPPAPI TermDocs - { - protected: - TermDocs(); - - public: - LUCENE_INTERFACE(TermDocs); - - public: - /// Sets this to the data for a term. The enumeration is reset to the start of the data for this term. - virtual void seek(TermPtr term) = 0; - - /// Sets this to the data for the current term in a {@link TermEnum}. - /// This may be optimized in some implementations. - virtual void seek(TermEnumPtr termEnum) = 0; - - /// Returns the current document number. This is invalid until {@link #next()} is called for the first time. - virtual int32_t doc() = 0; - - /// Returns the frequency of the term within the current document. This is invalid until {@link #next()} is - /// called for the first time. - virtual int32_t freq() = 0; - - /// Moves to the next pair in the enumeration. Returns true if there is such a next pair in the enumeration. - virtual bool next() = 0; - - /// Attempts to read multiple entries from the enumeration, up to length of docs. Document numbers are stored - /// in docs, and term frequencies are stored in freqs. Returns the number of entries read. Zero is only - /// returned when the stream has been exhausted. - virtual int32_t read(Collection docs, Collection freqs) = 0; - - /// Skips entries to the first beyond the current whose document number is greater than or equal to target. - /// Returns true if there is such an entry. - virtual bool skipTo(int32_t target) = 0; - - /// Frees associated resources. - virtual void close() = 0; - }; -} - -#endif diff --git a/include/TermEnum.h b/include/TermEnum.h deleted file mode 100644 index 7260f802..00000000 --- a/include/TermEnum.h +++ /dev/null @@ -1,39 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMENUM_H -#define TERMENUM_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Abstract class for enumerating terms. - /// - /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater - /// than all that precede it. - class LPPAPI TermEnum : public LuceneObject - { - public: - virtual ~TermEnum(); - LUCENE_CLASS(TermEnum); - - public: - /// Increments the enumeration to the next element. True if one exists. - virtual bool next() = 0; - - /// Returns the current Term in the enumeration. - virtual TermPtr term() = 0; - - /// Returns the docFreq of the current Term in the enumeration. - virtual int32_t docFreq() = 0; - - /// Closes the enumeration to further activity, freeing resources. - virtual void close() = 0; - }; -} - -#endif diff --git a/include/TermFreqVector.h b/include/TermFreqVector.h deleted file mode 100644 index 846b1532..00000000 --- a/include/TermFreqVector.h +++ /dev/null @@ -1,57 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMFREQVECTOR_H -#define TERMFREQVECTOR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Provides access to stored term vector of a document field. The vector consists of the name of the field, an - /// array of the terms that occur in the field of the {@link Document} and a parallel array of frequencies. Thus, - /// getTermFrequencies()[5] corresponds with the frequency of getTerms()[5], assuming there are at least 5 terms - /// in the Document. 
- class LPPAPI TermFreqVector - { - protected: - TermFreqVector(); - - public: - virtual ~TermFreqVector(); - LUCENE_INTERFACE(TermFreqVector); - - public: - /// The {@link Fieldable} name. - /// @return The name of the field this vector is associated with. - virtual String getField(); - - /// @return The number of terms in the term vector. - virtual int32_t size(); - - /// @return An Array of term texts in ascending order. - virtual Collection getTerms(); - - /// Array of term frequencies. Locations of the array correspond one to one to the terms in the array obtained from - /// getTerms method. Each location in the array contains the number of times this term occurs in the document or the - /// document field. - virtual Collection getTermFrequencies(); - - /// Return an index in the term numbers array returned from getTerms at which the term with the specified term appears. - /// If this term does not appear in the array, return -1. - virtual int32_t indexOf(const String& term); - - /// Just like indexOf(int) but searches for a number of terms at the same time. Returns an array that has the same size - /// as the number of terms searched for, each slot containing the result of searching for that term number. - /// - /// @param terms array containing terms to look for - /// @param start index in the array where the list of terms starts - /// @param length the number of terms in the list - virtual Collection indexesOf(Collection terms, int32_t start, int32_t length); - }; -} - -#endif diff --git a/include/TermInfo.h b/include/TermInfo.h deleted file mode 100644 index a0b706a8..00000000 --- a/include/TermInfo.h +++ /dev/null @@ -1,37 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMINFO_H -#define TERMINFO_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// A TermInfo is the record of information stored for a term. - class TermInfo : public LuceneObject - { - public: - TermInfo(TermInfoPtr ti); - TermInfo(int32_t df = 0, int64_t fp = 0, int64_t pp = 0); - virtual ~TermInfo(); - - LUCENE_CLASS(TermInfo); - - public: - /// The number of documents which contain the term. - int32_t docFreq; - int64_t freqPointer; - int64_t proxPointer; - int32_t skipOffset; - - public: - void set(int32_t docFreq, int64_t freqPointer, int64_t proxPointer, int32_t skipOffset); - void set(TermInfoPtr ti); - }; -} - -#endif diff --git a/include/TermInfosReader.h b/include/TermInfosReader.h deleted file mode 100644 index b03c0081..00000000 --- a/include/TermInfosReader.h +++ /dev/null @@ -1,90 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMINFOSREADER_H -#define TERMINFOSREADER_H - -#include "CloseableThreadLocal.h" -#include "SimpleLRUCache.h" - -namespace Lucene -{ - /// This stores a monotonically increasing set of pairs in a Directory. Pairs are - /// accessed either by Term or by ordinal position the set. 
- class TermInfosReader : public LuceneObject - { - public: - TermInfosReader(DirectoryPtr dir, const String& seg, FieldInfosPtr fis, int32_t readBufferSize, int32_t indexDivisor); - virtual ~TermInfosReader(); - - LUCENE_CLASS(TermInfosReader); - - protected: - DirectoryPtr directory; - String segment; - FieldInfosPtr fieldInfos; - CloseableThreadLocal threadResources; - SegmentTermEnumPtr origEnum; - int64_t _size; - - Collection indexTerms; - Collection indexInfos; - Collection indexPointers; - - int32_t totalIndexInterval; - - static const int32_t DEFAULT_CACHE_SIZE; - - public: - int32_t getSkipInterval(); - int32_t getMaxSkipLevels(); - void close(); - - /// Returns the number of term/value pairs in the set. - int64_t size(); - - /// Returns the TermInfo for a Term in the set, or null. - TermInfoPtr get(TermPtr term); - - /// Returns the position of a Term in the set or -1. - int64_t getPosition(TermPtr term); - - /// Returns an enumeration of all the Terms and TermInfos in the set. - SegmentTermEnumPtr terms(); - - /// Returns an enumeration of terms starting at or after the named term. - SegmentTermEnumPtr terms(TermPtr term); - - protected: - TermInfosReaderThreadResourcesPtr getThreadResources(); - - /// Returns the offset of the greatest index entry which is less than or equal to term. - int32_t getIndexOffset(TermPtr term); - - void seekEnum(SegmentTermEnumPtr enumerator, int32_t indexOffset); - - /// Returns the TermInfo for a Term in the set, or null. 
- TermInfoPtr get(TermPtr term, bool useCache); - - void ensureIndexIsRead(); - }; - - class TermInfosReaderThreadResources : public LuceneObject - { - public: - virtual ~TermInfosReaderThreadResources(); - - LUCENE_CLASS(TermInfosReaderThreadResources); - - public: - SegmentTermEnumPtr termEnum; - - // Used for caching the least recently looked-up Terms - TermInfoCachePtr termInfoCache; - }; -} - -#endif diff --git a/include/TermInfosWriter.h b/include/TermInfosWriter.h deleted file mode 100644 index 79804259..00000000 --- a/include/TermInfosWriter.h +++ /dev/null @@ -1,95 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMINFOSWRITER_H -#define TERMINFOSWRITER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// This stores a monotonically increasing set of pairs in a Directory. A TermInfos - /// can be written once, in order. - class TermInfosWriter : public LuceneObject - { - public: - TermInfosWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval); - TermInfosWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval, bool isIndex); - virtual ~TermInfosWriter(); - - LUCENE_CLASS(TermInfosWriter); - - public: - /// The file format version, a negative number. - static const int32_t FORMAT; - - /// Changed strings to true utf8 with length-in-bytes not length-in-chars. - static const int32_t FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; - - /// NOTE: always change this if you switch to a new format. - static const int32_t FORMAT_CURRENT; - - /// The fraction of terms in the "dictionary" which should be stored in RAM. 
Smaller values use more memory, but - /// make searching slightly faster, while larger values use less memory and make searching slightly slower. - /// Searching is typically not dominated by dictionary lookup, so tweaking this is rarely useful. - int32_t indexInterval; - - /// The fraction of {@link TermDocs} entries stored in skip tables, used to accelerate {@link TermDocs#skipTo(int)}. - /// Larger values result in smaller indexes, greater acceleration, but fewer accelerable cases, while smaller values - /// result in bigger indexes, less acceleration and more accelerable cases. More detailed experiments would be useful - /// here. - int32_t skipInterval; - - /// The maximum number of skip levels. Smaller values result in slightly smaller indexes, but slower skipping - /// in big posting lists. - int32_t maxSkipLevels; - - protected: - FieldInfosPtr fieldInfos; - IndexOutputPtr output; - TermInfoPtr lastTi; - int64_t size; - - int64_t lastIndexPointer; - bool isIndex; - ByteArray lastTermBytes; - int32_t lastTermBytesLength; - int32_t lastFieldNumber; - - TermInfosWriterPtr otherWriter; - TermInfosWriterWeakPtr _other; - UTF8ResultPtr utf8Result; - - // Currently used only by assert statements - UnicodeResultPtr unicodeResult1; - UnicodeResultPtr unicodeResult2; - - public: - virtual void initialize(); - - void add(TermPtr term, TermInfoPtr ti); - - /// Adds a new <, TermInfo> pair to the set. Term must be lexicographically - /// greater than all previous Terms added. TermInfo pointers must be positive and greater than all previous. - void add(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength, TermInfoPtr ti); - - /// Called to complete TermInfos creation. 
- void close(); - - protected: - void initialize(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval, bool isi); - - /// Currently used only by assert statements - bool initUnicodeResults(); - - /// Currently used only by assert statement - int32_t compareToLastTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength); - - void writeTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength); - }; -} - -#endif diff --git a/include/TermPositionVector.h b/include/TermPositionVector.h deleted file mode 100644 index b5a925eb..00000000 --- a/include/TermPositionVector.h +++ /dev/null @@ -1,38 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMPOSITIONVECTOR_H -#define TERMPOSITIONVECTOR_H - -#include "TermFreqVector.h" - -namespace Lucene -{ - /// Extends TermFreqVector to provide additional information about positions in which each of the terms is found. A TermPositionVector not necessarily - /// contains both positions and offsets, but at least one of these arrays exists. - class LPPAPI TermPositionVector : public TermFreqVector - { - protected: - TermPositionVector(); - - public: - virtual ~TermPositionVector(); - LUCENE_INTERFACE(TermPositionVector); - - public: - /// Returns an array of positions in which the term is found. Terms are identified by the index at which its number appears in the term String - /// array obtained from the indexOf method. May return null if positions have not been stored. - virtual Collection getTermPositions(int32_t index); - - /// Returns an array of TermVectorOffsetInfo in which the term is found. May return null if offsets have not been stored. 
- /// @see Token - /// @param index The position in the array to get the offsets from - /// @return An array of TermVectorOffsetInfo objects or the empty list - virtual Collection getOffsets(int32_t index); - }; -} - -#endif diff --git a/include/TermPositions.h b/include/TermPositions.h deleted file mode 100644 index 72643ea3..00000000 --- a/include/TermPositions.h +++ /dev/null @@ -1,55 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMPOSITIONS_H -#define TERMPOSITIONS_H - -#include "TermDocs.h" - -namespace Lucene -{ - /// TermPositions provides an interface for enumerating the *> - /// tuples for a term. The document and frequency are the same as for a TermDocs. The positions portion - /// lists the ordinal positions of each occurrence of a term in a document. - /// @see IndexReader#termPositions() - class LPPAPI TermPositions : public TermDocs - { - protected: - TermPositions(); - - public: - virtual ~TermPositions(); - LUCENE_INTERFACE(TermPositions); - - public: - /// Returns next position in the current document. It is an error to call this more than {@link #freq()} - /// times without calling {@link #next()}. This is invalid until {@link #next()} is called for - // the first time. - virtual int32_t nextPosition(); - - /// Returns the length of the payload at the current term position. This is invalid until {@link - /// #nextPosition()} is called for the first time. - /// @return length of the current payload in number of bytes - virtual int32_t getPayloadLength(); - - /// Returns the payload data at the current term position. This is invalid until {@link #nextPosition()} - /// is called for the first time. 
- /// This method must not be called more than once after each call of {@link #nextPosition()}. However, - /// payloads are loaded lazily, so if the payload data for the current position is not needed, - /// this method may not be called at all for performance reasons. - /// @param data the array into which the data of this payload is to be stored - /// @param offset the offset in the array into which the data of this payload is to be stored. - /// @return a byte array containing the data of this payload - virtual ByteArray getPayload(ByteArray data, int32_t offset); - - /// Checks if a payload can be loaded at this position. - /// Payloads can only be loaded once per call to {@link #nextPosition()}. - /// @return true if there is a payload available at this position that can be loaded - virtual bool isPayloadAvailable(); - }; -} - -#endif diff --git a/include/TermQuery.h b/include/TermQuery.h deleted file mode 100644 index b334d20e..00000000 --- a/include/TermQuery.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMQUERY_H -#define TERMQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// A Query that matches documents containing a term. This may be combined with other terms with a - /// {@link BooleanQuery}. - class LPPAPI TermQuery : public Query - { - public: - /// Constructs a query for the term. - TermQuery(TermPtr term); - - virtual ~TermQuery(); - - LUCENE_CLASS(TermQuery); - - protected: - TermPtr term; - - public: - using Query::toString; - - /// Returns the term of this query. 
- TermPtr getTerm(); - - virtual WeightPtr createWeight(SearcherPtr searcher); - virtual void extractTerms(SetTerm terms); - - /// Prints a user-readable version of this query. - virtual String toString(const String& field); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - friend class TermWeight; - }; -} - -#endif diff --git a/include/TermRangeFilter.h b/include/TermRangeFilter.h deleted file mode 100644 index fa10c123..00000000 --- a/include/TermRangeFilter.h +++ /dev/null @@ -1,68 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMRANGEFILTER_H -#define TERMRANGEFILTER_H - -#include "MultiTermQueryWrapperFilter.h" - -namespace Lucene -{ - /// A Filter that restricts search results to a range of term values in a given field. - /// - /// This filter matches the documents looking for terms that fall into the supplied range according to {@link - /// String#compare(String)}, unless a Collator is provided. It is not intended for numerical ranges; use {@link - /// NumericRangeFilter} instead. - /// - /// If you construct a large number of range filters with different ranges but on the same field, {@link - /// FieldCacheRangeFilter} may have significantly better performance. - class LPPAPI TermRangeFilter : public MultiTermQueryWrapperFilter - { - public: - /// Warning: Using this constructor and supplying a non-null value in the collator parameter will cause - /// every single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending - /// on the number of index Terms in this Field, the operation could be very slow. 
- /// @param lowerTerm The lower bound on this range - /// @param upperTerm The upper bound on this range - /// @param includeLower Does this range include the lower bound? - /// @param includeUpper Does this range include the upper bound? - /// @param collator The collator to use when determining range inclusion; set to null to use Unicode code - /// point ordering instead of collation. - TermRangeFilter(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, - bool includeUpper, CollatorPtr collator = CollatorPtr()); - - virtual ~TermRangeFilter(); - - LUCENE_CLASS(TermRangeFilter); - - public: - /// Constructs a filter for field fieldName matching less than or equal to upperTerm. - static TermRangeFilterPtr Less(const String& fieldName, StringValue upperTerm); - - /// Constructs a filter for field fieldName matching greater than or equal to lowerTerm. - static TermRangeFilterPtr More(const String& fieldName, StringValue lowerTerm); - - /// Returns the field name for this filter - String getField(); - - /// Returns the lower value of this range filter - String getLowerTerm(); - - /// Returns the upper value of this range filter - String getUpperTerm(); - - /// Returns true if the lower endpoint is inclusive - bool includesLower(); - - /// Returns true if the upper endpoint is inclusive - bool includesUpper(); - - /// Returns the collator used to determine range inclusion, if any. - CollatorPtr getCollator(); - }; -} - -#endif diff --git a/include/TermRangeQuery.h b/include/TermRangeQuery.h deleted file mode 100644 index 6978f8f4..00000000 --- a/include/TermRangeQuery.h +++ /dev/null @@ -1,89 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMRANGEQUERY_H -#define TERMRANGEQUERY_H - -#include "MultiTermQuery.h" - -namespace Lucene -{ - /// A Query that matches documents within an range of terms. - /// - /// This query matches the documents looking for terms that fall into the supplied range according to {@link - /// String#compare(String)}, unless a Collator is provided. It is not intended for numerical ranges; use {@link - /// NumericRangeQuery} instead. - /// - /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. - class LPPAPI TermRangeQuery : public MultiTermQuery - { - public: - /// Constructs a query selecting all terms greater/equal than lowerTerm but less/equal than upperTerm. - /// - /// If an endpoint is null, it is said to be "open". Either or both endpoints may be open. Open endpoints - /// may not be exclusive (you can't select all but the first or last term without explicitly specifying the - /// term to exclude.) - /// - /// If collator is not null, it will be used to decide whether index terms are within the given range, rather - /// than using the Unicode code point order in which index terms are stored. - /// - /// Warning: Using this constructor and supplying a non-null value in the collator parameter will cause every - /// single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending on the - /// number of index Terms in this Field, the operation could be very slow. - /// - /// @param lowerTerm The Term text at the lower end of the range - /// @param upperTerm The Term text at the upper end of the range - /// @param includeLower If true, the lowerTerm is included in the range. - /// @param includeUpper If true, the upperTerm is included in the range. - /// @param collator The collator to use to collate index Terms, to determine their membership in the range - /// bounded by lowerTerm and upperTerm. 
- TermRangeQuery(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, - bool includeUpper, CollatorPtr collator = CollatorPtr()); - - virtual ~TermRangeQuery(); - - LUCENE_CLASS(TermRangeQuery); - - protected: - StringValue lowerTerm; - StringValue upperTerm; - CollatorPtr collator; - String field; - bool includeLower; - bool includeUpper; - - public: - using MultiTermQuery::toString; - - /// Returns the field name for this query - String getField(); - - /// Returns the lower value of this range query - String getLowerTerm(); - - /// Returns the upper value of this range query - String getUpperTerm(); - - /// Returns true if the lower endpoint is inclusive - bool includesLower(); - - /// Returns true if the upper endpoint is inclusive - bool includesUpper(); - - /// Returns the collator used to determine range inclusion, if any. - CollatorPtr getCollator(); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual String toString(const String& field); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - - protected: - virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); - }; -} - -#endif diff --git a/include/TermRangeTermEnum.h b/include/TermRangeTermEnum.h deleted file mode 100644 index 074093be..00000000 --- a/include/TermRangeTermEnum.h +++ /dev/null @@ -1,60 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMRANGETERMENUM_H -#define TERMRANGETERMENUM_H - -#include "FilteredTermEnum.h" - -namespace Lucene -{ - /// Subclass of FilteredTermEnum for enumerating all terms that match the specified range parameters. 
- /// - /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than - /// all that precede it. - class LPPAPI TermRangeTermEnum : public FilteredTermEnum - { - public: - /// Enumerates all terms greater/equal than lowerTerm but less/equal than upperTerm. - /// - /// If an endpoint is null, it is said to be "open". Either or both endpoints may be open. Open endpoints - /// may not be exclusive (you can't select all but the first or last term without explicitly specifying - /// the term to exclude.) - /// - /// @param reader - /// @param field An interned field that holds both lower and upper terms. - /// @param lowerTermText The term text at the lower end of the range - /// @param upperTermText The term text at the upper end of the range - /// @param includeLower If true, the lowerTerm is included in the range. - /// @param includeUpper If true, the upperTerm is included in the range. - /// @param collator The collator to use to collate index Terms, to determine their membership in the range - /// bounded by lowerTerm and upperTerm. - TermRangeTermEnum(IndexReaderPtr reader, const String& field, StringValue lowerTermText, StringValue upperTermText, - bool includeLower, bool includeUpper, CollatorPtr collator); - - virtual ~TermRangeTermEnum(); - - LUCENE_CLASS(TermRangeTermEnum); - - protected: - CollatorPtr collator; - bool _endEnum; - String field; - StringValue upperTermText; - StringValue lowerTermText; - bool includeLower; - bool includeUpper; - - public: - virtual double difference(); - - protected: - virtual bool endEnum(); - virtual bool termCompare(TermPtr term); - }; -} - -#endif diff --git a/include/TermScorer.h b/include/TermScorer.h deleted file mode 100644 index ab0932bd..00000000 --- a/include/TermScorer.h +++ /dev/null @@ -1,72 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMSCORER_H -#define TERMSCORER_H - -#include "Scorer.h" - -namespace Lucene -{ - /// A Scorer for documents matching a Term. - class TermScorer : public Scorer - { - public: - /// Construct a TermScorer. - /// @param weight The weight of the Term in the query. - /// @param td An iterator over the documents matching the Term. - /// @param similarity The Similarity implementation to be used for score computations. - /// @param norms The field norms of the document fields for the Term. - TermScorer(WeightPtr weight, TermDocsPtr td, SimilarityPtr similarity, ByteArray norms); - - virtual ~TermScorer(); - - LUCENE_CLASS(TermScorer); - - protected: - WeightPtr weight; - TermDocsPtr termDocs; - ByteArray norms; - double weightValue; - int32_t doc; - - Collection docs; // buffered doc numbers - Collection freqs; // buffered term freqs - int32_t pointer; - int32_t pointerMax; - - static const int32_t SCORE_CACHE_SIZE; - Collection scoreCache; - - public: - virtual void score(CollectorPtr collector); - virtual int32_t docID(); - - /// Advances to the next document matching the query. - /// The iterator over the matching documents is buffered using {@link - /// TermDocs#read(Collection, Collection)}. - /// @return the document matching the query or -1 if there are no more documents. - virtual int32_t nextDoc(); - - virtual double score(); - - /// Advances to the first match beyond the current whose document number is greater than or equal to a - /// given target. The implementation uses {@link TermDocs#skipTo(int32_t)}. - /// @param target The target document number. - /// @return the matching document or -1 if none exist. - virtual int32_t advance(int32_t target); - - /// Returns a string representation of this TermScorer. 
- virtual String toString(); - - protected: - static const Collection SIM_NORM_DECODER(); - - virtual bool score(CollectorPtr collector, int32_t max, int32_t firstDocID); - }; -} - -#endif diff --git a/include/TermSpans.h b/include/TermSpans.h deleted file mode 100644 index b16010ce..00000000 --- a/include/TermSpans.h +++ /dev/null @@ -1,45 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMSPANS_H -#define TERMSPANS_H - -#include "Spans.h" - -namespace Lucene -{ - /// Public for extension only - class LPPAPI TermSpans : public Spans - { - public: - TermSpans(TermPositionsPtr positions, TermPtr term); - virtual ~TermSpans(); - - LUCENE_CLASS(TermSpans); - - protected: - TermPositionsPtr positions; - TermPtr term; - int32_t _doc; - int32_t freq; - int32_t count; - int32_t position; - - public: - virtual bool next(); - virtual bool skipTo(int32_t target); - virtual int32_t doc(); - virtual int32_t start(); - virtual int32_t end(); - virtual Collection getPayload(); - virtual bool isPayloadAvailable(); - virtual String toString(); - - TermPositionsPtr getPositions(); - }; -} - -#endif diff --git a/include/TermVectorEntry.h b/include/TermVectorEntry.h deleted file mode 100644 index a937ad34..00000000 --- a/include/TermVectorEntry.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTORENTRY_H -#define TERMVECTORENTRY_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Convenience class for holding TermVector information. - class LPPAPI TermVectorEntry : public LuceneObject - { - public: - TermVectorEntry(const String& field = EmptyString, const String& term = EmptyString, int32_t frequency = 0, - Collection offsets = Collection(), - Collection positions = Collection()); - virtual ~TermVectorEntry(); - - LUCENE_CLASS(TermVectorEntry); - - protected: - String field; - String term; - int32_t frequency; - Collection offsets; - Collection positions; - - public: - String getField(); - int32_t getFrequency(); - Collection getOffsets(); - Collection getPositions(); - String getTerm(); - - void setFrequency(int32_t frequency); - void setOffsets(Collection offsets); - void setPositions(Collection positions); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual String toString(); - }; -} - -#endif diff --git a/include/TermVectorEntryFreqSortedComparator.h b/include/TermVectorEntryFreqSortedComparator.h deleted file mode 100644 index d09572ef..00000000 --- a/include/TermVectorEntryFreqSortedComparator.h +++ /dev/null @@ -1,27 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTORENTRYFREQSORTEDCOMPARATOR_H -#define TERMVECTORENTRYFREQSORTEDCOMPARATOR_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Compares {@link TermVectorEntry}s first by frequency and then by the term (case-sensitive) - class LPPAPI TermVectorEntryFreqSortedComparator : public LuceneObject - { - public: - virtual ~TermVectorEntryFreqSortedComparator(); - - LUCENE_CLASS(TermVectorEntryFreqSortedComparator); - - public: - static bool compare(const TermVectorEntryPtr& first, const TermVectorEntryPtr& second); - }; -} - -#endif diff --git a/include/TermVectorMapper.h b/include/TermVectorMapper.h deleted file mode 100644 index 622e380a..00000000 --- a/include/TermVectorMapper.h +++ /dev/null @@ -1,73 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTORMAPPER_H -#define TERMVECTORMAPPER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// The TermVectorMapper can be used to map Term Vectors into your own structure instead of the parallel - /// array structure used by {@link IndexReader#getTermFreqVector(int,String)}. - /// - /// It is up to the implementation to make sure it is thread-safe. - class LPPAPI TermVectorMapper : public LuceneObject - { - public: - /// @param ignoringPositions true if this mapper should tell Lucene to ignore positions even if - /// they are stored. 
- /// @param ignoringOffsets similar to ignoringPositions - TermVectorMapper(bool ignoringPositions = false, bool ignoringOffsets = false); - - virtual ~TermVectorMapper(); - - LUCENE_CLASS(TermVectorMapper); - - protected: - bool ignoringPositions; - bool ignoringOffsets; - - public: - /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. - /// This method will be called once before retrieving the vector for a field. - /// - /// This method will be called before {@link #map(String,int,TermVectorOffsetInfo[],int[])}. - /// @param field The field the vector is for - /// @param numTerms The number of terms that need to be mapped - /// @param storeOffsets true if the mapper should expect offset information - /// @param storePositions true if the mapper should expect positions info - virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) = 0; - - /// Map the Term Vector information into your own structure - /// @param term The term to add to the vector - /// @param frequency The frequency of the term in the document - /// @param offsets null if the offset is not specified, otherwise the offset into the field of the term - /// @param positions null if the position is not specified, otherwise the position in the field of the term - virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions) = 0; - - /// Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and - /// they can be skipped over. Derived classes should set this to true if they want to ignore positions. - /// The default is false, meaning positions will be loaded if they are stored. - virtual bool isIgnoringPositions(); - - /// @see #isIgnoringPositions() Same principal as {@link #isIgnoringPositions()}, but applied to offsets. 
- virtual bool isIgnoringOffsets(); - - /// Passes down the index of the document whose term vector is currently being mapped, once for each top - /// level call to a term vector reader. - /// - /// Default implementation IGNORES the document number. Override if your implementation needs the document - /// number. - /// - /// NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations. - /// - /// @param documentNumber index of document currently being mapped - virtual void setDocumentNumber(int32_t documentNumber); - }; -} - -#endif diff --git a/include/TermVectorOffsetInfo.h b/include/TermVectorOffsetInfo.h deleted file mode 100644 index d1f49a20..00000000 --- a/include/TermVectorOffsetInfo.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTOROFFSETINFO_H -#define TERMVECTOROFFSETINFO_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// The TermVectorOffsetInfo class holds information pertaining to a Term in a {@link TermPositionVector}'s - /// offset information. This offset information is the character offset as set during the Analysis phase - /// (and thus may not be the actual offset in the original content). - class LPPAPI TermVectorOffsetInfo : public LuceneObject - { - public: - TermVectorOffsetInfo(int32_t startOffset = 0, int32_t endOffset = 0); - virtual ~TermVectorOffsetInfo(); - - LUCENE_CLASS(TermVectorOffsetInfo); - - protected: - int32_t startOffset; - int32_t endOffset; - - public: - /// Convenience declaration when creating a {@link TermPositionVector} that stores only position information. 
- static const Collection EMPTY_OFFSET_INFO(); - - /// The accessor for the ending offset for the term - int32_t getEndOffset(); - void setEndOffset(int32_t endOffset); - - /// The accessor for the starting offset of the term. - int32_t getStartOffset(); - void setStartOffset(int32_t startOffset); - - /// Two TermVectorOffsetInfos are equals if both the start and end offsets are the same. - /// @return true if both {@link #getStartOffset()} and {@link #getEndOffset()} are the same for both objects. - virtual bool equals(LuceneObjectPtr other); - - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/TermVectorsReader.h b/include/TermVectorsReader.h deleted file mode 100644 index 69dc23f8..00000000 --- a/include/TermVectorsReader.h +++ /dev/null @@ -1,151 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTORSREADER_H -#define TERMVECTORSREADER_H - -#include "TermVectorMapper.h" - -namespace Lucene -{ - class TermVectorsReader : public LuceneObject - { - public: - TermVectorsReader(); - TermVectorsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos); - TermVectorsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos, - int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); - virtual ~TermVectorsReader(); - - LUCENE_CLASS(TermVectorsReader); - - public: - /// NOTE: if you make a new format, it must be larger than the current format - static const int32_t FORMAT_VERSION; - - /// Changes to speed up bulk merging of term vectors - static const int32_t FORMAT_VERSION2; - - /// Changed strings to UTF8 with length-in-bytes not length-in-chars - static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES; - - /// NOTE: always change this if you switch to a new format. - static const int32_t FORMAT_CURRENT; - - /// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file - static const int32_t FORMAT_SIZE; - - static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR; - static const uint8_t STORE_OFFSET_WITH_TERMVECTOR; - - protected: - FieldInfosPtr fieldInfos; - - IndexInputPtr tvx; - IndexInputPtr tvd; - IndexInputPtr tvf; - int32_t _size; - int32_t numTotalDocs; - - /// The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. - int32_t docStoreOffset; - - int32_t format; - - public: - /// Used for bulk copy when merging - IndexInputPtr getTvdStream(); - - /// Used for bulk copy when merging - IndexInputPtr getTvfStream(); - - bool canReadRawDocs(); - - /// Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs starting with - /// startDocID. This is used for bulk copying when merging segments, if the field numbers are - /// congruent. 
Once this returns, the tvf & tvd streams are seeked to the startDocID. - void rawDocs(Collection tvdLengths, Collection tvfLengths, int32_t startDocID, int32_t numDocs); - - void close(); - - /// @return The number of documents in the reader - int32_t size(); - - void get(int32_t docNum, const String& field, TermVectorMapperPtr mapper); - - /// Retrieve the term vector for the given document and field - /// @param docNum The document number to retrieve the vector for - /// @param field The field within the document to retrieve - /// @return The TermFreqVector for the document and field or null if there is no termVector for - /// this field. - TermFreqVectorPtr get(int32_t docNum, const String& field); - - /// Return all term vectors stored for this document or null if the could not be read in. - /// - /// @param docNum The document number to retrieve the vector for - /// @return All term frequency vectors - Collection get(int32_t docNum); - - void get(int32_t docNumber, TermVectorMapperPtr mapper); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - protected: - void ConstructReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); - - void seekTvx(int32_t docNum); - - int32_t checkValidFormat(IndexInputPtr in); - - /// Reads the String[] fields; you have to pre-seek tvd to the right point - Collection readFields(int32_t fieldCount); - - /// Reads the long[] offsets into TVF; you have to pre-seek tvx/tvd to the right point - Collection readTvfPointers(int32_t fieldCount); - - Collection readTermVectors(int32_t docNum, Collection fields, Collection tvfPointers); - void readTermVectors(Collection fields, Collection tvfPointers, TermVectorMapperPtr mapper); - - /// @param field The field to read in - /// @param tvfPointer The pointer within the tvf file where we should start reading - /// @param mapper The mapper used to map the TermVector - void 
readTermVector(const String& field, int64_t tvfPointer, TermVectorMapperPtr mapper); - }; - - /// Models the existing parallel array structure - class ParallelArrayTermVectorMapper : public TermVectorMapper - { - public: - ParallelArrayTermVectorMapper(); - virtual ~ParallelArrayTermVectorMapper(); - - LUCENE_CLASS(ParallelArrayTermVectorMapper); - - protected: - Collection terms; - Collection termFreqs; - Collection< Collection > positions; - Collection< Collection > offsets; - int32_t currentPosition; - bool storingOffsets; - bool storingPositions; - String field; - - public: - /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. - /// This method will be called once before retrieving the vector for a field. - virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); - - /// Map the Term Vector information into your own structure - virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); - - /// Construct the vector - /// @return The {@link TermFreqVector} based on the mappings. - TermFreqVectorPtr materializeVector(); - }; -} - -#endif diff --git a/include/TermVectorsTermsWriter.h b/include/TermVectorsTermsWriter.h deleted file mode 100644 index f263169d..00000000 --- a/include/TermVectorsTermsWriter.h +++ /dev/null @@ -1,97 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTORSTERMSWRITER_H -#define TERMVECTORSTERMSWRITER_H - -#include "TermsHashConsumer.h" -#include "DocumentsWriter.h" -#include "RawPostingList.h" - -namespace Lucene -{ - class TermVectorsTermsWriter : public TermsHashConsumer - { - public: - TermVectorsTermsWriter(DocumentsWriterPtr docWriter); - virtual ~TermVectorsTermsWriter(); - - LUCENE_CLASS(TermVectorsTermsWriter); - - public: - DocumentsWriterWeakPtr _docWriter; - TermVectorsWriterPtr termVectorsWriter; - Collection docFreeList; - int32_t freeCount; - IndexOutputPtr tvx; - IndexOutputPtr tvd; - IndexOutputPtr tvf; - int32_t lastDocID; - int32_t allocCount; - - public: - virtual TermsHashConsumerPerThreadPtr addThread(TermsHashPerThreadPtr perThread); - virtual void createPostings(Collection postings, int32_t start, int32_t count); - virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state); - virtual void closeDocStore(SegmentWriteStatePtr state); - - TermVectorsTermsWriterPerDocPtr getPerDoc(); - - /// Fills in no-term-vectors for all docs we haven't seen since the last doc that had term vectors. 
- void fill(int32_t docID); - - void initTermVectorsWriter(); - void finishDocument(TermVectorsTermsWriterPerDocPtr perDoc); - bool freeRAM(); - void free(TermVectorsTermsWriterPerDocPtr doc); - - virtual void abort(); - virtual int32_t bytesPerPosting(); - }; - - class TermVectorsTermsWriterPerDoc : public DocWriter - { - public: - TermVectorsTermsWriterPerDoc(TermVectorsTermsWriterPtr termsWriter = TermVectorsTermsWriterPtr()); - virtual ~TermVectorsTermsWriterPerDoc(); - - LUCENE_CLASS(TermVectorsTermsWriterPerDoc); - - protected: - TermVectorsTermsWriterWeakPtr _termsWriter; - - public: - PerDocBufferPtr buffer; - RAMOutputStreamPtr perDocTvf; - int32_t numVectorFields; - - Collection fieldNumbers; - Collection fieldPointers; - - public: - void reset(); - virtual void abort(); - void addField(int32_t fieldNumber); - virtual int64_t sizeInBytes(); - virtual void finish(); - }; - - class TermVectorsTermsWriterPostingList : public RawPostingList - { - public: - TermVectorsTermsWriterPostingList(); - virtual ~TermVectorsTermsWriterPostingList(); - - LUCENE_CLASS(TermVectorsTermsWriterPostingList); - - public: - int32_t freq; // How many times this term occurred in the current doc - int32_t lastOffset; // Last offset we saw - int32_t lastPosition; // Last position where this term occurred - }; -} - -#endif diff --git a/include/TermVectorsTermsWriterPerField.h b/include/TermVectorsTermsWriterPerField.h deleted file mode 100644 index 298353b7..00000000 --- a/include/TermVectorsTermsWriterPerField.h +++ /dev/null @@ -1,55 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTORSTERMSWRITERPERFIELD_H -#define TERMVECTORSTERMSWRITERPERFIELD_H - -#include "TermsHashConsumerPerField.h" - -namespace Lucene -{ - class TermVectorsTermsWriterPerField : public TermsHashConsumerPerField - { - public: - TermVectorsTermsWriterPerField(TermsHashPerFieldPtr termsHashPerField, TermVectorsTermsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo); - virtual ~TermVectorsTermsWriterPerField(); - - LUCENE_CLASS(TermVectorsTermsWriterPerField); - - public: - TermVectorsTermsWriterPerThreadWeakPtr _perThread; - TermsHashPerFieldWeakPtr _termsHashPerField; - TermVectorsTermsWriterWeakPtr _termsWriter; - FieldInfoPtr fieldInfo; - DocStateWeakPtr _docState; - FieldInvertStateWeakPtr _fieldState; - - bool doVectors; - bool doVectorPositions; - bool doVectorOffsets; - - int32_t maxNumPostings; - OffsetAttributePtr offsetAttribute; - - public: - virtual int32_t getStreamCount(); - virtual bool start(Collection fields, int32_t count); - virtual void abort(); - - /// Called once per field per document if term vectors are enabled, to write the vectors to RAMOutputStream, - /// which is then quickly flushed to the real term vectors files in the Directory. - virtual void finish(); - - void shrinkHash(); - - virtual void start(FieldablePtr field); - virtual void newTerm(RawPostingListPtr p0); - virtual void addTerm(RawPostingListPtr p0); - virtual void skippingLongTerm(); - }; -} - -#endif diff --git a/include/TermVectorsTermsWriterPerThread.h b/include/TermVectorsTermsWriterPerThread.h deleted file mode 100644 index 675b685e..00000000 --- a/include/TermVectorsTermsWriterPerThread.h +++ /dev/null @@ -1,44 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTORSTERMSWRITERPERTHREAD_H -#define TERMVECTORSTERMSWRITERPERTHREAD_H - -#include "TermsHashConsumerPerThread.h" - -namespace Lucene -{ - class TermVectorsTermsWriterPerThread : public TermsHashConsumerPerThread - { - public: - TermVectorsTermsWriterPerThread(TermsHashPerThreadPtr termsHashPerThread, TermVectorsTermsWriterPtr termsWriter); - virtual ~TermVectorsTermsWriterPerThread(); - - LUCENE_CLASS(TermVectorsTermsWriterPerThread); - - public: - TermVectorsTermsWriterWeakPtr _termsWriter; - TermsHashPerThreadWeakPtr _termsHashPerThread; - DocStateWeakPtr _docState; - - TermVectorsTermsWriterPerDocPtr doc; - ByteSliceReaderPtr vectorSliceReader; - Collection utf8Results; - String lastVectorFieldName; - - public: - virtual void startDocument(); - virtual DocWriterPtr finishDocument(); - virtual TermsHashConsumerPerFieldPtr addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo); - virtual void abort(); - - /// Called only by assert - bool clearLastVectorFieldName(); - bool vectorFieldsInOrder(FieldInfoPtr fi); - }; -} - -#endif diff --git a/include/TermVectorsWriter.h b/include/TermVectorsWriter.h deleted file mode 100644 index 15f79f69..00000000 --- a/include/TermVectorsWriter.h +++ /dev/null @@ -1,43 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMVECTORSWRITER_H -#define TERMVECTORSWRITER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class TermVectorsWriter : public LuceneObject - { - public: - TermVectorsWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fieldInfos); - virtual ~TermVectorsWriter(); - - LUCENE_CLASS(TermVectorsWriter); - - protected: - IndexOutputPtr tvx; - IndexOutputPtr tvd; - IndexOutputPtr tvf; - FieldInfosPtr fieldInfos; - Collection utf8Results; - - public: - /// Add a complete document specified by all its term vectors. If document has no term vectors, - /// add value for tvx. - void addAllDocVectors(Collection vectors); - - /// Do a bulk copy of numDocs documents from reader to our streams. This is used to expedite merging, - /// if the field numbers are congruent. - void addRawDocuments(TermVectorsReaderPtr reader, Collection tvdLengths, Collection tvfLengths, int32_t numDocs); - - /// Close all streams. - void close(); - }; -} - -#endif diff --git a/include/TermsHash.h b/include/TermsHash.h deleted file mode 100644 index e2b071d8..00000000 --- a/include/TermsHash.h +++ /dev/null @@ -1,68 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMSHASH_H -#define TERMSHASH_H - -#include "InvertedDocConsumer.h" - -namespace Lucene -{ - /// This class implements {@link InvertedDocConsumer}, which is passed each token produced by the analyzer on - /// each field. It stores these tokens in a hash table, and allocates separate byte streams per token. 
Consumers - /// of this class, eg {@link FreqProxTermsWriter} and {@link TermVectorsTermsWriter}, write their own byte streams - /// under each term. - class TermsHash : public InvertedDocConsumer - { - public: - TermsHash(DocumentsWriterPtr docWriter, bool trackAllocations, TermsHashConsumerPtr consumer, TermsHashPtr nextTermsHash); - virtual ~TermsHash(); - - LUCENE_CLASS(TermsHash); - - public: - TermsHashConsumerPtr consumer; - TermsHashPtr nextTermsHash; - int32_t bytesPerPosting; - int32_t postingsFreeChunk; - DocumentsWriterWeakPtr _docWriter; - bool trackAllocations; - - protected: - Collection postingsFreeList; - int32_t postingsFreeCount; - int32_t postingsAllocCount; - - public: - /// Add a new thread - virtual InvertedDocConsumerPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread); - virtual TermsHashPerThreadPtr addThread(DocInverterPerThreadPtr docInverterPerThread, TermsHashPerThreadPtr primaryPerThread); - - virtual void setFieldInfos(FieldInfosPtr fieldInfos); - - /// Abort (called after hitting AbortException) - /// NOTE: do not make this sync'd; it's not necessary (DW ensures all other threads are idle), and it - /// leads to deadlock - virtual void abort(); - - void shrinkFreePostings(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state); - - /// Close doc stores - virtual void closeDocStore(SegmentWriteStatePtr state); - - /// Flush a new segment - virtual void flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state); - - /// Attempt to free RAM, returning true if any RAM was freed - virtual bool freeRAM(); - - void recyclePostings(Collection postings, int32_t numPostings); - - void getPostings(Collection postings); - }; -} - -#endif diff --git a/include/TermsHashConsumer.h b/include/TermsHashConsumer.h deleted file mode 100644 index 9bd63bd7..00000000 --- a/include/TermsHashConsumer.h +++ /dev/null @@ 
-1,36 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMSHASHCONSUMER_H -#define TERMSHASHCONSUMER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class TermsHashConsumer : public LuceneObject - { - public: - virtual ~TermsHashConsumer(); - - LUCENE_CLASS(TermsHashConsumer); - - public: - FieldInfosPtr fieldInfos; - - public: - virtual int32_t bytesPerPosting() = 0; - virtual void createPostings(Collection postings, int32_t start, int32_t count) = 0; - virtual TermsHashConsumerPerThreadPtr addThread(TermsHashPerThreadPtr perThread) = 0; - virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state) = 0; - virtual void abort() = 0; - virtual void closeDocStore(SegmentWriteStatePtr state) = 0; - - virtual void setFieldInfos(FieldInfosPtr fieldInfos); - }; -} - -#endif diff --git a/include/TermsHashConsumerPerField.h b/include/TermsHashConsumerPerField.h deleted file mode 100644 index 960e51ea..00000000 --- a/include/TermsHashConsumerPerField.h +++ /dev/null @@ -1,34 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMSHASHCONSUMERPERFIELD_H -#define TERMSHASHCONSUMERPERFIELD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Implement this class to plug into the TermsHash processor, which inverts & stores Tokens into a hash - /// table and provides an API for writing bytes into multiple streams for each unique Token. - class TermsHashConsumerPerField : public LuceneObject - { - public: - virtual ~TermsHashConsumerPerField(); - - LUCENE_CLASS(TermsHashConsumerPerField); - - public: - virtual bool start(Collection fields, int32_t count) = 0; - virtual void finish() = 0; - virtual void skippingLongTerm() = 0; - virtual void start(FieldablePtr field) = 0; - virtual void newTerm(RawPostingListPtr p) = 0; - virtual void addTerm(RawPostingListPtr p) = 0; - virtual int32_t getStreamCount() = 0; - }; -} - -#endif diff --git a/include/TermsHashConsumerPerThread.h b/include/TermsHashConsumerPerThread.h deleted file mode 100644 index 883c915f..00000000 --- a/include/TermsHashConsumerPerThread.h +++ /dev/null @@ -1,29 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMSHASHCONSUMERPERTHREAD_H -#define TERMSHASHCONSUMERPERTHREAD_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class TermsHashConsumerPerThread : public LuceneObject - { - public: - virtual ~TermsHashConsumerPerThread(); - - LUCENE_CLASS(TermsHashConsumerPerThread); - - public: - virtual void startDocument() = 0; - virtual DocWriterPtr finishDocument() = 0; - virtual TermsHashConsumerPerFieldPtr addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo) = 0; - virtual void abort() = 0; - }; -} - -#endif diff --git a/include/TermsHashPerField.h b/include/TermsHashPerField.h deleted file mode 100644 index 1b06b50d..00000000 --- a/include/TermsHashPerField.h +++ /dev/null @@ -1,99 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMSHASHPERFIELD_H -#define TERMSHASHPERFIELD_H - -#include "InvertedDocConsumerPerField.h" - -namespace Lucene -{ - class TermsHashPerField : public InvertedDocConsumerPerField - { - public: - TermsHashPerField(DocInverterPerFieldPtr docInverterPerField, TermsHashPerThreadPtr perThread, TermsHashPerThreadPtr nextPerThread, FieldInfoPtr fieldInfo); - virtual ~TermsHashPerField(); - - LUCENE_CLASS(TermsHashPerField); - - public: - TermsHashConsumerPerFieldPtr consumer; - TermsHashPerFieldPtr nextPerField; - DocInverterPerFieldWeakPtr _docInverterPerField; - TermsHashPerThreadPtr nextPerThread; - TermsHashPerThreadWeakPtr _perThread; - DocStatePtr docState; - FieldInvertStatePtr fieldState; - TermAttributePtr termAtt; - - // Copied from our perThread - CharBlockPoolPtr charPool; - IntBlockPoolPtr intPool; - ByteBlockPoolPtr bytePool; - - int32_t streamCount; - int32_t numPostingInt; - - FieldInfoPtr fieldInfo; - - bool postingsCompacted; - int32_t numPostings; - - IntArray intUptos; - int32_t intUptoStart; - - protected: - int32_t postingsHashSize; - int32_t postingsHashHalfSize; - int32_t postingsHashMask; - Collection postingsHash; - RawPostingListPtr p; - bool doCall; - bool doNextCall; - - public: - virtual void initialize(); - void shrinkHash(int32_t targetSize); - void reset(); - - /// Called on hitting an aborting exception - virtual void abort(); - - void initReader(ByteSliceReaderPtr reader, RawPostingListPtr p, int32_t stream); - - /// Collapse the hash table and sort in-place. - Collection sortPostings(); - - /// Called before a field instance is being processed - virtual void start(FieldablePtr field); - - /// Called once per field, and is given all Fieldable occurrences for this field in the document. 
- virtual bool start(Collection fields, int32_t count); - - void add(int32_t textStart); - - /// Primary entry point (for first TermsHash) - virtual void add(); - - void writeByte(int32_t stream, int8_t b); - void writeBytes(int32_t stream, const uint8_t* b, int32_t offset, int32_t length); - void writeVInt(int32_t stream, int32_t i); - - /// Called once per field per document, after all Fieldable occurrences are inverted - virtual void finish(); - - /// Called when postings hash is too small (> 50% occupied) or too large (< 20% occupied). - void rehashPostings(int32_t newSize); - - protected: - void compactPostings(); - - /// Test whether the text for current RawPostingList p equals current tokenText. - bool postingEquals(const wchar_t* tokenText, int32_t tokenTextLen); - }; -} - -#endif diff --git a/include/TermsHashPerThread.h b/include/TermsHashPerThread.h deleted file mode 100644 index e05d7daa..00000000 --- a/include/TermsHashPerThread.h +++ /dev/null @@ -1,59 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TERMSHASHPERTHREAD_H -#define TERMSHASHPERTHREAD_H - -#include "InvertedDocConsumerPerThread.h" - -namespace Lucene -{ - class TermsHashPerThread : public InvertedDocConsumerPerThread - { - public: - TermsHashPerThread(DocInverterPerThreadPtr docInverterPerThread, TermsHashPtr termsHash, TermsHashPtr nextTermsHash, TermsHashPerThreadPtr primaryPerThread); - virtual ~TermsHashPerThread(); - - LUCENE_CLASS(TermsHashPerThread); - - public: - DocInverterPerThreadWeakPtr _docInverterPerThread; - TermsHashWeakPtr _termsHash; - TermsHashPtr nextTermsHash; - TermsHashPerThreadWeakPtr _primaryPerThread; - TermsHashConsumerPerThreadPtr consumer; - TermsHashPerThreadPtr nextPerThread; - - CharBlockPoolPtr charPool; - IntBlockPoolPtr intPool; - ByteBlockPoolPtr bytePool; - bool primary; - DocStatePtr docState; - - Collection freePostings; - int32_t freePostingsCount; - - public: - virtual void initialize(); - - virtual InvertedDocConsumerPerFieldPtr addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo); - virtual void abort(); - - /// perField calls this when it needs more postings - void morePostings(); - - virtual void startDocument(); - virtual DocWriterPtr finishDocument(); - - /// Clear all state - void reset(bool recyclePostings); - - protected: - static bool noNullPostings(Collection postings, int32_t count, const String& details); - }; -} - -#endif diff --git a/include/TestPoint.h b/include/TestPoint.h deleted file mode 100644 index 86a85d29..00000000 --- a/include/TestPoint.h +++ /dev/null @@ -1,44 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef TESTPOINT_H -#define TESTPOINT_H - -#include "Lucene.h" - -namespace Lucene -{ - /// Used for unit testing as a substitute for stack trace - class TestPoint - { - public: - virtual ~TestPoint(); - - protected: - static MapStringInt testMethods; - static bool enable; - - public: - static void enableTestPoints(); - static void clear(); - static void setTestPoint(const String& object, const String& method, bool point); - static bool getTestPoint(const String& object, const String& method); - static bool getTestPoint(const String& method); - }; - - class TestScope - { - public: - TestScope(const String& object, const String& method); - virtual ~TestScope(); - - protected: - String object; - String method; - }; -} - -#endif diff --git a/include/ThreadPool.h b/include/ThreadPool.h deleted file mode 100644 index ce58bfd1..00000000 --- a/include/ThreadPool.h +++ /dev/null @@ -1,86 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef THREADPOOL_H -#define THREADPOOL_H - -#include -#include -#include -#include "LuceneObject.h" - -namespace Lucene -{ - typedef boost::shared_ptr workPtr; - - /// A Future represents the result of an asynchronous computation. Methods are provided to check if the computation - /// is complete, to wait for its completion, and to retrieve the result of the computation. The result can only be - /// retrieved using method get when the computation has completed, blocking if necessary until it is ready. 
- class Future : public LuceneObject - { - public: - virtual ~Future(); - - protected: - boost::any value; - - public: - void set(const boost::any& value) - { - SyncLock syncLock(this); - this->value = value; - } - - template - TYPE get() - { - SyncLock syncLock(this); - while (value.empty()) - wait(10); - return value.empty() ? TYPE() : boost::any_cast(value); - } - }; - - /// Utility class to handle a pool of threads. - class ThreadPool : public LuceneObject - { - public: - ThreadPool(); - virtual ~ThreadPool(); - - LUCENE_CLASS(ThreadPool); - - protected: - boost::asio::io_service io_service; - workPtr work; - boost::thread_group threadGroup; - - static const int32_t THREADPOOL_SIZE; - - public: - /// Get singleton thread pool instance. - static ThreadPoolPtr getInstance(); - - template - FuturePtr scheduleTask(FUNC func) - { - FuturePtr future(newInstance()); - io_service.post(boost::bind(&ThreadPool::execute, this, func, future)); - return future; - } - - protected: - // this will be executed when one of the threads is available - template - void execute(FUNC func, FuturePtr future) - { - future->set(func()); - future->notifyAll(); - } - }; -} - -#endif diff --git a/include/TimeLimitingCollector.h b/include/TimeLimitingCollector.h deleted file mode 100644 index 00285521..00000000 --- a/include/TimeLimitingCollector.h +++ /dev/null @@ -1,97 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TIMELIMITINGCOLLECTOR_H -#define TIMELIMITINGCOLLECTOR_H - -#include "Collector.h" - -namespace Lucene -{ - /// The {@link TimeLimitingCollector} is used to timeout search requests that take longer than the maximum - /// allowed search time limit. 
After this time is exceeded, the search thread is stopped by throwing a - /// {@link TimeExceededException}. - class LPPAPI TimeLimitingCollector : public Collector - { - public: - /// Create a TimeLimitedCollector wrapper over another {@link Collector} with a specified timeout. - /// @param collector the wrapped {@link Collector} - /// @param timeAllowed max time allowed for collecting hits after which TimeExceeded exception is thrown - TimeLimitingCollector(CollectorPtr collector, int64_t timeAllowed); - - virtual ~TimeLimitingCollector(); - - LUCENE_CLASS(TimeLimitingCollector); - - public: - /// Default timer resolution. - /// @see #setResolution(int64_t) - static const int32_t DEFAULT_RESOLUTION; - - /// Default for {@link #isGreedy()}. - /// @see #isGreedy() - bool DEFAULT_GREEDY; - - protected: - static int64_t resolution; - bool greedy; - - int64_t t0; - int64_t timeout; - CollectorPtr collector; - - int32_t docBase; - - public: - /// Return the timer resolution. - /// @see #setResolution(int64_t) - static int64_t getResolution(); - - /// Set the timer resolution. - /// The default timer resolution is 20 milliseconds. - /// This means that a search required to take no longer than 800 milliseconds may be stopped after - /// 780 to 820 milliseconds. Note that: - ///
    - ///
  • Finer (smaller) resolution is more accurate but less efficient. - ///
  • Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds. - ///
  • Setting resolution smaller than current resolution might take effect only after current resolution. - /// (Assume current resolution of 20 milliseconds is modified to 5 milliseconds, then it can take up to 20 - /// milliseconds for the change to have effect. - ///
- static void setResolution(int64_t newResolution); - - /// Stop timer thread. - static void stopTimer(); - - /// Checks if this time limited collector is greedy in collecting the last hit. A non greedy collector, - /// upon a timeout, would throw a TimeExceeded without allowing the wrapped collector to collect current - /// doc. A greedy one would first allow the wrapped hit collector to collect current doc and only then - /// throw a TimeExceeded exception. - /// @see #setGreedy(boolean) - bool isGreedy(); - - /// Sets whether this time limited collector is greedy. - /// @param greedy true to make this time limited greedy - /// @see #isGreedy() - void setGreedy(bool greedy); - - /// Calls {@link Collector#collect(int)} on the decorated {@link Collector} unless the allowed time has - /// passed, in which case it throws an exception. - virtual void collect(int32_t doc); - - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setScorer(ScorerPtr scorer); - virtual bool acceptsDocsOutOfOrder(); - - protected: - /// Initialize a single static timer thread to be used by all TimeLimitedCollector instances. - static TimerThreadPtr TIMER_THREAD(); - - friend class TimerThread; - }; -} - -#endif diff --git a/include/Token.h b/include/Token.h deleted file mode 100644 index 0491328a..00000000 --- a/include/Token.h +++ /dev/null @@ -1,357 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOKEN_H -#define TOKEN_H - -#include "Attribute.h" -#include "AttributeSource.h" - -namespace Lucene -{ - /// A Token is an occurrence of a term from the text of a field. 
It consists of a term's text, the start and end - /// offset of the term in the text of the field and a type string. - /// - /// The start and end offsets permit applications to re-associate a token with its source text, eg., to display - /// highlighted query terms in a document browser, or to show matching text fragments in a - /// KWIC display, etc. - /// - /// The type is a string, assigned by a lexical analyzer (a.k.a. tokenizer), naming the lexical or syntactic class - /// that the token belongs to. For example an end of sentence marker token might be implemented with type "eos". - /// The default token type is "word". - /// - /// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable length byte array. Use {@link - /// TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads - /// from the index. - /// - /// Tokenizers and TokenFilters should try to re-use a Token instance when possible for best performance, by implementing - /// the {@link TokenStream#incrementToken()} API. Failing that, to create a new Token you should first use one of - /// the constructors that starts with null text. To load the token from a char[] use - /// {@link #setTermBuffer(char[], int, int)}. To load from a String use {@link #setTermBuffer(String)} or {@link - /// #setTermBuffer(String, int, int)}. Alternatively you can get the Token's termBuffer by calling either {@link - /// #termBuffer()}, if you know that your text is shorter than the capacity of the termBuffer or {@link - /// #resizeTermBuffer(int)}, if there is any possibility that you may need to grow the buffer. Fill in the characters - /// of your term into this buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string, - /// or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to - /// set the length of the term text. 
- /// - /// Typical Token reuse patterns: - /// - /// Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified): - ///
-    /// return reusableToken->reinit(string, startOffset, endOffset[, type]);
-    /// 
- /// - /// Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified): - ///
-    /// return reusableToken->reinit(string, 0, string.length(), startOffset, endOffset[, type]);
-    /// 
- /// - /// Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified): - ///
-    /// return reusableToken->reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
-    /// 
- /// - /// Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified): - ///
-    /// return reusableToken->reinit(buffer, start, end - start, startOffset, endOffset[, type]);
-    /// 
- /// - /// Copying from one one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified): - ///
-    /// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
-    /// 
- /// - /// A few things to note: - /// clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but - /// should affect no one. - /// Because TokenStreams can be chained, one cannot assume that the Token's current type is correct. The startOffset - /// and endOffset represent the start and offset in the source text, so be careful in adjusting them. When caching a - /// reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again. - /// - /// @see Payload - class LPPAPI Token : public Attribute - { - public: - /// Constructs a Token will null text. - Token(); - - /// Constructs a Token with null text and start and end offsets. - /// @param start start offset in the source text - /// @param end end offset in the source text - Token(int32_t start, int32_t end); - - /// Constructs a Token with null text and start and end offsets plus the Token type. - /// @param start start offset in the source text - /// @param end end offset in the source text - /// @param type the lexical type of this Token - Token(int32_t start, int32_t end, const String& type); - - /// Constructs a Token with null text and start and end offsets plus flags. - /// @param start start offset in the source text - /// @param end end offset in the source text - /// @param flags The bits to set for this token - Token(int32_t start, int32_t end, int32_t flags); - - /// Constructs a Token with the given term text, start and end offsets. The type defaults to "word." - /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. - /// @param text term text - /// @param start start offset in the source text - /// @param end end offset in the source text - Token(const String& text, int32_t start, int32_t end); - - /// Constructs a Token with the given term text, start and end offsets and type. 
- /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. - /// @param text term text - /// @param start start offset in the source text - /// @param end end offset in the source text - /// @param type the lexical type of this Token - Token(const String& text, int32_t start, int32_t end, const String& type); - - /// Constructs a Token with the given term text, start and end offsets and flags. - /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. - /// @param text term text - /// @param start start offset in the source text - /// @param end end offset in the source text - /// @param flags The bits to set for this token - Token(const String& text, int32_t start, int32_t end, int32_t flags); - - /// Constructs a Token with the given term buffer (offset and length), start and end offsets - Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end); - - virtual ~Token(); - - LUCENE_CLASS(Token); - - public: - static const String& DEFAULT_TYPE(); - - protected: - static const int32_t MIN_BUFFER_SIZE; - - CharArray _termBuffer; - int32_t _termLength; - int32_t _startOffset; - int32_t _endOffset; - String _type; - int32_t flags; - PayloadPtr payload; - int32_t positionIncrement; - - public: - /// Set the position increment. This determines the position of this token relative to the previous Token - /// in a {@link TokenStream}, used in phrase searching. - /// - /// The default value is one. - /// - /// Some common uses for this are: - /// - /// Set it to zero to put multiple terms in the same position. This is useful if, eg., a word has multiple - /// stems. Searches for phrases including either stem will match. In this case, all but the first stem's - /// increment should be set to zero: the increment of the first instance should be one. 
Repeating a token - /// with an increment of zero can also be used to boost the scores of matches on that token. - /// - /// Set it to values greater than one to inhibit exact phrase matches. If, for example, one does not want - /// phrases to match across removed stop words, then one could build a stop word filter that removes stop - /// words and also sets the increment to the number of stop words removed before each non-stop word. Then - /// exact phrase queries will only match when the terms occur with no intervening stop words. - /// - /// @param positionIncrement the distance from the prior term - /// @see TermPositions - virtual void setPositionIncrement(int32_t positionIncrement); - - /// Returns the position increment of this Token. - /// @see #setPositionIncrement - virtual int32_t getPositionIncrement(); - - /// Returns the Token's term text. - /// - /// This method has a performance penalty because the text is stored internally in a char[]. If possible, - /// use {@link #termBuffer()} and {@link #termLength()} directly instead. If you really need a String, use - /// this method, which is nothing more than a convenience call to String(token->termBuffer(), token->termLength()) - virtual String term(); - - /// Copies the contents of buffer, starting at offset for length characters, into the termBuffer array. - /// @param buffer the buffer to copy - /// @param offset the index in the buffer of the first character to copy - /// @param length the number of characters to copy - virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length); - - /// Copies the contents of buffer into the termBuffer array. - /// @param buffer the buffer to copy - virtual void setTermBuffer(const String& buffer); - - /// Copies the contents of buffer, starting at offset and continuing for length characters, into the termBuffer array. 
- /// @param buffer the buffer to copy - /// @param offset the index in the buffer of the first character to copy - /// @param length the number of characters to copy - virtual void setTermBuffer(const String& buffer, int32_t offset, int32_t length); - - /// Returns the internal termBuffer character array which you can then directly alter. If the array is too - /// small for your token, use {@link #resizeTermBuffer(int)} to increase it. After altering the buffer be sure - /// to call {@link #setTermLength} to record the number of valid characters that were placed into the termBuffer. - virtual CharArray termBuffer(); - - /// Optimized implementation of termBuffer. - virtual wchar_t* termBufferArray(); - - /// Grows the termBuffer to at least size newSize, preserving the existing content. Note: If the next operation is - /// to change the contents of the term buffer use {@link #setTermBuffer(char[], int, int)}, {@link - /// #setTermBuffer(String)}, or {@link #setTermBuffer(String, int, int)} to optimally combine the resize with the - /// setting of the termBuffer. - /// @param newSize minimum size of the new termBuffer - /// @return newly created termBuffer with length >= newSize - virtual CharArray resizeTermBuffer(int32_t newSize); - - /// Return number of valid characters (length of the term) in the termBuffer array. - virtual int32_t termLength(); - - /// Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the termBuffer - /// or to synchronize with external manipulation of the termBuffer. Note: to grow the size of the array, use {@link - /// #resizeTermBuffer(int)} first. - /// @param length the truncated length - virtual void setTermLength(int32_t length); - - /// Returns this Token's starting offset, the position of the first character corresponding to this token in the - /// source text. 
- /// - /// Note that the difference between endOffset() and startOffset() may not be equal to {@link #termLength}, as the - /// term text may have been altered by a stemmer or some other filter. - virtual int32_t startOffset(); - - /// Set the starting offset. - /// @see #startOffset() - virtual void setStartOffset(int32_t offset); - - /// Returns this Token's ending offset, one greater than the position of the last character corresponding to this - /// token in the source text. The length of the token in the source text is (endOffset - startOffset). - virtual int32_t endOffset(); - - /// Set the ending offset. - /// @see #endOffset() - virtual void setEndOffset(int32_t offset); - - /// Set the starting and ending offset. - /// @see #startOffset() and #endOffset() - virtual void setOffset(int32_t startOffset, int32_t endOffset); - - /// Returns this Token's lexical type. Defaults to "word". - virtual String type(); - - /// Set the lexical type. - /// @see #type() - virtual void setType(const String& type); - - /// Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although - /// they do share similar purposes. The flags can be used to encode information about the token for use by other - /// {@link TokenFilter}s. - /// - /// @return The bits - virtual int32_t getFlags(); - - /// @see #getFlags() - virtual void setFlags(int32_t flags); - - /// Returns this Token's payload. - virtual PayloadPtr getPayload(); - - /// Sets this Token's payload. - virtual void setPayload(PayloadPtr payload); - - virtual String toString(); - - /// Resets the term text, payload, flags, and positionIncrement, startOffset, endOffset and token type to default. - virtual void clear(); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Makes a clone, but replaces the term buffer and start/end offset in the process. 
This is more efficient than - /// doing a full clone (and then calling setTermBuffer) because it saves a wasted copy of the old termBuffer. - TokenPtr clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); - - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - - /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(char[], int, int)}, {@link #setStartOffset}, - /// {@link #setEndOffset}, {@link #setType} - /// @return this Token instance - TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType); - - /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(char[], int, int)}, {@link #setStartOffset}, - /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE - /// @return this Token instance - TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); - - /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, - /// {@link #setEndOffset}, {@link #setType} - /// @return this Token instance - TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType); - - /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, - /// {@link #setEndOffset}, {@link #setType} - /// @return this Token instance - TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType); - - /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, - /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE - /// @return this Token instance - TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset); - - /// 
Shorthand for calling {@link #clear}, {@link #setTermBuffer(String, int, int)}, {@link #setStartOffset}, - /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE - /// @return this Token instance - TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); - - /// Copy the prototype token's fields into this one. Note: Payloads are shared. - void reinit(TokenPtr prototype); - - /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. - void reinit(TokenPtr prototype, const String& newTerm); - - /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. - void reinit(TokenPtr prototype, CharArray newTermBuffer, int32_t offset, int32_t length); - - virtual void copyTo(AttributePtr target); - - /// Convenience factory that returns Token as implementation for the basic attributes - static AttributeFactoryPtr TOKEN_ATTRIBUTE_FACTORY(); - - protected: - /// Construct Token and initialize values - void ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags); - - /// Allocates a buffer char[] of at least newSize, without preserving the existing content. Its always used in - /// places that set the content. - /// @param newSize minimum size of the buffer - void growTermBuffer(int32_t newSize); - - void initTermBuffer(); - - /// Like clear() but doesn't clear termBuffer/text - void clearNoTermBuffer(); - }; - - /// Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes and for all other - /// attributes calls the given delegate factory. 
- class LPPAPI TokenAttributeFactory : public AttributeFactory - { - public: - TokenAttributeFactory(AttributeFactoryPtr delegate); - virtual ~TokenAttributeFactory(); - - LUCENE_CLASS(TokenAttributeFactory); - - protected: - AttributeFactoryPtr delegate; - - public: - virtual AttributePtr createAttributeInstance(const String& className); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; -} - -#endif diff --git a/include/TokenFilter.h b/include/TokenFilter.h deleted file mode 100644 index e97d415c..00000000 --- a/include/TokenFilter.h +++ /dev/null @@ -1,46 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOKENFILTER_H -#define TOKENFILTER_H - -#include "TokenStream.h" - -namespace Lucene -{ - /// A TokenFilter is a TokenStream whose input is another TokenStream. - /// - /// This is an abstract class; subclasses must override {@link #incrementToken()}. - /// @see TokenStream - class LPPAPI TokenFilter : public TokenStream - { - protected: - /// Construct a token stream filtering the given input. - TokenFilter(TokenStreamPtr input); - - public: - virtual ~TokenFilter(); - - LUCENE_CLASS(TokenFilter); - - protected: - /// The source of tokens for this filter. - TokenStreamPtr input; - - public: - /// Performs end-of-stream operations, if any, and calls then end() on the input TokenStream. - /// NOTE: Be sure to call TokenFilter::end() first when overriding this method. - virtual void end(); - - /// Close the input TokenStream. - virtual void close(); - - /// Reset the filter as well as the input TokenStream. 
- virtual void reset(); - }; -} - -#endif diff --git a/include/TokenStream.h b/include/TokenStream.h deleted file mode 100644 index d9be75a5..00000000 --- a/include/TokenStream.h +++ /dev/null @@ -1,103 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOKENSTREAM_H -#define TOKENSTREAM_H - -#include "AttributeSource.h" - -namespace Lucene -{ - /// A TokenStream enumerates the sequence of tokens, either from {@link Field}s of a {@link Document} or from - /// query text. - /// - /// This is an abstract class; concrete subclasses are: {@link Tokenizer}, a TokenStream whose input is a Reader; - /// and {@link TokenFilter}, a TokenStream whose input is another TokenStream. - /// - /// A new TokenStream API has been introduced with Lucene 2.9. This API has moved from being {@link Token}-based - /// to {@link Attribute}-based. While {@link Token} still exists in 2.9 as a convenience class, the preferred way - /// to store the information of a {@link Token} is to use {@link Attribute}s. - /// - /// TokenStream now extends {@link AttributeSource}, which provides access to all of the token {@link Attribute}s - /// for the TokenStream. Note that only one instance per {@link Attribute} is created and reused for every - /// token. This approach reduces object creation and allows local caching of references to the {@link Attribute}s. - /// See {@link #incrementToken()} for further details. - /// - /// The workflow of the new TokenStream API is as follows: - /// - Instantiation of TokenStream/{@link TokenFilter}s which add/get attributes to/from the {@link AttributeSource}. - /// - The consumer calls {@link TokenStream#reset()}. 
- /// - The consumer retrieves attributes from the stream and stores local references to all attributes it wants to access. - /// - The consumer calls {@link #incrementToken()} until it returns false consuming the attributes after each call. - /// - The consumer calls {@link #end()} so that any end-of-stream operations can be performed. - /// - The consumer calls {@link #close()} to release any resource when finished using the TokenStream. - /// - /// To make sure that filters and consumers know which attributes are available, the attributes must be added during - /// instantiation. Filters and consumers are not required to check for availability of attributes in {@link - /// #incrementToken()}. - /// - /// Sometimes it is desirable to capture a current state of a TokenStream, eg., for buffering purposes (see {@link - /// CachingTokenFilter}, {@link TeeSinkTokenFilter}). For this use case {@link AttributeSource#captureState} and {@link - /// AttributeSource#restoreState} can be used. - class LPPAPI TokenStream : public AttributeSource - { - protected: - /// A TokenStream using the default attribute factory. - TokenStream(); - - /// A TokenStream that uses the same attributes as the supplied one. - TokenStream(AttributeSourcePtr input); - - /// A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances. - TokenStream(AttributeFactoryPtr factory); - - public: - virtual ~TokenStream(); - - LUCENE_CLASS(TokenStream); - - public: - /// Consumers (ie., {@link IndexWriter}) use this method to advance the stream to the next token. Implementing - /// classes must implement this method and update the appropriate {@link Attribute}s with the attributes of - /// the next token. - /// - /// The producer must make no assumptions about the attributes after the method has been returned: the caller may - /// arbitrarily change it. 
If the producer needs to preserve the state for subsequent calls, it can use {@link - /// #captureState} to create a copy of the current attribute state. - /// - /// This method is called for every token of a document, so an efficient implementation is crucial for good - /// performance. To avoid calls to {@link #addAttribute(Class)} and {@link #getAttribute(Class)}, references to - /// all {@link Attribute}s that this stream uses should be retrieved during instantiation. - /// - /// To ensure that filters and consumers know which attributes are available, the attributes must be added during - /// instantiation. Filters and consumers are not required to check for availability of attributes in {@link - /// #incrementToken()}. - /// - /// @return false for end of stream; true otherwise - virtual bool incrementToken() = 0; - - /// This method is called by the consumer after the last token has been consumed, after {@link #incrementToken()} - /// returned false (using the new TokenStream API). Streams implementing the old API should upgrade to use this - /// feature. - /// - /// This method can be used to perform any end-of-stream operations, such as setting the final offset of a stream. - /// The final offset of a stream might differ from the offset of the last token eg in case one or more whitespaces - /// followed after the last token, but a {@link WhitespaceTokenizer} was used. - virtual void end(); - - /// Resets this stream to the beginning. This is an optional operation, so subclasses may or may not implement - /// this method. {@link #reset()} is not needed for the standard indexing process. However, if the tokens of a - /// TokenStream are intended to be consumed more than once, it is necessary to implement {@link #reset()}. 
Note that - /// if your TokenStream caches tokens and feeds them back again after a reset, it is imperative that you clone the - /// tokens when you store them away (on the first pass) as well as when you return them (on future passes after - /// {@link #reset()}). - virtual void reset(); - - /// Releases resources associated with this stream. - virtual void close(); - }; -} - -#endif diff --git a/include/Tokenizer.h b/include/Tokenizer.h deleted file mode 100644 index 49f5efb1..00000000 --- a/include/Tokenizer.h +++ /dev/null @@ -1,70 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOKENIZER_H -#define TOKENIZER_H - -#include "TokenStream.h" - -namespace Lucene -{ - /// A Tokenizer is a TokenStream whose input is a Reader. - /// - /// This is an abstract class; subclasses must override {@link #incrementToken()} - /// - /// Note: Subclasses overriding {@link #incrementToken()} must call {@link AttributeSource#clearAttributes()} - /// before setting attributes. - class LPPAPI Tokenizer : public TokenStream - { - protected: - /// Construct a tokenizer with null input. - Tokenizer(); - - /// Construct a token stream processing the given input. - Tokenizer(ReaderPtr input); - - /// Construct a tokenizer with null input using the given AttributeFactory. - Tokenizer(AttributeFactoryPtr factory); - - /// Construct a token stream processing the given input using the given AttributeFactory. - Tokenizer(AttributeFactoryPtr factory, ReaderPtr input); - - /// Construct a token stream processing the given input using the given AttributeSource. 
- Tokenizer(AttributeSourcePtr source); - - /// Construct a token stream processing the given input using the given AttributeSource. - Tokenizer(AttributeSourcePtr source, ReaderPtr input); - - public: - virtual ~Tokenizer(); - - LUCENE_CLASS(Tokenizer); - - protected: - /// The text source for this Tokenizer. - ReaderPtr input; - CharStreamPtr charStream; - - public: - /// By default, closes the input Reader. - virtual void close(); - - /// Return the corrected offset. If {@link #input} is a {@link CharStream} subclass this method calls - /// {@link CharStream#correctOffset}, else returns currentOff. - /// @param currentOff offset as seen in the output - /// @return corrected offset based on the input - /// @see CharStream#correctOffset - virtual int32_t correctOffset(int32_t currentOff); - - using TokenStream::reset; - - /// Reset the tokenizer to a new reader. Typically, an analyzer (in its reusableTokenStream method) will - /// use this to re-use a previously created tokenizer. - virtual void reset(ReaderPtr input); - }; -} - -#endif diff --git a/include/TopDocs.h b/include/TopDocs.h deleted file mode 100644 index 7834ad8e..00000000 --- a/include/TopDocs.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOPDOCS_H -#define TOPDOCS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Represents hits returned by {@link Searcher#search(QueryPtr, FilterPtr, int32_t)} and {@link - /// Searcher#search(QueryPtr, int32_t)}. - class LPPAPI TopDocs : public LuceneObject - { - public: - /// Constructs a TopDocs with a default maxScore = double.NaN. - TopDocs(int32_t totalHits, Collection scoreDocs); - - /// Constructs a TopDocs. 
- TopDocs(int32_t totalHits, Collection scoreDocs, double maxScore); - - virtual ~TopDocs(); - - LUCENE_CLASS(TopDocs); - - public: - /// The total number of hits for the query. - int32_t totalHits; - - /// The top hits for the query. - Collection scoreDocs; - - /// Stores the maximum score value encountered, needed for normalizing. - double maxScore; - - public: - /// Returns the maximum score value encountered. Note that in case scores are not tracked, - /// this returns NaN. - double getMaxScore(); - - /// Sets the maximum score value encountered. - void setMaxScore(double maxScore); - }; -} - -#endif diff --git a/include/TopDocsCollector.h b/include/TopDocsCollector.h deleted file mode 100644 index b2cc3e14..00000000 --- a/include/TopDocsCollector.h +++ /dev/null @@ -1,83 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOPDOCSCOLLECTOR_H -#define TOPDOCSCOLLECTOR_H - -#include "Collector.h" -#include "PriorityQueue.h" - -namespace Lucene -{ - /// A base class for all collectors that return a {@link TopDocs} output. This collector allows easy extension - /// by providing a single constructor which accepts a {@link PriorityQueue} as well as protected members for - /// that priority queue and a counter of the number of total hits. - /// - /// Extending classes can override {@link #topDocs(int32_t, int32_t)} and {@link #getTotalHits()} in order to - /// provide their own implementation. - class LPPAPI TopDocsCollector : public Collector - { - public: - TopDocsCollector(HitQueueBasePtr pq); - virtual ~TopDocsCollector(); - - LUCENE_CLASS(TopDocsCollector); - - protected: - /// The priority queue which holds the top documents. 
Note that different implementations of PriorityQueue - /// give different meaning to 'top documents'. HitQueue for example aggregates the top scoring documents, - /// while other PQ implementations may hold documents sorted by other criteria. - HitQueueBasePtr pq; - - /// The total number of documents that the collector encountered. - int32_t totalHits; - - public: - /// The total number of documents that matched this query. - virtual int32_t getTotalHits(); - - /// Returns the top docs that were collected by this collector. - virtual TopDocsPtr topDocs(); - - /// Returns the documents in the range [start .. pq.size()) that were collected by this collector. Note that - /// if start >= pq.size(), an empty TopDocs is returned. - /// - /// This method is convenient to call if the application always asks for the last results, starting from the - /// last 'page'. - /// - /// NOTE: you cannot call this method more than once for each search execution. If you need to call it more - /// than once, passing each time a different start, you should call {@link #topDocs()} and work with the - /// returned {@link TopDocs} object, which will contain all the results this search execution collected. - virtual TopDocsPtr topDocs(int32_t start); - - /// Returns the documents in the rage [start .. start + howMany) that were collected by this collector. Note - /// that if start >= pq.size(), an empty TopDocs is returned, and if pq.size() - start < howMany, then only - /// the available documents in [start .. pq.size()) are returned. - /// - /// This method is useful to call in case pagination of search results is allowed by the search application, - /// as well as it attempts to optimize the memory used by allocating only as much as requested by howMany. - /// - /// NOTE: you cannot call this method more than once for each search execution. 
If you need to call it more - /// than once, passing each time a different range, you should call {@link #topDocs()} and work with the - /// returned {@link TopDocs} object, which will contain all the results this search execution collected. - virtual TopDocsPtr topDocs(int32_t start, int32_t howMany); - - protected: - /// This is used in case topDocs() is called with illegal parameters, or there simply aren't (enough) results. - static TopDocsPtr EMPTY_TOPDOCS(); - - /// Populates the results array with the ScoreDoc instances. This can be overridden in case a different - /// ScoreDoc type should be returned. - virtual void populateResults(Collection results, int32_t howMany); - - /// Returns a {@link TopDocs} instance containing the given results. If results is null it means there are - /// no results to return, either because there were 0 calls to collect() or because the arguments to topDocs - /// were invalid. - virtual TopDocsPtr newTopDocs(Collection results, int32_t start); - }; -} - -#endif diff --git a/include/TopFieldCollector.h b/include/TopFieldCollector.h deleted file mode 100644 index 2f85351b..00000000 --- a/include/TopFieldCollector.h +++ /dev/null @@ -1,73 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOPFIELDCOLLECTOR_H -#define TOPFIELDCOLLECTOR_H - -#include "TopDocsCollector.h" - -namespace Lucene -{ - /// A {@link Collector} that sorts by {@link SortField} using {@link FieldComparator}s. - /// - /// See the {@link #create(SortPtr, int32_t, bool, bool, bool, bool)} method for instantiating a TopFieldCollector. 
- class LPPAPI TopFieldCollector : public TopDocsCollector - { - public: - TopFieldCollector(HitQueueBasePtr pq, int32_t numHits, bool fillFields); - virtual ~TopFieldCollector(); - - LUCENE_CLASS(TopFieldCollector); - - protected: - bool fillFields; - - /// Stores the maximum score value encountered, needed for normalizing. If document scores are not tracked, - /// this value is initialized to NaN. - double maxScore; - - int32_t numHits; - FieldValueHitQueueEntryPtr bottom; - bool queueFull; - int32_t docBase; - - public: - /// Creates a new {@link TopFieldCollector} from the given arguments. - /// - /// NOTE: The instances returned by this method pre-allocate a full array of length numHits. - /// - /// @param sort The sort criteria (SortFields). - /// @param numHits The number of results to collect. - /// @param fillFields Specifies whether the actual field values should be returned on the results (FieldDoc). - /// @param trackDocScores Specifies whether document scores should be tracked and set on the results. Note - /// that if set to false, then the results' scores will be set to NaN. Setting this to true affects - /// performance, as it incurs the score computation on each competitive result. Therefore if document scores - /// are not required by the application, it is recommended to set it to false. - /// @param trackMaxScore Specifies whether the query's maxScore should be tracked and set on the resulting - /// {@link TopDocs}. Note that if set to false, {@link TopDocs#getMaxScore()} returns NaN. Setting this to - /// true affects performance as it incurs the score computation on each result. Also, setting this true - /// automatically sets trackDocScores to true as well. - /// @param docsScoredInOrder Specifies whether documents are scored in doc Id order or not by the given - /// {@link Scorer} in {@link #setScorer(ScorerPtr)}. - /// @return a {@link TopFieldCollector} instance which will sort the results by the sort criteria. 
- static TopFieldCollectorPtr create(SortPtr sort, int32_t numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder); - - virtual void add(int32_t slot, int32_t doc, double score); - - virtual bool acceptsDocsOutOfOrder(); - - protected: - static const Collection EMPTY_SCOREDOCS(); - - /// Only the following callback methods need to be overridden since topDocs(int32_t, int32_t) calls them to - /// return the results. - virtual void populateResults(Collection results, int32_t howMany); - - virtual TopDocsPtr newTopDocs(Collection results, int32_t start); - }; -} - -#endif diff --git a/include/TopFieldDocs.h b/include/TopFieldDocs.h deleted file mode 100644 index 157f0d61..00000000 --- a/include/TopFieldDocs.h +++ /dev/null @@ -1,34 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOPFIELDDOCS_H -#define TOPFIELDDOCS_H - -#include "TopDocs.h" - -namespace Lucene -{ - /// Represents hits returned by {@link Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr)}. - class LPPAPI TopFieldDocs : public TopDocs - { - public: - /// @param totalHits Total number of hits for the query. - /// @param scoreDocs The top hits for the query. - /// @param fields The sort criteria used to find the top hits. - /// @param maxScore The maximum score encountered. - TopFieldDocs(int32_t totalHits, Collection scoreDocs, Collection fields, double maxScore); - - virtual ~TopFieldDocs(); - - LUCENE_CLASS(TopFieldDocs); - - public: - /// The fields which were used to sort results by. 
- Collection fields; - }; -} - -#endif diff --git a/include/TopScoreDocCollector.h b/include/TopScoreDocCollector.h deleted file mode 100644 index f1667589..00000000 --- a/include/TopScoreDocCollector.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TOPSCOREDOCCOLLECTOR_H -#define TOPSCOREDOCCOLLECTOR_H - -#include "TopDocsCollector.h" - -namespace Lucene -{ - /// A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link TopDocs}. - /// This is used by {@link IndexSearcher} to implement {@link TopDocs}-based search. Hits are sorted by score - /// descending and then (when the scores are tied) docID ascending. When you create an instance of this - /// collector you should know in advance whether documents are going to be collected in doc Id order or not. - /// - /// NOTE: The values Nan, NEGATIVE_INFINITY and POSITIVE_INFINITY are not valid scores. This collector will - /// not properly collect hits with such scores. - class LPPAPI TopScoreDocCollector : public TopDocsCollector - { - public: - TopScoreDocCollector(int32_t numHits); - virtual ~TopScoreDocCollector(); - - LUCENE_CLASS(TopScoreDocCollector); - - INTERNAL: - ScoreDocPtr pqTop; - int32_t docBase; - ScorerWeakPtr _scorer; - - public: - /// Creates a new {@link TopScoreDocCollector} given the number of hits to collect and whether documents - /// are scored in order by the input {@link Scorer} to {@link #setScorer(ScorerPtr)}. - /// - /// NOTE: The instances returned by this method pre-allocate a full array of length numHits. 
- static TopScoreDocCollectorPtr create(int32_t numHits, bool docsScoredInOrder); - - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setScorer(ScorerPtr scorer); - - protected: - virtual TopDocsPtr newTopDocs(Collection results, int32_t start); - }; -} - -#endif diff --git a/include/TypeAttribute.h b/include/TypeAttribute.h deleted file mode 100644 index ea2e8e05..00000000 --- a/include/TypeAttribute.h +++ /dev/null @@ -1,46 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef TYPEATTRIBUTE_H -#define TYPEATTRIBUTE_H - -#include "Attribute.h" - -namespace Lucene -{ - /// A Token's lexical type. The Default value is "word". - class LPPAPI TypeAttribute : public Attribute - { - public: - TypeAttribute(); - TypeAttribute(const String& type); - virtual ~TypeAttribute(); - - LUCENE_CLASS(TypeAttribute); - - protected: - String _type; - static const String& DEFAULT_TYPE(); - - public: - virtual String toString(); - - /// Returns this Token's lexical type. Defaults to "word". - String type(); - - /// Set the lexical type. - /// @see #type() - void setType(const String& type); - - virtual void clear(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual void copyTo(AttributePtr target); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/UTF8Stream.h b/include/UTF8Stream.h deleted file mode 100644 index 6d424b5f..00000000 --- a/include/UTF8Stream.h +++ /dev/null @@ -1,148 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
-// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef UTF8STREAM_H -#define UTF8STREAM_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class UTF8Base : public LuceneObject - { - public: - virtual ~UTF8Base(); - LUCENE_CLASS(UTF8Base); - - public: - static const uint16_t LEAD_SURROGATE_MIN; - static const uint16_t LEAD_SURROGATE_MAX; - static const uint16_t TRAIL_SURROGATE_MIN; - static const uint16_t TRAIL_SURROGATE_MAX; - static const uint16_t LEAD_OFFSET; - static const uint32_t SURROGATE_OFFSET; - static const uint32_t CODE_POINT_MAX; - - static const wchar_t UNICODE_REPLACEMENT_CHAR; - static const wchar_t UNICODE_TERMINATOR; - - protected: - virtual uint32_t readNext() = 0; - - uint8_t mask8(uint32_t b); - uint16_t mask16(uint32_t c); - bool isTrail(uint32_t b); - bool isSurrogate(uint32_t cp); - bool isLeadSurrogate(uint32_t cp); - bool isTrailSurrogate(uint32_t cp); - bool isValidCodePoint(uint32_t cp); - bool isOverlongSequence(uint32_t cp, int32_t length); - }; - - class UTF8Encoder : public UTF8Base - { - public: - UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd); - virtual ~UTF8Encoder(); - - LUCENE_CLASS(UTF8Encoder); - - protected: - const wchar_t* unicodeBegin; - const wchar_t* unicodeEnd; - - public: - int32_t encode(uint8_t* utf8, int32_t length); - - int32_t utf16to8(uint8_t* utf8, int32_t length); - int32_t utf32to8(uint8_t* utf8, int32_t length); - - protected: - virtual uint32_t readNext(); - - uint8_t* appendChar(uint8_t* utf8, uint32_t cp); - }; - - class UTF8EncoderStream : public UTF8Encoder - { - public: - UTF8EncoderStream(ReaderPtr reader); - virtual ~UTF8EncoderStream(); - - LUCENE_CLASS(UTF8EncoderStream); - - protected: - ReaderPtr reader; - - protected: - virtual uint32_t readNext(); - }; - - class UTF8Decoder : public UTF8Base - { - public: - 
UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End); - virtual ~UTF8Decoder(); - - LUCENE_CLASS(UTF8Decoder); - - protected: - const uint8_t* utf8Begin; - const uint8_t* utf8End; - - public: - int32_t decode(wchar_t* unicode, int32_t length); - - int32_t utf8to16(wchar_t* unicode, int32_t length); - int32_t utf8to32(wchar_t* unicode, int32_t length); - - protected: - virtual uint32_t readNext(); - - int32_t sequenceLength(uint32_t cp); - bool getSequence(uint32_t& cp, int32_t length); - bool isValidNext(uint32_t& cp); - }; - - class UTF8DecoderStream : public UTF8Decoder - { - public: - UTF8DecoderStream(ReaderPtr reader); - virtual ~UTF8DecoderStream(); - - LUCENE_CLASS(UTF8DecoderStream); - - protected: - ReaderPtr reader; - - protected: - virtual uint32_t readNext(); - }; - - class UTF16Decoder : public UTF8Base - { - public: - UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End); - virtual ~UTF16Decoder(); - - LUCENE_CLASS(UTF16Decoder); - - protected: - const uint16_t* utf16Begin; - const uint16_t* utf16End; - - public: - int32_t decode(wchar_t* unicode, int32_t length); - - int32_t utf16to16(wchar_t* unicode, int32_t length); - int32_t utf16to32(wchar_t* unicode, int32_t length); - - protected: - virtual uint32_t readNext(); - }; -} - -#endif diff --git a/include/UnicodeUtils.h b/include/UnicodeUtils.h deleted file mode 100644 index 7fdb1331..00000000 --- a/include/UnicodeUtils.h +++ /dev/null @@ -1,101 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef UNICODEUTILS_H -#define UNICODEUTILS_H - -#include "LuceneObject.h" - -namespace Lucene -{ - class LPPAPI UnicodeUtil - { - public: - virtual ~UnicodeUtil(); - - public: - /// Return true if supplied character is alpha-numeric. - static bool isAlnum(wchar_t c); - - /// Return true if supplied character is alphabetic. - static bool isAlpha(wchar_t c); - - /// Return true if supplied character is numeric. - static bool isDigit(wchar_t c); - - /// Return true if supplied character is a space. - static bool isSpace(wchar_t c); - - /// Return true if supplied character is uppercase. - static bool isUpper(wchar_t c); - - /// Return true if supplied character is lowercase. - static bool isLower(wchar_t c); - - /// Return true if supplied character is other type of letter. - static bool isOther(wchar_t c); - - /// Return true if supplied character is non-spacing. - static bool isNonSpacing(wchar_t c); - - /// Return uppercase representation of a given character. - static wchar_t toUpper(wchar_t c); - - /// Return lowercase representation of a given character. - static wchar_t toLower(wchar_t c); - }; - - /// Utility class that contains utf8 and unicode translations. 
- template - class TranslationResult : public LuceneObject - { - public: - TranslationResult() - { - result = Array::newInstance(10); - length = 0; - } - - public: - Array result; - int32_t length; - - public: - void setLength(int32_t length) - { - if (!result) - result = Array::newInstance((int32_t)(1.5 * (double)length)); - if (result.size() < length) - result.resize((int32_t)(1.5 * (double)length)); - this->length = length; - } - - void copyText(const TranslationResult& other) - { - setLength(other.length); - MiscUtils::arrayCopy(other.result.get(), 0, result.get(), 0, other.length); - } - - void copyText(boost::shared_ptr< TranslationResult > other) - { - copyText(*other); - } - }; - - class LPPAPI UTF8Result : public TranslationResult - { - public: - virtual ~UTF8Result(); - }; - - class LPPAPI UnicodeResult : public TranslationResult - { - public: - virtual ~UnicodeResult(); - }; -} - -#endif diff --git a/include/ValueSource.h b/include/ValueSource.h deleted file mode 100644 index 8fe985e2..00000000 --- a/include/ValueSource.h +++ /dev/null @@ -1,46 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef VALUESOURCE_H -#define VALUESOURCE_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Source of values for basic function queries. - /// - /// At its default/simplest form, values - one per doc - are used as the score of that doc. - /// - /// Values are instantiated as {@link DocValues} for a particular reader. - /// ValueSource implementations differ in RAM requirements: it would always be a factor of the number of - /// documents, but for each document the number of bytes can be 1, 2, 4, or 8. 
- class LPPAPI ValueSource : public LuceneObject - { - public: - virtual ~ValueSource(); - LUCENE_CLASS(ValueSource); - - public: - /// Return the DocValues used by the function query. - /// @param reader The IndexReader used to read these values. If any caching is involved, that caching - /// would also be IndexReader based. - virtual DocValuesPtr getValues(IndexReaderPtr reader) = 0; - - /// Description of field, used in explain() - virtual String description() = 0; - - virtual String toString(); - - /// Needed for possible caching of query results - used by {@link ValueSourceQuery#equals(LuceneObjectPtr)}. - virtual bool equals(LuceneObjectPtr other) = 0; - - /// Needed for possible caching of query results - used by {@link ValueSourceQuery#hashCode()}. - virtual int32_t hashCode() = 0; - }; -} - -#endif diff --git a/include/ValueSourceQuery.h b/include/ValueSourceQuery.h deleted file mode 100644 index 56514845..00000000 --- a/include/ValueSourceQuery.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef VALUESOURCEQUERY_H -#define VALUESOURCEQUERY_H - -#include "Query.h" - -namespace Lucene -{ - /// A Query that sets the scores of document to the values obtained from a {@link ValueSource}. - /// - /// This query provides a score for each and every undeleted document in the index. - /// - /// The value source can be based on a (cached) value of an indexed field, but it can also be based on an - /// external source, eg. values read from an external database. - /// - /// Score is set as: Score(doc,query) = (query.getBoost() * query.getBoost()) * valueSource(doc). 
- class LPPAPI ValueSourceQuery : public Query - { - public: - /// Create a value source query - /// @param valSrc provides the values defines the function to be used for scoring - ValueSourceQuery(ValueSourcePtr valSrc); - - virtual ~ValueSourceQuery(); - - LUCENE_CLASS(ValueSourceQuery); - - public: - ValueSourcePtr valSrc; - - public: - using Query::toString; - - virtual QueryPtr rewrite(IndexReaderPtr reader); - virtual void extractTerms(SetTerm terms); - virtual WeightPtr createWeight(SearcherPtr searcher); - virtual String toString(const String& field); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; -} - -#endif diff --git a/include/VariantUtils.h b/include/VariantUtils.h deleted file mode 100644 index 1400449a..00000000 --- a/include/VariantUtils.h +++ /dev/null @@ -1,102 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef VARIANTUTILS_H -#define VARIANTUTILS_H - -#include -#include "Lucene.h" -#include "MiscUtils.h" - -namespace Lucene -{ - class LPPAPI VariantUtils - { - public: - template - static TYPE get(boost::any var) - { - return var.type() == typeid(TYPE) ? boost::any_cast(var) : TYPE(); - } - - template - static TYPE get(VAR var) - { - return var.type() == typeid(TYPE) ? 
boost::get(var) : TYPE(); - } - - template - static bool typeOf(VAR var) - { - return (var.type() == typeid(TYPE)); - } - - static VariantNull null() - { - return VariantNull(); - } - - static bool isNull(boost::any var) - { - return var.empty(); - } - - template - static bool isNull(VAR var) - { - return typeOf(var); - } - - template - static int32_t hashCode(VAR var) - { - if (typeOf(var)) - return StringUtils::hashCode(get(var)); - if (typeOf(var)) - return get(var); - if (typeOf(var)) - return (int32_t)get(var); - if (typeOf(var)) - { - int64_t longBits = MiscUtils::doubleToLongBits(get(var)); - return (int32_t)(longBits ^ (longBits >> 32)); - } - if (typeOf< Collection >(var)) - return get< Collection >(var).hashCode(); - if (typeOf< Collection >(var)) - return get< Collection >(var).hashCode(); - if (typeOf< Collection >(var)) - return get< Collection >(var).hashCode(); - if (typeOf< Collection >(var)) - return get< Collection >(var).hashCode(); - if (typeOf< Collection >(var)) - return get< Collection >(var).hashCode(); - if (typeOf(var)) - return get(var)->hashCode(); - return 0; - } - - template - static bool equalsType(FIRST first, SECOND second) - { - return (first.type() == second.type()); - } - - template - static bool equals(FIRST first, SECOND second) - { - return first.type() == second.type() ? (first == second) : false; - } - - template - static int32_t compareTo(VAR first, VAR second) - { - return first < second ? -1 : (first == second ? 0 : 1); - } - }; -} - -#endif diff --git a/include/Weight.h b/include/Weight.h deleted file mode 100644 index d573627f..00000000 --- a/include/Weight.h +++ /dev/null @@ -1,84 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#ifndef WEIGHT_H -#define WEIGHT_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Calculate query weights and build query scorers. - /// - /// The purpose of {@link Weight} is to ensure searching does not modify a {@link Query}, so that a - /// {@link Query} instance can be reused. - /// {@link Searcher} dependent state of the query should reside in the {@link Weight}. - /// {@link IndexReader} dependent state should reside in the {@link Scorer}. - /// - /// Weight is used in the following way: - ///
    - ///
  1. A Weight is constructed by a top-level query, given a Searcher ({@link Query#createWeight(Searcher)}). - ///
  2. The {@link #sumOfSquaredWeights()} method is called on the Weight to compute the query normalization - /// factor {@link Similarity#queryNorm(float)} of the query clauses contained in the query. - ///
  3. The query normalization factor is passed to {@link #normalize(float)}. At this point the weighting is - /// complete. - ///
  4. A Scorer is constructed by {@link #scorer(IndexReaderPtr, bool, bool)}. - ///
- class LPPAPI Weight : public LuceneObject - { - public: - virtual ~Weight(); - LUCENE_CLASS(Weight); - - public: - /// An explanation of the score computation for the named document. - /// @param reader sub-reader containing the give doc - /// @param doc - /// @return an Explanation for the score - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc) = 0; - - /// The query that this concerns. - virtual QueryPtr getQuery() = 0; - - /// The weight for this query. - virtual double getValue() = 0; - - /// Assigns the query normalization factor to this. - virtual void normalize(double norm) = 0; - - /// Returns a {@link Scorer} which scores documents in/out-of order according to scoreDocsInOrder. - /// - /// NOTE: even if scoreDocsInOrder is false, it is recommended to check whether the returned Scorer - /// indeed scores documents out of order (ie., call {@link #scoresDocsOutOfOrder()}), as some Scorer - /// implementations will always return documents in-order. - /// - /// NOTE: null can be returned if no documents will be scored by this query. - /// - /// @param reader The {@link IndexReader} for which to return the {@link Scorer}. - /// @param scoreDocsInOrder Specifies whether in-order scoring of documents is required. Note that if - /// set to false (i.e., out-of-order scoring is required), this method can return whatever scoring mode - /// it supports, as every in-order scorer is also an out-of-order one. However, an out-of-order scorer - /// may not support {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)}, therefore it is - /// recommended to request an in-order scorer if use of these methods is required. - /// @param topScorer If true, {@link Scorer#score(CollectorPtr)} will be called; if false, {@link - /// Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will be called. - /// @return a {@link Scorer} which scores documents in/out-of order. 
- virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) = 0; - - /// The sum of squared weights of contained query clauses. - virtual double sumOfSquaredWeights() = 0; - - /// Returns true if this implementation scores docs only out of order. This method is used in conjunction - /// with {@link Collector}'s {@link Collector#acceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and - /// {@link #scorer(IndexReaderPtr, bool, bool)} to create a matching {@link Scorer} instance for a given - /// {@link Collector}, or vice versa. - /// - /// NOTE: the default implementation returns false, ie. the Scorer scores documents in-order. - virtual bool scoresDocsOutOfOrder(); - }; -} - -#endif diff --git a/include/WhitespaceAnalyzer.h b/include/WhitespaceAnalyzer.h deleted file mode 100644 index a51f42dc..00000000 --- a/include/WhitespaceAnalyzer.h +++ /dev/null @@ -1,28 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef WHITESPACEANALYZER_H -#define WHITESPACEANALYZER_H - -#include "Analyzer.h" - -namespace Lucene -{ - /// An Analyzer that uses {@link WhitespaceTokenizer}. 
- class LPPAPI WhitespaceAnalyzer : public Analyzer - { - public: - virtual ~WhitespaceAnalyzer(); - - LUCENE_CLASS(WhitespaceAnalyzer); - - public: - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; -} - -#endif diff --git a/include/WhitespaceTokenizer.h b/include/WhitespaceTokenizer.h deleted file mode 100644 index ff3a39e6..00000000 --- a/include/WhitespaceTokenizer.h +++ /dev/null @@ -1,38 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef WHITESPACETOKENIZER_H -#define WHITESPACETOKENIZER_H - -#include "CharTokenizer.h" - -namespace Lucene -{ - /// A WhitespaceTokenizer is a tokenizer that divides text at whitespace. Adjacent sequences of non-Whitespace - /// characters form tokens. - class LPPAPI WhitespaceTokenizer : public CharTokenizer - { - public: - /// Construct a new WhitespaceTokenizer. - WhitespaceTokenizer(ReaderPtr input); - - /// Construct a new WhitespaceTokenizer using a given {@link AttributeSource}. - WhitespaceTokenizer(AttributeSourcePtr source, ReaderPtr input); - - /// Construct a new WhitespaceTokenizer using a given {@link AttributeSource.AttributeFactory}. - WhitespaceTokenizer(AttributeFactoryPtr factory, ReaderPtr input); - - virtual ~WhitespaceTokenizer(); - - LUCENE_CLASS(WhitespaceTokenizer); - - public: - /// Collects only characters which do not satisfy {@link Character#isWhitespace(char)}. 
- virtual bool isTokenChar(wchar_t c); - }; -} - -#endif diff --git a/include/WildcardQuery.h b/include/WildcardQuery.h deleted file mode 100644 index cd5b10d0..00000000 --- a/include/WildcardQuery.h +++ /dev/null @@ -1,54 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef WILDCARDQUERY_H -#define WILDCARDQUERY_H - -#include "MultiTermQuery.h" - -namespace Lucene -{ - /// Implements the wildcard search query. Supported wildcards are *, which matches any character sequence - /// (including the empty one), and ?, which matches any single character. Note this query can be slow, as - /// it needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, a Wildcard - /// term should not start with one of the wildcards * or ?. - /// - /// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. - /// @see WildcardTermEnum - class LPPAPI WildcardQuery : public MultiTermQuery - { - public: - WildcardQuery(TermPtr term); - virtual ~WildcardQuery(); - - LUCENE_CLASS(WildcardQuery); - - protected: - bool termContainsWildcard; - bool termIsPrefix; - TermPtr term; - - public: - using MultiTermQuery::toString; - - /// Returns the pattern term. - TermPtr getTerm(); - - virtual QueryPtr rewrite(IndexReaderPtr reader); - - /// Prints a user-readable version of this query. 
- virtual String toString(const String& field); - - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - - protected: - virtual FilteredTermEnumPtr getEnum(IndexReaderPtr reader); - }; -} - -#endif diff --git a/include/WildcardTermEnum.h b/include/WildcardTermEnum.h deleted file mode 100644 index 633373e4..00000000 --- a/include/WildcardTermEnum.h +++ /dev/null @@ -1,54 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef WILDCARDTERMENUM_H -#define WILDCARDTERMENUM_H - -#include "FilteredTermEnum.h" - -namespace Lucene -{ - /// Subclass of FilteredTermEnum for enumerating all terms that match the specified wildcard filter term. - /// - /// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than - /// all that precede it. - class LPPAPI WildcardTermEnum : public FilteredTermEnum - { - public: - /// Creates a new WildcardTermEnum. - /// - /// After calling the constructor the enumeration is already pointing to the first valid term if such - /// a term exists. - WildcardTermEnum(IndexReaderPtr reader, TermPtr term); - - virtual ~WildcardTermEnum(); - - LUCENE_CLASS(WildcardTermEnum); - - public: - static const wchar_t WILDCARD_STRING; - static const wchar_t WILDCARD_CHAR; - - TermPtr searchTerm; - String field; - String text; - String pre; - int32_t preLen; - bool _endEnum; - - public: - virtual double difference(); - - /// Determines if a word matches a wildcard pattern. 
- static bool wildcardEquals(const String& pattern, int32_t patternIdx, const String& string, int32_t stringIdx); - - protected: - virtual bool termCompare(TermPtr term); - virtual bool endEnum(); - }; -} - -#endif diff --git a/include/WordlistLoader.h b/include/WordlistLoader.h deleted file mode 100644 index e7e14cc5..00000000 --- a/include/WordlistLoader.h +++ /dev/null @@ -1,49 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. -///////////////////////////////////////////////////////////////////////////// - -#ifndef WORDLISTLOADER_H -#define WORDLISTLOADER_H - -#include "LuceneObject.h" - -namespace Lucene -{ - /// Loader for text files that represent a list of stopwords. - class LPPAPI WordlistLoader : public LuceneObject - { - public: - virtual ~WordlistLoader(); - - LUCENE_CLASS(WordlistLoader); - - public: - /// Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). - /// Every line of the file should contain only one word. The words need to be in lowercase if you make use of an - /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). - /// - /// @param wordfile File name containing the wordlist - /// @param comment The comment string to ignore - /// @return A set with the file's words - static HashSet getWordSet(const String& wordfile, const String& comment = EmptyString); - - /// Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). - /// Every line of the file should contain only one word. The words need to be in lowercase if you make use of an - /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). 
- /// - /// @param reader Reader containing the wordlist - /// @param comment The comment string to ignore - /// @return A set with the file's words - static HashSet getWordSet(ReaderPtr reader, const String& comment = EmptyString); - - /// Reads a stem dictionary. Each line contains: - ///
word\tstem
- /// (ie. two tab separated words) - /// @return stem dictionary that overrules the stemming algorithm - static MapStringString getStemDict(const String& wordstemfile); - }; -} - -#endif diff --git a/include/config_h/CMakeLists.txt b/include/config_h/CMakeLists.txt new file mode 100644 index 00000000..439ef071 --- /dev/null +++ b/include/config_h/CMakeLists.txt @@ -0,0 +1,39 @@ +################################# +# set Config.h vars +################################# +if(ENABLE_BOOST_INTEGER) + set(USE_BOOST_INTEGER "define") +else() + set(USE_BOOST_INTEGER "undef") +endif() + +if(ENABLE_CYCLIC_CHECK) + set(USE_CYCLIC_CHECK "define") +else() + set(USE_CYCLIC_CHECK "undef") +endif() + + +if(LUCENE_BUILD_SHARED) + set(LPP_SHARED_DLL "define") +else() + set(LPP_SHARED_DLL "undef") +endif() + + +################################# +# generate Config.h +################################# +configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/Config.h.in" + "${lucene++_BINARY_DIR}/include/Config.h" @ONLY +) + + +################################# +# install Config.h +################################# +install( + FILES + "${lucene++_BINARY_DIR}/include/Config.h" + DESTINATION include/lucene++ ) diff --git a/include/config_h/Config.h.in b/include/config_h/Config.h.in new file mode 100644 index 00000000..f3926499 --- /dev/null +++ b/include/config_h/Config.h.in @@ -0,0 +1,122 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef CONFIG_H +#define CONFIG_H + +#if defined(_WIN32) || defined(_WIN64) +#pragma warning(disable:4251) +#pragma warning(disable:4275) +#pragma warning(disable:4005) +#pragma warning(disable:4996) +#ifndef _WIN64 +#pragma warning(disable:4244) +#endif +#endif + + +// Define to enable boost integer types +#@USE_BOOST_INTEGER@ LPP_USE_BOOST_INTEGER + + +// Generic helper definitions for shared library support +#if defined(_WIN32) || defined(_WIN64) || defined(__CYGWIN__) + #define LPP_IMPORT __declspec(dllimport) + #define LPP_EXPORT __declspec(dllexport) + #define LPP_LOCAL +#else + #if __GNUC__ >= 4 + #define LPP_IMPORT __attribute__ ((visibility ("default"))) + #define LPP_EXPORT __attribute__ ((visibility ("default"))) + #define LPP_LOCAL __attribute__ ((visibility ("hidden"))) + #else + #define LPP_IMPORT + #define LPP_EXPORT + #define LPP_LOCAL + #endif +#endif + +// bulding shared? +#@LPP_SHARED_DLL@ LPP_SHARED_LIB + + +// setup library binds +#ifdef LPP_SHARED_LIB + #ifdef LPP_BUILDING_LIB + #define LPP_API LPP_EXPORT + #define LPP_CONTRIB_API LPP_EXPORT + #else + #define LPP_API LPP_IMPORT + #define LPP_CONTRIB_API LPP_IMPORT + #endif +#else + #define LPP_API + #define LPP_CONTRIB_API + #define LPP_LOCAL +#endif // LPP_LOCAL + + +// legacy binds +#define LPPAPI LPP_API +#define LPPCONTRIBAPI LPP_CONTRIB_API +#define LPPLOCAL LPP_LOCAL + + +// Check windows +#if defined(_WIN32) || defined(_WIN64) + #define LPP_UNICODE_CHAR_SIZE_2 +#if defined(_WIN64) + #define LPP_BUILD_64 +#else + #define LPP_BUILD_32 +#endif +#endif + + +// Check GCC +#if defined(__GNUC__) + #define LPP_UNICODE_CHAR_SIZE_4 + #if defined(__x86_64__) || defined(__ppc64__) + #define LPP_BUILD_64 + #else + #define LPP_BUILD_32 + #endif +#endif + + +// Default to 32-bit platforms +#if !defined(LPP_BUILD_32) && !defined(LPP_BUILD_64) + #define LPP_BUILD_32 +#endif + + +// Default to 4-byte unicode format +#if 
!defined(LPP_UNICODE_CHAR_SIZE_2) && !defined(LPP_UNICODE_CHAR_SIZE_4) + #define LPP_UNICODE_CHAR_SIZE_4 +#endif + + +// Define to enable cyclic checking in debug builds +#@USE_CYCLIC_CHECK@ LPP_USE_CYCLIC_CHECK + + +// Make internal bitset storage public +#define BOOST_DYNAMIC_BITSET_DONT_USE_FRIENDS +#define BOOST_FILESYSTEM_VERSION 3 + + +// Use windows definitions +#if defined(_WIN32) || defined(_WIN64) + #define BOOST_USE_WINDOWS_H +#endif + +// Disable deprication warnings in windows +#if defined(_WIN32) || defined(_WIN64) + #define _CRT_SECURE_NO_WARNINGS +#endif + + +#endif //CONFIG_H diff --git a/include/lucene++/ASCIIFoldingFilter.h b/include/lucene++/ASCIIFoldingFilter.h new file mode 100644 index 00000000..68597772 --- /dev/null +++ b/include/lucene++/ASCIIFoldingFilter.h @@ -0,0 +1,66 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ASCIIFOLDINGFILTER_H +#define ASCIIFOLDINGFILTER_H + +#include "TokenFilter.h" + +namespace Lucene { + +/// This class converts alphabetic, numeric, and symbolic Unicode characters which are not in the first 127 ASCII +/// characters (the "Basic Latin" Unicode block) into their ASCII equivalents, if one exists. 
+/// +/// Characters from the following Unicode blocks are converted; however, only those characters with reasonable ASCII +/// alternatives are converted: +/// +/// C1 Controls and Latin-1 Supplement: http://www.unicode.org/charts/PDF/U0080.pdf +/// Latin Extended-A: http://www.unicode.org/charts/PDF/U0100.pdf +/// Latin Extended-B: http://www.unicode.org/charts/PDF/U0180.pdf +/// Latin Extended Additional: http://www.unicode.org/charts/PDF/U1E00.pdf +/// Latin Extended-C: http://www.unicode.org/charts/PDF/U2C60.pdf +/// Latin Extended-D: http://www.unicode.org/charts/PDF/UA720.pdf +/// IPA Extensions: http://www.unicode.org/charts/PDF/U0250.pdf +/// Phonetic Extensions: http://www.unicode.org/charts/PDF/U1D00.pdf +/// Phonetic Extensions Supplement: http://www.unicode.org/charts/PDF/U1D80.pdf +/// General Punctuation: http://www.unicode.org/charts/PDF/U2000.pdf +/// Superscripts and Subscripts: http://www.unicode.org/charts/PDF/U2070.pdf +/// Enclosed Alphanumerics: http://www.unicode.org/charts/PDF/U2460.pdf +/// Dingbats: http://www.unicode.org/charts/PDF/U2700.pdf +/// Supplemental Punctuation: http://www.unicode.org/charts/PDF/U2E00.pdf +/// Alphabetic Presentation Forms: http://www.unicode.org/charts/PDF/UFB00.pdf +/// Halfwidth and Fullwidth Forms: http://www.unicode.org/charts/PDF/UFF00.pdf +/// +/// See: http://en.wikipedia.org/wiki/Latin_characters_in_Unicode +/// +/// The set of character conversions supported by this class is a superset of those supported by Lucene's {@link +/// ISOLatin1AccentFilter} which strips accents from Latin1 characters. For example, 'à' will be replaced by 'a'. 
+/// +class LPPAPI ASCIIFoldingFilter : public TokenFilter { +public: + ASCIIFoldingFilter(const TokenStreamPtr& input); + virtual ~ASCIIFoldingFilter(); + + LUCENE_CLASS(ASCIIFoldingFilter); + +protected: + CharArray output; + int32_t outputPos; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); + + /// Converts characters above ASCII to their ASCII equivalents. For example, accents are removed from + /// accented characters. + /// @param input The string to fold + /// @param length The number of characters in the input string + void foldToASCII(const wchar_t* input, int32_t length); +}; + +} + +#endif diff --git a/include/lucene++/AbstractAllTermDocs.h b/include/lucene++/AbstractAllTermDocs.h new file mode 100644 index 00000000..eb6342ea --- /dev/null +++ b/include/lucene++/AbstractAllTermDocs.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ABSTRACTALLTERMDOCS_H +#define ABSTRACTALLTERMDOCS_H + +#include "TermDocs.h" + +namespace Lucene { + +/// Base class for enumerating all but deleted docs. +/// +/// NOTE: this class is meant only to be used internally by Lucene; it's only public so it +/// can be shared across packages. 
+class LPPAPI AbstractAllTermDocs : public TermDocs, public LuceneObject { +public: + AbstractAllTermDocs(int32_t maxDoc); + virtual ~AbstractAllTermDocs(); + + LUCENE_CLASS(AbstractAllTermDocs); + +protected: + int32_t maxDoc; + int32_t _doc; + +public: + virtual void seek(const TermPtr& term); + virtual void seek(const TermEnumPtr& termEnum); + virtual int32_t doc(); + virtual int32_t freq(); + virtual bool next(); + virtual int32_t read(Collection& docs, Collection& freqs); + virtual bool skipTo(int32_t target); + virtual void close(); + virtual bool isDeleted(int32_t doc) = 0; +}; + +} + +#endif diff --git a/include/lucene++/AbstractField.h b/include/lucene++/AbstractField.h new file mode 100644 index 00000000..b719c630 --- /dev/null +++ b/include/lucene++/AbstractField.h @@ -0,0 +1,221 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ABSTRACTFIELD_H +#define ABSTRACTFIELD_H + +#include "Fieldable.h" + +namespace Lucene { + +class LPPAPI AbstractField : public Fieldable, public LuceneObject { +public: + /// Specifies whether and how a field should be stored. + enum Store { + /// Store the original field value in the index. This is useful for short texts like a document's title + /// which should be displayed with the results. The value is stored in its original form, ie. no analyzer + /// is used before it is stored. + STORE_YES, + + /// Do not store the field value in the index. + STORE_NO + }; + + /// Specifies whether and how a field should be indexed. + enum Index { + /// Do not index the field value. This field can thus not be searched, but one can still access its + /// contents provided it is {@link Field.Store stored}. 
+ INDEX_NO, + + /// Index the tokens produced by running the field's value through an Analyzer. This is useful for + /// common text. + INDEX_ANALYZED, + + /// Index the field's value without using an Analyzer, so it can be searched. As no analyzer is used + /// the value will be stored as a single term. This is useful for unique Ids like product numbers. + INDEX_NOT_ANALYZED, + + /// Index the field's value without an Analyzer, and also disable the storing of norms. Note that you + /// can also separately enable/disable norms by calling {@link Field#setOmitNorms}. No norms means + /// that index-time field and document boosting and field length normalization are disabled. The benefit + /// is less memory usage as norms take up one byte of RAM per indexed field for every document in the + /// index, during searching. Note that once you index a given field with norms enabled, disabling norms + /// will have no effect. In other words, for this to have the above described effect on a field, all + /// instances of that field must be indexed with NOT_ANALYZED_NO_NORMS from the beginning. + INDEX_NOT_ANALYZED_NO_NORMS, + + /// Index the tokens produced by running the field's value through an Analyzer, and also separately + /// disable the storing of norms. See {@link #NOT_ANALYZED_NO_NORMS} for what norms are and why you + /// may want to disable them. + INDEX_ANALYZED_NO_NORMS + }; + + /// Specifies whether and how a field should have term vectors. + enum TermVector { + /// Do not store term vectors. + TERM_VECTOR_NO, + + /// Store the term vectors of each document. A term vector is a list of the document's terms and their + /// number of occurrences in that document. 
+ TERM_VECTOR_YES, + + /// Store the term vector + token position information + /// @see #YES + TERM_VECTOR_WITH_POSITIONS, + + /// Store the term vector + token offset information + /// @see #YES + TERM_VECTOR_WITH_OFFSETS, + + /// Store the term vector + token position and offset information + /// @see #YES + /// @see #WITH_POSITIONS + /// @see #WITH_OFFSETS + TERM_VECTOR_WITH_POSITIONS_OFFSETS + }; + +public: + virtual ~AbstractField(); + + LUCENE_CLASS(AbstractField); + +protected: + AbstractField(); + AbstractField(const String& name, Store store, Index index, TermVector termVector); + + String _name; + bool storeTermVector; + bool storeOffsetWithTermVector; + bool storePositionWithTermVector; + bool _omitNorms; + bool _isStored; + bool _isIndexed; + bool _isTokenized; + bool _isBinary; + bool lazy; + bool omitTermFreqAndPositions; + double boost; + + // the data object for all different kind of field values + FieldsData fieldsData; + + // pre-analyzed tokenStream for indexed fields + TokenStreamPtr tokenStream; + + // length/offset for all primitive types + int32_t binaryLength; + int32_t binaryOffset; + +public: + /// Sets the boost factor hits on this field. This value will be multiplied into the score of all + /// hits on this this field of this document. + /// + /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field. + /// If a document has multiple fields with the same name, all such values are multiplied together. + /// This product is then used to compute the norm factor for the field. By default, in the {@link + /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the + /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)} + /// before it is stored in the index. One should attempt to ensure that this product does not overflow + /// the range of that encoding. 
+ /// + /// @see Document#setBoost(double) + /// @see Similarity#computeNorm(String, FieldInvertState) + /// @see Similarity#encodeNorm(double) + virtual void setBoost(double boost); + + /// Returns the boost factor for hits for this field. + /// + /// The default value is 1.0. + /// + /// Note: this value is not stored directly with the document in the index. Documents returned from + /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value + /// present as when this field was indexed. + virtual double getBoost(); + + /// Returns the name of the field as an interned string. For example "date", "title", "body", ... + virtual String name(); + + /// True if the value of the field is to be stored in the index for return with search hits. It is an + /// error for this to be true if a field is Reader-valued. + virtual bool isStored(); + + /// True if the value of the field is to be indexed, so that it may be searched on. + virtual bool isIndexed(); + + /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields + /// are indexed as a single word and may not be Reader-valued. + virtual bool isTokenized(); + + /// True if the term or terms used to index this field are stored as a term vector, available from + /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the + /// original content of the field, only to terms used to index it. If the original content must be + /// preserved, use the stored attribute instead. + virtual bool isTermVectorStored(); + + /// True if terms are stored as term vector together with their offsets (start and end position in + /// source text). + virtual bool isStoreOffsetWithTermVector(); + + /// True if terms are stored as term vector together with their token positions. + virtual bool isStorePositionWithTermVector(); + + /// True if the value of the field is stored as binary. 
+ virtual bool isBinary(); + + /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} + /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. + /// @return reference to the Field value as byte[]. + virtual ByteArray getBinaryValue(); + + /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} + /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. + /// @return reference to the Field value as byte[]. + virtual ByteArray getBinaryValue(ByteArray result); + + /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is + /// undefined. + /// @return length of byte[] segment that represents this Field value. + virtual int32_t getBinaryLength(); + + /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is + /// undefined. + /// @return index of the first character in byte[] segment that represents this Field value. + virtual int32_t getBinaryOffset(); + + /// True if norms are omitted for this indexed field. + virtual bool getOmitNorms(); + + /// @see #setOmitTermFreqAndPositions + virtual bool getOmitTermFreqAndPositions(); + + /// If set, omit normalization factors associated with this indexed field. + /// This effectively disables indexing boosts and length normalization for this field. + virtual void setOmitNorms(bool omitNorms); + + /// If set, omit term freq, positions and payloads from postings for this field. + /// + /// NOTE: While this option reduces storage space required in the index, it also means any query requiring + /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail + /// to find results. + virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions); + + /// Indicates whether a Field is Lazy or not. 
The semantics of Lazy loading are such that if a Field + /// is lazily loaded, retrieving it's values via {@link #stringValue()} or {@link #getBinaryValue()} + /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open. + /// + /// @return true if this field can be loaded lazily + virtual bool isLazy(); + + /// Prints a Field for human consumption. + virtual String toString(); + +protected: + void setStoreTermVector(TermVector termVector); +}; + +} + +#endif diff --git a/include/lucene++/AllTermDocs.h b/include/lucene++/AllTermDocs.h new file mode 100644 index 00000000..d3f46efe --- /dev/null +++ b/include/lucene++/AllTermDocs.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ALLTERMDOCS_H +#define ALLTERMDOCS_H + +#include "AbstractAllTermDocs.h" + +namespace Lucene { + +class AllTermDocs : public AbstractAllTermDocs { +public: + AllTermDocs(const SegmentReaderPtr& parent); + virtual ~AllTermDocs(); + + LUCENE_CLASS(AllTermDocs); + +protected: + BitVectorWeakPtr _deletedDocs; + +public: + virtual bool isDeleted(int32_t doc); +}; + +} + +#endif diff --git a/include/lucene++/Analyzer.h b/include/lucene++/Analyzer.h new file mode 100644 index 00000000..42860834 --- /dev/null +++ b/include/lucene++/Analyzer.h @@ -0,0 +1,70 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef ANALYZER_H +#define ANALYZER_H + +#include "CloseableThreadLocal.h" + +namespace Lucene { + +/// An Analyzer builds TokenStreams, which analyze text. It thus represents a policy for extracting index terms +/// from text. +/// +/// Typical implementations first build a Tokenizer, which breaks the stream of characters from the Reader into +/// raw Tokens. One or more TokenFilters may then be applied to the output of the Tokenizer. +class LPPAPI Analyzer : public LuceneObject { +public: + virtual ~Analyzer(); + LUCENE_CLASS(Analyzer); + +protected: + CloseableThreadLocal tokenStreams; + +public: + /// Creates a TokenStream which tokenizes all the text in the provided Reader. Must be able to handle null + /// field name for backward compatibility. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) = 0; + + /// Creates a TokenStream that is allowed to be re-used from the previous time that the same thread called + /// this method. Callers that do not need to use more than one TokenStream at the same time from this analyzer + /// should use this method for better performance. + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Invoked before indexing a Fieldable instance if terms have already been added to that field. This allows + /// custom analyzers to place an automatic position increment gap between Fieldable instances using the same + /// field name. The default value position increment gap is 0. With a 0 position increment gap and the typical + /// default token position increment of 1, all terms in a field, including across Fieldable instances, are in + /// successive positions, allowing exact PhraseQuery matches, for instance, across Fieldable instance boundaries. + /// + /// @param fieldName Fieldable name being indexed. 
+ /// @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} + virtual int32_t getPositionIncrementGap(const String& fieldName); + + /// Just like {@link #getPositionIncrementGap}, except for Token offsets instead. By default this returns 1 for + /// tokenized fields and, as if the fields were joined with an extra space character, and 0 for un-tokenized + /// fields. This method is only called if the field produced at least one token for indexing. + /// + /// @param field the field just indexed + /// @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)} + virtual int32_t getOffsetGap(const FieldablePtr& field); + + /// Frees persistent resources used by this Analyzer + virtual void close(); + +protected: + /// Used by Analyzers that implement reusableTokenStream to retrieve previously saved TokenStreams for re-use + /// by the same thread. + virtual LuceneObjectPtr getPreviousTokenStream(); + + /// Used by Analyzers that implement reusableTokenStream to save a TokenStream for later re-use by the + /// same thread. + virtual void setPreviousTokenStream(const LuceneObjectPtr& stream); +}; + +} + +#endif diff --git a/include/lucene++/Array.h b/include/lucene++/Array.h new file mode 100644 index 00000000..f868341e --- /dev/null +++ b/include/lucene++/Array.h @@ -0,0 +1,135 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef ARRAY_H +#define ARRAY_H + +#include +#include "Lucene.h" + +namespace Lucene { + +template +class ArrayData { +public: + ArrayData(int32_t size_) { + data = NULL; + resize(size_); + } + + ~ArrayData() { + resize(0); + } + +public: + TYPE* data; + int32_t size; + +public: + void resize(int32_t size_) { + if (size_ == 0) { + FreeMemory(data); + data = NULL; + } else if (data == NULL) { + data = (TYPE*)AllocMemory(size_ * sizeof(TYPE)); + } else { + data = (TYPE*)ReallocMemory(data, size_ * sizeof(TYPE)); + } + this->size = size_; + } +}; + +/// Utility template class to handle sharable arrays of simple data types +template +class Array { +public: + typedef Array this_type; + typedef ArrayData array_type; + + Array() { + array = NULL; + } + +protected: + boost::shared_ptr container; + array_type* array; + +public: + static this_type newInstance(int32_t size) { + this_type instance; + instance.container = Lucene::newInstance(size); + instance.array = instance.container.get(); + return instance; + } + + void reset() { + resize(0); + } + + void resize(int32_t size) { + if (size == 0) { + container.reset(); + } else if (!container) { + container = Lucene::newInstance(size); + } else { + container->resize(size); + } + array = container.get(); + } + + TYPE* get() const { + return array->data; + } + + int32_t size() const { + return array->size; + } + + bool equals(const this_type& other) const { + if (array->size != other.array->size) { + return false; + } + return (std::memcmp(array->data, other.array->data, array->size) == 0); + } + + int32_t hashCode() const { + return (int32_t)(int64_t)array; + } + + TYPE& operator[] (int32_t i) const { + return array->data[i]; + } + + operator bool () const { + return container.get() != NULL; + } + + bool operator! 
() const { + return !container; + } + + bool operator== (const Array& other) { + return (container == other.container); + } + + bool operator!= (const Array& other) { + return (container != other.container); + } +}; + +template +inline std::size_t hash_value(const Array& value) { + return (std::size_t)value.hashCode(); +} + +template +inline bool operator== (const Array& value1, const Array& value2) { + return (value1.hashCode() == value2.hashCode()); +} + +} + +#endif diff --git a/include/lucene++/Attribute.h b/include/lucene++/Attribute.h new file mode 100644 index 00000000..4c75ce10 --- /dev/null +++ b/include/lucene++/Attribute.h @@ -0,0 +1,56 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ATTRIBUTE_H +#define ATTRIBUTE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Base class for Attributes that can be added to a {@link AttributeSource}. +/// +/// Attributes are used to add data in a dynamic, yet type-safe way to a source of usually streamed objects, +/// eg. a {@link TokenStream}. +class LPPAPI Attribute : public LuceneObject { +public: + virtual ~Attribute(); + LUCENE_CLASS(Attribute); + +public: + /// Clears the values in this Attribute and resets it to its default value. If this implementation + /// implements more than one Attribute interface it clears all. 
+ virtual void clear() = 0; + + /// Subclasses must implement this method and should compute a hashCode similar to this: + /// + /// int32_t hashCode() + /// { + /// int32_t code = startOffset; + /// code = code * 31 + endOffset; + /// return code; + /// } + /// + /// see also {@link #equals(Object)} + virtual int32_t hashCode() = 0; + + /// All values used for computation of {@link #hashCode()} should be checked here for equality. + /// + /// see also {@link LuceneObject#equals(Object)} + virtual bool equals(const LuceneObjectPtr& other) = 0; + + /// Copies the values from this Attribute into the passed-in target attribute. The target implementation + /// must support all the Attributes this implementation supports. + virtual void copyTo(const AttributePtr& target) = 0; + + /// Shallow clone. Subclasses must override this if they need to clone any members deeply. + /// @param base clone reference - null when called initially, then set in top virtual override. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()) = 0; +}; + +} + +#endif diff --git a/include/lucene++/AttributeSource.h b/include/lucene++/AttributeSource.h new file mode 100644 index 00000000..6b63649d --- /dev/null +++ b/include/lucene++/AttributeSource.h @@ -0,0 +1,186 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ATTRIBUTESOURCE_H +#define ATTRIBUTESOURCE_H + +#include "LuceneObject.h" + +namespace Lucene { + +class LPPAPI AttributeFactory : public LuceneObject { +protected: + AttributeFactory(); + +public: + virtual ~AttributeFactory(); + + LUCENE_CLASS(AttributeFactory); + +public: + /// returns an {@link Attribute}. 
+ virtual AttributePtr createAttributeInstance(const String& className); + + template + AttributePtr createInstance(const String& className) { + AttributePtr attrImpl = createAttributeInstance(className); + return attrImpl ? attrImpl : newLucene(); + } + + /// This is the default factory that creates {@link Attribute}s using the class name of the supplied + /// {@link Attribute} interface class by appending Impl to it. + static AttributeFactoryPtr DEFAULT_ATTRIBUTE_FACTORY(); +}; + +/// An AttributeSource contains a list of different {@link Attribute}s, and methods to add and get them. +/// There can only be a single instance of an attribute in the same AttributeSource instance. This is ensured +/// by passing in the actual type of the Attribute (Class) to the {@link #addAttribute(Class)}, +/// which then checks if an instance of that type is already present. If yes, it returns the instance, otherwise +/// it creates a new instance and returns it. +class LPPAPI AttributeSource : public LuceneObject { +public: + /// An AttributeSource using the default attribute factory {@link DefaultAttributeFactory}. + AttributeSource(); + + /// An AttributeSource that uses the same attributes as the supplied one. + AttributeSource(const AttributeSourcePtr& input); + + /// An AttributeSource using the supplied {@link AttributeFactory} for creating new {@link Attribute} + /// instances. + AttributeSource(const AttributeFactoryPtr& factory); + + virtual ~AttributeSource(); + + LUCENE_CLASS(AttributeSource); + +protected: + AttributeFactoryPtr factory; + MapStringAttribute attributes; + AttributeSourceStatePtr currentState; + +public: + /// returns the used AttributeFactory. + AttributeFactoryPtr getAttributeFactory(); + + /// This method first checks if an instance of that class is already in this AttributeSource and returns it. + /// Otherwise a new instance is created, added to this AttributeSource and returned. 
+ template + boost::shared_ptr addAttribute() { + String className(ATTR::_getClassName()); + boost::shared_ptr attrImpl(boost::dynamic_pointer_cast(getAttribute(className))); + if (!attrImpl) { + attrImpl = boost::dynamic_pointer_cast(factory->createInstance(className)); + if (!attrImpl) { + boost::throw_exception(IllegalArgumentException(L"Could not instantiate implementing class for " + className)); + } + addAttribute(className, attrImpl); + } + return attrImpl; + } + + /// Adds a custom Attribute instance. + void addAttribute(const String& className, const AttributePtr& attrImpl); + + /// Returns true if this AttributeSource has any attributes. + bool hasAttributes(); + + /// Returns true, if this AttributeSource contains the passed-in Attribute. + template + bool hasAttribute() { + return getAttribute(ATTR::_getClassName()).get() != NULL; + } + + /// Returns the instance of the passed in Attribute contained in this AttributeSource. + template + boost::shared_ptr getAttribute() { + String className(ATTR::_getClassName()); + boost::shared_ptr attr(boost::dynamic_pointer_cast(getAttribute(className))); + if (!attr) { + boost::throw_exception(IllegalArgumentException(L"This AttributeSource does not have the attribute '" + className + L"'.")); + } + return attr; + } + + /// Resets all Attributes in this AttributeSource by calling {@link AttributeImpl#clear()} on each Attribute + /// implementation. + void clearAttributes(); + + /// Captures the state of all Attributes. The return value can be passed to {@link #restoreState} to restore + /// the state of this or another AttributeSource. + AttributeSourceStatePtr captureState(); + + /// Restores this state by copying the values of all attribute implementations that this state contains into + /// the attributes implementations of the targetStream. The targetStream must contain a corresponding instance + /// for each argument contained in this state (eg. 
it is not possible to restore the state of an AttributeSource + /// containing a TermAttribute into a AttributeSource using a Token instance as implementation). + /// + /// Note that this method does not affect attributes of the targetStream that are not contained in this state. + /// In other words, if for example the targetStream contains an OffsetAttribute, but this state doesn't, then + /// the value of the OffsetAttribute remains unchanged. It might be desirable to reset its value to the default, + /// in which case the caller should first call {@link TokenStream#clearAttributes()} on the targetStream. + void restoreState(const AttributeSourceStatePtr& state); + + /// Return hash code for this object. + virtual int32_t hashCode(); + + /// Return whether two objects are equal + virtual bool equals(const LuceneObjectPtr& other); + + /// Returns a string representation of the object + virtual String toString(); + + /// Performs a clone of all {@link AttributeImpl} instances returned in a new AttributeSource instance. This + /// method can be used to eg. create another TokenStream with exactly the same attributes (using {@link + /// #AttributeSource(AttributeSource)}) + AttributeSourcePtr cloneAttributes(); + + /// Return a vector of attributes based on currentState. + Collection getAttributes(); + +protected: + /// The caller must pass in a className value. + /// This method checks if an instance of that class is already in this AttributeSource and returns it. + AttributePtr getAttribute(const String& className); + + /// Returns true, if this AttributeSource contains the passed-in Attribute. + bool hasAttribute(const String& className); + + void computeCurrentState(); +}; + +class LPPAPI DefaultAttributeFactory : public AttributeFactory { +public: + virtual ~DefaultAttributeFactory(); + + LUCENE_CLASS(DefaultAttributeFactory); + +public: + /// returns an {@link Attribute}. 
+ virtual AttributePtr createAttributeInstance(const String& className); +}; + +/// This class holds the state of an AttributeSource. +/// @see #captureState +/// @see #restoreState +class LPPAPI AttributeSourceState : public LuceneObject { +public: + virtual ~AttributeSourceState(); + + LUCENE_CLASS(AttributeSourceState); + +protected: + AttributePtr attribute; + AttributeSourceStatePtr next; + +public: + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + friend class AttributeSource; +}; + +} + +#endif diff --git a/include/lucene++/AveragePayloadFunction.h b/include/lucene++/AveragePayloadFunction.h new file mode 100644 index 00000000..f7b3c3d2 --- /dev/null +++ b/include/lucene++/AveragePayloadFunction.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef AVERAGEPAYLOADFUNCTION_H +#define AVERAGEPAYLOADFUNCTION_H + +#include "PayloadFunction.h" + +namespace Lucene { + +/// Calculate the final score as the average score of all payloads seen. +/// +/// Is thread safe and completely reusable. 
+class LPPAPI AveragePayloadFunction : public PayloadFunction { +public: + virtual ~AveragePayloadFunction(); + LUCENE_CLASS(AveragePayloadFunction); + +public: + virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, + double currentScore, double currentPayloadScore); + virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); +}; + +} + +#endif diff --git a/include/lucene++/Base64.h b/include/lucene++/Base64.h new file mode 100644 index 00000000..117e2af3 --- /dev/null +++ b/include/lucene++/Base64.h @@ -0,0 +1,33 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BASE64_H +#define BASE64_H + +#include "LuceneObject.h" + +namespace Lucene { + +class LPPAPI Base64 : public LuceneObject { +public: + virtual ~Base64(); + LUCENE_CLASS(Base64); + +protected: + static const String BASE64_CHARS; + +public: + static String encode(ByteArray bytes); + static String encode(const uint8_t* bytes, int32_t length); + static ByteArray decode(const String& str); + +protected: + static bool isBase64(wchar_t ch); +}; + +} + +#endif diff --git a/include/lucene++/BaseCharFilter.h b/include/lucene++/BaseCharFilter.h new file mode 100644 index 00000000..5f66e940 --- /dev/null +++ b/include/lucene++/BaseCharFilter.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef BASECHARFILTER_H +#define BASECHARFILTER_H + +#include "CharFilter.h" + +namespace Lucene { + +/// Base utility class for implementing a {@link CharFilter}. You subclass this, and then record mappings by +/// calling {@link #addOffCorrectMap}, and then invoke the correct method to correct an offset. +class LPPAPI BaseCharFilter : public CharFilter { +public: + BaseCharFilter(const CharStreamPtr& in); + virtual ~BaseCharFilter(); + + LUCENE_CLASS(BaseCharFilter); + +protected: + IntArray offsets; + IntArray diffs; + int32_t size; + +protected: + /// Retrieve the corrected offset. + virtual int32_t correct(int32_t currentOff); + + int32_t getLastCumulativeDiff(); + void addOffCorrectMap(int32_t off, int32_t cumulativeDiff); +}; + +} + +#endif diff --git a/include/lucene++/BitSet.h b/include/lucene++/BitSet.h new file mode 100644 index 00000000..e06e6c7b --- /dev/null +++ b/include/lucene++/BitSet.h @@ -0,0 +1,66 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef BITSET_H +#define BITSET_H + +#include +#include "LuceneObject.h" + +namespace Lucene { + +class LPPAPI BitSet : public LuceneObject { +public: + BitSet(uint32_t size = 0); + virtual ~BitSet(); + + LUCENE_CLASS(BitSet); + +protected: + typedef boost::dynamic_bitset bitset_type; + bitset_type bitSet; + +public: + const uint64_t* getBits(); + void clear(); + void clear(uint32_t bitIndex); + void fastClear(uint32_t bitIndex); + void clear(uint32_t fromIndex, uint32_t toIndex); + void fastClear(uint32_t fromIndex, uint32_t toIndex); + void set(uint32_t bitIndex); + void fastSet(uint32_t bitIndex); + void set(uint32_t bitIndex, bool value); + void fastSet(uint32_t bitIndex, bool value); + void set(uint32_t fromIndex, uint32_t toIndex); + void fastSet(uint32_t fromIndex, uint32_t toIndex); + void set(uint32_t fromIndex, uint32_t toIndex, bool value); + void fastSet(uint32_t fromIndex, uint32_t toIndex, bool value); + void flip(uint32_t bitIndex); + void fastFlip(uint32_t bitIndex); + void flip(uint32_t fromIndex, uint32_t toIndex); + void fastFlip(uint32_t fromIndex, uint32_t toIndex); + uint32_t size() const; + uint32_t numBlocks() const; + bool isEmpty() const; + bool get(uint32_t bitIndex) const; + bool fastGet(uint32_t bitIndex) const; + int32_t nextSetBit(uint32_t fromIndex) const; + void _and(const BitSetPtr& set); + void _or(const BitSetPtr& set); + void _xor(const BitSetPtr& set); + void andNot(const BitSetPtr& set); + bool intersectsBitSet(const BitSetPtr& set) const; + uint32_t cardinality(); + void resize(uint32_t size); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/BitUtil.h b/include/lucene++/BitUtil.h new file mode 100644 index 00000000..5bb91531 --- /dev/null +++ b/include/lucene++/BitUtil.h @@ -0,0 
+1,75 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BITUTIL_H +#define BITUTIL_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A variety of high efficiency bit twiddling routines. +class LPPAPI BitUtil : public LuceneObject { +public: + virtual ~BitUtil(); + LUCENE_CLASS(BitUtil); + +public: + /// Table of number of trailing zeros in a byte + static const uint8_t ntzTable[]; + +public: + /// Returns the number of bits set in the long + static int32_t pop(int64_t x); + + /// Returns the number of set bits in an array of longs. + static int64_t pop_array(const int64_t* A, int32_t wordOffset, int32_t numWords); + + /// Returns the popcount or cardinality of the two sets after an intersection. Neither array is modified. + static int64_t pop_intersect(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); + + /// Returns the popcount or cardinality of the union of two sets. Neither array is modified. + static int64_t pop_union(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); + + /// Returns the popcount or cardinality of A & ~B. Neither array is modified. + static int64_t pop_andnot(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); + + /// Returns the popcount or cardinality of A ^ B. Neither array is modified. + static int64_t pop_xor(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords); + + /// Returns number of trailing zeros in a 64 bit long value. + static int32_t ntz(int64_t val); + + /// Returns number of trailing zeros in a 32 bit int value. 
+ static int32_t ntz(int32_t val); + + /// Returns 0 based index of first set bit (only works for x!=0) + /// This is an alternate implementation of ntz() + static int32_t ntz2(int64_t x); + + /// Returns 0 based index of first set bit. + /// This is an alternate implementation of ntz() + static int32_t ntz3(int64_t x); + + /// Returns true if v is a power of two or zero. + static bool isPowerOfTwo(int32_t v); + + /// Returns true if v is a power of two or zero. + static bool isPowerOfTwo(int64_t v); + + /// Returns the next highest power of two, or the current value if it's already a power of two or zero. + static int32_t nextHighestPowerOfTwo(int32_t v); + + /// Returns the next highest power of two, or the current value if it's already a power of two or zero. + static int64_t nextHighestPowerOfTwo(int64_t v); + +protected: + inline static void CSA(int64_t& h, int64_t& l, int64_t a, int64_t b, int64_t c); +}; + +} + +#endif diff --git a/include/lucene++/BitVector.h b/include/lucene++/BitVector.h new file mode 100644 index 00000000..694bb74b --- /dev/null +++ b/include/lucene++/BitVector.h @@ -0,0 +1,95 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BITVECTOR_H +#define BITVECTOR_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Optimized implementation of a vector of bits. +class LPPAPI BitVector : public LuceneObject { +public: + /// Constructs a vector capable of holding n bits. + BitVector(int32_t n = 0); + + BitVector(ByteArray bits, int32_t size); + + /// Constructs a bit vector from the file name in Directory d, + /// as written by the {@link #write} method. 
+ BitVector(const DirectoryPtr& d, const String& name); + + virtual ~BitVector(); + + LUCENE_CLASS(BitVector); + +protected: + ByteArray bits; + int32_t _size; + int32_t _count; + + static const uint8_t BYTE_COUNTS[]; // table of bits/byte + +public: + /// Clone this vector + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Sets the value of bit to one. + void set(int32_t bit); + + /// Sets the value of bit to true, and returns true if bit was already set. + bool getAndSet(int32_t bit); + + /// Sets the value of bit to zero. + void clear(int32_t bit); + + /// Returns true if bit is one and false if it is zero. + bool get(int32_t bit); + + /// Returns the number of bits in this vector. This is also one greater than + /// the number of the largest valid bit number. + int32_t size(); + + /// Returns the total number of one bits in this vector. This is efficiently + /// computed and cached, so that, if the vector is not changed, no recomputation + /// is done for repeated calls. + int32_t count(); + + /// For testing + int32_t getRecomputedCount(); + + /// Writes this vector to the file name in Directory d, in a format that can + /// be read by the constructor {@link #BitVector(DirectoryPtr, const String&)}. + void write(const DirectoryPtr& d, const String& name); + + /// Retrieve a subset of this BitVector. + /// @param start starting index, inclusive + /// @param end ending index, exclusive + /// @return subset + BitVectorPtr subset(int32_t start, int32_t end); + +protected: + /// Write as a bit set. + void writeBits(const IndexOutputPtr& output); + + /// Write as a d-gaps list. + void writeDgaps(const IndexOutputPtr& output); + + /// Indicates if the bit vector is sparse and should be saved as a d-gaps list, + /// or dense, and should be saved as a bit set. + bool isSparse(); + + /// Read as a bit set. + void readBits(const IndexInputPtr& input); + + /// Read as a d-gaps list. 
+ void readDgaps(const IndexInputPtr& input); +}; + +} + +#endif diff --git a/include/lucene++/BooleanClause.h b/include/lucene++/BooleanClause.h new file mode 100644 index 00000000..70aed799 --- /dev/null +++ b/include/lucene++/BooleanClause.h @@ -0,0 +1,60 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BOOLEANCLAUSE_H +#define BOOLEANCLAUSE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A clause in a BooleanQuery. +class LPPAPI BooleanClause : public LuceneObject { +public: + /// Specifies how clauses are to occur in matching documents. + enum Occur { + /// Use this operator for clauses that must appear in the matching documents. + MUST, + + /// Use this operator for clauses that should appear in the matching documents. For a BooleanQuery + /// with no MUST clauses one or more SHOULD clauses must match a document for the BooleanQuery to match. + /// @see BooleanQuery#setMinimumNumberShouldMatch + SHOULD, + + /// Use this operator for clauses that must not appear in the matching documents. Note that it is not + /// possible to search for queries that only consist of a MUST_NOT clause. + MUST_NOT + }; + +public: + BooleanClause(const QueryPtr& query, Occur occur); + virtual ~BooleanClause(); + + LUCENE_CLASS(BooleanClause); + +protected: + /// The query whose matching documents are combined by the boolean query. 
+ QueryPtr query; + Occur occur; + +public: + Occur getOccur(); + void setOccur(Occur occur); + + QueryPtr getQuery(); + void setQuery(const QueryPtr& query); + + bool isProhibited(); + bool isRequired(); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/BooleanQuery.h b/include/lucene++/BooleanQuery.h new file mode 100644 index 00000000..29f1634e --- /dev/null +++ b/include/lucene++/BooleanQuery.h @@ -0,0 +1,102 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BOOLEANQUERY_H +#define BOOLEANQUERY_H + +#include "Query.h" +#include "BooleanClause.h" +#include "Weight.h" + +namespace Lucene { + +/// A Query that matches documents matching boolean combinations of other queries, eg. {@link TermQuery}s, +/// {@link PhraseQuery}s or other BooleanQuerys. +class LPPAPI BooleanQuery : public Query { +public: + /// Constructs an empty boolean query. + /// + /// {@link Similarity#coord(int32_t, int32_t)} may be disabled in scoring, as appropriate. For example, + /// this score factor does not make sense for most automatically generated queries, like {@link WildcardQuery} + /// and {@link FuzzyQuery}. + /// + /// @param disableCoord disables {@link Similarity#coord(int32_t, int32_t)} in scoring. + BooleanQuery(bool disableCoord = false); + virtual ~BooleanQuery(); + + LUCENE_CLASS(BooleanQuery); + +protected: + static int32_t maxClauseCount; + + Collection clauses; + bool disableCoord; + int32_t minNrShouldMatch; + +public: + using Query::toString; + + /// Return the maximum number of clauses permitted, 1024 by default. 
Attempts to add more than the permitted + /// number of clauses cause TooManyClauses to be thrown. + /// @see #setMaxClauseCount(int32_t) + static int32_t getMaxClauseCount(); + + /// Set the maximum number of clauses permitted per BooleanQuery. Default value is 1024. + static void setMaxClauseCount(int32_t maxClauseCount); + + /// Returns true if {@link Similarity#coord(int32_t, int32_t)} is disabled in scoring for this query instance. + /// @see #BooleanQuery(bool) + bool isCoordDisabled(); + + /// Implement coord disabling. + virtual SimilarityPtr getSimilarity(const SearcherPtr& searcher); + + /// Specifies a minimum number of the optional BooleanClauses which must be satisfied. + /// + /// By default no optional clauses are necessary for a match (unless there are no required clauses). If this + /// method is used, then the specified number of clauses is required. + /// + /// Use of this method is totally independent of specifying that any specific clauses are required (or prohibited). + /// This number will only be compared against the number of matching optional clauses. + /// + /// @param min the number of optional clauses that must match + void setMinimumNumberShouldMatch(int32_t min); + + /// Gets the minimum number of the optional BooleanClauses which must be satisfied. + int32_t getMinimumNumberShouldMatch(); + + /// Adds a clause to a boolean query. + /// @see #getMaxClauseCount() + void add(const QueryPtr& query, BooleanClause::Occur occur); + + /// Adds a clause to a boolean query. + /// @see #getMaxClauseCount() + void add(const BooleanClausePtr& clause); + + /// Returns the set of clauses in this query. + Collection getClauses(); + + /// Returns an iterator on the clauses in this query. 
+ Collection::iterator begin(); + Collection::iterator end(); + + virtual WeightPtr createWeight(const SearcherPtr& searcher); + + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + virtual void extractTerms(SetTerm terms); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual String toString(const String& field); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + friend class BooleanWeight; +}; + +} + +#endif diff --git a/include/lucene++/BooleanScorer.h b/include/lucene++/BooleanScorer.h new file mode 100644 index 00000000..9009e191 --- /dev/null +++ b/include/lucene++/BooleanScorer.h @@ -0,0 +1,168 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BOOLEANSCORER_H +#define BOOLEANSCORER_H + +#include "Scorer.h" +#include "Collector.h" + +namespace Lucene { + +/// BooleanScorer uses a ~16k array to score windows of docs. So it scores docs 0-16k first, then docs 16-32k, +/// etc. For each window it iterates through all query terms and accumulates a score in table[doc%16k]. It also +/// stores in the table a bitmask representing which terms contributed to the score. Non-zero scores are chained +/// in a linked list. At the end of scoring each window it then iterates through the linked list and, if the +/// bitmask matches the boolean constraints, collects a hit. For boolean queries with lots of frequent terms this +/// can be much faster, since it does not need to update a priority queue for each posting, instead performing +/// constant-time operations per posting. 
The only downside is that it results in hits being delivered out-of-order +/// within the window, which means it cannot be nested within other scorers. But it works well as a top-level scorer. +/// +/// The new BooleanScorer2 implementation instead works by merging priority queues of postings, albeit with some +/// clever tricks. For example, a pure conjunction (all terms required) does not require a priority queue. Instead it +/// sorts the posting streams at the start, then repeatedly skips the first to to the last. If the first ever equals +/// the last, then there's a hit. When some terms are required and some terms are optional, the conjunction can +/// be evaluated first, then the optional terms can all skip to the match and be added to the score. Thus the +/// conjunction can reduce the number of priority queue updates for the optional terms. +class LPPAPI BooleanScorer : public Scorer { +public: + BooleanScorer(const SimilarityPtr& similarity, int32_t minNrShouldMatch, Collection optionalScorers, Collection prohibitedScorers); + virtual ~BooleanScorer(); + + LUCENE_CLASS(BooleanScorer); + + +protected: + SubScorerPtr scorers; + BucketTablePtr bucketTable; + int32_t maxCoord; + Collection coordFactors; + int32_t requiredMask; + int32_t prohibitedMask; + int32_t nextMask; + int32_t minNrShouldMatch; + int32_t end; + BucketPtr current; + Bucket* __current = nullptr; + int32_t doc; + +protected: + // firstDocID is ignored since nextDoc() initializes 'current' + virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); + +public: + virtual int32_t advance(int32_t target); + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual double score(); + virtual void score(const CollectorPtr& collector); + virtual String toString(); + +}; + +class BooleanScorerCollector : public Collector { +public: + BooleanScorerCollector(int32_t mask, const BucketTablePtr& bucketTable); + virtual ~BooleanScorerCollector(); + + 
LUCENE_CLASS(BooleanScorerCollector); + +protected: + BucketTableWeakPtr _bucketTable; + BucketTable* __bucketTable = nullptr; + int32_t mask; + ScorerWeakPtr _scorer; + Scorer* __scorer = nullptr; + +public: + virtual void collect(int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setScorer(const ScorerPtr& scorer); + virtual bool acceptsDocsOutOfOrder(); +}; + +// An internal class which is used in score(Collector, int32_t) for setting the current score. This is required +// since Collector exposes a setScorer method and implementations that need the score will call scorer->score(). +// Therefore the only methods that are implemented are score() and doc(). +class BucketScorer : public Scorer { +public: + BucketScorer(); + virtual ~BucketScorer(); + + + int32_t freq; + LUCENE_CLASS(BucketScorer); + float termFreq(){ + return freq; + } + + +public: + double _score; + int32_t doc; + +public: + virtual int32_t advance(int32_t target); + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual double score(); +}; + +class Bucket : public LuceneObject { +public: + Bucket(); + virtual ~Bucket(); + + LUCENE_CLASS(Bucket); + +public: + int32_t doc; // tells if bucket is valid + double score; // incremental score + int32_t bits; // used for bool constraints + int32_t coord; // count of terms in score + BucketWeakPtr _next; // next valid bucket + Bucket* __next = nullptr; // next valid bucket +}; + +/// A simple hash table of document scores within a range. 
+class BucketTable : public LuceneObject { +public: + BucketTable(); + virtual ~BucketTable(); + + LUCENE_CLASS(BucketTable); + +public: + static const int32_t SIZE; + static const int32_t MASK; + + Collection buckets; + BucketPtr first; // head of valid list + Bucket* __first = nullptr; // head of valid list + +public: + CollectorPtr newCollector(int32_t mask); + int32_t size(); +}; + +class SubScorer : public LuceneObject { +public: + SubScorer(const ScorerPtr& scorer, bool required, bool prohibited, const CollectorPtr& collector, const SubScorerPtr& next); + virtual ~SubScorer(); + + LUCENE_CLASS(SubScorer); + +public: + ScorerPtr scorer; + bool required; + bool prohibited; + CollectorPtr collector; + SubScorerPtr next; +}; + +} + +#endif diff --git a/include/lucene++/BooleanScorer2.h b/include/lucene++/BooleanScorer2.h new file mode 100644 index 00000000..b4f4bbc1 --- /dev/null +++ b/include/lucene++/BooleanScorer2.h @@ -0,0 +1,166 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BOOLEANSCORER2_H +#define BOOLEANSCORER2_H + +#include "DisjunctionSumScorer.h" +#include "ConjunctionScorer.h" + +namespace Lucene { + +/// See the description in BooleanScorer, comparing BooleanScorer & BooleanScorer2 +/// +/// An alternative to BooleanScorer that also allows a minimum number of optional scorers that should match. +/// Implements skipTo(), and has no limitations on the numbers of added scorers. +/// Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. +class BooleanScorer2 : public Scorer { +public: + /// Creates a {@link Scorer} with the given similarity and lists of required, prohibited and optional + /// scorers. 
In no required scorers are added, at least one of the optional scorers will have to match + /// during the search. + /// + /// @param similarity The similarity to be used. + /// @param minNrShouldMatch The minimum number of optional added scorers that should match during the search. + /// In case no required scorers are added, at least one of the optional scorers will have to match during + /// the search. + /// @param required The list of required scorers. + /// @param prohibited The list of prohibited scorers. + /// @param optional The list of optional scorers. + BooleanScorer2(const SimilarityPtr& similarity, int32_t minNrShouldMatch, Collection required, Collection prohibited, Collection optional); + + virtual ~BooleanScorer2(); + + LUCENE_CLASS(BooleanScorer2); + +protected: + Collection requiredScorers; + Collection optionalScorers; + Collection prohibitedScorers; + + CoordinatorPtr coordinator; + + /// The scorer to which all scoring will be delegated, except for computing and using the coordination factor. + ScorerPtr countingSumScorer; + + int32_t minNrShouldMatch; + int32_t doc; + +public: + virtual void initialize(); + + /// Scores and collects all matching documents. + /// @param collector The collector to which all matching documents are passed through. + virtual void score(const CollectorPtr& collector); + + virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual double score(); + virtual int32_t advance(int32_t target); + +protected: + ScorerPtr countingDisjunctionSumScorer(Collection scorers, int32_t minNrShouldMatch); + ScorerPtr countingConjunctionSumScorer(Collection requiredScorers); + ScorerPtr dualConjunctionSumScorer(const ScorerPtr& req1, const ScorerPtr& req2); + + /// Returns the scorer to be used for match counting and score summing. Uses requiredScorers, optionalScorers + /// and prohibitedScorers. 
+ ScorerPtr makeCountingSumScorer(); + ScorerPtr makeCountingSumScorerNoReq(); + ScorerPtr makeCountingSumScorerSomeReq(); + + /// Returns the scorer to be used for match counting and score summing. Uses the given required scorer and + /// the prohibitedScorers. + /// @param requiredCountingSumScorer A required scorer already built. + ScorerPtr addProhibitedScorers(const ScorerPtr& requiredCountingSumScorer); + + friend class CountingDisjunctionSumScorer; + friend class CountingConjunctionSumScorer; +}; + +class Coordinator : public LuceneObject { +public: + Coordinator(const BooleanScorer2Ptr& scorer); + virtual ~Coordinator(); + + LUCENE_CLASS(Coordinator); + +public: + BooleanScorer2WeakPtr _scorer; + Collection coordFactors; + int32_t maxCoord; // to be increased for each non prohibited scorer + int32_t nrMatchers; // to be increased by score() of match counting scorers. + +public: + void init(); // use after all scorers have been added. + + friend class BooleanScorer2; +}; + +/// Count a scorer as a single match. +class SingleMatchScorer : public Scorer { +public: + SingleMatchScorer(const ScorerPtr& scorer, const CoordinatorPtr& coordinator); + virtual ~SingleMatchScorer(); + + LUCENE_CLASS(SingleMatchScorer); + +protected: + ScorerPtr scorer; + CoordinatorPtr coordinator; + int32_t lastScoredDoc; + double lastDocScore; + +public: + virtual double score(); + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); +}; + +class CountingDisjunctionSumScorer : public DisjunctionSumScorer { +public: + CountingDisjunctionSumScorer(const BooleanScorer2Ptr& scorer, Collection subScorers, int32_t minimumNrMatchers); + virtual ~CountingDisjunctionSumScorer(); + + LUCENE_CLASS(CountingDisjunctionSumScorer); + +protected: + BooleanScorer2WeakPtr _scorer; + int32_t lastScoredDoc; + + // Save the score of lastScoredDoc, so that we don't compute it more than once in score(). 
+ double lastDocScore; + +public: + virtual double score(); + + friend class BooleanScorer2; +}; + +class CountingConjunctionSumScorer : public ConjunctionScorer { +public: + CountingConjunctionSumScorer(const BooleanScorer2Ptr& scorer, const SimilarityPtr& similarity, Collection scorers); + virtual ~CountingConjunctionSumScorer(); + + LUCENE_CLASS(CountingConjunctionSumScorer); + +protected: + BooleanScorer2WeakPtr _scorer; + int32_t lastScoredDoc; + int32_t requiredNrMatchers; + + // Save the score of lastScoredDoc, so that we don't compute it more than once in score(). + double lastDocScore; + +public: + virtual double score(); +}; + +} + +#endif diff --git a/include/lucene++/BufferedDeletes.h b/include/lucene++/BufferedDeletes.h new file mode 100644 index 00000000..b54c811b --- /dev/null +++ b/include/lucene++/BufferedDeletes.h @@ -0,0 +1,57 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BUFFEREDDELETES_H +#define BUFFEREDDELETES_H + +#include "Term.h" +#include "Query.h" + +namespace Lucene { + +/// Holds buffered deletes, by docID, term or query. We hold two instances of this class: one for +/// the deletes prior to the last flush, the other for deletes after the last flush. This is so if +/// we need to abort (discard all buffered docs) we can also discard the buffered deletes yet keep +/// the deletes done during previously flushed segments. 
+class BufferedDeletes : public LuceneObject { +public: + BufferedDeletes(bool doTermSort); + virtual ~BufferedDeletes(); + + LUCENE_CLASS(BufferedDeletes); + +public: + int32_t numTerms; + MapTermNum terms; + MapQueryInt queries; + Collection docIDs; + int64_t bytesUsed; + +public: + int32_t size(); + void update(const BufferedDeletesPtr& in); + void clear(); + void addBytesUsed(int64_t b); + bool any(); + void remap(const MergeDocIDRemapperPtr& mapper, const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergedDocCount); +}; + +/// Number of documents a delete term applies to. +class Num : public LuceneObject { +public: + Num(int32_t num); + +protected: + int32_t num; + +public: + int32_t getNum(); + void setNum(int32_t num); +}; + +} + +#endif diff --git a/include/lucene++/BufferedIndexInput.h b/include/lucene++/BufferedIndexInput.h new file mode 100644 index 00000000..6ebe4083 --- /dev/null +++ b/include/lucene++/BufferedIndexInput.h @@ -0,0 +1,111 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BUFFEREDINDEXINPUT_H +#define BUFFEREDINDEXINPUT_H + +#include "IndexInput.h" + +namespace Lucene { + +/// Base implementation class for buffered {@link IndexInput}. +class LPPAPI BufferedIndexInput : public IndexInput { +public: + /// Construct BufferedIndexInput with a specific bufferSize. + BufferedIndexInput(int32_t bufferSize = BUFFER_SIZE); + virtual ~BufferedIndexInput(); + + LUCENE_CLASS(BufferedIndexInput); + +public: + /// Default buffer size. 
+ static const int32_t BUFFER_SIZE; + +protected: + int32_t bufferSize; + int64_t bufferStart; // position in file of buffer + int32_t bufferLength; // end of valid bytes + int32_t bufferPosition; // next byte to read + ByteArray buffer; + decltype(buffer.get()) __buffer; + +public: + /// Reads and returns a single byte. + /// @see IndexOutput#writeByte(uint8_t) + virtual uint8_t readByte(); + + /// Reads an int stored in variable-length format. Reads between one and five + /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. + /// @see IndexOutput#writeVInt(int32_t) + virtual int32_t readVInt(); + + /// Change the buffer size used by this IndexInput. + void setBufferSize(int32_t newSize); + + /// Returns buffer size. + /// @see #setBufferSize + int32_t getBufferSize(); + + /// Reads a specified number of bytes into an array at the specified offset. + /// @param b the array to read bytes into. + /// @param offset the offset in the array to start storing bytes. + /// @param length the number of bytes to read. + /// @see IndexOutput#writeBytes(const uint8_t*,int) + /// @see #readInternal(uint8_t*, int32_t, int32_t) + virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); + + /// Reads a specified number of bytes into an array at the specified offset with control over whether the + /// read should be buffered (callers who have their own buffer should pass in "false" for useBuffer). + /// Currently only {@link BufferedIndexInput} respects this parameter. + /// @param b the array to read bytes into. + /// @param offset the offset in the array to start storing bytes. + /// @param length the number of bytes to read. + /// @param useBuffer set to false if the caller will handle buffering. + /// @see IndexOutput#writeBytes(const uint8_t*,int) + /// @see #readInternal(uint8_t*, int32_t, int32_t) + virtual void readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer); + + /// Closes the stream to further operations. 
+ virtual void close(); + + /// Returns the current position in this file, where the next read will occur. + /// @see #seek(int64_t) + virtual int64_t getFilePointer(); + + /// Sets current position in this file, where the next read will occur. + /// @see #getFilePointer() + /// @see #seekInternal(int64_t) + virtual void seek(int64_t pos); + + /// Returns a clone of this stream. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + +protected: + virtual void newBuffer(ByteArray newBuffer); + + void checkBufferSize(int32_t bufferSize); + + /// Refill buffer in preparation for reading. + /// @see #readInternal(uint8_t*, int32_t, int32_t) + /// @see #seekInternal(int64_t) + virtual void refill(); + + /// Implements buffer refill. Reads bytes from the current position in the input. + /// @param b the array to read bytes into. + /// @param offset the offset in the array to start storing bytes. + /// @param length the number of bytes to read. + virtual void readInternal(uint8_t* b, int32_t offset, int32_t length) = 0; + + /// Implements seek. Sets current position in this file, where the next {@link + /// #readInternal(uint8_t*, int32_t, int32_t)} will occur. + /// @param pos position to set next write. + /// @see #readInternal(uint8_t*, int32_t, int32_t) + virtual void seekInternal(int64_t pos) = 0; +}; + +} + +#endif diff --git a/include/lucene++/BufferedIndexOutput.h b/include/lucene++/BufferedIndexOutput.h new file mode 100644 index 00000000..0a506217 --- /dev/null +++ b/include/lucene++/BufferedIndexOutput.h @@ -0,0 +1,75 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef BUFFEREDINDEXOUTPUT_H +#define BUFFEREDINDEXOUTPUT_H + +#include "IndexOutput.h" + +namespace Lucene { + +/// Base implementation class for buffered {@link IndexOutput}. +class LPPAPI BufferedIndexOutput : public IndexOutput { +public: + BufferedIndexOutput(); + virtual ~BufferedIndexOutput(); + + LUCENE_CLASS(BufferedIndexOutput); + +public: + static const int32_t BUFFER_SIZE; + +protected: + int64_t bufferStart; // position in file of buffer + int32_t bufferPosition; // position in buffer + ByteArray buffer; + +public: + /// Writes a single byte. + /// @see IndexInput#readByte() + virtual void writeByte(uint8_t b); + + /// Writes an array of bytes. + /// @param b the bytes to write. + /// @param length the number of bytes to write. + /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) + virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); + + /// Forces any buffered output to be written. + virtual void flush(); + + /// Implements buffer write. Writes bytes at the current + /// position in the output. + /// @param b the bytes to write. + /// @param offset the offset in the byte array. + /// @param length the number of bytes to write. + virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length); + + /// Closes this stream to further operations. + virtual void close(); + + /// Returns the current position in this file, where the next write will occur. + /// @see #seek(long) + virtual int64_t getFilePointer(); + + /// Sets current position in this file, where the next write will occur. + /// @see #getFilePointer() + virtual void seek(int64_t pos); + + /// The number of bytes in the file. + virtual int64_t length() = 0; + +protected: + /// Implements buffer write. Writes bytes at the current + /// position in the output. + /// @param b the bytes to write. + /// @param length the number of bytes to write. 
+ void flushBuffer(const uint8_t* b, int32_t length); +}; + +} + +#endif diff --git a/include/lucene++/BufferedReader.h b/include/lucene++/BufferedReader.h new file mode 100644 index 00000000..8bbf3f4b --- /dev/null +++ b/include/lucene++/BufferedReader.h @@ -0,0 +1,63 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BUFFEREDREADER_H +#define BUFFEREDREADER_H + +#include "Reader.h" + +namespace Lucene { + +/// Read text from a character-input stream, buffering characters so as to provide +/// for the efficient reading of characters, arrays, and lines. +class LPPAPI BufferedReader : public Reader { +public: + /// Create a buffering character-input stream. + BufferedReader(const ReaderPtr& reader, int32_t size = READER_BUFFER); + virtual ~BufferedReader(); + + LUCENE_CLASS(BufferedReader); + +protected: + ReaderPtr reader; + int32_t bufferSize; + int32_t bufferLength; // end of valid bytes + int32_t bufferPosition; // next byte to read + CharArray buffer; + +public: + static const int32_t READER_BUFFER; + +public: + /// Read a single character. + virtual int32_t read(); + + /// Read characters into a portion of an array. + virtual int32_t read(wchar_t* b, int32_t offset, int32_t length); + + /// Read a line of text. + virtual bool readLine(String& line); + + /// Close the stream. + virtual void close(); + + /// Tell whether this stream supports the mark() operation + virtual bool markSupported(); + + /// Reset the stream. + virtual void reset(); + +protected: + /// Refill buffer in preparation for reading. + int32_t refill(); + + /// Read a single character without moving position. 
+ int32_t peek(); +}; + +} + +#endif diff --git a/include/lucene++/ByteBlockPool.h b/include/lucene++/ByteBlockPool.h new file mode 100644 index 00000000..c0df8c63 --- /dev/null +++ b/include/lucene++/ByteBlockPool.h @@ -0,0 +1,68 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BYTEBLOCKPOOL_H +#define BYTEBLOCKPOOL_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Class that Posting and PostingVector use to write byte streams into shared fixed-size byte[] arrays. +/// The idea is to allocate slices of increasing lengths. For example, the first slice is 5 bytes, the +/// next slice is 14, etc. We start by writing our bytes into the first 5 bytes. When we hit the end of +/// the slice, we allocate the next slice and then write the address of the new slice into the last 4 +/// bytes of the previous slice (the "forwarding address"). +/// +/// Each slice is filled with 0's initially, and we mark the end with a non-zero byte. This way the methods +/// that are writing into the slice don't need to record its length and instead allocate a new slice once +/// they hit a non-zero byte. 
+class LPPAPI ByteBlockPool : public LuceneObject { +public: + ByteBlockPool(const ByteBlockPoolAllocatorBasePtr& allocator, bool trackAllocations); + virtual ~ByteBlockPool(); + + LUCENE_CLASS(ByteBlockPool); + +public: + Collection buffers; + int32_t bufferUpto; // Which buffer we are up to + int32_t byteUpto; // Where we are in head buffer + + ByteArray buffer; + int32_t byteOffset; + + static const int32_t nextLevelArray[]; + static const int32_t levelSizeArray[]; + +protected: + bool trackAllocations; + ByteBlockPoolAllocatorBasePtr allocator; + +public: + static int32_t FIRST_LEVEL_SIZE(); + + void reset(); + void nextBuffer(); + int32_t newSlice(int32_t size); + int32_t allocSlice(ByteArray slice, int32_t upto); +}; + +class LPPAPI ByteBlockPoolAllocatorBase : public LuceneObject { +public: + virtual ~ByteBlockPoolAllocatorBase(); + + LUCENE_CLASS(ByteBlockPoolAllocatorBase); + +public: + virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end) = 0; + virtual void recycleByteBlocks(Collection blocks) = 0; + virtual ByteArray getByteBlock(bool trackAllocations) = 0; +}; + +} + +#endif diff --git a/include/lucene++/ByteFieldSource.h b/include/lucene++/ByteFieldSource.h new file mode 100644 index 00000000..111c0e0b --- /dev/null +++ b/include/lucene++/ByteFieldSource.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BYTEFIELDSOURCE_H +#define BYTEFIELDSOURCE_H + +#include "FieldCacheSource.h" + +namespace Lucene { + +/// Obtains byte field values from the {@link FieldCache} using getBytes() and makes those values available +/// as other numeric types, casting as needed. 
+/// +/// @see FieldCacheSource for requirements on the field. +/// +/// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite +/// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's +/// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, +/// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU +/// per lookup but will not consume double the FieldCache RAM. +class LPPAPI ByteFieldSource : public FieldCacheSource { +public: + /// Create a cached byte field source with a specific string-to-byte parser. + ByteFieldSource(const String& field, const ByteParserPtr& parser = ByteParserPtr()); + virtual ~ByteFieldSource(); + + LUCENE_CLASS(ByteFieldSource); + +protected: + ByteParserPtr parser; + +public: + virtual String description(); + virtual DocValuesPtr getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader); + virtual bool cachedFieldSourceEquals(const FieldCacheSourcePtr& other); + virtual int32_t cachedFieldSourceHashCode(); +}; + +} + +#endif diff --git a/include/lucene++/ByteSliceReader.h b/include/lucene++/ByteSliceReader.h new file mode 100644 index 00000000..1e0d2b1d --- /dev/null +++ b/include/lucene++/ByteSliceReader.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BYTESLICEREADER_H +#define BYTESLICEREADER_H + +#include "IndexInput.h" + +namespace Lucene { + +/// IndexInput that knows how to read the byte slices written by Posting and PostingVector. 
We read the bytes in each slice +/// until we hit the end of that slice at which point we read the forwarding address of the next slice and then jump to it. +class LPPAPI ByteSliceReader : public IndexInput { +public: + ByteSliceReader(); + virtual ~ByteSliceReader(); + + LUCENE_CLASS(ByteSliceReader); + +public: + ByteBlockPoolPtr pool; + int32_t bufferUpto; + ByteArray buffer; + int32_t upto; + int32_t limit; + int32_t level; + int32_t bufferOffset; + int32_t endIndex; + +public: + void init(const ByteBlockPoolPtr& pool, int32_t startIndex, int32_t endIndex); + bool eof(); + + /// Reads and returns a single byte. + virtual uint8_t readByte(); + + int64_t writeTo(const IndexOutputPtr& out); + + void nextSlice(); + + /// Reads a specified number of bytes into an array at the specified offset. + virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); + + /// Not implemented + virtual int64_t getFilePointer(); + + /// Not implemented + virtual int64_t length(); + + /// Not implemented + virtual void seek(int64_t pos); + + /// Not implemented + virtual void close(); +}; + +} + +#endif diff --git a/include/lucene++/ByteSliceWriter.h b/include/lucene++/ByteSliceWriter.h new file mode 100644 index 00000000..85f3e661 --- /dev/null +++ b/include/lucene++/ByteSliceWriter.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef BYTESLICEWRITER_H +#define BYTESLICEWRITER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Class to write byte streams into slices of shared byte[]. This is used by DocumentsWriter to hold +/// the posting list for many terms in RAM. 
+class LPPAPI ByteSliceWriter : public LuceneObject { +public: + ByteSliceWriter(const ByteBlockPoolPtr& pool); + virtual ~ByteSliceWriter(); + + LUCENE_CLASS(ByteSliceWriter); + +protected: + ByteArray slice; + int32_t upto; + ByteBlockPoolPtr pool; + +public: + int32_t offset0; + +public: + /// Set up the writer to write at address. + void init(int32_t address); + + /// Write byte into byte slice stream + void writeByte(uint8_t b); + + void writeBytes(const uint8_t* b, int32_t offset, int32_t length); + int32_t getAddress(); + void writeVInt(int32_t i); +}; + +} + +#endif diff --git a/include/lucene++/CMakeLists.txt b/include/lucene++/CMakeLists.txt new file mode 100644 index 00000000..b497dc4f --- /dev/null +++ b/include/lucene++/CMakeLists.txt @@ -0,0 +1,11 @@ +#################################### +# install headers +#################################### + +file(GLOB_RECURSE lucene_headers + "${CMAKE_CURRENT_SOURCE_DIR}/*.h" +) + +install( + FILES ${lucene_headers} + DESTINATION include/lucene++ ) diff --git a/include/lucene++/CachingSpanFilter.h b/include/lucene++/CachingSpanFilter.h new file mode 100644 index 00000000..ec7b0ad5 --- /dev/null +++ b/include/lucene++/CachingSpanFilter.h @@ -0,0 +1,48 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CACHINGSPANFILTER_H +#define CACHINGSPANFILTER_H + +#include "SpanFilter.h" +#include "CachingWrapperFilter.h" + +namespace Lucene { + +/// Wraps another SpanFilter's result and caches it. The purpose is to allow filters to simply filter, +/// and then wrap with this class to add caching. 
+class LPPAPI CachingSpanFilter : public SpanFilter { +public: + /// New deletions always result in a cache miss, by default ({@link CachingWrapperFilter#RECACHE}. + CachingSpanFilter(const SpanFilterPtr& filter, CachingWrapperFilter::DeletesMode deletesMode = CachingWrapperFilter::DELETES_RECACHE); + virtual ~CachingSpanFilter(); + + LUCENE_CLASS(CachingSpanFilter); + +protected: + SpanFilterPtr filter; + FilterCachePtr cache; + +public: + // for testing + int32_t hitCount; + int32_t missCount; + +public: + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); + virtual SpanFilterResultPtr bitSpans(const IndexReaderPtr& reader); + + virtual String toString(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + +protected: + SpanFilterResultPtr getCachedResult(const IndexReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/CachingTokenFilter.h b/include/lucene++/CachingTokenFilter.h new file mode 100644 index 00000000..ab836be8 --- /dev/null +++ b/include/lucene++/CachingTokenFilter.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CACHINGTOKENFILTER_H +#define CACHINGTOKENFILTER_H + +#include "TokenFilter.h" + +namespace Lucene { + +/// This class can be used if the token attributes of a TokenStream are intended to be consumed more than once. +/// It caches all token attribute states locally in a List. +/// +/// CachingTokenFilter implements the optional method {@link TokenStream#reset()}, which repositions the stream +/// to the first Token. 
+class LPPAPI CachingTokenFilter : public TokenFilter { +public: + CachingTokenFilter(const TokenStreamPtr& input); + virtual ~CachingTokenFilter(); + + LUCENE_CLASS(CachingTokenFilter); + +protected: + Collection cache; + Collection::iterator iterator; + AttributeSourceStatePtr finalState; + +public: + virtual bool incrementToken(); + virtual void end(); + virtual void reset(); + +protected: + void fillCache(); +}; + +} + +#endif diff --git a/include/lucene++/CachingWrapperFilter.h b/include/lucene++/CachingWrapperFilter.h new file mode 100644 index 00000000..915925e6 --- /dev/null +++ b/include/lucene++/CachingWrapperFilter.h @@ -0,0 +1,69 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CACHINGWRAPPERFILTER_H +#define CACHINGWRAPPERFILTER_H + +#include "Filter.h" + +namespace Lucene { + +/// Wraps another filter's result and caches it. The purpose is to allow filters to simply filter, and +/// then wrap with this class to add caching. +class LPPAPI CachingWrapperFilter : public Filter { +public: + /// Specifies how new deletions against a reopened reader should be handled. + /// + /// The default is IGNORE, which means the cache entry will be re-used for a given segment, even when + /// that segment has been reopened due to changes in deletions. This is a big performance gain, + /// especially with near-real-timer readers, since you don't hit a cache miss on every reopened reader + /// for prior segments. + /// + /// However, in some cases this can cause invalid query results, allowing deleted documents to be + /// returned. This only happens if the main query does not rule out deleted documents on its own, + /// such as a toplevel ConstantScoreQuery. 
To fix this, use RECACHE to re-create the cached filter + /// (at a higher per-reopen cost, but at faster subsequent search performance), or use DYNAMIC to + /// dynamically intersect deleted docs (fast reopen time but some hit to search performance). + enum DeletesMode { DELETES_IGNORE, DELETES_RECACHE, DELETES_DYNAMIC }; + + /// New deletes are ignored by default, which gives higher cache hit rate on reopened readers. + /// Most of the time this is safe, because the filter will be AND'd with a Query that fully enforces + /// deletions. If instead you need this filter to always enforce deletions, pass either {@link + /// DeletesMode#RECACHE} or {@link DeletesMode#DYNAMIC}. + CachingWrapperFilter(const FilterPtr& filter, DeletesMode deletesMode = DELETES_IGNORE); + + virtual ~CachingWrapperFilter(); + + LUCENE_CLASS(CachingWrapperFilter); + +INTERNAL: + FilterPtr filter; + + // for testing + int32_t hitCount; + int32_t missCount; + +protected: + /// A Filter cache + FilterCachePtr cache; + + /// Provide the DocIdSet to be cached, using the DocIdSet provided by the wrapped Filter. + /// + /// This implementation returns the given {@link DocIdSet}, if {@link DocIdSet#isCacheable} returns + /// true, else it copies the {@link DocIdSetIterator} into an {@link OpenBitSetDISI}. + DocIdSetPtr docIdSetToCache(const DocIdSetPtr& docIdSet, const IndexReaderPtr& reader); + +public: + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); + + virtual String toString(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/CharArraySet.h b/include/lucene++/CharArraySet.h new file mode 100644 index 00000000..3f911606 --- /dev/null +++ b/include/lucene++/CharArraySet.h @@ -0,0 +1,56 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CHARARRAYSET_H +#define CHARARRAYSET_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A simple class that stores Strings as char[]'s in a hash table. Note that this is not a general purpose class. +/// For example, it cannot remove items from the set, nor does it resize its hash table to be smaller, etc. It is +/// designed to be quick to test if a char[] is in the set without the necessity of converting it to a String first. +class LPPAPI CharArraySet : public LuceneObject { +public: + CharArraySet(bool ignoreCase); + + /// Create set from a set of strings. + CharArraySet(HashSet entries, bool ignoreCase); + + /// Create set from a collection of strings. + CharArraySet(Collection entries, bool ignoreCase); + + virtual ~CharArraySet(); + + LUCENE_CLASS(CharArraySet); + +protected: + HashSet entries; + bool ignoreCase; + +public: + virtual bool contains(const String& text); + + /// True if the length chars of text starting at offset are in the set + virtual bool contains(const wchar_t* text, int32_t offset, int32_t length); + + /// Add this String into the set + virtual bool add(const String& text); + + /// Add this char[] into the set. + virtual bool add(CharArray text); + + virtual int32_t size(); + virtual bool isEmpty(); + + HashSet::iterator begin(); + HashSet::iterator end(); +}; + +} + +#endif diff --git a/include/lucene++/CharBlockPool.h b/include/lucene++/CharBlockPool.h new file mode 100644 index 00000000..a676b280 --- /dev/null +++ b/include/lucene++/CharBlockPool.h @@ -0,0 +1,40 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CHARBLOCKPOOL_H +#define CHARBLOCKPOOL_H + +#include "LuceneObject.h" + +namespace Lucene { + +class CharBlockPool : public LuceneObject { +public: + CharBlockPool(const DocumentsWriterPtr& docWriter); + virtual ~CharBlockPool(); + + LUCENE_CLASS(CharBlockPool); + +public: + Collection buffers; + int32_t numBuffer; + int32_t bufferUpto; // Which buffer we are up to + int32_t charUpto; // Where we are in head buffer + + CharArray buffer; // Current head buffer + int32_t charOffset; // Current head offset + +protected: + DocumentsWriterWeakPtr _docWriter; + +public: + void reset(); + void nextBuffer(); +}; + +} + +#endif diff --git a/include/lucene++/CharFilter.h b/include/lucene++/CharFilter.h new file mode 100644 index 00000000..8589d9f9 --- /dev/null +++ b/include/lucene++/CharFilter.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CHARFILTER_H +#define CHARFILTER_H + +#include "CharStream.h" + +namespace Lucene { + +/// Subclasses of CharFilter can be chained to filter CharStream. They can be used as {@link Reader} with +/// additional offset correction. {@link Tokenizer}s will automatically use {@link #correctOffset} if a +/// CharFilter/CharStream subclass is used. 
+class LPPAPI CharFilter : public CharStream { +protected: + CharFilter(const CharStreamPtr& in); +public: + virtual ~CharFilter(); + + LUCENE_CLASS(CharFilter); + +protected: + CharStreamPtr input; + +protected: + /// Subclass may want to override to correct the current offset. + /// @param currentOff current offset + /// @return corrected offset + virtual int32_t correct(int32_t currentOff); + + /// Chains the corrected offset through the input CharFilter. + virtual int32_t correctOffset(int32_t currentOff); + + virtual void close(); + virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); + virtual bool markSupported(); + virtual void mark(int32_t readAheadLimit); + virtual void reset(); +}; + +} + +#endif diff --git a/include/lucene++/CharFolder.h b/include/lucene++/CharFolder.h new file mode 100644 index 00000000..fda66bef --- /dev/null +++ b/include/lucene++/CharFolder.h @@ -0,0 +1,51 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CHARFOLDER_H +#define CHARFOLDER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Utility class for folding character case. 
+class LPPAPI CharFolder : public LuceneObject { +public: + virtual ~CharFolder(); + LUCENE_CLASS(CharFolder); + +protected: + static bool lowerCache; + static bool upperCache; + static wchar_t lowerChars[CHAR_MAX - CHAR_MIN + 1]; + static wchar_t upperChars[CHAR_MAX - CHAR_MIN + 1]; + +public: + static wchar_t toLower(wchar_t ch); + static wchar_t toUpper(wchar_t ch); + + template + static void toLower(ITER first, ITER last) { + for (; first != last; ++first) { + *first = toLower(*first); + } + } + + template + static void toUpper(ITER first, ITER last) { + for (; first != last; ++first) { + *first = toUpper(*first); + } + } + +protected: + static bool fillLower(); + static bool fillUpper(); +}; + +} + +#endif diff --git a/include/lucene++/CharReader.h b/include/lucene++/CharReader.h new file mode 100644 index 00000000..0223d56d --- /dev/null +++ b/include/lucene++/CharReader.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CHARREADER_H +#define CHARREADER_H + +#include "CharStream.h" + +namespace Lucene { + +/// CharReader is a Reader wrapper. It reads chars from Reader and outputs {@link CharStream}, defining an +/// identify function {@link #correctOffset} method that simply returns the provided offset. 
+class LPPAPI CharReader : public CharStream { +public: + CharReader(const ReaderPtr& in); + virtual ~CharReader(); + + LUCENE_CLASS(CharReader); + +protected: + ReaderPtr input; + +public: + using CharStream::read; + + static CharStreamPtr get(const ReaderPtr& input); + + virtual int32_t correctOffset(int32_t currentOff); + virtual void close(); + virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); + virtual bool markSupported(); + virtual void mark(int32_t readAheadLimit); + virtual void reset(); +}; + +} + +#endif diff --git a/include/lucene++/CharStream.h b/include/lucene++/CharStream.h new file mode 100644 index 00000000..5c5d07f3 --- /dev/null +++ b/include/lucene++/CharStream.h @@ -0,0 +1,33 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CHARSTREAM_H +#define CHARSTREAM_H + +#include "Reader.h" + +namespace Lucene { + +/// CharStream adds {@link #correctOffset} functionality over {@link Reader}. All Tokenizers accept a CharStream +/// instead of {@link Reader} as input, which enables arbitrary character based filtering before tokenization. +/// The {@link #correctOffset} method fixed offsets to account for removal or insertion of characters, so that the +/// offsets reported in the tokens match the character offsets of the original Reader. +class LPPAPI CharStream : public Reader { +public: + virtual ~CharStream(); + LUCENE_CLASS(CharStream); + +public: + /// Called by CharFilter(s) and Tokenizer to correct token offset. 
+ /// + /// @param currentOff offset as seen in the output + /// @return corrected offset based on the input + virtual int32_t correctOffset(int32_t currentOff) = 0; +}; + +} + +#endif diff --git a/include/lucene++/CharTokenizer.h b/include/lucene++/CharTokenizer.h new file mode 100644 index 00000000..a1c5acda --- /dev/null +++ b/include/lucene++/CharTokenizer.h @@ -0,0 +1,54 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CHARTOKENIZER_H +#define CHARTOKENIZER_H + +#include "Tokenizer.h" + +namespace Lucene { + +/// An abstract base class for simple, character-oriented tokenizers. +class LPPAPI CharTokenizer : public Tokenizer { +public: + CharTokenizer(const ReaderPtr& input); + CharTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + CharTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + virtual ~CharTokenizer(); + + LUCENE_CLASS(CharTokenizer); + +protected: + int32_t offset; + int32_t bufferIndex; + int32_t dataLen; + + static const int32_t MAX_WORD_LEN; + static const int32_t IO_BUFFER_SIZE; + + CharArray ioBuffer; + TermAttributePtr termAtt; + OffsetAttributePtr offsetAtt; + +public: + virtual bool incrementToken(); + virtual void end(); + virtual void reset(const ReaderPtr& input); + +protected: + /// Returns true if a character should be included in a token. This tokenizer generates as tokens adjacent + /// sequences of characters which satisfy this predicate. Characters for which this is false are used to + /// define token boundaries and are not included in tokens. + virtual bool isTokenChar(wchar_t c) = 0; + + /// Called on each token character to normalize it before it is added to the token. 
The default implementation + /// does nothing. Subclasses may use this to, eg., lowercase tokens. + virtual wchar_t normalize(wchar_t c); +}; + +} + +#endif diff --git a/include/lucene++/CheckIndex.h b/include/lucene++/CheckIndex.h new file mode 100644 index 00000000..6e22292e --- /dev/null +++ b/include/lucene++/CheckIndex.h @@ -0,0 +1,314 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CHECKINDEX_H +#define CHECKINDEX_H + +#include "SegmentTermDocs.h" + +namespace Lucene { + +/// Basic tool and API to check the health of an index and write a new segments file that removes reference to +/// problematic segments. +/// +/// As this tool checks every byte in the index, on a large index it can take quite a long time to run. +/// +/// WARNING: Please make a complete backup of your index before using this to fix your index! +class LPPAPI CheckIndex : public LuceneObject { +public: + /// Create a new CheckIndex on the directory. + CheckIndex(const DirectoryPtr& dir); + virtual ~CheckIndex(); + + LUCENE_CLASS(CheckIndex); + +protected: + InfoStreamPtr infoStream; + DirectoryPtr dir; + + static bool _assertsOn; + +public: + /// Set infoStream where messages should go. If null, no messages are printed + void setInfoStream(const InfoStreamPtr& out); + + /// Returns a {@link IndexStatus} instance detailing the state of the index. + /// + /// As this method checks every byte in the index, on a large index it can take quite a long time to run. + /// + /// WARNING: make sure you only call this when the index is not opened by any writer. + IndexStatusPtr checkIndex(); + + /// Returns a {@link IndexStatus} instance detailing the state of the index. 
+    ///
+    /// @param onlySegments list of specific segment names to check
+    ///
+    /// As this method checks every byte in the specified segments, on a large index it can take quite a long
+    /// time to run.
+    ///
+    /// WARNING: make sure you only call this when the index is not opened by any writer.
+    IndexStatusPtr checkIndex(Collection<String> onlySegments);
+
+    /// Repairs the index using previously returned result from {@link #checkIndex}. Note that this does not
+    /// remove any of the unreferenced files after it's done; you must separately open an {@link IndexWriter},
+    /// which deletes unreferenced files when it's created.
+    ///
+    /// WARNING: this writes a new segments file into the index, effectively removing all documents in broken
+    /// segments from the index. BE CAREFUL.
+    ///
+    /// WARNING: Make sure you only call this when the index is not opened by any writer.
+    void fixIndex(const IndexStatusPtr& result);
+
+    static bool testAsserts();
+    static bool assertsOn();
+
+    /// Command-line interface to check and fix an index.
+    ///
+    /// Run it like this:
+    /// CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+    ///
+    /// -fix: actually write a new segments_N file, removing any problematic segments
+    ///
+    /// -segment X: only check the specified segment(s). This can be specified multiple times,
+    /// to check more than one segment, eg -segment _2 -segment _a.
+    /// You can't use this with the -fix option.
+    ///
+    /// WARNING: -fix should only be used on an emergency basis as it will cause documents (perhaps many)
+    /// to be permanently removed from the index. Always make a backup copy of your index before running
+    /// this! Do not run this tool on an index that is actively being written to. You have been warned!
+    ///
+    /// Run without -fix, this tool will open the index, report version information and report any exceptions
+    /// it hits and what action it would take if -fix were specified. With -fix, this tool will remove any
+    /// segments that have issues and write a new segments_N file. This means all documents contained in the
+    /// affected segments will be removed.
+    ///
+    /// This tool exits with exit code 1 if the index cannot be opened or has any corruption, else 0.
+    static int main(Collection<String> args);
+
+protected:
+    void msg(const String& msg);
+
+    /// Test field norms.
+    FieldNormStatusPtr testFieldNorms(Collection<String> fieldNames, const SegmentReaderPtr& reader);
+
+    /// Test the term index.
+    TermIndexStatusPtr testTermIndex(const SegmentInfoPtr& info, const SegmentReaderPtr& reader);
+
+    /// Test stored fields for a segment.
+    StoredFieldStatusPtr testStoredFields(const SegmentInfoPtr& info, const SegmentReaderPtr& reader);
+
+    /// Test term vectors for a segment.
+    TermVectorStatusPtr testTermVectors(const SegmentInfoPtr& info, const SegmentReaderPtr& reader);
+};
+
+/// Returned from {@link #checkIndex()} detailing the health and status of the index.
+class LPPAPI IndexStatus : public LuceneObject {
+public:
+    IndexStatus();
+    virtual ~IndexStatus();
+
+    LUCENE_CLASS(IndexStatus);
+
+public:
+    /// True if no problems were found with the index.
+    bool clean;
+
+    /// True if we were unable to locate and load the segments_N file.
+    bool missingSegments;
+
+    /// True if we were unable to open the segments_N file.
+    bool cantOpenSegments;
+
+    /// True if we were unable to read the version number from segments_N file.
+    bool missingSegmentVersion;
+
+    /// Name of latest segments_N file in the index.
+    String segmentsFileName;
+
+    /// Number of segments in the index.
+    int32_t numSegments;
+
+    /// String description of the version of the index.
+    String segmentFormat;
+
+    /// Empty unless you passed specific segments list to check as optional 3rd argument.
+    /// @see CheckIndex#checkIndex(List)
+    Collection<String> segmentsChecked;
+
+    /// True if the index was created with a newer version of Lucene than the CheckIndex tool.
+    bool toolOutOfDate;
+
+    /// List of {@link SegmentInfoStatus} instances, detailing status of each segment.
+    Collection<SegmentInfoStatusPtr> segmentInfos;
+
+    /// Directory index is in.
+    DirectoryPtr dir;
+
+    /// SegmentInfos instance containing only segments that had no problems (this is used with the
+    /// {@link CheckIndex#fixIndex} method to repair the index.
+    SegmentInfosPtr newSegments;
+
+    /// How many documents will be lost to bad segments.
+    int32_t totLoseDocCount;
+
+    /// How many bad segments were found.
+    int32_t numBadSegments;
+
+    /// True if we checked only specific segments ({@link #checkIndex(List)}) was called with non-null argument).
+    bool partial;
+
+    /// Holds the userData of the last commit in the index
+    MapStringString userData;
+};
+
+/// Holds the status of each segment in the index. See {@link #segmentInfos}.
+class LPPAPI SegmentInfoStatus : public LuceneObject {
+public:
+    SegmentInfoStatus();
+    virtual ~SegmentInfoStatus();
+
+    LUCENE_CLASS(SegmentInfoStatus);
+
+public:
+    /// Name of the segment.
+    String name;
+
+    /// Document count (does not take deletions into account).
+    int32_t docCount;
+
+    /// True if segment is compound file format.
+    bool compound;
+
+    /// Number of files referenced by this segment.
+    int32_t numFiles;
+
+    /// Net size (MB) of the files referenced by this segment.
+    double sizeMB;
+
+    /// Doc store offset, if this segment shares the doc store files (stored fields and term vectors) with
+    /// other segments. This is -1 if it does not share.
+    int32_t docStoreOffset;
+
+    /// String of the shared doc store segment, or null if this segment does not share the doc store files.
+    String docStoreSegment;
+
+    /// True if the shared doc store files are compound file format.
+    bool docStoreCompoundFile;
+
+    /// True if this segment has pending deletions.
+    bool hasDeletions;
+
+    /// Name of the current deletions file name.
+    String deletionsFileName;
+
+    /// Number of deleted documents.
+ int32_t numDeleted; + + /// True if we were able to open a SegmentReader on this segment. + bool openReaderPassed; + + /// Number of fields in this segment. + int32_t numFields; + + /// True if at least one of the fields in this segment does not omitTermFreqAndPositions. + /// @see AbstractField#setOmitTermFreqAndPositions + bool hasProx; + + /// Map that includes certain debugging details that IndexWriter records into each segment it creates + MapStringString diagnostics; + + /// Status for testing of field norms (null if field norms could not be tested). + FieldNormStatusPtr fieldNormStatus; + + /// Status for testing of indexed terms (null if indexed terms could not be tested). + TermIndexStatusPtr termIndexStatus; + + /// Status for testing of stored fields (null if stored fields could not be tested). + StoredFieldStatusPtr storedFieldStatus; + + /// Status for testing of term vectors (null if term vectors could not be tested). + TermVectorStatusPtr termVectorStatus; +}; + +/// Status from testing field norms. +class LPPAPI FieldNormStatus : public LuceneObject { +public: + FieldNormStatus(); + virtual ~FieldNormStatus(); + + LUCENE_CLASS(FieldNormStatus); + +public: + /// Number of fields successfully tested + int64_t totFields; + + /// Exception thrown during term index test (null on success) + LuceneException error; +}; + +/// Status from testing term index. +class LPPAPI TermIndexStatus : public LuceneObject { +public: + TermIndexStatus(); + virtual ~TermIndexStatus(); + + LUCENE_CLASS(TermIndexStatus); + +public: + /// Total term count + int64_t termCount; + + /// Total frequency across all terms. + int64_t totFreq; + + /// Total number of positions. + int64_t totPos; + + /// Exception thrown during term index test (null on success) + LuceneException error; +}; + +/// Status from testing stored fields. 
+class LPPAPI StoredFieldStatus : public LuceneObject {
+public:
+    StoredFieldStatus();
+    virtual ~StoredFieldStatus();
+
+    LUCENE_CLASS(StoredFieldStatus);
+
+public:
+    /// Number of documents tested.
+    int32_t docCount;
+
+    /// Total number of stored fields tested.
+    int64_t totFields;
+
+    /// Exception thrown during stored fields test (null on success)
+    LuceneException error;
+};
+
+/// Status from testing stored fields.
+class LPPAPI TermVectorStatus : public LuceneObject {
+public:
+    TermVectorStatus();
+    virtual ~TermVectorStatus();
+
+    LUCENE_CLASS(TermVectorStatus);
+
+public:
+    /// Number of documents tested.
+    int32_t docCount;
+
+    /// Total number of term vectors tested.
+    int64_t totVectors;
+
+    /// Exception thrown during term vector test (null on success)
+    LuceneException error;
+};
+
+}
+
+#endif
diff --git a/include/lucene++/ChecksumIndexInput.h b/include/lucene++/ChecksumIndexInput.h
new file mode 100644
index 00000000..597fbbea
--- /dev/null
+++ b/include/lucene++/ChecksumIndexInput.h
@@ -0,0 +1,63 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
+// Distributable under the terms of either the Apache License (Version 2.0)
+// or the GNU Lesser General Public License.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef CHECKSUMINDEXINPUT_H
+#define CHECKSUMINDEXINPUT_H
+
+#include <boost/crc.hpp>
+#include "IndexInput.h"
+
+namespace Lucene {
+
+/// Writes bytes through to a primary IndexInput, computing checksum as it goes.
+/// Note that you cannot use seek().
+class LPPAPI ChecksumIndexInput : public IndexInput {
+public:
+    ChecksumIndexInput(const IndexInputPtr& main);
+    virtual ~ChecksumIndexInput();
+
+    LUCENE_CLASS(ChecksumIndexInput);
+
+protected:
+    IndexInputPtr main;
+    boost::crc_32_type checksum;
+
+public:
+    /// Reads and returns a single byte.
+    /// @see IndexOutput#writeByte(uint8_t)
+    virtual uint8_t readByte();
+
+    /// Reads a specified number of bytes into an array at the specified offset.
+    /// @param b the array to read bytes into.
+    /// @param offset the offset in the array to start storing bytes.
+    /// @param length the number of bytes to read.
+    /// @see IndexOutput#writeBytes(const uint8_t*,int)
+    virtual void readBytes(uint8_t* b, int32_t offset, int32_t length);
+
+    /// Return calculated checksum.
+    int64_t getChecksum();
+
+    /// Closes the stream to further operations.
+    virtual void close();
+
+    /// Returns the current position in this file, where the next read will occur.
+    /// @see #seek(int64_t)
+    virtual int64_t getFilePointer();
+
+    /// Sets current position in this file, where the next read will occur.
+    /// @see #getFilePointer()
+    virtual void seek(int64_t pos);
+
+    /// The number of bytes in the file.
+    virtual int64_t length();
+
+    /// Returns a clone of this stream.
+    virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr());
+};
+
+}
+
+#endif
diff --git a/include/lucene++/ChecksumIndexOutput.h b/include/lucene++/ChecksumIndexOutput.h
new file mode 100644
index 00000000..67124c67
--- /dev/null
+++ b/include/lucene++/ChecksumIndexOutput.h
@@ -0,0 +1,70 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
+// Distributable under the terms of either the Apache License (Version 2.0)
+// or the GNU Lesser General Public License.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef CHECKSUMINDEXOUTPUT_H
+#define CHECKSUMINDEXOUTPUT_H
+
+#include <boost/crc.hpp>
+#include "IndexOutput.h"
+
+namespace Lucene {
+
+/// Writes bytes through to a primary IndexOutput, computing
+/// checksum. Note that you cannot use seek().
+class LPPAPI ChecksumIndexOutput : public IndexOutput { +public: + ChecksumIndexOutput(const IndexOutputPtr& main); + virtual ~ChecksumIndexOutput(); + + LUCENE_CLASS(ChecksumIndexOutput); + +protected: + IndexOutputPtr main; + boost::crc_32_type checksum; + +public: + /// Writes a single byte. + /// @see IndexInput#readByte() + virtual void writeByte(uint8_t b); + + /// Writes an array of bytes. + /// @param b the bytes to write. + /// @param length the number of bytes to write. + /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) + virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); + + /// Return calculated checksum. + int64_t getChecksum(); + + /// Forces any buffered output to be written. + virtual void flush(); + + /// Closes the stream to further operations. + virtual void close(); + + /// Returns the current position in this file, where the next write will occur. + /// @see #seek(int64_t) + virtual int64_t getFilePointer(); + + /// Sets current position in this file, where the next write will occur. + /// @see #getFilePointer() + virtual void seek(int64_t pos); + + /// Starts but does not complete the commit of this file (= writing of + /// the final checksum at the end). After this is called must call + /// {@link #finishCommit} and the {@link #close} to complete the commit. + void prepareCommit(); + + /// See {@link #prepareCommit} + void finishCommit(); + + /// The number of bytes in the file. + virtual int64_t length(); +}; + +} + +#endif diff --git a/include/lucene++/CloseableThreadLocal.h b/include/lucene++/CloseableThreadLocal.h new file mode 100644 index 00000000..2f27aa25 --- /dev/null +++ b/include/lucene++/CloseableThreadLocal.h @@ -0,0 +1,59 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef CLOSEABLETHREADLOCAL_H
+#define CLOSEABLETHREADLOCAL_H
+
+#include "LuceneThread.h"
+
+namespace Lucene {
+
+/// General purpose thread-local map.
+template <typename TYPE>
+class CloseableThreadLocal : public LuceneObject {
+public:
+    typedef boost::shared_ptr<TYPE> localDataPtr;
+    typedef Map<int64_t, localDataPtr> MapLocalData;
+
+    CloseableThreadLocal() {
+        localData = MapLocalData::newInstance();
+    }
+
+public:
+    localDataPtr get() {
+        SyncLock syncLock(this);
+        typename MapLocalData::iterator local = localData.find(LuceneThread::currentId());
+        if (local != localData.end()) {
+            return local->second;
+        }
+        localDataPtr initial(initialValue());
+        if (initial) {
+            localData.put(LuceneThread::currentId(), initial);
+        }
+        return initial;
+    }
+
+    void set(const localDataPtr& data) {
+        SyncLock syncLock(this);
+        localData.put(LuceneThread::currentId(), data);
+    }
+
+    void close() {
+        SyncLock syncLock(this);
+        localData.remove(LuceneThread::currentId());
+    }
+
+protected:
+    MapLocalData localData;
+
+    virtual localDataPtr initialValue() {
+        return localDataPtr(); // override
+    }
+};
+
+}
+
+#endif
diff --git a/include/lucene++/Collator.h b/include/lucene++/Collator.h
new file mode 100644
index 00000000..3343e23b
--- /dev/null
+++ b/include/lucene++/Collator.h
@@ -0,0 +1,32 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
+// Distributable under the terms of either the Apache License (Version 2.0)
+// or the GNU Lesser General Public License.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef COLLATOR_H
+#define COLLATOR_H
+
+#include "LuceneObject.h"
+
+namespace Lucene {
+
+/// Convenience class for storing collate objects.
+class LPPAPI Collator : public LuceneObject {
+public:
+    /// Creates a new Collator, given the file to read from.
+    Collator(std::locale locale);
+    virtual ~Collator();
+
+    LUCENE_CLASS(Collator);
+
+protected:
+    const std::collate<wchar_t>& collate;
+
+public:
+    int32_t compare(const String& first, const String& second);
+};
+
+}
+
+#endif
diff --git a/include/lucene++/Collection.h b/include/lucene++/Collection.h
new file mode 100644
index 00000000..0263f3f1
--- /dev/null
+++ b/include/lucene++/Collection.h
@@ -0,0 +1,274 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
+// Distributable under the terms of either the Apache License (Version 2.0)
+// or the GNU Lesser General Public License.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef COLLECTION_H
+#define COLLECTION_H
+
+#include <algorithm>
+#include "LuceneSync.h"
+
+namespace Lucene {
+
+/// Utility template class to handle collections that can be safely copied and shared
+template <class TYPE>
+class Collection : public LuceneSync {
+public:
+    typedef Collection<TYPE> this_type;
+    typedef boost::shared_ptr<this_type> shared_ptr;
+    typedef std::vector<TYPE> collection_type;
+    typedef typename collection_type::iterator iterator;
+    typedef typename collection_type::const_iterator const_iterator;
+    typedef TYPE value_type;
+
+    virtual ~Collection() {
+    }
+
+protected:
+    boost::shared_ptr<collection_type> container;
+
+public:
+    static this_type newInstance(int32_t size = 0) {
+        this_type instance;
+        instance.container = Lucene::newInstance<collection_type>(size);
+        return instance;
+    }
+
+    template <class ITER>
+    static this_type newInstance(ITER first, ITER last) {
+        this_type instance;
+        instance.container = Lucene::newInstance<collection_type>(first, last);
+        return instance;
+    }
+
+    void reset() {
+        resize(0);
+    }
+
+    void resize(int32_t size) {
+        if (size == 0) {
+            container.reset();
+        } else {
+            container->resize(size);
+        }
+    }
+
+    int32_t size() const {
+        return (int32_t)container->size();
+    }
+
+    bool empty() const {
+        return container->empty();
+    }
+
+    void clear() {
+        container->clear();
+    }
+
+    iterator begin() {
+        return container->begin();
+    }
+
+    iterator end() {
+        return container->end();
+    }
+
+    const_iterator begin() const {
+        return container->begin();
+    }
+
+    const_iterator end() const {
+        return container->end();
+    }
+
+    void add(const TYPE& type) {
+        container->push_back(type);
+    }
+
+    void add(int32_t pos, const TYPE& type) {
+        container->insert(container->begin() + pos, type);
+    }
+
+    template <class ITER>
+    void addAll(ITER first, ITER last) {
+        container->insert(container->end(), first, last);
+    }
+
+    template <class ITER>
+    void insert(ITER pos, const TYPE& type) {
+        container->insert(pos, type);
+    }
+
+    template <class ITER>
+    ITER remove(ITER pos) {
+        return container->erase(pos);
+    }
+
+    template <class ITER>
+    ITER remove(ITER first, ITER last) {
+        return container->erase(first, last);
+    }
+
+    void remove(const TYPE& type) {
+        container->erase(std::remove(container->begin(), container->end(), type), container->end());
+    }
+
+    template <class PRED>
+    void remove_if(PRED comp) {
+        container->erase(std::remove_if(container->begin(), container->end(), comp), container->end());
+    }
+
+    TYPE removeFirst() {
+        TYPE front = container->front();
+        container->erase(container->begin());
+        return front;
+    }
+
+    TYPE removeLast() {
+        TYPE back = container->back();
+        container->pop_back();
+        return back;
+    }
+
+    iterator find(const TYPE& type) {
+        return std::find(container->begin(), container->end(), type);
+    }
+
+    template <class PRED>
+    iterator find_if(PRED comp) {
+        return std::find_if(container->begin(), container->end(), comp);
+    }
+
+    bool contains(const TYPE& type) const {
+        return (std::find(container->begin(), container->end(), type) != container->end());
+    }
+
+    template <class PRED>
+    bool contains_if(PRED comp) const {
+        return (std::find_if(container->begin(), container->end(), comp) != container->end());
+    }
+
+    bool equals(const this_type& other) const {
+        return equals(other, std::equal_to<TYPE>());
+    }
+
+    template <class PRED>
+    bool equals(const this_type& other, PRED comp) const {
+        if (container->size() != other.container->size()) {
+            return false;
+        }
+        return std::equal(container->begin(), container->end(), other.container->begin(), comp);
+    }
+
+    int32_t hashCode() {
+        return (int32_t)(int64_t)container.get();
+    }
+
+    void swap(this_type& other) {
+        container.swap(other->container);
+    }
+
+    TYPE& operator[] (int32_t pos) {
+        return (*container)[pos];
+    }
+
+    const TYPE& operator[] (int32_t pos) const {
+        return (*container)[pos];
+    }
+
+    operator bool() const {
+        return container.get() != NULL;
+    }
+
+    bool operator! () const {
+        return !container;
+    }
+
+    bool operator== (const this_type& other) {
+        return (container == other.container);
+    }
+
+    bool operator!= (const this_type& other) {
+        return (container != other.container);
+    }
+
+    collection_type* get() {
+        return container.get();
+    }
+};
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1) {
+    Collection<TYPE> result = Collection<TYPE>::newInstance();
+    result.add(a1);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2) {
+    Collection<TYPE> result = newCollection(a1);
+    result.add(a2);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3) {
+    Collection<TYPE> result = newCollection(a1, a2);
+    result.add(a3);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4) {
+    Collection<TYPE> result = newCollection(a1, a2, a3);
+    result.add(a4);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5) {
+    Collection<TYPE> result = newCollection(a1, a2, a3, a4);
+    result.add(a5);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6) {
+    Collection<TYPE> result = newCollection(a1, a2, a3, a4, a5);
+    result.add(a6);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7) {
+    Collection<TYPE> result = newCollection(a1, a2, a3, a4, a5, a6);
+    result.add(a7);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8) {
+    Collection<TYPE> result = newCollection(a1, a2, a3, a4, a5, a6, a7);
+    result.add(a8);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8, const TYPE& a9) {
+    Collection<TYPE> result = newCollection(a1, a2, a3, a4, a5, a6, a7, a8);
+    result.add(a9);
+    return result;
+}
+
+template <class TYPE>
+Collection<TYPE> newCollection(const TYPE& a1, const TYPE& a2, const TYPE& a3, const TYPE& a4, const TYPE& a5, const TYPE& a6, const TYPE& a7, const TYPE& a8, const TYPE& a9, const TYPE& a10) {
+    Collection<TYPE> result = newCollection(a1, a2, a3, a4, a5, a6, a7, a8, a9);
+    result.add(a10);
+    return result;
+}
+
+}
+
+#endif
diff --git a/include/lucene++/Collector.h b/include/lucene++/Collector.h
new file mode 100644
index 00000000..fc9d9719
--- /dev/null
+++ b/include/lucene++/Collector.h
@@ -0,0 +1,138 @@
+/////////////////////////////////////////////////////////////////////////////
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved.
+// Distributable under the terms of either the Apache License (Version 2.0)
+// or the GNU Lesser General Public License.
+/////////////////////////////////////////////////////////////////////////////
+
+#ifndef COLLECTOR_H
+#define COLLECTOR_H
+
+#include "LuceneObject.h"
+
+namespace Lucene {
+
+/// Collectors are primarily meant to be used to gather raw results from a search, and implement sorting
+/// or custom result filtering, collation, etc.
+///
+/// Lucene's core collectors are derived from Collector.
Likely your application can use one of these +/// classes, or subclass {@link TopDocsCollector}, instead of implementing Collector directly: +/// +///
    +///
  • {@link TopDocsCollector} is an abstract base class that assumes you will retrieve the top N docs, +/// according to some criteria, after collection is done. +/// +///
  • {@link TopScoreDocCollector} is a concrete subclass {@link TopDocsCollector} and sorts according +/// to score + docID. This is used internally by the {@link IndexSearcher} search methods that do not take +/// an explicit {@link Sort}. It is likely the most frequently used collector. +/// +///
  • {@link TopFieldCollector} subclasses {@link TopDocsCollector} and sorts according to a specified +/// {@link Sort} object (sort by field). This is used internally by the {@link IndexSearcher} search methods +/// that take an explicit {@link Sort}. +/// +///
  • {@link TimeLimitingCollector}, which wraps any other Collector and aborts the search if it's taken too +/// much time. +/// +///
  • {@link PositiveScoresOnlyCollector} wraps any other Collector and prevents collection of hits whose +/// score is <= 0.0 +/// +///
+/// +/// Collector decouples the score from the collected doc: the score computation is skipped entirely if it's not +/// needed. Collectors that do need the score should implement the {@link #setScorer} method, to hold onto the +/// passed {@link Scorer} instance, and call {@link Scorer#score()} within the collect method to compute the +/// current hit's score. If your collector may request the score for a single hit multiple times, you should use +/// {@link ScoreCachingWrappingScorer}. +/// +/// NOTE: The doc that is passed to the collect method is relative to the current reader. If your collector needs +/// to resolve this to the docID space of the Multi*Reader, you must re-base it by recording the docBase from the +/// most recent setNextReader call. Here's a simple example showing how to collect docIDs into a BitSet: +/// +///
+/// class MyCollector : public Collector
+/// {
+/// public:
+///     MyCollector(const BitSetPtr& bits)
+///     {
+///         this->bits = bits;
+///         this->docBase = 0;
+///     }
+///
+/// protected:
+///     BitSetPtr bits;
+///     int32_t docBase;
+///
+/// public:
+///     virtual void setScorer(const ScorerPtr& scorer)
+///     {
+///         // ignore scorer
+///     }
+///
+///     virtual void collect(int32_t doc)
+///     {
+///         bits->set(doc + docBase);
+///     }
+///
+///     virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase)
+///     {
+///         this->docBase = docBase;
+///     }
+///
+///     virtual bool acceptsDocsOutOfOrder()
+///     {
+///         return true; // accept docs out of order (for a BitSet it doesn't matter)
+///     }
+/// };
+///
+/// ...
+///
+/// SearcherPtr searcher = newLucene(indexReader);
+/// BitSetPtr bits = newLucene(indexReader->maxDoc());
+/// searcher->search(query, newLucene(bits));
+///
+/// 
+/// Not all collectors will need to rebase the docID. For example, a collector that simply counts the +/// total number of hits would skip it. +/// +/// NOTE: Prior to 2.9, Lucene silently filtered out hits with score <= 0. As of 2.9, the core Collectors +/// no longer do that. It's very unusual to have such hits (a negative query boost, or function query +/// returning negative custom scores, could cause it to happen). If you need that behavior, use {@link +/// PositiveScoresOnlyCollector}. +class LPPAPI Collector : public LuceneObject { +public: + virtual ~Collector(); + LUCENE_CLASS(Collector); + +public: + /// Called before successive calls to {@link #collect(int32_t)}. Implementations that need the score + /// of the current document (passed-in to {@link #collect(int32_t)}), should save the passed-in Scorer + /// and call scorer.score() when needed. + virtual void setScorer(const ScorerPtr& scorer) = 0; + + /// Called once for every document matching a query, with the unbased document number. + /// + /// Note: This is called in an inner search loop. For good search performance, implementations of this + /// method should not call {@link Searcher#doc(int32_t)} or {@link IndexReader#document(int32_t)} on + /// every hit. Doing so can slow searches by an order of magnitude or more. + virtual void collect(int32_t doc) = 0; + + /// Called before collecting from each IndexReader. All doc ids in {@link #collect(int32_t)} will + /// correspond to reader. Add docBase to the current IndexReaders internal document id to re-base ids + /// in {@link #collect(int32_t)}. + /// @param reader next IndexReader + /// @param docBase + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase) = 0; + + /// Return true if this collector does not require the matching docIDs to be delivered in int sort + /// order (smallest to largest) to {@link #collect}. + /// + /// Most Lucene Query implementations will visit matching docIDs in order. 
However, some queries + /// (currently limited to certain cases of {@link BooleanQuery}) can achieve faster searching if the + /// Collector allows them to deliver the docIDs out of order. + /// + /// Many collectors don't mind getting docIDs out of order, so it's important to return true here. + virtual bool acceptsDocsOutOfOrder() = 0; +}; + +} + +#endif diff --git a/include/lucene++/ComplexExplanation.h b/include/lucene++/ComplexExplanation.h new file mode 100644 index 00000000..2b8d8bec --- /dev/null +++ b/include/lucene++/ComplexExplanation.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef COMPLEXEXPLANATION_H +#define COMPLEXEXPLANATION_H + +#include "Explanation.h" + +namespace Lucene { + +/// Describes the score computation for document and query, and can distinguish a match independent +/// of a positive value. +class LPPAPI ComplexExplanation : public Explanation { +public: + ComplexExplanation(bool match = false, double value = 0, const String& description = EmptyString); + virtual ~ComplexExplanation(); + + LUCENE_CLASS(ComplexExplanation); + +protected: + bool match; + +public: + /// The match status of this explanation node. + bool getMatch(); + + /// Sets the match status assigned to this explanation node. + void setMatch(bool match); + + /// Indicates whether or not this Explanation models a good match. + /// + /// If the match status is explicitly set this method uses it; otherwise it defers to the + /// superclass. 
+ /// + /// @see #getMatch + virtual bool isMatch(); + +protected: + virtual String getSummary(); +}; + +} + +#endif diff --git a/include/lucene++/CompoundFileReader.h b/include/lucene++/CompoundFileReader.h new file mode 100644 index 00000000..8de2adf3 --- /dev/null +++ b/include/lucene++/CompoundFileReader.h @@ -0,0 +1,120 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef COMPOUNDFILEREADER_H +#define COMPOUNDFILEREADER_H + +#include "Directory.h" +#include "BufferedIndexInput.h" + +namespace Lucene { + +/// Class for accessing a compound stream. +/// This class implements a directory, but is limited to only read operations. +/// Directory methods that would normally modify data throw an exception. 
+class LPPAPI CompoundFileReader : public Directory { +public: + CompoundFileReader(const DirectoryPtr& dir, const String& name); + CompoundFileReader(const DirectoryPtr& dir, const String& name, int32_t readBufferSize); + virtual ~CompoundFileReader(); + + LUCENE_CLASS(CompoundFileReader); + +protected: + struct FileEntry { + FileEntry(int64_t offset = 0, int64_t length = 0) { + this->offset = offset; + this->length = length; + } + int64_t offset; + int64_t length; + }; + typedef boost::shared_ptr FileEntryPtr; + typedef HashMap MapStringFileEntryPtr; + + DirectoryPtr directory; + String fileName; + int32_t readBufferSize; + IndexInputPtr stream; + MapStringFileEntryPtr entries; + +protected: + void ConstructReader(const DirectoryPtr& dir, const String& name, int32_t readBufferSize); + +public: + DirectoryPtr getDirectory(); + String getName(); + virtual void close(); + virtual IndexInputPtr openInput(const String& name); + virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); + + /// Returns an array of strings, one for each file in the directory. + virtual HashSet listAll(); + + /// Returns true if a file with the given name exists. + virtual bool fileExists(const String& name); + + /// Returns the time the compound file was last modified. + virtual uint64_t fileModified(const String& name); + + /// Set the modified time of the compound file to now. + virtual void touchFile(const String& name); + + /// Not implemented + virtual void deleteFile(const String& name); + + /// Not implemented + virtual void renameFile(const String& from, const String& to); + + /// Returns the length of a file in the directory. + virtual int64_t fileLength(const String& name); + + /// Not implemented + virtual IndexOutputPtr createOutput(const String& name); + + /// Not implemented + virtual LockPtr makeLock(const String& name); +}; + +/// Implementation of an IndexInput that reads from a portion of the compound file. 
+class LPPAPI CSIndexInput : public BufferedIndexInput { +public: + CSIndexInput(); + CSIndexInput(const IndexInputPtr& base, int64_t fileOffset, int64_t length); + CSIndexInput(const IndexInputPtr& base, int64_t fileOffset, int64_t length, int32_t readBufferSize); + virtual ~CSIndexInput(); + + LUCENE_CLASS(CSIndexInput); + +public: + IndexInputPtr base; + int64_t fileOffset; + int64_t _length; + +public: + /// Closes the stream to further operations. + virtual void close(); + + virtual int64_t length(); + + /// Returns a clone of this stream. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + +protected: + /// Implements buffer refill. Reads bytes from the current position in the input. + /// @param b the array to read bytes into + /// @param offset the offset in the array to start storing bytes + /// @param len the number of bytes to read + virtual void readInternal(uint8_t* b, int32_t offset, int32_t length); + + /// Implements seek. Sets current position in this file, where the next {@link + /// #readInternal(byte[],int,int)} will occur. + virtual void seekInternal(int64_t pos); +}; + +} + +#endif diff --git a/include/lucene++/CompoundFileWriter.h b/include/lucene++/CompoundFileWriter.h new file mode 100644 index 00000000..804d0d71 --- /dev/null +++ b/include/lucene++/CompoundFileWriter.h @@ -0,0 +1,77 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef COMPOUNDFILEWRITER_H +#define COMPOUNDFILEWRITER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Combines multiple files into a single compound file. 
+/// The file format: +/// VInt fileCount +/// {Directory} +/// fileCount entries with the following structure: +/// int64_t dataOffset +/// String fileName +/// {File Data} +/// fileCount entries with the raw data of the corresponding file +/// +/// The fileCount integer indicates how many files are contained in this compound file. The {directory} +/// that follows has that many entries. Each directory entry contains a long pointer to the start of +/// this file's data section, and a string with that file's name. +class LPPAPI CompoundFileWriter : public LuceneObject { +public: + CompoundFileWriter(const DirectoryPtr& dir, const String& name, const CheckAbortPtr& checkAbort = CheckAbortPtr()); + virtual ~CompoundFileWriter(); + + LUCENE_CLASS(CompoundFileWriter); + +protected: + struct FileEntry { + /// source file + String file; + + /// temporary holder for the start of directory entry for this file + int64_t directoryOffset; + + /// temporary holder for the start of this file's data section + int64_t dataOffset; + }; + + DirectoryWeakPtr _directory; + String fileName; + HashSet ids; + Collection entries; + bool merged; + CheckAbortPtr checkAbort; + +public: + /// Returns the directory of the compound file. + DirectoryPtr getDirectory(); + + /// Returns the name of the compound file. + String getName(); + + /// Add a source stream. file is the string by which the sub-stream will be known in the + /// compound stream. + void addFile(const String& file); + + /// Merge files with the extensions added up to now. All files with these extensions are + /// combined sequentially into the compound stream. After successful merge, the source + /// are deleted.files + void close(); + +protected: + /// Copy the contents of the file with specified extension into the provided output stream. + /// Use the provided buffer for moving data to reduce memory allocation. 
+ void copyFile(const FileEntry& source, const IndexOutputPtr& os, ByteArray buffer); +}; + +} + +#endif diff --git a/include/lucene++/CompressionTools.h b/include/lucene++/CompressionTools.h new file mode 100644 index 00000000..4566c1f1 --- /dev/null +++ b/include/lucene++/CompressionTools.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef COMPRESSIONTOOLS_H +#define COMPRESSIONTOOLS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Simple utility class providing static methods to compress and decompress binary data for stored fields. +class LPPAPI CompressionTools : public LuceneObject { +public: + virtual ~CompressionTools(); + + LUCENE_CLASS(CompressionTools); + +public: + /// Compresses the specified byte range using the specified compressionLevel + static ByteArray compress(uint8_t* value, int32_t offset, int32_t length, int32_t compressionLevel); + + /// Compresses the specified byte range, with default BEST_COMPRESSION level + static ByteArray compress(uint8_t* value, int32_t offset, int32_t length); + + /// Compresses all bytes in the array, with default BEST_COMPRESSION level + static ByteArray compress(ByteArray value); + + /// Compresses the String value, with default BEST_COMPRESSION level + static ByteArray compressString(const String& value); + + /// Compresses the String value using the specified compressionLevel + static ByteArray compressString(const String& value, int32_t compressionLevel); + + /// Decompress the byte array previously returned by compress + static ByteArray decompress(ByteArray value); + + /// Decompress the byte array previously returned by compressString back into a String + static String 
decompressString(ByteArray value); + +protected: + static const int32_t COMPRESS_BUFFER; +}; + +} + +#endif diff --git a/include/lucene++/ConcurrentMergeScheduler.h b/include/lucene++/ConcurrentMergeScheduler.h new file mode 100644 index 00000000..22b33ed6 --- /dev/null +++ b/include/lucene++/ConcurrentMergeScheduler.h @@ -0,0 +1,100 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CONCURRENTMERGESCHEDULER_H +#define CONCURRENTMERGESCHEDULER_H + +#include "MergeScheduler.h" + +namespace Lucene { + +/// A {@link MergeScheduler} that runs each merge using a separate thread, up until a +/// maximum number of threads ({@link #setMaxThreadCount}) at which when a merge is needed, +/// the thread(s) that are updating the index will pause until one or more merges completes. +/// This is a simple way to use concurrency in the indexing process without having to create +/// and manage application level threads. +class LPPAPI ConcurrentMergeScheduler : public MergeScheduler { +public: + ConcurrentMergeScheduler(); + virtual ~ConcurrentMergeScheduler(); + + LUCENE_CLASS(ConcurrentMergeScheduler); + +protected: + int32_t mergeThreadPriority; + + SetMergeThread mergeThreads; + + /// Max number of threads allowed to be merging at once + int32_t maxThreadCount; + + DirectoryPtr dir; + + bool closed; + IndexWriterWeakPtr _writer; + + static Collection allInstances; + + bool suppressExceptions; + static bool anyExceptions; + +public: + virtual void initialize(); + + /// Sets the max # simultaneous threads that may be running. 
If a merge is necessary yet + /// we already have this many threads running, the incoming thread (that is calling + /// add/updateDocument) will block until a merge thread has completed. + virtual void setMaxThreadCount(int32_t count); + + /// Get the max # simultaneous threads that may be running. @see #setMaxThreadCount. + virtual int32_t getMaxThreadCount(); + + /// Return the priority that merge threads run at. By default the priority is 1 plus the + /// priority of (ie, slightly higher priority than) the first thread that calls merge. + virtual int32_t getMergeThreadPriority(); + + /// Set the priority that merge threads run at. + virtual void setMergeThreadPriority(int32_t pri); + + virtual void close(); + + virtual void sync(); + + virtual void merge(const IndexWriterPtr& writer); + + /// Used for testing + static bool anyUnhandledExceptions(); + static void clearUnhandledExceptions(); + + /// Used for testing + void setSuppressExceptions(); + void clearSuppressExceptions(); + + /// Used for testing + static void setTestMode(); + +protected: + virtual bool verbose(); + virtual void message(const String& message); + virtual void initMergeThreadPriority(); + virtual int32_t mergeThreadCount(); + + /// Does the actual merge, by calling {@link IndexWriter#merge} + virtual void doMerge(const OneMergePtr& merge); + + virtual MergeThreadPtr getMergeThread(const IndexWriterPtr& writer, const OneMergePtr& merge); + + /// Called when an exception is hit in a background merge thread + virtual void handleMergeException(const LuceneException& exc); + + virtual void addMyself(); + + friend class MergeThread; +}; + +} + +#endif diff --git a/include/lucene++/ConjunctionScorer.h b/include/lucene++/ConjunctionScorer.h new file mode 100644 index 00000000..d902d00e --- /dev/null +++ b/include/lucene++/ConjunctionScorer.h @@ -0,0 +1,39 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CONJUNCTIONSCORER_H +#define CONJUNCTIONSCORER_H + +#include "Scorer.h" + +namespace Lucene { + +/// Scorer for conjunctions, sets of queries, all of which are required. +class ConjunctionScorer : public Scorer { +public: + ConjunctionScorer(const SimilarityPtr& similarity, Collection scorers); + virtual ~ConjunctionScorer(); + + LUCENE_CLASS(ConjunctionScorer); + +protected: + Collection scorers; + double coord; + int32_t lastDoc; + +public: + virtual int32_t advance(int32_t target); + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual double score(); + +protected: + int32_t doNext(); +}; + +} + +#endif diff --git a/include/lucene++/ConstantScoreQuery.h b/include/lucene++/ConstantScoreQuery.h new file mode 100644 index 00000000..8a637b63 --- /dev/null +++ b/include/lucene++/ConstantScoreQuery.h @@ -0,0 +1,52 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CONSTANTSCOREQUERY_H +#define CONSTANTSCOREQUERY_H + +#include "Query.h" +#include "Weight.h" +#include "Scorer.h" + +namespace Lucene { + +/// A query that wraps a filter and simply returns a constant score equal to the query boost for every +/// document in the filter. 
+class LPPAPI ConstantScoreQuery : public Query { +public: + ConstantScoreQuery(const FilterPtr& filter); + virtual ~ConstantScoreQuery(); + + LUCENE_CLASS(ConstantScoreQuery); + +protected: + FilterPtr filter; + +public: + using Query::toString; + + /// Returns the encapsulated filter + FilterPtr getFilter(); + + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + virtual void extractTerms(SetTerm terms); + + virtual WeightPtr createWeight(const SearcherPtr& searcher); + + /// Prints a user-readable version of this query. + virtual String toString(const String& field); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + friend class ConstantWeight; + friend class ConstantScorer; +}; + +} + +#endif diff --git a/include/lucene++/Constants.h b/include/lucene++/Constants.h new file mode 100644 index 00000000..225af5c5 --- /dev/null +++ b/include/lucene++/Constants.h @@ -0,0 +1,86 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CONSTANTS_H +#define CONSTANTS_H + +#include "Lucene.h" + +namespace Lucene { + +/// Some useful Lucene constants. +class LPPAPI Constants { +private: + Constants(); + +public: + virtual ~Constants(); + +public: + static String OS_NAME; + static String LUCENE_MAIN_VERSION; + static String LUCENE_VERSION; +}; + +/// Use by certain classes to match version compatibility across releases of Lucene. +/// +/// WARNING: When changing the version parameter that you supply to components in Lucene, do not simply +/// change the version at search-time, but instead also adjust your indexing code to match, and re-index. 
+class LPPAPI LuceneVersion { +private: + LuceneVersion(); + +public: + virtual ~LuceneVersion(); + +public: + enum Version { + /// Match settings and bugs in Lucene's 2.0 release. + LUCENE_20 = 0, + + /// Match settings and bugs in Lucene's 2.1 release. + LUCENE_21, + + /// Match settings and bugs in Lucene's 2.2 release. + LUCENE_22, + + /// Match settings and bugs in Lucene's 2.3 release. + LUCENE_23, + + /// Match settings and bugs in Lucene's 2.4 release. + LUCENE_24, + + /// Match settings and bugs in Lucene's 2.9 release. + LUCENE_29, + + /// Match settings and bugs in Lucene's 3.0 release. + /// + /// Use this to get the latest & greatest settings, bug fixes, etc, for Lucene. + LUCENE_30, + + /// Add new constants for later versions **here** to respect order! + + /// Warning: If you use this setting, and then upgrade to a newer release of Lucene, + /// sizable changes may happen. If backwards compatibility is important then you + /// should instead explicitly specify an actual version. + /// + /// If you use this constant then you may need to re-index all of your documents + /// when upgrading Lucene, as the way text is indexed may have changed. Additionally, + /// you may need to re-test your entire application to ensure it behaves as + /// expected, as some defaults may have changed and may break functionality in your + /// application. + /// + /// Deprecated: Use an actual version instead. + LUCENE_CURRENT + }; + +public: + static bool onOrAfter(LuceneVersion::Version first, LuceneVersion::Version second); +}; + +} + +#endif diff --git a/include/lucene++/CustomScoreProvider.h b/include/lucene++/CustomScoreProvider.h new file mode 100644 index 00000000..22d27cba --- /dev/null +++ b/include/lucene++/CustomScoreProvider.h @@ -0,0 +1,93 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CUSTOMSCOREPROVIDER_H +#define CUSTOMSCOREPROVIDER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// An instance of this subclass should be returned by {@link CustomScoreQuery#getCustomScoreProvider}, +/// if you want to modify the custom score calculation of a {@link CustomScoreQuery}. +/// +/// Since Lucene 2.9, queries operate on each segment of an Index separately, so overriding the similar +/// (now deprecated) methods in {@link CustomScoreQuery} is no longer suitable, as the supplied doc ID +/// is per-segment and without knowledge of the IndexReader you cannot access the document or {@link +/// FieldCache}. +class LPPAPI CustomScoreProvider : public LuceneObject { +public: + /// Creates a new instance of the provider class for the given {@link IndexReader}. + CustomScoreProvider(const IndexReaderPtr& reader); + + virtual ~CustomScoreProvider(); + + LUCENE_CLASS(CustomScoreProvider); + +protected: + IndexReaderPtr reader; + +public: + /// Compute a custom score by the subQuery score and a number of ValueSourceQuery scores. + /// + /// Subclasses can override this method to modify the custom score. + /// + /// If your custom scoring is different than the default herein you should override at least one of + /// the two customScore() methods. If the number of ValueSourceQueries is always < 2 it is + /// sufficient to override the other {@link #customScore(int32_t, double, double) customScore()} + /// method, which is simpler. + /// + /// The default computation herein is a multiplication of given scores: + ///
+    /// ModifiedScore = valSrcScore * valSrcScores[0] * valSrcScores[1] * ...
+    /// 
+ /// + /// @param doc id of scored doc. + /// @param subQueryScore score of that doc by the subQuery. + /// @param valSrcScores scores of that doc by the ValueSourceQuery. + /// @return custom score. + virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); + + /// Compute a custom score by the subQuery score and the ValueSourceQuery score. + /// + /// Subclasses can override this method to modify the custom score. + /// + /// If your custom scoring is different than the default herein you should override at least one of the + /// two customScore() methods. If the number of ValueSourceQueries is always < 2 it is sufficient to + /// override this customScore() method, which is simpler. + /// + /// The default computation herein is a multiplication of the two scores: + ///
+    /// ModifiedScore = subQueryScore * valSrcScore
+    /// 
+ /// + /// @param doc id of scored doc. + /// @param subQueryScore score of that doc by the subQuery. + /// @param valSrcScore score of that doc by the ValueSourceQuery. + /// @return custom score. + virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); + + /// Explain the custom score. Whenever overriding {@link #customScore(int32_t, double, Collection)}, + /// this method should also be overridden to provide the correct explanation for the part of the custom scoring. + /// + /// @param doc doc being explained. + /// @param subQueryExpl explanation for the sub-query part. + /// @param valSrcExpls explanation for the value source part. + /// @return an explanation for the custom score + virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls); + + /// Explain the custom score. Whenever overriding {@link #customScore(int32_t, double, double)}, + /// this method should also be overridden to provide the correct explanation for the part of the custom scoring. + /// @param doc doc being explained. + /// @param subQueryExpl explanation for the sub-query part. + /// @param valSrcExpl explanation for the value source part. + /// @return an explanation for the custom score + virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl); +}; + +} + +#endif diff --git a/include/lucene++/CustomScoreQuery.h b/include/lucene++/CustomScoreQuery.h new file mode 100644 index 00000000..fcda80e9 --- /dev/null +++ b/include/lucene++/CustomScoreQuery.h @@ -0,0 +1,132 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef CUSTOMSCOREQUERY_H +#define CUSTOMSCOREQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// Query that sets document score as a programmatic function of several (sub) scores: +///
    +///
  1. the score of its subQuery (any query) +///
  2. (optional) the score of its ValueSourceQuery (or queries). For most simple/convenient use cases +/// this query is likely to be a {@link FieldScoreQuery} +///
+/// Subclasses can modify the computation by overriding {@link #getCustomScoreProvider}. +class LPPAPI CustomScoreQuery : public Query { +public: + /// Create a CustomScoreQuery over input subQuery. + /// @param subQuery the sub query whose scored is being customed. Must not be null. + CustomScoreQuery(const QueryPtr& subQuery); + + /// Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. + /// @param subQuery the sub query whose score is being customized. Must not be null. + /// @param valSrcQuery a value source query whose scores are used in the custom score computation. For + /// most simple/convenient use case this would be a {@link FieldScoreQuery}. This parameter is + /// optional - it can be null. + CustomScoreQuery(const QueryPtr& subQuery, const ValueSourceQueryPtr& valSrcQuery); + + /// Create a CustomScoreQuery over input subQuery and a {@link ValueSourceQuery}. + /// @param subQuery the sub query whose score is being customized. Must not be null. + /// @param valSrcQueries value source queries whose scores are used in the custom score computation. + /// For most simple/convenient use case these would be {@link FieldScoreQueries}. This parameter is + /// optional - it can be null or even an empty array. + CustomScoreQuery(const QueryPtr& subQuery, Collection valSrcQueries); + + virtual ~CustomScoreQuery(); + + LUCENE_CLASS(CustomScoreQuery); + +protected: + QueryPtr subQuery; + Collection valSrcQueries; // never null (empty array if there are no valSrcQueries). + bool strict; // if true, valueSource part of query does not take part in weights normalization. 
+ +public: + using Query::toString; + + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + virtual void extractTerms(SetTerm terms); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual String toString(const String& field); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + /// Compute a custom score by the subQuery score and a number of ValueSourceQuery scores. + /// + /// Deprecated: Will be removed in Lucene 3.1. + /// + /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment + /// search (since Lucene 2.9). + /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} + /// for the given {@link IndexReader}. + virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); + + /// Compute a custom score by the subQuery score and the ValueSourceQuery score. + /// + /// Deprecated: Will be removed in Lucene 3.1. + /// + /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment + /// search (since Lucene 2.9). + /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} + /// for the given {@link IndexReader}. + virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); + + /// Explain the custom score. + /// + /// Deprecated: Will be removed in Lucene 3.1. + /// + /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment + /// search (since Lucene 2.9). + /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} + /// for the given {@link IndexReader}. + virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls); + + /// Explain the custom score. + /// + /// Deprecated Will be removed in Lucene 3.1. 
+ /// + /// The doc is relative to the current reader, which is unknown to CustomScoreQuery when using per-segment + /// search (since Lucene 2.9). + /// Please override {@link #getCustomScoreProvider} and return a subclass of {@link CustomScoreProvider} + /// for the given {@link IndexReader}. + virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl); + + virtual WeightPtr createWeight(const SearcherPtr& searcher); + + /// Checks if this is strict custom scoring. In strict custom scoring, the ValueSource part does not + /// participate in weight normalization. This may be useful when one wants full control over how scores + /// are modified, and does not care about normalizing by the ValueSource part. One particular case where + /// this is useful if for testing this query. + /// + /// Note: only has effect when the ValueSource part is not null. + virtual bool isStrict(); + + /// Set the strict mode of this query. + /// @param strict The strict mode to set. + /// @see #isStrict() + virtual void setStrict(bool strict); + + /// A short name of this query, used in {@link #toString(String)}. + virtual String name(); + +protected: + void ConstructQuery(const QueryPtr& subQuery, Collection valSrcQueries); + + /// Returns a {@link CustomScoreProvider} that calculates the custom scores for the given {@link + /// IndexReader}. The default implementation returns a default implementation as specified in + /// the docs of {@link CustomScoreProvider}. 
+ virtual CustomScoreProviderPtr getCustomScoreProvider(const IndexReaderPtr& reader); + + friend class CustomWeight; + friend class CustomScorer; +}; + +} + +#endif diff --git a/include/lucene++/CycleCheck.h b/include/lucene++/CycleCheck.h new file mode 100644 index 00000000..240eb568 --- /dev/null +++ b/include/lucene++/CycleCheck.h @@ -0,0 +1,50 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef CYCLECHECK_H +#define CYCLECHECK_H + +#include "Lucene.h" + +namespace Lucene { + +/// Debug utility to track shared_ptr utilization. +class LPPAPI CycleCheck { +public: + virtual ~CycleCheck(); + +protected: + static MapStringInt cycleMap; + static Set staticRefs; + +protected: + void addRef(const String& className, int32_t ref); + static void addStatic(LuceneObjectPtr* staticRef); + +public: + template + static void addStatic(TYPE& staticRef) { + addStatic(reinterpret_cast(&staticRef)); + } + + static void dumpRefs(); +}; + +template +class CycleCheckT : public CycleCheck { +public: + CycleCheckT() { + addRef(TYPE::_getClassName(), 1); + } + + virtual ~CycleCheckT() { + addRef(TYPE::_getClassName(), -1); + } +}; + +} + +#endif diff --git a/include/lucene++/DateField.h b/include/lucene++/DateField.h new file mode 100644 index 00000000..ee9c4d89 --- /dev/null +++ b/include/lucene++/DateField.h @@ -0,0 +1,56 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef DATEFIELD_H +#define DATEFIELD_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Provides support for converting dates to strings and vice-versa. The strings are structured so that +/// lexicographic sorting orders by date, which makes them suitable for use as field values and search terms. +/// +/// Note that this class saves dates with millisecond granularity, which is bad for {@link TermRangeQuery} and +/// {@link PrefixQuery}, as those queries are expanded to a BooleanQuery with a potentially large number of terms +/// when searching. Thus you might want to use {@link DateTools} instead. +/// +/// Note: dates before 1970 cannot be used, and therefore cannot be indexed when using this class. See {@link +/// DateTools} for an alternative without such a limitation. +/// +/// Another approach is {@link NumericUtils}, which provides a sortable binary representation (prefix encoded) +/// of numeric values, which date/time are. For indexing a {@link Date} or {@link Calendar}, just get the unix +/// timestamp as long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and index this as a numeric +/// value with {@link NumericField} and use {@link NumericRangeQuery} to query it. +/// +/// @deprecated If you build a new index, use {@link DateTools} or {@link NumericField} instead. This class is +/// included for use with existing indices and will be removed in a future release (possibly Lucene 4.0). +class LPPAPI DateField : public LuceneObject { +public: + virtual ~DateField(); + + LUCENE_CLASS(DateField); + +protected: + static int32_t DATE_LEN(); + +public: + static const String& MIN_DATE_STRING(); + static const String& MAX_DATE_STRING(); + + /// Converts a Date to a string suitable for indexing. + static String dateToString(const boost::posix_time::ptime& date); + + /// Converts a millisecond time to a string suitable for indexing. 
+ static String timeToString(int64_t time); + + /// Converts a string-encoded date into a millisecond time. + static int64_t stringToTime(const String& s); +}; + +} + +#endif diff --git a/include/lucene++/DateTools.h b/include/lucene++/DateTools.h new file mode 100644 index 00000000..0a4126fb --- /dev/null +++ b/include/lucene++/DateTools.h @@ -0,0 +1,108 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DATETOOLS_H +#define DATETOOLS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Provides support for converting dates to strings and vice-versa. The strings are structured so that +/// lexicographic sorting orders them by date, which makes them suitable for use as field values and search +/// terms. +/// +/// This class also helps you to limit the resolution of your dates. Do not save dates with a finer resolution +/// than you really need, as then RangeQuery and PrefixQuery will require more memory and become slower. +/// +/// Compared to {@link DateField} the strings generated by the methods in this class take slightly more space, +/// unless your selected resolution is set to Resolution.DAY or lower. +/// +/// Another approach is {@link NumericUtils}, which provides a sortable binary representation (prefix encoded) +/// of numeric values, which date/time are. For indexing a {@link Date} or {@link Calendar}, just get the unix +/// timestamp as long using {@link Date#getTime} or {@link Calendar#getTimeInMillis} and index this as a numeric +/// value with {@link NumericField} and use {@link NumericRangeQuery} to query it. 
+class LPPAPI DateTools : public LuceneObject { +public: + virtual ~DateTools(); + + LUCENE_CLASS(DateTools); + +public: + enum Resolution { + RESOLUTION_NULL, + RESOLUTION_YEAR, + RESOLUTION_MONTH, + RESOLUTION_DAY, + RESOLUTION_HOUR, + RESOLUTION_MINUTE, + RESOLUTION_SECOND, + RESOLUTION_MILLISECOND + }; + + enum DateOrder { + DATEORDER_LOCALE, + DATEORDER_YMD, + DATEORDER_DMY, + DATEORDER_MDY + }; + +protected: + static DateOrder dateOrder; + +public: + /// Converts a Date to a string suitable for indexing. + /// @param date the date to be converted + /// @param resolution the desired resolution + /// @return a string in format yyyyMMddHHmmssSSS or shorter, depending on resolution; using GMT as timezone + static String dateToString(const boost::posix_time::ptime& date, Resolution resolution); + + /// Converts a millisecond time to a string suitable for indexing. + /// @param time the date expressed as milliseconds since January 1, 1970, 00:00:00 GMT + /// @param resolution the desired resolution + /// @return a string in format yyyyMMddHHmmssSSS or shorter, depending on resolution; using GMT as timezone + static String timeToString(int64_t time, Resolution resolution); + + /// Converts a string produced by timeToString or dateToString back to a time, represented as the number of + /// milliseconds since January 1, 1970, 00:00:00 GMT. + /// @param dateString the date string to be converted + /// @return the number of milliseconds since January 1, 1970, 00:00:00 GMT + static int64_t stringToTime(const String& dateString); + + /// Converts a string produced by timeToString or dateToString back to a time, represented as a ptime object. + /// @param dateString the date string to be converted + /// @return the parsed time as a ptime object + static boost::posix_time::ptime stringToDate(const String& dateString); + + /// Limit a date's resolution. For example, the date 2004-09-21 13:50:11 will be changed to 2004-09-01 00:00:00 + /// when using Resolution.MONTH. 
+ /// @param resolution The desired resolution of the date to be returned + /// @return the date with all values more precise than resolution set to 0 or 1 + static boost::posix_time::ptime round(const boost::posix_time::ptime& date, Resolution resolution); + + /// Limit a date's resolution. For example, the date 1095767411000 (which represents 2004-09-21 13:50:11) will + /// be changed to 1093989600000 (2004-09-01 00:00:00) when using Resolution.MONTH. + /// @param resolution The desired resolution of the date to be returned + /// @return the date with all values more precise than resolution set to 0 or 1, expressed as milliseconds + /// since January 1, 1970, 00:00:00 GMT + static int64_t round(int64_t time, Resolution resolution); + + /// Allow overriding of date ordering. + static void setDateOrder(DateTools::DateOrder order); + + /// Return date ordering based on given locale (or overridden in {@link #setDateOrder(DateTools::DateOrder)}). + static DateTools::DateOrder getDateOrder(std::locale locale = std::locale()); + + /// Parse a given date using locale date format + /// @param dateString the date string to be converted + /// @param locale the locale to use for parsing + /// @return the parsed time as a ptime object + static boost::posix_time::ptime parseDate(const String& dateString, std::locale locale = std::locale()); +}; + +} + +#endif diff --git a/include/lucene++/DefaultSimilarity.h b/include/lucene++/DefaultSimilarity.h new file mode 100644 index 00000000..78f18696 --- /dev/null +++ b/include/lucene++/DefaultSimilarity.h @@ -0,0 +1,60 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef DEFAULTSIMILARITY_H +#define DEFAULTSIMILARITY_H + +#include "Similarity.h" + +namespace Lucene { + +/// Default scoring implementation. +class LPPAPI DefaultSimilarity : public Similarity { +public: + DefaultSimilarity(); + virtual ~DefaultSimilarity(); + + LUCENE_CLASS(DefaultSimilarity); + +protected: + bool discountOverlaps; // Default false + +public: + /// Implemented as state->getBoost() * lengthNorm(numTerms), where numTerms is {@link + /// FieldInvertState#getLength()} if {@link #setDiscountOverlaps} is false, else it's {@link + /// FieldInvertState#getLength()} - {@link FieldInvertState#getNumOverlap()}. + virtual double computeNorm(const String& fieldName, const FieldInvertStatePtr& state); + + /// Implemented as 1 / sqrt(numTerms). + virtual double lengthNorm(const String& fieldName, int32_t numTokens); + + /// Implemented as 1 / sqrt(sumOfSquaredWeights). + virtual double queryNorm(double sumOfSquaredWeights); + + /// Implemented as sqrt(freq). + virtual double tf(double freq); + + /// Implemented as 1 / (distance + 1). + virtual double sloppyFreq(int32_t distance); + + /// Implemented as log(numDocs / (docFreq + 1)) + 1. + virtual double idf(int32_t docFreq, int32_t numDocs); + + /// Implemented as overlap / maxOverlap. + virtual double coord(int32_t overlap, int32_t maxOverlap); + + /// Determines whether overlap tokens (Tokens with 0 position increment) are ignored when computing + /// norm. By default this is false, meaning overlap tokens are counted just like non-overlap tokens. 
+ /// @see #computeNorm + void setDiscountOverlaps(bool v); + + /// @see #setDiscountOverlaps + bool getDiscountOverlaps(); +}; + +} + +#endif diff --git a/include/lucene++/DefaultSkipListReader.h b/include/lucene++/DefaultSkipListReader.h new file mode 100644 index 00000000..073fd8fc --- /dev/null +++ b/include/lucene++/DefaultSkipListReader.h @@ -0,0 +1,60 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DEFAULTSKIPLISTREADER_H +#define DEFAULTSKIPLISTREADER_H + +#include "MultiLevelSkipListReader.h" + +namespace Lucene { + +/// Implements the skip list reader for the default posting list format that stores positions and payloads. +class DefaultSkipListReader : public MultiLevelSkipListReader { +public: + DefaultSkipListReader(const IndexInputPtr& skipStream, int32_t maxSkipLevels, int32_t skipInterval); + virtual ~DefaultSkipListReader(); + + LUCENE_CLASS(DefaultSkipListReader); + +protected: + bool currentFieldStoresPayloads; + Collection freqPointer; + Collection proxPointer; + Collection payloadLength; + + int64_t lastFreqPointer; + int64_t lastProxPointer; + int32_t lastPayloadLength; + +public: + void init(int64_t skipPointer, int64_t freqBasePointer, int64_t proxBasePointer, int32_t df, bool storesPayloads); + + /// Returns the freq pointer of the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} + /// has skipped. + int64_t getFreqPointer(); + + /// Returns the prox pointer of the doc to which the last call of {@link MultiLevelSkipListReader#skipTo(int)} + /// has skipped. 
+ int64_t getProxPointer(); + + /// Returns the payload length of the payload stored just before the doc to which the last call of {@link + /// MultiLevelSkipListReader#skipTo(int)} has skipped. + int32_t getPayloadLength(); + +protected: + /// Seeks the skip entry on the given level + virtual void seekChild(int32_t level); + + /// Copies the values of the last read skip entry on this level + virtual void setLastSkipData(int32_t level); + + /// Subclasses must implement the actual skip data encoding in this method. + virtual int32_t readSkipData(int32_t level, const IndexInputPtr& skipStream); +}; + +} + +#endif diff --git a/include/lucene++/DefaultSkipListWriter.h b/include/lucene++/DefaultSkipListWriter.h new file mode 100644 index 00000000..ae467a3b --- /dev/null +++ b/include/lucene++/DefaultSkipListWriter.h @@ -0,0 +1,53 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DEFAULTSKIPLISTWRITER_H +#define DEFAULTSKIPLISTWRITER_H + +#include "MultiLevelSkipListWriter.h" + +namespace Lucene { + +/// Implements the skip list writer for the default posting list format that stores positions and payloads. 
+class DefaultSkipListWriter : public MultiLevelSkipListWriter { +public: + DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, const IndexOutputPtr& freqOutput, const IndexOutputPtr& proxOutput); + virtual ~DefaultSkipListWriter(); + + LUCENE_CLASS(DefaultSkipListWriter); + +protected: + Collection lastSkipDoc; + Collection lastSkipPayloadLength; + Collection lastSkipFreqPointer; + Collection lastSkipProxPointer; + + IndexOutputPtr freqOutput; + IndexOutputPtr proxOutput; + + int32_t curDoc; + bool curStorePayloads; + int32_t curPayloadLength; + int64_t curFreqPointer; + int64_t curProxPointer; + +public: + void setFreqOutput(const IndexOutputPtr& freqOutput); + void setProxOutput(const IndexOutputPtr& proxOutput); + + /// Sets the values for the current skip data. + void setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength); + +protected: + virtual void resetSkip(); + virtual void writeSkipData(int32_t level, const IndexOutputPtr& skipBuffer); + + friend class FormatPostingsTermsWriter; +}; + +} + +#endif diff --git a/include/lucene++/Directory.h b/include/lucene++/Directory.h new file mode 100644 index 00000000..826159ab --- /dev/null +++ b/include/lucene++/Directory.h @@ -0,0 +1,110 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DIRECTORY_H +#define DIRECTORY_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A Directory is a flat list of files. Files may be written once, when they are created. Once a file +/// is created it may only be opened for read, or deleted. Random access is permitted both when reading +/// and writing. 
Directory locking is implemented by an instance of {@link LockFactory}, and can be changed +/// for each Directory instance using {@link #setLockFactory}. +class LPPAPI Directory : public LuceneObject { +public: + Directory(); + virtual ~Directory(); + + LUCENE_CLASS(Directory); + +protected: + bool isOpen; + + /// Holds the LockFactory instance (implements locking for this Directory instance). + LockFactoryPtr lockFactory; + +public: + /// Returns an array of strings, one for each file in the directory. + virtual HashSet listAll() = 0; + + /// Returns true if a file with the given name exists. + virtual bool fileExists(const String& name) = 0; + + /// Returns the time the named file was last modified. + virtual uint64_t fileModified(const String& name) = 0; + + /// Set the modified time of an existing file to now. + virtual void touchFile(const String& name) = 0; + + /// Removes an existing file in the directory. + virtual void deleteFile(const String& name) = 0; + + /// Returns the length of a file in the directory. + virtual int64_t fileLength(const String& name) = 0; + + /// Creates a new, empty file in the directory with the given name. + /// Returns a stream writing this file. + virtual IndexOutputPtr createOutput(const String& name) = 0; + + /// Returns a stream reading an existing file. + virtual IndexInputPtr openInput(const String& name) = 0; + + /// Closes the store. + virtual void close() = 0; + + /// Ensure that any writes to this file are moved to stable storage. Lucene uses this to properly commit + /// changes to the index, to prevent a machine/OS crash from corrupting the index. + virtual void sync(const String& name); + + /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory + /// implementation may ignore the buffer size. Currently the only Directory implementations that respect + /// this parameter are {@link FSDirectory} and {@link CompoundFileReader}. 
+ virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); + + /// Construct a {@link Lock}. + /// @param name the name of the lock file. + virtual LockPtr makeLock(const String& name); + + /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you + /// are certain this lock is no longer in use. + /// @param name name of the lock to be cleared. + void clearLock(const String& name); + + /// Set the LockFactory that this Directory instance should use for its locking implementation. Each * instance + /// of LockFactory should only be used for one directory (ie, do not share a single instance across multiple + /// Directories). + /// @param lockFactory instance of {@link LockFactory}. + void setLockFactory(const LockFactoryPtr& lockFactory); + + /// Get the LockFactory that this Directory instance is using for its locking implementation. Note that this + /// may be null for Directory implementations that provide their own locking implementation. + LockFactoryPtr getLockFactory(); + + /// Return a string identifier that uniquely differentiates this Directory instance from other Directory + /// instances. This ID should be the same if two Directory instances are considered "the same index". + /// This is how locking "scopes" to the right index. + virtual String getLockID(); + + virtual String toString(); + + /// Copy contents of a directory src to a directory dest. If a file in src already exists in dest then the one + /// in dest will be blindly overwritten. NOTE: the source directory cannot change while this method is running. + /// Otherwise the results are undefined. + /// @param src source directory. + /// @param dest destination directory. + /// @param closeDirSrc if true, call {@link #close()} method on source directory. + static void copy(const DirectoryPtr& src, const DirectoryPtr& dest, bool closeDirSrc); + +protected: + /// @throws AlreadyClosed if this Directory is closed. 
+ void ensureOpen(); +}; + +} + +#endif diff --git a/include/lucene++/DirectoryReader.h b/include/lucene++/DirectoryReader.h new file mode 100644 index 00000000..dfe19441 --- /dev/null +++ b/include/lucene++/DirectoryReader.h @@ -0,0 +1,349 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DIRECTORYREADER_H +#define DIRECTORYREADER_H + +#include "IndexReader.h" +#include "TermEnum.h" +#include "TermPositions.h" +#include "IndexCommit.h" +#include "SegmentMergeQueue.h" + +namespace Lucene { + +/// An IndexReader which reads indexes with multiple segments. +class LPPAPI DirectoryReader : public IndexReader { +public: + /// Construct reading the named set of readers. + DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); + + /// Used by near real-time search. 
+ DirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor); + + /// This constructor is only used for {@link #reopen()} + DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection oldReaders, + Collection oldStarts, MapStringByteArray oldNormsCache, bool readOnly, + bool doClone, int32_t termInfosIndexDivisor); + + virtual ~DirectoryReader(); + + LUCENE_CLASS(DirectoryReader); + +protected: + DirectoryPtr _directory; + bool readOnly; + IndexWriterWeakPtr _writer; + IndexDeletionPolicyPtr deletionPolicy; + HashSet synced; + LockPtr writeLock; + SegmentInfosPtr segmentInfos; + SegmentInfosPtr segmentInfosStart; + bool stale; + int32_t termInfosIndexDivisor; + + bool rollbackHasChanges; + + Collection subReaders; + Collection starts; // 1st docno for each segment + MapStringByteArray normsCache; + int32_t _maxDoc; + int32_t _numDocs; + bool _hasDeletions; + + // Max version in index as of when we opened; this can be > our current segmentInfos version + // in case we were opened on a past IndexCommit + int64_t maxIndexVersion; + +public: + void _initialize(Collection subReaders); + + static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr()); + + virtual IndexReaderPtr reopen(); + virtual IndexReaderPtr reopen(bool openReadOnly); + virtual IndexReaderPtr reopen(const IndexCommitPtr& commit); + + /// Version number when this IndexReader was opened. + virtual int64_t getVersion(); + + /// Return an array of term frequency vectors for the specified document. 
+ virtual Collection getTermFreqVectors(int32_t docNumber); + + /// Return a term frequency vector for the specified document and field. + virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); + + /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of the {@link TermFreqVector}. + virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); + + /// Map all the term vectors for all fields in a Document + virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); + + /// Checks is the index is optimized (if it has a single segment and no deletions). Not implemented in the IndexReader base class. + /// @return true if the index is optimized; false otherwise + virtual bool isOptimized(); + + /// Returns the number of documents in this index. + virtual int32_t numDocs(); + + /// Returns one greater than the largest possible document number. + virtual int32_t maxDoc(); + + /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine what {@link Field}s to load and how they should be loaded. + virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); + + /// Returns true if document n has been deleted + virtual bool isDeleted(int32_t n); + + /// Returns true if any documents have been deleted + virtual bool hasDeletions(); + + /// Find reader for doc n + static int32_t readerIndex(int32_t n, Collection starts, int32_t numSubReaders); + + /// Returns true if there are norms stored for this field. + virtual bool hasNorms(const String& field); + + /// Returns the byte-encoded normalization factor for the named field of every document. + virtual ByteArray norms(const String& field); + + /// Reads the byte-encoded normalization factor for the named field of every document. 
+ virtual void norms(const String& field, ByteArray norms, int32_t offset); + + /// Returns an enumeration of all the terms in the index. + virtual TermEnumPtr terms(); + + /// Returns an enumeration of all terms starting at a given term. + virtual TermEnumPtr terms(const TermPtr& t); + + /// Returns the number of documents containing the term t. + virtual int32_t docFreq(const TermPtr& t); + + /// Returns an unpositioned {@link TermDocs} enumerator. + virtual TermDocsPtr termDocs(); + + /// Returns an unpositioned {@link TermPositions} enumerator. + virtual TermPositionsPtr termPositions(); + + /// Tries to acquire the WriteLock on this directory. this method is only valid if this + /// IndexReader is directory owner. + virtual void acquireWriteLock(); + + void startCommit(); + void rollbackCommit(); + + /// Retrieve the String userData optionally passed to IndexWriter#commit. + virtual MapStringString getCommitUserData(); + + /// Check whether any new changes have occurred to the index since this reader was opened. + virtual bool isCurrent(); + + /// Get a list of unique field names that exist in this index and have the specified field + /// option information. + virtual HashSet getFieldNames(FieldOption fieldOption); + + static HashSet getFieldNames(FieldOption fieldOption, Collection subReaders); + + /// Returns the sequential sub readers that this reader is logically composed of. + virtual Collection getSequentialSubReaders(); + + /// Returns the directory this index resides in. + virtual DirectoryPtr directory(); + + virtual int32_t getTermInfosIndexDivisor(); + + /// Return the IndexCommit that this reader has opened. + virtual IndexCommitPtr getIndexCommit(); + + /// Returns all commit points that exist in the Directory. 
+ static Collection listCommits(const DirectoryPtr& dir); + +protected: + IndexReaderPtr doReopenFromWriter(bool openReadOnly, const IndexCommitPtr& commit); + IndexReaderPtr doReopen(bool openReadOnly, const IndexCommitPtr& commit); + IndexReaderPtr doReopenNoWriter(bool openReadOnly, const IndexCommitPtr& commit); + DirectoryReaderPtr doReopen(const SegmentInfosPtr& infos, bool doClone, bool openReadOnly); + + /// Implements deletion of the document numbered docNum. + virtual void doDelete(int32_t docNum); + + /// Implements actual undeleteAll() in subclass. + virtual void doUndeleteAll(); + + int32_t readerIndex(int32_t n); + + /// Implements setNorm in subclass. + virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); + + /// Commit changes resulting from delete, undeleteAll, or setNorm operations + /// + /// If an exception is hit, then either no changes or all changes will have been committed to the index (transactional semantics). + virtual void doCommit(MapStringString commitUserData); + + /// Implements close. + virtual void doClose(); + + friend class FindSegmentsReopen; +}; + +class MultiTermEnum : public TermEnum { +public: + MultiTermEnum(const IndexReaderPtr& topReader, Collection readers, Collection starts, const TermPtr& t); + virtual ~MultiTermEnum(); + + LUCENE_CLASS(MultiTermEnum); + +protected: + SegmentMergeQueuePtr queue; + TermPtr _term; + int32_t _docFreq; + +public: + IndexReaderWeakPtr _topReader; + Collection matchingSegments; // null terminated array of matching segments + +public: + /// Increments the enumeration to the next element. True if one exists. + virtual bool next(); + + /// Returns the current Term in the enumeration. + virtual TermPtr term(); + + /// Returns the docFreq of the current Term in the enumeration. + virtual int32_t docFreq(); + + /// Closes the enumeration to further activity, freeing resources. 
+ virtual void close(); +}; + +class MultiTermDocs : public TermPositions, public LuceneObject { +public: + MultiTermDocs(const IndexReaderPtr& topReader, Collection r, Collection s); + virtual ~MultiTermDocs(); + + LUCENE_CLASS(MultiTermDocs); + +protected: + IndexReaderWeakPtr _topReader; // used for matching TermEnum to TermDocs + Collection readers; + Collection starts; + TermPtr term; + + int32_t base; + int32_t pointer; + + Collection readerTermDocs; + TermDocsPtr current; + MultiTermEnumPtr tenum; // the term enum used for seeking + int32_t matchingSegmentPos; // position into the matching segments from tenum + SegmentMergeInfoPtr smi; // current segment mere info + +public: + /// Returns the current document number. + virtual int32_t doc(); + + /// Returns the frequency of the term within the current document. + virtual int32_t freq(); + + /// Sets this to the data for a term. + virtual void seek(const TermPtr& term); + + /// Sets this to the data for the current term in a {@link TermEnum}. + virtual void seek(const TermEnumPtr& termEnum); + + /// Moves to the next pair in the enumeration. + virtual bool next(); + + /// Attempts to read multiple entries from the enumeration, up to length of docs. + /// Optimized implementation. + virtual int32_t read(Collection& docs, Collection& freqs); + + /// Skips entries to the first beyond the current whose document number is greater than or equal to target. + virtual bool skipTo(int32_t target); + + /// Frees associated resources. + virtual void close(); + +protected: + virtual TermDocsPtr termDocs(int32_t i); + virtual TermDocsPtr termDocs(const IndexReaderPtr& reader); +}; + +class MultiTermPositions : public MultiTermDocs { +public: + MultiTermPositions(const IndexReaderPtr& topReader, Collection r, Collection s); + virtual ~MultiTermPositions(); + + LUCENE_CLASS(MultiTermPositions); + +public: + /// Returns next position in the current document. 
+ virtual int32_t nextPosition(); + + /// Returns the length of the payload at the current term position. + virtual int32_t getPayloadLength(); + + /// Returns the payload data at the current term position. + virtual ByteArray getPayload(ByteArray data, int32_t offset); + + /// Checks if a payload can be loaded at this position. + virtual bool isPayloadAvailable(); + +protected: + virtual TermDocsPtr termDocs(const IndexReaderPtr& reader); +}; + +class ReaderCommit : public IndexCommit { +public: + ReaderCommit(const SegmentInfosPtr& infos, const DirectoryPtr& dir); + virtual ~ReaderCommit(); + + LUCENE_CLASS(ReaderCommit); + +protected: + String segmentsFileName; + HashSet files; + DirectoryPtr dir; + int64_t generation; + int64_t version; + bool _isOptimized; + MapStringString userData; + +public: + virtual String toString(); + + /// Returns true if this commit is an optimized index. + virtual bool isOptimized(); + + /// Two IndexCommits are equal if both their Directory and versions are equal. + virtual String getSegmentsFileName(); + + /// Returns all index files referenced by this commit point. + virtual HashSet getFileNames(); + + /// Returns the {@link Directory} for the index. + virtual DirectoryPtr getDirectory(); + + /// Returns the version for this IndexCommit. + virtual int64_t getVersion(); + + /// Returns the generation (the _N in segments_N) for this IndexCommit. + virtual int64_t getGeneration(); + + virtual bool isDeleted(); + + /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. + virtual MapStringString getUserData(); + + virtual void deleteCommit(); +}; + +} + +#endif diff --git a/include/lucene++/DisjunctionMaxQuery.h b/include/lucene++/DisjunctionMaxQuery.h new file mode 100644 index 00000000..9ed7791a --- /dev/null +++ b/include/lucene++/DisjunctionMaxQuery.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DISJUNCTIONMAXQUERY_H +#define DISJUNCTIONMAXQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// A query that generates the union of documents produced by its subqueries, and that scores each +/// document with the maximum score for that document as produced by any subquery, plus a tie breaking +/// increment for any additional matching subqueries. This is useful when searching for a word in +/// multiple fields with different boost factors (so that the fields cannot be combined equivalently +/// into a single search field). We want the primary score to be the one associated with the highest +/// boost, not the sum of the field scores (as BooleanQuery would give). If the query is "albino +/// elephant" this ensures that "albino" matching one field and "elephant" matching another gets a +/// higher score than "albino" matching both fields. To get this result, use both BooleanQuery and +/// DisjunctionMaxQuery: for each term a DisjunctionMaxQuery searches for it in each field, while the +/// set of these DisjunctionMaxQuery's is combined into a BooleanQuery. The tie breaker capability +/// allows results that include the same term in multiple fields to be judged better than results that +/// include this term in only the best of those multiple fields, without confusing this with the better +/// case of two different terms in the multiple fields. +class LPPAPI DisjunctionMaxQuery : public Query { +public: + /// Creates a new empty DisjunctionMaxQuery. Use add() to add the subqueries. + /// @param tieBreakerMultiplier the score of each non-maximum disjunct for a document is multiplied + /// by this weight and added into the final score. 
If non-zero, the value should be small, on the +    /// order of 0.1, which says that 10 occurrences of word in a lower-scored field that is also in a +    /// higher scored field is just as good as a unique word in the lower scored field (i.e., one that is +    /// not in any higher scored field). +    DisjunctionMaxQuery(double tieBreakerMultiplier = 0.0); + +    /// Creates a new DisjunctionMaxQuery +    /// @param disjuncts A Collection of all the disjuncts to add +    /// @param tieBreakerMultiplier The weight to give to each matching non-maximum disjunct +    DisjunctionMaxQuery(Collection disjuncts, double tieBreakerMultiplier); + +    virtual ~DisjunctionMaxQuery(); + +    LUCENE_CLASS(DisjunctionMaxQuery); + +protected: +    /// The subqueries +    Collection disjuncts; + +    /// Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. +    double tieBreakerMultiplier; + +public: +    using Query::toString; + +    /// Add a subquery to this disjunction +    /// @param query the disjunct added +    void add(const QueryPtr& query); + +    /// Add a collection of disjuncts to this disjunction +    void add(Collection disjuncts); + +    /// An iterator over the disjuncts +    Collection::iterator begin(); +    Collection::iterator end(); + +    /// Create the Weight used to score us +    virtual WeightPtr createWeight(const SearcherPtr& searcher); + +    /// Optimize our representation and our subqueries representations +    /// @param reader the IndexReader we query +    /// @return an optimized copy of us (which may not be a copy if there is nothing to optimize) +    virtual QueryPtr rewrite(const IndexReaderPtr& reader); + +    /// Create a shallow copy of us - used in rewriting if necessary +    /// @return a copy of us (but reuse, don't copy, our subqueries) +    virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + +    /// Adds all terms occurring in this query to the terms set. +    virtual void extractTerms(SetTerm terms); + +    /// Pretty print us. 
+ /// @param field the field to which we are applied + /// @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... | disjunctn)^boost" + virtual String toString(const String& field); + + /// @return true if other is a DisjunctionMaxQuery with the same boost and the same subqueries, in the + /// same order, as us + virtual bool equals(const LuceneObjectPtr& other); + + virtual int32_t hashCode(); + + friend class DisjunctionMaxWeight; +}; + +} + +#endif diff --git a/include/lucene++/DisjunctionMaxScorer.h b/include/lucene++/DisjunctionMaxScorer.h new file mode 100644 index 00000000..2f32f7f4 --- /dev/null +++ b/include/lucene++/DisjunctionMaxScorer.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DISJUNCTIONMAXSCORER_H +#define DISJUNCTIONMAXSCORER_H + +#include "Scorer.h" + +namespace Lucene { + +/// The Scorer for DisjunctionMaxQuery. The union of all documents generated by the the subquery scorers +/// is generated in document number order. The score for each document is the maximum of the scores computed +/// by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores +/// for the other subqueries that generate the document. +class DisjunctionMaxScorer : public Scorer { +public: + DisjunctionMaxScorer(double tieBreakerMultiplier, const SimilarityPtr& similarity, Collection subScorers, int32_t numScorers); + virtual ~DisjunctionMaxScorer(); + + LUCENE_CLASS(DisjunctionMaxScorer); + +protected: + /// The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. 
+ Collection subScorers; + int32_t numScorers; + + /// Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. + double tieBreakerMultiplier; + + int32_t doc; + +public: + virtual int32_t nextDoc(); + virtual int32_t docID(); + + /// Determine the current document score. Initially invalid, until {@link #next()} is called the first time. + /// @return the score of the current generated document + virtual double score(); + + virtual int32_t advance(int32_t target); + +protected: + /// Recursively iterate all subScorers that generated last doc computing sum and max + void scoreAll(int32_t root, int32_t size, int32_t doc, Collection sum, Collection max); + + /// Organize subScorers into a min heap with scorers generating the earliest document on top. + void heapify(); + + /// The subtree of subScorers at root is a min heap except possibly for its root element. Bubble the root + /// down as required to make the subtree a heap. + void heapAdjust(int32_t root); + + /// Remove the root Scorer from subScorers and re-establish it as a heap + void heapRemoveRoot(); +}; + +} + +#endif diff --git a/include/lucene++/DisjunctionSumScorer.h b/include/lucene++/DisjunctionSumScorer.h new file mode 100644 index 00000000..d481423a --- /dev/null +++ b/include/lucene++/DisjunctionSumScorer.h @@ -0,0 +1,95 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DISJUNCTIONSUMSCORER_H +#define DISJUNCTIONSUMSCORER_H + +#include "Scorer.h" + +namespace Lucene { + +/// A Scorer for OR like queries, counterpart of ConjunctionScorer. This Scorer implements {@link +/// Scorer#skipTo(int32_t)} and uses skipTo() on the given Scorers. 
+class DisjunctionSumScorer : public Scorer { +public: +    DisjunctionSumScorer(Collection subScorers, int32_t minimumNrMatchers = 1); +    virtual ~DisjunctionSumScorer(); + +    LUCENE_CLASS(DisjunctionSumScorer); + +protected: +    /// The number of subscorers. +    int32_t nrScorers; + +    /// The subscorers. +    Collection subScorers; + +    /// The minimum number of scorers that should match. +    int32_t minimumNrMatchers; + +    /// The scorerDocQueue contains all subscorers ordered by their current doc(), with the minimum at +    /// the top.  The scorerDocQueue is initialized the first time next() or skipTo() is called.  An exhausted +    /// scorer is immediately removed from the scorerDocQueue.  If less than the minimumNrMatchers scorers +    /// remain in the scorerDocQueue next() and skipTo() return false. +    /// +    /// After each call to next() or skipTo() currentSumScore is the total score of the current matching doc, +    /// nrMatchers is the number of matching scorers, and all scorers are after the matching doc, or are exhausted. +    ScorerDocQueuePtr scorerDocQueue; + +    /// The document number of the current match. +    int32_t currentDoc; + +    /// The number of subscorers that provide the current match. +    int32_t _nrMatchers; + +    double currentScore; + +public: +    virtual void initialize(); + +    virtual void score(const CollectorPtr& collector); +    virtual int32_t nextDoc(); + +    /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} +    /// is called the first time. +    virtual double score(); + +    virtual int32_t docID(); + +    /// Returns the number of subscorers matching the current document. Initially invalid, until {@link #next()} +    /// is called the first time. +    int32_t nrMatchers(); + +    /// Advances to the first match beyond the current whose document number is greater than or equal to a given +    /// target. The implementation uses the skipTo() method on the subscorers. +    /// +    /// @param target The target document number. 
+ /// @return the document whose number is greater than or equal to the given target, or -1 if none exist. + virtual int32_t advance(int32_t target); + +protected: + /// Called the first time next() or skipTo() is called to initialize scorerDocQueue. + void initScorerDocQueue(); + + /// Collects matching documents in a range. Hook for optimization. Note that {@link #next()} must be + /// called once before this method is called for the first time. + /// @param collector The collector to which all matching documents are passed through. + /// @param max Do not score documents past this. + /// @return true if more matching documents may remain. + virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); + + /// Advance all subscorers after the current document determined by the top of the scorerDocQueue. Repeat + /// until at least the minimum number of subscorers match on the same document and all subscorers are after + /// that document or are exhausted. On entry the scorerDocQueue has at least minimumNrMatchers available. + /// At least the scorer with the minimum document number will be advanced. + /// @return true if there is a match. In case there is a match, currentDoc, currentSumScore and nrMatchers + /// describe the match. + bool advanceAfterCurrent(); +}; + +} + +#endif diff --git a/include/lucene++/DocConsumer.h b/include/lucene++/DocConsumer.h new file mode 100644 index 00000000..5b92f1f9 --- /dev/null +++ b/include/lucene++/DocConsumer.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCCONSUMER_H +#define DOCCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class DocConsumer : public LuceneObject { +public: + virtual ~DocConsumer(); + + LUCENE_CLASS(DocConsumer); + +public: + virtual DocConsumerPerThreadPtr addThread(const DocumentsWriterThreadStatePtr& perThread) = 0; + virtual void flush(Collection threads, const SegmentWriteStatePtr& state) = 0; + virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; + virtual void abort() = 0; + virtual bool freeRAM() = 0; +}; + +} + +#endif diff --git a/include/lucene++/DocConsumerPerThread.h b/include/lucene++/DocConsumerPerThread.h new file mode 100644 index 00000000..15b32659 --- /dev/null +++ b/include/lucene++/DocConsumerPerThread.h @@ -0,0 +1,31 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCCONSUMERPERTHREAD_H +#define DOCCONSUMERPERTHREAD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class DocConsumerPerThread : public LuceneObject { +public: + virtual ~DocConsumerPerThread(); + + LUCENE_CLASS(DocConsumerPerThread); + +public: + /// Process the document. If there is something for this document to be done in docID order, + /// you should encapsulate that as a DocWriter and return it. + /// DocumentsWriter then calls finish() on this object when it's its turn. 
+ virtual DocWriterPtr processDocument() = 0; + + virtual void abort() = 0; +}; + +} + +#endif diff --git a/include/lucene++/DocFieldConsumer.h b/include/lucene++/DocFieldConsumer.h new file mode 100644 index 00000000..d8d2f443 --- /dev/null +++ b/include/lucene++/DocFieldConsumer.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDCONSUMER_H +#define DOCFIELDCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class DocFieldConsumer : public LuceneObject { +public: + virtual ~DocFieldConsumer(); + + LUCENE_CLASS(DocFieldConsumer); + +protected: + FieldInfosPtr fieldInfos; + +public: + /// Called when DocumentsWriter decides to create a new segment + virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) = 0; + + /// Called when DocumentsWriter decides to close the doc stores + virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; + + /// Called when an aborting exception is hit + virtual void abort() = 0; + + /// Add a new thread + virtual DocFieldConsumerPerThreadPtr addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread) = 0; + + /// Called when DocumentsWriter is using too much RAM. The consumer should free RAM, if possible, returning + /// true if any RAM was in fact freed. 
+ virtual bool freeRAM() = 0; + + virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); +}; + +} + +#endif diff --git a/include/lucene++/DocFieldConsumerPerField.h b/include/lucene++/DocFieldConsumerPerField.h new file mode 100644 index 00000000..75b5f0ef --- /dev/null +++ b/include/lucene++/DocFieldConsumerPerField.h @@ -0,0 +1,29 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDCONSUMERPERFIELD_H +#define DOCFIELDCONSUMERPERFIELD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class DocFieldConsumerPerField : public LuceneObject { +public: + virtual ~DocFieldConsumerPerField(); + + LUCENE_CLASS(DocFieldConsumerPerField); + +public: + /// Processes all occurrences of a single field + virtual void processFields(Collection fields, int32_t count) = 0; + + virtual void abort() = 0; +}; + +} + +#endif diff --git a/include/lucene++/DocFieldConsumerPerThread.h b/include/lucene++/DocFieldConsumerPerThread.h new file mode 100644 index 00000000..9c88e7ce --- /dev/null +++ b/include/lucene++/DocFieldConsumerPerThread.h @@ -0,0 +1,29 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDCONSUMERPERTHREAD_H +#define DOCFIELDCONSUMERPERTHREAD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class DocFieldConsumerPerThread : public LuceneObject { +public: + virtual ~DocFieldConsumerPerThread(); + + LUCENE_CLASS(DocFieldConsumerPerThread); + +public: + virtual void startDocument() = 0; + virtual DocWriterPtr finishDocument() = 0; + virtual DocFieldConsumerPerFieldPtr addField(const FieldInfoPtr& fi) = 0; + virtual void abort() = 0; +}; + +} + +#endif diff --git a/include/lucene++/DocFieldConsumers.h b/include/lucene++/DocFieldConsumers.h new file mode 100644 index 00000000..35c4a813 --- /dev/null +++ b/include/lucene++/DocFieldConsumers.h @@ -0,0 +1,72 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDCONSUMERS_H +#define DOCFIELDCONSUMERS_H + +#include "DocFieldConsumer.h" +#include "DocumentsWriter.h" + +namespace Lucene { + +/// This is just a "splitter" class: it lets you wrap two DocFieldConsumer instances as a single consumer. 
+class DocFieldConsumers : public DocFieldConsumer { +public: + DocFieldConsumers(const DocFieldConsumerPtr& one, const DocFieldConsumerPtr& two); + virtual ~DocFieldConsumers(); + + LUCENE_CLASS(DocFieldConsumers); + +public: + DocFieldConsumerPtr one; + DocFieldConsumerPtr two; + + Collection docFreeList; + int32_t freeCount; + int32_t allocCount; + +public: + virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); + + /// Called when DocumentsWriter decides to create a new segment + virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); + + /// Called when DocumentsWriter decides to close the doc stores + virtual void closeDocStore(const SegmentWriteStatePtr& state); + + /// Called when DocumentsWriter is using too much RAM. + virtual bool freeRAM(); + + /// Add a new thread + virtual DocFieldConsumerPerThreadPtr addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread); + + DocFieldConsumersPerDocPtr getPerDoc(); + void freePerDoc(const DocFieldConsumersPerDocPtr& perDoc); +}; + +class DocFieldConsumersPerDoc : public DocWriter { +public: + DocFieldConsumersPerDoc(const DocFieldConsumersPtr& fieldConsumers); + virtual ~DocFieldConsumersPerDoc(); + + LUCENE_CLASS(DocFieldConsumersPerDoc); + +protected: + DocFieldConsumersWeakPtr _fieldConsumers; + +public: + DocWriterPtr one; + DocWriterPtr two; + +public: + virtual int64_t sizeInBytes(); + virtual void finish(); + virtual void abort(); +}; + +} + +#endif diff --git a/include/lucene++/DocFieldConsumersPerField.h b/include/lucene++/DocFieldConsumersPerField.h new file mode 100644 index 00000000..ee301b64 --- /dev/null +++ b/include/lucene++/DocFieldConsumersPerField.h @@ -0,0 +1,35 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDCONSUMERSPERFIELD_H +#define DOCFIELDCONSUMERSPERFIELD_H + +#include "DocFieldConsumerPerField.h" + +namespace Lucene { + +class DocFieldConsumersPerField : public DocFieldConsumerPerField { +public: + DocFieldConsumersPerField(const DocFieldConsumersPerThreadPtr& perThread, const DocFieldConsumerPerFieldPtr& one, const DocFieldConsumerPerFieldPtr& two); + virtual ~DocFieldConsumersPerField(); + + LUCENE_CLASS(DocFieldConsumersPerField); + +public: + DocFieldConsumerPerFieldPtr one; + DocFieldConsumerPerFieldPtr two; + DocFieldConsumersPerThreadWeakPtr _perThread; + +public: + /// Processes all occurrences of a single field + virtual void processFields(Collection fields, int32_t count); + + virtual void abort(); +}; + +} + +#endif diff --git a/include/lucene++/DocFieldConsumersPerThread.h b/include/lucene++/DocFieldConsumersPerThread.h new file mode 100644 index 00000000..e46dc341 --- /dev/null +++ b/include/lucene++/DocFieldConsumersPerThread.h @@ -0,0 +1,37 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDCONSUMERSPERTHREAD_H +#define DOCFIELDCONSUMERSPERTHREAD_H + +#include "DocFieldConsumerPerThread.h" + +namespace Lucene { + +class DocFieldConsumersPerThread : public DocFieldConsumerPerThread { +public: + DocFieldConsumersPerThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread, const DocFieldConsumersPtr& parent, + const DocFieldConsumerPerThreadPtr& one, const DocFieldConsumerPerThreadPtr& two); + virtual ~DocFieldConsumersPerThread(); + + LUCENE_CLASS(DocFieldConsumersPerThread); + +public: + DocFieldConsumerPerThreadPtr one; + DocFieldConsumerPerThreadPtr two; + DocFieldConsumersWeakPtr _parent; + DocStatePtr docState; + +public: + virtual void startDocument(); + virtual void abort(); + virtual DocWriterPtr finishDocument(); + virtual DocFieldConsumerPerFieldPtr addField(const FieldInfoPtr& fi); +}; + +} + +#endif diff --git a/include/lucene++/DocFieldProcessor.h b/include/lucene++/DocFieldProcessor.h new file mode 100644 index 00000000..3cada74b --- /dev/null +++ b/include/lucene++/DocFieldProcessor.h @@ -0,0 +1,40 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDPROCESSOR_H +#define DOCFIELDPROCESSOR_H + +#include "DocConsumer.h" + +namespace Lucene { + +/// This is a DocConsumer that gathers all fields under the same name, and calls per-field consumers to process +/// field by field. This class doesn't doesn't do any "real" work of its own: it just forwards the fields to a +/// DocFieldConsumer. 
+class DocFieldProcessor : public DocConsumer { +public: + DocFieldProcessor(const DocumentsWriterPtr& docWriter, const DocFieldConsumerPtr& consumer); + virtual ~DocFieldProcessor(); + + LUCENE_CLASS(DocFieldProcessor); + +public: + DocumentsWriterWeakPtr _docWriter; + FieldInfosPtr fieldInfos; + DocFieldConsumerPtr consumer; + StoredFieldsWriterPtr fieldsWriter; + +public: + virtual void closeDocStore(const SegmentWriteStatePtr& state); + virtual void flush(Collection threads, const SegmentWriteStatePtr& state); + virtual void abort(); + virtual bool freeRAM(); + virtual DocConsumerPerThreadPtr addThread(const DocumentsWriterThreadStatePtr& perThread); +}; + +} + +#endif diff --git a/include/lucene++/DocFieldProcessorPerField.h b/include/lucene++/DocFieldProcessorPerField.h new file mode 100644 index 00000000..963c8c5c --- /dev/null +++ b/include/lucene++/DocFieldProcessorPerField.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDPROCESSORPERFIELD_H +#define DOCFIELDPROCESSORPERFIELD_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Holds all per thread, per field state. 
+class DocFieldProcessorPerField : public LuceneObject { +public: + DocFieldProcessorPerField(const DocFieldProcessorPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); + virtual ~DocFieldProcessorPerField(); + + LUCENE_CLASS(DocFieldProcessorPerField); + +public: + DocFieldConsumerPerFieldPtr consumer; + FieldInfoPtr fieldInfo; + + DocFieldProcessorPerFieldPtr next; + int32_t lastGen; + + int32_t fieldCount; + Collection fields; + +public: + virtual void abort(); +}; + +} + +#endif diff --git a/include/lucene++/DocFieldProcessorPerThread.h b/include/lucene++/DocFieldProcessorPerThread.h new file mode 100644 index 00000000..ccfb7a89 --- /dev/null +++ b/include/lucene++/DocFieldProcessorPerThread.h @@ -0,0 +1,85 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCFIELDPROCESSORPERTHREAD_H +#define DOCFIELDPROCESSORPERTHREAD_H + +#include "DocConsumerPerThread.h" +#include "DocumentsWriter.h" + +namespace Lucene { + +/// Gathers all Fieldables for a document under the same name, updates FieldInfos, and calls per-field +/// consumers to process field by field. +/// +/// Currently, only a single thread visits the fields, sequentially, for processing. 
+class DocFieldProcessorPerThread : public DocConsumerPerThread { +public: + DocFieldProcessorPerThread(const DocumentsWriterThreadStatePtr& threadState, const DocFieldProcessorPtr& docFieldProcessor); + virtual ~DocFieldProcessorPerThread(); + + LUCENE_CLASS(DocFieldProcessorPerThread); + +public: + double docBoost; + int32_t fieldGen; + DocFieldProcessorWeakPtr _docFieldProcessor; + FieldInfosPtr fieldInfos; + DocFieldConsumerPerThreadPtr consumer; + Collection _fields; // Holds all fields seen in current doc + int32_t fieldCount; + + Collection fieldHash; // Hash table for all fields ever seen + int32_t hashMask; + int32_t totalFieldCount; + + StoredFieldsWriterPerThreadPtr fieldsWriter; + DocStatePtr docState; + + Collection docFreeList; + int32_t freeCount; + int32_t allocCount; + +public: + virtual void initialize(); + virtual void abort(); + Collection fields(); + + // If there are fields we've seen but did not see again in the last run, then free them up. + void trimFields(const SegmentWriteStatePtr& state); + + virtual DocWriterPtr processDocument(); + + DocFieldProcessorPerThreadPerDocPtr getPerDoc(); + void freePerDoc(const DocFieldProcessorPerThreadPerDocPtr& perDoc); + +protected: + void rehash(); +}; + +class DocFieldProcessorPerThreadPerDoc : public DocWriter { +public: + DocFieldProcessorPerThreadPerDoc(const DocFieldProcessorPerThreadPtr& docProcessor); + virtual ~DocFieldProcessorPerThreadPerDoc(); + + LUCENE_CLASS(DocFieldProcessorPerThreadPerDoc); + +public: + DocWriterPtr one; + DocWriterPtr two; + +protected: + DocFieldProcessorPerThreadWeakPtr _docProcessor; + +public: + virtual int64_t sizeInBytes(); + virtual void finish(); + virtual void abort(); +}; + +} + +#endif diff --git a/include/lucene++/DocIdBitSet.h b/include/lucene++/DocIdBitSet.h new file mode 100644 index 00000000..5262de10 --- /dev/null +++ b/include/lucene++/DocIdBitSet.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// 
Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCIDBITSET_H +#define DOCIDBITSET_H + +#include "DocIdSet.h" + +namespace Lucene { + +/// Simple DocIdSet and DocIdSetIterator backed by a BitSet +class LPPAPI DocIdBitSet : public DocIdSet { +public: + DocIdBitSet(); + DocIdBitSet(const BitSetPtr& bitSet); + + virtual ~DocIdBitSet(); + + LUCENE_CLASS(DocIdBitSet); + +protected: + BitSetPtr bitSet; + +public: + virtual DocIdSetIteratorPtr iterator(); + + /// This DocIdSet implementation is cacheable. + virtual bool isCacheable(); + + /// Returns the underlying BitSet. + BitSetPtr getBitSet(); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/DocIdSet.h b/include/lucene++/DocIdSet.h new file mode 100644 index 00000000..470f9c61 --- /dev/null +++ b/include/lucene++/DocIdSet.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCIDSET_H +#define DOCIDSET_H + +#include "DocIdSetIterator.h" + +namespace Lucene { + +/// A DocIdSet contains a set of doc ids. Implementing classes must only implement {@link #iterator} to +/// provide access to the set. +class LPPAPI DocIdSet : public LuceneObject { +public: + virtual ~DocIdSet(); + LUCENE_CLASS(DocIdSet); + +public: + /// Provides a {@link DocIdSetIterator} to access the set. 
This implementation can return null or + /// {@link #EmptyDocIdSet}.iterator() if there are no docs that match. + virtual DocIdSetIteratorPtr iterator() = 0; + + /// This method is a hint for {@link CachingWrapperFilter}, if this DocIdSet should be cached without + /// copying it into a BitSet. The default is to return false. If you have an own DocIdSet implementation + /// that does its iteration very effective and fast without doing disk I/O, override this method and + /// return true. + virtual bool isCacheable(); + + /// An empty {@code DocIdSet} instance for easy use, eg. in Filters that hit no documents. + static DocIdSetPtr EMPTY_DOCIDSET(); +}; + +} + +#endif diff --git a/include/lucene++/DocIdSetIterator.h b/include/lucene++/DocIdSetIterator.h new file mode 100644 index 00000000..9db8e706 --- /dev/null +++ b/include/lucene++/DocIdSetIterator.h @@ -0,0 +1,75 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCIDSETITERATOR_H +#define DOCIDSETITERATOR_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// This abstract class defines methods to iterate over a set of non-decreasing doc ids. Note that this class +/// assumes it iterates on doc Ids, and therefore {@link #NO_MORE_DOCS} is set to {@value #NO_MORE_DOCS} in order to +/// be used as a sentinel object. Implementations of this class are expected to consider INT_MAX as an invalid value. +class LPPAPI DocIdSetIterator : public LuceneObject { +public: + virtual ~DocIdSetIterator(); + + LUCENE_CLASS(DocIdSetIterator); + +public: + /// When returned by {@link #nextDoc()}, {@link #advance(int)} and {@link #docID()} it means there are no more + /// docs in the iterator. 
+ static const int32_t NO_MORE_DOCS; + +public: + /// Returns the following: + ///
    + ///
  • -1 or {@link #NO_MORE_DOCS} if {@link #nextDoc()} or {@link #advance(int)} were not called yet. + ///
  • {@link #NO_MORE_DOCS} if the iterator has exhausted. + ///
  • Otherwise it should return the doc ID it is currently on. + ///
+ virtual int32_t docID() = 0; + + /// Advances to the next document in the set and returns the doc it is currently on, or {@link #NO_MORE_DOCS} + /// if there are no more docs in the set. + /// + /// NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted + /// behaviour. + virtual int32_t nextDoc() = 0; + + /// Advances to the first beyond the current whose document number is greater than or equal to target. Returns + /// the current document number or {@link #NO_MORE_DOCS} if there are no more docs in the set. + /// + /// Behaves as if written: + /// + ///
+    /// int32_t advance(int32_t target)
+    /// {
+    ///     int32_t doc;
+    ///     while ((doc = nextDoc()) < target)
+    ///     { }
+    ///     return doc;
+    /// }
+    /// 
+ /// + /// Some implementations are considerably more efficient than that. + /// + /// NOTE: certain implementations may return a different value (each time) if called several times in a row + /// with the same target. + /// + /// NOTE: this method may be called with {@value #NO_MORE_DOCS} for efficiency by some Scorers. If your + /// implementation cannot efficiently determine that it should exhaust, it is recommended that you check for + /// that value in each call to this method. + /// + /// NOTE: after the iterator has exhausted you should not call this method, as it may result in unpredicted + /// behaviour. + virtual int32_t advance(int32_t target) = 0; +}; + +} + +#endif diff --git a/include/lucene++/DocInverter.h b/include/lucene++/DocInverter.h new file mode 100644 index 00000000..a0e30ed5 --- /dev/null +++ b/include/lucene++/DocInverter.h @@ -0,0 +1,48 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCINVERTER_H +#define DOCINVERTER_H + +#include "DocFieldConsumer.h" + +namespace Lucene { + +/// This is a DocFieldConsumer that inverts each field, separately, from a Document, and accepts a +/// InvertedTermsConsumer to process those terms. 
+class DocInverter : public DocFieldConsumer { +public: + DocInverter(const InvertedDocConsumerPtr& consumer, const InvertedDocEndConsumerPtr& endConsumer); + virtual ~DocInverter(); + + LUCENE_CLASS(DocInverter); + +public: + InvertedDocConsumerPtr consumer; + InvertedDocEndConsumerPtr endConsumer; + +public: + virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); + + /// Called when DocumentsWriter decides to create a new segment + virtual void flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); + + /// Called when DocumentsWriter decides to close the doc stores + virtual void closeDocStore(const SegmentWriteStatePtr& state); + + /// Called when an aborting exception is hit + virtual void abort(); + + /// Called when DocumentsWriter is using too much RAM. + virtual bool freeRAM(); + + /// Add a new thread + virtual DocFieldConsumerPerThreadPtr addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread); +}; + +} + +#endif diff --git a/include/lucene++/DocInverterPerField.h b/include/lucene++/DocInverterPerField.h new file mode 100644 index 00000000..183f3f3f --- /dev/null +++ b/include/lucene++/DocInverterPerField.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCINVERTERPERFIELD_H +#define DOCINVERTERPERFIELD_H + +#include "DocFieldConsumerPerField.h" + +namespace Lucene { + +/// Holds state for inverting all occurrences of a single field in the document. This class doesn't do +/// anything itself; instead, it forwards the tokens produced by analysis to its own consumer +/// (InvertedDocConsumerPerField). 
It also interacts with an endConsumer (InvertedDocEndConsumerPerField). +class DocInverterPerField : public DocFieldConsumerPerField { +public: + DocInverterPerField(const DocInverterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); + virtual ~DocInverterPerField(); + + LUCENE_CLASS(DocInverterPerField); + +protected: + DocInverterPerThreadWeakPtr _perThread; + FieldInfoPtr fieldInfo; + +public: + InvertedDocConsumerPerFieldPtr consumer; + InvertedDocEndConsumerPerFieldPtr endConsumer; + DocStatePtr docState; + FieldInvertStatePtr fieldState; + +public: + virtual void initialize(); + virtual void abort(); + + /// Processes all occurrences of a single field + virtual void processFields(Collection fields, int32_t count); +}; + +} + +#endif diff --git a/include/lucene++/DocInverterPerThread.h b/include/lucene++/DocInverterPerThread.h new file mode 100644 index 00000000..8343dc1b --- /dev/null +++ b/include/lucene++/DocInverterPerThread.h @@ -0,0 +1,61 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCINVERTERPERTHREAD_H +#define DOCINVERTERPERTHREAD_H + +#include "DocFieldConsumerPerThread.h" +#include "AttributeSource.h" + +namespace Lucene { + +/// This is a DocFieldConsumer that inverts each field, separately, from a Document, and accepts a +/// InvertedTermsConsumer to process those terms. 
+class DocInverterPerThread : public DocFieldConsumerPerThread { +public: + DocInverterPerThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread, const DocInverterPtr& docInverter); + virtual ~DocInverterPerThread(); + + LUCENE_CLASS(DocInverterPerThread); + +public: + DocInverterWeakPtr _docInverter; + InvertedDocConsumerPerThreadPtr consumer; + InvertedDocEndConsumerPerThreadPtr endConsumer; + SingleTokenAttributeSourcePtr singleToken; + + DocStatePtr docState; + FieldInvertStatePtr fieldState; + + /// Used to read a string value for a field + ReusableStringReaderPtr stringReader; + +public: + virtual void initialize(); + virtual void startDocument(); + virtual DocWriterPtr finishDocument(); + virtual void abort(); + virtual DocFieldConsumerPerFieldPtr addField(const FieldInfoPtr& fi); +}; + +class SingleTokenAttributeSource : public AttributeSource { +public: + SingleTokenAttributeSource(); + virtual ~SingleTokenAttributeSource(); + + LUCENE_CLASS(SingleTokenAttributeSource); + +public: + TermAttributePtr termAttribute; + OffsetAttributePtr offsetAttribute; + +public: + void reinit(const String& stringValue, int32_t startOffset, int32_t endOffset); +}; + +} + +#endif diff --git a/include/lucene++/DocValues.h b/include/lucene++/DocValues.h new file mode 100644 index 00000000..3b1037e8 --- /dev/null +++ b/include/lucene++/DocValues.h @@ -0,0 +1,98 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCVALUES_H +#define DOCVALUES_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Represents field values as different types. Normally created via a {@link ValueSuorce} for a +/// particular field and reader. 
+/// +/// DocValues is distinct from ValueSource because there needs to be an object created at query +/// evaluation time that is not referenced by the query itself because: +/// - Query objects should be MT safe +/// - For caching, Query objects are often used as keys... you don't want the Query carrying around +/// big objects +class LPPAPI DocValues : public LuceneObject { +public: + DocValues(); + virtual ~DocValues(); + + LUCENE_CLASS(DocValues); + +protected: + double minVal; + double maxVal; + double avgVal; + bool computed; + +public: + using LuceneObject::toString; + + /// Return doc value as a double. + /// Mandatory: every DocValues implementation must implement at least this method. + /// @param doc document whose double value is requested. + virtual double doubleVal(int32_t doc) = 0; + + /// Return doc value as an int. + /// Optional: DocValues implementation can (but don't have to) override this method. + /// @param doc document whose int value is requested. + virtual int32_t intVal(int32_t doc); + + /// Return doc value as a long. + /// Optional: DocValues implementation can (but don't have to) override this method. + /// @param doc document whose long value is requested. + virtual int64_t longVal(int32_t doc); + + /// Return doc value as a string. + /// Optional: DocValues implementation can (but don't have to) override this method. + /// @param doc document whose string value is requested. + virtual String strVal(int32_t doc); + + /// Return a string representation of a doc value, as required for Explanations. + virtual String toString(int32_t doc) = 0; + + /// Explain the scoring value for the input doc. + virtual ExplanationPtr explain(int32_t doc); + + /// For test purposes only, return the inner array of values, or null if not applicable. + /// + /// Allows tests to verify that loaded values are: + ///
    + ///
  1. indeed cached/reused. + ///
  2. stored in the expected size/type (byte/short/int/float). + ///
+ /// + /// Note: implementations of DocValues must override this method for these test elements to be tested, + /// Otherwise the test would not fail, just print a warning. + virtual CollectionValue getInnerArray(); + + /// Returns the minimum of all values or NaN if this DocValues instance does not contain any value. + /// This operation is optional + /// @return the minimum of all values or NaN if this DocValues instance does not contain any value. + virtual double getMinValue(); + + /// Returns the maximum of all values or NaN if this DocValues instance does not contain any value. + /// This operation is optional + /// @return the maximum of all values or NaN if this DocValues instance does not contain any value. + virtual double getMaxValue(); + + /// Returns the average of all values or NaN if this DocValues instance does not contain any value. + /// This operation is optional + /// @return the average of all values or NaN if this DocValues instance does not contain any value. + virtual double getAverageValue(); + +protected: + /// Compute optional values + void compute(); +}; + +} + +#endif diff --git a/include/lucene++/Document.h b/include/lucene++/Document.h new file mode 100644 index 00000000..5e0422f1 --- /dev/null +++ b/include/lucene++/Document.h @@ -0,0 +1,142 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCUMENT_H +#define DOCUMENT_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Documents are the unit of indexing and search. +/// +/// A Document is a set of fields. Each field has a name and a textual value. 
A field may be {@link +/// Fieldable#isStored() stored} with the document, in which case it is returned with search hits on the +/// document. Thus each document should typically contain one or more stored fields which uniquely +/// identify it. +/// +/// Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents +/// retrieved from the index, eg. with {@link ScoreDoc#doc}, {@link Searcher#doc(int)} or {@link +/// IndexReader#document(int)}. +class LPPAPI Document : public LuceneObject { +public: + /// Constructs a new document with no fields. + Document(); + + virtual ~Document(); + + LUCENE_CLASS(Document); + +protected: + Collection fields; + double boost; + +public: + /// Sets a boost factor for hits on any field of this document. This value will be multiplied into the + /// score of all hits on this document. + /// + /// The default value is 1.0. + /// + /// Values are multiplied into the value of {@link Fieldable#getBoost()} of each field in this document. + /// Thus, this method in effect sets a default boost for the fields of this document. + /// + /// @see Fieldable#setBoost(double) + void setBoost(double boost); + + /// Returns, at indexing time, the boost factor as set by {@link #setBoost(double)}. + /// + /// Note that once a document is indexed this value is no longer available from the index. At search time, + /// for retrieved documents, this method always returns 1. This however does not mean that the boost value + /// set at indexing time was ignored - it was just combined with other indexing time factors and stored + /// elsewhere, for better indexing and search performance. (For more information see the "norm(t,d)" part + /// of the scoring formula in {@link Similarity}.) + /// + /// @see #setBoost(double) + double getBoost(); + + /// Adds a field to a document. Several fields may be added with the same name. 
In this case, if the fields + /// are indexed, their text is treated as though appended for the purposes of search. + /// + /// Note that add like the removeField(s) methods only makes sense prior to adding a document to an index. + /// These methods cannot be used to change the content of an existing index! In order to achieve this, a + /// document has to be deleted from an index and a new changed version of that document has to be added. + void add(const FieldablePtr& field); + + /// Removes field with the specified name from the document. If multiple fields exist with this name, this + /// method removes the first field that has been added. If there is no field with the specified name, the + /// document remains unchanged. + /// + /// Note that the removeField(s) methods like the add method only make sense prior to adding a document to + /// an index. These methods cannot be used to change the content of an existing index! In order to achieve + /// this, a document has to be deleted from an index and a new changed version of that document has to be added. + void removeField(const String& name); + + /// Removes all fields with the given name from the document. If there is no field with the specified name, + /// the document remains unchanged. + /// + /// Note that the removeField(s) methods like the add method only make sense prior to adding a document to an + /// index. These methods cannot be used to change the content of an existing index! In order to achieve this, + /// a document has to be deleted from an index and a new changed version of that document has to be added. + void removeFields(const String& name); + + /// Returns a field with the given name if any exist in this document, or null. If multiple fields exists with + /// this name, this method returns the first value added. + /// Do not use this method with lazy loaded fields. + FieldPtr getField(const String& name); + + /// Returns a field with the given name if any exist in this document, or null. 
If multiple fields exists with + /// this name, this method returns the first value added. + FieldablePtr getFieldable(const String& name); + + /// Returns the string value of the field with the given name if any exist in this document, or null. If multiple + /// fields exist with this name, this method returns the first value added. If only binary fields with this name + /// exist, returns null. + String get(const String& name); + + /// Returns a List of all the fields in a document. + /// + /// Note that fields which are not {@link Fieldable#isStored() stored} are not available in documents + /// retrieved from the index, eg. {@link Searcher#doc(int)} or {@link IndexReader#document(int)}. + Collection getFields(); + + /// Returns an array of {@link Field}s with the given name. Do not use with lazy loaded fields. This method + /// returns an empty array when there are no matching fields. It never returns null. + /// @param name the name of the field + /// @return a Field[] array + Collection getFields(const String& name); + + /// Returns an array of {@link Fieldable}s with the given name. + /// This method returns an empty array when there are no matching fields. It never returns null. + /// @param name the name of the field + /// @return a Fieldable[] array + Collection getFieldables(const String& name); + + /// Returns an array of values of the field specified as the method parameter. + /// This method returns an empty array when there are no matching fields. It never returns null. + /// @param name the name of the field + /// @return a String[] of field values + Collection getValues(const String& name); + + /// Returns an array of byte arrays for of the fields that have the name specified as the method parameter. + /// This method returns an empty array when there are no matching fields. It never returns null. 
+ /// @param name the name of the field + /// @return a byte[][] of binary field values + Collection getBinaryValues(const String& name); + + /// Returns an array of bytes for the first (or only) field that has the name specified as the method parameter. + /// This method will return null if no binary fields with the specified name are available. There may be + /// non-binary fields with the same name. + /// @param name the name of the field. + /// @return a byte[] containing the binary field value or null + ByteArray getBinaryValue(const String& name); + + /// Returns a string representation of the object + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/DocumentsWriter.h b/include/lucene++/DocumentsWriter.h new file mode 100644 index 00000000..a33a29ef --- /dev/null +++ b/include/lucene++/DocumentsWriter.h @@ -0,0 +1,522 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCUMENTSWRITER_H +#define DOCUMENTSWRITER_H + +#include "ByteBlockPool.h" +#include "RAMFile.h" + +namespace Lucene { + +/// This class accepts multiple added documents and directly writes a single segment file. It does this more +/// efficiently than creating a single segment per document (with DocumentWriter) and doing standard merges on +/// those segments. +/// +/// Each added document is passed to the {@link DocConsumer}, which in turn processes the document and interacts +/// with other consumers in the indexing chain. 
Certain consumers, like {@link StoredFieldsWriter} and {@link +/// TermVectorsTermsWriter}, digest a document and immediately write bytes to the "doc store" files (ie, +/// they do not consume RAM per document, except while they are processing the document). +/// +/// Other consumers, eg {@link FreqProxTermsWriter} and {@link NormsWriter}, buffer bytes in RAM and flush only +/// when a new segment is produced. +/// +/// Once we have used our allowed RAM buffer, or the number of added docs is large enough (in the case we are +/// flushing by doc count instead of RAM usage), we create a real segment and flush it to the Directory. +/// +/// Threads: +/// Multiple threads are allowed into addDocument at once. There is an initial synchronized call to +/// getThreadState which allocates a ThreadState for this thread. The same thread will get the same ThreadState +/// over time (thread affinity) so that if there are consistent patterns (for example each thread is indexing a +/// different content source) then we make better use of RAM. Then processDocument is called on that ThreadState +/// without synchronization (most of the "heavy lifting" is in this call). Finally the synchronized +/// "finishDocument" is called to flush changes to the directory. +/// +/// When flush is called by IndexWriter we forcefully idle all threads and flush only once they are all idle. +/// This means you can call flush with a given thread even while other threads are actively adding/deleting +/// documents. +/// +/// Exceptions: +/// Because this class directly updates in-memory posting lists, and flushes stored fields and term vectors +/// directly to files in the directory, there are certain limited times when an exception can corrupt this state. +/// For example, a disk full while flushing stored fields leaves this file in a corrupt state. Or, an +/// std::bad_alloc exception while appending to the in-memory posting lists can corrupt that posting list. 
+/// We call such exceptions "aborting exceptions". In these cases we must call abort() to discard all docs added +/// since the last flush. +/// +/// All other exceptions ("non-aborting exceptions") can still partially update the index structures. These +/// updates are consistent, but, they represent only a part of the document seen up until the exception was hit. +/// When this happens, we immediately mark the document as deleted so that the document is always atomically +/// ("all or none") added to the index. +class LPPAPI DocumentsWriter : public LuceneObject { +public: + DocumentsWriter(const DirectoryPtr& directory, const IndexWriterPtr& writer, const IndexingChainPtr& indexingChain); + virtual ~DocumentsWriter(); + + LUCENE_CLASS(DocumentsWriter); + +protected: + String docStoreSegment; // Current doc-store segment we are writing + int32_t docStoreOffset; // Current starting doc-store offset of current segment + + int32_t nextDocID; // Next docID to be added + int32_t numDocsInRAM; // # docs buffered in RAM + + /// Max # ThreadState instances; if there are more threads than this they share ThreadStates + static const int32_t MAX_THREAD_STATE; + Collection threadStates; + MapThreadDocumentsWriterThreadState threadBindings; + + int32_t pauseThreads; // Non-zero when we need all threads to pause (eg to flush) + bool aborting; // True if an abort is pending + + DocFieldProcessorPtr docFieldProcessor; + + /// Deletes done after the last flush; these are discarded on abort + BufferedDeletesPtr deletesInRAM; + + /// Deletes done before the last flush; these are still kept on abort + BufferedDeletesPtr deletesFlushed; + + /// The max number of delete terms that can be buffered before they must be flushed to disk. + int32_t maxBufferedDeleteTerms; + + /// How much RAM we can use before flushing. This is 0 if we are flushing by doc count instead. 
+ int64_t ramBufferSize; + int64_t waitQueuePauseBytes; + int64_t waitQueueResumeBytes; + + /// If we've allocated 5% over our RAM budget, we then free down to 95% + int64_t freeTrigger; + int64_t freeLevel; + + /// Flush @ this number of docs. If ramBufferSize is non-zero we will flush by RAM usage instead. + int32_t maxBufferedDocs; + + /// How many docs already flushed to index + int32_t flushedDocCount; + + bool closed; + + /// List of files that were written before last abort() + HashSet _abortedFiles; + SegmentWriteStatePtr flushState; + + Collection freeIntBlocks; + Collection freeCharBlocks; + +public: + /// Coarse estimates used to measure RAM usage of buffered deletes + static const int32_t OBJECT_HEADER_BYTES; + static const int32_t POINTER_NUM_BYTE; + static const int32_t INT_NUM_BYTE; + static const int32_t CHAR_NUM_BYTE; + + /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object + /// with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is + /// object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since + /// it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). + /// BufferedDeletes.num is OBJ_HEADER + INT. + static const int32_t BYTES_PER_DEL_TERM; + + /// Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). Integer is + /// OBJ_HEADER + int + static const int32_t BYTES_PER_DEL_DOCID; + + /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object + /// with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount + /// (say 24 bytes). Integer is OBJ_HEADER + INT. 
+ static const int32_t BYTES_PER_DEL_QUERY; + + /// Initial chunks size of the shared byte[] blocks used to store postings data + static const int32_t BYTE_BLOCK_SHIFT; + static const int32_t BYTE_BLOCK_SIZE; + static const int32_t BYTE_BLOCK_MASK; + static const int32_t BYTE_BLOCK_NOT_MASK; + + /// Initial chunk size of the shared char[] blocks used to store term text + static const int32_t CHAR_BLOCK_SHIFT; + static const int32_t CHAR_BLOCK_SIZE; + static const int32_t CHAR_BLOCK_MASK; + + static const int32_t MAX_TERM_LENGTH; + + /// Initial chunks size of the shared int[] blocks used to store postings data + static const int32_t INT_BLOCK_SHIFT; + static const int32_t INT_BLOCK_SIZE; + static const int32_t INT_BLOCK_MASK; + + static const int32_t PER_DOC_BLOCK_SIZE; + +INTERNAL: + IndexWriterWeakPtr _writer; + DirectoryPtr directory; + IndexingChainPtr indexingChain; + String segment; // Current segment we are working on + + int32_t numDocsInStore; // # docs written to doc stores + + bool flushPending; // True when a thread has decided to flush + bool bufferIsFull; // True when it's time to write segment + + InfoStreamPtr infoStream; + int32_t maxFieldLength; + SimilarityPtr similarity; + + DocConsumerPtr consumer; + + HashSet _openFiles; + HashSet _closedFiles; + + WaitQueuePtr waitQueue; + SkipDocWriterPtr skipDocWriter; + + ByteBlockAllocatorPtr byteBlockAllocator; + ByteBlockAllocatorPtr perDocAllocator; + + int64_t numBytesAlloc; + int64_t numBytesUsed; + + // used only by assert + TermPtr lastDeleteTerm; + +public: + virtual void initialize(); + + /// Create and return a new DocWriterBuffer. 
+ PerDocBufferPtr newPerDocBuffer(); + + static IndexingChainPtr getDefaultIndexingChain(); + + void updateFlushedDocCount(int32_t n); + int32_t getFlushedDocCount(); + void setFlushedDocCount(int32_t n); + + /// Returns true if any of the fields in the current buffered docs have omitTermFreqAndPositions==false + bool hasProx(); + + /// If non-null, various details of indexing are printed here. + void setInfoStream(const InfoStreamPtr& infoStream); + + void setMaxFieldLength(int32_t maxFieldLength); + void setSimilarity(const SimilarityPtr& similarity); + + /// Set how much RAM we can use before flushing. + void setRAMBufferSizeMB(double mb); + double getRAMBufferSizeMB(); + + /// Set max buffered docs, which means we will flush by doc count instead of by RAM usage. + void setMaxBufferedDocs(int32_t count); + int32_t getMaxBufferedDocs(); + + /// Get current segment name we are writing. + String getSegment(); + + /// Returns how many docs are currently buffered in RAM. + int32_t getNumDocsInRAM(); + + /// Returns the current doc store segment we are writing to. + String getDocStoreSegment(); + + /// Returns the doc offset into the shared doc store for the current buffered docs. + int32_t getDocStoreOffset(); + + /// Closes the current open doc stores an returns the doc store segment name. This returns null if there + /// are no buffered documents. + String closeDocStore(); + + HashSet abortedFiles(); + + void message(const String& message); + + /// Returns Collection of files in use by this instance, including any flushed segments. + HashSet openFiles(); + HashSet closedFiles(); + + void addOpenFile(const String& name); + void removeOpenFile(const String& name); + + void setAborting(); + + /// Called if we hit an exception at a bad time (when updating the index files) and must discard all + /// currently buffered docs. This resets our state, discarding any docs added since last flush. 
+ void abort(); + + /// Returns true if an abort is in progress + bool pauseAllThreads(); + void resumeAllThreads(); + + bool anyChanges(); + + void initFlushState(bool onlyDocStore); + + /// Flush all pending docs to a new segment + int32_t flush(bool _closeDocStore); + + HashSet getFlushedFiles(); + + /// Build compound file for the segment we just flushed + void createCompoundFile(const String& segment); + + /// Set flushPending if it is not already set and returns whether it was set. This is used by IndexWriter + /// to trigger a single flush even when multiple threads are trying to do so. + bool setFlushPending(); + void clearFlushPending(); + + void pushDeletes(); + + void close(); + + void initSegmentName(bool onlyDocStore); + + /// Returns a free (idle) ThreadState that may be used for indexing this one document. This call also + /// pauses if a flush is pending. If delTerm is non-null then we buffer this deleted term after the + /// thread state has been acquired. + DocumentsWriterThreadStatePtr getThreadState(const DocumentPtr& doc, const TermPtr& delTerm); + + /// Returns true if the caller (IndexWriter) should now flush. 
+ bool addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer); + + bool updateDocument(const TermPtr& t, const DocumentPtr& doc, const AnalyzerPtr& analyzer); + bool updateDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer, const TermPtr& delTerm); + + int32_t getNumBufferedDeleteTerms(); // for testing + MapTermNum getBufferedDeleteTerms(); // for testing + + /// Called whenever a merge has completed and the merged segments had deletions + void remapDeletes(const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergeDocCount); + + bool bufferDeleteTerms(Collection terms); + bool bufferDeleteTerm(const TermPtr& term); + bool bufferDeleteQueries(Collection queries); + bool bufferDeleteQuery(const QueryPtr& query); + bool deletesFull(); + bool doApplyDeletes(); + + void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms); + int32_t getMaxBufferedDeleteTerms(); + + bool hasDeletes(); + bool applyDeletes(const SegmentInfosPtr& infos); + bool doBalanceRAM(); + + void waitForWaitQueue(); + + int64_t getRAMUsed(); + + IntArray getIntBlock(bool trackAllocations); + void bytesAllocated(int64_t numBytes); + void bytesUsed(int64_t numBytes); + void recycleIntBlocks(Collection blocks, int32_t start, int32_t end); + + CharArray getCharBlock(); + void recycleCharBlocks(Collection blocks, int32_t numBlocks); + + String toMB(int64_t v); + + /// We have four pools of RAM: Postings, byte blocks (holds freq/prox posting data), char blocks (holds + /// characters in the term) and per-doc buffers (stored fields/term vectors). Different docs require + /// varying amount of storage from these four classes. + /// + /// For example, docs with many unique single-occurrence short terms will use up the Postings + /// RAM and hardly any of the other two. Whereas docs with very large terms will use alot of char blocks + /// RAM and relatively less of the other two. 
This method just frees allocations from the pools once we + /// are over-budget, which balances the pools to match the current docs. + void balanceRAM(); + +protected: + /// Reset after a flush + void doAfterFlush(); + + bool allThreadsIdle(); + + void waitReady(const DocumentsWriterThreadStatePtr& state); + + bool timeToFlushDeletes(); + + // used only by assert + bool checkDeleteTerm(const TermPtr& term); + + bool applyDeletes(const IndexReaderPtr& reader, int32_t docIDStart); + void addDeleteTerm(const TermPtr& term, int32_t docCount); + + /// Buffer a specific docID for deletion. Currently only used when we hit a exception when adding a document + void addDeleteDocID(int32_t docID); + void addDeleteQuery(const QueryPtr& query, int32_t docID); + + /// Does the synchronized work to finish/flush the inverted document. + void finishDocument(const DocumentsWriterThreadStatePtr& perThread, const DocWriterPtr& docWriter); + + friend class WaitQueue; +}; + +class DocState : public LuceneObject { +public: + DocState(); + virtual ~DocState(); + + LUCENE_CLASS(DocState); + +public: + DocumentsWriterWeakPtr _docWriter; + AnalyzerPtr analyzer; + int32_t maxFieldLength; + InfoStreamPtr infoStream; + SimilarityPtr similarity; + int32_t docID; + DocumentPtr doc; + String maxTermPrefix; + +public: + /// Only called by asserts + virtual bool testPoint(const String& name); + + void clear(); +}; + +/// RAMFile buffer for DocWriters. +class PerDocBuffer : public RAMFile { +public: + PerDocBuffer(const DocumentsWriterPtr& docWriter); + virtual ~PerDocBuffer(); + + LUCENE_CLASS(PerDocBuffer); + +protected: + DocumentsWriterWeakPtr _docWriter; + +public: + /// Recycle the bytes used. + void recycle(); + +protected: + /// Allocate bytes used from shared pool. + virtual ByteArray newBuffer(int32_t size); +}; + +/// Consumer returns this on each doc. This holds any state that must be flushed synchronized +/// "in docID order". We gather these and flush them in order. 
+class DocWriter : public LuceneObject { +public: + DocWriter(); + virtual ~DocWriter(); + + LUCENE_CLASS(DocWriter); + +public: + DocWriterPtr next; + int32_t docID; + +public: + virtual void finish() = 0; + virtual void abort() = 0; + virtual int64_t sizeInBytes() = 0; + + virtual void setNext(const DocWriterPtr& next); +}; + +/// The IndexingChain must define the {@link #getChain(DocumentsWriter)} method which returns the DocConsumer +/// that the DocumentsWriter calls to process the documents. +class IndexingChain : public LuceneObject { +public: + virtual ~IndexingChain(); + + LUCENE_CLASS(IndexingChain); + +public: + virtual DocConsumerPtr getChain(const DocumentsWriterPtr& documentsWriter) = 0; +}; + +/// This is the current indexing chain: +/// DocConsumer / DocConsumerPerThread +/// --> code: DocFieldProcessor / DocFieldProcessorPerThread +/// --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField +/// --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField +/// --> code: DocInverter / DocInverterPerThread / DocInverterPerField +/// --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField +/// --> code: TermsHash / TermsHashPerThread / TermsHashPerField +/// --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField +/// --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField +/// --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField +/// --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField +/// --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField +/// --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField +class DefaultIndexingChain : public IndexingChain { +public: + virtual ~DefaultIndexingChain(); + + LUCENE_CLASS(DefaultIndexingChain); + +public: + virtual DocConsumerPtr getChain(const 
DocumentsWriterPtr& documentsWriter); +}; + +class SkipDocWriter : public DocWriter { +public: + virtual ~SkipDocWriter(); + + LUCENE_CLASS(SkipDocWriter); + +public: + virtual void finish(); + virtual void abort(); + virtual int64_t sizeInBytes(); +}; + +class WaitQueue : public LuceneObject { +public: + WaitQueue(const DocumentsWriterPtr& docWriter); + virtual ~WaitQueue(); + + LUCENE_CLASS(WaitQueue); + +protected: + DocumentsWriterWeakPtr _docWriter; + +public: + Collection waiting; + int32_t nextWriteDocID; + int32_t nextWriteLoc; + int32_t numWaiting; + int64_t waitingBytes; + +public: + void reset(); + bool doResume(); + bool doPause(); + void abort(); + bool add(const DocWriterPtr& doc); + +protected: + void writeDocument(const DocWriterPtr& doc); +}; + +class ByteBlockAllocator : public ByteBlockPoolAllocatorBase { +public: + ByteBlockAllocator(const DocumentsWriterPtr& docWriter, int32_t blockSize); + virtual ~ByteBlockAllocator(); + + LUCENE_CLASS(ByteBlockAllocator); + +protected: + DocumentsWriterWeakPtr _docWriter; + +public: + int32_t blockSize; + Collection freeByteBlocks; + +public: + /// Allocate another byte[] from the shared pool + virtual ByteArray getByteBlock(bool trackAllocations); + + /// Return byte[]'s to the pool + virtual void recycleByteBlocks(Collection blocks, int32_t start, int32_t end); + virtual void recycleByteBlocks(Collection blocks); +}; + +} + +#endif diff --git a/include/lucene++/DocumentsWriterThreadState.h b/include/lucene++/DocumentsWriterThreadState.h new file mode 100644 index 00000000..c0d86d33 --- /dev/null +++ b/include/lucene++/DocumentsWriterThreadState.h @@ -0,0 +1,39 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef DOCUMENTSWRITERTHREADSTATE_H +#define DOCUMENTSWRITERTHREADSTATE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Used by DocumentsWriter to maintain per-thread state. +/// We keep a separate Posting hash and other state for each thread and then merge postings +/// hashes from all threads when writing the segment. +class DocumentsWriterThreadState : public LuceneObject { +public: + DocumentsWriterThreadState(const DocumentsWriterPtr& docWriter); + virtual ~DocumentsWriterThreadState(); + + LUCENE_CLASS(DocumentsWriterThreadState); + +public: + bool isIdle; // false if this is currently in use by a thread + int32_t numThreads; // Number of threads that share this instance + bool doFlushAfter; // true if we should flush after processing current doc + DocConsumerPerThreadPtr consumer; + DocStatePtr docState; + DocumentsWriterWeakPtr _docWriter; + +public: + virtual void initialize(); + void doAfterFlush(); +}; + +} + +#endif diff --git a/include/lucene++/DoubleFieldSource.h b/include/lucene++/DoubleFieldSource.h new file mode 100644 index 00000000..8934c979 --- /dev/null +++ b/include/lucene++/DoubleFieldSource.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef DOUBLEFIELDSOURCE_H +#define DOUBLEFIELDSOURCE_H + +#include "FieldCacheSource.h" +#include "DocValues.h" + +namespace Lucene { + +/// Obtains double field values from the {@link FieldCache} using getDoubles() and makes those values available +/// as other numeric types, casting as needed. +/// +/// @see FieldCacheSource for requirements on the field. 
+/// +/// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite +/// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's +/// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, +/// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU +/// per lookup but will not consume double the FieldCache RAM. +class DoubleFieldSource : public FieldCacheSource { +public: + /// Create a cached double field source with a specific string-to-double parser. + DoubleFieldSource(const String& field, const DoubleParserPtr& parser = DoubleParserPtr()); + virtual ~DoubleFieldSource(); + + LUCENE_CLASS(DoubleFieldSource); + +protected: + DoubleParserPtr parser; + +public: + virtual String description(); + virtual DocValuesPtr getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader); + virtual bool cachedFieldSourceEquals(const FieldCacheSourcePtr& other); + virtual int32_t cachedFieldSourceHashCode(); +}; + +class DoubleDocValues : public DocValues { +public: + DoubleDocValues(const DoubleFieldSourcePtr& source, Collection arr); + virtual ~DoubleDocValues(); + + LUCENE_CLASS(DoubleDocValues); + +protected: + DoubleFieldSourceWeakPtr _source; + Collection arr; + +public: + virtual double doubleVal(int32_t doc); + virtual String toString(int32_t doc); + virtual CollectionValue getInnerArray(); +}; + +} + +#endif diff --git a/include/lucene++/ExactPhraseScorer.h b/include/lucene++/ExactPhraseScorer.h new file mode 100644 index 00000000..c70ac192 --- /dev/null +++ b/include/lucene++/ExactPhraseScorer.h @@ -0,0 +1,27 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EXACTPHRASESCORER_H +#define EXACTPHRASESCORER_H + +#include "PhraseScorer.h" + +namespace Lucene { + +class ExactPhraseScorer : public PhraseScorer { +public: + ExactPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, ByteArray norms); + virtual ~ExactPhraseScorer(); + + LUCENE_CLASS(ExactPhraseScorer); + +protected: + virtual double phraseFreq(); +}; + +} + +#endif diff --git a/include/lucene++/Explanation.h b/include/lucene++/Explanation.h new file mode 100644 index 00000000..289868d1 --- /dev/null +++ b/include/lucene++/Explanation.h @@ -0,0 +1,86 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef EXPLANATION_H +#define EXPLANATION_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Describes the score computation for document and query. +class LPPAPI Explanation : public LuceneObject { +public: + Explanation(double value = 0, const String& description = EmptyString); + virtual ~Explanation(); + + LUCENE_CLASS(Explanation); + +protected: + double value; // the value of this node + String description; // what it represents + Collection details; // sub-explanations + +public: + /// Indicates whether or not this Explanation models a good match. + /// + /// By default, an Explanation represents a "match" if the value is positive. + /// + /// @see #getValue + virtual bool isMatch(); + + /// The value assigned to this explanation node. 
+ virtual double getValue(); + + /// Sets the value assigned to this explanation node. + virtual void setValue(double value); + + /// A description of this explanation node. + virtual String getDescription(); + + /// Sets the description of this explanation node. + virtual void setDescription(const String& description); + + /// The sub-nodes of this explanation node. + virtual Collection getDetails(); + + /// Adds a sub-node to this explanation node. + virtual void addDetail(const ExplanationPtr& detail); + + /// Render an explanation as text. + virtual String toString(); + + /// Render an explanation as HTML. + virtual String toHtml(); + +protected: + /// A short one line summary which should contain all high level information about this Explanation, + /// without the "Details" + virtual String getSummary(); + + virtual String toString(int32_t depth); +}; + +/// Small Util class used to pass both an idf factor as well as an explanation for that factor. +/// +/// This class will likely be held on a {@link Weight}, so be aware before storing any large fields. +class LPPAPI IDFExplanation : public LuceneObject { +public: + virtual ~IDFExplanation(); + LUCENE_CLASS(IDFExplanation); + +public: + /// @return the idf factor + virtual double getIdf() = 0; + + /// This should be calculated lazily if possible. + /// @return the explanation for the idf factor. + virtual String explain() = 0; +}; + +} + +#endif diff --git a/include/lucene++/FSDirectory.h b/include/lucene++/FSDirectory.h new file mode 100644 index 00000000..31bed0e0 --- /dev/null +++ b/include/lucene++/FSDirectory.h @@ -0,0 +1,133 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FSDIRECTORY_H +#define FSDIRECTORY_H + +#include "Directory.h" + +namespace Lucene { + +/// Base class for Directory implementations that store index files in the file system. There are currently three +/// core subclasses: +/// +/// {@link SimpleFSDirectory} is a straightforward implementation using std::ofstream and std::ifstream. +/// +/// {@link MMapDirectory} uses memory-mapped IO when reading. This is a good choice if you have plenty of virtual +/// memory relative to your index size, eg if you are running on a 64 bit operating system, oryour index sizes are +/// small enough to fit into the virtual memory space. +/// +/// For users who have no reason to prefer a specific implementation, it's best to simply use {@link #open}. For +/// all others, you should instantiate the desired implementation directly. +/// +/// The locking implementation is by default {@link NativeFSLockFactory}, but can be changed by passing in a custom +/// {@link LockFactory} instance. +/// @see Directory +class LPPAPI FSDirectory : public Directory { +protected: + /// Create a new FSDirectory for the named location (ctor for subclasses). + /// @param path the path of the directory. + /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) + FSDirectory(const String& path, const LockFactoryPtr& lockFactory); + +public: + virtual ~FSDirectory(); + + LUCENE_CLASS(FSDirectory); + +public: + /// Default read chunk size. This is a conditional default based on operating system. + /// @see #setReadChunkSize + static const int32_t DEFAULT_READ_CHUNK_SIZE; + +protected: + bool checked; + + /// The underlying filesystem directory. + String directory; + + /// @see #DEFAULT_READ_CHUNK_SIZE + int32_t chunkSize; + +public: + /// Creates an FSDirectory instance. 
+ static FSDirectoryPtr open(const String& path); + + /// Just like {@link #open(File)}, but allows you to also specify a custom {@link LockFactory}. + static FSDirectoryPtr open(const String& path, const LockFactoryPtr& lockFactory); + + /// Lists all files (not subdirectories) in the directory. + /// @throws NoSuchDirectoryException if the directory does not exist, or does exist but is not a directory. + static HashSet listAll(const String& dir); + + /// Returns the time the named file was last modified. + static uint64_t fileModified(const String& directory, const String& name); + + /// Create file system directory. + void createDir(); + + /// Return file system directory. + String getFile(); + + /// Sets the maximum number of bytes read at once from the underlying file during {@link IndexInput#readBytes}. + /// The default value is {@link #DEFAULT_READ_CHUNK_SIZE}. Changes to this value will not impact any already-opened + /// {@link IndexInput}s. You should call this before attempting to open an index on the directory. This value should + /// be as large as possible to reduce any possible performance impact. + void setReadChunkSize(int32_t chunkSize); + + /// The maximum number of bytes to read at once from the underlying file during {@link IndexInput#readBytes}. + /// @see #setReadChunkSize + int32_t getReadChunkSize(); + + /// Lists all files (not subdirectories) in the directory. + /// @see #listAll(const String&) + virtual HashSet listAll(); + + /// Returns true if a file with the given name exists. + virtual bool fileExists(const String& name); + + /// Returns the time the named file was last modified. + virtual uint64_t fileModified(const String& name); + + /// Set the modified time of an existing file to now. + virtual void touchFile(const String& name); + + /// Removes an existing file in the directory. + virtual void deleteFile(const String& name); + + /// Returns the length in bytes of a file in the directory. 
+ virtual int64_t fileLength(const String& name); + + /// Ensure that any writes to this file are moved to stable storage. Lucene uses this to properly commit changes to + /// the index, to prevent a machine/OS crash from corrupting the index. + virtual void sync(const String& name); + + /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory + /// implementation may ignore the buffer size. + virtual IndexInputPtr openInput(const String& name); + + /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory + /// implementation may ignore the buffer size. Currently the only Directory implementations that respect this parameter + /// are {@link FSDirectory} and {@link CompoundFileReader}. + virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); + + /// Return a string identifier that uniquely differentiates this Directory instance from other Directory instances. + virtual String getLockID(); + + /// Closes the store to future operations. + virtual void close(); + + /// For debug output. + virtual String toString(); + +protected: + /// Initializes the directory to create a new file with the given name. This method should be used in {@link #createOutput}. + void initOutput(const String& name); +}; + +} + +#endif diff --git a/include/lucene++/FSLockFactory.h b/include/lucene++/FSLockFactory.h new file mode 100644 index 00000000..99ee04e6 --- /dev/null +++ b/include/lucene++/FSLockFactory.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FSLOCKFACTORY_H +#define FSLOCKFACTORY_H + +#include "LockFactory.h" + +namespace Lucene { + +/// Base class for file system based locking implementation. +class LPPAPI FSLockFactory : public LockFactory { +protected: + FSLockFactory(); + +public: + virtual ~FSLockFactory(); + + LUCENE_CLASS(FSLockFactory); + +protected: + /// Directory for the lock files. + String lockDir; + +public: + /// Set the lock directory. This method can be only called once to + /// initialize the lock directory. It is used by {@link FSDirectory} + /// to set the lock directory to itself. Subclasses can also use + /// this method to set the directory in the constructor. + void setLockDir(const String& lockDir); + + /// Retrieve the lock directory. + String getLockDir(); +}; + +} + +#endif diff --git a/include/lucene++/FastCharStream.h b/include/lucene++/FastCharStream.h new file mode 100644 index 00000000..01bbec3b --- /dev/null +++ b/include/lucene++/FastCharStream.h @@ -0,0 +1,57 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FASTCHARSTREAM_H +#define FASTCHARSTREAM_H + +#include "QueryParserCharStream.h" + +namespace Lucene { + +/// An efficient implementation of QueryParserCharStream interface. +/// +/// Note that this does not do line-number counting, but instead keeps track of the character position of +/// the token in the input, as required by Lucene's {@link Token} API. +class LPPAPI FastCharStream : public QueryParserCharStream, public LuceneObject { +public: + /// Constructs from a Reader. 
+ FastCharStream(const ReaderPtr& reader); + virtual ~FastCharStream(); + + LUCENE_CLASS(FastCharStream); + +public: + CharArray buffer; + + int32_t bufferLength; // end of valid chars + int32_t bufferPosition; // next char to read + + int32_t tokenStart; // offset in buffer + int32_t bufferStart; // position in file of buffer + + ReaderPtr input; // source of chars + +public: + virtual wchar_t readChar(); + virtual wchar_t BeginToken(); + virtual void backup(int32_t amount); + virtual String GetImage(); + virtual CharArray GetSuffix(int32_t length); + virtual void Done(); + virtual int32_t getColumn(); + virtual int32_t getLine(); + virtual int32_t getEndColumn(); + virtual int32_t getEndLine(); + virtual int32_t getBeginColumn(); + virtual int32_t getBeginLine(); + +protected: + void refill(); +}; + +} + +#endif diff --git a/include/lucene++/Field.h b/include/lucene++/Field.h new file mode 100644 index 00000000..1429240a --- /dev/null +++ b/include/lucene++/Field.h @@ -0,0 +1,155 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELD_H +#define FIELD_H + +#include "AbstractField.h" + +namespace Lucene { + +class LPPAPI Field : public AbstractField { +public: + /// Create a field by specifying its name, value and how it will be saved in the index. Term vectors + /// will not be stored in the index. 
+ /// + /// @param name The name of the field + /// @param value The string to process + /// @param store Whether value should be stored in the index + /// @param index Whether the field should be indexed, and if so, if it should be tokenized before indexing + Field(const String& name, const String& value, Store store, Index index); + + /// Create a field by specifying its name, value and how it will be saved in the index. + /// + /// @param name The name of the field + /// @param value The string to process + /// @param store Whether value should be stored in the index + /// @param index Whether the field should be indexed, and if so, if it should be tokenized before indexing + /// @param termVector Whether term vector should be stored + Field(const String& name, const String& value, Store store, Index index, TermVector termVector); + + /// Create a tokenized and indexed field that is not stored. Term vectors will not be stored. The Reader is + /// read only when the Document is added to the index, ie. you may not close the Reader until {@link + /// IndexWriter#addDocument(Document)} has been called. + /// + /// @param name The name of the field + /// @param reader The reader with the content + Field(const String& name, const ReaderPtr& reader); + + /// Create a tokenized and indexed field that is not stored, optionally with storing term vectors. The + /// Reader is read only when the Document is added to the index, ie. you may not close the Reader until + /// {@link IndexWriter#addDocument(Document)} has been called. + /// + /// @param name The name of the field + /// @param reader The reader with the content + /// @param termVector Whether term vector should be stored + Field(const String& name, const ReaderPtr& reader, TermVector termVector); + + /// Create a tokenized and indexed field that is not stored. Term vectors will not be stored. This is useful + /// for pre-analyzed fields. The TokenStream is read only when the Document is added to the index, ie. 
you + /// may not close the TokenStream until {@link IndexWriter#addDocument(Document)} has been called. + /// + /// @param name The name of the field + /// @param tokenStream The TokenStream with the content + Field(const String& name, const TokenStreamPtr& tokenStream); + + /// Create a tokenized and indexed field that is not stored, optionally with storing term vectors. This is + /// useful for pre-analyzed fields. The TokenStream is read only when the Document is added to the index, + /// ie. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)} has been called. + /// + /// @param name The name of the field + /// @param tokenStream The TokenStream with the content + /// @param termVector Whether term vector should be stored + Field(const String& name, const TokenStreamPtr& tokenStream, TermVector termVector); + + /// Create a stored field with binary value. Optionally the value may be compressed. + /// + /// @param name The name of the field + /// @param value The binary value + /// @param store How value should be stored (compressed or not) + Field(const String& name, ByteArray value, Store store); + + /// Create a stored field with binary value. Optionally the value may be compressed. + /// + /// @param name The name of the field + /// @param value The binary value + /// @param offset Starting offset in value where this Field's bytes are + /// @param length Number of bytes to use for this Field, starting at offset + /// @param store How value should be stored (compressed or not) + Field(const String& name, ByteArray value, int32_t offset, int32_t length, Store store); + + virtual ~Field(); + + LUCENE_CLASS(Field); + +public: + using AbstractField::isStored; + using AbstractField::isIndexed; + + /// Specifies whether and how a field should be stored. + static bool isStored(Store store); + + /// Specifies whether and how a field should be indexed. 
+ static bool isIndexed(Index index); + static bool isAnalyzed(Index index); + static bool omitNorms(Index index); + + /// Get the best representation of the index given the flags. + static Field::Index toIndex(bool indexed, bool analyzed); + + /// Get the best representation of the index given the flags. + static Field::Index toIndex(bool indexed, bool analyzed, bool omitNorms); + + /// Specifies whether and how a field should have term vectors. + static bool isStored(TermVector termVector); + static bool withPositions(TermVector termVector); + static bool withOffsets(TermVector termVector); + + /// Get the best representation of the index given the flags. + static Field::TermVector toTermVector(bool stored, bool withOffsets, bool withPositions); + + /// The value of the field as a String, or null. If null, the Reader value or binary value is used. + /// Exactly one of stringValue(), readerValue(), and getBinaryValue() must be set. + virtual String stringValue(); + + /// The value of the field as a Reader, or null. If null, the String value or binary value is used. + /// Exactly one of stringValue(), readerValue(), and getBinaryValue() must be set. + virtual ReaderPtr readerValue(); + + /// The value of the field as a TokesStream, or null. If null, the Reader value or String value is + /// analyzed to produce the indexed tokens. + virtual TokenStreamPtr tokenStreamValue(); + + /// Change the value of this field. This can be used during indexing to re-use a single Field instance + /// to improve indexing speed. Typically a single {@link Document} instance is re-used as well. This + /// helps most on small documents. + /// + /// Each Field instance should only be used once within a single {@link Document} instance. + virtual void setValue(const String& value); + + /// Change the value of this field. + virtual void setValue(const ReaderPtr& value); + + /// Change the value of this field. 
+ virtual void setValue(ByteArray value); + + /// Change the value of this field. + virtual void setValue(ByteArray value, int32_t offset, int32_t length); + + /// Sets the token stream to be used for indexing and causes isIndexed() and isTokenized() to return + /// true. May be combined with stored values from stringValue() or getBinaryValue() + virtual void setTokenStream(const TokenStreamPtr& tokenStream); + +protected: + void ConstructField(const String& name, const String& value, Store store, Index index, TermVector termVector); + void ConstructField(const String& name, const ReaderPtr& reader, TermVector termVector); + void ConstructField(const String& name, const TokenStreamPtr& tokenStream, TermVector termVector); + void ConstructField(const String& name, ByteArray value, int32_t offset, int32_t length, Store store); +}; + +} + +#endif diff --git a/include/lucene++/FieldCache.h b/include/lucene++/FieldCache.h new file mode 100644 index 00000000..d00073b6 --- /dev/null +++ b/include/lucene++/FieldCache.h @@ -0,0 +1,272 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDCACHE_H +#define FIELDCACHE_H + +#include +#include "LuceneObject.h" + +namespace Lucene { + +/// Maintains caches of term values. +/// @see FieldCacheSanityChecker +class LPPAPI FieldCache { +public: + virtual ~FieldCache(); + LUCENE_INTERFACE(FieldCache); + +public: + /// Specifies whether and how a field should be stored. + enum CacheType { + CACHE_BYTE = 1, + CACHE_INT, + CACHE_LONG, + CACHE_DOUBLE, + CACHE_STRING, + CACHE_STRING_INDEX + }; + + /// Indicator for StringIndex values in the cache. 
+ /// NOTE: the value assigned to this constant must not be the same as any of those in SortField + static const int32_t STRING_INDEX; + +public: + /// The cache used internally by sorting and range query classes. + static FieldCachePtr DEFAULT(); + + /// The default parser for byte values, which are encoded by StringUtils::toInt + static ByteParserPtr DEFAULT_BYTE_PARSER(); + + /// The default parser for int values, which are encoded by StringUtils::toInt + static IntParserPtr DEFAULT_INT_PARSER(); + + /// The default parser for int values, which are encoded by StringUtils::toLong + static LongParserPtr DEFAULT_LONG_PARSER(); + + /// The default parser for double values, which are encoded by StringUtils::toDouble + static DoubleParserPtr DEFAULT_DOUBLE_PARSER(); + + /// A parser instance for int values encoded by {@link NumericUtils#prefixCodedToInt(String)}, + /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. + static IntParserPtr NUMERIC_UTILS_INT_PARSER(); + + /// A parser instance for long values encoded by {@link NumericUtils#prefixCodedToLong(String)}, + /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. + static LongParserPtr NUMERIC_UTILS_LONG_PARSER(); + + /// A parser instance for double values encoded by {@link NumericUtils}, + /// eg. when indexed via {@link NumericField}/{@link NumericTokenStream}. + static DoubleParserPtr NUMERIC_UTILS_DOUBLE_PARSER(); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in + /// field as a single byte and returns an array of size reader.maxDoc() of the value each document + /// has in the given field. + /// @param reader Used to get field values. + /// @param field Which field contains the single byte values. + /// @return The values in the given field for each document. 
+ virtual Collection getBytes(const IndexReaderPtr& reader, const String& field); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in + /// field as bytes and returns an array of size reader.maxDoc() of the value each document has in + /// the given field. + /// @param reader Used to get field values. + /// @param field Which field contains the bytes. + /// @param parser Computes byte for string values. + /// @return The values in the given field for each document. + virtual Collection getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in + /// field as integers and returns an array of size reader.maxDoc() of the value each document has in + /// the given field. + /// @param reader Used to get field values. + /// @param field Which field contains the integers. + /// @return The values in the given field for each document. + virtual Collection getInts(const IndexReaderPtr& reader, const String& field); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in + /// field as integers and returns an array of size reader.maxDoc() of the value each document has in + /// the given field. + /// @param reader Used to get field values. + /// @param field Which field contains the integers. + /// @param parser Computes integer for string values. + /// @return The values in the given field for each document. + virtual Collection getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in + /// field as longs and returns an array of size reader.maxDoc() of the value each document has in + /// the given field. + /// @param reader Used to get field values. + /// @param field Which field contains the longs. 
+ /// @return The values in the given field for each document. + virtual Collection getLongs(const IndexReaderPtr& reader, const String& field); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in + /// field as longs and returns an array of size reader.maxDoc() of the value each document has in + /// the given field. + /// @param reader Used to get field values. + /// @param field Which field contains the longs. + /// @param parser Computes long for string values. + /// @return The values in the given field for each document. + virtual Collection getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in + /// field as integers and returns an array of size reader.maxDoc() of the value each document has in + /// the given field. + /// @param reader Used to get field values. + /// @param field Which field contains the doubles. + /// @return The values in the given field for each document. + virtual Collection getDoubles(const IndexReaderPtr& reader, const String& field); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the terms in + /// field as doubles and returns an array of size reader.maxDoc() of the value each document has in + /// the given field. + /// @param reader Used to get field values. + /// @param field Which field contains the doubles. + /// @param parser Computes double for string values. + /// @return The values in the given field for each document. + virtual Collection getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser); + + /// Checks the internal cache for an appropriate entry, and if none are found, reads the term values in + /// field and returns an array of size reader.maxDoc() containing the value each document has in + /// the given field. 
+ /// @param reader Used to get field values. + /// @param field Which field contains the strings. + /// @return The values in the given field for each document. + virtual Collection getStrings(const IndexReaderPtr& reader, const String& field); + + /// Checks the internal cache for an appropriate entry, and if none are found reads the term values in + /// field and returns an array of them in natural order, along with an array telling which element in + /// the term array each document uses. + /// @param reader Used to get field values. + /// @param field Which field contains the strings. + /// @return Array of terms and index into the array for each document. + virtual StringIndexPtr getStringIndex(const IndexReaderPtr& reader, const String& field); + + /// Generates an array of CacheEntry objects representing all items currently in the FieldCache. + virtual Collection getCacheEntries() = 0; + + /// Instructs the FieldCache to forcibly expunge all entries from the underlying caches. This is intended + /// only to be used for test methods as a way to ensure a known base state of the Cache. It should not be + /// relied on for "Cache maintenance" in general application code. + virtual void purgeAllCaches() = 0; + + /// Drops all cache entries associated with this reader. NOTE: this reader must precisely match the reader + /// that the cache entry is keyed on. If you pass a top-level reader, it usually will have no effect as + /// Lucene now caches at the segment reader level. + virtual void purge(const IndexReaderPtr& r) = 0; + + /// If non-null, FieldCacheImpl will warn whenever entries are created that are not sane according to + /// {@link FieldCacheSanityChecker}. 
+ virtual void setInfoStream(const InfoStreamPtr& stream); + + /// @see #setInfoStream + virtual InfoStreamPtr getInfoStream(); +}; + +class LPPAPI CreationPlaceholder : public LuceneObject { +public: + virtual ~CreationPlaceholder(); + LUCENE_CLASS(CreationPlaceholder); + +public: + boost::any value; +}; + +/// Stores term text values and document ordering data. +class LPPAPI StringIndex : public LuceneObject { +public: + StringIndex(Collection values, Collection lookup); + virtual ~StringIndex(); + + LUCENE_CLASS(StringIndex); + +public: + /// All the term values, in natural order. + Collection lookup; + + /// For each document, an index into the lookup array. + Collection order; + +public: + int32_t binarySearchLookup(const String& key); +}; + +/// Marker interface as super-interface to all parsers. It is used to specify a custom parser to {@link +/// SortField#SortField(String, Parser)}. +class LPPAPI Parser : public LuceneObject { +public: + virtual ~Parser(); + LUCENE_CLASS(Parser); +}; + +/// Interface to parse bytes from document fields. +/// @see FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr) +class LPPAPI ByteParser : public Parser { +public: + virtual ~ByteParser(); + LUCENE_CLASS(ByteParser); + +public: + /// Return a single Byte representation of this field's value. + virtual uint8_t parseByte(const String& string); +}; + +/// Interface to parse ints from document fields. +/// @see FieldCache#getInts(IndexReaderPtr, String, IntParserPtr) +class LPPAPI IntParser : public Parser { +public: + virtual ~IntParser(); + LUCENE_CLASS(IntParser); + +public: + /// Return a integer representation of this field's value. + virtual int32_t parseInt(const String& string); +}; + +/// Interface to parse longs from document fields. 
+/// @see FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr) +class LPPAPI LongParser : public Parser { +public: + virtual ~LongParser(); + LUCENE_CLASS(LongParser); + +public: + /// Return a long representation of this field's value. + virtual int64_t parseLong(const String& string); +}; + +/// Interface to parse doubles from document fields. +/// @see FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr) +class LPPAPI DoubleParser : public Parser { +public: + virtual ~DoubleParser(); + LUCENE_CLASS(DoubleParser); + +public: + /// Return a double representation of this field's value. + virtual double parseDouble(const String& string); +}; + +/// A unique Identifier/Description for each item in the FieldCache. Can be useful for logging/debugging. +class LPPAPI FieldCacheEntry : public LuceneObject { +public: + virtual ~FieldCacheEntry(); + LUCENE_CLASS(FieldCacheEntry); + +public: + virtual LuceneObjectPtr getReaderKey() = 0; + virtual String getFieldName() = 0; + virtual int32_t getCacheType() = 0; + virtual boost::any getCustom() = 0; + virtual boost::any getValue() = 0; + + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/FieldCacheImpl.h b/include/lucene++/FieldCacheImpl.h new file mode 100644 index 00000000..c58eed09 --- /dev/null +++ b/include/lucene++/FieldCacheImpl.h @@ -0,0 +1,184 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDCACHEIMPL_H +#define FIELDCACHEIMPL_H + +#include "FieldCache.h" + +namespace Lucene { + +/// The default cache implementation, storing all values in memory. A WeakHashMap is used for storage. 
+class FieldCacheImpl : public FieldCache, public LuceneObject { +public: + FieldCacheImpl(); + virtual ~FieldCacheImpl(); + + LUCENE_CLASS(FieldCacheImpl); + +protected: + MapStringCache caches; + InfoStreamPtr infoStream; + +public: + virtual void initialize(); + virtual void purgeAllCaches(); + virtual void purge(const IndexReaderPtr& r); + virtual Collection getCacheEntries(); + + virtual Collection getBytes(const IndexReaderPtr& reader, const String& field); + virtual Collection getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser); + + virtual Collection getInts(const IndexReaderPtr& reader, const String& field); + virtual Collection getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser); + + virtual Collection getLongs(const IndexReaderPtr& reader, const String& field); + virtual Collection getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser); + + virtual Collection getDoubles(const IndexReaderPtr& reader, const String& field); + virtual Collection getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser); + + virtual Collection getStrings(const IndexReaderPtr& reader, const String& field); + virtual StringIndexPtr getStringIndex(const IndexReaderPtr& reader, const String& field); + + virtual void setInfoStream(const InfoStreamPtr& stream); + virtual InfoStreamPtr getInfoStream(); +}; + +class Entry : public LuceneObject { +public: + /// Creates one of these objects for a custom comparator/parser. + Entry(const String& field, const boost::any& custom); + virtual ~Entry(); + + LUCENE_CLASS(Entry); + +public: + String field; // which Fieldable + boost::any custom; // which custom comparator or parser + +public: + /// Two of these are equal if they reference the same field and type. + virtual bool equals(const LuceneObjectPtr& other); + + /// Composes a hashcode based on the field and type. 
+ virtual int32_t hashCode(); +}; + +/// Internal cache. +class Cache : public LuceneObject { +public: + Cache(const FieldCachePtr& wrapper = FieldCachePtr()); + virtual ~Cache(); + + LUCENE_CLASS(Cache); + +public: + FieldCacheWeakPtr _wrapper; + WeakMapLuceneObjectMapEntryAny readerCache; + +protected: + virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key) = 0; + +public: + /// Remove this reader from the cache, if present. + virtual void purge(const IndexReaderPtr& r); + + virtual boost::any get(const IndexReaderPtr& reader, const EntryPtr& key); + virtual void printNewInsanity(const InfoStreamPtr& infoStream, const boost::any& value); +}; + +class ByteCache : public Cache { +public: + ByteCache(const FieldCachePtr& wrapper = FieldCachePtr()); + virtual ~ByteCache(); + + LUCENE_CLASS(ByteCache); + +protected: + virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); +}; + +class IntCache : public Cache { +public: + IntCache(const FieldCachePtr& wrapper = FieldCachePtr()); + virtual ~IntCache(); + + LUCENE_CLASS(IntCache); + +protected: + virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); +}; + +class LongCache : public Cache { +public: + LongCache(const FieldCachePtr& wrapper = FieldCachePtr()); + virtual ~LongCache(); + + LUCENE_CLASS(LongCache); + +protected: + virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); +}; + +class DoubleCache : public Cache { +public: + DoubleCache(const FieldCachePtr& wrapper = FieldCachePtr()); + virtual ~DoubleCache(); + + LUCENE_CLASS(DoubleCache); + +protected: + virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); +}; + +class StringCache : public Cache { +public: + StringCache(const FieldCachePtr& wrapper = FieldCachePtr()); + virtual ~StringCache(); + + LUCENE_CLASS(StringCache); + +protected: + virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); 
+}; + +class StringIndexCache : public Cache { +public: + StringIndexCache(const FieldCachePtr& wrapper = FieldCachePtr()); + virtual ~StringIndexCache(); + + LUCENE_CLASS(StringIndexCache); + +protected: + virtual boost::any createValue(const IndexReaderPtr& reader, const EntryPtr& key); +}; + +class FieldCacheEntryImpl : public FieldCacheEntry { +public: + FieldCacheEntryImpl(const LuceneObjectPtr& readerKey, const String& fieldName, int32_t cacheType, const boost::any& custom, const boost::any& value); + virtual ~FieldCacheEntryImpl(); + + LUCENE_CLASS(FieldCacheEntryImpl); + +protected: + LuceneObjectPtr readerKey; + String fieldName; + int32_t cacheType; + boost::any custom; + boost::any value; + +public: + virtual LuceneObjectPtr getReaderKey(); + virtual String getFieldName(); + virtual int32_t getCacheType(); + virtual boost::any getCustom(); + virtual boost::any getValue(); +}; + +} + +#endif diff --git a/include/lucene++/FieldCacheRangeFilter.h b/include/lucene++/FieldCacheRangeFilter.h new file mode 100644 index 00000000..87be89af --- /dev/null +++ b/include/lucene++/FieldCacheRangeFilter.h @@ -0,0 +1,115 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDCACHERANGEFILTER_H +#define FIELDCACHERANGEFILTER_H + +#include "Filter.h" +#include "FieldCache.h" + +namespace Lucene { + +/// A range filter built on top of a cached single term field (in {@link FieldCache}). +/// +/// FieldCacheRangeFilter builds a single cache for the field the first time it is used. Each subsequent +/// FieldCacheRangeFilter on the same field then reuses this cache, even if the range itself changes. 
+/// +/// This means that FieldCacheRangeFilter is much faster (sometimes more than 100x as fast) as building a +/// {@link TermRangeFilter}, if using a {@link #newStringRange}. However, if the range never changes it is +/// slower (around 2x as slow) than building a CachingWrapperFilter on top of a single {@link TermRangeFilter}. +/// +/// For numeric data types, this filter may be significantly faster than {@link NumericRangeFilter}. +/// Furthermore, it does not need the numeric values encoded by {@link NumericField}. But it has the problem +/// that it only works with exact one value/document (see below). +/// +/// As with all {@link FieldCache} based functionality, FieldCacheRangeFilter is only valid for fields which +/// exact one term for each document (except for {@link #newStringRange} where 0 terms are also allowed). Due +/// to a restriction of {@link FieldCache}, for numeric ranges all terms that do not have a numeric value, 0 +/// is assumed. +/// +/// Thus it works on dates, prices and other single value fields but will not work on regular text fields. It +/// is preferable to use a NOT_ANALYZED field to ensure that there is only a single term. +/// +/// Do not instantiate this template directly, use one of the static factory methods available, that create a +/// correct instance for different data types supported by {@link FieldCache}. +class LPPAPI FieldCacheRangeFilter : public Filter { +public: + FieldCacheRangeFilter(const String& field, const ParserPtr& parser, bool includeLower, bool includeUpper); + virtual ~FieldCacheRangeFilter(); + + LUCENE_CLASS(FieldCacheRangeFilter); + +INTERNAL: + String field; + ParserPtr parser; + bool includeLower; + bool includeUpper; + +public: + /// Creates a string range filter using {@link FieldCache#getStringIndex}. This works with all fields containing + /// zero or one term in the field. The range can be half-open by setting one of the values to null. 
+ static FieldCacheRangeFilterPtr newStringRange(const String& field, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper); + + /// Creates a numeric range filter using {@link FieldCache#getBytes(IndexReaderPtr, String)}. This works with all + /// byte fields containing exactly one numeric term in the field. The range can be half-open by setting one of the + /// values to null. + static FieldCacheRangeFilterPtr newByteRange(const String& field, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); + + /// Creates a numeric range filter using {@link FieldCache#getBytes(IndexReaderPtr, String, ByteParserPtr)}. This + /// works with all byte fields containing exactly one numeric term in the field. The range can be half-open by + /// setting one of the values to null. + static FieldCacheRangeFilterPtr newByteRange(const String& field, const ByteParserPtr& parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); + + /// Creates a numeric range filter using {@link FieldCache#getInts(IndexReaderPtr, String)}. This works with all + /// int fields containing exactly one numeric term in the field. The range can be half-open by setting one of the + /// values to null. + static FieldCacheRangeFilterPtr newIntRange(const String& field, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); + + /// Creates a numeric range filter using {@link FieldCache#getInts(IndexReaderPtr, String, IntParserPtr)}. This + /// works with all int fields containing exactly one numeric term in the field. The range can be half-open by + /// setting one of the values to null. + static FieldCacheRangeFilterPtr newIntRange(const String& field, const IntParserPtr& parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); + + /// Creates a numeric range filter using {@link FieldCache#getLongs(IndexReaderPtr, String)}. 
This works with all + /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one of the + /// values to null. + static FieldCacheRangeFilterPtr newLongRange(const String& field, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); + + /// Creates a numeric range filter using {@link FieldCache#getLongs(IndexReaderPtr, String, LongParserPtr)}. This + /// works with all long fields containing exactly one numeric term in the field. The range can be half-open by + /// setting one of the values to null. + static FieldCacheRangeFilterPtr newLongRange(const String& field, const LongParserPtr& parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); + + /// Creates a numeric range filter using {@link FieldCache#getDoubles(IndexReaderPtr, String)}. This works with all + /// long fields containing exactly one numeric term in the field. The range can be half-open by setting one of the + /// values to null. + static FieldCacheRangeFilterPtr newDoubleRange(const String& field, double lowerVal, double upperVal, bool includeLower, bool includeUpper); + + /// Creates a numeric range filter using {@link FieldCache#getDoubles(IndexReaderPtr, String, DoubleParserPtr)}. This + /// works with all long fields containing exactly one numeric term in the field. The range can be half-open by + /// setting one of the values to null. 
+ static FieldCacheRangeFilterPtr newDoubleRange(const String& field, const DoubleParserPtr& parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper); + + virtual String toString() = 0; + virtual bool equals(const LuceneObjectPtr& other) = 0; + virtual int32_t hashCode() = 0; + + /// Returns the field name for this filter + virtual String getField(); + + /// Returns true if the lower endpoint is inclusive + virtual bool includesLower(); + + /// Returns true if the upper endpoint is inclusive + virtual bool includesUpper(); + + /// Returns the current numeric parser + virtual ParserPtr getParser(); +}; + +} + +#endif diff --git a/include/lucene++/FieldCacheSanityChecker.h b/include/lucene++/FieldCacheSanityChecker.h new file mode 100644 index 00000000..dc2c2eea --- /dev/null +++ b/include/lucene++/FieldCacheSanityChecker.h @@ -0,0 +1,124 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDCACHESANITYCHECKER_H +#define FIELDCACHESANITYCHECKER_H + +#include "LuceneObject.h" +#include "MapOfSets.h" + +namespace Lucene { + +/// Provides methods for sanity checking that entries in the FieldCache are not wasteful or inconsistent. +/// +/// Lucene 2.9 Introduced numerous enhancements into how the FieldCache is used by the low levels of Lucene +/// searching (for Sorting and ValueSourceQueries) to improve both the speed for Sorting, as well as reopening +/// of IndexReaders. But these changes have shifted the usage of FieldCache from "top level" IndexReaders +/// (frequently a MultiReader or DirectoryReader) down to the leaf level SegmentReaders. 
As a result, +/// existing applications that directly access the FieldCache may find RAM usage increase significantly when +/// upgrading to 2.9 or later. This class provides an API for these applications (or their Unit tests) to +/// check at run time if the FieldCache contains "insane" usages of the FieldCache. +/// +/// @see FieldCache +/// @see FieldCacheSanityChecker.Insanity +/// @see FieldCacheSanityChecker.InsanityType +class LPPAPI FieldCacheSanityChecker : public LuceneObject { +public: + FieldCacheSanityChecker(); + virtual ~FieldCacheSanityChecker(); + + LUCENE_CLASS(FieldCacheSanityChecker); + +public: + typedef MapOfSets< int32_t, boost::hash, std::equal_to, FieldCacheEntryPtr, luceneHash, luceneEquals > MapSetIntFieldCacheEntry; + typedef MapOfSets< ReaderFieldPtr, luceneHash, luceneEquals, int32_t, boost::hash, std::equal_to > MapSetReaderFieldInt; + typedef MapOfSets< ReaderFieldPtr, luceneHash, luceneEquals, ReaderFieldPtr, luceneHash, luceneEquals > MapSetReaderFieldReaderField; + + /// An Enumeration of the different types of "insane" behaviour that may be detected in a FieldCache. + enum InsanityType { + /// Indicates an overlap in cache usage on a given field in sub/super readers. + SUBREADER, + + /// Indicates entries have the same reader+fieldname but different cached values. This can happen + /// if different datatypes, or parsers are used -- and while it's not necessarily a bug it's + /// typically an indication of a possible problem. + /// + /// NOTE: Only the reader, fieldname, and cached value are actually tested -- if two cache entries + /// have different parsers or datatypes but the cached values are the same Object (== not just equal()) + /// this method does not consider that a red flag. This allows for subtle variations in the way a + /// Parser is specified (null vs DEFAULT_LONG_PARSER, etc...) + VALUEMISMATCH, + + /// Indicates an expected bit of "insanity". 
This may be useful for clients that wish to preserve/log + /// information about insane usage but indicate that it was expected. + EXPECTED + }; + + /// Quick and dirty convenience method + /// @see #check + static Collection checkSanity(const FieldCachePtr& cache); + + /// Quick and dirty convenience method that instantiates an instance with "good defaults" and uses it to + /// test the CacheEntrys. + /// @see #check + static Collection checkSanity(Collection cacheEntries); + + /// Tests a CacheEntry[] for indication of "insane" cache usage. + /// NOTE: FieldCache CreationPlaceholder objects are ignored. + Collection check(Collection cacheEntries); + +protected: + /// Internal helper method used by check that iterates over valMismatchKeys and generates a Collection of + /// Insanity instances accordingly. The MapOfSets are used to populate the Insanity objects. + /// @see InsanityType#VALUEMISMATCH + Collection checkValueMismatch(MapSetIntFieldCacheEntry valIdToItems, + MapSetReaderFieldInt readerFieldToValIds, + SetReaderField valMismatchKeys); + + /// Internal helper method used by check that iterates over the keys of readerFieldToValIds and generates a + /// Collection of Insanity instances whenever two (or more) ReaderField instances are found that have an + /// ancestry relationships. + /// @see InsanityType#SUBREADER + Collection checkSubreaders(MapSetIntFieldCacheEntry valIdToItems, + MapSetReaderFieldInt readerFieldToValIds); + + /// Checks if the seed is an IndexReader, and if so will walk the hierarchy of subReaders building up a + /// list of the objects returned by obj.getFieldCacheKey() + Collection getAllDecendentReaderKeys(const LuceneObjectPtr& seed); +}; + +/// Simple container for a collection of related CacheEntry objects that in conjunction with each other +/// represent some "insane" usage of the FieldCache. 
+class LPPAPI Insanity : public LuceneObject { +public: + Insanity(FieldCacheSanityChecker::InsanityType type, const String& msg, Collection entries); + virtual ~Insanity(); + + LUCENE_CLASS(Insanity); + +protected: + FieldCacheSanityChecker::InsanityType type; + String msg; + Collection entries; + +public: + /// Type of insane behavior this object represents + FieldCacheSanityChecker::InsanityType getType(); + + /// Description of the insane behaviour + String getMsg(); + + /// CacheEntry objects which suggest a problem + Collection getCacheEntries(); + + /// Multi-Line representation of this Insanity object, starting with the Type and Msg, followed by each + /// CacheEntry.toString() on it's own line prefaced by a tab character + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/FieldCacheSource.h b/include/lucene++/FieldCacheSource.h new file mode 100644 index 00000000..18806420 --- /dev/null +++ b/include/lucene++/FieldCacheSource.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDCACHESOURCE_H +#define FIELDCACHESOURCE_H + +#include "ValueSource.h" + +namespace Lucene { + +/// A base class for ValueSource implementations that retrieve values for a single field from the +/// {@link FieldCache}. +/// +/// Fields used herein must be indexed (doesn't matter if these fields are stored or not). +/// +/// It is assumed that each such indexed field is untokenized, or at least has a single token in a document. +/// For documents with multiple tokens of the same field, behavior is undefined (It is likely that current +/// code would use the value of one of these tokens, but this is not guaranteed). 
+/// +/// Document with no tokens in this field are assigned the Zero value. +/// +/// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite +/// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's +/// best to switch your application to pass only atomic (single segment) readers to this API. +class LPPAPI FieldCacheSource : public ValueSource { +public: + /// Create a cached field source for the input field. + FieldCacheSource(const String& field); + virtual ~FieldCacheSource(); + + LUCENE_CLASS(FieldCacheSource); + +protected: + String field; + +public: + virtual DocValuesPtr getValues(const IndexReaderPtr& reader); + virtual String description(); + + /// Return cached DocValues for input field and reader. + /// @param cache FieldCache so that values of a field are loaded once per reader (RAM allowing) + /// @param field Field for which values are required. + /// @see ValueSource + virtual DocValuesPtr getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader) = 0; + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + /// Check if equals to another {@link FieldCacheSource}, already knowing that cache and field are equal. + virtual bool cachedFieldSourceEquals(const FieldCacheSourcePtr& other) = 0; + + /// Return a hash code of a {@link FieldCacheSource}, without the hash-codes of the field and the cache + /// (those are taken care of elsewhere). + virtual int32_t cachedFieldSourceHashCode() = 0; +}; + +} + +#endif diff --git a/include/lucene++/FieldCacheTermsFilter.h b/include/lucene++/FieldCacheTermsFilter.h new file mode 100644 index 00000000..9b51cea2 --- /dev/null +++ b/include/lucene++/FieldCacheTermsFilter.h @@ -0,0 +1,65 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDCACHETERMSFILTER_H +#define FIELDCACHETERMSFILTER_H + +#include "Filter.h" + +namespace Lucene { + +/// A {@link Filter} that only accepts documents whose single term value in the specified field is contained +/// in the provided set of allowed terms. +/// +/// This is the same functionality as TermsFilter (from contrib/queries), except this filter requires that the +/// field contains only a single term for all documents. Because of drastically different implementations, +/// they also have different performance characteristics, as described below. +/// +/// The first invocation of this filter on a given field will be slower, since a {@link StringIndex} must be +/// created. Subsequent invocations using the same field will re-use this cache. However, as with all +/// functionality based on {@link FieldCache}, persistent RAM is consumed to hold the cache, and is not freed +/// until the {@link IndexReader} is closed. In contrast, TermsFilter has no persistent RAM consumption. +/// +/// With each search, this filter translates the specified set of Terms into a private {@link OpenBitSet} keyed +/// by term number per unique {@link IndexReader} (normally one reader per segment). Then, during matching, +/// the term number for each docID is retrieved from the cache and then checked for inclusion using the {@link +/// OpenBitSet}. Since all testing is done using RAM resident data structures, performance should be very fast, +/// most likely fast enough to not require further caching of the DocIdSet for each possible combination of +/// terms. However, because docIDs are simply scanned linearly, an index with a great many small documents may +/// find this linear scan too costly. 
+/// +/// In contrast, TermsFilter builds up an {@link OpenBitSet}, keyed by docID, every time it's created, by +/// enumerating through all matching docs using {@link TermDocs} to seek and scan through each term's docID list. +/// While there is no linear scan of all docIDs, besides the allocation of the underlying array in the {@link +/// OpenBitSet}, this approach requires a number of "disk seeks" in proportion to the number of terms, which can +/// be exceptionally costly when there are cache misses in the OS's IO cache. +/// +/// Generally, this filter will be slower on the first invocation for a given field, but subsequent invocations, +/// even if you change the allowed set of Terms, should be faster than TermsFilter, especially as the number of +/// Terms being matched increases. If you are matching only a very small number of terms, and those terms in +/// turn match a very small number of documents, TermsFilter may perform faster. +/// +/// Which filter is best is very application dependent. +class LPPAPI FieldCacheTermsFilter : public Filter { +public: + FieldCacheTermsFilter(const String& field, Collection terms); + virtual ~FieldCacheTermsFilter(); + + LUCENE_CLASS(FieldCacheTermsFilter); + +protected: + String field; + Collection terms; + +public: + FieldCachePtr getFieldCache(); + + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/FieldComparator.h b/include/lucene++/FieldComparator.h new file mode 100644 index 00000000..a276eb38 --- /dev/null +++ b/include/lucene++/FieldComparator.h @@ -0,0 +1,331 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDCOMPARATOR_H +#define FIELDCOMPARATOR_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A FieldComparator compares hits so as to determine their sort order when collecting the top results with +/// {@link TopFieldCollector}. The concrete public FieldComparator classes here correspond to the SortField types. +/// +/// This API is designed to achieve high performance sorting, by exposing a tight interaction with {@link +/// FieldValueHitQueue} as it visits hits. Whenever a hit is competitive, it's enrolled into a virtual slot, +/// which is an int ranging from 0 to numHits-1. The {@link FieldComparator} is made aware of segment transitions +/// during searching in case any internal state it's tracking needs to be recomputed during these transitions. +/// +/// A comparator must define these functions: +///
+/// <ul>
+/// <li> {@link #compare} Compare a hit at 'slot a' with hit 'slot b'.
+///
+/// <li> {@link #setBottom} This method is called by {@link FieldValueHitQueue} to notify the FieldComparator of
+/// the current weakest ("bottom") slot. Note that this slot may not hold the weakest value according to your
+/// comparator, in cases where your comparator is not the primary one (ie, is only used to break ties from the
+/// comparators before it).
+///
+/// <li> {@link #compareBottom} Compare a new hit (docID) against the "weakest" (bottom) entry in the queue.
+///
+/// <li> {@link #copy} Installs a new hit into the priority queue. The {@link FieldValueHitQueue} calls this
+/// method when a new hit is competitive.
+///
+/// <li> {@link #setNextReader} Invoked when the search is switching to the next segment. You may need to update
+/// internal state of the comparator, for example retrieving new values from the {@link FieldCache}.
+///
+/// <li> {@link #value} Return the sort value stored in the specified slot. This is only called at the end of
+/// the search, in order to populate {@link FieldDoc#fields} when returning the top results.
+/// </ul>
+class LPPAPI FieldComparator : public LuceneObject { +public: + virtual ~FieldComparator(); + LUCENE_CLASS(FieldComparator); + +public: + /// Compare hit at slot1 with hit at slot2. + /// @param slot1 first slot to compare + /// @param slot2 second slot to compare + /// @return any N < 0 if slot2's value is sorted after slot1, any N > 0 if the slot2's value is sorted + /// before slot1 and 0 if they are equal + virtual int32_t compare(int32_t slot1, int32_t slot2) = 0; + + /// Set the bottom slot, ie the "weakest" (sorted last) entry in the queue. When {@link #compareBottom} + /// is called, you should compare against this slot. This will always be called before {@link #compareBottom}. + /// @param slot the currently weakest (sorted last) slot in the queue + virtual void setBottom(int32_t slot) = 0; + + /// Compare the bottom of the queue with doc. This will only invoked after setBottom has been called. + /// This should return the same result as {@link #compare(int,int)}} as if bottom were slot1 and the new + /// document were slot 2. + /// + /// For a search that hits many results, this method will be the hotspot (invoked by far the most frequently). + /// + /// @param doc that was hit + /// @return any N < 0 if the doc's value is sorted after the bottom entry (not competitive), any N > 0 if + /// the doc's value is sorted before the bottom entry and 0 if they are equal. + virtual int32_t compareBottom(int32_t doc) = 0; + + /// This method is called when a new hit is competitive. You should copy any state associated with this + /// document that will be required for future comparisons, into the specified slot. + /// @param slot which slot to copy the hit to + /// @param doc docID relative to current reader + virtual void copy(int32_t slot, int32_t doc) = 0; + + /// Set a new Reader. All doc correspond to the current Reader. 
+ /// + /// @param reader current reader + /// @param docBase docBase of this reader + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase) = 0; + + /// Sets the Scorer to use in case a document's score is needed. + /// @param scorer Scorer instance that you should use to obtain the current hit's score, if necessary. + virtual void setScorer(const ScorerPtr& scorer); + + /// Return the actual value in the slot. + /// @param slot the value + /// @return value in this slot upgraded to ComparableValue + virtual ComparableValue value(int32_t slot) = 0; +}; + +template +class NumericComparator : public FieldComparator { +public: + NumericComparator(int32_t numHits, const String& field = EmptyString) { + this->values = Collection::newInstance(numHits); + this->field = field; + this->bottom = 0; + } + + virtual ~NumericComparator() { + } + +protected: + Collection values; + Collection currentReaderValues; + String field; + TYPE bottom; + +public: + virtual int32_t compare(int32_t slot1, int32_t slot2) { + return (int32_t)(values[slot1] - values[slot2]); + } + + virtual int32_t compareBottom(int32_t doc) { + return (int32_t)(bottom - currentReaderValues[doc]); + } + + virtual void copy(int32_t slot, int32_t doc) { + values[slot] = currentReaderValues[doc]; + } + + virtual void setBottom(int32_t slot) { + bottom = values[slot]; + } + + virtual ComparableValue value(int32_t slot) { + return ComparableValue(values[slot]); + } +}; + +/// Parses field's values as byte (using {@link FieldCache#getBytes} and sorts by ascending value. 
+class LPPAPI ByteComparator : public NumericComparator { +public: + ByteComparator(int32_t numHits, const String& field, const ParserPtr& parser); + virtual ~ByteComparator(); + + LUCENE_CLASS(ByteComparator); + +protected: + ByteParserPtr parser; + +public: + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); +}; + +/// Sorts by ascending docID +class LPPAPI DocComparator : public NumericComparator { +public: + DocComparator(int32_t numHits); + virtual ~DocComparator(); + + LUCENE_CLASS(DocComparator); + +protected: + int32_t docBase; + +public: + virtual int32_t compareBottom(int32_t doc); + virtual void copy(int32_t slot, int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); +}; + +/// Parses field's values as double (using {@link FieldCache#getDoubles} and sorts by ascending value +class LPPAPI DoubleComparator : public NumericComparator { +public: + DoubleComparator(int32_t numHits, const String& field, const ParserPtr& parser); + virtual ~DoubleComparator(); + + LUCENE_CLASS(DoubleComparator); + +protected: + DoubleParserPtr parser; + +public: + virtual int32_t compare(int32_t slot1, int32_t slot2); + virtual int32_t compareBottom(int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); +}; + +/// Parses field's values as int (using {@link FieldCache#getInts} and sorts by ascending value +class LPPAPI IntComparator : public NumericComparator { +public: + IntComparator(int32_t numHits, const String& field, const ParserPtr& parser); + virtual ~IntComparator(); + + LUCENE_CLASS(IntComparator); + +protected: + IntParserPtr parser; + +public: + virtual int32_t compare(int32_t slot1, int32_t slot2); + virtual int32_t compareBottom(int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); +}; + +/// Parses field's values as long (using {@link FieldCache#getLongs} and sorts by ascending value +class LPPAPI LongComparator : public 
NumericComparator { +public: + LongComparator(int32_t numHits, const String& field, const ParserPtr& parser); + virtual ~LongComparator(); + + LUCENE_CLASS(LongComparator); + +protected: + LongParserPtr parser; + +public: + virtual int32_t compare(int32_t slot1, int32_t slot2); + virtual int32_t compareBottom(int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); +}; + +/// Sorts by descending relevance. NOTE: if you are sorting only by descending relevance and then secondarily +/// by ascending docID, performance is faster using {@link TopScoreDocCollector} directly (which {@link +/// IndexSearcher#search} uses when no {@link Sort} is specified). +class LPPAPI RelevanceComparator : public NumericComparator { +public: + RelevanceComparator(int32_t numHits); + virtual ~RelevanceComparator(); + + LUCENE_CLASS(RelevanceComparator); + +protected: + ScorerPtr scorer; + +public: + virtual int32_t compare(int32_t slot1, int32_t slot2); + virtual int32_t compareBottom(int32_t doc); + virtual void copy(int32_t slot, int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setScorer(const ScorerPtr& scorer); +}; + +/// Sorts by a field's value using the Collator for a given Locale. 
+class LPPAPI StringComparatorLocale : public FieldComparator { +public: + StringComparatorLocale(int32_t numHits, const String& field, const std::locale& locale); + virtual ~StringComparatorLocale(); + + LUCENE_CLASS(StringComparatorLocale); + +protected: + Collection values; + Collection currentReaderValues; + String field; + CollatorPtr collator; + String bottom; + +public: + virtual int32_t compare(int32_t slot1, int32_t slot2); + virtual int32_t compareBottom(int32_t doc); + virtual void copy(int32_t slot, int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setBottom(int32_t slot); + virtual ComparableValue value(int32_t slot); +}; + +/// Sorts by field's natural String sort order, using ordinals. This is functionally equivalent to {@link +/// StringValComparator}, but it first resolves the string to their relative ordinal positions (using the +/// index returned by {@link FieldCache#getStringIndex}), and does most comparisons using the ordinals. +/// For medium to large results, this comparator will be much faster than {@link StringValComparator}. For +/// very small result sets it may be slower. 
+class LPPAPI StringOrdValComparator : public FieldComparator { +public: + StringOrdValComparator(int32_t numHits, const String& field, int32_t sortPos, bool reversed); + virtual ~StringOrdValComparator(); + + LUCENE_CLASS(StringOrdValComparator); + +protected: + Collection ords; + Collection values; + Collection readerGen; + + int32_t currentReaderGen; + Collection lookup; + Collection order; + String field; + + int32_t bottomSlot; + int32_t bottomOrd; + String bottomValue; + bool reversed; + int32_t sortPos; + +public: + virtual int32_t compare(int32_t slot1, int32_t slot2); + virtual int32_t compareBottom(int32_t doc); + virtual void copy(int32_t slot, int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setBottom(int32_t slot); + virtual ComparableValue value(int32_t slot); + virtual Collection getValues(); + virtual int32_t getBottomSlot(); + virtual String getField(); + +protected: + void convert(int32_t slot); + int32_t binarySearch(Collection lookup, const String& key, int32_t low, int32_t high); +}; + +/// Sorts by field's natural String sort order. All comparisons are done using String.compare, which is +/// slow for medium to large result sets but possibly very fast for very small results sets. 
+class LPPAPI StringValComparator : public FieldComparator { +public: + StringValComparator(int32_t numHits, const String& field); + virtual ~StringValComparator(); + + LUCENE_CLASS(StringValComparator); + +protected: + Collection values; + Collection currentReaderValues; + String field; + String bottom; + +public: + virtual int32_t compare(int32_t slot1, int32_t slot2); + virtual int32_t compareBottom(int32_t doc); + virtual void copy(int32_t slot, int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setBottom(int32_t slot); + virtual ComparableValue value(int32_t slot); +}; + +} + +#endif diff --git a/include/lucene++/FieldComparatorSource.h b/include/lucene++/FieldComparatorSource.h new file mode 100644 index 00000000..f8074d7b --- /dev/null +++ b/include/lucene++/FieldComparatorSource.h @@ -0,0 +1,29 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDCOMPARATORSOURCE_H +#define FIELDCOMPARATORSOURCE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Provides a {@link FieldComparator} for custom field sorting. +class LPPAPI FieldComparatorSource : public LuceneObject { +public: + virtual ~FieldComparatorSource(); + LUCENE_CLASS(FieldComparatorSource); + +public: + /// Creates a comparator for the field in the given index. + /// @param fieldname Name of the field to create comparator for. + /// @return FieldComparator. 
+ virtual FieldComparatorPtr newComparator(const String& fieldname, int32_t numHits, int32_t sortPos, bool reversed) = 0; +}; + +} + +#endif diff --git a/include/lucene++/FieldDoc.h b/include/lucene++/FieldDoc.h new file mode 100644 index 00000000..0438894f --- /dev/null +++ b/include/lucene++/FieldDoc.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDDOC_H +#define FIELDDOC_H + +#include "ScoreDoc.h" + +namespace Lucene { + +/// A ScoreDoc which also contains information about how to sort the referenced document. In addition to the +/// document number and score, this object contains an array of values for the document from the field(s) used +/// to sort. For example, if the sort criteria was to sort by fields "a", "b" then "c", the fields object array +/// will have three elements, corresponding respectively to the term values for the document in fields "a", "b" +/// and "c". The class of each element in the array will be either Integer, Double or String depending on the +/// type of values in the terms of each field. +class LPPAPI FieldDoc : public ScoreDoc { +public: + FieldDoc(int32_t doc, double score, Collection fields = Collection()); + virtual ~FieldDoc(); + + LUCENE_CLASS(FieldDoc); + +public: + /// The values which are used to sort the referenced document. The order of these will match the original + /// sort criteria given by a Sort object. Each Object will be either an Integer, Double or String, depending + /// on the type of values in the terms of the original field. 
+ /// @see Sort + /// @see Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr) + Collection fields; + +public: + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/FieldDocSortedHitQueue.h b/include/lucene++/FieldDocSortedHitQueue.h new file mode 100644 index 00000000..114d923e --- /dev/null +++ b/include/lucene++/FieldDocSortedHitQueue.h @@ -0,0 +1,52 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDDOCSORTEDHITQUEUE_H +#define FIELDDOCSORTEDHITQUEUE_H + +#include "PriorityQueue.h" + +namespace Lucene { + +/// Collects sorted results from Searchable's and collates them. +/// The elements put into this queue must be of type FieldDoc. +class FieldDocSortedHitQueue : public PriorityQueue { +public: + FieldDocSortedHitQueue(int32_t size); + virtual ~FieldDocSortedHitQueue(); + + LUCENE_CLASS(FieldDocSortedHitQueue); + +public: + Collection fields; + + // used in the case where the fields are sorted by locale based strings + Collection collators; + +public: + /// Allows redefinition of sort fields if they are null. This is to handle the case using + /// ParallelMultiSearcher where the original list contains AUTO and we don't know the actual sort + /// type until the values come back. The fields can only be set once. This method should be + /// synchronized external like all other PQ methods. + void setFields(Collection fields); + + /// Returns the fields being used to sort. + Collection getFields(); + +protected: + /// Returns an array of collators, possibly null. The collators correspond to any SortFields which + /// were given a specific locale. + /// @param fields Array of sort fields. + /// @return Array, possibly null. 
+ Collection hasCollators(Collection fields); + + /// Returns whether first is less relevant than second. + virtual bool lessThan(const FieldDocPtr& first, const FieldDocPtr& second); +}; + +} + +#endif diff --git a/include/lucene++/FieldInfo.h b/include/lucene++/FieldInfo.h new file mode 100644 index 00000000..6cffbedd --- /dev/null +++ b/include/lucene++/FieldInfo.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDINFO_H +#define FIELDINFO_H + +#include "LuceneObject.h" + +namespace Lucene { + +class FieldInfo : public LuceneObject { +public: + FieldInfo(const String& na, bool tk, int32_t nu, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); + virtual ~FieldInfo(); + + LUCENE_CLASS(FieldInfo); + +public: + String name; + bool isIndexed; + int32_t number; + + // true if term vector for this field should be stored + bool storeTermVector; + bool storeOffsetWithTermVector; + bool storePositionWithTermVector; + + bool omitNorms; // omit norms associated with indexed fields + bool omitTermFreqAndPositions; + + bool storePayloads; // whether this field stores payloads together with term positions + +public: + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + void update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector, + bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); +}; + +} + +#endif diff --git a/include/lucene++/FieldInfos.h b/include/lucene++/FieldInfos.h new file mode 100644 index 00000000..310fc8ac --- /dev/null +++ 
b/include/lucene++/FieldInfos.h @@ -0,0 +1,151 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDINFOS_H +#define FIELDINFOS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Access to the Fieldable Info file that describes document fields and whether or not they are indexed. +/// Each segment has a separate Fieldable Info file. Objects of this class are thread-safe for multiple +/// readers, but only one thread can be adding documents at a time, with no other reader or writer threads +/// accessing this object. +class LPPAPI FieldInfos : public LuceneObject { +public: + FieldInfos(); + + /// Construct a FieldInfos object using the directory and the name of the file IndexInput + /// @param d The directory to open the IndexInput from + /// @param name The name of the file to open the IndexInput from in the Directory + FieldInfos(const DirectoryPtr& d, const String& name); + + virtual ~FieldInfos(); + + LUCENE_CLASS(FieldInfos); + +public: + // Used internally (ie not written to *.fnm files) for pre-2.9 files + static const int32_t FORMAT_PRE; + + // First used in 2.9; prior to 2.9 there was no format header + static const int32_t FORMAT_START; + + static const int32_t CURRENT_FORMAT; + + static const uint8_t IS_INDEXED; + static const uint8_t STORE_TERMVECTOR; + static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR; + static const uint8_t STORE_OFFSET_WITH_TERMVECTOR; + static const uint8_t OMIT_NORMS; + static const uint8_t STORE_PAYLOADS; + static const uint8_t OMIT_TERM_FREQ_AND_POSITIONS; + +protected: + Collection byNumber; + MapStringFieldInfo byName; + int32_t format; + +public: + /// Returns a deep clone of this FieldInfos instance. 
+ virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Adds field info for a Document. + void add(const DocumentPtr& doc); + + /// Returns true if any fields do not omitTermFreqAndPositions + bool hasProx(); + + /// Add fields that are indexed. Whether they have termvectors has to be specified. + /// @param names The names of the fields + /// @param storeTermVectors Whether the fields store term vectors or not + /// @param storePositionWithTermVector true if positions should be stored. + /// @param storeOffsetWithTermVector true if offsets should be stored + void addIndexed(HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector); + + /// Assumes the fields are not storing term vectors. + /// @param names The names of the fields + /// @param isIndexed Whether the fields are indexed or not + /// @see #add(const String&, bool) + void add(HashSet names, bool isIndexed); + + /// Calls 5 parameter add with false for all TermVector parameters. + /// @param name The name of the Fieldable + /// @param isIndexed true if the field is indexed + /// @see #add(const String&, bool, bool, bool, bool) + void add(const String& name, bool isIndexed); + + /// Calls 5 parameter add with false for term vector positions and offsets. + /// @param name The name of the field + /// @param isIndexed true if the field is indexed + /// @param storeTermVector true if the term vector should be stored + void add(const String& name, bool isIndexed, bool storeTermVector); + + /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed flag + /// is the same as was given previously for this field. If not - marks it as being indexed. Same goes + /// for the TermVector parameters. 
+ /// @param name The name of the field + /// @param isIndexed true if the field is indexed + /// @param storeTermVector true if the term vector should be stored + /// @param storePositionWithTermVector true if the term vector with positions should be stored + /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored + void add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector); + + /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed flag + /// is the same as was given previously for this field. If not - marks it as being indexed. Same goes + /// for the TermVector parameters. + /// @param name The name of the field + /// @param isIndexed true if the field is indexed + /// @param storeTermVector true if the term vector should be stored + /// @param storePositionWithTermVector true if the term vector with positions should be stored + /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored + /// @param omitNorms true if the norms for the indexed field should be omitted + void add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms); + + /// If the field is not yet known, adds it. If it is known, checks to make sure that the isIndexed + /// flag is the same as was given previously for this field. If not - marks it as being indexed. + /// Same goes for the TermVector parameters. 
+ /// @param name The name of the field + /// @param isIndexed true if the field is indexed + /// @param storeTermVector true if the term vector should be stored + /// @param storePositionWithTermVector true if the term vector with positions should be stored + /// @param storeOffsetWithTermVector true if the term vector with offsets should be stored + /// @param omitNorms true if the norms for the indexed field should be omitted + /// @param storePayloads true if payloads should be stored for this field + /// @param omitTermFreqAndPositions true if term freqs should be omitted for this field + FieldInfoPtr add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); + + int32_t fieldNumber(const String& fieldName); + FieldInfoPtr fieldInfo(const String& fieldName); + + /// Return the fieldName identified by its number. + /// @return the fieldName or an empty string when the field with the given number doesn't exist. + String fieldName(int32_t fieldNumber); + + /// Return the fieldinfo object referenced by the fieldNumber. + /// @return the FieldInfo object or null when the given fieldNumber doesn't exist. 
+ FieldInfoPtr fieldInfo(int32_t fieldNumber); + + int32_t size(); + + bool hasVectors(); + + void write(const DirectoryPtr& d, const String& name); + void write(const IndexOutputPtr& output); + +protected: + FieldInfoPtr addInternal(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions); + + void read(const IndexInputPtr& input, const String& fileName); +}; + +} + +#endif diff --git a/include/lucene++/FieldInvertState.h b/include/lucene++/FieldInvertState.h new file mode 100644 index 00000000..18818e4d --- /dev/null +++ b/include/lucene++/FieldInvertState.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDINVERTSTATE_H +#define FIELDINVERTSTATE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// This class tracks the number and position / offset parameters of terms being added to the index. +/// The information collected in this class is also used to calculate the normalization factor for a field. +class LPPAPI FieldInvertState : public LuceneObject { +public: + FieldInvertState(int32_t position = 0, int32_t length = 0, int32_t numOverlap = 0, int32_t offset = 0, double boost = 0); + virtual ~FieldInvertState(); + + LUCENE_CLASS(FieldInvertState); + +INTERNAL: + int32_t position; + int32_t length; + int32_t numOverlap; + int32_t offset; + double boost; + AttributeSourcePtr attributeSource; + +public: + /// Re-initialize the state, using this boost value. + /// @param docBoost boost value to use. + void reset(double docBoost); + + /// Get the last processed term position. 
+ /// @return the position + int32_t getPosition(); + + /// Get total number of terms in this field. + /// @return the length + int32_t getLength(); + + /// Get the number of terms with positionIncrement == 0. + /// @return the numOverlap + int32_t getNumOverlap(); + + /// Get end offset of the last processed term. + /// @return the offset + int32_t getOffset(); + + /// Get boost value. This is the cumulative product of document boost and field boost for all field + /// instances sharing the same field name. + /// @return the boost + double getBoost(); + + AttributeSourcePtr getAttributeSource(); +}; + +} + +#endif diff --git a/include/lucene++/FieldMaskingSpanQuery.h b/include/lucene++/FieldMaskingSpanQuery.h new file mode 100644 index 00000000..27dedf89 --- /dev/null +++ b/include/lucene++/FieldMaskingSpanQuery.h @@ -0,0 +1,84 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDMASKINGSPANQUERY_H +#define FIELDMASKINGSPANQUERY_H + +#include "SpanQuery.h" + +namespace Lucene { + +/// Wrapper to allow {@link SpanQuery} objects participate in composite single-field SpanQueries by +/// 'lying' about their search field. That is, the masked SpanQuery will function as normal, but +/// {@link SpanQuery#getField()} simply hands back the value supplied in this class's constructor. +/// +/// This can be used to support Queries like {@link SpanNearQuery} or {@link SpanOrQuery} across +/// different fields, which is not ordinarily permitted. +/// +/// This can be useful for denormalized relational data: for example, when indexing a document with +/// conceptually many 'children': +/// +///
+/// teacherid: 1
+/// studentfirstname: james
+/// studentsurname: jones
+///
+/// teacherid: 2
+/// studentfirstname: james
+/// studentsurname: smith
+/// studentfirstname: sally
+/// studentsurname: jones
+/// 
+/// +/// A SpanNearQuery with a slop of 0 can be applied across two {@link SpanTermQuery} objects as follows: +/// +///
+/// SpanQueryPtr q1 = newLucene(newLucene(L"studentfirstname", L"james"));
+/// SpanQueryPtr q2 = newLucene(newLucene(L"studentsurname", L"jones"));
+/// SpanQueryPtr q2m = newLucene(q2, L"studentfirstname");
+///
+/// Collection span = newCollection(q1, q2m);
+///
+/// QueryPtr q = newLucene(span, -1, false);
+/// 
+/// to search for 'studentfirstname:james studentsurname:jones' and find teacherid 1 without matching +/// teacherid 2 (which has a 'james' in position 0 and 'jones' in position 1). +/// +/// Note: as {@link #getField()} returns the masked field, scoring will be done using the norms of the +/// field name supplied. This may lead to unexpected scoring behaviour. +class LPPAPI FieldMaskingSpanQuery : public SpanQuery { +public: + FieldMaskingSpanQuery(const SpanQueryPtr& maskedQuery, const String& maskedField); + virtual ~FieldMaskingSpanQuery(); + + LUCENE_CLASS(FieldMaskingSpanQuery); + +protected: + SpanQueryPtr maskedQuery; + String field; + +public: + using SpanQuery::toString; + + virtual String getField(); + SpanQueryPtr getMaskedQuery(); + virtual SpansPtr getSpans(const IndexReaderPtr& reader); + virtual void extractTerms(SetTerm terms); + virtual WeightPtr createWeight(const SearcherPtr& searcher); + virtual SimilarityPtr getSimilarity(const SearcherPtr& searcher); + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + virtual String toString(const String& field); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + /// Returns a clone of this query. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/FieldScoreQuery.h b/include/lucene++/FieldScoreQuery.h new file mode 100644 index 00000000..a961fe50 --- /dev/null +++ b/include/lucene++/FieldScoreQuery.h @@ -0,0 +1,74 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDSCOREQUERY_H +#define FIELDSCOREQUERY_H + +#include "ValueSourceQuery.h" + +namespace Lucene { + +/// A query that scores each document as the value of the numeric input field. +/// +/// The query matches all documents, and scores each document according to the numeric value of that field. +/// +/// It is assumed, and expected, that: +///
    +///
  • The field used here is indexed, and has exactly one token in every scored document. +///
  • Best if this field is un_tokenized. +///
  • That token is parseable to the selected type. +///
+/// +/// Combining this query in a FunctionQuery allows much freedom in affecting document scores. Note, that +/// with this freedom comes responsibility: it is more than likely that the default Lucene scoring is superior +/// in quality to scoring modified as explained here. However, in some cases, and certainly for research +/// experiments, this capability may turn useful. +/// +/// When constructing this query, select the appropriate type. That type should match the data stored in the +/// field. So in fact the "right" type should be selected before indexing. Type selection has effect on the +/// RAM usage: +///
    +///
  • Byte consumes 1 * maxDocs bytes. +///
  • Int consumes 4 * maxDocs bytes. +///
  • Double consumes 8 * maxDocs bytes. +///
+/// +/// Caching: Values for the numeric field are loaded once and cached in memory for further use with the same +/// IndexReader. To take advantage of this, it is extremely important to reuse index-readers or index- +/// searchers, otherwise, for instance if for each query a new index reader is opened, large penalties would +/// be paid for loading the field values into memory over and over again. +class LPPAPI FieldScoreQuery : public ValueSourceQuery { +public: + /// Type of score field, indicating how field values are interpreted/parsed. + enum Type { + /// Field values are interpreted as numeric byte values. + BYTE, + + /// Field values are interpreted as numeric integer values. + INT, + + /// Field values are interpreted as numeric double values. + DOUBLE + }; + + /// Create a FieldScoreQuery - a query that scores each document as the value of the numeric input field. + /// The type param tells how to parse the field string values into a numeric score value. + /// @param field the numeric field to be used. + /// @param type the type of the field. + FieldScoreQuery(const String& field, Type type); + + virtual ~FieldScoreQuery(); + + LUCENE_CLASS(FieldScoreQuery); + +public: + /// Create the appropriate (cached) field value source. + static ValueSourcePtr getValueSource(const String& field, Type type); +}; + +} + +#endif diff --git a/include/lucene++/FieldSelector.h b/include/lucene++/FieldSelector.h new file mode 100644 index 00000000..e35d0c13 --- /dev/null +++ b/include/lucene++/FieldSelector.h @@ -0,0 +1,73 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDSELECTOR_H +#define FIELDSELECTOR_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// The FieldSelector allows one to make decisions about what Fields get loaded on a {@link Document} by +/// {@link IndexReader#document(int32_t, FieldSelector)} +class LPPAPI FieldSelector : public LuceneObject { +protected: + FieldSelector(); + +public: + virtual ~FieldSelector(); + + LUCENE_CLASS(FieldSelector); + +public: + /// Provides information about what should be done with this Field + enum FieldSelectorResult { + /// Null value + SELECTOR_NULL, + + /// Load this {@link Field} every time the {@link Document} is loaded, reading in the data as it is + /// encountered. {@link Document#getField(String)} and {@link Document#getFieldable(String)} should + /// not return null. + /// {@link Document#add(Fieldable)} should be called by the Reader. + SELECTOR_LOAD, + + /// Lazily load this {@link Field}. This means the {@link Field} is valid, but it may not actually + /// contain its data until invoked. {@link Document#getField(String)} SHOULD NOT BE USED. {@link + /// Document#getFieldable(String)} is safe to use and should return a valid instance of a {@link + /// Fieldable}. + /// {@link Document#add(Fieldable)} should be called by the Reader. + SELECTOR_LAZY_LOAD, + + /// Do not load the {@link Field}. {@link Document#getField(String)} and {@link + /// Document#getFieldable(String)} should return null. {@link Document#add(Fieldable)} is not called. + /// {@link Document#add(Fieldable)} should not be called by the Reader. + SELECTOR_NO_LOAD, + + /// Load this field as in the {@link #LOAD} case, but immediately return from {@link Field} loading + /// for the {@link Document}. Thus, the Document may not have its complete set of Fields. 
{@link + /// Document#getField(String)} and {@link Document#getFieldable(String)} should both be valid for + /// this {@link Field} + /// {@link Document#add(Fieldable)} should be called by the Reader. + SELECTOR_LOAD_AND_BREAK, + + /// Load the size of this {@link Field} rather than its value. Size is measured as number of bytes + /// required to store the field == bytes for a binary or any compressed value, and 2*chars for a String + /// value. The size is stored as a binary value, represented as an int in a byte[], with the higher + /// order byte first in [0] + SELECTOR_SIZE, + + /// Like {@link #SIZE} but immediately break from the field loading loop, i.e., stop loading further + /// fields, after the size is loaded + SELECTOR_SIZE_AND_BREAK + }; + +public: + virtual FieldSelectorResult accept(const String& fieldName) = 0; +}; + +} + +#endif diff --git a/include/lucene++/FieldSortedTermVectorMapper.h b/include/lucene++/FieldSortedTermVectorMapper.h new file mode 100644 index 00000000..49e5c4fe --- /dev/null +++ b/include/lucene++/FieldSortedTermVectorMapper.h @@ -0,0 +1,51 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDSORTEDTERMVECTORMAPPER_H +#define FIELDSORTEDTERMVECTORMAPPER_H + +#include +#include "TermVectorMapper.h" + +namespace Lucene { + +/// For each Field, store a sorted collection of {@link TermVectorEntry}s +/// This is not thread-safe. 
+class LPPAPI FieldSortedTermVectorMapper : public TermVectorMapper { +public: + /// @param comparator A Comparator for sorting {@link TermVectorEntry}s + FieldSortedTermVectorMapper(TermVectorEntryComparator comparator); + + FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator); + + virtual ~FieldSortedTermVectorMapper(); + + LUCENE_CLASS(FieldSortedTermVectorMapper); + +protected: + MapStringCollectionTermVectorEntry fieldToTerms; + Collection currentSet; + String currentField; + TermVectorEntryComparator comparator; + +public: + /// Map the Term Vector information into your own structure + virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); + + /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. + virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); + + /// Get the mapping between fields and terms, sorted by the comparator + /// @return A map between field names and {@link java.util.SortedSet}s per field. SortedSet entries are + /// {@link TermVectorEntry} + MapStringCollectionTermVectorEntry getFieldToTerms(); + + TermVectorEntryComparator getComparator(); +}; + +} + +#endif diff --git a/include/lucene++/FieldValueHitQueue.h b/include/lucene++/FieldValueHitQueue.h new file mode 100644 index 00000000..7b4fab70 --- /dev/null +++ b/include/lucene++/FieldValueHitQueue.h @@ -0,0 +1,72 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDVALUEHITQUEUE_H +#define FIELDVALUEHITQUEUE_H + +#include "HitQueueBase.h" +#include "ScoreDoc.h" + +namespace Lucene { + +/// A hit queue for sorting by hits by terms in more than one field. Uses FieldCache::DEFAULT for maintaining +/// internal term lookup tables. +/// @see Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr) +/// @see FieldCache +class LPPAPI FieldValueHitQueue : public HitQueueBase { +protected: + FieldValueHitQueue(Collection fields, int32_t size); + +public: + virtual ~FieldValueHitQueue(); + + LUCENE_CLASS(FieldValueHitQueue); + +protected: + /// Stores the sort criteria being used. + Collection fields; + Collection comparators; + Collection reverseMul; + +public: + /// Creates a hit queue sorted by the given list of fields. + /// @param fields SortField array we are sorting by in priority order (highest priority first); cannot + /// be null or empty. + /// @param size The number of hits to retain. Must be greater than zero. + static FieldValueHitQueuePtr create(Collection fields, int32_t size); + + Collection getComparators(); + Collection getReverseMul(); + + /// Given a queue Entry, creates a corresponding FieldDoc that contains the values used to sort the given + /// document. These values are not the raw values out of the index, but the internal representation of + /// them. This is so the given search hit can be collated by a MultiSearcher with other search hits. + /// @param entry The Entry used to create a FieldDoc + /// @return The newly created FieldDoc + /// @see Searchable#search(WeightPtr, FilterPtr, int32_t, SortPtr) + FieldDocPtr fillFields(const FieldValueHitQueueEntryPtr& entry); + + /// Returns the SortFields being used by this hit queue. 
+ Collection getFields(); +}; + +class LPPAPI FieldValueHitQueueEntry : public ScoreDoc { +public: + FieldValueHitQueueEntry(int32_t slot, int32_t doc, double score); + virtual ~FieldValueHitQueueEntry(); + + LUCENE_CLASS(FieldValueHitQueueEntry); + +public: + int32_t slot; + +public: + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/Fieldable.h b/include/lucene++/Fieldable.h new file mode 100644 index 00000000..93f7fd5d --- /dev/null +++ b/include/lucene++/Fieldable.h @@ -0,0 +1,152 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDABLE_H +#define FIELDABLE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Synonymous with {@link Field}. +/// +/// WARNING: This interface may change within minor versions, despite Lucene's backward compatibility +/// requirements. This means new methods may be added from version to version. This change only +/// affects the Fieldable API; other backwards compatibility promises remain intact. For example, Lucene +/// can still read and write indices created within the same major version. +class LPPAPI Fieldable { +public: + LUCENE_INTERFACE(Fieldable); + virtual ~Fieldable() {} + +public: + /// Sets the boost factor hits on this field. This value will be multiplied into the score of all + /// hits on this this field of this document. + /// + /// The boost is multiplied by {@link Document#getBoost()} of the document containing this field. + /// If a document has multiple fields with the same name, all such values are multiplied together. + /// This product is then used to compute the norm factor for the field. 
By default, in the {@link + /// Similarity#computeNorm(String, FieldInvertState)} method, the boost value is multiplied by the + /// {@link Similarity#lengthNorm(String,int)} and then rounded by {@link Similarity#encodeNorm(double)} + /// before it is stored in the index. One should attempt to ensure that this product does not overflow + /// the range of that encoding. + /// + /// @see Document#setBoost(double) + /// @see Similarity#computeNorm(String, FieldInvertState) + /// @see Similarity#encodeNorm(double) + virtual void setBoost(double boost) = 0; + + /// Returns the boost factor for hits for this field. + /// + /// The default value is 1.0. + /// + /// Note: this value is not stored directly with the document in the index. Documents returned from + /// {@link IndexReader#document(int)} and {@link Searcher#doc(int)} may thus not have the same value + /// present as when this field was indexed. + virtual double getBoost() = 0; + + /// Returns the name of the field as an interned string. For example "date", "title", "body", ... + virtual String name() = 0; + + /// The value of the field as a String, or empty. + /// + /// For indexing, if isStored()==true, the stringValue() will be used as the stored field value + /// unless isBinary()==true, in which case getBinaryValue() will be used. + /// + /// If isIndexed()==true and isTokenized()==false, this String value will be indexed as a single token. + /// If isIndexed()==true and isTokenized()==true, then tokenStreamValue() will be used to generate + /// indexed tokens if not null, else readerValue() will be used to generate indexed tokens if not null, + /// else stringValue() will be used to generate tokens. + virtual String stringValue() = 0; + + /// The value of the field as a Reader, which can be used at index time to generate indexed tokens. + /// @see #stringValue() + virtual ReaderPtr readerValue() = 0; + + /// The TokenStream for this field to be used when indexing, or null. 
+ /// @see #stringValue() + virtual TokenStreamPtr tokenStreamValue() = 0; + + /// True if the value of the field is to be stored in the index for return with search hits. + virtual bool isStored() = 0; + + /// True if the value of the field is to be indexed, so that it may be searched on. + virtual bool isIndexed() = 0; + + /// True if the value of the field should be tokenized as text prior to indexing. Un-tokenized fields + /// are indexed as a single word and may not be Reader-valued. + virtual bool isTokenized() = 0; + + /// True if the term or terms used to index this field are stored as a term vector, available from + /// {@link IndexReader#getTermFreqVector(int,String)}. These methods do not provide access to the + /// original content of the field, only to terms used to index it. If the original content must be + /// preserved, use the stored attribute instead. + virtual bool isTermVectorStored() = 0; + + /// True if terms are stored as term vector together with their offsets (start and end position in + /// source text). + virtual bool isStoreOffsetWithTermVector() = 0; + + /// True if terms are stored as term vector together with their token positions. + virtual bool isStorePositionWithTermVector() = 0; + + /// True if the value of the field is stored as binary. + virtual bool isBinary() = 0; + + /// True if norms are omitted for this indexed field. + virtual bool getOmitNorms() = 0; + + /// If set, omit normalization factors associated with this indexed field. + /// This effectively disables indexing boosts and length normalization for this field. + virtual void setOmitNorms(bool omitNorms) = 0; + + /// Indicates whether a Field is Lazy or not. The semantics of Lazy loading are such that if a Field + /// is lazily loaded, retrieving it's values via {@link #stringValue()} or {@link #getBinaryValue()} + /// is only valid as long as the {@link IndexReader} that retrieved the {@link Document} is still open. 
+ /// + /// @return true if this field can be loaded lazily + virtual bool isLazy() = 0; + + /// Returns offset into byte[] segment that is used as value, if Field is not binary returned value is + /// undefined. + /// @return index of the first character in byte[] segment that represents this Field value. + virtual int32_t getBinaryOffset() = 0; + + /// Returns length of byte[] segment that is used as value, if Field is not binary returned value is + /// undefined. + /// @return length of byte[] segment that represents this Field value. + virtual int32_t getBinaryLength() = 0; + + /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} + /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. + /// @return reference to the Field value as byte[]. + virtual ByteArray getBinaryValue() = 0; + + /// Return the raw byte[] for the binary field. Note that you must also call {@link #getBinaryLength} + /// and {@link #getBinaryOffset} to know which range of bytes in this returned array belong to the field. + /// + /// About reuse: if you pass in the result byte[] and it is used, likely the underlying implementation will + /// hold onto this byte[] and return it in future calls to {@link #getBinaryValue()}. So if you subsequently + /// re-use the same byte[] elsewhere it will alter this Fieldable's value. + /// @param result User defined buffer that will be used if possible. If this is null or not large enough, + /// a new buffer is allocated + /// @return reference to the Field value as byte[]. + virtual ByteArray getBinaryValue(ByteArray result) = 0; + + /// @see #setOmitTermFreqAndPositions + virtual bool getOmitTermFreqAndPositions() = 0; + + /// If set, omit term freq, positions and payloads from postings for this field. 
+ /// + /// NOTE: While this option reduces storage space required in the index, it also means any query requiring + /// positional information, such as {@link PhraseQuery} or {@link SpanQuery} subclasses will silently fail + /// to find results. + virtual void setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) = 0; +}; + +} + +#endif diff --git a/include/lucene++/FieldsReader.h b/include/lucene++/FieldsReader.h new file mode 100644 index 00000000..a524e0cc --- /dev/null +++ b/include/lucene++/FieldsReader.h @@ -0,0 +1,140 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDSREADER_H +#define FIELDSREADER_H + +#include "AbstractField.h" +#include "CloseableThreadLocal.h" + +namespace Lucene { + +/// Class responsible for access to stored document fields. It uses .fdt and .fdx; files. +class LPPAPI FieldsReader : public LuceneObject { +public: + /// Used only by clone + FieldsReader(const FieldInfosPtr& fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, int32_t formatSize, + int32_t docStoreOffset, const IndexInputPtr& cloneableFieldsStream, const IndexInputPtr& cloneableIndexStream); + FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn); + FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); + + virtual ~FieldsReader(); + + LUCENE_CLASS(FieldsReader); + +protected: + FieldInfosPtr fieldInfos; + + // The main fieldStream, used only for cloning. + IndexInputPtr cloneableFieldsStream; + + // This is a clone of cloneableFieldsStream used for reading documents. 
It should not be cloned outside of a + // synchronized context. + IndexInputPtr fieldsStream; + + IndexInputPtr cloneableIndexStream; + IndexInputPtr indexStream; + int32_t numTotalDocs; + int32_t _size; + bool closed; + int32_t format; + int32_t formatSize; + + // The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. + int32_t docStoreOffset; + + CloseableThreadLocal fieldsStreamTL; + bool isOriginal; + +public: + /// Returns a cloned FieldsReader that shares open IndexInputs with the original one. It is the caller's job not to + /// close the original FieldsReader until all clones are called (eg, currently SegmentReader manages this logic). + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Closes the underlying {@link IndexInput} streams, including any ones associated with a lazy implementation of a + /// Field. This means that the Fields values will not be accessible. + void close(); + + int32_t size(); + + bool canReadRawDocs(); + + DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); + + /// Returns the length in bytes of each raw document in a contiguous range of length numDocs starting with startDocID. + /// Returns the IndexInput (the fieldStream), already seeked to the starting point for startDocID. + IndexInputPtr rawDocs(Collection lengths, int32_t startDocID, int32_t numDocs); + +protected: + void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); + + void ensureOpen(); + + void seekIndex(int32_t docID); + + /// Skip the field. We still have to read some of the information about the field, but can skip past the actual content. + /// This will have the most payoff on large fields. 
+ void skipField(bool binary, bool compressed); + void skipField(bool binary, bool compressed, int32_t toRead); + + void addFieldLazy(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize); + void addField(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize); + + /// Add the size of field as a byte[] containing the 4 bytes of the integer byte size (high order byte first; char = 2 bytes). + /// Read just the size - caller must skip the field content to continue reading fields. Return the size in bytes or chars, + /// depending on field type. + int32_t addFieldSize(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed); + + ByteArray uncompress(ByteArray b); + String uncompressString(ByteArray b); + + friend class LazyField; +}; + +class LazyField : public AbstractField { +public: + LazyField(const FieldsReaderPtr& reader, const String& name, Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); + LazyField(const FieldsReaderPtr& reader, const String& name, Store store, Index index, TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed); + virtual ~LazyField(); + + LUCENE_CLASS(LazyField); + +protected: + FieldsReaderWeakPtr _reader; + int32_t toRead; + int64_t pointer; + + /// @deprecated Only kept for backward-compatibility with <3.0 indexes. + bool isCompressed; + +public: + /// The value of the field as a Reader, or null. If null, the String value, binary value, or TokenStream value is used. + /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. + ReaderPtr readerValue(); + + /// The value of the field as a TokenStream, or null. If null, the Reader value, String value, or binary value is used. + /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. 
+ TokenStreamPtr tokenStreamValue(); + + /// The value of the field as a String, or null. If null, the Reader value, binary value, or TokenStream value is used. + /// Exactly one of stringValue(), readerValue(), getBinaryValue(), and tokenStreamValue() must be set. + String stringValue(); + + int64_t getPointer(); + void setPointer(int64_t pointer); + int32_t getToRead(); + void setToRead(int32_t toRead); + + /// Return the raw byte[] for the binary field. + virtual ByteArray getBinaryValue(ByteArray result); + +protected: + IndexInputPtr getFieldStream(); +}; + +} + +#endif diff --git a/include/lucene++/FieldsWriter.h b/include/lucene++/FieldsWriter.h new file mode 100644 index 00000000..70cac199 --- /dev/null +++ b/include/lucene++/FieldsWriter.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FIELDSWRITER_H +#define FIELDSWRITER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class FieldsWriter : public LuceneObject { +public: + FieldsWriter(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn); + FieldsWriter(const IndexOutputPtr& fdx, const IndexOutputPtr& fdt, const FieldInfosPtr& fn); + virtual ~FieldsWriter(); + + LUCENE_CLASS(FieldsWriter); + +protected: + FieldInfosPtr fieldInfos; + IndexOutputPtr fieldsStream; + IndexOutputPtr indexStream; + bool doClose; + +public: + static const uint8_t FIELD_IS_TOKENIZED; + static const uint8_t FIELD_IS_BINARY; + static const uint8_t FIELD_IS_COMPRESSED; + + static const int32_t FORMAT; // Original format + static const int32_t FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; // Changed strings to UTF8 + static const int32_t FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; // Lucene 3.0: Removal of compressed fields + + // NOTE: if you introduce a new format, make it 1 higher than the current one, and always change this + // if you switch to a new format! + static const int32_t FORMAT_CURRENT; + +public: + void setFieldsStream(const IndexOutputPtr& stream); + + /// Writes the contents of buffer into the fields stream and adds a new entry for this document into the index + /// stream. This assumes the buffer was already written in the correct fields format. + void flushDocument(int32_t numStoredFields, const RAMOutputStreamPtr& buffer); + + void skipDocument(); + void flush(); + void close(); + void writeField(const FieldInfoPtr& fi, const FieldablePtr& field); + + /// Bulk write a contiguous series of documents. The lengths array is the length (in bytes) of each raw document. + /// The stream IndexInput is the fieldsStream from which we should bulk-copy all bytes. 
+ void addRawDocuments(const IndexInputPtr& stream, Collection lengths, int32_t numDocs); + + void addDocument(const DocumentPtr& doc); +}; + +} + +#endif diff --git a/include/lucene++/FileReader.h b/include/lucene++/FileReader.h new file mode 100644 index 00000000..5b26dcdc --- /dev/null +++ b/include/lucene++/FileReader.h @@ -0,0 +1,54 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FILEREADER_H +#define FILEREADER_H + +#include "Reader.h" + +namespace Lucene { + +/// Convenience class for reading character files. +class LPPAPI FileReader : public Reader { +public: + /// Creates a new FileReader, given the file name to read from. + FileReader(const String& fileName); + virtual ~FileReader(); + + LUCENE_CLASS(FileReader); + +protected: + ifstreamPtr file; + int64_t _length; + ByteArray fileBuffer; + +public: + static const int32_t FILE_EOF; + static const int32_t FILE_ERROR; + +public: + /// Read a single character. + virtual int32_t read(); + + /// Read characters into a portion of an array. + virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); + + /// Close the stream. + virtual void close(); + + /// Tell whether this stream supports the mark() operation + virtual bool markSupported(); + + /// Reset the stream. + virtual void reset(); + + /// The number of bytes in the file. 
+ virtual int64_t length(); +}; + +} + +#endif diff --git a/include/lucene++/FileSwitchDirectory.h b/include/lucene++/FileSwitchDirectory.h new file mode 100644 index 00000000..7cad4291 --- /dev/null +++ b/include/lucene++/FileSwitchDirectory.h @@ -0,0 +1,85 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FILESWITCHDIRECTORY_H +#define FILESWITCHDIRECTORY_H + +#include "Directory.h" + +namespace Lucene { + +/// A Directory instance that switches files between two other +/// Directory instances. +/// +/// Files with the specified extensions are placed in the primary +/// directory; others are placed in the secondary directory. The +/// provided Set must not change once passed to this class, and +/// must allow multiple threads to call contains at once. +class LPPAPI FileSwitchDirectory : public Directory { +public: + FileSwitchDirectory(HashSet primaryExtensions, const DirectoryPtr& primaryDir, const DirectoryPtr& secondaryDir, bool doClose); + virtual ~FileSwitchDirectory(); + + LUCENE_CLASS(FileSwitchDirectory); + +protected: + HashSet primaryExtensions; + DirectoryPtr primaryDir; + DirectoryPtr secondaryDir; + bool doClose; + +public: + /// Return the primary directory. + DirectoryPtr getPrimaryDir(); + + /// Return the secondary directory. + DirectoryPtr getSecondaryDir(); + + /// Closes the store. + virtual void close(); + + /// Returns an array of strings, one for each file in the directory. + virtual HashSet listAll(); + + /// Utility method to return a file's extension. + static String getExtension(const String& name); + + /// Returns true if a file with the given name exists. 
+ virtual bool fileExists(const String& name); + + /// Returns the time the named file was last modified. + virtual uint64_t fileModified(const String& name); + + /// Set the modified time of an existing file to now. + virtual void touchFile(const String& name); + + /// Removes an existing file in the directory. + virtual void deleteFile(const String& name); + + /// Returns the length of a file in the directory. + virtual int64_t fileLength(const String& name); + + /// Creates a new, empty file in the directory with the given name. + /// Returns a stream writing this file. + virtual IndexOutputPtr createOutput(const String& name); + + /// Ensure that any writes to this file are moved to stable storage. + /// Lucene uses this to properly commit changes to the index, to + /// prevent a machine/OS crash from corrupting the index. + virtual void sync(const String& name); + + /// Returns a stream reading an existing file, with the specified + /// read buffer size. The particular Directory implementation may + /// ignore the buffer size. + virtual IndexInputPtr openInput(const String& name); + +protected: + DirectoryPtr getDirectory(const String& name); +}; + +} + +#endif diff --git a/include/lucene++/FileUtils.h b/include/lucene++/FileUtils.h new file mode 100644 index 00000000..da3021a4 --- /dev/null +++ b/include/lucene++/FileUtils.h @@ -0,0 +1,67 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FILEUTILS_H +#define FILEUTILS_H + +#include "Lucene.h" + +namespace Lucene { + +namespace FileUtils { + +/// Return true if given file or directory exists. +LPPAPI bool fileExists(const String& path); + +/// Return file last modified date and time. 
+LPPAPI uint64_t fileModified(const String& path); + +/// Set file last modified date and time to now. +LPPAPI bool touchFile(const String& path); + +/// Return file length in bytes. +LPPAPI int64_t fileLength(const String& path); + +/// Set new file length, truncating or expanding as required. +LPPAPI bool setFileLength(const String& path, int64_t length); + +/// Delete file from file system. +LPPAPI bool removeFile(const String& path); + +/// Copy a file to/from file system. +LPPAPI bool copyFile(const String& source, const String& dest); + +/// Create new directory under given location. +LPPAPI bool createDirectory(const String& path); + +/// Delete directory from file system. +LPPAPI bool removeDirectory(const String& path); + +/// Return true if given path points to a directory. +LPPAPI bool isDirectory(const String& path); + +/// Return list of files (and/or directories) under given directory. +/// @param path path to list directory. +/// @param filesOnly if true the exclude sub-directories. +/// @param dirList list of files to return. +LPPAPI bool listDirectory(const String& path, bool filesOnly, HashSet dirList); + +/// Copy a directory to/from file system. +LPPAPI bool copyDirectory(const String& source, const String& dest); + +/// Return complete path after joining given directory and file name. +LPPAPI String joinPath(const String& path, const String& file); + +/// Extract parent path from given path. +LPPAPI String extractPath(const String& path); + +/// Extract file name from given path. +LPPAPI String extractFile(const String& path); +} + +} + +#endif diff --git a/include/lucene++/Filter.h b/include/lucene++/Filter.h new file mode 100644 index 00000000..e4a5af47 --- /dev/null +++ b/include/lucene++/Filter.h @@ -0,0 +1,40 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FILTER_H +#define FILTER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Abstract base class for restricting which documents may be returned during searching. +class LPPAPI Filter : public LuceneObject { +public: + virtual ~Filter(); + LUCENE_CLASS(Filter); + +public: + /// Creates a {@link DocIdSet} enumerating the documents that should be permitted in search results. + /// + /// Note: null can be returned if no documents are accepted by this Filter. + /// + /// Note: This method will be called once per segment in the index during searching. The returned + /// {@link DocIdSet} must refer to document IDs for that segment, not for the top-level reader. + /// + /// @param reader a {@link IndexReader} instance opened on the index currently searched on. Note, + /// it is likely that the provided reader does not represent the whole underlying index ie. if the + /// index has more than one segment the given reader only represents a single segment. + /// @return a DocIdSet that provides the documents which should be permitted or prohibited in search + /// results. NOTE: null can be returned if no documents will be accepted by this Filter. + /// + /// @see DocIdBitSet + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader) = 0; +}; + +} + +#endif diff --git a/include/lucene++/FilterIndexReader.h b/include/lucene++/FilterIndexReader.h new file mode 100644 index 00000000..b04c0234 --- /dev/null +++ b/include/lucene++/FilterIndexReader.h @@ -0,0 +1,136 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FILTERINDEXREADER_H +#define FILTERINDEXREADER_H + +#include "IndexReader.h" +#include "TermPositions.h" +#include "TermEnum.h" + +namespace Lucene { + +/// A FilterIndexReader contains another IndexReader, which it uses as its basic source of data, possibly +/// transforming the data along the way or providing additional functionality. The class FilterIndexReader +/// itself simply implements all abstract methods of IndexReader with versions that pass all requests to +/// the contained index reader. Subclasses of FilterIndexReader may further override some of these methods +/// and may also provide additional methods and fields. +class LPPAPI FilterIndexReader : public IndexReader { +public: + /// Construct a FilterIndexReader based on the specified base reader. Directory locking for delete, + /// undeleteAll, and setNorm operations is left to the base reader. + /// + /// Note that base reader is closed if this FilterIndexReader is closed. + /// @param in specified base reader. 
+ FilterIndexReader(const IndexReaderPtr& in); + + virtual ~FilterIndexReader(); + + LUCENE_CLASS(FilterIndexReader); + +protected: + IndexReaderPtr in; + +public: + virtual DirectoryPtr directory(); + virtual Collection getTermFreqVectors(int32_t docNumber); + virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); + virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); + virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); + virtual int32_t numDocs(); + virtual int32_t maxDoc(); + virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); + virtual bool isDeleted(int32_t n); + virtual bool hasDeletions(); + virtual bool hasNorms(const String& field); + virtual ByteArray norms(const String& field); + virtual void norms(const String& field, ByteArray norms, int32_t offset); + virtual TermEnumPtr terms(); + virtual TermEnumPtr terms(const TermPtr& t); + virtual int32_t docFreq(const TermPtr& t); + virtual TermDocsPtr termDocs(); + virtual TermDocsPtr termDocs(const TermPtr& term); + virtual TermPositionsPtr termPositions(); + virtual HashSet getFieldNames(FieldOption fieldOption); + virtual int64_t getVersion(); + virtual bool isCurrent(); + virtual bool isOptimized(); + virtual Collection getSequentialSubReaders(); + + /// If the subclass of FilteredIndexReader modifies the contents of the FieldCache, you must + /// override this method to provide a different key + virtual LuceneObjectPtr getFieldCacheKey(); + + /// If the subclass of FilteredIndexReader modifies the deleted docs, you must override this + /// method to provide a different key + virtual LuceneObjectPtr getDeletesCacheKey(); + +protected: + virtual void doUndeleteAll(); + virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); + virtual void doDelete(int32_t docNum); + virtual void doCommit(MapStringString commitUserData); + virtual void doClose(); 
+}; + +/// Base class for filtering {@link TermDocs} implementations. +class LPPAPI FilterTermDocs : public TermPositions, public LuceneObject { +public: + FilterTermDocs(const TermDocsPtr& in); + virtual ~FilterTermDocs(); + + LUCENE_CLASS(FilterTermDocs); + +protected: + TermDocsPtr in; + +public: + virtual void seek(const TermPtr& term); + virtual void seek(const TermEnumPtr& termEnum); + virtual int32_t doc(); + virtual int32_t freq(); + virtual bool next(); + virtual int32_t read(Collection& docs, Collection& freqs); + virtual bool skipTo(int32_t target); + virtual void close(); +}; + +/// Base class for filtering {@link TermPositions} implementations. +class LPPAPI FilterTermPositions : public FilterTermDocs { +public: + FilterTermPositions(const TermPositionsPtr& in); + virtual ~FilterTermPositions(); + + LUCENE_CLASS(FilterTermPositions); + +public: + virtual int32_t nextPosition(); + virtual int32_t getPayloadLength(); + virtual ByteArray getPayload(ByteArray data, int32_t offset); + virtual bool isPayloadAvailable(); +}; + +/// Base class for filtering {@link TermEnum} implementations. +class LPPAPI FilterTermEnum : public TermEnum { +public: + FilterTermEnum(const TermEnumPtr& in); + virtual ~FilterTermEnum(); + + LUCENE_CLASS(FilterTermEnum); + +protected: + TermEnumPtr in; + +public: + virtual bool next(); + virtual TermPtr term(); + virtual int32_t docFreq(); + virtual void close(); +}; + +} + +#endif diff --git a/include/lucene++/FilterManager.h b/include/lucene++/FilterManager.h new file mode 100644 index 00000000..4c10988a --- /dev/null +++ b/include/lucene++/FilterManager.h @@ -0,0 +1,71 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FILTERMANAGER_H +#define FILTERMANAGER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Filter caching singleton. It can be used to save filters locally for reuse. Also could be used as a +/// persistent storage for any filter as long as the filter provides a proper hashCode(), as that is used +/// as the key in the cache. +/// +/// The cache is periodically cleaned up from a separate thread to ensure the cache doesn't exceed the +/// maximum size. +class LPPAPI FilterManager : public LuceneObject { +public: + /// Sets up the FilterManager singleton. + FilterManager(); + virtual ~FilterManager(); + + LUCENE_CLASS(FilterManager); + +protected: + /// The default maximum number of Filters in the cache + static const int32_t DEFAULT_CACHE_CLEAN_SIZE; + + /// The default frequency of cache cleanup + static const int64_t DEFAULT_CACHE_SLEEP_TIME; + + /// The cache itself + MapIntFilterItem cache; + + /// Maximum allowed cache size + int32_t cacheCleanSize; + + /// Cache cleaning frequency + int64_t cleanSleepTime; + + /// Cache cleaner that runs in a separate thread + FilterCleanerPtr filterCleaner; + +public: + virtual void initialize(); + + static FilterManagerPtr getInstance(); + + /// Sets the max size that cache should reach before it is cleaned up + /// @param cacheCleanSize maximum allowed cache size + void setCacheSize(int32_t cacheCleanSize); + + /// Sets the cache cleaning frequency in milliseconds. + /// @param cleanSleepTime cleaning frequency in milliseconds + void setCleanThreadSleepTime(int64_t cleanSleepTime); + + /// Returns the cached version of the filter. Allows the caller to pass up a small filter but this will + /// keep a persistent version around and allow the caching filter to do its job. 
+ /// @param filter The input filter + /// @return The cached version of the filter + FilterPtr getFilter(const FilterPtr& filter); + + friend class FilterCleaner; +}; + +} + +#endif diff --git a/include/lucene++/FilteredDocIdSet.h b/include/lucene++/FilteredDocIdSet.h new file mode 100644 index 00000000..70c8e615 --- /dev/null +++ b/include/lucene++/FilteredDocIdSet.h @@ -0,0 +1,54 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FILTEREDDOCIDSET_H +#define FILTEREDDOCIDSET_H + +#include "DocIdSet.h" + +namespace Lucene { + +/// Abstract decorator class for a DocIdSet implementation that provides on-demand filtering/validation +/// mechanism on a given DocIdSet. +/// +/// Technically, this same functionality could be achieved with ChainedFilter (under contrib/misc), however +/// the benefit of this class is it never materializes the full bitset for the filter. Instead, the {@link +/// #match} method is invoked on-demand, per docID visited during searching. If you know few docIDs will +/// be visited, and the logic behind {@link #match} is relatively costly, this may be a better way to filter +/// than ChainedFilter. +/// @see DocIdSet +class LPPAPI FilteredDocIdSet : public DocIdSet { +public: + /// @param innerSet Underlying DocIdSet + FilteredDocIdSet(const DocIdSetPtr& innerSet); + virtual ~FilteredDocIdSet(); + + LUCENE_CLASS(FilteredDocIdSet); + +protected: + DocIdSetPtr innerSet; + +public: + /// This DocIdSet implementation is cacheable if the inner set is cacheable. + virtual bool isCacheable(); + + /// Implementation of the contract to build a DocIdSetIterator. 
+ /// @see DocIdSetIterator + /// @see FilteredDocIdSetIterator + virtual DocIdSetIteratorPtr iterator(); + +protected: + /// Validation method to determine whether a docid should be in the result set. + /// @param docid docid to be tested + /// @return true if input docid should be in the result set, false otherwise. + virtual bool match(int32_t docid) = 0; + + friend class DefaultFilteredDocIdSetIterator; +}; + +} + +#endif diff --git a/include/lucene++/FilteredDocIdSetIterator.h b/include/lucene++/FilteredDocIdSetIterator.h new file mode 100644 index 00000000..43165e45 --- /dev/null +++ b/include/lucene++/FilteredDocIdSetIterator.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FILTEREDDOCIDSETITERATOR_H +#define FILTEREDDOCIDSETITERATOR_H + +#include "DocIdSetIterator.h" + +namespace Lucene { + +/// Abstract decorator class of a DocIdSetIterator implementation that provides on-demand filter/validation +/// mechanism on an underlying DocIdSetIterator. See {@link FilteredDocIdSet}. +class LPPAPI FilteredDocIdSetIterator : public DocIdSetIterator { +public: + /// @param innerIter Underlying DocIdSetIterator. + FilteredDocIdSetIterator(const DocIdSetIteratorPtr& innerIter); + virtual ~FilteredDocIdSetIterator(); + + LUCENE_CLASS(FilteredDocIdSetIterator); + +protected: + DocIdSetIteratorPtr innerIter; + int32_t doc; + +public: + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); + +protected: + /// Validation method to determine whether a docid should be in the result set. + /// @param doc docid to be tested + /// @return true if input docid should be in the result set, false otherwise. 
+ /// @see #FilteredDocIdSetIterator(DocIdSetIterator). + virtual bool match(int32_t docid) = 0; +}; + +} + +#endif diff --git a/include/lucene++/FilteredQuery.h b/include/lucene++/FilteredQuery.h new file mode 100644 index 00000000..ed33e059 --- /dev/null +++ b/include/lucene++/FilteredQuery.h @@ -0,0 +1,63 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FILTEREDQUERY_H +#define FILTEREDQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// A query that applies a filter to the results of another query. +/// +/// Note: the bits are retrieved from the filter each time this query is used in a search - use a +/// CachingWrapperFilter to avoid regenerating the bits every time. +/// +/// @see CachingWrapperFilter +class LPPAPI FilteredQuery : public Query { +public: + /// Constructs a new query which applies a filter to the results of the original query. + /// Filter::getDocIdSet() will be called every time this query is used in a search. + /// @param query Query to be filtered, cannot be null. + /// @param filter Filter to apply to query results, cannot be null. + FilteredQuery(const QueryPtr& query, const FilterPtr& filter); + + virtual ~FilteredQuery(); + + LUCENE_CLASS(FilteredQuery); + +private: + QueryPtr query; + FilterPtr filter; + +public: + using Query::toString; + + /// Returns a Weight that applies the filter to the enclosed query's Weight. + /// This is accomplished by overriding the Scorer returned by the Weight. + virtual WeightPtr createWeight(const SearcherPtr& searcher); + + /// Rewrites the wrapped query. 
+ virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + QueryPtr getQuery(); + FilterPtr getFilter(); + + virtual void extractTerms(SetTerm terms); + + /// Prints a user-readable version of this query. + virtual String toString(const String& field); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + friend class FilteredQueryWeight; +}; + +} + +#endif diff --git a/include/lucene++/FilteredTermEnum.h b/include/lucene++/FilteredTermEnum.h new file mode 100644 index 00000000..0da4f2ce --- /dev/null +++ b/include/lucene++/FilteredTermEnum.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FILTEREDTERMENUM_H +#define FILTEREDTERMENUM_H + +#include "TermEnum.h" + +namespace Lucene { + +/// Abstract class for enumerating a subset of all terms. +/// +/// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than +/// all that precede it. +class LPPAPI FilteredTermEnum : public TermEnum { +public: + virtual ~FilteredTermEnum(); + LUCENE_CLASS(FilteredTermEnum); + +protected: + /// The current term + TermPtr currentTerm; + + /// The delegate enum - to set this member use {@link #setEnum} + TermEnumPtr actualEnum; + +public: + /// Equality measure on the term + virtual double difference() = 0; + + /// Returns the docFreq of the current Term in the enumeration. + /// Returns -1 if no Term matches or all terms have been enumerated. + virtual int32_t docFreq(); + + /// Increments the enumeration to the next element. True if one exists. 
+ virtual bool next(); + + /// Returns the current Term in the enumeration. + /// Returns null if no Term matches or all terms have been enumerated. + virtual TermPtr term(); + + /// Closes the enumeration to further activity, freeing resources. + virtual void close(); + +protected: + /// Equality compare on the term + virtual bool termCompare(const TermPtr& term) = 0; + + /// Indicates the end of the enumeration has been reached + virtual bool endEnum() = 0; + + /// Use this method to set the actual TermEnum (eg. in ctor), it will be automatically positioned + /// on the first matching term. + virtual void setEnum(const TermEnumPtr& actualEnum); +}; + +} + +#endif diff --git a/include/lucene++/FlagsAttribute.h b/include/lucene++/FlagsAttribute.h new file mode 100644 index 00000000..572cadf5 --- /dev/null +++ b/include/lucene++/FlagsAttribute.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FLAGSATTRIBUTE_H +#define FLAGSATTRIBUTE_H + +#include "Attribute.h" + +namespace Lucene { + +/// This attribute can be used to pass different flags down the tokenizer chain, eg from one TokenFilter +/// to another one. +class LPPAPI FlagsAttribute : public Attribute { +public: + FlagsAttribute(); + virtual ~FlagsAttribute(); + + LUCENE_CLASS(FlagsAttribute); + +protected: + int32_t flags; + +public: + virtual String toString(); + + /// Get the bitset for any bits that have been set. This is completely distinct from {@link + /// TypeAttribute#type()}, although they do share similar purposes. The flags can be used to encode + /// information about the token for use by other {@link TokenFilter}s. 
+ virtual int32_t getFlags(); + + /// @see #getFlags() + virtual void setFlags(int32_t flags); + + virtual void clear(); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual void copyTo(const AttributePtr& target); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/FormatPostingsDocsConsumer.h b/include/lucene++/FormatPostingsDocsConsumer.h new file mode 100644 index 00000000..59d4d06a --- /dev/null +++ b/include/lucene++/FormatPostingsDocsConsumer.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FORMATPOSTINGSDOCSCONSUMER_H +#define FORMATPOSTINGSDOCSCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class FormatPostingsDocsConsumer : public LuceneObject { +public: + virtual ~FormatPostingsDocsConsumer(); + + LUCENE_CLASS(FormatPostingsDocsConsumer); + +public: + /// Adds a new doc in this term. If this returns null then we just skip consuming positions/payloads. + virtual FormatPostingsPositionsConsumerPtr addDoc(int32_t docID, int32_t termDocFreq) = 0; + + /// Called when we are done adding docs to this term + virtual void finish() = 0; +}; + +} + +#endif diff --git a/include/lucene++/FormatPostingsDocsWriter.h b/include/lucene++/FormatPostingsDocsWriter.h new file mode 100644 index 00000000..771396a3 --- /dev/null +++ b/include/lucene++/FormatPostingsDocsWriter.h @@ -0,0 +1,58 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FORMATPOSTINGSDOCSWRITER_H +#define FORMATPOSTINGSDOCSWRITER_H + +#include "FormatPostingsDocsConsumer.h" + +namespace Lucene { + +/// Consumes doc & freq, writing them using the current index file format +class FormatPostingsDocsWriter : public FormatPostingsDocsConsumer { +public: + FormatPostingsDocsWriter(const SegmentWriteStatePtr& state, const FormatPostingsTermsWriterPtr& parent); + virtual ~FormatPostingsDocsWriter(); + + LUCENE_CLASS(FormatPostingsDocsWriter); + +public: + IndexOutputPtr out; + FormatPostingsTermsWriterWeakPtr _parent; + SegmentWriteStatePtr state; + FormatPostingsPositionsWriterPtr posWriter; + DefaultSkipListWriterPtr skipListWriter; + int32_t skipInterval; + int32_t totalNumDocs; + + bool omitTermFreqAndPositions; + bool storePayloads; + int64_t freqStart; + FieldInfoPtr fieldInfo; + + int32_t lastDocID; + int32_t df; + + TermInfoPtr termInfo; // minimize consing + UTF8ResultPtr utf8; + +public: + virtual void initialize(); + + void setField(const FieldInfoPtr& fieldInfo); + + /// Adds a new doc in this term. If this returns null then we just skip consuming positions/payloads. + virtual FormatPostingsPositionsConsumerPtr addDoc(int32_t docID, int32_t termDocFreq); + + /// Called when we are done adding docs to this term + virtual void finish(); + + void close(); +}; + +} + +#endif diff --git a/include/lucene++/FormatPostingsFieldsConsumer.h b/include/lucene++/FormatPostingsFieldsConsumer.h new file mode 100644 index 00000000..7cb0bdfd --- /dev/null +++ b/include/lucene++/FormatPostingsFieldsConsumer.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FORMATPOSTINGSFIELDSCONSUMER_H +#define FORMATPOSTINGSFIELDSCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Abstract API that consumes terms, doc, freq, prox and payloads postings. Concrete implementations of this +/// actually do "something" with the postings (write it into the index in a specific format). +class FormatPostingsFieldsConsumer : public LuceneObject { +public: + virtual ~FormatPostingsFieldsConsumer(); + + LUCENE_CLASS(FormatPostingsFieldsConsumer); + +public: + /// Add a new field. + virtual FormatPostingsTermsConsumerPtr addField(const FieldInfoPtr& field) = 0; + + /// Called when we are done adding everything. + virtual void finish() = 0; +}; + +} + +#endif diff --git a/include/lucene++/FormatPostingsFieldsWriter.h b/include/lucene++/FormatPostingsFieldsWriter.h new file mode 100644 index 00000000..6222cfad --- /dev/null +++ b/include/lucene++/FormatPostingsFieldsWriter.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FORMATPOSTINGSFIELDSWRITER_H +#define FORMATPOSTINGSFIELDSWRITER_H + +#include "FormatPostingsFieldsConsumer.h" + +namespace Lucene { + +class FormatPostingsFieldsWriter : public FormatPostingsFieldsConsumer { +public: + FormatPostingsFieldsWriter(const SegmentWriteStatePtr& state, const FieldInfosPtr& fieldInfos); + virtual ~FormatPostingsFieldsWriter(); + + LUCENE_CLASS(FormatPostingsFieldsWriter); + +public: + DirectoryPtr dir; + String segment; + TermInfosWriterPtr termsOut; + SegmentWriteStatePtr state; + FieldInfosPtr fieldInfos; + FormatPostingsTermsWriterPtr termsWriter; + DefaultSkipListWriterPtr skipListWriter; + int32_t totalNumDocs; + +public: + virtual void initialize(); + + /// Add a new field. + virtual FormatPostingsTermsConsumerPtr addField(const FieldInfoPtr& field); + + /// Called when we are done adding everything. + virtual void finish(); +}; + +} + +#endif diff --git a/include/lucene++/FormatPostingsPositionsConsumer.h b/include/lucene++/FormatPostingsPositionsConsumer.h new file mode 100644 index 00000000..685c73df --- /dev/null +++ b/include/lucene++/FormatPostingsPositionsConsumer.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FORMATPOSTINGSPOSITIONSCONSUMER_H +#define FORMATPOSTINGSPOSITIONSCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class FormatPostingsPositionsConsumer : public LuceneObject { +public: + virtual ~FormatPostingsPositionsConsumer(); + + LUCENE_CLASS(FormatPostingsPositionsConsumer); + +public: + /// Add a new position & payload. If payloadLength > 0 you must read those bytes from the IndexInput. 
+ virtual void addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength) = 0; + + /// Called when we are done adding positions & payloads. + virtual void finish() = 0; +}; + +} + +#endif diff --git a/include/lucene++/FormatPostingsPositionsWriter.h b/include/lucene++/FormatPostingsPositionsWriter.h new file mode 100644 index 00000000..cfcca4c9 --- /dev/null +++ b/include/lucene++/FormatPostingsPositionsWriter.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FORMATPOSTINGSPOSITIONSWRITER_H +#define FORMATPOSTINGSPOSITIONSWRITER_H + +#include "FormatPostingsPositionsConsumer.h" + +namespace Lucene { + +class FormatPostingsPositionsWriter : public FormatPostingsPositionsConsumer { +public: + FormatPostingsPositionsWriter(const SegmentWriteStatePtr& state, const FormatPostingsDocsWriterPtr& parent); + virtual ~FormatPostingsPositionsWriter(); + + LUCENE_CLASS(FormatPostingsPositionsWriter); + +public: + FormatPostingsDocsWriterWeakPtr _parent; + IndexOutputPtr out; + + bool omitTermFreqAndPositions; + bool storePayloads; + int32_t lastPayloadLength; + + int32_t lastPosition; + +public: + /// Add a new position & payload + virtual void addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength); + + void setField(const FieldInfoPtr& fieldInfo); + + /// Called when we are done adding positions & payloads + virtual void finish(); + + void close(); +}; + +} + +#endif diff --git a/include/lucene++/FormatPostingsTermsConsumer.h b/include/lucene++/FormatPostingsTermsConsumer.h new file mode 100644 index 00000000..d3b4a743 --- /dev/null +++ b/include/lucene++/FormatPostingsTermsConsumer.h 
@@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FORMATPOSTINGSTERMSCONSUMER_H +#define FORMATPOSTINGSTERMSCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class FormatPostingsTermsConsumer : public LuceneObject { +public: + virtual ~FormatPostingsTermsConsumer(); + + LUCENE_CLASS(FormatPostingsTermsConsumer); + +public: + CharArray termBuffer; + +public: + /// Adds a new term in this field + virtual FormatPostingsDocsConsumerPtr addTerm(CharArray text, int32_t start) = 0; + virtual FormatPostingsDocsConsumerPtr addTerm(const String& text); + + /// Called when we are done adding terms to this field + virtual void finish() = 0; +}; + +} + +#endif diff --git a/include/lucene++/FormatPostingsTermsWriter.h b/include/lucene++/FormatPostingsTermsWriter.h new file mode 100644 index 00000000..06b0fa47 --- /dev/null +++ b/include/lucene++/FormatPostingsTermsWriter.h @@ -0,0 +1,50 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FORMATPOSTINGSTERMSWRITER_H +#define FORMATPOSTINGSTERMSWRITER_H + +#include "FormatPostingsTermsConsumer.h" + +namespace Lucene { + +class FormatPostingsTermsWriter : public FormatPostingsTermsConsumer { +public: + FormatPostingsTermsWriter(const SegmentWriteStatePtr& state, const FormatPostingsFieldsWriterPtr& parent); + virtual ~FormatPostingsTermsWriter(); + + LUCENE_CLASS(FormatPostingsTermsWriter); + +public: + FormatPostingsFieldsWriterWeakPtr _parent; + SegmentWriteStatePtr state; + FormatPostingsDocsWriterPtr docsWriter; + TermInfosWriterPtr termsOut; + FieldInfoPtr fieldInfo; + + CharArray currentTerm; + int32_t currentTermStart; + + int64_t freqStart; + int64_t proxStart; + +public: + virtual void initialize(); + + void setField(const FieldInfoPtr& fieldInfo); + + /// Adds a new term in this field + virtual FormatPostingsDocsConsumerPtr addTerm(CharArray text, int32_t start); + + /// Called when we are done adding terms to this field + virtual void finish(); + + void close(); +}; + +} + +#endif diff --git a/include/lucene++/FreqProxFieldMergeState.h b/include/lucene++/FreqProxFieldMergeState.h new file mode 100644 index 00000000..fb61ff12 --- /dev/null +++ b/include/lucene++/FreqProxFieldMergeState.h @@ -0,0 +1,48 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FREQPROXFIELDMERGESTATE_H +#define FREQPROXFIELDMERGESTATE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Used by DocumentsWriter to merge the postings from multiple ThreadStates when creating a segment +class FreqProxFieldMergeState : public LuceneObject { +public: + FreqProxFieldMergeState(const FreqProxTermsWriterPerFieldPtr& field); + virtual ~FreqProxFieldMergeState(); + + LUCENE_CLASS(FreqProxFieldMergeState); + +public: + FreqProxTermsWriterPerFieldPtr field; + int32_t numPostings; + CharBlockPoolPtr charPool; + Collection postings; + + FreqProxTermsWriterPostingListPtr p; + CharArray text; + int32_t textOffset; + + ByteSliceReaderPtr freq; + ByteSliceReaderPtr prox; + + int32_t docID; + int32_t termFreq; + +protected: + int32_t postingUpto; + +public: + bool nextTerm(); + bool nextDoc(); +}; + +} + +#endif diff --git a/include/lucene++/FreqProxTermsWriter.h b/include/lucene++/FreqProxTermsWriter.h new file mode 100644 index 00000000..c037db4d --- /dev/null +++ b/include/lucene++/FreqProxTermsWriter.h @@ -0,0 +1,57 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef FREQPROXTERMSWRITER_H +#define FREQPROXTERMSWRITER_H + +#include "TermsHashConsumer.h" +#include "RawPostingList.h" + +namespace Lucene { + +class FreqProxTermsWriter : public TermsHashConsumer { +public: + virtual ~FreqProxTermsWriter(); + + LUCENE_CLASS(FreqProxTermsWriter); + +protected: + ByteArray payloadBuffer; + +public: + virtual TermsHashConsumerPerThreadPtr addThread(const TermsHashPerThreadPtr& perThread); + virtual void createPostings(Collection postings, int32_t start, int32_t count); + virtual void closeDocStore(const SegmentWriteStatePtr& state); + virtual void abort(); + virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); + + /// Walk through all unique text tokens (Posting instances) found in this field and serialize them + /// into a single RAM segment. + void appendPostings(Collection fields, const FormatPostingsFieldsConsumerPtr& consumer); + + virtual int32_t bytesPerPosting(); + +protected: + static int32_t compareText(const wchar_t* text1, int32_t pos1, const wchar_t* text2, int32_t pos2); +}; + +class FreqProxTermsWriterPostingList : public RawPostingList { +public: + FreqProxTermsWriterPostingList(); + virtual ~FreqProxTermsWriterPostingList(); + + LUCENE_CLASS(FreqProxTermsWriterPostingList); + +public: + int32_t docFreq; // # times this term occurs in the current doc + int32_t lastDocID; // Last docID where this term occurred + int32_t lastDocCode; // Code for prior doc + int32_t lastPosition; // Last position where this term occurred +}; + +} + +#endif diff --git a/include/lucene++/FreqProxTermsWriterPerField.h b/include/lucene++/FreqProxTermsWriterPerField.h new file mode 100644 index 00000000..c2152cd2 --- /dev/null +++ b/include/lucene++/FreqProxTermsWriterPerField.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////// 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FREQPROXTERMSWRITERPERFIELD_H +#define FREQPROXTERMSWRITERPERFIELD_H + +#include "TermsHashConsumerPerField.h" + +namespace Lucene { + +class FreqProxTermsWriterPerField : public TermsHashConsumerPerField { +public: + FreqProxTermsWriterPerField(const TermsHashPerFieldPtr& termsHashPerField, const FreqProxTermsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); + virtual ~FreqProxTermsWriterPerField(); + + LUCENE_CLASS(FreqProxTermsWriterPerField); + +public: + FreqProxTermsWriterPerThreadWeakPtr _perThread; + TermsHashPerFieldWeakPtr _termsHashPerField; + FieldInfoPtr fieldInfo; + DocStatePtr docState; + FieldInvertStatePtr fieldState; + bool omitTermFreqAndPositions; + PayloadAttributePtr payloadAttribute; + bool hasPayloads; + +public: + virtual int32_t getStreamCount(); + virtual void finish(); + virtual void skippingLongTerm(); + virtual int32_t compareTo(const LuceneObjectPtr& other); + void reset(); + virtual bool start(Collection fields, int32_t count); + virtual void start(const FieldablePtr& field); + void writeProx(const FreqProxTermsWriterPostingListPtr& p, int32_t proxCode); + virtual void newTerm(const RawPostingListPtr& p); + virtual void addTerm(const RawPostingListPtr& p); + void abort(); +}; + +} + +#endif diff --git a/include/lucene++/FreqProxTermsWriterPerThread.h b/include/lucene++/FreqProxTermsWriterPerThread.h new file mode 100644 index 00000000..f9e4c6ac --- /dev/null +++ b/include/lucene++/FreqProxTermsWriterPerThread.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FREQPROXTERMSWRITERPERTHREAD_H +#define FREQPROXTERMSWRITERPERTHREAD_H + +#include "TermsHashConsumerPerThread.h" + +namespace Lucene { + +class FreqProxTermsWriterPerThread : public TermsHashConsumerPerThread { +public: + FreqProxTermsWriterPerThread(const TermsHashPerThreadPtr& perThread); + virtual ~FreqProxTermsWriterPerThread(); + + LUCENE_CLASS(FreqProxTermsWriterPerThread); + +public: + TermsHashPerThreadWeakPtr _termsHashPerThread; + DocStatePtr docState; + +public: + virtual TermsHashConsumerPerFieldPtr addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo); + virtual void startDocument(); + virtual DocWriterPtr finishDocument(); + virtual void abort(); +}; + +} + +#endif diff --git a/include/lucene++/FuzzyQuery.h b/include/lucene++/FuzzyQuery.h new file mode 100644 index 00000000..281616ea --- /dev/null +++ b/include/lucene++/FuzzyQuery.h @@ -0,0 +1,78 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FUZZYQUERY_H +#define FUZZYQUERY_H + +#include "MultiTermQuery.h" + +namespace Lucene { + +/// Implements the fuzzy search query. The similarity measurement is based on the Levenshtein (edit +/// distance) algorithm. +/// +/// Warning: this query is not very scalable with its default prefix length of 0 - in this case, *every* +/// term will be enumerated and cause an edit score calculation. 
+class LPPAPI FuzzyQuery : public MultiTermQuery { +public: + /// Create a new FuzzyQuery that will match terms with a similarity of at least minimumSimilarity + /// to term. If a prefixLength > 0 is specified, a common prefix of that length is also required. + /// @param term The term to search for + /// @param minimumSimilarity A value between 0 and 1 to set the required similarity between the query + /// term and the matching terms. For example, for a minimumSimilarity of 0.5 a term of the same + /// length as the query term is considered similar to the query term if the edit distance between + /// both terms is less than length(term) * 0.5 + /// @param prefixLength Length of common (non-fuzzy) prefix + FuzzyQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength); + FuzzyQuery(const TermPtr& term, double minimumSimilarity); + FuzzyQuery(const TermPtr& term); + + virtual ~FuzzyQuery(); + + LUCENE_CLASS(FuzzyQuery); + +protected: + double minimumSimilarity; + int32_t prefixLength; + bool termLongEnough; + + TermPtr term; + +public: + static double defaultMinSimilarity(); + static const int32_t defaultPrefixLength; + +public: + using MultiTermQuery::toString; + + /// Returns the minimum similarity that is required for this query to match. + /// @return float value between 0.0 and 1.0 + double getMinSimilarity(); + + /// Returns the non-fuzzy prefix length. This is the number of characters at the start of a term that + /// must be identical (not fuzzy) to the query term if the query is to match that term. + int32_t getPrefixLength(); + + /// Returns the pattern term. 
+ TermPtr getTerm(); + + virtual void setRewriteMethod(const RewriteMethodPtr& method); + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual String toString(const String& field); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); + +protected: + void ConstructQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength); + + virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/FuzzyTermEnum.h b/include/lucene++/FuzzyTermEnum.h new file mode 100644 index 00000000..de391f05 --- /dev/null +++ b/include/lucene++/FuzzyTermEnum.h @@ -0,0 +1,116 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef FUZZYTERMENUM_H +#define FUZZYTERMENUM_H + +#include "FilteredTermEnum.h" + +namespace Lucene { + +/// Subclass of FilteredTermEnum for enumerating all terms that are similar to the specified filter term. +/// +/// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater +/// than all that precede it. +class LPPAPI FuzzyTermEnum : public FilteredTermEnum { +public: + /// Constructor for enumeration of all terms from specified reader which share a prefix of length + /// prefixLength with term and which have a fuzzy similarity > minSimilarity. + /// + /// After calling the constructor the enumeration is already pointing to the first valid term if + /// such a term exists. + /// @param reader Delivers terms. + /// @param term Pattern term. + /// @param minSimilarity Minimum required similarity for terms from the reader. 
Default value is 0.5. + /// @param prefixLength Length of required common prefix. Default value is 0. + FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity, int32_t prefixLength); + FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity); + FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term); + + virtual ~FuzzyTermEnum(); + + LUCENE_CLASS(FuzzyTermEnum); + +protected: + /// Allows us save time required to create a new array every time similarity is called. + Collection p; + Collection d; + + double _similarity; + bool _endEnum; + + TermPtr searchTerm; + String field; + String text; + String prefix; + + double minimumSimilarity; + double scale_factor; + +public: + virtual double difference(); + virtual bool endEnum(); + virtual void close(); + +protected: + void ConstructTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity, int32_t prefixLength); + + /// The termCompare method in FuzzyTermEnum uses Levenshtein distance to calculate the distance between + /// the given term and the comparing term. + virtual bool termCompare(const TermPtr& term); + + /// + /// Compute Levenshtein distance + /// + /// Similarity returns a number that is 1.0f or less (including negative numbers) based on how similar the + /// Term is compared to a target term. It returns exactly 0.0 when + ///
+    /// editDistance > maximumEditDistance
+    /// 
+ /// + /// Otherwise it returns: + ///
+    /// 1 - (editDistance / length)
+    /// 
+ /// where length is the length of the shortest term (text or target) including a prefix that are identical + /// and editDistance is the Levenshtein distance for the two words. + /// + /// Embedded within this algorithm is a fail-fast Levenshtein distance algorithm. The fail-fast algorithm + /// differs from the standard Levenshtein distance algorithm in that it is aborted if it is discovered that + /// the minimum distance between the words is greater than some threshold. + /// + /// To calculate the maximum distance threshold we use the following formula: + ///
+    /// (1 - minimumSimilarity) * length
+    /// 
+ /// where length is the shortest term including any prefix that is not part of the similarity comparison. + /// This formula was derived by solving for what maximum value of distance returns false for the following + /// statements: + ///
+    /// similarity = 1 - ((double)distance / (double)(prefixLength + std::min(textlen, targetlen)));
+    /// return (similarity > minimumSimilarity);
+    /// 
+ /// where distance is the Levenshtein distance for the two words. + /// + /// Levenshtein distance (also known as edit distance) is a measure of similarity between two strings where + /// the distance is measured as the number of character deletions, insertions or substitutions required to + /// transform one string to the other string. + /// + /// @param target The target word or phrase. + /// @return the similarity, 0.0 or less indicates that it matches less than the required threshold and 1.0 + /// indicates that the text and target are identical. + double similarity(const String& target); + + /// The max Distance is the maximum Levenshtein distance for the text compared to some other value that + /// results in score that is better than the minimum similarity. + /// @param m The length of the "other value" + /// @return The maximum Levenshtein distance that we care about + int32_t calculateMaxDistance(int32_t m); +}; + +} + +#endif diff --git a/include/lucene++/HashMap.h b/include/lucene++/HashMap.h new file mode 100644 index 00000000..ff2efa1c --- /dev/null +++ b/include/lucene++/HashMap.h @@ -0,0 +1,168 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef HASHMAP_H +#define HASHMAP_H + +#include +#include "LuceneSync.h" + +namespace Lucene { + +/// Utility template class to handle hash maps that can be safely copied and shared +template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > +class HashMap : public LuceneSync { +public: + typedef HashMap this_type; + typedef std::pair key_value; + typedef boost::unordered_map map_type; + typedef typename map_type::iterator iterator; + typedef typename map_type::const_iterator const_iterator; + typedef KEY key_type; + typedef VALUE value_type; + + virtual ~HashMap() { + } + +protected: + boost::shared_ptr mapContainer; + +public: + static this_type newInstance() { + this_type instance; + instance.mapContainer = Lucene::newInstance(); + return instance; + } + + void reset() { + mapContainer.reset(); + } + + int32_t size() const { + return (int32_t)mapContainer->size(); + } + + bool empty() const { + return mapContainer->empty(); + } + + void clear() { + mapContainer->clear(); + } + + iterator begin() { + return mapContainer->begin(); + } + + iterator end() { + return mapContainer->end(); + } + + const_iterator begin() const { + return mapContainer->begin(); + } + + const_iterator end() const { + return mapContainer->end(); + } + + operator bool() const { + return mapContainer.get() != NULL; + } + + bool operator! 
() const { + return !mapContainer; + } + + map_type& operator= (const map_type& other) { + mapContainer = other.mapContainer; + return *this; + } + + void put(const KEY& key, const VALUE& value) { + (*mapContainer)[key] = value; + } + + template + void putAll(ITER first, ITER last) { + for (iterator current = first; current != last; ++current) { + (*mapContainer)[current->first] = current->second; + } + } + + template + void remove(ITER pos) { + mapContainer->erase(pos); + } + + template + ITER remove(ITER first, ITER last) { + return mapContainer->erase(first, last); + } + + bool remove(const KEY& key) { + return (mapContainer->erase(key) > 0); + } + + iterator find(const KEY& key) { + return mapContainer->find(key); + } + + VALUE get(const KEY& key) const { + iterator findValue = mapContainer->find(key); + return findValue == mapContainer->end() ? VALUE() : findValue->second; + } + + bool contains(const KEY& key) const { + return (mapContainer->find(key) != mapContainer->end()); + } + + VALUE& operator[] (const KEY& key) { + return (*mapContainer)[key]; + } +}; + +/// Utility template class to handle weak keyed maps +template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > +class WeakHashMap : public HashMap { +public: + typedef WeakHashMap this_type; + typedef std::pair key_value; + typedef typename boost::unordered_map map_type; + typedef typename map_type::iterator iterator; + + static this_type newInstance() { + this_type instance; + instance.mapContainer = Lucene::newInstance(); + return instance; + } + + void removeWeak() { + if (!this->mapContainer || this->mapContainer->empty()) { + return; + } + map_type clearCopy; + for (iterator key = this->mapContainer->begin(); key != this->mapContainer->end(); ++key) { + if (!key->first.expired()) { + clearCopy.insert(*key); + } + } + this->mapContainer->swap(clearCopy); + } + + VALUE get(const KEY& key) { + iterator findValue = this->mapContainer->find(key); + if (findValue != 
this->mapContainer->end()) { + return findValue->second; + } + removeWeak(); + return VALUE(); + } +}; + +} + +#endif diff --git a/include/lucene++/HashSet.h b/include/lucene++/HashSet.h new file mode 100644 index 00000000..f6036fd7 --- /dev/null +++ b/include/lucene++/HashSet.h @@ -0,0 +1,114 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef HASHSET_H +#define HASHSET_H + +#include +#include "LuceneSync.h" + +namespace Lucene { + +/// Utility template class to handle hash set collections that can be safely copied and shared +template < class TYPE, class HASH = boost::hash, class EQUAL = std::equal_to > +class HashSet : public LuceneSync { +public: + typedef HashSet this_type; + typedef boost::unordered_set set_type; + typedef typename set_type::iterator iterator; + typedef typename set_type::const_iterator const_iterator; + typedef TYPE value_type; + + virtual ~HashSet() { + } + +protected: + boost::shared_ptr setContainer; + +public: + static this_type newInstance() { + this_type instance; + instance.setContainer = Lucene::newInstance(); + return instance; + } + + template + static this_type newInstance(ITER first, ITER last) { + this_type instance; + instance.setContainer = Lucene::newInstance(first, last); + return instance; + } + + void reset() { + setContainer.reset(); + } + + int32_t size() const { + return (int32_t)setContainer->size(); + } + + bool empty() const { + return setContainer->empty(); + } + + void clear() { + setContainer->clear(); + } + + iterator begin() { + return setContainer->begin(); + } + + iterator end() { + return setContainer->end(); + } + + const_iterator begin() const { + return setContainer->begin(); + } + + const_iterator end() 
const { + return setContainer->end(); + } + + operator bool() const { + return setContainer.get() != NULL; + } + + bool operator! () const { + return !setContainer; + } + + set_type& operator= (const set_type& other) { + setContainer = other.setContainer; + return *this; + } + + bool add(const TYPE& type) { + return setContainer->insert(type).second; + } + + template + void addAll(ITER first, ITER last) { + setContainer->insert(first, last); + } + + bool remove(const TYPE& type) { + return (setContainer->erase(type) > 0); + } + + iterator find(const TYPE& type) { + return setContainer->find(type); + } + + bool contains(const TYPE& type) const { + return (setContainer->find(type) != setContainer->end()); + } +}; + +} + +#endif diff --git a/include/lucene++/HitQueue.h b/include/lucene++/HitQueue.h new file mode 100644 index 00000000..db61e81a --- /dev/null +++ b/include/lucene++/HitQueue.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef HITQUEUE_H +#define HITQUEUE_H + +#include "HitQueueBase.h" + +namespace Lucene { + +class LPPAPI HitQueue : public HitQueueBase { +public: + /// Creates a new instance with size elements. + HitQueue(int32_t size, bool prePopulate); + virtual ~HitQueue(); + + LUCENE_CLASS(HitQueue); + +protected: + bool prePopulate; + +protected: + virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); + + /// Returns null if prePopulate is false. 
+ virtual ScoreDocPtr getSentinelObject(); +}; + +} + +#endif diff --git a/include/lucene++/HitQueueBase.h b/include/lucene++/HitQueueBase.h new file mode 100644 index 00000000..dea149a3 --- /dev/null +++ b/include/lucene++/HitQueueBase.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef HITQUEUEBASE_H +#define HITQUEUEBASE_H + +#include "PriorityQueue.h" + +namespace Lucene { + +class LPPAPI HitQueueBase : public LuceneObject { +public: + HitQueueBase(int32_t size); + virtual ~HitQueueBase(); + + LUCENE_CLASS(HitQueueBase); + +public: + virtual ScoreDocPtr add(const ScoreDocPtr& scoreDoc); + virtual ScoreDocPtr addOverflow(const ScoreDocPtr& scoreDoc); + virtual ScoreDocPtr top(); + virtual ScoreDocPtr pop(); + virtual ScoreDocPtr updateTop(); + virtual int32_t size(); + virtual bool empty(); + virtual void clear(); + +protected: + PriorityQueueScoreDocsPtr queue; + int32_t queueSize; + +public: + virtual void initialize(); + +protected: + virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) = 0; + virtual ScoreDocPtr getSentinelObject(); + + friend class PriorityQueueScoreDocs; +}; + +class LPPAPI PriorityQueueScoreDocs : public PriorityQueue { +public: + PriorityQueueScoreDocs(const HitQueueBasePtr& hitQueue, int32_t size); + virtual ~PriorityQueueScoreDocs(); + + LUCENE_CLASS(PriorityQueueScoreDocs); + +protected: + HitQueueBaseWeakPtr _hitQueue; + +protected: + virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); + virtual ScoreDocPtr getSentinelObject(); +}; + +} + +#endif diff --git a/include/lucene++/ISOLatin1AccentFilter.h b/include/lucene++/ISOLatin1AccentFilter.h new file mode 100644 index 
00000000..1c6a7562 --- /dev/null +++ b/include/lucene++/ISOLatin1AccentFilter.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ISOLATIN1ACCENTFILTER_H +#define ISOLATIN1ACCENTFILTER_H + +#include "TokenFilter.h" + +namespace Lucene { + +/// A filter that replaces accented characters in the ISO Latin 1 character set (ISO-8859-1) by their unaccented +/// equivalent. The case will not be altered. +/// +/// For instance, 'à' will be replaced by 'a'. +/// +/// @deprecated If you build a new index, use {@link ASCIIFoldingFilter} which covers a superset of Latin 1. +/// This class is included for use with existing indexes and will be removed in a future release (possibly Lucene 4.0). +class LPPAPI ISOLatin1AccentFilter : public TokenFilter { +public: + ISOLatin1AccentFilter(const TokenStreamPtr& input); + virtual ~ISOLatin1AccentFilter(); + + LUCENE_CLASS(ISOLatin1AccentFilter); + +protected: + CharArray output; + int32_t outputPos; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); + + /// To replace accented characters in a String by unaccented equivalents. + void removeAccents(const wchar_t* input, int32_t length); +}; + +} + +#endif diff --git a/include/lucene++/IndexCommit.h b/include/lucene++/IndexCommit.h new file mode 100644 index 00000000..cc14322a --- /dev/null +++ b/include/lucene++/IndexCommit.h @@ -0,0 +1,76 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXCOMMIT_H +#define INDEXCOMMIT_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Represents a single commit into an index as seen by the {@link IndexDeletionPolicy} or {@link IndexReader}. +/// +/// Changes to the content of an index are made visible only after the writer who made that change commits by +/// writing a new segments file (segments_N). This point in time, when the action of writing of a new segments +/// file to the directory is completed, is an index commit. +/// +/// Each index commit point has a unique segments file associated with it. The segments file associated with a +/// later index commit point would have a larger N. +class LPPAPI IndexCommit : public LuceneObject { +public: + virtual ~IndexCommit(); + + LUCENE_CLASS(IndexCommit); + +public: + /// Get the segments file (segments_N) associated with this commit point. + virtual String getSegmentsFileName() = 0; + + /// Returns all index files referenced by this commit point. + virtual HashSet getFileNames() = 0; + + /// Returns the {@link Directory} for the index. + virtual DirectoryPtr getDirectory() = 0; + + /// Delete this commit point. This only applies when using the commit point in the context of IndexWriter's + /// IndexDeletionPolicy. + /// + /// Upon calling this, the writer is notified that this commit point should be deleted. + /// + /// Decision that a commit-point should be deleted is taken by the {@link IndexDeletionPolicy} in effect + /// and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or + /// {@link IndexDeletionPolicy#onCommit onCommit()} methods. + virtual void deleteCommit() = 0; + + virtual bool isDeleted() = 0; + + /// Returns true if this commit is an optimized index. + virtual bool isOptimized() = 0; + + /// Two IndexCommits are equal if both their Directory and versions are equal. 
+ virtual bool equals(const LuceneObjectPtr& other); + + virtual int32_t hashCode(); + + /// Returns the version for this IndexCommit. This is the same value that {@link IndexReader#getVersion} + /// would return if it were opened on this commit. + virtual int64_t getVersion() = 0; + + /// Returns the generation (the _N in segments_N) for this IndexCommit. + virtual int64_t getGeneration() = 0; + + /// Convenience method that returns the last modified time of the segments_N file corresponding to this + /// index commit, equivalent to getDirectory()->fileModified(getSegmentsFileName()). + virtual int64_t getTimestamp(); + + /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. Map is + /// String -> String. + virtual MapStringString getUserData() = 0; +}; + +} + +#endif diff --git a/include/lucene++/IndexDeletionPolicy.h b/include/lucene++/IndexDeletionPolicy.h new file mode 100644 index 00000000..14d2a4f0 --- /dev/null +++ b/include/lucene++/IndexDeletionPolicy.h @@ -0,0 +1,71 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXDELETIONPOLICY_H +#define INDEXDELETIONPOLICY_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Policy for deletion of stale {@link IndexCommit index commits}. +/// Implement this interface, and pass +/// it to one of the {@link IndexWriter} or {@link IndexReader} constructors, to customize when older +/// {@link IndexCommit point-in-time commits} are deleted from the index directory. The default deletion +/// policy is {@link KeepOnlyLastCommitDeletionPolicy}, which always removes old commits as soon as a new +/// commit is done (this matches the behavior before 2.2). 
+/// +/// One expected use case for this (and the reason why it was first created) is to work around problems +/// with an index directory accessed via filesystems like NFS because NFS does not provide the "delete on +/// last close" semantics that Lucene's "point in time" search normally relies on. By implementing a +/// custom deletion policy, such as "a commit is only removed once it has been stale for more than X +/// minutes", you can give your readers time to refresh to the new commit before {@link IndexWriter} +/// removes the old commits. Note that doing so will increase the storage requirements of the index. +class LPPAPI IndexDeletionPolicy : public LuceneObject { +protected: + IndexDeletionPolicy(); + +public: + virtual ~IndexDeletionPolicy(); + + LUCENE_CLASS(IndexDeletionPolicy); + +public: + /// This is called once when a writer is first instantiated to give the policy a chance to remove old + /// commit points. + /// + /// The writer locates all index commits present in the index directory and calls this method. The + /// policy may choose to delete some of the commit points, doing so by calling method {@link + /// IndexCommit#delete delete()} of {@link IndexCommit}. + /// + /// Note: the last CommitPoint is the most recent one, ie. the "front index state". Be careful not to + /// delete it, unless you know for sure what you are doing, and unless you can afford to lose the + /// index content while doing that. + /// + /// @param commits List of current {@link IndexCommit point-in-time commits}, sorted by age (the 0th + /// one is the oldest commit). + virtual void onInit(Collection commits) = 0; + + /// This is called each time the writer completed a commit. This gives the policy a chance to remove + /// old commit points with each commit. + /// + /// The policy may now choose to delete old commit points by calling method {@link + /// IndexCommit#delete delete()} of {@link IndexCommit}. 
+ /// + /// This method is only called when {@link IndexWriter#commit} or {@link IndexWriter#close} is called, + /// or possibly not at all if the {@link IndexWriter#rollback} is called. + /// + /// Note: the last CommitPoint is the most recent one, ie. the "front index state". Be careful not to + /// delete it, unless you know for sure what you are doing, and unless you can afford to lose the + /// index content while doing that. + /// + /// @param commits List of {@link IndexCommit}, sorted by age (the 0th one is the oldest commit). + virtual void onCommit(Collection commits) = 0; +}; + +} + +#endif diff --git a/include/lucene++/IndexFileDeleter.h b/include/lucene++/IndexFileDeleter.h new file mode 100644 index 00000000..474f5adb --- /dev/null +++ b/include/lucene++/IndexFileDeleter.h @@ -0,0 +1,202 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXFILEDELETER_H +#define INDEXFILEDELETER_H + +#include "IndexCommit.h" + +namespace Lucene { + +/// This class keeps track of each SegmentInfos instance that is still "live", either because it corresponds to a +/// segments_N file in the Directory (a "commit", ie. a committed SegmentInfos) or because it's an in-memory +/// SegmentInfos that a writer is actively updating but has not yet committed. This class uses simple reference +/// counting to map the live SegmentInfos instances to individual files in the Directory. +/// +/// The same directory file may be referenced by more than one IndexCommit, i.e. more than one SegmentInfos. +/// Therefore we count how many commits reference each file. 
When all the commits referencing a certain file have +/// been deleted, the refcount for that file becomes zero, and the file is deleted. +/// +/// A separate deletion policy interface (IndexDeletionPolicy) is consulted on creation (onInit) and once per +/// commit (onCommit), to decide when a commit should be removed. +/// +/// It is the business of the IndexDeletionPolicy to choose when to delete commit points. The actual mechanics of +/// file deletion, retrying, etc, derived from the deletion of commit points is the business of the IndexFileDeleter. +/// +/// The current default deletion policy is {@link KeepOnlyLastCommitDeletionPolicy}, which removes all prior commits +/// when a new commit has completed. This matches the behavior before 2.2. +/// +/// Note that you must hold the write.lock before instantiating this class. It opens segments_N file(s) directly +/// with no retry logic. +class LPPAPI IndexFileDeleter : public LuceneObject { +public: + /// Initialize the deleter: find all previous commits in the Directory, incref the files they reference, call + /// the policy to let it delete commits. This will remove any files not referenced by any of the commits. + IndexFileDeleter(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& policy, const SegmentInfosPtr& segmentInfos, const InfoStreamPtr& infoStream, const DocumentsWriterPtr& docWriter, HashSet synced); + virtual ~IndexFileDeleter(); + + LUCENE_CLASS(IndexFileDeleter); + +protected: + /// Files that we tried to delete but failed (likely because they are open and we are running on Windows), + /// so we will retry them again later + HashSet deletable; + + /// Reference count for all files in the index. Counts how many existing commits reference a file. + MapStringRefCount refCounts; + + /// Holds all commits (segments_N) currently in the index. This will have just 1 commit if you are using the + /// default delete policy (KeepOnlyLastCommitDeletionPolicy). 
Other policies may leave commit points live for + /// longer in which case this list would be longer than 1 + Collection commits; + + /// Holds files we had incref'd from the previous non-commit checkpoint + Collection< HashSet > lastFiles; + + /// Commits that the IndexDeletionPolicy have decided to delete + Collection commitsToDelete; + + InfoStreamPtr infoStream; + DirectoryPtr directory; + IndexDeletionPolicyPtr policy; + DocumentsWriterPtr docWriter; + + SegmentInfosPtr lastSegmentInfos; + HashSet synced; + + /// Change to true to see details of reference counts when infoStream != null + static bool VERBOSE_REF_COUNTS; + +public: + bool startingCommitDeleted; + +protected: + void message(const String& message); + + /// Remove the CommitPoints in the commitsToDelete List by DecRef'ing all files from each SegmentInfos. + void deleteCommits(); + + void deletePendingFiles(); + + RefCountPtr getRefCount(const String& fileName); + +public: + void setInfoStream(const InfoStreamPtr& infoStream); + + SegmentInfosPtr getLastSegmentInfos(); + + /// Writer calls this when it has hit an error and had to roll back, to tell us that there may now be + /// unreferenced files in the filesystem. So we re-list the filesystem and delete such files. If + /// segmentName is non-null, we will only delete files corresponding to that segment. + void refresh(const String& segmentName); + void refresh(); + + void close(); + + /// For definition of "check point" see IndexWriter comments: "Clarification: Check Points (and commits)". + /// Writer calls this when it has made a "consistent change" to the index, meaning new files are written to + /// the index and the in-memory SegmentInfos have been modified to point to those files. + /// + /// This may or may not be a commit (segments_N may or may not have been written). + /// + /// We simply incref the files referenced by the new SegmentInfos and decref the files we had previously + /// seen (if any). 
+ /// + /// If this is a commit, we also call the policy to give it a chance to remove other commits. If any + /// commits are removed, we decref their files as well. + void checkpoint(const SegmentInfosPtr& segmentInfos, bool isCommit); + + void incRef(const SegmentInfosPtr& segmentInfos, bool isCommit); + void incRef(HashSet files); + void incRef(const String& fileName); + void decRef(HashSet files); + void decRef(const String& fileName); + void decRef(const SegmentInfosPtr& segmentInfos); + + bool exists(const String& fileName); + + void deleteFiles(HashSet files); + + /// Deletes the specified files, but only if they are new (have not yet been incref'd). + void deleteNewFiles(HashSet files); + + void deleteFile(const String& fileName); +}; + +/// Tracks the reference count for a single index file +class RefCount : public LuceneObject { +public: + RefCount(const String& fileName); + virtual ~RefCount(); + + LUCENE_CLASS(RefCount); + +public: + String fileName; // fileName used only for better assert error messages + bool initDone; + int32_t count; + +public: + int32_t IncRef(); + int32_t DecRef(); +}; + +/// Holds details for each commit point. This class is also passed to the deletion policy. Note: this class +/// has a natural ordering that is inconsistent with equals. +class CommitPoint : public IndexCommit { +public: + CommitPoint(Collection commitsToDelete, const DirectoryPtr& directory, const SegmentInfosPtr& segmentInfos); + virtual ~CommitPoint(); + + LUCENE_CLASS(CommitPoint); + +public: + int64_t gen; + HashSet files; + String segmentsFileName; + bool deleted; + DirectoryPtr directory; + Collection commitsToDelete; + int64_t version; + int64_t generation; + bool _isOptimized; + MapStringString userData; + +public: + virtual String toString(); + + /// Returns true if this commit is an optimized index. + virtual bool isOptimized(); + + /// Get the segments file (segments_N) associated with this commit point. 
+ virtual String getSegmentsFileName(); + + /// Returns all index files referenced by this commit point. + virtual HashSet getFileNames(); + + /// Returns the {@link Directory} for the index. + virtual DirectoryPtr getDirectory(); + + /// Returns the version for this IndexCommit. + virtual int64_t getVersion(); + + /// Returns the generation (the _N in segments_N) for this IndexCommit. + virtual int64_t getGeneration(); + + /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. + virtual MapStringString getUserData(); + + /// Called only be the deletion policy, to remove this commit point from the index. + virtual void deleteCommit(); + + virtual bool isDeleted(); + + virtual int32_t compareTo(const LuceneObjectPtr& other); +}; + +} + +#endif diff --git a/include/lucene++/IndexFileNameFilter.h b/include/lucene++/IndexFileNameFilter.h new file mode 100644 index 00000000..d101ad3c --- /dev/null +++ b/include/lucene++/IndexFileNameFilter.h @@ -0,0 +1,31 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXFILENAMEFILTER_H +#define INDEXFILENAMEFILTER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Filename filter that accept filenames and extensions only created by Lucene. +class LPPAPI IndexFileNameFilter : public LuceneObject { +public: + /// Returns true if this is a file known to be a Lucene index file. + static bool accept(const String& directory, const String& name); + + /// Returns true if this is a file that would be contained in a CFS file. + /// This function should only be called on files that pass the + /// {@link #accept} (ie, are already known to be a Lucene index file). 
+ static bool isCFSFile(const String& name); + + /// Return singleton IndexFileNameFilter + static IndexFileNameFilterPtr getFilter(); +}; + +} + +#endif diff --git a/include/lucene++/IndexFileNames.h b/include/lucene++/IndexFileNames.h new file mode 100644 index 00000000..a62e6ef1 --- /dev/null +++ b/include/lucene++/IndexFileNames.h @@ -0,0 +1,116 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXFILENAMES_H +#define INDEXFILENAMES_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Constants representing filenames and extensions used by Lucene. +class LPPAPI IndexFileNames : public LuceneObject { +public: + virtual ~IndexFileNames(); + LUCENE_CLASS(IndexFileNames); + +public: + /// Name of the index segment file. + static const String& SEGMENTS(); + + /// Name of the generation reference file name. + static const String& SEGMENTS_GEN(); + + /// Name of the index deletable file (only used in pre-lockless indices). + static const String& DELETABLE(); + + /// Extension of norms file. + static const String& NORMS_EXTENSION(); + + /// Extension of freq postings file. + static const String& FREQ_EXTENSION(); + + /// Extension of prox postings file. + static const String& PROX_EXTENSION(); + + /// Extension of terms file. + static const String& TERMS_EXTENSION(); + + /// Extension of terms index file. + static const String& TERMS_INDEX_EXTENSION(); + + /// Extension of stored fields index file. + static const String& FIELDS_INDEX_EXTENSION(); + + /// Extension of stored fields file. + static const String& FIELDS_EXTENSION(); + + /// Extension of vectors fields file. 
+ static const String& VECTORS_FIELDS_EXTENSION(); + + /// Extension of vectors documents file. + static const String& VECTORS_DOCUMENTS_EXTENSION(); + + /// Extension of vectors index file. + static const String& VECTORS_INDEX_EXTENSION(); + + /// Extension of compound file. + static const String& COMPOUND_FILE_EXTENSION(); + + /// Extension of compound file for doc store files. + static const String& COMPOUND_FILE_STORE_EXTENSION(); + + /// Extension of deletes. + static const String& DELETES_EXTENSION(); + + /// Extension of field infos. + static const String& FIELD_INFOS_EXTENSION(); + + /// Extension of plain norms. + static const String& PLAIN_NORMS_EXTENSION(); + + /// Extension of separate norms. + static const String& SEPARATE_NORMS_EXTENSION(); + + /// Extension of gen file. + static const String& GEN_EXTENSION(); + + /// This array contains all filename extensions used by Lucene's index + /// files, with two exceptions, namely the extension made up from + /// ".f" + number and from ".s" + number. Also note that Lucene's + /// "segments_N" files do not have any filename extension. + static const HashSet INDEX_EXTENSIONS(); + + /// File extensions that are added to a compound file (same as + /// {@link #INDEX_EXTENSIONS}, minus "del", "gen", "cfs"). + static const HashSet INDEX_EXTENSIONS_IN_COMPOUND_FILE(); + + static const HashSet STORE_INDEX_EXTENSIONS(); + static const HashSet NON_STORE_INDEX_EXTENSIONS(); + + /// File extensions of old-style index files. + static const HashSet COMPOUND_EXTENSIONS(); + + /// File extensions for term vector support. + static const HashSet VECTOR_EXTENSIONS(); + + /// Computes the full file name from base, extension and generation. + /// If the generation is {@link SegmentInfo#NO}, the file name is null. + /// If it's {@link SegmentInfo#WITHOUT_GEN} the file name is base+extension. + /// If it's > 0, the file name is base_generation+extension. 
+ static String fileNameFromGeneration(const String& base, const String& extension, int64_t gen); + + /// Returns true if the provided filename is one of the doc store files + /// (ends with an extension in STORE_INDEX_EXTENSIONS). + static bool isDocStoreFile(const String& fileName); + + /// Return segment file name. + static String segmentFileName(const String& segmentName, const String& ext); +}; + +} + +#endif diff --git a/include/lucene++/IndexInput.h b/include/lucene++/IndexInput.h new file mode 100644 index 00000000..198ab51a --- /dev/null +++ b/include/lucene++/IndexInput.h @@ -0,0 +1,124 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXINPUT_H +#define INDEXINPUT_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Abstract base class for input from a file in a {@link Directory}. +/// A random-access input stream. Used for all Lucene index input operations. +/// @see Directory +class LPPAPI IndexInput : public LuceneObject { +public: + IndexInput(); + virtual ~IndexInput(); + + LUCENE_CLASS(IndexInput); + +protected: + bool preUTF8Strings; // true if we are reading old (modified UTF8) string format + +public: + /// Reads and returns a single byte. + /// @see IndexOutput#writeByte(uint8_t) + virtual uint8_t readByte() = 0; + + /// Reads a specified number of bytes into an array at the specified offset. + /// @param b the array to read bytes into. + /// @param offset the offset in the array to start storing bytes. + /// @param length the number of bytes to read. 
+ /// @see IndexOutput#writeBytes(const uint8_t*, int) + virtual void readBytes(uint8_t* b, int32_t offset, int32_t length) = 0; + + /// Reads a specified number of bytes into an array at the specified offset + /// with control over whether the read should be buffered (callers who have + /// their own buffer should pass in "false" for useBuffer). Currently only + /// {@link BufferedIndexInput} respects this parameter. + /// @param b the array to read bytes into. + /// @param offset the offset in the array to start storing bytes. + /// @param length the number of bytes to read. + /// @param useBuffer set to false if the caller will handle buffering. + /// @see IndexOutput#writeBytes(const uint8_t*,int) + virtual void readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer); + + /// Reads four bytes and returns an int. + /// @see IndexOutput#writeInt(int32_t) + virtual int32_t readInt(); + + /// Reads an int stored in variable-length format. Reads between one and five + /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. + /// @see IndexOutput#writeVInt(int32_t) + virtual int32_t readVInt(); + + /// Reads eight bytes and returns a int64. + /// @see IndexOutput#writeLong(int64_t) + virtual int64_t readLong(); + + /// Reads a int64 stored in variable-length format. Reads between one and nine + /// bytes. Smaller values take fewer bytes. Negative numbers are not supported. + virtual int64_t readVLong(); + + /// Call this if readString should read characters stored in the old modified + /// UTF8 format. This is used for indices written pre-2.4. + virtual void setModifiedUTF8StringsMode(); + + /// Reads a string. + /// @see IndexOutput#writeString(const String&) + virtual String readString(); + + /// Reads a modified UTF8 format string. + virtual String readModifiedUTF8String(); + + /// Reads Lucene's old "modified UTF-8" encoded characters into an array. + /// @param buffer the array to read characters into. 
+ /// @param start the offset in the array to start storing characters. + /// @param length the number of characters to read. + /// @see IndexOutput#writeChars(const String& s, int32_t, int32_t) + virtual int32_t readChars(wchar_t* buffer, int32_t start, int32_t length); + + /// Similar to {@link #readChars(wchar_t*, int32_t, int32_t)} but does not + /// do any conversion operations on the bytes it is reading in. It still + /// has to invoke {@link #readByte()} just as {@link #readChars(wchar_t*, int32_t, int32_t)} + /// does, but it does not need a buffer to store anything and it does not have + /// to do any of the bitwise operations, since we don't actually care what is + /// in the byte except to determine how many more bytes to read. + /// @param length The number of chars to read. + /// @deprecated this method operates on old "modified utf8" encoded strings. + virtual void skipChars(int32_t length); + + /// Closes the stream to further operations. + virtual void close() = 0; + + /// Returns the current position in this file, where the next read will occur. + /// @see #seek(int64_t) + virtual int64_t getFilePointer() = 0; + + /// Sets current position in this file, where the next read will occur. + /// @see #getFilePointer() + virtual void seek(int64_t pos) = 0; + + /// The number of bytes in the file. + virtual int64_t length() = 0; + + /// Returns a clone of this stream. + /// + /// Clones of a stream access the same data, and are positioned at the same + /// point as the stream they were cloned from. + /// + /// Subclasses must ensure that clones may be positioned at different points + /// in the input from each other and from the stream they were cloned from. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Read string map as a series of key/value pairs. 
+ virtual MapStringString readStringStringMap(); +}; + +} + +#endif diff --git a/include/lucene++/IndexOutput.h b/include/lucene++/IndexOutput.h new file mode 100644 index 00000000..03073a9f --- /dev/null +++ b/include/lucene++/IndexOutput.h @@ -0,0 +1,108 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXOUTPUT_H +#define INDEXOUTPUT_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Abstract base class for output to a file in a Directory. A random-access output stream. Used for all +/// Lucene index output operations. +/// @see Directory +/// @see IndexInput +class LPPAPI IndexOutput : public LuceneObject { +public: + virtual ~IndexOutput(); + + LUCENE_CLASS(IndexOutput); + +protected: + static const int32_t COPY_BUFFER_SIZE; + ByteArray copyBuffer; + +public: + /// Writes a single byte. + /// @see IndexInput#readByte() + virtual void writeByte(uint8_t b) = 0; + + /// Writes an array of bytes. + /// @param b the bytes to write. + /// @param length the number of bytes to write. + /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) + virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length) = 0; + + /// Forces any buffered output to be written. + virtual void flush() = 0; + + /// Closes this stream to further operations. + virtual void close() = 0; + + /// Returns the current position in this file, where the next write will occur. + virtual int64_t getFilePointer() = 0; + + /// Sets current position in this file, where the next write will occur. + /// @see #getFilePointer() + virtual void seek(int64_t pos) = 0; + + /// The number of bytes in the file. + virtual int64_t length() = 0; + +public: + /// Writes an array of bytes. 
+ /// @param b the bytes to write. + /// @param length the number of bytes to write. + /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) + void writeBytes(const uint8_t* b, int32_t length); + + /// Writes an int as four bytes. + /// @see IndexInput#readInt() + void writeInt(int32_t i); + + /// Writes an int in a variable-length format. Writes between one and five bytes. Smaller values take fewer bytes. + /// Negative numbers are not supported. + /// @see IndexInput#readVInt() + void writeVInt(int32_t i); + + /// Writes a int64 as eight bytes. + /// @see IndexInput#readLong() + void writeLong(int64_t i); + + /// Writes an int64 in a variable-length format. Writes between one and five bytes. Smaller values take fewer bytes. + /// Negative numbers are not supported. + /// @see IndexInput#readVLong() + void writeVLong(int64_t i); + + /// Writes a string. + /// @see IndexInput#readString() + void writeString(const String& s); + + /// Writes a sub sequence of characters from s as the old format (modified UTF-8 encoded bytes). + /// @param s the source of the characters. + /// @param start the first character in the sequence. + /// @param length the number of characters in the sequence. + /// @deprecated -- please use {@link #writeString} + void writeChars(const String& s, int32_t start, int32_t length); + + /// Copy numBytes bytes from input to ourself. + void copyBytes(const IndexInputPtr& input, int64_t numBytes); + + /// Set the file length. By default, this method does nothing (it's optional for a Directory to implement it). + /// But, certain Directory implementations (for example @see FSDirectory) can use this to inform the underlying IO + /// system to pre-allocate the file to the specified size. If the length is longer than the current file length, + /// the bytes added to the file are undefined. Otherwise the file is truncated. + /// @param length file length. + void setLength(int64_t length); + + /// Write string map as a series of key/value pairs. 
+ /// @param map map of string-string key-values. + void writeStringStringMap(MapStringString map); +}; + +} + +#endif diff --git a/include/lucene++/IndexReader.h b/include/lucene++/IndexReader.h new file mode 100644 index 00000000..4a4aad54 --- /dev/null +++ b/include/lucene++/IndexReader.h @@ -0,0 +1,557 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXREADER_H +#define INDEXREADER_H + +#include "SegmentInfos.h" + +namespace Lucene { + +/// IndexReader is an abstract class, providing an interface for accessing an index. Search of an index is done +/// entirely through this abstract interface, so that any subclass which implements it is searchable. +/// +/// Concrete subclasses of IndexReader are usually constructed with a call to one of the static open methods, +/// eg. {@link #open(DirectoryPtr, bool)}. +/// +/// For efficiency, in this API documents are often referred to via document numbers, non-negative integers which +/// each name a unique document in the index. These document numbers are ephemeral -they may change as documents +/// are added to and deleted from an index. Clients should thus not rely on a given document having the same number +/// between sessions. +/// +/// An IndexReader can be opened on a directory for which an IndexWriter is opened already, but it cannot be used +/// to delete documents from the index then. +/// +/// NOTE: for backwards API compatibility, several methods are not listed as abstract, but have no useful implementations +/// in this base class and instead always throw UnsupportedOperation exception. Subclasses are strongly encouraged to +/// override these methods, but in many cases may not need to. 
+/// +/// NOTE: as of 2.4, it's possible to open a read-only IndexReader using the static open methods that accept the bool +/// readOnly parameter. Such a reader has better concurrency as it's not necessary to synchronize on the isDeleted +/// method. You must specify false if you want to make changes with the resulting IndexReader. +/// +/// NOTE: {@link IndexReader} instances are completely thread safe, meaning multiple threads can call any of its methods, +/// concurrently. If your application requires external synchronization, you should not synchronize on the IndexReader +/// instance; use your own (non-Lucene) objects instead. +class LPPAPI IndexReader : public LuceneObject { +public: + IndexReader(); + virtual ~IndexReader(); + + LUCENE_CLASS(IndexReader); + +public: + /// Constants describing field properties, for example used for {@link IndexReader#getFieldNames(FieldOption)}. + enum FieldOption { + /// All fields + FIELD_OPTION_ALL, + /// All indexed fields + FIELD_OPTION_INDEXED, + /// All fields that store payloads + FIELD_OPTION_STORES_PAYLOADS, + /// All fields that omit tf + FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS, + /// All fields which are not indexed + FIELD_OPTION_UNINDEXED, + /// All fields which are indexed with termvectors enabled + FIELD_OPTION_INDEXED_WITH_TERMVECTOR, + /// All fields which are indexed but don't have termvectors enabled + FIELD_OPTION_INDEXED_NO_TERMVECTOR, + /// All fields with termvectors enabled. 
Please note that only standard termvector fields are returned + FIELD_OPTION_TERMVECTOR, + /// All fields with termvectors with position values enabled + FIELD_OPTION_TERMVECTOR_WITH_POSITION, + /// All fields with termvectors with offset values enabled + FIELD_OPTION_TERMVECTOR_WITH_OFFSET, + /// All fields with termvectors with offset values and position values enabled + FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET + }; + + static const int32_t DEFAULT_TERMS_INDEX_DIVISOR; + +protected: + bool closed; + bool _hasChanges; + int32_t refCount; + +public: + /// Returns the current refCount for this reader + int32_t getRefCount(); + + /// Increments the refCount of this IndexReader instance. RefCounts are used to determine when a reader can be + /// closed safely, i.e. as soon as there are no more references. Be sure to always call a corresponding {@link + /// #decRef}, in a finally clause; otherwise the reader may never be closed. Note that {@link #close} simply + /// calls decRef(), which means that the IndexReader will not really be closed until {@link #decRef} has been + /// called for all outstanding references. + /// @see #decRef + void incRef(); + + /// Decreases the refCount of this IndexReader instance. If the refCount drops to 0, then pending changes + /// (if any) are committed to the index and this reader is closed. + /// @see #incRef + void decRef(); + + /// Returns a IndexReader reading the index in the given Directory, with readOnly = true. + /// @param directory the index directory + static IndexReaderPtr open(const DirectoryPtr& directory); + + /// Returns an IndexReader reading the index in the given Directory. You should pass readOnly = true, since it + /// gives much better concurrent performance, unless you intend to do write operations (delete documents or change + /// norms) with the reader. 
+ /// @param directory the index directory + /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + static IndexReaderPtr open(const DirectoryPtr& directory, bool readOnly); + + /// Returns an IndexReader reading the index in the given {@link IndexCommit}. You should pass readOnly = true, + /// since it gives much better concurrent performance, unless you intend to do write operations (delete documents + /// or change norms) with the reader. + /// @param commit the commit point to open + /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + static IndexReaderPtr open(const IndexCommitPtr& commit, bool readOnly); + + /// Returns an IndexReader reading the index in the given Directory, with a custom {@link IndexDeletionPolicy}. + /// You should pass readOnly=true, since it gives much better concurrent performance, unless you intend to do write + /// operations (delete documents or change norms) with the reader. + /// @param directory the index directory + /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform + /// deletes or to set norms); see {@link IndexWriter} for details. + /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly); + + /// Returns an IndexReader reading the index in the given Directory, with a custom {@link IndexDeletionPolicy}. + /// You should pass readOnly=true, since it gives much better concurrent performance, unless you intend to do write + /// operations (delete documents or change norms) with the reader. + /// @param directory the index directory + /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform + /// deletes or to set norms); see {@link IndexWriter} for details. 
+ /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the + /// same effect as {@link IndexWriter#setTermIndexInterval} except that setting must be done at + /// indexing time while this setting can be set per reader. When set to N, then one in every + /// N*termIndexInterval terms in the index is loaded into memory. By setting this to a value > 1 + /// you can reduce memory usage, at the expense of higher latency when loading a TermInfo. The + /// default value is 1. Set this to -1 to skip loading the terms index entirely. + static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); + + /// Returns an IndexReader reading the index in the given Directory, using a specific commit and with a custom + /// {@link IndexDeletionPolicy}. You should pass readOnly=true, since it gives much better concurrent performance, + /// unless you intend to do write operations (delete documents or change norms) with the reader. + /// @param commit the specific {@link IndexCommit} to open; see {@link IndexReader#listCommits} to list all + /// commits in a directory + /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform + /// deletes or to set norms); see {@link IndexWriter} for details. + /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + static IndexReaderPtr open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly); + + /// Returns an IndexReader reading the index in the given Directory, using a specific commit and with a custom {@link + /// IndexDeletionPolicy}. 
You should pass readOnly=true, since it gives much better concurrent performance, unless + /// you intend to do write operations (delete documents or change norms) with the reader. + /// @param commit the specific {@link IndexCommit} to open; see {@link IndexReader#listCommits} to + /// list all commits in a directory + /// @param deletionPolicy a custom deletion policy (only used if you use this reader to perform deletes + /// or to set norms); see {@link IndexWriter} for details. + /// @param readOnly true if no changes (deletions, norms) will be made with this IndexReader + /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. This has the same effect as + /// {@link IndexWriter#setTermIndexInterval} except that setting must be done at indexing time while this setting can + /// be set per reader. When set to N, then one in every N * termIndexInterval terms in the index is loaded into + /// memory. By setting this to a value > 1 you can reduce memory usage, at the expense of higher latency when loading + /// a TermInfo. The default value is 1. Set this to -1 to skip loading the terms index entirely. + static IndexReaderPtr open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor); + + /// Refreshes an IndexReader if the index has changed since this instance was (re)opened. + /// + /// Opening an IndexReader is an expensive operation. This method can be used to refresh an existing IndexReader to + /// reduce these costs. This method tries to only load segments that have changed or were created after the + /// IndexReader was (re)opened. + /// + /// If the index has not changed since this instance was (re)opened, then this call is a NOOP and returns this + /// instance. Otherwise, a new instance is returned. The old instance is not closed and remains usable. 
+ /// + /// If the reader is reopened, even though they share resources internally, it's safe to make changes (deletions, + /// norms) with the new reader. All shared mutable state obeys "copy on write" semantics to ensure the changes are + /// not seen by other readers. + /// + /// You can determine whether a reader was actually reopened by comparing the old instance with the + /// instance returned by this method: + /// + ///
+    /// IndexReaderPtr reader = ...
+    /// ...
+    /// IndexReaderPtr newReader = r.reopen();
+    /// if (newReader != reader)
+    /// {
+    ///     ... // reader was reopened
+    ///     reader->close();
+    /// }
+    /// reader = newReader;
+    /// ...
+    /// 
+ /// + /// Be sure to synchronize that code so that other threads, if present, can never use reader after it has been + /// closed and before it's switched to newReader. If this reader is a near real-time reader (obtained from + /// {@link IndexWriter#getReader()}, reopen() will simply call writer.getReader() again for you, though this + /// may change in the future. + virtual IndexReaderPtr reopen(); + + /// Just like {@link #reopen()}, except you can change the readOnly of the original reader. If the index is + /// unchanged but readOnly is different then a new reader will be returned. + virtual IndexReaderPtr reopen(bool openReadOnly); + + /// Reopen this reader on a specific commit point. This always returns a readOnly reader. If the specified commit + /// point matches what this reader is already on, and this reader is already readOnly, then this same instance is + /// returned; if it is not already readOnly, a readOnly clone is returned. + virtual IndexReaderPtr reopen(const IndexCommitPtr& commit); + + /// Efficiently clones the IndexReader (sharing most internal state). + /// + /// On cloning a reader with pending changes (deletions, norms), the original reader transfers its write lock to the + /// cloned reader. This means only the cloned reader may make further changes to the index, and commit the changes + /// to the index on close, but the old reader still reflects all changes made up until it was cloned. + /// + /// Like {@link #reopen()}, it's safe to make changes to either the original or the cloned reader: all shared mutable + /// state obeys "copy on write" semantics to ensure the changes are not seen by other readers. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Clones the IndexReader and optionally changes readOnly. A readOnly reader cannot open a writable reader. 
+ virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Returns the directory associated with this index. The default implementation returns the directory specified by + /// subclasses when delegating to the IndexReader(Directory) constructor, or throws an UnsupportedOperation exception + /// if one was not specified. + virtual DirectoryPtr directory(); + + /// Returns the time the index in the named directory was last modified. Do not use this to check + /// whether the reader is still up-to-date, use {@link #isCurrent()} instead. + static int64_t lastModified(const DirectoryPtr& directory2); + + /// Reads version number from segments files. The version number is initialized with a timestamp + /// and then increased by one for each change of the index. + /// @param directory where the index resides. + /// @return version number. + static int64_t getCurrentVersion(const DirectoryPtr& directory); + + /// Reads commitUserData, previously passed to {@link IndexWriter#commit(MapStringString)}, from + /// current index segments file. This will return null if {@link IndexWriter#commit(MapStringString)} + /// has never been called for this index. + static MapStringString getCommitUserData(const DirectoryPtr& directory); + + /// Version number when this IndexReader was opened. Not implemented in the IndexReader base class. + /// + /// If this reader is based on a Directory (ie, was created by calling {@link #open}, or {@link + /// #reopen} on a reader based on a Directory), then this method returns the version recorded in the + /// commit that the reader opened. This version is advanced every time {@link IndexWriter#commit} + /// is called. + /// + /// If instead this reader is a near real-time reader (ie, obtained by a call to {@link + /// IndexWriter#getReader}, or by calling {@link #reopen} on a near real-time reader), then this + /// method returns the version of the last commit done by the writer. 
Note that even as further + /// changes are made with the writer, the version will not change until a commit is completed. + /// Thus, you should not rely on this method to determine when a near real-time reader should be + /// opened. Use {@link #isCurrent} instead. + virtual int64_t getVersion(); + + /// Retrieve the String userData optionally passed to IndexWriter#commit. This will return null if + /// {@link IndexWriter#commit(MapStringString)} has never been called for this index. + virtual MapStringString getCommitUserData(); + + /// Check whether any new changes have occurred to the index since this reader was opened. + /// + /// If this reader is based on a Directory (ie, was created by calling {@link #open}, or {@link + /// #reopen} on a reader based on a Directory), then this method checks if any further commits (see + /// {@link IndexWriter#commit}) have occurred in that directory. + /// + /// If instead this reader is a near real-time reader (ie, obtained by a call to {@link + /// IndexWriter#getReader}, or by calling {@link #reopen} on a near real-time reader), then this + /// method checks if either a new commit has occurred, or any new uncommitted changes have taken + /// place via the writer. Note that even if the writer has only performed merging, this method + /// will still return false. + /// + /// In any event, if this returns false, you should call {@link #reopen} to get a new reader that + /// sees the changes. + virtual bool isCurrent(); + + /// Checks if the index is optimized (if it has a single segment and no deletions). Not implemented + /// in the IndexReader base class. + /// @return true if the index is optimized; false otherwise + virtual bool isOptimized(); + + /// Return an array of term frequency vectors for the specified document. The array contains a + /// vector for each vectorized field in the document. Each vector contains terms and frequencies + /// for all terms in a given vectorized field. 
If no such fields existed, the method returns null. + /// The term vectors that are returned may either be of type {@link TermFreqVector} or of type + /// {@link TermPositionVector} if positions or offsets have been stored. + /// + /// @param docNumber document for which term frequency vectors are returned + /// @return array of term frequency vectors. May be null if no term vectors have been stored for the + /// specified document. + virtual Collection getTermFreqVectors(int32_t docNumber) = 0; + + /// Return a term frequency vector for the specified document and field. The returned vector contains + /// terms and frequencies for the terms in the specified field of this document, if the field had the + /// storeTermVector flag set. If termvectors had been stored with positions or offsets, a + /// {@link TermPositionVector} is returned. + /// + /// @param docNumber document for which the term frequency vector is returned. + /// @param field field for which the term frequency vector is returned. + /// @return term frequency vector May be null if field does not exist in the specified document or + /// term vector was not stored. + virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field) = 0; + + /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays + /// of the {@link TermFreqVector}. + /// @param docNumber The number of the document to load the vector for + /// @param field The name of the field to load + /// @param mapper The {@link TermVectorMapper} to process the vector. Must not be null. + virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) = 0; + + /// Map all the term vectors for all fields in a Document + /// @param docNumber The number of the document to load the vector for + /// @param mapper The {@link TermVectorMapper} to process the vector. Must not be null. 
+ virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) = 0; + + /// Returns true if an index exists at the specified directory. If the directory does not exist or + /// if there is no index in it. + /// @param directory the directory to check for an index + /// @return true if an index exists; false otherwise + static bool indexExists(const DirectoryPtr& directory); + + /// Returns the number of documents in this index. + virtual int32_t numDocs() = 0; + + /// Returns one greater than the largest possible document number. This may be used to, eg., determine + /// how big to allocate an array which will have an element for every document number in an index. + virtual int32_t maxDoc() = 0; + + /// Returns the number of deleted documents. + int32_t numDeletedDocs(); + + /// Returns the stored fields of the n'th Document in this index. + /// + /// NOTE: for performance reasons, this method does not check if the requested document is deleted, and + /// therefore asking for a deleted document may yield unspecified results. Usually this is not required, + /// however you can call {@link #isDeleted(int)} with the requested document ID to verify the document + /// is not deleted. + virtual DocumentPtr document(int32_t n); + + /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine + /// what {@link Field}s to load and how they should be loaded. + /// NOTE: If this Reader (more specifically, the underlying FieldsReader) is closed before the lazy + /// {@link Field} is loaded an exception may be thrown. If you want the value of a lazy {@link Field} + /// to be available after closing you must explicitly load it or fetch the Document again with a new + /// loader. + /// + /// NOTE: for performance reasons, this method does not check if the requested document is deleted, + /// and therefore asking for a deleted document may yield unspecified results. 
Usually this is not + /// required, however you can call {@link #isDeleted(int32_t)} with the requested document ID to verify + /// the document is not deleted. + /// + /// @param n Get the document at the n'th position + /// @param fieldSelector The {@link FieldSelector} to use to determine what Fields should be loaded on + /// the Document. May be null, in which case all Fields will be loaded. + /// @return The stored fields of the {@link Document} at the n'th position + /// @see Fieldable + /// @see FieldSelector + /// @see SetBasedFieldSelector + /// @see LoadFirstFieldSelector + virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector) = 0; + + /// Returns true if document n has been deleted + virtual bool isDeleted(int32_t n) = 0; + + /// Returns true if any documents have been deleted + virtual bool hasDeletions() = 0; + + /// Used for testing + virtual bool hasChanges(); + + /// Returns true if there are norms stored for this field. + virtual bool hasNorms(const String& field); + + /// Returns the byte-encoded normalization factor for the named field of every document. This is used + /// by the search code to score documents. + /// @see Field#setBoost(double) + virtual ByteArray norms(const String& field) = 0; + + /// Reads the byte-encoded normalization factor for the named field of every document. This is used + /// by the search code to score documents. + /// @see Field#setBoost(double) + virtual void norms(const String& field, ByteArray norms, int32_t offset) = 0; + + /// Resets the normalization factor for the named field of the named document. The norm represents + /// the product of the field's {@link Fieldable#setBoost(double) boost} and its {@link + /// Similarity#lengthNorm(String, int) length normalization}. Thus, to preserve the length normalization + /// values when resetting this, one should base the new value upon the old. 
+ /// + /// NOTE: If this field does not store norms, then this method call will silently do nothing. + /// + /// @see #norms(String) + /// @see Similarity#decodeNorm(byte) + virtual void setNorm(int32_t doc, const String& field, uint8_t value); + + /// Resets the normalization factor for the named field of the named document. + /// + /// @see #norms(String) + /// @see Similarity#decodeNorm(byte) + virtual void setNorm(int32_t doc, const String& field, double value); + + /// Returns an enumeration of all the terms in the index. The enumeration is ordered by + /// Term::compareTo(). Each term is greater than all that precede it in the enumeration. + /// Note that after calling terms(), {@link TermEnum#next()} must be called on the resulting + /// enumeration before calling other methods such as {@link TermEnum#term()}. + virtual TermEnumPtr terms() = 0; + + /// Returns an enumeration of all terms starting at a given term. If the given term does not + /// exist, the enumeration is positioned at the first term greater than the supplied term. + /// The enumeration is ordered by Term::compareTo(). Each term is greater than all that precede + /// it in the enumeration. + virtual TermEnumPtr terms(const TermPtr& t) = 0; + + /// Returns the number of documents containing the term t. + virtual int32_t docFreq(const TermPtr& t) = 0; + + /// Returns an enumeration of all the documents which contain term. For each document, the + /// document number, the frequency of the term in that document is also provided, for use in + /// search scoring. If term is null, then all non-deleted docs are returned with freq=1. + /// The enumeration is ordered by document number. Each document number is greater than all + /// that precede it in the enumeration. + virtual TermDocsPtr termDocs(const TermPtr& term); + + /// Returns an unpositioned {@link TermDocs} enumerator. + virtual TermDocsPtr termDocs() = 0; + + /// Returns an enumeration of all the documents which contain term. 
For each document, in + /// addition to the document number and frequency of the term in that document, a list of all + /// of the ordinal positions of the term in the document is available. + /// This positional information facilitates phrase and proximity searching. + /// The enumeration is ordered by document number. Each document number is greater than all + /// that precede it in the enumeration. + virtual TermPositionsPtr termPositions(const TermPtr& term); + + /// Returns an unpositioned {@link TermPositions} enumerator. + virtual TermPositionsPtr termPositions() = 0; + + /// Deletes the document numbered docNum. Once a document is deleted it will not appear in + /// TermDocs or TermPositions enumerations. Attempts to read its field with the {@link + /// #document} method will result in an error. The presence of this document may still be + /// reflected in the {@link #docFreq} statistic, though this will be corrected eventually as + /// the index is further modified. + virtual void deleteDocument(int32_t docNum); + + /// Deletes all documents that have a given term indexed. This is useful if one uses a + /// document field to hold a unique ID string for the document. Then to delete such a + /// document, one merely constructs a term with the appropriate field and the unique ID string + /// as its text and passes it to this method. See {@link #deleteDocument(int)} for information + /// about when this deletion will become effective. + /// @return the number of documents deleted + virtual int32_t deleteDocuments(const TermPtr& term); + + /// Undeletes all documents currently marked as deleted in this index. + virtual void undeleteAll(); + + void flush(); + + /// @param commitUserData Opaque Map (String -> String) that's recorded into the segments file + /// in the index, and retrievable by {@link IndexReader#getCommitUserData}. 
+ void flush(MapStringString commitUserData); + + /// Commit changes resulting from delete, undeleteAll, or setNorm operations. + /// If an exception is hit, then either no changes or all changes will have been committed to + /// the index (transactional semantics). + void commit(MapStringString commitUserData); + + /// Closes files associated with this index. Also saves any new deletions to disk. + /// No other methods should be called after this has been called. + void close(); + + /// Get a list of unique field names that exist in this index and have the specified field option information. + /// @param fieldOption specifies which field option should be available for the returned fields + /// @return Collection of Strings indicating the names of the fields. + virtual HashSet getFieldNames(FieldOption fieldOption) = 0; + + /// Return the IndexCommit that this reader has opened. This method is only implemented by those + /// readers that correspond to a Directory with its own segments_N file. + virtual IndexCommitPtr getIndexCommit(); + + /// Prints the filename and size of each file within a given compound file. Add the -extract flag + /// to extract files to the current working directory. In order to make the extracted version of + /// the index work, you have to copy the segments file from the compound index into the directory + /// where the extracted files are stored. + /// @param args Usage: IndexReader [-extract] + static void main(Collection args); + + /// Returns all commit points that exist in the Directory. Normally, because the default is {@link + /// KeepOnlyLastCommitDeletionPolicy}, there would be only one commit point. But if you're using a + /// custom {@link IndexDeletionPolicy} then there could be many commits. Once you have a given + /// commit, you can open a reader on it by calling {@link IndexReader#open(IndexCommit,bool)}. + /// There must be at least one commit in the Directory, else this method throws an exception. 
+ /// Note that if a commit is in progress while this method is running, that commit may or may not + /// be in the returned array. + static Collection listCommits(const DirectoryPtr& dir); + + /// Returns the sequential sub readers that this reader is logically composed of. For example, + /// IndexSearcher uses this API to drive searching by one sub reader at a time. If this reader is + /// not composed of sequential child readers, it should return null. If this method returns an empty + /// array, that means this reader is a null reader (for example a MultiReader that has no sub readers). + /// + /// NOTE: You should not try using sub-readers returned by this method to make any changes (setNorm, + /// deleteDocument, etc.). While this might succeed for one composite reader (like MultiReader), it + /// will most likely lead to index corruption for other readers (like DirectoryReader obtained + /// through {@link #open}). Use the parent reader directly. + virtual Collection getSequentialSubReaders(); + + virtual LuceneObjectPtr getFieldCacheKey(); + + /// This returns null if the reader has no deletions. + virtual LuceneObjectPtr getDeletesCacheKey(); + + /// Returns the number of unique terms (across all fields) in this reader. + /// + /// This method returns int64_t, even though internally Lucene cannot handle more than 2^31 unique + /// terms, for a possible future when this limitation is removed. + virtual int64_t getUniqueTermCount(); + + /// For IndexReader implementations that use TermInfosReader to read terms, this returns the current + /// indexDivisor as specified when the reader was opened. + virtual int32_t getTermInfosIndexDivisor(); + +protected: + void ensureOpen(); + + static IndexReaderPtr open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor); + + /// Implements setNorm in subclass. 
+ virtual void doSetNorm(int32_t doc, const String& field, uint8_t value) = 0; + + /// Implements deletion of the document numbered docNum. + /// Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. + virtual void doDelete(int32_t docNum) = 0; + + /// Implements actual undeleteAll() in subclass. + virtual void doUndeleteAll() = 0; + + /// Does nothing by default. Subclasses that require a write lock for index modifications must + /// implement this method. + virtual void acquireWriteLock(); + + /// Commit changes resulting from delete, undeleteAll, or setNorm operations. + /// If an exception is hit, then either no changes or all changes will have been committed to + /// the index (transactional semantics). + void commit(); + + /// Implements commit. + virtual void doCommit(MapStringString commitUserData) = 0; + + /// Implements close. + virtual void doClose() = 0; + + friend class DirectoryReader; + friend class ParallelReader; +}; + +} + +#endif diff --git a/include/lucene++/IndexSearcher.h b/include/lucene++/IndexSearcher.h new file mode 100644 index 00000000..7f620137 --- /dev/null +++ b/include/lucene++/IndexSearcher.h @@ -0,0 +1,102 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXSEARCHER_H +#define INDEXSEARCHER_H + +#include "Searcher.h" + +namespace Lucene { + +/// Implements search over a single IndexReader. +/// +/// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or {@link +/// #search(QueryPtr, FilterPtr, int32_t)} methods. For performance reasons it is recommended to open only +/// one IndexSearcher and use it for all of your searches. 
+/// +/// NOTE: {@link IndexSearcher} instances are completely thread safe, meaning multiple threads can call any +/// of its methods, concurrently. If your application requires external synchronization, you should not +/// synchronize on the IndexSearcher instance; use your own (non-Lucene) objects instead. +class LPPAPI IndexSearcher : public Searcher { +public: + /// Creates a searcher searching the index in the named directory. You should pass readOnly = true, + /// since it gives much better concurrent performance, unless you intend to do write operations (delete + /// documents or change norms) with the underlying IndexReader. + /// @param path Directory where IndexReader will be opened + /// @param readOnly If true, the underlying IndexReader will be opened readOnly + IndexSearcher(const DirectoryPtr& path, bool readOnly = true); + + /// Creates a searcher searching the provided index. + IndexSearcher(const IndexReaderPtr& reader); + + /// Directly specify the reader, subReaders and their docID starts. + IndexSearcher(const IndexReaderPtr& reader, Collection subReaders, Collection docStarts); + + virtual ~IndexSearcher(); + + LUCENE_CLASS(IndexSearcher); + +public: + IndexReaderPtr reader; + +protected: + bool closeReader; + + Collection subReaders; + Collection docStarts; + + bool fieldSortDoTrackScores; + bool fieldSortDoMaxScore; + +public: + /// Return the {@link IndexReader} this searches. + IndexReaderPtr getIndexReader(); + + /// Note that the underlying IndexReader is not closed, if IndexSearcher was constructed with + /// IndexSearcher(const IndexReaderPtr& reader). If the IndexReader was supplied implicitly by specifying a + /// directory, then the IndexReader gets closed. 
+ virtual void close(); + + virtual int32_t docFreq(const TermPtr& term); + virtual DocumentPtr doc(int32_t n); + virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); + virtual int32_t maxDoc(); + + using Searcher::search; + using Searcher::explain; + + virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n); + virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort); + + /// Just like {@link #search(WeightPtr, FilterPtr, int32_t, SortPtr)}, but you choose whether or not the + /// fields in the returned {@link FieldDoc} instances should be set by specifying fillFields. + /// + /// NOTE: this does not compute scores by default. If you need scores, create a {@link TopFieldCollector} + /// instance by calling {@link TopFieldCollector#create} and then pass that to {@link #search(WeightPtr, + /// FilterPtr, CollectorPtr)}. + virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort, bool fillFields); + + virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results); + virtual QueryPtr rewrite(const QueryPtr& query); + virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc); + + /// By default, no scores are computed when sorting by field (using {@link #search(QueryPtr, FilterPtr, + /// int32_t, SortPtr)}). You can change that, per IndexSearcher instance, by calling this method. Note + /// that this will incur a CPU cost. + /// + /// @param doTrackScores If true, then scores are returned for every matching document in {@link TopFieldDocs}. + /// @param doMaxScore If true, then the max score for all matching docs is computed. 
+ virtual void setDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore); + +protected: + void ConstructSearcher(const IndexReaderPtr& reader, bool closeReader); + void gatherSubReaders(Collection allSubReaders, const IndexReaderPtr& reader); + void searchWithFilter(const IndexReaderPtr& reader, const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& collector); +}; + +} + +#endif diff --git a/include/lucene++/IndexWriter.h b/include/lucene++/IndexWriter.h new file mode 100644 index 00000000..5ace8092 --- /dev/null +++ b/include/lucene++/IndexWriter.h @@ -0,0 +1,1112 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INDEXWRITER_H +#define INDEXWRITER_H + +#include "MergePolicy.h" + +namespace Lucene { + +/// An IndexWriter creates and maintains an index. +/// +/// The create argument to the {@link #IndexWriter(DirectoryPtr, AnalyzerPtr, bool, int32_t) constructor} +/// determines whether a new index is created, or whether an existing index is opened. Note that you can +/// open an index with create=true even while readers are using the index. The old readers will continue +/// to search the "point in time" snapshot they had opened, and won't see the newly created index until +/// they re-open. There are also {@link #IndexWriter(DirectoryPtr, AnalyzerPtr, int32_t) constructors} +/// with no create argument which will create a new index if there is not already an index at the provided +/// path and otherwise open the existing index. +/// +/// In either case, documents are added with {@link #addDocument(DocumentPtr) addDocument} and removed +/// with {@link #deleteDocuments(TermPtr)} or {@link #deleteDocuments(QueryPtr)}. 
A document can be updated +/// with {@link #updateDocument(TermPtr, DocumentPtr) updateDocument} (which just deletes and then adds +/// the entire document). When finished adding, deleting and updating documents, {@link #close() close} +/// should be called. +/// +/// These changes are buffered in memory and periodically flushed to the {@link Directory} (during the +/// above method calls). A flush is triggered when there are enough buffered deletes (see +/// {@link #setMaxBufferedDeleteTerms}) or enough added documents since the last flush, whichever is +/// sooner. For the added documents, flushing is triggered either by RAM usage of the documents (see +/// {@link #setRAMBufferSizeMB}) or the number of added documents. The default is to flush when RAM usage +/// hits 16 MB. For best indexing speed you should flush by RAM usage with a large RAM buffer. Note that +/// flushing just moves the internal buffered state in IndexWriter into the index, but these changes are +/// not visible to IndexReader until either {@link #commit()} or {@link #close} is called. A flush may +/// also trigger one or more segment merges which by default run with a background thread so as not to +/// block the addDocument calls (see mergePolicy below for changing the {@link MergeScheduler}). +/// +/// If an index will not have more documents added for a while and optimal search performance is desired, +/// then either the full {@link #optimize() optimize} method or partial {@link #optimize(int32_t)} method +/// should be called before the index is closed. +/// +/// Opening an IndexWriter creates a lock file for the directory in use. Trying to open another IndexWriter +/// on the same directory will lead to a LockObtainFailed exception. The LockObtainFailed exception is also +/// thrown if an IndexReader on the same directory is used to delete documents from the index. +/// +/// IndexWriter allows an optional {@link IndexDeletionPolicy} implementation to be specified. 
You can use +/// this to control when prior commits are deleted from the index. The default policy is {@link +/// KeepOnlyLastCommitDeletionPolicy} which removes all prior commits as soon as a new commit is done (this +/// matches behavior before 2.2). Creating your own policy can allow you to explicitly keep previous +/// "point in time" commits alive in the index for some time, to allow readers to refresh to the new commit +/// without having the old commit deleted out from under them. This is necessary on file systems like NFS +/// that do not support "delete on last close" semantics, which Lucene's "point in time" search normally +/// relies on. +/// +/// IndexWriter allows you to separately change the {@link MergePolicy} and the {@link MergeScheduler}. +/// The {@link MergePolicy} is invoked whenever there are changes to the segments in the index. Its role +/// is to select which merges to do, if any, and return a {@link MergePolicy.MergeSpecification} describing +/// the merges. It also selects merges to do for optimize(). (The default is {@link LogByteSizeMergePolicy}. +/// Then, the {@link MergeScheduler} is invoked with the requested merges and it decides when and how to run +/// the merges. The default is {@link ConcurrentMergeScheduler}. +/// +/// NOTE: if you hit an std::bad_alloc then IndexWriter will quietly record this fact and block all future +/// segment commits. This is a defensive measure in case any internal state (buffered documents and +/// deletions) were corrupted. Any subsequent calls to {@link #commit()} will throw an IllegalState +/// exception. The only course of action is to call {@link #close()}, which internally will call {@link +/// #rollback()}, to undo any changes to the index since the last commit. You can also just call {@link +/// #rollback()} directly. +/// +/// NOTE: {@link IndexWriter} instances are completely thread safe, meaning multiple threads can call any of +/// its methods, concurrently. 
If your application requires external synchronization, you should not +/// synchronize on the IndexWriter instance as this may cause deadlock; use your own (non-Lucene) objects +/// instead. +/// +/// Clarification: Check Points (and commits) +/// IndexWriter writes new index files to the directory without writing a new segments_N file which +/// references these new files. It also means that the state of the in memory SegmentInfos object is different +/// than the most recent segments_N file written to the directory. +/// +/// Each time the SegmentInfos is changed, and matches the (possibly modified) directory files, we have a new +/// "check point". If the modified/new SegmentInfos is written to disk - as a new (generation of) segments_N +/// file - this check point is also an IndexCommit. +/// +/// A new checkpoint always replaces the previous checkpoint and becomes the new "front" of the index. This +/// allows the IndexFileDeleter to delete files that are referenced only by stale checkpoints (files that were +/// created since the last commit, but are no longer referenced by the "front" of the index). For this, +/// IndexFileDeleter keeps track of the last non commit checkpoint. 
+class LPPAPI IndexWriter : public LuceneObject { +protected: + IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl, const IndexingChainPtr& indexingChain, const IndexCommitPtr& commit); + +public: + IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, int32_t mfl); + IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, int32_t mfl); + IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl); + IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl); + IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl, const IndexCommitPtr& commit); + virtual ~IndexWriter(); + + LUCENE_CLASS(IndexWriter); + +protected: + int64_t writeLockTimeout; + + /// The normal read buffer size defaults to 1024, but increasing this during merging seems to + /// yield performance gains. However we don't want to increase it too much because there are + /// quite a few BufferedIndexInputs created during merging. 
+ static const int32_t MERGE_READ_BUFFER_SIZE; + + SynchronizePtr messageIDLock; + static int32_t MESSAGE_ID; + int32_t messageID; + bool hitOOM; + + DirectoryPtr directory; // where this index resides + AnalyzerPtr analyzer; // how to analyze text + + bool create; + IndexDeletionPolicyPtr deletionPolicy; + IndexingChainPtr indexingChain; + IndexCommitPtr indexCommit; + + SimilarityPtr similarity; // how to normalize + + int64_t changeCount; // increments every time a change is completed + int64_t lastCommitChangeCount; // last changeCount that was committed + + SegmentInfosPtr rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails + MapSegmentInfoInt rollbackSegments; + + SegmentInfosPtr localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails + int32_t localFlushedDocCount; + + SegmentInfosPtr segmentInfos; // the segments + + DocumentsWriterPtr docWriter; + IndexFileDeleterPtr deleter; + + SetSegmentInfo segmentsToOptimize; // used by optimize to note those needing optimization + int32_t optimizeMaxNumSegments; + + LockPtr writeLock; + + int32_t termIndexInterval; + + bool closed; + bool closing; + + SetSegmentInfo mergingSegments; + MergePolicyPtr mergePolicy; + MergeSchedulerPtr mergeScheduler; + Collection pendingMerges; + SetOneMerge runningMerges; + Collection mergeExceptions; + int64_t mergeGen; + bool stopMerges; + + int32_t flushCount; + int32_t flushDeletesCount; + + /// Used to only allow one addIndexes to proceed at once + int32_t readCount; // count of how many threads are holding read lock + int64_t writeThread; // non-null if any thread holds write lock + int32_t upgradeCount; + + int32_t readerTermsIndexDivisor; + + // This is a "write once" variable (like the organic dye on a DVD-R that may or may not + // be heated by a laser and then cooled to permanently record the event): it's false, + // until getReader() is called for the first time, at which point it's switched to true + // and never changes 
back to false. Once this is true, we hold open and reuse SegmentReader + // instances internally for applying deletes, doing merges, and reopening near real-time readers. + bool poolReaders; + + /// The maximum number of terms that will be indexed for a single field in a document. This + /// limits the amount of memory required for indexing, so that collections with very large files + /// will not crash the indexing process by running out of memory. + /// Note that this effectively truncates large documents, excluding from the index terms that + /// occur further in the document. If you know your source documents are large, be sure to set + /// this value high enough to accommodate the expected size. If you set it to INT_MAX, then the + /// only limit is your memory, but you should anticipate an std::bad_alloc. By default, no more + /// than 10,000 terms will be indexed for a field. + /// + /// @see #setMaxFieldLength(int32_t) + int32_t maxFieldLength; + + InfoStreamPtr infoStream; + static InfoStreamPtr defaultInfoStream; + + HashSet synced; // files that have been sync'd already + HashSet syncing; // files that are now being sync'd + + IndexReaderWarmerPtr mergedSegmentWarmer; + + /// Used only by commit; lock order is commitLock -> IW + SynchronizePtr commitLock; + +INTERNAL: + SegmentInfosPtr pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) + int64_t pendingCommitChangeCount; + + ReaderPoolPtr readerPool; + +public: + /// Default value for the write lock timeout (1,000). + /// @see #setDefaultWriteLockTimeout + static int64_t WRITE_LOCK_TIMEOUT; + + static const String WRITE_LOCK_NAME; + + /// Value to denote a flush trigger is disabled. + static const int32_t DISABLE_AUTO_FLUSH; + + /// Disabled by default (because IndexWriter flushes by RAM usage by default). Change using + /// {@link #setMaxBufferedDocs(int32_t)}. 
+ static const int32_t DEFAULT_MAX_BUFFERED_DOCS; + + /// Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). + /// Change using {@link #setRAMBufferSizeMB}. + static const double DEFAULT_RAM_BUFFER_SIZE_MB; + + /// Disabled by default (because IndexWriter flushes by RAM usage by default). Change using + /// {@link #setMaxBufferedDeleteTerms(int32_t)}. + static const int32_t DEFAULT_MAX_BUFFERED_DELETE_TERMS; + + /// Default value is 10,000. Change using {@link #setMaxFieldLength(int32_t)}. + static const int32_t DEFAULT_MAX_FIELD_LENGTH; + + /// Default value is 128. Change using {@link #setTermIndexInterval(int32_t)}. + static const int32_t DEFAULT_TERM_INDEX_INTERVAL; + + /// Absolute hard maximum length for a term. If a term arrives from the analyzer longer than + /// this length, it is skipped and a message is printed to infoStream, if set (see {@link + /// #setInfoStream}). + static int32_t MAX_TERM_LENGTH(); + + /// Sets the maximum field length to INT_MAX + static const int32_t MaxFieldLengthUNLIMITED; + + /// Sets the maximum field length to {@link #DEFAULT_MAX_FIELD_LENGTH} + static const int32_t MaxFieldLengthLIMITED; + +public: + virtual void initialize(); + + /// Returns a read-only reader, covering all committed as well as un-committed changes to the + /// index. This provides "near real-time" searching, in that changes made during an IndexWriter + /// session can be quickly made available for searching without closing the writer nor calling + /// {@link #commit}. + /// + /// Note that this is functionally equivalent to calling {#commit} and then using {@link + /// IndexReader#open} to open a new reader. But the turnaround time of this method should be + /// faster since it avoids the potentially costly {@link #commit}. + /// + /// You must close the {@link IndexReader} returned by this method once you are done using it. 
+ /// + /// It's near real-time because there is no hard guarantee on how quickly you can get a new + /// reader after making changes with IndexWriter. You'll have to experiment in your situation + /// to determine if it's fast enough. As this is a new and experimental feature, please report + /// back on your findings so we can learn, improve and iterate. + /// + /// The resulting reader supports {@link IndexReader#reopen}, but that call will simply forward + /// back to this method (though this may change in the future). + /// + /// The very first time this method is called, this writer instance will make every effort to + /// pool the readers that it opens for doing merges, applying deletes, etc. This means additional + /// resources (RAM, file descriptors, CPU time) will be consumed. + /// + /// For lower latency on reopening a reader, you should call {@link #setMergedSegmentWarmer} to + /// pre-warm a newly merged segment before it's committed to the index. This is important for + /// minimizing index-to-search delay after a large merge. + /// + /// If an addIndexes* call is running in another thread, then this reader will only search those + /// segments from the foreign index that have been successfully copied over, so far. + /// + /// NOTE: Once the writer is closed, any outstanding readers may continue to be used. However, + /// if you attempt to reopen any of those readers, you'll hit an AlreadyClosed exception. + /// + /// NOTE: This API is experimental and might change in incompatible ways in the next release. + /// + /// @return IndexReader that covers entire index plus all changes made so far by this IndexWriter + /// instance + virtual IndexReaderPtr getReader(); + + /// Like {@link #getReader}, except you can specify which termInfosIndexDivisor should be used for + /// any newly opened readers. + /// + /// @param termInfosIndexDivisor Subsamples which indexed terms are loaded into RAM. 
This has the + /// same effect as {@link IndexWriter#setTermIndexInterval} except that setting must be done at + /// indexing time while this setting can be set per reader. When set to N, then one in every + /// N*termIndexInterval terms in the index is loaded into memory. By setting this to a value > 1 + /// you can reduce memory usage, at the expense of higher latency when loading a TermInfo. + /// The default value is 1. Set this to -1 to skip loading the terms index entirely. + virtual IndexReaderPtr getReader(int32_t termInfosIndexDivisor); + + /// Obtain the number of deleted docs for a pooled reader. If the reader isn't being pooled, + /// the segmentInfo's delCount is returned. + virtual int32_t numDeletedDocs(const SegmentInfoPtr& info); + + virtual void acquireWrite(); + virtual void releaseWrite(); + virtual void acquireRead(); + + /// Allows one readLock to upgrade to a writeLock even if there are other readLocks as long + /// as all other readLocks are also blocked in this method + virtual void upgradeReadToWrite(); + + virtual void releaseRead(); + virtual bool isOpen(bool includePendingClose); + virtual void message(const String& message); + + /// Get the current setting of whether newly flushed segments will use the compound file format. + /// Note that this just returns the value previously set with setUseCompoundFile(bool), or the + /// default value (true). You cannot use this to query the status of previously flushed segments. + /// + /// Note that this method is a convenience method: it just calls mergePolicy.getUseCompoundFile + /// as long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument + /// exception is thrown. + /// @see #setUseCompoundFile(bool) + virtual bool getUseCompoundFile(); + + /// Setting to turn on usage of a compound file. When on, multiple files for each segment are + /// merged into a single file when a new segment is flushed. 
+ /// + /// Note that this method is a convenience method: it just calls mergePolicy.setUseCompoundFile + /// as long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument + /// exception is thrown. + virtual void setUseCompoundFile(bool value); + + /// Set the Similarity implementation used by this IndexWriter. + virtual void setSimilarity(const SimilarityPtr& similarity); + + /// Return the Similarity implementation used by this IndexWriter. + /// This defaults to the current value of {@link Similarity#getDefault()}. + virtual SimilarityPtr getSimilarity(); + + /// Set the interval between indexed terms. Large values cause less memory to be used by + /// IndexReader, but slow random-access to terms. Small values cause more memory to be used by + /// an IndexReader, and speed random-access to terms. + /// + /// This parameter determines the amount of computation required per query term, regardless of + /// the number of documents that contain that term. In particular, it is the maximum number of + /// other terms that must be scanned before a term is located and its frequency and position + /// information may be processed. In a large index with user-entered query terms, query + /// processing time is likely to be dominated not by term lookup but rather by the processing of + /// frequency and positional data. In a small index or when many uncommon query terms are + /// generated (eg., by wildcard queries) term lookup may become a dominant cost. + /// + /// In particular, numUniqueTerms/interval terms are read into memory by an IndexReader, and on + /// average, interval/2 terms must be scanned for each random term access. + /// + /// @see #DEFAULT_TERM_INDEX_INTERVAL + virtual void setTermIndexInterval(int32_t interval); + + /// Return the interval between indexed terms. + /// @see #setTermIndexInterval(int32_t) + virtual int32_t getTermIndexInterval(); + + /// Set the merge policy used by this writer. 
+ virtual void setMergePolicy(const MergePolicyPtr& mp); + + /// Returns the current MergePolicy in use by this writer. + /// @see #setMergePolicy + virtual MergePolicyPtr getMergePolicy(); + + /// Set the merge scheduler used by this writer. + virtual void setMergeScheduler(const MergeSchedulerPtr& mergeScheduler); + + /// Returns the current MergeScheduler in use by this writer. + /// @see #setMergeScheduler + virtual MergeSchedulerPtr getMergeScheduler(); + + /// Determines the largest segment (measured by document count) that may be merged with other + /// segments. Small values (eg., less than 10,000) are best for interactive indexing, as this + /// limits the length of pauses while indexing to a few seconds. Larger values are best for + /// batched indexing and speedier searches. + /// + /// The default value is INT_MAX. + /// + /// Note that this method is a convenience method: it just calls mergePolicy.setMaxMergeDocs as + /// long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument + /// exception is thrown. + /// + /// The default merge policy ({@link LogByteSizeMergePolicy}) also allows you to set this limit + /// by net size (in MB) of the segment, using {@link LogByteSizeMergePolicy#setMaxMergeMB}. + virtual void setMaxMergeDocs(int32_t maxMergeDocs); + + /// Returns the largest segment (measured by document count) that may be merged with other + /// segments. + /// + /// Note that this method is a convenience method: it just calls mergePolicy.getMaxMergeDocs as + /// long as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument + /// exception is thrown. + /// + /// @see #setMaxMergeDocs + virtual int32_t getMaxMergeDocs(); + + /// The maximum number of terms that will be indexed for a single field in a document. This + /// limits the amount of memory required for indexing, so that collections with very large files + /// will not crash the indexing process by running out of memory. 
This setting refers to the + /// number of running terms, not to the number of different terms. + /// Note: this silently truncates large documents, excluding from the index all terms that occur + /// further in the document. If you know your source documents are large, be sure to set this + /// value high enough to accommodate the expected size. If you set it to INT_MAX, then the only + /// limit is your memory, but you should anticipate an std::bad_alloc. + /// By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms will be indexed for a field. + virtual void setMaxFieldLength(int32_t maxFieldLength); + + /// Returns the maximum number of terms that will be indexed for a single field in a document. + /// @see #setMaxFieldLength + virtual int32_t getMaxFieldLength(); + + /// Sets the termsIndexDivisor passed to any readers that IndexWriter opens, for example when + /// applying deletes or creating a near-real-time reader in {@link IndexWriter#getReader}. + /// Default value is {@link IndexReader#DEFAULT_TERMS_INDEX_DIVISOR}. + virtual void setReaderTermsIndexDivisor(int32_t divisor); + + /// @see #setReaderTermsIndexDivisor() + virtual int32_t getReaderTermsIndexDivisor(); + + /// Determines the minimal number of documents required before the buffered in-memory documents + /// are flushed as a new Segment. Large values generally gives faster indexing. + /// + /// When this is set, the writer will flush every maxBufferedDocs added documents. Pass in + /// {@link #DISABLE_AUTO_FLUSH} to prevent triggering a flush due to number of buffered + /// documents. Note that if flushing by RAM usage is also enabled, then the flush will be + /// triggered by whichever comes first. + /// + /// Disabled by default (writer flushes by RAM usage). + /// + /// @see #setRAMBufferSizeMB + virtual void setMaxBufferedDocs(int32_t maxBufferedDocs); + + /// Returns the number of buffered added documents that will trigger a flush if enabled. 
+ /// @see #setMaxBufferedDocs + virtual int32_t getMaxBufferedDocs(); + + /// Determines the amount of RAM that may be used for buffering added documents and deletions + /// before they are flushed to the Directory. Generally for faster indexing performance it's + /// best to flush by RAM usage instead of document count and use as large a RAM buffer as you can. + /// + /// When this is set, the writer will flush whenever buffered documents and deletions use this + /// much RAM. Pass in {@link #DISABLE_AUTO_FLUSH} to prevent triggering a flush due to RAM usage. + /// Note that if flushing by document count is also enabled, then the flush will be triggered by + /// whichever comes first. + /// + /// Note: the account of RAM usage for pending deletions is only approximate. Specifically, if + /// you delete by Query, Lucene currently has no way to measure the RAM usage of individual + /// Queries so the accounting will under-estimate and you should compensate by either calling + /// commit() periodically yourself, or by using {@link #setMaxBufferedDeleteTerms} to flush by + /// count instead of RAM usage (each buffered delete Query counts as one). + /// + /// Note: because IndexWriter uses int32_t when managing its internal storage, the absolute + /// maximum value for this setting is somewhat less than 2048 MB. The precise limit depends on + /// various factors, such as how large your documents are, how many fields have norms, etc., so + /// it's best to set this value comfortably under 2048. + /// + /// The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}. + virtual void setRAMBufferSizeMB(double mb); + + /// Returns the value set by {@link #setRAMBufferSizeMB} if enabled. + virtual double getRAMBufferSizeMB(); + + /// Determines the minimal number of delete terms required before the buffered in-memory delete + /// terms are applied and flushed. If there are documents buffered in memory at the time, they + /// are merged and a new segment is created. 
+ /// + /// Disabled by default (writer flushes by RAM usage). + /// @see #setRAMBufferSizeMB + virtual void setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms); + + /// Returns the number of buffered deleted terms that will trigger a flush if enabled. + /// @see #setMaxBufferedDeleteTerms + virtual int32_t getMaxBufferedDeleteTerms(); + + /// Determines how often segment indices are merged by addDocument(). With smaller values, less + /// RAM is used while indexing, and searches on unoptimized indices are faster, but indexing + /// speed is slower. With larger values, more RAM is used during indexing, and while searches + /// on unoptimized indices are slower, indexing is faster. Thus larger values (> 10) are best + /// for batch index creation, and smaller values (< 10) for indices that are interactively maintained. + /// + /// Note that this method is a convenience method: it just calls mergePolicy.setMergeFactor as long + /// as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument exception + /// is thrown. This must never be less than 2. The default value is 10. + virtual void setMergeFactor(int32_t mergeFactor); + + /// Returns the number of segments that are merged at once and also controls the total number of + /// segments allowed to accumulate in the index. + /// + /// Note that this method is a convenience method: it just calls mergePolicy.getMergeFactor as long + /// as mergePolicy is an instance of {@link LogMergePolicy}. Otherwise an IllegalArgument exception + /// is thrown. + /// @see #setMergeFactor + virtual int32_t getMergeFactor(); + + /// If non-null, this will be the default infoStream used by a newly instantiated IndexWriter. + /// @see #setInfoStream + static void setDefaultInfoStream(const InfoStreamPtr& infoStream); + + /// Returns the current default infoStream for newly instantiated IndexWriters. 
+ /// @see #setDefaultInfoStream + static InfoStreamPtr getDefaultInfoStream(); + + /// If non-null, information about merges, deletes and a message when maxFieldLength is reached + /// will be printed to this. + virtual void setInfoStream(const InfoStreamPtr& infoStream); + + /// Returns the current infoStream in use by this writer. + /// @see #setInfoStream + virtual InfoStreamPtr getInfoStream(); + + /// Returns true if verbosing is enabled (i.e., infoStream != null). + virtual bool verbose(); + + /// Sets the maximum time to wait for a write lock (in milliseconds) for this instance of + /// IndexWriter. @see #setDefaultWriteLockTimeout to change the default value for all instances + /// of IndexWriter. + virtual void setWriteLockTimeout(int64_t writeLockTimeout); + + /// Returns allowed timeout when acquiring the write lock. + /// @see #setWriteLockTimeout + virtual int64_t getWriteLockTimeout(); + + /// Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock + /// (in milliseconds). + static void setDefaultWriteLockTimeout(int64_t writeLockTimeout); + + /// Returns default write lock timeout for newly instantiated IndexWriters. + /// @see #setDefaultWriteLockTimeout + static int64_t getDefaultWriteLockTimeout(); + + /// Commits all changes to an index and closes all associated files. Note that this may be + /// a costly operation, so try to re-use a single writer instead of closing and opening a + /// new one. See {@link #commit()} for caveats about write caching done by some IO devices. + /// + /// If an Exception is hit during close, eg due to disk full or some other reason, then both + /// the on-disk index and the internal state of the IndexWriter instance will be consistent. + /// However, the close will not be complete even though part of it (flushing buffered documents) + /// may have succeeded, so the write lock will still be held. 
+ /// + /// If you can correct the underlying cause (eg free up some disk space) then you can call + /// close() again. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer, again. + virtual void close(); + + /// Closes the index with or without waiting for currently running merges to finish. This is + /// only meaningful when using a MergeScheduler that runs merges in background threads. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer, again. + /// + /// NOTE: it is dangerous to always call close(false), especially when IndexWriter is not open + /// for very long, because this can result in "merge starvation" whereby long merges will never + /// have a chance to finish. This will cause too many segments in your index over time. + /// + /// @param waitForMerges if true, this call will block until all merges complete; else, it will + /// ask all running merges to abort, wait until those merges have finished (which should be at + /// most a few seconds), and then return. + virtual void close(bool waitForMerges); + + /// Returns the Directory used by this index. + virtual DirectoryPtr getDirectory(); + + /// Returns the analyzer used by this index. + virtual AnalyzerPtr getAnalyzer(); + + /// Returns total number of docs in this index, including docs not yet flushed (still in the + /// RAM buffer), not counting deletions. + /// @see #numDocs + virtual int32_t maxDoc(); + + /// Returns total number of docs in this index, including docs not yet flushed (still in the + /// RAM buffer), and including deletions. + /// NOTE: buffered deletions are not counted. If you really need these to be counted you should + /// call {@link #commit()} first. + virtual int32_t numDocs(); + + virtual bool hasDeletions(); + + /// Adds a document to this index. If the document contains more than {@link + /// #setMaxFieldLength(int32_t)} terms for a given field, the remainder are discarded. 
+ /// + /// Note that if an Exception is hit (for example disk full) then the index will be consistent, + /// but this document may not have been added. Furthermore, it's possible the index will have + /// one segment in non-compound format even when using compound files (when a merge has partially + /// succeeded). + /// + /// This method periodically flushes pending documents to the Directory, and also periodically + /// triggers segment merges in the index according to the {@link MergePolicy} in use. + /// + /// Merges temporarily consume space in the directory. The amount of space required is up to 1X + /// the size of all segments being merged, when no readers/searchers are open against the index, + /// and up to 2X the size of all segments being merged when readers/searchers are open against the index + /// (see {@link #optimize()} for details). The sequence of primitive merge operations performed + /// is governed by the merge policy. + /// + /// Note that each term in the document can be no longer than 16383 characters, otherwise an + /// IllegalArgument exception will be thrown. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void addDocument(const DocumentPtr& doc); + + /// Adds a document to this index, using the provided analyzer instead of the value of {@link + /// #getAnalyzer()}. If the document contains more than {@link #setMaxFieldLength(int32_t)} terms + /// for a given field, the remainder are discarded. + /// + /// See {@link #addDocument(DocumentPtr)} for details on index and IndexWriter state after an + /// exception, and flushing/merging temporary free space requirements. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer); + + /// Deletes the document(s) containing term. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
+ /// + /// @param term the term to identify the documents to be deleted + virtual void deleteDocuments(const TermPtr& term); + + /// Deletes the document(s) containing any of the terms. All deletes are flushed at the same time. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// + /// @param terms array of terms to identify the documents to be deleted + virtual void deleteDocuments(Collection terms); + + /// Deletes the document(s) matching the provided query. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// + /// @param query the query to identify the documents to be deleted + virtual void deleteDocuments(const QueryPtr& query); + + /// Deletes the document(s) matching any of the provided queries. All deletes are flushed at + /// the same time. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// + /// @param queries array of queries to identify the documents to be deleted + virtual void deleteDocuments(Collection queries); + + /// Updates a document by first deleting the document(s) containing term and then adding the new + /// document. The delete and then add are atomic as seen by a reader on the same index (flush + /// may happen only after the add). + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// + /// @param term the term to identify the document(s) to be deleted + /// @param doc the document to be added + virtual void updateDocument(const TermPtr& term, const DocumentPtr& doc); + + /// Updates a document by first deleting the document(s) containing term and then adding the new + /// document. The delete and then add are atomic as seen by a reader on the same index (flush + /// may happen only after the add). + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
+ /// + /// @param term the term to identify the document(s) to be deleted + /// @param doc the document to be added + /// @param analyzer the analyzer to use when analyzing the document + virtual void updateDocument(const TermPtr& term, const DocumentPtr& doc, const AnalyzerPtr& analyzer); + + virtual int32_t getSegmentCount(); + virtual int32_t getNumBufferedDocuments(); + virtual int32_t getDocCount(int32_t i); + virtual int32_t getFlushCount(); + virtual int32_t getFlushDeletesCount(); + + virtual String newSegmentName(); + + /// Requests an "optimize" operation on an index, priming the index for the fastest available + /// search. Traditionally this has meant merging all segments into a single segment as is done in + /// the default merge policy, but individual merge policies may implement optimize in different ways. + /// + /// It is recommended that this method be called upon completion of indexing. In environments with + /// frequent updates, optimize is best done during low volume times, if at all. + /// + /// Note that optimize requires 2X the index size free space in your Directory (3X if you're using + /// compound file format). For example, if your index size is 10 MB then you need 20 MB free for + /// optimize to complete (30 MB if you're using compound file format). + /// + /// If some but not all readers re-open while an optimize is underway, this will cause > 2X temporary + /// space to be consumed as those new readers will then hold open the partially optimized segments at + /// that time. It is best not to re-open readers while optimize is running. + /// + /// The actual temporary usage could be much less than these figures (it depends on many factors). + /// + /// In general, once the optimize completes, the total size of the index will be less than the size + /// of the starting index. It could be quite a bit smaller (if there were many pending deletes) or + /// just slightly smaller. 
+ /// + /// If an Exception is hit during optimize(), for example due to disk full, the index will not be + /// corrupt and no documents will have been lost. However, it may have been partially optimized + /// (some segments were merged but not all), and it's possible that one of the segments in the index + /// will be in non-compound format even when using compound file format. This will occur when the + /// exception is hit during conversion of the segment into compound format. + /// + /// This call will optimize those segments present in the index when the call started. If other + /// threads are still adding documents and flushing segments, those newly created segments will not + /// be optimized unless you call optimize again. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// + /// @see LogMergePolicy#findMergesForOptimize + virtual void optimize(); + + /// Optimize the index down to <= maxNumSegments. If maxNumSegments==1 then this is the same as + /// {@link #optimize()}. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// + /// @param maxNumSegments maximum number of segments left in the index after optimization finishes + virtual void optimize(int32_t maxNumSegments); + + /// Just like {@link #optimize()}, except you can specify whether the call should block until the + /// optimize completes. This is only meaningful with a {@link MergeScheduler} that is able to run + /// merges in background threads. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void optimize(bool doWait); + + /// Just like {@link #optimize(int32_t)}, except you can specify whether the call should block + /// until the optimize completes. This is only meaningful with a {@link MergeScheduler} that is + /// able to run merges in background threads. 
+ /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void optimize(int32_t maxNumSegments, bool doWait); + + /// Just like {@link #expungeDeletes()}, except you can specify whether the call should block + /// until the operation completes. This is only meaningful with a {@link MergeScheduler} that + /// is able to run merges in background threads. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void expungeDeletes(bool doWait); + + /// Expunges all deletes from the index. When an index has many document deletions (or updates + /// to existing documents), it's best to either call optimize or expungeDeletes to remove all + /// unused data in the index associated with the deleted documents. To see how many deletions + /// you have pending in your index, call {@link IndexReader#numDeletedDocs}. This saves disk + /// space and memory usage while searching. expungeDeletes should be somewhat faster than + /// optimize since it does not insist on reducing the index to a single segment (though, this + /// depends on the {@link MergePolicy}; see {@link MergePolicy#findMergesToExpungeDeletes}.). + /// Note that this call does not first commit any buffered documents, so you must do so yourself + /// if necessary. See also {@link #expungeDeletes(bool)} + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void expungeDeletes(); + + /// Asks the mergePolicy whether any merges are necessary now and if so, runs the requested + /// merges and then iterate (test again if merges are needed) until no more merges are returned + /// by the mergePolicy. + /// + /// Explicit calls to maybeMerge() are usually not necessary. The most common case is when merge + /// policy parameters have changed. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. 
+ virtual void maybeMerge(); + + /// The {@link MergeScheduler} calls this method to retrieve the next merge requested by the + /// MergePolicy. + virtual OneMergePtr getNextMerge(); + + /// Close the IndexWriter without committing any changes that have occurred since the last commit + /// (or since it was opened, if commit hasn't been called). This removes any temporary files that + /// had been created, after which the state of the index will be the same as it was when commit() + /// was last called or when this writer was first opened. This also clears a previous call to + /// {@link #prepareCommit}. + virtual void rollback(); + + /// Delete all documents in the index. + /// + /// This method will drop all buffered documents and will remove all segments from the index. This + /// change will not be visible until a {@link #commit()} has been called. This method can be rolled + /// back using {@link #rollback()}. + /// + /// NOTE: this method is much faster than using {@link #deleteDocuments()}. + /// + /// NOTE: this method will forcefully abort all merges in progress. If other threads are running + /// {@link #optimize()} or any of the addIndexes methods, they will receive {@link + /// MergePolicy.MergeAbortedException} + virtual void deleteAll(); + + /// Wait for any currently outstanding merges to finish. + /// + /// It is guaranteed that any merges started prior to calling this method will have completed once + /// this method completes. + virtual void waitForMerges(); + + /// Merges all segments from an array of indexes into this index. + /// + /// This may be used to parallelize batch indexing. A large document collection can be broken into + /// sub-collections. Each sub-collection can be indexed in parallel, on a different thread, process + /// or machine. The complete index can then be created by merging sub-collection indexes with this + /// method. 
+ /// + /// NOTE: the index in each Directory must not be changed (opened by a writer) while this method is + /// running. This method does not acquire a write lock in each input Directory, so it is up to the + /// caller to enforce this. + /// + /// NOTE: while this is running, any attempts to add or delete documents (with another thread) will + /// be paused until this method completes. + /// + /// This method is transactional in how exceptions are handled: it does not commit a new segments_N + /// file until all indexes are added. This means if an exception occurs (for example disk full), + /// then either no indexes will have been added or they all will have been. + /// + /// Note that this requires temporary free space in the Directory up to 2X the sum of all input + /// indexes (including the starting index). If readers/searchers are open against the starting index, + /// then temporary free space required will be higher by the size of the starting index (see + /// {@link #optimize()} for details). + /// + /// Once this completes, the final size of the index will be less than the sum of all input index + /// sizes (including the starting index). It could be quite a bit smaller (if there were many pending + /// deletes) or just slightly smaller. + /// + /// This requires this index not be among those to be added. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void addIndexesNoOptimize(Collection dirs); + + /// Merges the provided indexes into this index. + /// After this completes, the index is optimized. The provided IndexReaders are not closed. + /// + /// NOTE: while this is running, any attempts to add or delete documents (with another thread) will + /// be paused until this method completes. + /// + /// See {@link #addIndexesNoOptimize} for details on transactional semantics, temporary free space + /// required in the Directory, and non-CFS segments on an exception. 
+ /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void addIndexes(Collection readers); + + /// Prepare for commit. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// @see #prepareCommit(MapStringString) + virtual void prepareCommit(); + + /// Prepare for commit, specifying commitUserData Map (String -> String). This does the first phase + /// of 2-phase commit. This method does all steps necessary to commit changes since this writer was + /// opened: flushes pending added and deleted docs, syncs the index files, writes most of next + /// segments_N file. After calling this you must call either {@link #commit()} to finish the commit, + /// or {@link #rollback()} to revert the commit and undo all changes done since the writer was opened. + /// + /// You can also just call {@link #commit(Map)} directly without prepareCommit first in which case + /// that method will internally call prepareCommit. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// + /// @param commitUserData Opaque Map (String->String) that's recorded into the segments file in the + /// index, and retrievable by {@link IndexReader#getCommitUserData}. Note that when IndexWriter + /// commits itself during {@link #close}, the commitUserData is unchanged (just carried over from the + /// prior commit). If this is null then the previous commitUserData is kept. Also, the commitUserData + // will only "stick" if there are actually changes in the index to commit. + virtual void prepareCommit(MapStringString commitUserData); + + /// Commits all pending changes (added & deleted documents, optimizations, segment merges, added + /// indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the + /// changes and the index updates will survive an OS or machine crash or power loss. 
Note that this + /// does not wait for any running background merges to finish. This may be a costly operation, so you + /// should test the cost in your application and do it only when really necessary. + /// + /// Note that this operation calls Directory.sync on the index files. That call should not return until + /// the file contents & metadata are on stable storage. For FSDirectory, this calls the OS's fsync. + /// But, beware: some hardware devices may in fact cache writes even during fsync, and return before the + /// bits are actually on stable storage, to give the appearance of faster performance. If you have such + /// a device, and it does not have a battery backup (for example) then on power loss it may still lose + /// data. Lucene cannot guarantee consistency on such devices. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + /// + /// @see #prepareCommit + /// @see #commit(MapStringString) + virtual void commit(); + + /// Commits all changes to the index, specifying a commitUserData Map (String -> String). This just + /// calls {@link #prepareCommit(MapStringString)} (if you didn't already call it) and then + /// {@link #finishCommit}. + /// + /// NOTE: if this method hits an std::bad_alloc you should immediately close the writer. + virtual void commit(MapStringString commitUserData); + + /// Return the total size of all index files currently cached in memory. Useful for size management + /// with flushRamDocs() + virtual int64_t ramSizeInBytes(); + + /// Return the number of documents currently buffered in RAM. + virtual int32_t numRamDocs(); + + /// Merges the indicated segments, replacing them in the stack with a single segment. + virtual void merge(const OneMergePtr& merge); + + /// Hook that's called when the specified merge is complete. + virtual void mergeSuccess(const OneMergePtr& merge); + + /// Checks whether this merge involves any segments already participating in a merge. 
If not, this + /// merge is "registered", meaning we record that its segments are now participating in a merge, + /// and true is returned. Else (the merge conflicts) false is returned. + virtual bool registerMerge(const OneMergePtr& merge); + + /// Does initial setup for a merge, which is fast but holds the synchronized lock on IndexWriter + /// instance. + virtual void mergeInit(const OneMergePtr& merge); + + /// Does finishing for a merge, which is fast but holds the synchronized lock on IndexWriter instance. + virtual void mergeFinish(const OneMergePtr& merge); + + virtual void addMergeException(const OneMergePtr& merge); + + /// For test purposes. + virtual int32_t getBufferedDeleteTermsSize(); + + /// For test purposes. + virtual int32_t getNumBufferedDeleteTerms(); + + /// Utility routines for tests + virtual SegmentInfoPtr newestSegment(); + + virtual String segString(); + + /// Returns true if the index in the named directory is currently locked. + /// @param directory the directory to check for a lock + static bool isLocked(const DirectoryPtr& directory); + + /// Forcibly unlocks the index in the named directory. + /// Caution: this should only be used by failure recovery code, when it is known that no other process + /// nor thread is in fact currently accessing this index. + static void unlock(const DirectoryPtr& directory); + + /// Set the merged segment warmer. See {@link IndexReaderWarmer}. + virtual void setMergedSegmentWarmer(const IndexReaderWarmerPtr& warmer); + + /// Returns the current merged segment warmer. See {@link IndexReaderWarmer}. + virtual IndexReaderWarmerPtr getMergedSegmentWarmer(); + + /// Used only by assert for testing. 
Current points: + /// startDoFlush + /// startCommitMerge + /// startStartCommit + /// midStartCommit + /// midStartCommit2 + /// midStartCommitSuccess + /// finishStartCommit + /// startCommitMergeDeletes + /// startMergeInit + /// startApplyDeletes + /// startMergeInit + /// startMergeInit + virtual bool testPoint(const String& name); + + virtual bool nrtIsCurrent(const SegmentInfosPtr& infos); + virtual bool isClosed(); + +protected: + virtual void ensureOpen(bool includePendingClose); + virtual void ensureOpen(); + virtual void setMessageID(const InfoStreamPtr& infoStream); + + /// Casts current mergePolicy to LogMergePolicy, and throws an exception if the + /// mergePolicy is not a LogMergePolicy. + virtual LogMergePolicyPtr getLogMergePolicy(); + + virtual void setRollbackSegmentInfos(const SegmentInfosPtr& infos); + + /// If we are flushing by doc count (not by RAM usage), and using LogDocMergePolicy then push + /// maxBufferedDocs down as its minMergeDocs, to keep backwards compatibility. + virtual void pushMaxBufferedDocs(); + + virtual void messageState(); + + /// Returns true if this thread should attempt to close, or false if IndexWriter is now closed; + /// else, waits until another thread finishes closing + virtual bool shouldClose(); + virtual void closeInternal(bool waitForMerges); + + /// Tells the docWriter to close its currently open shared doc stores (stored fields & vectors + /// files). Return value specifies whether new doc store files are compound or not. + virtual bool flushDocStores(); + + /// Returns true if any merges in pendingMerges or runningMerges are optimization merges. + virtual bool optimizeMergesPending(); + + virtual void maybeMerge(bool optimize); + virtual void maybeMerge(int32_t maxNumSegmentsOptimize, bool optimize); + virtual void updatePendingMerges(int32_t maxNumSegmentsOptimize, bool optimize); + + /// Like {@link #getNextMerge()} except only returns a merge if it's external. 
+ virtual OneMergePtr getNextExternalMerge(); + + /// Begin a transaction. During a transaction, any segment merges that happen (or ram segments + /// flushed) will not write a new segments file and will not remove any files that were present + /// at the start of the transaction. You must make a matched call to commitTransaction() or + /// rollbackTransaction() to finish the transaction. + /// + /// Note that buffered documents and delete terms are not handled within the transactions, so + /// they must be flushed before the transaction is started. + virtual void startTransaction(bool haveReadLock); + + /// Rolls back the transaction and restores state to where we were at the start. + virtual void rollbackTransaction(); + + /// Commits the transaction. This will write the new segments file and remove and pending + /// deletions we have accumulated during the transaction. + virtual void commitTransaction(); + virtual void rollbackInternal(); + + virtual void finishMerges(bool waitForMerges); + + /// Called whenever the SegmentInfos has been updated and the index files referenced exist + /// (correctly) in the index directory. + virtual void checkpoint(); + + virtual void finishAddIndexes(); + virtual void blockAddIndexes(bool includePendingClose); + virtual void resumeAddIndexes(); + virtual void resetMergeExceptions(); + virtual void noDupDirs(Collection dirs); + + virtual bool hasExternalSegments(); + + /// If any of our segments are using a directory != ours then we have to either copy them over one + /// by one, merge them (if merge policy has chosen to) or wait until currently running merges (in + /// the background) complete. We don't return until the SegmentInfos has no more external segments. + /// Currently this is only used by addIndexesNoOptimize(). 
+ virtual void resolveExternalSegments(); + + /// A hook for extending classes to execute operations after pending added and deleted documents have + /// been flushed to the Directory but before the change is committed (new segments_N file written). + virtual void doAfterFlush(); + + /// A hook for extending classes to execute operations before pending added and deleted documents are + /// flushed to the Directory. + virtual void doBeforeFlush(); + + virtual void commit(int64_t sizeInBytes); + virtual void finishCommit(); + + /// Flush all in-memory buffered updates (adds and deletes) to the Directory. + /// @param triggerMerge if true, we may merge segments (if deletes or docs were flushed) if necessary + /// @param flushDocStores if false we are allowed to keep doc stores open to share with the next segment + /// @param flushDeletes whether pending deletes should also be flushed + virtual void flush(bool triggerMerge, bool flushDocStores, bool flushDeletes); + virtual bool doFlush(bool flushDocStores, bool flushDeletes); + virtual bool doFlushInternal(bool flushDocStores, bool flushDeletes); + + virtual int32_t ensureContiguousMerge(const OneMergePtr& merge); + + /// Carefully merges deletes for the segments we just merged. This is tricky because, although merging + /// will clear all deletes (compacts the documents), new deletes may have been flushed to the segments + /// since the merge was started. This method "carries over" such new deletes onto the newly merged + /// segment, and saves the resulting deletes file (incrementing the delete generation for merge.info). + /// If no deletes were flushed, no new deletes file is saved. 
+ virtual void commitMergedDeletes(const OneMergePtr& merge, const SegmentReaderPtr& mergeReader); + virtual bool commitMerge(const OneMergePtr& merge, const SegmentMergerPtr& merger, int32_t mergedDocCount, const SegmentReaderPtr& mergedReader); + + virtual LuceneException handleMergeException(const LuceneException& exc, const OneMergePtr& merge); + + virtual void _mergeInit(const OneMergePtr& merge); + + virtual void setDiagnostics(const SegmentInfoPtr& info, const String& source); + virtual void setDiagnostics(const SegmentInfoPtr& info, const String& source, MapStringString details); + + virtual void setMergeDocStoreIsCompoundFile(const OneMergePtr& merge); + virtual void closeMergeReaders(const OneMergePtr& merge, bool suppressExceptions); + + /// Does the actual (time-consuming) work of the merge, but without holding synchronized lock on + /// IndexWriter instance. + virtual int32_t mergeMiddle(const OneMergePtr& merge); + + /// Apply buffered deletes to all segments. + virtual bool applyDeletes(); + + virtual String segString(const SegmentInfosPtr& infos); + + virtual bool startSync(const String& fileName, HashSet pending); + virtual void finishSync(const String& fileName, bool success); + + /// Blocks until all files in syncing are sync'd + bool waitForAllSynced(HashSet syncing); + void doWait(); + + /// Walk through all files referenced by the current segmentInfos and ask the Directory to sync each + /// file, if it wasn't already. If that succeeds, then we prepare a new segments_N file but do not + /// fully commit it. + virtual void startCommit(int64_t sizeInBytes, MapStringString commitUserData); + + virtual LuceneException handleOOM(const std::bad_alloc& oom, const String& location); + + friend class ReaderPool; +}; + +/// If {@link #getReader} has been called (ie, this writer is in near real-time mode), then after +/// a merge completes, this class can be invoked to warm the reader on the newly merged segment, +/// before the merge commits. 
This is not required for near real-time search, but will reduce +/// search latency on opening a new near real-time reader after a merge completes. +/// +/// NOTE: warm is called before any deletes have been carried over to the merged segment. +class LPPAPI IndexReaderWarmer : public LuceneObject { +public: + virtual ~IndexReaderWarmer(); + + LUCENE_CLASS(IndexReaderWarmer); + +public: + virtual void warm(const IndexReaderPtr& reader) = 0; +}; + +} + +#endif diff --git a/include/lucene++/InfoStream.h b/include/lucene++/InfoStream.h new file mode 100644 index 00000000..2c649762 --- /dev/null +++ b/include/lucene++/InfoStream.h @@ -0,0 +1,65 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INFOSTREAM_H +#define INFOSTREAM_H + +#include "LuceneObject.h" +#include + +namespace Lucene { + +/// Utility class to support streaming info messages. +class LPPAPI InfoStream : public LuceneObject { +protected: + InfoStream(); + +public: + virtual ~InfoStream(); + LUCENE_CLASS(InfoStream); + +public: + virtual InfoStream& operator<< (const String& t) = 0; +}; + +/// Stream override to write messages to a file. +class LPPAPI InfoStreamFile : public InfoStream { +public: + InfoStreamFile(const String& path); + virtual ~InfoStreamFile(); + + LUCENE_CLASS(InfoStreamFile); + +protected: + boost::filesystem::wofstream file; + +public: + virtual InfoStreamFile& operator<< (const String& t); +}; + +/// Stream override to write messages to a std::cout. +class LPPAPI InfoStreamOut : public InfoStream { +public: + virtual ~InfoStreamOut(); + LUCENE_CLASS(InfoStreamOut); + +public: + virtual InfoStreamOut& operator<< (const String& t); +}; + +/// Null stream override to eat messages. 
+class LPPAPI InfoStreamNull : public InfoStream { +public: + virtual ~InfoStreamNull(); + LUCENE_CLASS(InfoStreamNull); + +public: + virtual InfoStreamNull& operator<< (const String& t); +}; + +} + +#endif diff --git a/include/lucene++/InputStreamReader.h b/include/lucene++/InputStreamReader.h new file mode 100644 index 00000000..a26bbefd --- /dev/null +++ b/include/lucene++/InputStreamReader.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INPUTSTREAMREADER_H +#define INPUTSTREAMREADER_H + +#include "Reader.h" + +namespace Lucene { + +/// An InputStreamReader is a bridge from byte streams to character streams. +class LPPAPI InputStreamReader : public Reader { +public: + /// Create an InputStreamReader that uses the utf8 charset. + InputStreamReader(const ReaderPtr& reader); + virtual ~InputStreamReader(); + + LUCENE_CLASS(InputStreamReader); + +protected: + ReaderPtr reader; + UTF8DecoderStreamPtr decoder; + +public: + /// Read a single character. + virtual int32_t read(); + + /// Read characters into a portion of an array. + virtual int32_t read(wchar_t* b, int32_t offset, int32_t length); + + /// Close the stream. + virtual void close(); + + /// Tell whether this stream supports the mark() operation + virtual bool markSupported(); + + /// Reset the stream. + virtual void reset(); +}; + +} + +#endif diff --git a/include/lucene++/IntBlockPool.h b/include/lucene++/IntBlockPool.h new file mode 100644 index 00000000..e96f1a19 --- /dev/null +++ b/include/lucene++/IntBlockPool.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INTBLOCKPOOL_H +#define INTBLOCKPOOL_H + +#include "LuceneObject.h" + +namespace Lucene { + +class IntBlockPool : public LuceneObject { +public: + IntBlockPool(const DocumentsWriterPtr& docWriter, bool trackAllocations); + virtual ~IntBlockPool(); + + LUCENE_CLASS(IntBlockPool); + +public: + Collection buffers; + + int32_t bufferUpto; // Which buffer we are upto + int32_t intUpto; // Where we are in head buffer + + IntArray buffer; // Current head buffer + int32_t intOffset; // Current head offset + bool trackAllocations; + +protected: + DocumentsWriterWeakPtr _docWriter; + +public: + void reset(); + void nextBuffer(); +}; + +} + +#endif diff --git a/include/lucene++/IntFieldSource.h b/include/lucene++/IntFieldSource.h new file mode 100644 index 00000000..c2473bf4 --- /dev/null +++ b/include/lucene++/IntFieldSource.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INTFIELDSOURCE_H +#define INTFIELDSOURCE_H + +#include "FieldCacheSource.h" + +namespace Lucene { + +/// Obtains int field values from the {@link FieldCache} using getInts() and makes those values available +/// as other numeric types, casting as needed. +/// +/// @see FieldCacheSource for requirements on the field. +/// +/// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite +/// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. 
It's +/// best to switch your application to pass only atomic (single segment) readers to this API. Alternatively, +/// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU +/// per lookup but will not consume double the FieldCache RAM. +class LPPAPI IntFieldSource : public FieldCacheSource { +public: + /// Create a cached int field source with a specific string-to-int parser. + IntFieldSource(const String& field, const IntParserPtr& parser = IntParserPtr()); + virtual ~IntFieldSource(); + + LUCENE_CLASS(IntFieldSource); + +protected: + IntParserPtr parser; + +public: + virtual String description(); + virtual DocValuesPtr getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader); + virtual bool cachedFieldSourceEquals(const FieldCacheSourcePtr& other); + virtual int32_t cachedFieldSourceHashCode(); +}; + +} + +#endif diff --git a/include/lucene++/InvertedDocConsumer.h b/include/lucene++/InvertedDocConsumer.h new file mode 100644 index 00000000..3ef78cb6 --- /dev/null +++ b/include/lucene++/InvertedDocConsumer.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef INVERTEDDOCCONSUMER_H +#define INVERTEDDOCCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class InvertedDocConsumer : public LuceneObject { +public: + virtual ~InvertedDocConsumer(); + + LUCENE_CLASS(InvertedDocConsumer); + +public: + FieldInfosPtr fieldInfos; + +public: + /// Add a new thread + virtual InvertedDocConsumerPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread) = 0; + + /// Abort (called after hitting AbortException) + virtual void abort() = 0; + + /// Flush a new segment + virtual void flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) = 0; + + /// Close doc stores + virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; + + /// Attempt to free RAM, returning true if any RAM was freed + virtual bool freeRAM() = 0; + + virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); +}; + +} + +#endif diff --git a/include/lucene++/InvertedDocConsumerPerField.h b/include/lucene++/InvertedDocConsumerPerField.h new file mode 100644 index 00000000..d5591a5b --- /dev/null +++ b/include/lucene++/InvertedDocConsumerPerField.h @@ -0,0 +1,40 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef INVERTEDDOCCONSUMERPERFIELD_H +#define INVERTEDDOCCONSUMERPERFIELD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class InvertedDocConsumerPerField : public LuceneObject { +public: + virtual ~InvertedDocConsumerPerField(); + + LUCENE_CLASS(InvertedDocConsumerPerField); + +public: + /// Called once per field, and is given all Fieldable occurrences for this field in the document. + /// Return true if you wish to see inverted tokens for these fields + virtual bool start(Collection fields, int32_t count) = 0; + + /// Called before a field instance is being processed + virtual void start(const FieldablePtr& field) = 0; + + /// Called once per inverted token + virtual void add() = 0; + + /// Called once per field per document, after all Fieldable occurrences are inverted + virtual void finish() = 0; + + /// Called on hitting an aborting exception + virtual void abort() = 0; +}; + +} + +#endif diff --git a/include/lucene++/InvertedDocConsumerPerThread.h b/include/lucene++/InvertedDocConsumerPerThread.h new file mode 100644 index 00000000..e66fab7e --- /dev/null +++ b/include/lucene++/InvertedDocConsumerPerThread.h @@ -0,0 +1,29 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef INVERTEDDOCCONSUMERPERTHREAD_H +#define INVERTEDDOCCONSUMERPERTHREAD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class InvertedDocConsumerPerThread : public LuceneObject { +public: + virtual ~InvertedDocConsumerPerThread(); + + LUCENE_CLASS(InvertedDocConsumerPerThread); + +public: + virtual void startDocument() = 0; + virtual InvertedDocConsumerPerFieldPtr addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo) = 0; + virtual DocWriterPtr finishDocument() = 0; + virtual void abort() = 0; +}; + +} + +#endif diff --git a/include/lucene++/InvertedDocEndConsumer.h b/include/lucene++/InvertedDocEndConsumer.h new file mode 100644 index 00000000..86a8a834 --- /dev/null +++ b/include/lucene++/InvertedDocEndConsumer.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef INVERTEDDOCENDCONSUMER_H +#define INVERTEDDOCENDCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class InvertedDocEndConsumer : public LuceneObject { +public: + virtual ~InvertedDocEndConsumer(); + + LUCENE_CLASS(InvertedDocEndConsumer); + +public: + virtual InvertedDocEndConsumerPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread) = 0; + virtual void flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) = 0; + virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; + virtual void abort() = 0; + virtual void setFieldInfos(const FieldInfosPtr& fieldInfos) = 0; +}; + +} + +#endif diff --git a/include/lucene++/InvertedDocEndConsumerPerField.h b/include/lucene++/InvertedDocEndConsumerPerField.h new file mode 100644 index 00000000..51194c00 --- /dev/null +++ b/include/lucene++/InvertedDocEndConsumerPerField.h @@ -0,0 +1,27 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef INVERTEDDOCENDCONSUMERPERFIELD_H +#define INVERTEDDOCENDCONSUMERPERFIELD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class InvertedDocEndConsumerPerField : public LuceneObject { +public: + virtual ~InvertedDocEndConsumerPerField(); + + LUCENE_CLASS(InvertedDocEndConsumerPerField); + +public: + virtual void finish() = 0; + virtual void abort() = 0; +}; + +} + +#endif diff --git a/include/lucene++/InvertedDocEndConsumerPerThread.h b/include/lucene++/InvertedDocEndConsumerPerThread.h new file mode 100644 index 00000000..9a070d34 --- /dev/null +++ b/include/lucene++/InvertedDocEndConsumerPerThread.h @@ -0,0 +1,29 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef INVERTEDDOCENDCONSUMERPERTHREAD_H +#define INVERTEDDOCENDCONSUMERPERTHREAD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class InvertedDocEndConsumerPerThread : public LuceneObject { +public: + virtual ~InvertedDocEndConsumerPerThread(); + + LUCENE_CLASS(InvertedDocEndConsumerPerThread); + +public: + virtual void startDocument() = 0; + virtual InvertedDocEndConsumerPerFieldPtr addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo) = 0; + virtual void finishDocument() = 0; + virtual void abort() = 0; +}; + +} + +#endif diff --git a/include/lucene++/KeepOnlyLastCommitDeletionPolicy.h b/include/lucene++/KeepOnlyLastCommitDeletionPolicy.h new file mode 100644 index 00000000..ceb25104 --- /dev/null +++ b/include/lucene++/KeepOnlyLastCommitDeletionPolicy.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 
2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef KEEPONLYLASTCOMMITDELETIONPOLICY_H +#define KEEPONLYLASTCOMMITDELETIONPOLICY_H + +#include "IndexDeletionPolicy.h" + +namespace Lucene { + +/// This {@link IndexDeletionPolicy} implementation that keeps only the most recent commit and immediately +/// removes all prior commits after a new commit is done. This is the default deletion policy. +class LPPAPI KeepOnlyLastCommitDeletionPolicy : public IndexDeletionPolicy { +public: + virtual ~KeepOnlyLastCommitDeletionPolicy(); + + LUCENE_CLASS(KeepOnlyLastCommitDeletionPolicy); + +public: + /// Deletes all commits except the most recent one. + virtual void onInit(Collection commits); + + /// Deletes all commits except the most recent one. + virtual void onCommit(Collection commits); +}; + +} + +#endif diff --git a/include/lucene++/KeywordAnalyzer.h b/include/lucene++/KeywordAnalyzer.h new file mode 100644 index 00000000..d57351d4 --- /dev/null +++ b/include/lucene++/KeywordAnalyzer.h @@ -0,0 +1,29 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef KEYWORDANALYZER_H +#define KEYWORDANALYZER_H + +#include "Analyzer.h" + +namespace Lucene { + +/// Tokenizes the entire stream as a single token. This is useful for data like zip codes, ids, and some +/// product names. 
+class LPPAPI KeywordAnalyzer : public Analyzer { +public: + virtual ~KeywordAnalyzer(); + + LUCENE_CLASS(KeywordAnalyzer); + +public: + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/KeywordTokenizer.h b/include/lucene++/KeywordTokenizer.h new file mode 100644 index 00000000..e93bbc4e --- /dev/null +++ b/include/lucene++/KeywordTokenizer.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef KEYWORDTOKENIZER_H +#define KEYWORDTOKENIZER_H + +#include "Tokenizer.h" + +namespace Lucene { + +/// Emits the entire input as a single token. 
+class LPPAPI KeywordTokenizer : public Tokenizer { +public: + KeywordTokenizer(const ReaderPtr& input); + KeywordTokenizer(const ReaderPtr& input, int32_t bufferSize); + KeywordTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input, int32_t bufferSize); + KeywordTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input, int32_t bufferSize); + + virtual ~KeywordTokenizer(); + + LUCENE_CLASS(KeywordTokenizer); + +protected: + static const int32_t DEFAULT_BUFFER_SIZE; + + bool done; + int32_t finalOffset; + TermAttributePtr termAtt; + OffsetAttributePtr offsetAtt; + +protected: + void init(int32_t bufferSize); + +public: + virtual bool incrementToken(); + virtual void end(); + virtual void reset(); +}; + +} + +#endif diff --git a/include/lucene++/LengthFilter.h b/include/lucene++/LengthFilter.h new file mode 100644 index 00000000..63f8e9d4 --- /dev/null +++ b/include/lucene++/LengthFilter.h @@ -0,0 +1,37 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LENGTHFILTER_H +#define LENGTHFILTER_H + +#include "TokenFilter.h" + +namespace Lucene { + +/// Removes words that are too long or too short from the stream. +class LPPAPI LengthFilter : public TokenFilter { +public: + /// Build a filter that removes words that are too long or too short from the text. 
+ LengthFilter(const TokenStreamPtr& input, int32_t min, int32_t max); + virtual ~LengthFilter(); + + LUCENE_CLASS(LengthFilter); + +public: + int32_t min; + int32_t max; + +protected: + TermAttributePtr termAtt; + +public: + /// Returns the next input Token whose term() is the right len + virtual bool incrementToken(); +}; + +} + +#endif diff --git a/include/lucene++/LetterTokenizer.h b/include/lucene++/LetterTokenizer.h new file mode 100644 index 00000000..eddb040f --- /dev/null +++ b/include/lucene++/LetterTokenizer.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LETTERTOKENIZER_H +#define LETTERTOKENIZER_H + +#include "CharTokenizer.h" + +namespace Lucene { + +/// A LetterTokenizer is a tokenizer that divides text at non-letters. That's to say, it defines tokens as maximal +/// strings of adjacent letters, as defined UnicodeUtil::isAlpha(c) predicate. +/// +/// Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, where +/// words are not separated by spaces. +class LPPAPI LetterTokenizer : public CharTokenizer { +public: + /// Construct a new LetterTokenizer. + LetterTokenizer(const ReaderPtr& input); + + /// Construct a new LetterTokenizer using a given {@link AttributeSource}. + LetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + + /// Construct a new LetterTokenizer using a given {@link AttributeFactory}. + LetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + + virtual ~LetterTokenizer(); + + LUCENE_CLASS(LetterTokenizer); + +public: + /// Collects only characters which satisfy UnicodeUtil::isAlpha(c). 
+ virtual bool isTokenChar(wchar_t c); +}; + +} + +#endif diff --git a/include/lucene++/LoadFirstFieldSelector.h b/include/lucene++/LoadFirstFieldSelector.h new file mode 100644 index 00000000..ba02dd5b --- /dev/null +++ b/include/lucene++/LoadFirstFieldSelector.h @@ -0,0 +1,28 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LOADFIRSTFIELDSELECTOR_H +#define LOADFIRSTFIELDSELECTOR_H + +#include "FieldSelector.h" + +namespace Lucene { + +/// Load the First field and break. +/// See {@link FieldSelectorResult#LOAD_AND_BREAK} +class LPPAPI LoadFirstFieldSelector : public FieldSelector { +public: + virtual ~LoadFirstFieldSelector(); + + LUCENE_CLASS(LoadFirstFieldSelector); + +public: + virtual FieldSelectorResult accept(const String& fieldName); +}; + +} + +#endif diff --git a/include/lucene++/Lock.h b/include/lucene++/Lock.h new file mode 100644 index 00000000..f27a11ca --- /dev/null +++ b/include/lucene++/Lock.h @@ -0,0 +1,50 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LOCK_H +#define LOCK_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// An interprocess mutex lock. +/// @see Directory#makeLock(const String&) +class LPPAPI Lock : public LuceneObject { +public: + virtual ~Lock(); + LUCENE_CLASS(Lock); + +public: + /// How long {@link #obtain(int64_t)} waits, in milliseconds, in between attempts to acquire the lock. 
+ static const int32_t LOCK_OBTAIN_WAIT_FOREVER; + + /// Pass this value to {@link #obtain(int64_t)} to try forever to obtain the lock. + static const int32_t LOCK_POLL_INTERVAL; + +public: + /// Attempts to obtain exclusive access and immediately return upon success or failure. + /// @return true if exclusive access is obtained. + virtual bool obtain() = 0; + + /// Releases exclusive access. + virtual void release() = 0; + + /// Returns true if the resource is currently locked. Note that one must still call {@link #obtain()} + /// before using the resource. + virtual bool isLocked() = 0; + + /// Attempts to obtain an exclusive lock within amount of time given. Polls once per {@link #LOCK_POLL_INTERVAL} + /// (currently 1000) milliseconds until lockWaitTimeout is passed. + /// @param lockWaitTimeout length of time to wait in milliseconds or {@link #LOCK_OBTAIN_WAIT_FOREVER} + /// to retry forever. + /// @return true if lock was obtained. + bool obtain(int32_t lockWaitTimeout); +}; + +} + +#endif diff --git a/include/lucene++/LockFactory.h b/include/lucene++/LockFactory.h new file mode 100644 index 00000000..b921e5a6 --- /dev/null +++ b/include/lucene++/LockFactory.h @@ -0,0 +1,54 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LOCKFACTORY_H +#define LOCKFACTORY_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Base class for Locking implementation. {@link Directory} uses +/// instances of this class to implement locking. +/// Note that there are some useful tools to verify that +/// your LockFactory is working correctly: {@link +/// VerifyingLockFactory}, {@link LockStressTest}, {@link +/// LockVerifyServer}. 
+/// @see LockVerifyServer +/// @see LockStressTest +/// @see VerifyingLockFactory +class LPPAPI LockFactory : public LuceneObject { +public: + virtual ~LockFactory(); + + LUCENE_CLASS(LockFactory); + +protected: + String lockPrefix; + +public: + /// Set the prefix in use for all locks created in this LockFactory. This is normally called once, when a + /// Directory gets this LockFactory instance. However, you can also call this (after this instance is + /// assigned to a Directory) to override the prefix in use. This is helpful if you're running Lucene on + /// machines that have different mount points for the same shared directory. + virtual void setLockPrefix(const String& lockPrefix); + + /// Get the prefix in use for all locks created in this LockFactory. + virtual String getLockPrefix(); + + /// Return a new Lock instance identified by lockName. + /// @param lockName name of the lock to be created. + virtual LockPtr makeLock(const String& lockName) = 0; + + /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you + /// are certain this lock is no longer in use. + /// @param lockName name of the lock to be cleared. + virtual void clearLock(const String& lockName) = 0; +}; + +} + +#endif diff --git a/include/lucene++/LogByteSizeMergePolicy.h b/include/lucene++/LogByteSizeMergePolicy.h new file mode 100644 index 00000000..83fff274 --- /dev/null +++ b/include/lucene++/LogByteSizeMergePolicy.h @@ -0,0 +1,61 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef LOGBYTESIZEMERGEPOLICY_H +#define LOGBYTESIZEMERGEPOLICY_H + +#include "LogMergePolicy.h" + +namespace Lucene { + +/// This is a {@link LogMergePolicy} that measures size of a segment as the total byte size of the +/// segment's files. +class LPPAPI LogByteSizeMergePolicy : public LogMergePolicy { +public: + LogByteSizeMergePolicy(const IndexWriterPtr& writer); + virtual ~LogByteSizeMergePolicy(); + + LUCENE_CLASS(LogByteSizeMergePolicy); + +public: + /// Default minimum segment size. @see setMinMergeMB. + static const double DEFAULT_MIN_MERGE_MB; + + /// Default maximum segment size. A segment of this size or larger will never be merged. + /// @see setMaxMergeMB + static const double DEFAULT_MAX_MERGE_MB; + +protected: + virtual int64_t size(const SegmentInfoPtr& info); + +public: + /// Determines the largest segment (measured by total byte size of the segment's files, in MB) + /// that may be merged with other segments. Small values (eg., less than 50 MB) are best for + /// interactive indexing, as this limits the length of pauses while indexing to a few seconds. + /// Larger values are best for batched indexing and speedier searches. + /// + /// Note that {@link #setMaxMergeDocs} is also used to check whether a segment is too large for + /// merging (it's either or). + void setMaxMergeMB(double mb); + + /// Returns the largest segment (measured by total byte size of the segment's files, in MB) that + /// may be merged with other segments. @see #setMaxMergeMB + double getMaxMergeMB(); + + /// Sets the minimum size for the lowest level segments. Any segments below this size are + /// considered to be on the same level (even if they vary drastically in size) and will be merged + /// whenever there are mergeFactor of them. This effectively truncates the "long tail" of small + /// segments that would otherwise be created into a single level. 
If you set this too large, it + /// could greatly increase the merging cost during indexing (if you flush many small segments). + void setMinMergeMB(double mb); + + /// Get the minimum size for a segment to remain un-merged. @see #setMinMergeMB + double getMinMergeMB(); +}; + +} + +#endif diff --git a/include/lucene++/LogDocMergePolicy.h b/include/lucene++/LogDocMergePolicy.h new file mode 100644 index 00000000..4d26df83 --- /dev/null +++ b/include/lucene++/LogDocMergePolicy.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LOGDOCMERGEPOLICY_H +#define LOGDOCMERGEPOLICY_H + +#include "LogMergePolicy.h" + +namespace Lucene { + +/// This is a {@link LogMergePolicy} that measures size of a segment as the number of documents +/// (not taking deletions into account). +class LPPAPI LogDocMergePolicy : public LogMergePolicy { +public: + LogDocMergePolicy(const IndexWriterPtr& writer); + virtual ~LogDocMergePolicy(); + + LUCENE_CLASS(LogDocMergePolicy); + +public: + /// Default minimum segment size. @see setMinMergeDocs + static const int32_t DEFAULT_MIN_MERGE_DOCS; + +protected: + virtual int64_t size(const SegmentInfoPtr& info); + +public: + /// Sets the minimum size for the lowest level segments. Any segments below this size are considered + /// to be on the same level (even if they vary drastically in size) and will be merged whenever there + /// are mergeFactor of them. This effectively truncates the "long tail" of small segments that would + /// otherwise be created into a single level. If you set this too large, it could greatly increase the + /// merging cost during indexing (if you flush many small segments). 
+ void setMinMergeDocs(int32_t minMergeDocs); + + /// Get the minimum size for a segment to remain un-merged. @see #setMinMergeDocs + int32_t getMinMergeDocs(); +}; + +} + +#endif diff --git a/include/lucene++/LogMergePolicy.h b/include/lucene++/LogMergePolicy.h new file mode 100644 index 00000000..c4100487 --- /dev/null +++ b/include/lucene++/LogMergePolicy.h @@ -0,0 +1,161 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LOGMERGEPOLICY_H +#define LOGMERGEPOLICY_H + +#include "MergePolicy.h" + +namespace Lucene { + +/// This class implements a {@link MergePolicy} that tries to merge segments into levels of exponentially +/// increasing size, where each level has fewer segments than the value of the merge factor. Whenever extra +/// segments (beyond the merge factor upper bound) are encountered, all segments within the level are merged. +/// You can get or set the merge factor using {@link #getMergeFactor()} and {@link #setMergeFactor(int)} +/// respectively. +/// +/// This class is abstract and requires a subclass to define the {@link #size} method which specifies how a +/// segment's size is determined. {@link LogDocMergePolicy} is one subclass that measures size by document +/// count in the segment. {@link LogByteSizeMergePolicy} is another subclass that measures size as the total +/// byte size of the file(s) for the segment. 
+class LPPAPI LogMergePolicy : public MergePolicy { +public: + LogMergePolicy(const IndexWriterPtr& writer); + virtual ~LogMergePolicy(); + + LUCENE_CLASS(LogMergePolicy); + +protected: + int32_t mergeFactor; + + double noCFSRatio; + + bool calibrateSizeByDeletes; + bool _useCompoundFile; + bool _useCompoundDocStore; + +public: + /// Defines the allowed range of log(size) for each level. A level is computed by taking the max segment + /// log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range. + static const double LEVEL_LOG_SPAN; + + /// Default merge factor, which is how many segments are merged at a time. + static const int32_t DEFAULT_MERGE_FACTOR; + + /// Default maximum segment size. A segment of this size or larger will never be merged. + /// @see setMaxMergeDocs + static const int32_t DEFAULT_MAX_MERGE_DOCS; + + /// Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it. + /// @see #setNoCFSRatio + static const double DEFAULT_NO_CFS_RATIO; + + int64_t minMergeSize; + int64_t maxMergeSize; + int32_t maxMergeDocs; + +public: + /// @see #setNoCFSRatio + double getNoCFSRatio(); + + /// If a merged segment will be more than this percentage of the total size of the index, leave the segment as + /// non-compound file even if compound file is enabled. Set to 1.0 to always use CFS regardless of merge size. + void setNoCFSRatio(double noCFSRatio); + + /// Returns the number of segments that are merged at once and also controls the total number of segments + /// allowed to accumulate in the index. + int32_t getMergeFactor(); + + /// Determines how often segment indices are merged by addDocument(). With smaller values, less RAM is + /// used while indexing, and searches on unoptimized indices are faster, but indexing speed is slower. + /// With larger values, more RAM is used during indexing, and while searches on unoptimized indices are + /// slower, indexing is faster. 
Thus larger values (> 10) are best for batch index creation, and smaller + /// values (< 10) for indices that are interactively maintained. + void setMergeFactor(int32_t mergeFactor); + + /// Returns true if a newly flushed (not from merge) segment should use the compound file format. + virtual bool useCompoundFile(const SegmentInfosPtr& segments, const SegmentInfoPtr& newSegment); + + /// Sets whether compound file format should be used for newly flushed and newly merged segments. + void setUseCompoundFile(bool useCompoundFile); + + /// Returns true if newly flushed and newly merge segments are written in compound file format. + /// @see #setUseCompoundFile + bool getUseCompoundFile(); + + /// Returns true if the doc store files should use the compound file format. + virtual bool useCompoundDocStore(const SegmentInfosPtr& segments); + + /// Sets whether compound file format should be used for newly flushed and newly merged doc store + /// segment files (term vectors and stored fields). + void setUseCompoundDocStore(bool useCompoundDocStore); + + /// Returns true if newly flushed and newly merge doc store segment files (term vectors and stored fields) + /// are written in compound file format. @see #setUseCompoundDocStore + bool getUseCompoundDocStore(); + + /// Sets whether the segment size should be calibrated by the number of deletes when choosing segments + /// for merge. + void setCalibrateSizeByDeletes(bool calibrateSizeByDeletes); + + /// Returns true if the segment size should be calibrated by the number of deletes when choosing segments + /// for merge. + bool getCalibrateSizeByDeletes(); + + /// Release all resources for the policy. + virtual void close(); + + /// Returns the merges necessary to optimize the index. This merge policy defines "optimized" to mean only + /// one segment in the index, where that segment has no deletions pending nor separate norms, and it is in + /// compound file format if the current useCompoundFile setting is true. 
This method returns multiple merges + /// (mergeFactor at a time) so the {@link MergeScheduler} in use may make use of concurrency. + virtual MergeSpecificationPtr findMergesForOptimize(const SegmentInfosPtr& segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize); + + /// Finds merges necessary to expunge all deletes from the index. We simply merge adjacent segments that have + /// deletes, up to mergeFactor at a time. + virtual MergeSpecificationPtr findMergesToExpungeDeletes(const SegmentInfosPtr& segmentInfos); + + /// Checks if any merges are now necessary and returns a {@link MergePolicy.MergeSpecification} if so. A merge + /// is necessary when there are more than {@link #setMergeFactor} segments at a given level. When multiple + /// levels have too many segments, this method will return multiple merges, allowing the {@link MergeScheduler} + /// to use concurrency. + virtual MergeSpecificationPtr findMerges(const SegmentInfosPtr& segmentInfos); + + /// Determines the largest segment (measured by document count) that may be merged with other segments. + /// Small values (eg., less than 10,000) are best for interactive indexing, as this limits the length of + /// pauses while indexing to a few seconds. Larger values are best for batched indexing and speedier searches. + /// + /// The default value is INT_MAX. + /// + /// The default merge policy ({@link LogByteSizeMergePolicy}) also allows you to set this limit by net size + /// (in MB) of the segment, using {@link LogByteSizeMergePolicy#setMaxMergeMB}. + void setMaxMergeDocs(int32_t maxMergeDocs); + + /// Returns the largest segment (measured by document count) that may be merged with other segments. 
+ /// @see #setMaxMergeDocs + int32_t getMaxMergeDocs(); + +protected: + bool verbose(); + void message(const String& message); + + virtual int64_t size(const SegmentInfoPtr& info) = 0; + + int64_t sizeDocs(const SegmentInfoPtr& info); + int64_t sizeBytes(const SegmentInfoPtr& info); + + bool isOptimized(const SegmentInfosPtr& infos, int32_t maxNumSegments, SetSegmentInfo segmentsToOptimize); + + /// Returns true if this single info is optimized (has no pending norms or deletes, is in the same dir as the + /// writer, and matches the current compound file setting + bool isOptimized(const SegmentInfoPtr& info); + + OneMergePtr makeOneMerge(const SegmentInfosPtr& infos, const SegmentInfosPtr& infosToMerge); +}; + +} + +#endif diff --git a/include/lucene++/LowerCaseFilter.h b/include/lucene++/LowerCaseFilter.h new file mode 100644 index 00000000..8679436f --- /dev/null +++ b/include/lucene++/LowerCaseFilter.h @@ -0,0 +1,31 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LOWERCASEFILTER_H +#define LOWERCASEFILTER_H + +#include "TokenFilter.h" + +namespace Lucene { + +/// Normalizes token text to lower case. 
+class LPPAPI LowerCaseFilter : public TokenFilter { +public: + LowerCaseFilter(const TokenStreamPtr& input); + virtual ~LowerCaseFilter(); + + LUCENE_CLASS(LowerCaseFilter); + +protected: + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + +} + +#endif diff --git a/include/lucene++/LowerCaseTokenizer.h b/include/lucene++/LowerCaseTokenizer.h new file mode 100644 index 00000000..769e10cd --- /dev/null +++ b/include/lucene++/LowerCaseTokenizer.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LOWERCASETOKENIZER_H +#define LOWERCASETOKENIZER_H + +#include "LetterTokenizer.h" + +namespace Lucene { + +/// LowerCaseTokenizer performs the function of LetterTokenizer and LowerCaseFilter together. It divides text at +/// non-letters and converts them to lower case. While it is functionally equivalent to the combination of +/// LetterTokenizer and LowerCaseFilter, there is a performance advantage to doing the two tasks at once, hence +/// this (redundant) implementation. +/// +/// Note: this does a decent job for most European languages, but does a terrible job for some Asian languages, +/// where words are not separated by spaces. +class LPPAPI LowerCaseTokenizer : public LetterTokenizer { +public: + /// Construct a new LowerCaseTokenizer. + LowerCaseTokenizer(const ReaderPtr& input); + + /// Construct a new LowerCaseTokenizer using a given {@link AttributeSource}. + LowerCaseTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + + /// Construct a new LowerCaseTokenizer using a given {@link AttributeFactory}. 
+ LowerCaseTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + + virtual ~LowerCaseTokenizer(); + + LUCENE_CLASS(LowerCaseTokenizer); + +public: + /// Converts char to lower case CharFolder::toLower. + virtual wchar_t normalize(wchar_t c); +}; + +} + +#endif diff --git a/include/lucene++/Lucene.h b/include/lucene++/Lucene.h new file mode 100644 index 00000000..ee2faff2 --- /dev/null +++ b/include/lucene++/Lucene.h @@ -0,0 +1,225 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LUCENE_H +#define LUCENE_H + +#include "Config.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef LPP_USE_BOOST_INTEGER +#include +#endif + +#include +#include +#include +#include + +#ifdef LPP_USE_BOOST_INTEGER +using boost::int8_t; +using boost::uint8_t; +using boost::int16_t; +using boost::uint16_t; +using boost::int32_t; +using boost::uint32_t; +using boost::int64_t; +using boost::uint64_t; +#endif + +#define SIZEOF_ARRAY(arr) (sizeof(arr) / sizeof((arr)[0])) + +#include "LuceneTypes.h" +#include "LuceneAllocator.h" + +namespace boost { + +struct blank; +class thread; +class any; +template < typename Signature > class function; +namespace interprocess { + +class file_lock; +} +namespace posix_time { + +class ptime; +} + +} + +namespace Lucene { + +typedef std::basic_string< char, std::char_traits > SingleString; +typedef std::basic_ostringstream< char, std::char_traits > SingleStringStream; +typedef std::basic_string< wchar_t, std::char_traits > String; +typedef std::basic_ostringstream< wchar_t, std::char_traits > StringStream; + +const std::basic_string< wchar_t, std::char_traits > EmptyString; + +typedef 
boost::shared_ptr filelockPtr; +typedef boost::shared_ptr threadPtr; + +typedef boost::shared_ptr ofstreamPtr; +typedef boost::shared_ptr ifstreamPtr; +typedef boost::shared_ptr localePtr; +} + +#include "LuceneFactory.h" +#include "LuceneException.h" +#include "Array.h" +#include "Collection.h" +#include "Map.h" +#include "Set.h" +#include "HashMap.h" +#include "HashSet.h" +#include "Constants.h" + +namespace Lucene { + +typedef Array ByteArray; +typedef Array IntArray; +typedef Array LongArray; +typedef Array CharArray; +typedef Array DoubleArray; + +template +struct luceneEquals { + inline bool operator()(const TYPE& first, const TYPE& second) const { + return first ? first->equals(second) : (!first && !second); + } +}; + +template +struct luceneEqualTo { + luceneEqualTo(const TYPE& type) : equalType(type) {} + inline bool operator()(const TYPE& other) const { + return equalType->equals(other); + } + const TYPE& equalType; +}; + +template +struct luceneWeakEquals { + inline bool operator()(const TYPE& first, const TYPE& second) const { + if (first.expired() || second.expired()) { + return (first.expired() && second.expired()); + } + return first.lock()->equals(second.lock()); + } +}; + +template +struct luceneHash { + std::size_t operator()(const TYPE& type) const { + return type ? type->hashCode() : 0; + } +}; + +template +struct luceneWeakHash { + std::size_t operator()(const TYPE& type) const { + return type.expired() ? 
0 : type.lock()->hashCode(); + } +}; + +template +struct luceneCompare { + inline bool operator()(const TYPE& first, const TYPE& second) const { + if (!second) { + return false; + } + if (!first) { + return true; + } + return (first->compareTo(second) < 0); + } +}; + +typedef boost::blank VariantNull; +typedef boost::variant FieldsData; +typedef boost::variant ComparableValue; +typedef boost::variant NumericValue; +typedef boost::variant StringValue; +typedef boost::variant, Collection, Collection, VariantNull> CollectionValue; + +typedef HashSet< SegmentInfoPtr, luceneHash, luceneEquals > SetSegmentInfo; +typedef HashSet< MergeThreadPtr, luceneHash, luceneEquals > SetMergeThread; +typedef HashSet< OneMergePtr, luceneHash, luceneEquals > SetOneMerge; +typedef HashSet< QueryPtr, luceneHash, luceneEquals > SetQuery; +typedef HashSet< TermPtr, luceneHash, luceneEquals > SetTerm; +typedef HashSet< BooleanClausePtr, luceneHash, luceneEquals > SetBooleanClause; +typedef HashSet< ReaderFieldPtr, luceneHash, luceneEquals > SetReaderField; +typedef HashSet SetByteArray; + +typedef HashMap< String, String > MapStringString; +typedef HashMap< wchar_t, NormalizeCharMapPtr > MapCharNormalizeCharMap; +typedef HashMap< String, AnalyzerPtr > MapStringAnalyzer; +typedef HashMap< String, ByteArray > MapStringByteArray; +typedef HashMap< String, int32_t > MapStringInt; +typedef HashMap< String, FieldInfoPtr > MapStringFieldInfo; +typedef HashMap< String, Collection > MapStringCollectionTermVectorEntry; +typedef HashMap< String, RefCountPtr > MapStringRefCount; +typedef HashMap< int32_t, TermVectorsPositionInfoPtr > MapIntTermVectorsPositionInfo; +typedef HashMap< String, MapIntTermVectorsPositionInfo > MapStringMapIntTermVectorsPositionInfo; +typedef HashMap< String, NormPtr > MapStringNorm; +typedef HashMap< String, TermVectorEntryPtr > MapStringTermVectorEntry; +typedef HashMap< String, RAMFilePtr > MapStringRAMFile; +typedef HashMap< int32_t, ByteArray > MapIntByteArray; +typedef 
HashMap< int32_t, FilterItemPtr > MapIntFilterItem; +typedef HashMap< int32_t, double > MapIntDouble; +typedef HashMap< int64_t, int32_t > MapLongInt; +typedef HashMap< String, double > MapStringDouble; +typedef HashMap< int32_t, CachePtr > MapStringCache; +typedef HashMap< String, LockPtr > MapStringLock; + +typedef HashMap< SegmentInfoPtr, SegmentReaderPtr, luceneHash, luceneEquals > MapSegmentInfoSegmentReader; +typedef HashMap< SegmentInfoPtr, int32_t, luceneHash, luceneEquals > MapSegmentInfoInt; +typedef HashMap< DocFieldConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField; +typedef HashMap< InvertedDocConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField; +typedef HashMap< InvertedDocEndConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField; +typedef HashMap< TermsHashConsumerPerThreadPtr, Collection, luceneHash, luceneEquals > MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField; +typedef HashMap< FieldInfoPtr, Collection, luceneHash, luceneEquals > MapFieldInfoCollectionNormsWriterPerField; +typedef HashMap< IndexReaderPtr, HashSet, luceneHash, luceneEquals > MapIndexReaderSetString; +typedef HashMap< TermPtr, int32_t, luceneHash, luceneEquals > MapTermInt; +typedef HashMap< QueryPtr, int32_t, luceneHash, luceneEquals > MapQueryInt; +typedef HashMap< EntryPtr, boost::any, luceneHash, luceneEquals > MapEntryAny; +typedef HashMap< PhrasePositionsPtr, LuceneObjectPtr, luceneHash, luceneEquals > MapPhrasePositionsLuceneObject; +typedef HashMap< ReaderFieldPtr, SetReaderField, luceneHash, luceneEquals > MapReaderFieldSetReaderField; + +typedef WeakHashMap< LuceneObjectWeakPtr, LuceneObjectPtr, luceneWeakHash, luceneWeakEquals > WeakMapObjectObject; +typedef WeakHashMap< LuceneObjectWeakPtr, MapEntryAny, luceneWeakHash, 
luceneWeakEquals > WeakMapLuceneObjectMapEntryAny; + +typedef Map< String, AttributePtr > MapStringAttribute; +typedef Map< int64_t, DocumentsWriterThreadStatePtr > MapThreadDocumentsWriterThreadState; +typedef Map< String, IndexReaderPtr > MapStringIndexReader; +typedef Map< TermPtr, NumPtr, luceneCompare > MapTermNum; + +typedef boost::function TermVectorEntryComparator; + +template < class KEY, class VALUE, class HASH = boost::hash, class EQUAL = std::equal_to > class SimpleLRUCache; +typedef SimpleLRUCache< TermPtr, TermInfoPtr, luceneHash, luceneEquals > TermInfoCache; +typedef boost::shared_ptr TermInfoCachePtr; +} + +#include "Synchronize.h" +#include "CycleCheck.h" +#if defined(LPP_BUILDING_LIB) || defined(LPP_EXPOSE_INTERNAL) +#define INTERNAL public +#else +#define INTERNAL protected +#endif + +#endif diff --git a/include/lucene++/LuceneAllocator.h b/include/lucene++/LuceneAllocator.h new file mode 100644 index 00000000..b1436e12 --- /dev/null +++ b/include/lucene++/LuceneAllocator.h @@ -0,0 +1,24 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ALLOCATOR_H +#define ALLOCATOR_H + +#include "Config.h" + +namespace Lucene { + +/// Allocate block of memory. +LPPAPI void* AllocMemory(size_t size); + +/// Reallocate a given block of memory. +LPPAPI void* ReallocMemory(void* memory, size_t size); + +/// Release a given block of memory. 
+LPPAPI void FreeMemory(void* memory); +} + +#endif diff --git a/include/lucene++/LuceneException.h b/include/lucene++/LuceneException.h new file mode 100644 index 00000000..230d8b55 --- /dev/null +++ b/include/lucene++/LuceneException.h @@ -0,0 +1,102 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LUCENEEXCEPTION_H +#define LUCENEEXCEPTION_H + +#include "Lucene.h" + +namespace Lucene { + +/// Lucene exception container. +class LPPAPI LuceneException : public std::exception { +public: + enum ExceptionType { + Null, + AlreadyClosed, + Compression, + CorruptIndex, + FieldReader, + FileNotFound, + IllegalArgument, + IllegalState, + IndexOutOfBounds, + IO, + LockObtainFailed, + LockReleaseFailed, + Lookahead, + MergeAborted, + Merge, + NoSuchDirectory, + NullPointer, + NumberFormat, + OutOfMemory, + Parse, + QueryParser, + Runtime, + StaleReader, + StopFillCache, + Temporary, + TimeExceeded, + TooManyClauses, + UnsupportedOperation + }; + + LuceneException(const String& error = EmptyString, LuceneException::ExceptionType type = Null) throw(); + ~LuceneException() throw(); + +protected: + ExceptionType type; + String error; + + std::string _what; + +public: + ExceptionType getType() const; + String getError() const; + bool isNull() const; + void throwException(); + + virtual const char* what() const throw(); +}; + +template +class LPPAPI ExceptionTemplate : public ParentException { +public: + ExceptionTemplate(const String& error = EmptyString, LuceneException::ExceptionType type = Type) : ParentException(error, type) { + } +}; + +typedef ExceptionTemplate RuntimeException; +typedef ExceptionTemplate OutOfMemoryError; +typedef ExceptionTemplate TemporaryException; 
+typedef ExceptionTemplate IllegalStateException; +typedef ExceptionTemplate IllegalArgumentException; +typedef ExceptionTemplate IndexOutOfBoundsException; +typedef ExceptionTemplate NullPointerException; +typedef ExceptionTemplate FieldReaderException; +typedef ExceptionTemplate MergeException; +typedef ExceptionTemplate StopFillCacheException; +typedef ExceptionTemplate TimeExceededException; +typedef ExceptionTemplate TooManyClausesException; +typedef ExceptionTemplate UnsupportedOperationException; +typedef ExceptionTemplate NumberFormatException; +typedef ExceptionTemplate AlreadyClosedException; +typedef ExceptionTemplate IOException; +typedef ExceptionTemplate CorruptIndexException; +typedef ExceptionTemplate FileNotFoundException; +typedef ExceptionTemplate LockObtainFailedException; +typedef ExceptionTemplate LockReleaseFailedException; +typedef ExceptionTemplate MergeAbortedException; +typedef ExceptionTemplate StaleReaderException; +typedef ExceptionTemplate NoSuchDirectoryException; +typedef ExceptionTemplate LookaheadSuccess; +typedef ExceptionTemplate ParseException; +typedef ExceptionTemplate QueryParserError; +typedef ExceptionTemplate CompressionException; +} + +#endif diff --git a/include/lucene++/LuceneFactory.h b/include/lucene++/LuceneFactory.h new file mode 100644 index 00000000..bf4bd68c --- /dev/null +++ b/include/lucene++/LuceneFactory.h @@ -0,0 +1,181 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef LUCENEFACTORY_H +#define LUCENEFACTORY_H + +#include +#include + +namespace Lucene { + +template +boost::shared_ptr newInstance() { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T); +#else + return boost::make_shared(); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1)); +#else + return boost::make_shared(a1); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1, A2 const& a2) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1, a2)); +#else + return boost::make_shared(a1, a2); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1, a2, a3)); +#else + return boost::make_shared(a1, a2, a3); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1, a2, a3, a4)); +#else + return boost::make_shared(a1, a2, a3, a4); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1, a2, a3, a4, a5)); +#else + return boost::make_shared(a1, a2, a3, a4, a5); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6)); +#else + return boost::make_shared(a1, a2, a3, a4, a5, a6); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7)); +#else + return 
boost::make_shared(a1, a2, a3, a4, a5, a6, a7); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8)); +#else + return boost::make_shared(a1, a2, a3, a4, a5, a6, a7, a8); +#endif +} + +template +boost::shared_ptr newInstance(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8, A9 const& a9) { +#if BOOST_VERSION <= 103800 + return boost::shared_ptr(new T(a1, a2, a3, a4, a5, a6, a7, a8, a9)); +#else + return boost::make_shared(a1, a2, a3, a4, a5, a6, a7, a8, a9); +#endif +} + +template +boost::shared_ptr newLucene() { +#if BOOST_VERSION <= 103800 + boost::shared_ptr instance = boost::shared_ptr(new T); +#else + boost::shared_ptr instance = boost::make_shared(); +#endif + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1) { + boost::shared_ptr instance(newInstance(a1)); + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1, A2 const& a2) { + boost::shared_ptr instance(newInstance(a1, a2)); + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3) { + boost::shared_ptr instance(newInstance(a1, a2, a3)); + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4) { + boost::shared_ptr instance(newInstance(a1, a2, a3, a4)); + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5) { + boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5)); + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1, A2 const& 
a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6) { + boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6)); + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7) { + boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7)); + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8) { + boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7, a8)); + instance->initialize(); + return instance; +} + +template +boost::shared_ptr newLucene(A1 const& a1, A2 const& a2, A3 const& a3, A4 const& a4, A5 const& a5, A6 const& a6, A7 const& a7, A8 const& a8, A9 const& a9) { + boost::shared_ptr instance(newInstance(a1, a2, a3, a4, a5, a6, a7, a8, a9)); + instance->initialize(); + return instance; +} + +} + +#endif diff --git a/include/LuceneHeaders.h b/include/lucene++/LuceneHeaders.h similarity index 98% rename from include/LuceneHeaders.h rename to include/lucene++/LuceneHeaders.h index 76759cc6..aa58d632 100644 --- a/include/LuceneHeaders.h +++ b/include/lucene++/LuceneHeaders.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// diff --git a/include/lucene++/LuceneObject.h b/include/lucene++/LuceneObject.h new file mode 100644 index 00000000..54363dbd --- /dev/null +++ b/include/lucene++/LuceneObject.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LUCENEOBJECT_H +#define LUCENEOBJECT_H + +#include +#include "LuceneSync.h" + +#ifdef LPP_USE_CYCLIC_CHECK +#define LUCENE_INTERFACE(Name) \ + static String _getClassName() { return L###Name; } \ + virtual String getClassName() { return L###Name; } \ + CycleCheckT cycleCheck; +#else +#define LUCENE_INTERFACE(Name) \ + static String _getClassName() { return L###Name; } \ + virtual String getClassName() { return L###Name; } +#endif + +#define LUCENE_CLASS(Name) \ + LUCENE_INTERFACE(Name); \ + boost::shared_ptr shared_from_this() { return boost::static_pointer_cast(LuceneObject::shared_from_this()); } \ + +namespace Lucene { + +/// Base class for all Lucene classes +class LPPAPI LuceneObject : public LuceneSync, public boost::enable_shared_from_this { +public: + virtual ~LuceneObject(); + +protected: + LuceneObject(); + +public: + /// Called directly after instantiation to create objects that depend on this object being + /// fully constructed. + virtual void initialize(); + + /// Return clone of this object + /// @param other clone reference - null when called initially, then set in top virtual override. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Return hash code for this object. 
+ virtual int32_t hashCode(); + + /// Return whether two objects are equal + virtual bool equals(const LuceneObjectPtr& other); + + /// Compare two objects + virtual int32_t compareTo(const LuceneObjectPtr& other); + + /// Returns a string representation of the object + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/LuceneSignal.h b/include/lucene++/LuceneSignal.h new file mode 100644 index 00000000..c809a6be --- /dev/null +++ b/include/lucene++/LuceneSignal.h @@ -0,0 +1,39 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LUCENESIGNAL_H +#define LUCENESIGNAL_H + +#include +#include "Lucene.h" + +namespace Lucene { + +/// Utility class to support signaling notifications. +class LPPAPI LuceneSignal { +public: + LuceneSignal(const SynchronizePtr& objectLock = SynchronizePtr()); + virtual ~LuceneSignal(); + +protected: + boost::mutex waitMutex; + boost::condition signalCondition; + SynchronizePtr objectLock; + +public: + /// create a new LuceneSignal instance atomically. + static void createSignal(LuceneSignalPtr& signal, const SynchronizePtr& objectLock); + + /// Wait for signal using an optional timeout. + void wait(int32_t timeout = 0); + + /// Notify all threads waiting for signal. + void notifyAll(); +}; + +} + +#endif diff --git a/include/lucene++/LuceneSync.h b/include/lucene++/LuceneSync.h new file mode 100644 index 00000000..4325e8d9 --- /dev/null +++ b/include/lucene++/LuceneSync.h @@ -0,0 +1,48 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LUCENESYNC_H +#define LUCENESYNC_H + +#include "Lucene.h" + +namespace Lucene { + +/// Base class for all Lucene synchronised classes +class LPPAPI LuceneSync { +public: + virtual ~LuceneSync(); + +protected: + SynchronizePtr objectLock; + LuceneSignalPtr objectSignal; + +public: + /// Return this object synchronize lock. + virtual SynchronizePtr getSync(); + + /// Return this object signal. + virtual LuceneSignalPtr getSignal(); + + /// Lock this object using an optional timeout. + virtual void lock(int32_t timeout = 0); + + /// Unlock this object. + virtual void unlock(); + + /// Returns true if this object is currently locked by current thread. + virtual bool holdsLock(); + + /// Wait for signal using an optional timeout. + virtual void wait(int32_t timeout = 0); + + /// Notify all threads waiting for signal. + virtual void notifyAll(); +}; + +} + +#endif diff --git a/include/lucene++/LuceneThread.h b/include/lucene++/LuceneThread.h new file mode 100644 index 00000000..2a5c33a2 --- /dev/null +++ b/include/lucene++/LuceneThread.h @@ -0,0 +1,86 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LUCENETHREAD_H +#define LUCENETHREAD_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Lucene thread container. +/// +/// It seems there are major issues with using boost::thread::id under Windows. 
+/// After many hours of debugging and trying various strategies, I was unable to fix an +/// occasional crash whereby boost::thread::thread_data was being deleted prematurely. +/// +/// This problem is most visible when running the AtomicUpdateTest test suite. +/// +/// Therefore, I now uniquely identify threads by their native id. +class LPPAPI LuceneThread : public LuceneObject { +public: + LuceneThread(); + virtual ~LuceneThread(); + + LUCENE_CLASS(LuceneThread); + +public: + static const int32_t MAX_THREAD_PRIORITY; + static const int32_t NORM_THREAD_PRIORITY; + static const int32_t MIN_THREAD_PRIORITY; + +protected: + threadPtr thread; + + /// Flag to indicate running thread. + /// @see #isAlive + bool running; + +public: + /// start thread see {@link #run}. + virtual void start(); + + /// return whether thread is current running. + virtual bool isAlive(); + + /// set running thread priority. + virtual void setPriority(int32_t priority); + + /// return running thread priority. + virtual int32_t getPriority(); + + /// wait for thread to finish using an optional timeout. + virtual bool join(int32_t timeout = 0); + + /// causes the currently executing thread object to temporarily pause and allow other threads to execute. + virtual void yield(); + + /// override to provide the body of the thread. + virtual void run() = 0; + + /// Return representation of current execution thread. + static int64_t currentId(); + + /// Suspends current execution thread for a given time. + static void threadSleep(int32_t time); + + /// Yield current execution thread. + static void threadYield(); + +protected: + /// set thread running state. + void setRunning(bool running); + + /// return thread running state. + bool isRunning(); + + /// function that controls the lifetime of the running thread. 
+ static void runThread(LuceneThread* thread); +}; + +} + +#endif diff --git a/include/lucene++/LuceneTypes.h b/include/lucene++/LuceneTypes.h new file mode 100644 index 00000000..2a1f03e0 --- /dev/null +++ b/include/lucene++/LuceneTypes.h @@ -0,0 +1,563 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef LUCENETYPES_H +#define LUCENETYPES_H + +#include "Lucene.h" + +#define DECLARE_SHARED_PTR(Type) \ + class Type; \ + typedef boost::shared_ptr Type##Ptr; \ + typedef boost::weak_ptr Type##WeakPtr; + +namespace Lucene { + +// analysis +DECLARE_SHARED_PTR(Analyzer) +DECLARE_SHARED_PTR(ASCIIFoldingFilter) +DECLARE_SHARED_PTR(BaseCharFilter) +DECLARE_SHARED_PTR(CachingTokenFilter) +DECLARE_SHARED_PTR(CharArraySet) +DECLARE_SHARED_PTR(CharFilter) +DECLARE_SHARED_PTR(CharReader) +DECLARE_SHARED_PTR(CharStream) +DECLARE_SHARED_PTR(CharTokenizer) +DECLARE_SHARED_PTR(FlagsAttribute) +DECLARE_SHARED_PTR(ISOLatin1AccentFilter) +DECLARE_SHARED_PTR(KeywordAnalyzer) +DECLARE_SHARED_PTR(KeywordTokenizer) +DECLARE_SHARED_PTR(LengthFilter) +DECLARE_SHARED_PTR(LetterTokenizer) +DECLARE_SHARED_PTR(LowerCaseFilter) +DECLARE_SHARED_PTR(LowerCaseTokenizer) +DECLARE_SHARED_PTR(MappingCharFilter) +DECLARE_SHARED_PTR(NormalizeCharMap) +DECLARE_SHARED_PTR(NumericTokenStream) +DECLARE_SHARED_PTR(OffsetAttribute) +DECLARE_SHARED_PTR(PayloadAttribute) +DECLARE_SHARED_PTR(PerFieldAnalyzerWrapper) +DECLARE_SHARED_PTR(PorterStemFilter) +DECLARE_SHARED_PTR(PorterStemmer) +DECLARE_SHARED_PTR(PositionIncrementAttribute) +DECLARE_SHARED_PTR(SimpleAnalyzer) +DECLARE_SHARED_PTR(SinkFilter) +DECLARE_SHARED_PTR(SinkTokenStream) +DECLARE_SHARED_PTR(StandardAnalyzer) 
+DECLARE_SHARED_PTR(StandardAnalyzerSavedStreams) +DECLARE_SHARED_PTR(StandardFilter) +DECLARE_SHARED_PTR(StandardTokenizer) +DECLARE_SHARED_PTR(StandardTokenizerImpl) +DECLARE_SHARED_PTR(StopAnalyzer) +DECLARE_SHARED_PTR(StopAnalyzerSavedStreams) +DECLARE_SHARED_PTR(StopFilter) +DECLARE_SHARED_PTR(TeeSinkTokenFilter) +DECLARE_SHARED_PTR(TermAttribute) +DECLARE_SHARED_PTR(Token) +DECLARE_SHARED_PTR(TokenAttributeFactory) +DECLARE_SHARED_PTR(TokenFilter) +DECLARE_SHARED_PTR(Tokenizer) +DECLARE_SHARED_PTR(TokenStream) +DECLARE_SHARED_PTR(TypeAttribute) +DECLARE_SHARED_PTR(WhitespaceAnalyzer) +DECLARE_SHARED_PTR(WhitespaceTokenizer) +DECLARE_SHARED_PTR(WordlistLoader) + +// document +DECLARE_SHARED_PTR(AbstractField) +DECLARE_SHARED_PTR(CompressionTools) +DECLARE_SHARED_PTR(DateField) +DECLARE_SHARED_PTR(DateTools) +DECLARE_SHARED_PTR(Document) +DECLARE_SHARED_PTR(Field) +DECLARE_SHARED_PTR(Fieldable) +DECLARE_SHARED_PTR(FieldSelector) +DECLARE_SHARED_PTR(LoadFirstFieldSelector) +DECLARE_SHARED_PTR(MapFieldSelector) +DECLARE_SHARED_PTR(NumberTools) +DECLARE_SHARED_PTR(NumericField) +DECLARE_SHARED_PTR(SetBasedFieldSelector) + +// index +DECLARE_SHARED_PTR(AbstractAllTermDocs) +DECLARE_SHARED_PTR(AllTermDocs) +DECLARE_SHARED_PTR(BufferedDeletes) +DECLARE_SHARED_PTR(ByteBlockAllocator) +DECLARE_SHARED_PTR(ByteBlockPool) +DECLARE_SHARED_PTR(ByteBlockPoolAllocatorBase) +DECLARE_SHARED_PTR(ByteSliceReader) +DECLARE_SHARED_PTR(ByteSliceWriter) +DECLARE_SHARED_PTR(CharBlockPool) +DECLARE_SHARED_PTR(CheckAbort) +DECLARE_SHARED_PTR(CheckIndex) +DECLARE_SHARED_PTR(CommitPoint) +DECLARE_SHARED_PTR(CompoundFileReader) +DECLARE_SHARED_PTR(CompoundFileWriter) +DECLARE_SHARED_PTR(ConcurrentMergeScheduler) +DECLARE_SHARED_PTR(CoreReaders) +DECLARE_SHARED_PTR(CSIndexInput) +DECLARE_SHARED_PTR(DefaultIndexingChain) +DECLARE_SHARED_PTR(DefaultSkipListReader) +DECLARE_SHARED_PTR(DefaultSkipListWriter) +DECLARE_SHARED_PTR(DirectoryReader) +DECLARE_SHARED_PTR(DocConsumer) 
+DECLARE_SHARED_PTR(DocConsumerPerThread) +DECLARE_SHARED_PTR(DocFieldConsumer) +DECLARE_SHARED_PTR(DocFieldConsumerPerField) +DECLARE_SHARED_PTR(DocFieldConsumerPerThread) +DECLARE_SHARED_PTR(DocFieldConsumers) +DECLARE_SHARED_PTR(DocFieldConsumersPerDoc) +DECLARE_SHARED_PTR(DocFieldConsumersPerField) +DECLARE_SHARED_PTR(DocFieldConsumersPerThread) +DECLARE_SHARED_PTR(DocFieldProcessor) +DECLARE_SHARED_PTR(DocFieldProcessorPerField) +DECLARE_SHARED_PTR(DocFieldProcessorPerThread) +DECLARE_SHARED_PTR(DocFieldProcessorPerThreadPerDoc) +DECLARE_SHARED_PTR(DocInverter) +DECLARE_SHARED_PTR(DocInverterPerField) +DECLARE_SHARED_PTR(DocInverterPerThread) +DECLARE_SHARED_PTR(DocState) +DECLARE_SHARED_PTR(DocumentsWriter) +DECLARE_SHARED_PTR(DocumentsWriterThreadState) +DECLARE_SHARED_PTR(DocWriter) +DECLARE_SHARED_PTR(FieldInfo) +DECLARE_SHARED_PTR(FieldInfos) +DECLARE_SHARED_PTR(FieldInvertState) +DECLARE_SHARED_PTR(FieldNormStatus) +DECLARE_SHARED_PTR(FieldSortedTermVectorMapper) +DECLARE_SHARED_PTR(FieldsReader) +DECLARE_SHARED_PTR(FieldsReaderLocal) +DECLARE_SHARED_PTR(FieldsWriter) +DECLARE_SHARED_PTR(FilterIndexReader) +DECLARE_SHARED_PTR(FindSegmentsModified) +DECLARE_SHARED_PTR(FindSegmentsOpen) +DECLARE_SHARED_PTR(FindSegmentsRead) +DECLARE_SHARED_PTR(FindSegmentsReopen) +DECLARE_SHARED_PTR(FormatPostingsDocsConsumer) +DECLARE_SHARED_PTR(FormatPostingsDocsWriter) +DECLARE_SHARED_PTR(FormatPostingsFieldsConsumer) +DECLARE_SHARED_PTR(FormatPostingsFieldsWriter) +DECLARE_SHARED_PTR(FormatPostingsPositionsConsumer) +DECLARE_SHARED_PTR(FormatPostingsPositionsWriter) +DECLARE_SHARED_PTR(FormatPostingsTermsConsumer) +DECLARE_SHARED_PTR(FormatPostingsTermsWriter) +DECLARE_SHARED_PTR(FreqProxFieldMergeState) +DECLARE_SHARED_PTR(FreqProxTermsWriter) +DECLARE_SHARED_PTR(FreqProxTermsWriterPerField) +DECLARE_SHARED_PTR(FreqProxTermsWriterPerThread) +DECLARE_SHARED_PTR(FreqProxTermsWriterPostingList) +DECLARE_SHARED_PTR(IndexCommit) +DECLARE_SHARED_PTR(IndexDeletionPolicy) 
+DECLARE_SHARED_PTR(IndexFileDeleter) +DECLARE_SHARED_PTR(IndexFileNameFilter) +DECLARE_SHARED_PTR(IndexingChain) +DECLARE_SHARED_PTR(IndexReader) +DECLARE_SHARED_PTR(IndexReaderWarmer) +DECLARE_SHARED_PTR(IndexStatus) +DECLARE_SHARED_PTR(IndexWriter) +DECLARE_SHARED_PTR(IntBlockPool) +DECLARE_SHARED_PTR(IntQueue) +DECLARE_SHARED_PTR(InvertedDocConsumer) +DECLARE_SHARED_PTR(InvertedDocConsumerPerField) +DECLARE_SHARED_PTR(InvertedDocConsumerPerThread) +DECLARE_SHARED_PTR(InvertedDocEndConsumer) +DECLARE_SHARED_PTR(InvertedDocEndConsumerPerField) +DECLARE_SHARED_PTR(InvertedDocEndConsumerPerThread) +DECLARE_SHARED_PTR(KeepOnlyLastCommitDeletionPolicy) +DECLARE_SHARED_PTR(LogByteSizeMergePolicy) +DECLARE_SHARED_PTR(LogDocMergePolicy) +DECLARE_SHARED_PTR(LogMergePolicy) +DECLARE_SHARED_PTR(MergeDocIDRemapper) +DECLARE_SHARED_PTR(MergePolicy) +DECLARE_SHARED_PTR(MergeScheduler) +DECLARE_SHARED_PTR(MergeSpecification) +DECLARE_SHARED_PTR(MergeThread) +DECLARE_SHARED_PTR(MultiLevelSkipListReader) +DECLARE_SHARED_PTR(MultiLevelSkipListWriter) +DECLARE_SHARED_PTR(MultipleTermPositions) +DECLARE_SHARED_PTR(MultiReader) +DECLARE_SHARED_PTR(MultiTermDocs) +DECLARE_SHARED_PTR(MultiTermEnum) +DECLARE_SHARED_PTR(MultiTermPositions) +DECLARE_SHARED_PTR(MyCommitPoint) +DECLARE_SHARED_PTR(MySegmentTermDocs) +DECLARE_SHARED_PTR(Norm) +DECLARE_SHARED_PTR(NormsWriter) +DECLARE_SHARED_PTR(NormsWriterPerField) +DECLARE_SHARED_PTR(NormsWriterPerThread) +DECLARE_SHARED_PTR(Num) +DECLARE_SHARED_PTR(OneMerge) +DECLARE_SHARED_PTR(ParallelArrayTermVectorMapper) +DECLARE_SHARED_PTR(ParallelReader) +DECLARE_SHARED_PTR(ParallelTermEnum) +DECLARE_SHARED_PTR(ParallelTermDocs) +DECLARE_SHARED_PTR(ParallelTermPositions) +DECLARE_SHARED_PTR(Payload) +DECLARE_SHARED_PTR(PerDocBuffer) +DECLARE_SHARED_PTR(PositionBasedTermVectorMapper) +DECLARE_SHARED_PTR(RawPostingList) +DECLARE_SHARED_PTR(ReaderCommit) +DECLARE_SHARED_PTR(ReaderPool) +DECLARE_SHARED_PTR(ReadOnlyDirectoryReader) 
+DECLARE_SHARED_PTR(ReadOnlySegmentReader) +DECLARE_SHARED_PTR(RefCount) +DECLARE_SHARED_PTR(ReusableStringReader) +DECLARE_SHARED_PTR(SegmentInfo) +DECLARE_SHARED_PTR(SegmentInfoCollection) +DECLARE_SHARED_PTR(SegmentInfos) +DECLARE_SHARED_PTR(SegmentInfoStatus) +DECLARE_SHARED_PTR(SegmentMergeInfo) +DECLARE_SHARED_PTR(SegmentMergeQueue) +DECLARE_SHARED_PTR(SegmentMerger) +DECLARE_SHARED_PTR(SegmentReader) +DECLARE_SHARED_PTR(SegmentReaderRef) +DECLARE_SHARED_PTR(SegmentTermDocs) +DECLARE_SHARED_PTR(SegmentTermEnum) +DECLARE_SHARED_PTR(SegmentTermPositions) +DECLARE_SHARED_PTR(SegmentTermPositionVector) +DECLARE_SHARED_PTR(SegmentTermVector) +DECLARE_SHARED_PTR(SegmentWriteState) +DECLARE_SHARED_PTR(SerialMergeScheduler) +DECLARE_SHARED_PTR(SingleTokenAttributeSource) +DECLARE_SHARED_PTR(SkipBuffer) +DECLARE_SHARED_PTR(SkipDocWriter) +DECLARE_SHARED_PTR(SnapshotDeletionPolicy) +DECLARE_SHARED_PTR(SortedTermVectorMapper) +DECLARE_SHARED_PTR(StoredFieldStatus) +DECLARE_SHARED_PTR(StoredFieldsWriter) +DECLARE_SHARED_PTR(StoredFieldsWriterPerDoc) +DECLARE_SHARED_PTR(StoredFieldsWriterPerThread) +DECLARE_SHARED_PTR(Term) +DECLARE_SHARED_PTR(TermBuffer) +DECLARE_SHARED_PTR(TermEnum) +DECLARE_SHARED_PTR(TermDocs) +DECLARE_SHARED_PTR(TermFreqVector) +DECLARE_SHARED_PTR(TermIndexStatus) +DECLARE_SHARED_PTR(TermInfo) +DECLARE_SHARED_PTR(TermInfosReader) +DECLARE_SHARED_PTR(TermInfosReaderThreadResources) +DECLARE_SHARED_PTR(TermInfosWriter) +DECLARE_SHARED_PTR(TermPositions) +DECLARE_SHARED_PTR(TermPositionsQueue) +DECLARE_SHARED_PTR(TermPositionVector) +DECLARE_SHARED_PTR(TermsHash) +DECLARE_SHARED_PTR(TermsHashConsumer) +DECLARE_SHARED_PTR(TermsHashConsumerPerField) +DECLARE_SHARED_PTR(TermsHashConsumerPerThread) +DECLARE_SHARED_PTR(TermsHashPerField) +DECLARE_SHARED_PTR(TermsHashPerThread) +DECLARE_SHARED_PTR(TermVectorEntry) +DECLARE_SHARED_PTR(TermVectorEntryFreqSortedComparator) +DECLARE_SHARED_PTR(TermVectorMapper) +DECLARE_SHARED_PTR(TermVectorOffsetInfo) 
+DECLARE_SHARED_PTR(TermVectorsReader) +DECLARE_SHARED_PTR(TermVectorStatus) +DECLARE_SHARED_PTR(TermVectorsTermsWriter) +DECLARE_SHARED_PTR(TermVectorsTermsWriterPerDoc) +DECLARE_SHARED_PTR(TermVectorsTermsWriterPerField) +DECLARE_SHARED_PTR(TermVectorsTermsWriterPerThread) +DECLARE_SHARED_PTR(TermVectorsTermsWriterPostingList) +DECLARE_SHARED_PTR(TermVectorsWriter) +DECLARE_SHARED_PTR(TermVectorsPositionInfo) +DECLARE_SHARED_PTR(WaitQueue) + +// query parser +DECLARE_SHARED_PTR(FastCharStream) +DECLARE_SHARED_PTR(MultiFieldQueryParser) +DECLARE_SHARED_PTR(QueryParser) +DECLARE_SHARED_PTR(QueryParserCharStream) +DECLARE_SHARED_PTR(QueryParserConstants) +DECLARE_SHARED_PTR(QueryParserToken) +DECLARE_SHARED_PTR(QueryParserTokenManager) + +// search +DECLARE_SHARED_PTR(AveragePayloadFunction) +DECLARE_SHARED_PTR(BooleanClause) +DECLARE_SHARED_PTR(BooleanQuery) +DECLARE_SHARED_PTR(BooleanScorer) +DECLARE_SHARED_PTR(BooleanScorerCollector) +DECLARE_SHARED_PTR(BooleanScorer2) +DECLARE_SHARED_PTR(BooleanWeight) +DECLARE_SHARED_PTR(Bucket) +DECLARE_SHARED_PTR(BucketScorer) +DECLARE_SHARED_PTR(BucketTable) +DECLARE_SHARED_PTR(ByteCache) +DECLARE_SHARED_PTR(ByteFieldSource) +DECLARE_SHARED_PTR(ByteParser) +DECLARE_SHARED_PTR(Cache) +DECLARE_SHARED_PTR(CachedDfSource) +DECLARE_SHARED_PTR(CachingSpanFilter) +DECLARE_SHARED_PTR(CachingWrapperFilter) +DECLARE_SHARED_PTR(CellQueue) +DECLARE_SHARED_PTR(Collector) +DECLARE_SHARED_PTR(ComplexExplanation) +DECLARE_SHARED_PTR(ConjunctionScorer) +DECLARE_SHARED_PTR(ConstantScoreAutoRewrite) +DECLARE_SHARED_PTR(ConstantScoreAutoRewriteDefault) +DECLARE_SHARED_PTR(ConstantScoreBooleanQueryRewrite) +DECLARE_SHARED_PTR(ConstantScoreFilterRewrite) +DECLARE_SHARED_PTR(ConstantScoreQuery) +DECLARE_SHARED_PTR(ConstantScorer) +DECLARE_SHARED_PTR(ConstantWeight) +DECLARE_SHARED_PTR(Coordinator) +DECLARE_SHARED_PTR(CountingConjunctionSumScorer) +DECLARE_SHARED_PTR(CountingDisjunctionSumScorer) +DECLARE_SHARED_PTR(CreationPlaceholder) 
+DECLARE_SHARED_PTR(CustomScoreProvider) +DECLARE_SHARED_PTR(CustomScoreQuery) +DECLARE_SHARED_PTR(CustomWeight) +DECLARE_SHARED_PTR(CustomScorer) +DECLARE_SHARED_PTR(DefaultByteParser) +DECLARE_SHARED_PTR(DefaultCustomScoreProvider) +DECLARE_SHARED_PTR(DefaultDoubleParser) +DECLARE_SHARED_PTR(DefaultIntParser) +DECLARE_SHARED_PTR(DefaultLongParser) +DECLARE_SHARED_PTR(DefaultSimilarity) +DECLARE_SHARED_PTR(DisjunctionMaxQuery) +DECLARE_SHARED_PTR(DisjunctionMaxScorer) +DECLARE_SHARED_PTR(DisjunctionMaxWeight) +DECLARE_SHARED_PTR(DisjunctionSumScorer) +DECLARE_SHARED_PTR(DocIdSet) +DECLARE_SHARED_PTR(DocIdSetIterator) +DECLARE_SHARED_PTR(DocValues) +DECLARE_SHARED_PTR(DoubleCache) +DECLARE_SHARED_PTR(DoubleFieldSource) +DECLARE_SHARED_PTR(DoubleParser) +DECLARE_SHARED_PTR(EmptyDocIdSet) +DECLARE_SHARED_PTR(EmptyDocIdSetIterator) +DECLARE_SHARED_PTR(Entry) +DECLARE_SHARED_PTR(ExactPhraseScorer) +DECLARE_SHARED_PTR(Explanation) +DECLARE_SHARED_PTR(FieldCache) +DECLARE_SHARED_PTR(FieldCacheDocIdSet) +DECLARE_SHARED_PTR(FieldCacheEntry) +DECLARE_SHARED_PTR(FieldCacheEntryImpl) +DECLARE_SHARED_PTR(FieldCacheImpl) +DECLARE_SHARED_PTR(FieldCacheRangeFilter) +DECLARE_SHARED_PTR(FieldCacheRangeFilterByte) +DECLARE_SHARED_PTR(FieldCacheRangeFilterDouble) +DECLARE_SHARED_PTR(FieldCacheRangeFilterInt) +DECLARE_SHARED_PTR(FieldCacheRangeFilterLong) +DECLARE_SHARED_PTR(FieldCacheRangeFilterString) +DECLARE_SHARED_PTR(FieldCacheSource) +DECLARE_SHARED_PTR(FieldCacheTermsFilter) +DECLARE_SHARED_PTR(FieldCacheTermsFilterDocIdSet) +DECLARE_SHARED_PTR(FieldComparator) +DECLARE_SHARED_PTR(FieldComparatorSource) +DECLARE_SHARED_PTR(FieldDoc) +DECLARE_SHARED_PTR(FieldDocIdSetIteratorIncrement) +DECLARE_SHARED_PTR(FieldDocIdSetIteratorTermDocs) +DECLARE_SHARED_PTR(FieldDocSortedHitQueue) +DECLARE_SHARED_PTR(FieldMaskingSpanQuery) +DECLARE_SHARED_PTR(FieldScoreQuery) +DECLARE_SHARED_PTR(FieldValueHitQueue) +DECLARE_SHARED_PTR(FieldValueHitQueueEntry) +DECLARE_SHARED_PTR(Filter) 
+DECLARE_SHARED_PTR(FilterCache) +DECLARE_SHARED_PTR(FilterCleaner) +DECLARE_SHARED_PTR(FilteredDocIdSet) +DECLARE_SHARED_PTR(FilteredDocIdSetIterator) +DECLARE_SHARED_PTR(FilteredQuery) +DECLARE_SHARED_PTR(FilteredQueryWeight) +DECLARE_SHARED_PTR(FilteredTermEnum) +DECLARE_SHARED_PTR(FilterItem) +DECLARE_SHARED_PTR(FilterManager) +DECLARE_SHARED_PTR(FuzzyQuery) +DECLARE_SHARED_PTR(FuzzyTermEnum) +DECLARE_SHARED_PTR(HitQueue) +DECLARE_SHARED_PTR(HitQueueBase) +DECLARE_SHARED_PTR(IDFExplanation) +DECLARE_SHARED_PTR(IndexSearcher) +DECLARE_SHARED_PTR(IntCache) +DECLARE_SHARED_PTR(IntFieldSource) +DECLARE_SHARED_PTR(IntParser) +DECLARE_SHARED_PTR(LongCache) +DECLARE_SHARED_PTR(LongParser) +DECLARE_SHARED_PTR(MatchAllDocsQuery) +DECLARE_SHARED_PTR(MatchAllDocsWeight) +DECLARE_SHARED_PTR(MatchAllScorer) +DECLARE_SHARED_PTR(MaxPayloadFunction) +DECLARE_SHARED_PTR(MinPayloadFunction) +DECLARE_SHARED_PTR(MultiComparatorsFieldValueHitQueue) +DECLARE_SHARED_PTR(MultiPhraseQuery) +DECLARE_SHARED_PTR(MultiSearcher) +DECLARE_SHARED_PTR(MultiSearcherCallableNoSort) +DECLARE_SHARED_PTR(MultiSearcherCallableWithSort) +DECLARE_SHARED_PTR(MultiTermQuery) +DECLARE_SHARED_PTR(MultiTermQueryWrapperFilter) +DECLARE_SHARED_PTR(NearSpansOrdered) +DECLARE_SHARED_PTR(NearSpansUnordered) +DECLARE_SHARED_PTR(NumericRangeFilter) +DECLARE_SHARED_PTR(NumericRangeQuery) +DECLARE_SHARED_PTR(NumericUtilsDoubleParser) +DECLARE_SHARED_PTR(NumericUtilsIntParser) +DECLARE_SHARED_PTR(NumericUtilsLongParser) +DECLARE_SHARED_PTR(OneComparatorFieldValueHitQueue) +DECLARE_SHARED_PTR(OrdFieldSource) +DECLARE_SHARED_PTR(ParallelMultiSearcher) +DECLARE_SHARED_PTR(Parser) +DECLARE_SHARED_PTR(PayloadFunction) +DECLARE_SHARED_PTR(PayloadNearQuery) +DECLARE_SHARED_PTR(PayloadNearSpanScorer) +DECLARE_SHARED_PTR(PayloadNearSpanWeight) +DECLARE_SHARED_PTR(PayloadSpanUtil) +DECLARE_SHARED_PTR(PayloadTermQuery) +DECLARE_SHARED_PTR(PayloadTermSpanScorer) +DECLARE_SHARED_PTR(PayloadTermWeight) 
+DECLARE_SHARED_PTR(PhrasePositions) +DECLARE_SHARED_PTR(PhraseQuery) +DECLARE_SHARED_PTR(PhraseQueue) +DECLARE_SHARED_PTR(PhraseScorer) +DECLARE_SHARED_PTR(PositionInfo) +DECLARE_SHARED_PTR(PositiveScoresOnlyCollector) +DECLARE_SHARED_PTR(PrefixFilter) +DECLARE_SHARED_PTR(PrefixQuery) +DECLARE_SHARED_PTR(PrefixTermEnum) +DECLARE_SHARED_PTR(PriorityQueueScoreDocs) +DECLARE_SHARED_PTR(Query) +DECLARE_SHARED_PTR(QueryTermVector) +DECLARE_SHARED_PTR(QueryWrapperFilter) +DECLARE_SHARED_PTR(ReqExclScorer) +DECLARE_SHARED_PTR(ReqOptSumScorer) +DECLARE_SHARED_PTR(RewriteMethod) +DECLARE_SHARED_PTR(ReverseOrdFieldSource) +DECLARE_SHARED_PTR(ScoreCachingWrappingScorer) +DECLARE_SHARED_PTR(ScoreDoc) +DECLARE_SHARED_PTR(Scorer) +DECLARE_SHARED_PTR(ScoreTerm) +DECLARE_SHARED_PTR(ScoreTermQueue) +DECLARE_SHARED_PTR(ScoringBooleanQueryRewrite) +DECLARE_SHARED_PTR(Searchable) +DECLARE_SHARED_PTR(Searcher) +DECLARE_SHARED_PTR(Similarity) +DECLARE_SHARED_PTR(SimilarityDisableCoord) +DECLARE_SHARED_PTR(SimilarityDelegator) +DECLARE_SHARED_PTR(SimilarityIDFExplanation) +DECLARE_SHARED_PTR(SingleMatchScorer) +DECLARE_SHARED_PTR(SingleTermEnum) +DECLARE_SHARED_PTR(SloppyPhraseScorer) +DECLARE_SHARED_PTR(Sort) +DECLARE_SHARED_PTR(SortField) +DECLARE_SHARED_PTR(SpanFilter) +DECLARE_SHARED_PTR(SpanFilterResult) +DECLARE_SHARED_PTR(SpanFirstQuery) +DECLARE_SHARED_PTR(SpanNearQuery) +DECLARE_SHARED_PTR(SpanNotQuery) +DECLARE_SHARED_PTR(SpanOrQuery) +DECLARE_SHARED_PTR(SpanQuery) +DECLARE_SHARED_PTR(SpanQueryFilter) +DECLARE_SHARED_PTR(SpanQueue) +DECLARE_SHARED_PTR(Spans) +DECLARE_SHARED_PTR(SpansCell) +DECLARE_SHARED_PTR(SpanScorer) +DECLARE_SHARED_PTR(SpanTermQuery) +DECLARE_SHARED_PTR(SpanWeight) +DECLARE_SHARED_PTR(StartEnd) +DECLARE_SHARED_PTR(StringCache) +DECLARE_SHARED_PTR(StringIndex) +DECLARE_SHARED_PTR(StringIndexCache) +DECLARE_SHARED_PTR(SubScorer) +DECLARE_SHARED_PTR(TermQuery) +DECLARE_SHARED_PTR(TermRangeFilter) +DECLARE_SHARED_PTR(TermRangeQuery) 
+DECLARE_SHARED_PTR(TermRangeTermEnum) +DECLARE_SHARED_PTR(TermScorer) +DECLARE_SHARED_PTR(TermSpans) +DECLARE_SHARED_PTR(TimeLimitingCollector) +DECLARE_SHARED_PTR(TimerThread) +DECLARE_SHARED_PTR(TopDocs) +DECLARE_SHARED_PTR(TopDocsCollector) +DECLARE_SHARED_PTR(TopFieldCollector) +DECLARE_SHARED_PTR(TopFieldDocs) +DECLARE_SHARED_PTR(TopScoreDocCollector) +DECLARE_SHARED_PTR(ValueSource) +DECLARE_SHARED_PTR(ValueSourceQuery) +DECLARE_SHARED_PTR(ValueSourceScorer) +DECLARE_SHARED_PTR(ValueSourceWeight) +DECLARE_SHARED_PTR(Weight) +DECLARE_SHARED_PTR(WildcardQuery) +DECLARE_SHARED_PTR(WildcardTermEnum) + +// store +DECLARE_SHARED_PTR(BufferedIndexInput) +DECLARE_SHARED_PTR(BufferedIndexOutput) +DECLARE_SHARED_PTR(ChecksumIndexInput) +DECLARE_SHARED_PTR(ChecksumIndexOutput) +DECLARE_SHARED_PTR(Directory) +DECLARE_SHARED_PTR(FileSwitchDirectory) +DECLARE_SHARED_PTR(FSDirectory) +DECLARE_SHARED_PTR(FSLockFactory) +DECLARE_SHARED_PTR(IndexInput) +DECLARE_SHARED_PTR(IndexOutput) +DECLARE_SHARED_PTR(InputFile) +DECLARE_SHARED_PTR(Lock) +DECLARE_SHARED_PTR(LockFactory) +DECLARE_SHARED_PTR(MMapDirectory) +DECLARE_SHARED_PTR(MMapIndexInput) +DECLARE_SHARED_PTR(NativeFSLock) +DECLARE_SHARED_PTR(NativeFSLockFactory) +DECLARE_SHARED_PTR(NoLock) +DECLARE_SHARED_PTR(NoLockFactory) +DECLARE_SHARED_PTR(OutputFile) +DECLARE_SHARED_PTR(RAMDirectory) +DECLARE_SHARED_PTR(RAMFile) +DECLARE_SHARED_PTR(RAMInputStream) +DECLARE_SHARED_PTR(RAMOutputStream) +DECLARE_SHARED_PTR(SimpleFSDirectory) +DECLARE_SHARED_PTR(SimpleFSIndexInput) +DECLARE_SHARED_PTR(SimpleFSIndexOutput) +DECLARE_SHARED_PTR(SimpleFSLock) +DECLARE_SHARED_PTR(SimpleFSLockFactory) +DECLARE_SHARED_PTR(SingleInstanceLock) +DECLARE_SHARED_PTR(SingleInstanceLockFactory) + +// util +DECLARE_SHARED_PTR(Attribute) +DECLARE_SHARED_PTR(AttributeFactory) +DECLARE_SHARED_PTR(AttributeSource) +DECLARE_SHARED_PTR(AttributeSourceState) +DECLARE_SHARED_PTR(BitSet) +DECLARE_SHARED_PTR(BitVector) +DECLARE_SHARED_PTR(BufferedReader) 
+DECLARE_SHARED_PTR(Collator) +DECLARE_SHARED_PTR(DefaultAttributeFactory) +DECLARE_SHARED_PTR(DocIdBitSet) +DECLARE_SHARED_PTR(FieldCacheSanityChecker) +DECLARE_SHARED_PTR(FileReader) +DECLARE_SHARED_PTR(Future) +DECLARE_SHARED_PTR(HeapedScorerDoc) +DECLARE_SHARED_PTR(InfoStream) +DECLARE_SHARED_PTR(InfoStreamFile) +DECLARE_SHARED_PTR(InfoStreamOut) +DECLARE_SHARED_PTR(InputStreamReader) +DECLARE_SHARED_PTR(Insanity) +DECLARE_SHARED_PTR(IntRangeBuilder) +DECLARE_SHARED_PTR(LongRangeBuilder) +DECLARE_SHARED_PTR(LuceneObject) +DECLARE_SHARED_PTR(LuceneSignal) +DECLARE_SHARED_PTR(LuceneThread) +DECLARE_SHARED_PTR(NumericUtils) +DECLARE_SHARED_PTR(OpenBitSet) +DECLARE_SHARED_PTR(OpenBitSetDISI) +DECLARE_SHARED_PTR(OpenBitSetIterator) +DECLARE_SHARED_PTR(Random) +DECLARE_SHARED_PTR(Reader) +DECLARE_SHARED_PTR(ReaderField) +DECLARE_SHARED_PTR(ScorerDocQueue) +DECLARE_SHARED_PTR(SortedVIntList) +DECLARE_SHARED_PTR(StringReader) +DECLARE_SHARED_PTR(Synchronize) +DECLARE_SHARED_PTR(ThreadPool) +DECLARE_SHARED_PTR(UnicodeResult) +DECLARE_SHARED_PTR(UTF8Decoder) +DECLARE_SHARED_PTR(UTF8DecoderStream) +DECLARE_SHARED_PTR(UTF8Encoder) +DECLARE_SHARED_PTR(UTF8EncoderStream) +DECLARE_SHARED_PTR(UTF8Result) +DECLARE_SHARED_PTR(UTF16Decoder) +} + +#endif diff --git a/include/lucene++/MMapDirectory.h b/include/lucene++/MMapDirectory.h new file mode 100644 index 00000000..3a3400e4 --- /dev/null +++ b/include/lucene++/MMapDirectory.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef MMAPDIRECTORY_H +#define MMAPDIRECTORY_H + +#include "FSDirectory.h" + +namespace Lucene { + +/// File-based {@link Directory} implementation that uses mmap for reading, and {@link SimpleFSIndexOutput} for writing. +/// +/// NOTE: memory mapping uses up a portion of the virtual memory address space in your process equal to the size of the +/// file being mapped. Before using this class, be sure you have plenty of virtual address space. +/// +/// NOTE: Accessing this class either directly or indirectly from a thread while it's interrupted can close the +/// underlying channel immediately if at the same time the thread is blocked on IO. The channel will remain closed and +/// subsequent access to {@link MMapDirectory} will throw an exception. +class LPPAPI MMapDirectory : public FSDirectory { +public: + /// Create a new MMapDirectory for the named location. + /// @param path the path of the directory. + /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) + MMapDirectory(const String& path, const LockFactoryPtr& lockFactory = LockFactoryPtr()); + + virtual ~MMapDirectory(); + + LUCENE_CLASS(MMapDirectory); + +public: + using FSDirectory::openInput; + + /// Creates an IndexInput for the file with the given name. + virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); + + /// Creates an IndexOutput for the file with the given name. + virtual IndexOutputPtr createOutput(const String& name); +}; + +} + +#endif diff --git a/include/lucene++/Map.h b/include/lucene++/Map.h new file mode 100644 index 00000000..a86b0c86 --- /dev/null +++ b/include/lucene++/Map.h @@ -0,0 +1,130 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MAP_H +#define MAP_H + +#include <map> +#include "LuceneSync.h" + +namespace Lucene { + +/// Utility template class to handle maps that can be safely copied and shared +template < class KEY, class VALUE, class LESS = std::less<KEY> > +class Map : public LuceneSync { +public: + typedef Map<KEY, VALUE, LESS> this_type; + typedef std::pair<KEY, VALUE> key_value; + typedef std::map< KEY, VALUE, LESS > map_type; + typedef typename map_type::iterator iterator; + typedef typename map_type::const_iterator const_iterator; + typedef KEY key_type; + typedef VALUE value_type; + + virtual ~Map() { + } + +protected: + boost::shared_ptr<map_type> mapContainer; + +public: + static this_type newInstance() { + this_type instance; + instance.mapContainer = Lucene::newInstance<map_type>(); + return instance; + } + + void reset() { + mapContainer.reset(); + } + + int32_t size() const { + return (int32_t)mapContainer->size(); + } + + bool empty() const { + return mapContainer->empty(); + } + + void clear() { + mapContainer->clear(); + } + + iterator begin() { + return mapContainer->begin(); + } + + iterator end() { + return mapContainer->end(); + } + + const_iterator begin() const { + return mapContainer->begin(); + } + + const_iterator end() const { + return mapContainer->end(); + } + + operator bool() const { + return mapContainer.get() != NULL; + } + + bool operator! 
() const { + return !mapContainer; + } + + map_type& operator= (const map_type& other) { + mapContainer = other.mapContainer; + return *this; + } + + void put(const KEY& key, const VALUE& value) { + (*mapContainer)[key] = value; + } + + template <class ITER> + void putAll(ITER first, ITER last) { + for (iterator current = first; current != last; ++current) { + (*mapContainer)[current->first] = current->second; + } + } + + template <class ITER> + void remove(ITER pos) { + mapContainer->erase(pos); + } + + template <class ITER> + ITER remove(ITER first, ITER last) { + return mapContainer->erase(first, last); + } + + bool remove(const KEY& key) { + return (mapContainer->erase(key) > 0); + } + + iterator find(const KEY& key) { + return mapContainer->find(key); + } + + VALUE get(const KEY& key) const { + iterator findValue = mapContainer->find(key); + return findValue == mapContainer->end() ? VALUE() : findValue->second; + } + + bool contains(const KEY& key) const { + return (mapContainer->find(key) != mapContainer->end()); + } + + VALUE& operator[] (const KEY& key) { + return (*mapContainer)[key]; + } +}; + +} + +#endif diff --git a/include/lucene++/MapFieldSelector.h b/include/lucene++/MapFieldSelector.h new file mode 100644 index 00000000..2db2d41a --- /dev/null +++ b/include/lucene++/MapFieldSelector.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef MAPFIELDSELECTOR_H +#define MAPFIELDSELECTOR_H + +#include "FieldSelector.h" + +namespace Lucene { + +typedef HashMap<String, FieldSelectorResult> MapStringFieldSelectorResult; + +/// A {@link FieldSelector} based on a Map of field names to {@link FieldSelectorResult}s +class LPPAPI MapFieldSelector : public FieldSelector { +public: + /// Create a MapFieldSelector + /// @param fieldSelections maps from field names (String) to {@link FieldSelectorResult}s + MapFieldSelector(MapStringFieldSelectorResult fieldSelections); + + /// Create a MapFieldSelector + /// @param fields fields to LOAD. List of Strings. All other fields are NO_LOAD. + MapFieldSelector(Collection<String> fields); + + virtual ~MapFieldSelector(); + + LUCENE_CLASS(MapFieldSelector); + +public: + MapStringFieldSelectorResult fieldSelections; + +public: + /// Load field according to its associated value in fieldSelections + /// @param field a field name + /// @return the fieldSelections value that field maps to or NO_LOAD if none. + virtual FieldSelectorResult accept(const String& fieldName); +}; + +} + +#endif diff --git a/include/lucene++/MapOfSets.h b/include/lucene++/MapOfSets.h new file mode 100644 index 00000000..d3a7d81d --- /dev/null +++ b/include/lucene++/MapOfSets.h @@ -0,0 +1,68 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MAPOFSETS_H +#define MAPOFSETS_H + +#include "Lucene.h" + +namespace Lucene { + +/// Helper class for keeping Lists of Objects associated with keys. 
+template <typename MAPKEY, typename SETVALUE> +class MapOfSets { +public: + typedef HashSet<SETVALUE> set_type; + typedef HashMap<MAPKEY, set_type> map_type; + + MapOfSets(map_type m) { + theMap = m; + } + +protected: + map_type theMap; + +public: + /// @return direct access to the map backing this object. + map_type getMap() { + return theMap; + } + + /// Adds val to the HashSet associated with key in the HashMap. If key is not already in the map, + /// a new HashSet will first be created. + /// @return the size of the HashSet associated with key once val is added to it. + int32_t put(MAPKEY key, SETVALUE val) { + typename map_type::iterator entry = theMap.find(key); + if (entry != theMap.end()) { + entry->second.add(val); + return entry->second.size(); + } else { + set_type theSet(set_type::newInstance()); + theSet.add(val); + theMap.put(key, theSet); + return 1; + } + } + + /// Adds multiple vals to the HashSet associated with key in the HashMap. If key is not already in + /// the map, a new HashSet will first be created. + /// @return the size of the HashSet associated with key once val is added to it. + int32_t putAll(MAPKEY key, set_type vals) { + typename map_type::iterator entry = theMap.find(key); + if (entry != theMap.end()) { + entry->second.addAll(vals.begin(), vals.end()); + return entry->second.size(); + } else { + set_type theSet(set_type::newInstance(vals.begin(), vals.end())); + theMap.put(key, theSet); + return theSet.size(); + } + } +}; + +} + +#endif diff --git a/include/lucene++/MappingCharFilter.h b/include/lucene++/MappingCharFilter.h new file mode 100644 index 00000000..26f530a2 --- /dev/null +++ b/include/lucene++/MappingCharFilter.h @@ -0,0 +1,48 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef MAPPINGCHARFILTER_H +#define MAPPINGCHARFILTER_H + +#include "BaseCharFilter.h" + +namespace Lucene { + +/// Simplistic {@link CharFilter} that applies the mappings contained in a {@link NormalizeCharMap} to the character +/// stream, and correcting the resulting changes to the offsets. +class LPPAPI MappingCharFilter : public BaseCharFilter { +public: + /// Default constructor that takes a {@link CharStream}. + MappingCharFilter(const NormalizeCharMapPtr& normMap, const CharStreamPtr& in); + + /// Easy-use constructor that takes a {@link Reader}. + MappingCharFilter(const NormalizeCharMapPtr& normMap, const ReaderPtr& in); + + virtual ~MappingCharFilter(); + + LUCENE_CLASS(MappingCharFilter); + +protected: + NormalizeCharMapPtr normMap; + Collection buffer; + String replacement; + int32_t charPointer; + int32_t nextCharCounter; + +public: + virtual int32_t read(); + virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); + +protected: + int32_t nextChar(); + void pushChar(int32_t c); + void pushLastChar(int32_t c); + NormalizeCharMapPtr match(const NormalizeCharMapPtr& map); +}; + +} + +#endif diff --git a/include/lucene++/MatchAllDocsQuery.h b/include/lucene++/MatchAllDocsQuery.h new file mode 100644 index 00000000..1985fb4f --- /dev/null +++ b/include/lucene++/MatchAllDocsQuery.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MATCHALLDOCSQUERY_H +#define MATCHALLDOCSQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// A query that matches all documents. 
+class LPPAPI MatchAllDocsQuery : public Query { +public: + /// @param normsField Field used for normalization factor (document boost). Null if nothing. + MatchAllDocsQuery(const String& normsField = EmptyString); + + virtual ~MatchAllDocsQuery(); + + LUCENE_CLASS(MatchAllDocsQuery); + +protected: + String normsField; + +public: + using Query::toString; + + virtual WeightPtr createWeight(const SearcherPtr& searcher); + virtual void extractTerms(SetTerm terms); + virtual String toString(const String& field); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + friend class MatchAllDocsWeight; +}; + +} + +#endif diff --git a/include/lucene++/MaxPayloadFunction.h b/include/lucene++/MaxPayloadFunction.h new file mode 100644 index 00000000..cd95085b --- /dev/null +++ b/include/lucene++/MaxPayloadFunction.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MAXPAYLOADFUNCTION_H +#define MAXPAYLOADFUNCTION_H + +#include "PayloadFunction.h" + +namespace Lucene { + +/// Returns the maximum payload score seen, else 1 if there are no payloads on the doc. +/// +/// Is thread safe and completely reusable. 
+class LPPAPI MaxPayloadFunction : public PayloadFunction { +public: + virtual ~MaxPayloadFunction(); + LUCENE_CLASS(MaxPayloadFunction); + +public: + virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, + double currentScore, double currentPayloadScore); + virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); +}; + +} + +#endif diff --git a/include/lucene++/MergeDocIDRemapper.h b/include/lucene++/MergeDocIDRemapper.h new file mode 100644 index 00000000..4c48ee33 --- /dev/null +++ b/include/lucene++/MergeDocIDRemapper.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MERGEDOCIDREMAPPER_H +#define MERGEDOCIDREMAPPER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Remaps docIDs after a merge has completed, where the merged segments had at least one deletion. +/// This is used to renumber the buffered deletes in IndexWriter when a merge of segments with deletions +/// commits. 
+class MergeDocIDRemapper : public LuceneObject { +public: + MergeDocIDRemapper(const SegmentInfosPtr& infos, Collection< Collection<int32_t> > docMaps, Collection<int32_t> delCounts, const OneMergePtr& merge, int32_t mergedDocCount); + virtual ~MergeDocIDRemapper(); + + LUCENE_CLASS(MergeDocIDRemapper); + +public: + Collection<int32_t> starts; // used for binary search of mapped docID + Collection<int32_t> newStarts; // starts, minus the deletes + Collection< Collection<int32_t> > docMaps; // maps docIDs in the merged set + int32_t minDocID; // minimum docID that needs renumbering + int32_t maxDocID; // 1+ the max docID that needs renumbering + int32_t docShift; // total # deleted docs that were compacted by this merge + +public: + int32_t remap(int32_t oldDocID); +}; + +} + +#endif diff --git a/include/lucene++/MergePolicy.h b/include/lucene++/MergePolicy.h new file mode 100644 index 00000000..e06da3e1 --- /dev/null +++ b/include/lucene++/MergePolicy.h @@ -0,0 +1,136 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MERGEPOLICY_H +#define MERGEPOLICY_H + +#include "SegmentInfos.h" + +namespace Lucene { + +/// A MergePolicy determines the sequence of primitive merge operations to be used for overall merge +/// and optimize operations. +/// +/// Whenever the segments in an index have been altered by {@link IndexWriter}, either the addition of +/// a newly flushed segment, addition of many segments from addIndexes* calls, or a previous merge that +/// may now need to cascade, {@link IndexWriter} invokes {@link #findMerges} to give the MergePolicy a +/// chance to pick merges that are now required. 
This method returns a {@link MergeSpecification} +/// instance describing the set of merges that should be done, or null if no merges are necessary. +/// When IndexWriter.optimize is called, it calls {@link #findMergesForOptimize} and the MergePolicy +/// should then return the necessary merges. +/// +/// Note that the policy can return more than one merge at a time. In this case, if the writer is using +/// {@link SerialMergeScheduler}, the merges will be run sequentially but if it is using {@link +/// ConcurrentMergeScheduler} they will be run concurrently. +/// +/// The default MergePolicy is {@link LogByteSizeMergePolicy}. +/// +/// NOTE: This API is new and still experimental (subject to change suddenly in the next release) +class LPPAPI MergePolicy : public LuceneObject { +public: + MergePolicy(const IndexWriterPtr& writer); + virtual ~MergePolicy(); + + LUCENE_CLASS(MergePolicy); + +protected: + IndexWriterWeakPtr _writer; + +public: + /// Determine what set of merge operations are now necessary on the index. {@link IndexWriter} calls + /// this whenever there is a change to the segments. This call is always synchronized on the {@link + /// IndexWriter} instance so only one thread at a time will call this method. + /// @param segmentInfos the total set of segments in the index + virtual MergeSpecificationPtr findMerges(const SegmentInfosPtr& segmentInfos) = 0; + + /// Determine what set of merge operations is necessary in order to optimize the index. {@link + /// IndexWriter} calls this when its {@link IndexWriter#optimize()} method is called. This call is + /// always synchronized on the {@link IndexWriter} instance so only one thread at a time will call + /// this method. + /// @param segmentInfos the total set of segments in the index + /// @param maxSegmentCount requested maximum number of segments in the index (currently this is always 1) + /// @param segmentsToOptimize contains the specific SegmentInfo instances that must be merged away. 
+ /// This may be a subset of all SegmentInfos. + virtual MergeSpecificationPtr findMergesForOptimize(const SegmentInfosPtr& segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize) = 0; + + /// Determine what set of merge operations is necessary in order to expunge all deletes from the index. + /// @param segmentInfos the total set of segments in the index + virtual MergeSpecificationPtr findMergesToExpungeDeletes(const SegmentInfosPtr& segmentInfos) = 0; + + /// Release all resources for the policy. + virtual void close() = 0; + + /// Returns true if a newly flushed (not from merge) segment should use the compound file format. + virtual bool useCompoundFile(const SegmentInfosPtr& segments, const SegmentInfoPtr& newSegment) = 0; + + /// Returns true if the doc store files should use the compound file format. + virtual bool useCompoundDocStore(const SegmentInfosPtr& segments) = 0; +}; + +/// OneMerge provides the information necessary to perform an individual primitive merge operation, +/// resulting in a single new segment. The merge spec includes the subset of segments to be merged +/// as well as whether the new segment should use the compound file format. 
+class LPPAPI OneMerge : public LuceneObject { +public: + OneMerge(const SegmentInfosPtr& segments, bool useCompoundFile); + virtual ~OneMerge(); + + LUCENE_CLASS(OneMerge); + +public: + SegmentInfoPtr info; // used by IndexWriter + bool mergeDocStores; // used by IndexWriter + bool optimize; // used by IndexWriter + bool registerDone; // used by IndexWriter + int64_t mergeGen; // used by IndexWriter + bool isExternal; // used by IndexWriter + int32_t maxNumSegmentsOptimize; // used by IndexWriter + Collection<SegmentReaderPtr> readers; // used by IndexWriter + Collection<SegmentReaderPtr> readersClone; // used by IndexWriter + + SegmentInfosPtr segments; + bool useCompoundFile; + bool aborted; + LuceneException error; + +public: + /// Record that an exception occurred while executing this merge + void setException(const LuceneException& error); + + /// Retrieve previous exception set by {@link #setException}. + LuceneException getException(); + + /// Mark this merge as aborted. If this is called before the merge is committed then the merge will not be committed. + void abort(); + + /// Returns true if this merge was aborted. + bool isAborted(); + + void checkAborted(const DirectoryPtr& dir); + + String segString(const DirectoryPtr& dir); +}; + +/// A MergeSpecification instance provides the information necessary to perform multiple merges. +/// It simply contains a list of {@link OneMerge} instances. 
+class LPPAPI MergeSpecification : public LuceneObject { +public: + MergeSpecification(); + virtual ~MergeSpecification(); + + LUCENE_CLASS(MergeSpecification); + +public: + Collection<OneMergePtr> merges; + +public: + void add(const OneMergePtr& merge); + String segString(const DirectoryPtr& dir); +}; + +} + +#endif diff --git a/include/lucene++/MergeScheduler.h b/include/lucene++/MergeScheduler.h new file mode 100644 index 00000000..7034ab0e --- /dev/null +++ b/include/lucene++/MergeScheduler.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MERGESCHEDULER_H +#define MERGESCHEDULER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// {@link IndexWriter} uses an instance implementing this interface to execute the merges +/// selected by a {@link MergePolicy}. The default MergeScheduler is {@link ConcurrentMergeScheduler}. +class LPPAPI MergeScheduler : public LuceneObject { +public: + virtual ~MergeScheduler(); + + LUCENE_CLASS(MergeScheduler); + +public: + /// Run the merges provided by {@link IndexWriter#getNextMerge()}. + virtual void merge(const IndexWriterPtr& writer) = 0; + + /// Close this MergeScheduler. + virtual void close() = 0; +}; + +} + +#endif diff --git a/include/lucene++/MinPayloadFunction.h b/include/lucene++/MinPayloadFunction.h new file mode 100644 index 00000000..831d9191 --- /dev/null +++ b/include/lucene++/MinPayloadFunction.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef MINPAYLOADFUNCTION_H +#define MINPAYLOADFUNCTION_H + +#include "PayloadFunction.h" + +namespace Lucene { + +/// Calculates the minimum payload seen +class LPPAPI MinPayloadFunction : public PayloadFunction { +public: + virtual ~MinPayloadFunction(); + LUCENE_CLASS(MinPayloadFunction); + +public: + virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, + double currentScore, double currentPayloadScore); + virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); +}; + +} + +#endif diff --git a/include/lucene++/MiscUtils.h b/include/lucene++/MiscUtils.h new file mode 100644 index 00000000..98572248 --- /dev/null +++ b/include/lucene++/MiscUtils.h @@ -0,0 +1,145 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MISCUTILS_H +#define MISCUTILS_H + +#include "Lucene.h" + +namespace Lucene { + +class LPPAPI MiscUtils { +protected: + static const uint32_t SINGLE_EXPONENT_MASK; + static const uint32_t SINGLE_MANTISSA_MASK; + static const uint32_t SINGLE_NAN_BITS; + + static const uint64_t DOUBLE_SIGN_MASK; + static const uint64_t DOUBLE_EXPONENT_MASK; + static const uint64_t DOUBLE_MANTISSA_MASK; + static const uint64_t DOUBLE_NAN_BITS; + +public: + /// Return given time in milliseconds. + static uint64_t getTimeMillis(boost::posix_time::ptime time); + + /// Returns the current time in milliseconds. 
+ static uint64_t currentTimeMillis(); + + /// This over-allocates proportional to the list size, making room for additional growth. + /// The over-allocation is mild, but is enough to give linear-time amortized behavior over a long + /// sequence of appends(). + /// The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... + static int32_t getNextSize(int32_t targetSize); + + /// Only reallocate if we are "substantially" smaller. This saves us from "running hot" (constantly + /// making a bit bigger then a bit smaller, over and over) + static int32_t getShrinkSize(int32_t currentSize, int32_t targetSize); + + /// Compares two byte[] arrays, element by element, and returns the number of elements common to + /// both arrays. + /// @param bytes1 The first byte[] to compare + /// @param bytes2 The second byte[] to compare + /// @return The number of common elements. + static int32_t bytesDifference(uint8_t* bytes1, int32_t len1, uint8_t* bytes2, int32_t len2); + + template + static int32_t hashLucene(TYPE type) { + return type->hashCode(); + } + + template + static int32_t hashNumeric(TYPE type) { + return type; + } + + template + static int32_t hashCode(ITER first, ITER last, PRED pred) { + int32_t code = 0; + for (ITER hash = first; hash != last; ++hash) { + code = code * 31 + pred(*hash); + } + return code; + } + + /// Returns hash of chars in range start (inclusive) to end (inclusive) + static int32_t hashCode(const wchar_t* array, int32_t start, int32_t end); + + /// Returns hash of bytes in range start (inclusive) to end (inclusive) + static int32_t hashCode(const uint8_t* array, int32_t start, int32_t end); + + /// Returns hash code of given boolean + static int32_t hashCode(bool value); + + /// Copy elements from on buffer to another + template + static void arrayCopy(SOURCE source, int32_t sourceOffset, DEST dest, int32_t destOffset, int32_t length) { + std::copy(source + sourceOffset, source + sourceOffset + length, dest + destOffset); + } + + /// Fill 
buffer with given element + template + static void arrayFill(DEST dest, int32_t destFrom, int32_t destTo, FILL value) { + std::fill(dest + destFrom, dest + destTo, value); + } + + /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point + /// "single format" bit layout. + static int32_t doubleToIntBits(double value); + + /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point + /// "single format" bit layout, preserving Not-a-Number (NaN) values. + static int32_t doubleToRawIntBits(double value); + + /// Returns the float value corresponding to a given bit representation. The argument is considered to be a + /// representation of a floating-point value according to the IEEE 754 floating-point "single format" bit layout. + static double intBitsToDouble(int32_t bits); + + /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point + /// "double format" bit layout. + static int64_t doubleToLongBits(double value); + + /// Returns a representation of the specified floating-point value according to the IEEE 754 floating-point + /// "double format" bit layout, preserving Not-a-Number (NaN) values. + static int64_t doubleToRawLongBits(double value); + + /// Returns the double value corresponding to a given bit representation. The argument is considered to be a + /// representation of a floating-point value according to the IEEE 754 floating-point "double format" bit layout. + static double longBitsToDouble(int64_t bits); + + /// Returns true if the specified number is infinitely large in magnitude, false otherwise. + static bool isInfinite(double value); + + /// Returns true if this Double value is a Not-a-Number (NaN), false otherwise. 
+ static bool isNaN(double value); + + /// Return whether given Lucene object is of a specified type + template + static bool typeOf(const LuceneObjectPtr& object) { + return boost::dynamic_pointer_cast(object).get() != NULL; + } + + /// Return whether given Lucene objects are of equal type. + static bool equalTypes(const LuceneObjectPtr& first, const LuceneObjectPtr& second); + + /// Perform unsigned right-shift (left bits are zero filled) + static int64_t unsignedShift(int64_t num, int64_t shift); + + /// Perform unsigned right-shift (left bits are zero filled) + static int32_t unsignedShift(int32_t num, int32_t shift); +}; + +inline int64_t MiscUtils::unsignedShift(int64_t num, int64_t shift) { + return (shift & 0x3f) == 0 ? num : (((uint64_t)num >> 1) & 0x7fffffffffffffffLL) >> ((shift & 0x3f) - 1); +} + +inline int32_t MiscUtils::unsignedShift(int32_t num, int32_t shift) { + return (shift & 0x1f) == 0 ? num : (((uint32_t)num >> 1) & 0x7fffffff) >> ((shift & 0x1f) - 1); +} + +} + +#endif diff --git a/include/lucene++/MultiFieldQueryParser.h b/include/lucene++/MultiFieldQueryParser.h new file mode 100644 index 00000000..9c4c4aae --- /dev/null +++ b/include/lucene++/MultiFieldQueryParser.h @@ -0,0 +1,138 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTIFIELDQUERYPARSER_H +#define MULTIFIELDQUERYPARSER_H + +#include "QueryParser.h" +#include "BooleanClause.h" + +namespace Lucene { + +/// A QueryParser which constructs queries to search multiple fields. +class LPPAPI MultiFieldQueryParser : public QueryParser { +public: + /// Creates a MultiFieldQueryParser. Allows passing of a map with term to Boost, and the boost to + /// apply to each term. 
+ /// + /// It will, when parse(String query) is called, construct a query like this (assuming the query + /// consists of two terms and you specify the two fields title and body): + ///
+    /// (title:term1 body:term1) (title:term2 body:term2)
+    /// 
+ /// + /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: + ///
+    /// +(title:term1 body:term1) +(title:term2 body:term2)
+    /// 
+ /// + /// When you pass a boost (title=>5 body=>10) you can get: + ///
+    /// +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
+    /// 
+ /// + /// In other words, all the query's terms must appear, but it doesn't matter in what fields they + /// appear. + MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, const AnalyzerPtr& analyzer, MapStringDouble boosts); + + /// Creates a MultiFieldQueryParser. It will, when parse(String query) is called, construct a + /// query like this (assuming the query consists of two terms and you specify the two fields + /// title and body): + ///
+    /// (title:term1 body:term1) (title:term2 body:term2)
+    /// 
+ /// + /// When setDefaultOperator(AND_OPERATOR) is set, the result will be: + ///
+    /// +(title:term1 body:term1) +(title:term2 body:term2)
+    /// 
+ /// + /// In other words, all the query's terms must appear, but it doesn't matter in what fields they + /// appear. + MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, const AnalyzerPtr& analyzer); + + virtual ~MultiFieldQueryParser(); + + LUCENE_CLASS(MultiFieldQueryParser); + +protected: + Collection fields; + MapStringDouble boosts; + +public: + using QueryParser::parse; + + /// Parses a query which searches on the fields specified. + /// + /// If x fields are specified, this effectively constructs: + ///
+    /// (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
+    /// 
+ /// @param matchVersion Lucene version to match; this is passed through to QueryParser. + /// @param queries Queries strings to parse + /// @param fields Fields to search on + /// @param analyzer Analyzer to use + static QueryPtr parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, const AnalyzerPtr& analyzer); + + /// Parses a query, searching on the fields specified. Use this if you need to specify certain fields as + /// required, and others as prohibited. + /// + ///
+    /// Usage:
+    /// Collection<String> fields = newCollection<String>(L"filename", L"contents", L"description");
+    /// Collection<BooleanClause::Occur> flags = newCollection<BooleanClause::Occur>(BooleanClause::SHOULD, BooleanClause::MUST, BooleanClause::MUST_NOT);
+    /// MultiFieldQueryParser::parse(L"query", fields, flags, analyzer);
+    /// 
+ /// + /// The code above would construct a query: + ///
+    /// (filename:query) +(contents:query) -(description:query)
+    /// 
+ /// + /// @param matchVersion Lucene version to match; this is passed through to QueryParser. + /// @param query Query string to parse + /// @param fields Fields to search on + /// @param flags Flags describing the fields + /// @param analyzer Analyzer to use + static QueryPtr parse(LuceneVersion::Version matchVersion, const String& query, Collection fields, Collection flags, const AnalyzerPtr& analyzer); + + /// Parses a query, searching on the fields specified. Use this if you need to specify certain fields as + /// required, and others as prohibited. + /// + ///
+    /// Usage:
+    /// Collection<String> query = newCollection<String>(L"query1", L"query2", L"query3");
+    /// Collection<String> fields = newCollection<String>(L"filename", L"contents", L"description");
+    /// Collection<BooleanClause::Occur> flags = newCollection<BooleanClause::Occur>(BooleanClause::SHOULD, BooleanClause::MUST, BooleanClause::MUST_NOT);
+    /// MultiFieldQueryParser::parse(query, fields, flags, analyzer);
+    /// 
+ /// + /// The code above would construct a query: + ///
+    /// (filename:query1) +(contents:query2) -(description:query3)
+    /// 
+ /// + /// @param matchVersion Lucene version to match; this is passed through to QueryParser. + /// @param queries Queries string to parse + /// @param fields Fields to search on + /// @param flags Flags describing the fields + /// @param analyzer Analyzer to use + static QueryPtr parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, Collection flags, const AnalyzerPtr& analyzer); + +protected: + virtual QueryPtr getFieldQuery(const String& field, const String& queryText, int32_t slop); + virtual QueryPtr getFieldQuery(const String& field, const String& queryText); + void applySlop(const QueryPtr& query, int32_t slop); + + virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity); + virtual QueryPtr getPrefixQuery(const String& field, const String& termStr); + virtual QueryPtr getWildcardQuery(const String& field, const String& termStr); + virtual QueryPtr getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); +}; + +} + +#endif diff --git a/include/lucene++/MultiLevelSkipListReader.h b/include/lucene++/MultiLevelSkipListReader.h new file mode 100644 index 00000000..ab8246b0 --- /dev/null +++ b/include/lucene++/MultiLevelSkipListReader.h @@ -0,0 +1,122 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTILEVELSKIPLISTREADER_H +#define MULTILEVELSKIPLISTREADER_H + +#include "IndexInput.h" + +namespace Lucene { + +/// This abstract class reads skip lists with multiple levels. +/// +/// See {@link MultiLevelSkipListWriter} for the information about the encoding of the multi level skip lists. 
+/// +/// Subclasses must implement the abstract method {@link #readSkipData(int, IndexInput)} which defines the +/// actual format of the skip data. +class MultiLevelSkipListReader : public LuceneObject { +public: + MultiLevelSkipListReader(const IndexInputPtr& skipStream, int32_t maxSkipLevels, int32_t skipInterval); + virtual ~MultiLevelSkipListReader(); + + LUCENE_CLASS(MultiLevelSkipListReader); + +protected: + /// the maximum number of skip levels possible for this index + int32_t maxNumberOfSkipLevels; + + /// number of levels in this skip list + int32_t numberOfSkipLevels; + + /// Defines the number of top skip levels to buffer in memory. Reducing this number results in less + /// memory usage, but possibly slower performance due to more random I/Os. Please notice that the space + /// each level occupies is limited by the skipInterval. The top level can not contain more than + /// skipLevel entries, the second top level can not contain more than skipLevel^2 entries and so forth. + int32_t numberOfLevelsToBuffer; + + int32_t docCount; + bool haveSkipped; + + Collection skipStream; // skipStream for each level + Collection skipPointer; // the start pointer of each skip level + Collection skipInterval; // skipInterval of each level + Collection numSkipped; // number of docs skipped per level + + Collection skipDoc; // doc id of current skip entry per level + int32_t lastDoc; // doc id of last read skip entry with docId <= target + Collection childPointer; // child pointer of current skip entry per level + int64_t lastChildPointer; // childPointer of last read skip entry with docId <= target + + bool inputIsBuffered; + +public: + /// Returns the id of the doc to which the last call of {@link #skipTo(int)} has skipped. + virtual int32_t getDoc(); + + /// Skips entries to the first beyond the current whose document number is greater than or equal to + /// target. Returns the current doc count. 
+ virtual int32_t skipTo(int32_t target); + + virtual void close(); + + /// Initializes the reader. + virtual void init(int64_t skipPointer, int32_t df); + +protected: + virtual bool loadNextSkip(int32_t level); + + /// Seeks the skip entry on the given level + virtual void seekChild(int32_t level); + + /// Loads the skip levels + virtual void loadSkipLevels(); + + /// Subclasses must implement the actual skip data encoding in this method. + /// + /// @param level the level skip data shall be read from + /// @param skipStream the skip stream to read from + virtual int32_t readSkipData(int32_t level, const IndexInputPtr& skipStream) = 0; + + /// Copies the values of the last read skip entry on this level + virtual void setLastSkipData(int32_t level); +}; + +/// Used to buffer the top skip levels +class SkipBuffer : public IndexInput { +public: + SkipBuffer(const IndexInputPtr& input, int32_t length); + virtual ~SkipBuffer(); + + LUCENE_CLASS(SkipBuffer); + +protected: + ByteArray data; + int64_t pointer; + int32_t pos; + +public: + /// Closes the stream to further operations. + virtual void close(); + + /// Returns the current position in this file, where the next read will occur. + virtual int64_t getFilePointer(); + + /// The number of bytes in the file. + virtual int64_t length(); + + /// Reads and returns a single byte. + virtual uint8_t readByte(); + + /// Reads a specified number of bytes into an array at the specified offset. + virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); + + /// Sets current position in this file, where the next read will occur. 
+ virtual void seek(int64_t pos); +}; + +} + +#endif diff --git a/include/lucene++/MultiLevelSkipListWriter.h b/include/lucene++/MultiLevelSkipListWriter.h new file mode 100644 index 00000000..f7529452 --- /dev/null +++ b/include/lucene++/MultiLevelSkipListWriter.h @@ -0,0 +1,76 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTILEVELSKIPLISTWRITER_H +#define MULTILEVELSKIPLISTWRITER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// This abstract class writes skip lists with multiple levels. +/// +/// Example for skipInterval = 3: +/// +/// c (skip level 2) +/// c c c (skip level 1) +/// x x x x x x x x x x (skip level 0) +/// d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d d (posting list) +/// 3 6 9 12 15 18 21 24 27 30 (df) +/// +/// d - document +/// x - skip data +/// c - skip data with child pointer +/// +/// Skip level i contains every skipInterval-th entry from skip level i-1. +/// Therefore the number of entries on level i is: floor(df / ((skipInterval ^ (i + 1))). +/// +/// Each skip entry on a level i>0 contains a pointer to the corresponding skip entry in list i-1. +/// This guarantees a logarithmic amount of skips to find the target document. +/// +/// While this class takes care of writing the different skip levels, subclasses must define the +/// actual format of the skip data. 
+class MultiLevelSkipListWriter : public LuceneObject { +public: + MultiLevelSkipListWriter(int32_t skipInterval, int32_t maxSkipLevels, int32_t df); + virtual ~MultiLevelSkipListWriter(); + + LUCENE_CLASS(MultiLevelSkipListWriter); + +protected: + /// number of levels in this skip list + int32_t numberOfSkipLevels; + + /// the skip interval in the list with level = 0 + int32_t skipInterval; + + /// for every skip level a different buffer is used + Collection skipBuffer; + +public: + /// Writes the current skip data to the buffers. The current document frequency determines + /// the max level is skip data is to be written to. + /// @param df the current document frequency + void bufferSkip(int32_t df); + + /// Writes the buffered skip lists to the given output. + /// @param output the IndexOutput the skip lists shall be written to + /// @return the pointer the skip list starts + int64_t writeSkip(const IndexOutputPtr& output); + +protected: + void init(); + virtual void resetSkip(); + + /// Subclasses must implement the actual skip data encoding in this method. + /// @param level the level skip data shall be writing for + /// @param skipBuffer the skip buffer to write to + virtual void writeSkipData(int32_t level, const IndexOutputPtr& skipBuffer) = 0; +}; + +} + +#endif diff --git a/include/lucene++/MultiPhraseQuery.h b/include/lucene++/MultiPhraseQuery.h new file mode 100644 index 00000000..020982ba --- /dev/null +++ b/include/lucene++/MultiPhraseQuery.h @@ -0,0 +1,80 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTIPHRASEQUERY_H +#define MULTIPHRASEQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// MultiPhraseQuery is a generalized version of PhraseQuery, with an added method {@link #add(Term[])}. +/// To use this class, to search for the phrase "Microsoft app*" first use add(Term) on the term "Microsoft", +/// then find all terms that have "app" as prefix using IndexReader.terms(Term), and use +/// MultiPhraseQuery.add(Term[] terms) to add them to the query. +class LPPAPI MultiPhraseQuery : public Query { +public: + MultiPhraseQuery(); + virtual ~MultiPhraseQuery(); + + LUCENE_CLASS(MultiPhraseQuery); + +protected: + String field; + Collection< Collection > termArrays; + Collection positions; + int32_t slop; + +public: + using Query::toString; + + /// Sets the phrase slop for this query. + /// @see PhraseQuery#setSlop(int32_t) + void setSlop(int32_t s); + + /// Gets the phrase slop for this query. + /// @see PhraseQuery#getSlop() + int32_t getSlop(); + + /// Add a single term at the next position in the phrase. + /// @see PhraseQuery#add(Term) + void add(const TermPtr& term); + + /// Add multiple terms at the next position in the phrase. Any of the terms may match. + /// @see PhraseQuery#add(Term) + void add(Collection terms); + + /// Allows to specify the relative position of terms within the phrase. + /// @see PhraseQuery#add(Term, int) + void add(Collection terms, int32_t position); + + /// Returns a List of the terms in the multiphrase. Do not modify the List or its contents. + Collection< Collection > getTermArrays(); + + /// Returns the relative positions of terms in this phrase. + Collection getPositions(); + + virtual void extractTerms(SetTerm terms); + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + virtual WeightPtr createWeight(const SearcherPtr& searcher); + + /// Prints a user-readable version of this query. 
+ virtual String toString(const String& field); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + +protected: + int32_t termArraysHashCode(); + bool termArraysEquals(Collection< Collection > first, Collection< Collection > second); + + friend class MultiPhraseWeight; +}; + +} + +#endif diff --git a/include/lucene++/MultiReader.h b/include/lucene++/MultiReader.h new file mode 100644 index 00000000..11854d69 --- /dev/null +++ b/include/lucene++/MultiReader.h @@ -0,0 +1,142 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTIREADER_H +#define MULTIREADER_H + +#include "IndexReader.h" + +namespace Lucene { + +/// An IndexReader which reads multiple indexes, appending their content. +class LPPAPI MultiReader : public IndexReader { +public: + /// Construct a MultiReader aggregating the named set of (sub)readers. Directory locking for delete, + /// undeleteAll, and setNorm operations is left to the subreaders. + /// @param closeSubReaders indicates whether the subreaders should be closed when this MultiReader is closed + /// @param subReaders set of (sub)readers + MultiReader(Collection subReaders, bool closeSubReaders = true); + + virtual ~MultiReader(); + + LUCENE_CLASS(MultiReader); + +protected: + Collection subReaders; + Collection starts; // 1st docno for each segment + Collection decrefOnClose; // remember which subreaders to decRef on close + MapStringByteArray normsCache; + int32_t _maxDoc; + int32_t _numDocs; + bool _hasDeletions; + +public: + /// Tries to reopen the subreaders. + /// + /// If one or more subreaders could be re-opened (ie. 
subReader.reopen() returned a new instance != subReader), + /// then a new MultiReader instance is returned, otherwise this instance is returned. + /// + /// A re-opened instance might share one or more subreaders with the old instance. Index modification + /// operations result in undefined behavior when performed before the old instance is closed. (see {@link + /// IndexReader#reopen()}). + /// + /// If subreaders are shared, then the reference count of those readers is increased to ensure that the + /// subreaders remain open until the last referring reader is closed. + virtual IndexReaderPtr reopen(); + + /// Clones the subreaders. (see {@link IndexReader#clone()}). + /// + /// If subreaders are shared, then the reference count of those readers is increased to ensure that the + /// subreaders remain open until the last referring reader is closed. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + virtual Collection getTermFreqVectors(int32_t docNumber); + virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); + virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); + virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); + + virtual bool isOptimized(); + + /// Returns the number of documents in this index. + virtual int32_t numDocs(); + + /// Returns one greater than the largest possible document number. + virtual int32_t maxDoc(); + + /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine + /// what {@link Field}s to load and how they should be loaded. + virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); + + /// Returns true if document n has been deleted + virtual bool isDeleted(int32_t n); + + /// Returns true if any documents have been deleted + virtual bool hasDeletions(); + + /// Returns true if there are norms stored for this field. 
+ virtual bool hasNorms(const String& field); + + /// Returns the byte-encoded normalization factor for the named field of every document. + virtual ByteArray norms(const String& field); + + /// Reads the byte-encoded normalization factor for the named field of every document. + virtual void norms(const String& field, ByteArray norms, int32_t offset); + + /// Returns an enumeration of all the terms in the index. + virtual TermEnumPtr terms(); + + /// Returns an enumeration of all terms starting at a given term. + virtual TermEnumPtr terms(const TermPtr& t); + + /// Returns the number of documents containing the term t. + virtual int32_t docFreq(const TermPtr& t); + + /// Returns an unpositioned {@link TermDocs} enumerator. + virtual TermDocsPtr termDocs(); + + /// Returns an unpositioned {@link TermPositions} enumerator. + virtual TermPositionsPtr termPositions(); + + /// Get a list of unique field names that exist in this index and have the specified field option + /// information. + virtual HashSet getFieldNames(FieldOption fieldOption); + + /// Checks recursively if all subreaders are up to date. + virtual bool isCurrent(); + + /// Not implemented. + virtual int64_t getVersion(); + + /// Returns the sequential sub readers that this reader is logically composed of. + virtual Collection getSequentialSubReaders(); + +protected: + /// If clone is true then we clone each of the subreaders + /// @param doClone + /// @return New IndexReader, or same one (this) if reopen/clone is not necessary + IndexReaderPtr doReopen(bool doClone); + + /// Implements deletion of the document numbered docNum. + virtual void doDelete(int32_t docNum); + + /// Implements actual undeleteAll() in subclass. + virtual void doUndeleteAll(); + + /// Find reader for doc n + int32_t readerIndex(int32_t n); + + /// Implements setNorm in subclass. 
+ virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); + + virtual void doCommit(MapStringString commitUserData); + + /// Implements close. + virtual void doClose(); +}; + +} + +#endif diff --git a/include/lucene++/MultiSearcher.h b/include/lucene++/MultiSearcher.h new file mode 100644 index 00000000..c0f87a74 --- /dev/null +++ b/include/lucene++/MultiSearcher.h @@ -0,0 +1,78 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTISEARCHER_H +#define MULTISEARCHER_H + +#include "Searcher.h" +#include "Collector.h" + +namespace Lucene { + +/// Implements search over a set of Searchables. +/// +/// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or {@link +/// #search(QueryPtr, FilterPtr, int32_t)} methods. +class LPPAPI MultiSearcher : public Searcher { +public: + /// Creates a searcher which searches searchers. + MultiSearcher(Collection searchables); + + virtual ~MultiSearcher(); + + LUCENE_CLASS(MultiSearcher); + +protected: + Collection searchables; + Collection starts; + int32_t _maxDoc; + +public: + using Searcher::search; + + /// Return the array of {@link Searchable}s this searches. + Collection getSearchables(); + + virtual void close(); + virtual int32_t docFreq(const TermPtr& term); + virtual DocumentPtr doc(int32_t n); + virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); + + /// Returns index of the searcher for document n in the array used to construct this searcher. + int32_t subSearcher(int32_t n); + + /// Returns the document number of document n within its sub-index. 
+ int32_t subDoc(int32_t n); + + virtual int32_t maxDoc(); + virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n); + virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort); + virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results); + virtual QueryPtr rewrite(const QueryPtr& query); + virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc); + +protected: + Collection getStarts(); + + /// Create weight in multiple index scenario. + /// + /// Distributed query processing is done in the following steps: + /// 1. rewrite query. + /// 2. extract necessary terms. + /// 3. collect dfs for these terms from the Searchables. + /// 4. create query weight using aggregate dfs. + /// 5. distribute that weight to Searchables. + /// 6. merge results. + /// + /// Steps 1-4 are done here, 5+6 in the search() methods + /// + /// @return rewritten queries + virtual WeightPtr createWeight(const QueryPtr& query); +}; + +} + +#endif diff --git a/include/lucene++/MultiTermQuery.h b/include/lucene++/MultiTermQuery.h new file mode 100644 index 00000000..15036b80 --- /dev/null +++ b/include/lucene++/MultiTermQuery.h @@ -0,0 +1,178 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTITERMQUERY_H +#define MULTITERMQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// An abstract {@link Query} that matches documents containing a subset of terms provided by a {@link +/// FilteredTermEnum} enumeration. 
+/// +/// This query cannot be used directly; you must subclass it and define {@link #getEnum} to provide a +/// {@link FilteredTermEnum} that iterates through the terms to be matched. +/// +/// NOTE: if {@link #setRewriteMethod} is either {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link +/// #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a {@link BooleanQuery.TooManyClauses} exception +/// during searching, which happens when the number of terms to be searched exceeds {@link +/// BooleanQuery#getMaxClauseCount()}. Setting {@link #setRewriteMethod} to {@link +/// #CONSTANT_SCORE_FILTER_REWRITE} prevents this. +/// +/// The recommended rewrite method is {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU +/// computing unhelpful scores, and it tries to pick the most performant rewrite method given the query. +/// +/// Note that {@link QueryParser} produces MultiTermQueries using {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} +/// by default. +class LPPAPI MultiTermQuery : public Query { +public: + MultiTermQuery(); + virtual ~MultiTermQuery(); + + LUCENE_CLASS(MultiTermQuery); + +protected: + RewriteMethodPtr rewriteMethod; + int32_t numberOfTerms; + +public: + /// A rewrite method that first creates a private Filter, by visiting each term in sequence and marking + /// all docs for that term. Matching documents are assigned a constant score equal to the query's boost. + /// + /// This method is faster than the BooleanQuery rewrite methods when the number of matched terms or matched + /// documents is non-trivial. Also, it will never hit an errant TooManyClauses exception. + /// + /// @see #setRewriteMethod + static RewriteMethodPtr CONSTANT_SCORE_FILTER_REWRITE(); + + /// A rewrite method that first translates each term into {@link BooleanClause.Occur#SHOULD} clause in a + /// BooleanQuery, and keeps the scores as computed by the query. 
Note that typically such scores are + /// meaningless to the user, and require non-trivial CPU to compute, so it's almost always better to use + /// {@link #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. + /// + /// NOTE: This rewrite method will hit {@link BooleanQuery.TooManyClauses} if the number of terms exceeds + /// {@link BooleanQuery#getMaxClauseCount}. + /// + /// @see #setRewriteMethod + static RewriteMethodPtr SCORING_BOOLEAN_QUERY_REWRITE(); + + /// Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except scores are not computed. Instead, each matching + /// document receives a constant score equal to the query's boost. + /// + /// NOTE: This rewrite method will hit TooManyClauses if the number of terms exceeds {@link + /// BooleanQuery#getMaxClauseCount}. + /// + /// @see #setRewriteMethod + static RewriteMethodPtr CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE(); + + /// Read-only default instance of {@link ConstantScoreAutoRewrite}, with {@link + /// ConstantScoreAutoRewrite#setTermCountCutoff} set to {@link ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF} + /// and {@link ConstantScoreAutoRewrite#setDocCountPercent} set to {@link + /// ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}. Note that you cannot alter the configuration of + /// this instance; you'll need to create a private instance instead. + static RewriteMethodPtr CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); + + /// Return the number of unique terms visited during execution of the query. If there are many of them, + /// you may consider using another query type or optimize your total term count in index. + /// + /// This method is not thread safe, be sure to only call it when no query is running! If you re-use the + /// same query instance for another search, be sure to first reset the term counter with {@link + /// #clearTotalNumberOfTerms}. + /// + /// On optimized indexes / no MultiReaders, you get the correct number of unique terms for the whole index. 
+ /// Use this number to compare different queries. For non-optimized indexes this number can also be achieved + /// in non-constant-score mode. In constant-score mode you get the total number of terms seeked for all + /// segments / sub-readers. + /// @see #clearTotalNumberOfTerms + int32_t getTotalNumberOfTerms(); + + /// Resets the counting of unique terms. Do this before executing the query/filter. + /// @see #getTotalNumberOfTerms + void clearTotalNumberOfTerms(); + + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + /// @see #setRewriteMethod + virtual RewriteMethodPtr getRewriteMethod(); + + /// Sets the rewrite method to be used when executing the query. You can use one of the four core methods, + /// or implement your own subclass of {@link RewriteMethod}. + virtual void setRewriteMethod(const RewriteMethodPtr& method); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); + +protected: + /// Construct the enumeration to be used, expanding the pattern term. + virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader) = 0; + + void incTotalNumberOfTerms(int32_t inc); + + friend class MultiTermQueryWrapperFilter; + friend class ScoringBooleanQueryRewrite; + friend class ConstantScoreAutoRewrite; +}; + +/// Abstract class that defines how the query is rewritten. +class LPPAPI RewriteMethod : public LuceneObject { +public: + virtual ~RewriteMethod(); + LUCENE_CLASS(RewriteMethod); + +public: + virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) = 0; +}; + +/// A rewrite method that tries to pick the best constant-score rewrite method based on term and document +/// counts from the query. If both the number of terms and documents is small enough, then {@link +/// #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used. Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is +/// used. 
+class LPPAPI ConstantScoreAutoRewrite : public RewriteMethod { +public: + ConstantScoreAutoRewrite(); + virtual ~ConstantScoreAutoRewrite(); + + LUCENE_CLASS(ConstantScoreAutoRewrite); + +public: + // Defaults derived from rough tests with a 20.0 million doc Wikipedia index. With more than 350 terms + // in the query, the filter method is fastest + static const int32_t DEFAULT_TERM_COUNT_CUTOFF; + + // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest + static const double DEFAULT_DOC_COUNT_PERCENT; + +protected: + int32_t termCountCutoff; + double docCountPercent; + +public: + /// If the number of terms in this query is equal to or larger than this setting then {@link + /// #CONSTANT_SCORE_FILTER_REWRITE} is used. + virtual void setTermCountCutoff(int32_t count); + + /// @see #setTermCountCutoff + virtual int32_t getTermCountCutoff(); + + /// If the number of documents to be visited in the postings exceeds this specified percentage of the + /// maxDoc() for the index, then {@link #CONSTANT_SCORE_FILTER_REWRITE} is used. + /// @param percent 0.0 to 100.0 + virtual void setDocCountPercent(double percent); + + /// @see #setDocCountPercent + virtual double getDocCountPercent(); + + virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query); + + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); +}; + +} + +#endif diff --git a/include/lucene++/MultiTermQueryWrapperFilter.h b/include/lucene++/MultiTermQueryWrapperFilter.h new file mode 100644 index 00000000..62f6ca6d --- /dev/null +++ b/include/lucene++/MultiTermQueryWrapperFilter.h @@ -0,0 +1,59 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTITERMQUERYWRAPPERFILTER_H +#define MULTITERMQUERYWRAPPERFILTER_H + +#include "Filter.h" + +namespace Lucene { + +/// A wrapper for {@link MultiTermQuery}, that exposes its functionality as a {@link Filter}. +/// +/// MultiTermQueryWrapperFilter is not designed to be used by itself. Normally you subclass it to +/// provide a Filter counterpart for a {@link MultiTermQuery} subclass. +/// +/// For example, {@link TermRangeFilter} and {@link PrefixFilter} extend MultiTermQueryWrapperFilter. +/// This class also provides the functionality behind {@link MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE}; +/// this is why it is not abstract. +class LPPAPI MultiTermQueryWrapperFilter : public Filter { +INTERNAL: + /// Wrap a {@link MultiTermQuery} as a Filter. + MultiTermQueryWrapperFilter(const MultiTermQueryPtr& query); + +public: + virtual ~MultiTermQueryWrapperFilter(); + + LUCENE_CLASS(MultiTermQueryWrapperFilter); + +protected: + MultiTermQueryPtr query; + +public: + virtual String toString(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + /// Return the number of unique terms visited during execution of the filter. If there are many of them, + /// you may consider using another filter type or optimize your total term count in index. + /// + /// This method is not thread safe, be sure to only call it when no filter is running! If you re-use the + /// same filter instance for another search, be sure to first reset the term counter with {@link + /// #clearTotalNumberOfTerms}. + /// @see #clearTotalNumberOfTerms + int32_t getTotalNumberOfTerms(); + + /// Resets the counting of unique terms. Do this before executing the filter. + /// @see #getTotalNumberOfTerms + void clearTotalNumberOfTerms(); + + /// Returns a DocIdSet with documents that should be permitted in search results. 
+ virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/MultipleTermPositions.h b/include/lucene++/MultipleTermPositions.h new file mode 100644 index 00000000..f61dade1 --- /dev/null +++ b/include/lucene++/MultipleTermPositions.h @@ -0,0 +1,55 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef MULTIPLETERMPOSITIONS_H +#define MULTIPLETERMPOSITIONS_H + +#include "TermPositions.h" + +namespace Lucene { + +/// Allows you to iterate over the {@link TermPositions} for multiple {@link Term}s as a single +/// {@link TermPositions}. +class LPPAPI MultipleTermPositions : public TermPositions, public LuceneObject { +public: + MultipleTermPositions(const IndexReaderPtr& indexReader, Collection terms); + virtual ~MultipleTermPositions(); + + LUCENE_CLASS(MultipleTermPositions); + +protected: + int32_t _doc; + int32_t _freq; + TermPositionsQueuePtr termPositionsQueue; + IntQueuePtr posList; + +public: + virtual bool next(); + virtual int32_t nextPosition(); + virtual bool skipTo(int32_t target); + virtual int32_t doc(); + virtual int32_t freq(); + virtual void close(); + + /// Not implemented. + virtual void seek(const TermPtr& term); + + /// Not implemented. + virtual void seek(const TermEnumPtr& termEnum); + + /// Not implemented. + virtual int32_t read(Collection& docs, Collection& freqs); + + /// Not implemented. 
+ virtual ByteArray getPayload(ByteArray data, int32_t offset); + + /// @return false + virtual bool isPayloadAvailable(); +}; + +} + +#endif diff --git a/include/lucene++/NativeFSLockFactory.h b/include/lucene++/NativeFSLockFactory.h new file mode 100644 index 00000000..273f15ab --- /dev/null +++ b/include/lucene++/NativeFSLockFactory.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NATIVEFSLOCKFACTORY_H +#define NATIVEFSLOCKFACTORY_H + +#include "FSLockFactory.h" + +namespace Lucene { + +/// Implements {@link LockFactory} using native file lock. +/// @see LockFactory +class LPPAPI NativeFSLockFactory : public FSLockFactory { +public: + /// Create a NativeFSLockFactory instance, storing lock files into + /// the specified lockDirName. + /// @param lockDirName where lock files are created. + NativeFSLockFactory(const String& lockDirName = EmptyString); + virtual ~NativeFSLockFactory(); + + LUCENE_CLASS(NativeFSLockFactory); + +public: + /// Return a new Lock instance identified by lockName. + /// @param lockName name of the lock to be created. + virtual LockPtr makeLock(const String& lockName); + + /// Attempt to clear (forcefully unlock and remove) the + /// specified lock. Only call this at a time when you are + /// certain this lock is no longer in use. + /// @param lockName name of the lock to be cleared. + virtual void clearLock(const String& lockName); + +protected: + /// Simple test to verify locking system is "working". On NFS, if + /// it's mis-configured, you can hit long (35 second) timeouts which + /// cause Lock.obtain to take far too long (it assumes the obtain() + /// call takes zero time). 
+ void acquireTestLock(); +}; + +} + +#endif diff --git a/include/lucene++/NearSpansOrdered.h b/include/lucene++/NearSpansOrdered.h new file mode 100644 index 00000000..4ee965f3 --- /dev/null +++ b/include/lucene++/NearSpansOrdered.h @@ -0,0 +1,99 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NEARSPANSORDERED_H +#define NEARSPANSORDERED_H + +#include "Spans.h" + +namespace Lucene { + +/// A Spans that is formed from the ordered subspans of a SpanNearQuery where the subspans do not overlap +/// and have a maximum slop between them. +/// +/// The formed spans only contains minimum slop matches. The matching slop is computed from the distance(s) +/// between the non overlapping matching Spans. +/// +/// Successive matches are always formed from the successive Spans of the SpanNearQuery. +/// +/// The formed spans may contain overlaps when the slop is at least 1. For example, when querying using +///
t1 t2 t3
+/// with slop at least 1, the fragment: +///
t1 t2 t1 t3 t2 t3
+/// matches twice: +///
t1 t2 .. t3      
+///
      t1 .. t2 t3
+/// +/// Note: Only public for subclassing. Most implementations should not need this class +class LPPAPI NearSpansOrdered : public Spans { +public: + NearSpansOrdered(const SpanNearQueryPtr& spanNearQuery, const IndexReaderPtr& reader, bool collectPayloads = true); + virtual ~NearSpansOrdered(); + + LUCENE_CLASS(NearSpansOrdered); + +protected: + int32_t allowedSlop; + bool firstTime; + bool more; + + /// The spans in the same order as the SpanNearQuery + Collection subSpans; + + /// Indicates that all subSpans have same doc() + bool inSameDoc; + + int32_t matchDoc; + int32_t matchStart; + int32_t matchEnd; + Collection matchPayload; + + Collection subSpansByDoc; + SpanNearQueryPtr query; + bool collectPayloads; + +public: + virtual int32_t doc(); + virtual int32_t start(); + virtual int32_t end(); + + Collection getSubSpans(); + + virtual Collection getPayload(); + virtual bool isPayloadAvailable(); + virtual bool next(); + virtual bool skipTo(int32_t target); + + /// Check whether two Spans in the same document are ordered. + /// @return true if spans1 starts before spans2 or the spans start at the same position, and + /// spans1 ends before spans2. + static bool docSpansOrdered(const SpansPtr& spans1, const SpansPtr& spans2); + + virtual String toString(); + +protected: + /// Advances the subSpans to just after an ordered match with a minimum slop that is smaller than the + /// slop allowed by the SpanNearQuery. + /// @return true if there is such a match. + bool advanceAfterOrdered(); + + /// Advance the subSpans to the same document. + bool toSameDoc(); + + // Like {@link #docSpansOrdered(SpansPtr, SpansPtr)}, but use the spans starts and ends as parameters. + static bool docSpansOrdered(int32_t start1, int32_t end1, int32_t start2, int32_t end2); + + /// Order the subSpans within the same document by advancing all later spans after the previous one. + bool stretchToOrder(); + + /// The subSpans are ordered in the same doc, so there is a possible match. 
Compute the slop while + /// making the match as short as possible by advancing all subSpans except the last one in reverse order. + bool shrinkToAfterShortestMatch(); +}; + +} + +#endif diff --git a/include/lucene++/NearSpansUnordered.h b/include/lucene++/NearSpansUnordered.h new file mode 100644 index 00000000..a27d43e3 --- /dev/null +++ b/include/lucene++/NearSpansUnordered.h @@ -0,0 +1,71 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NEARSPANSUNORDERED_H +#define NEARSPANSUNORDERED_H + +#include "Spans.h" + +namespace Lucene { + +/// Similar to {@link NearSpansOrdered}, but for the unordered case. +/// +/// Only public for subclassing. Most implementations should not need this class +class LPPAPI NearSpansUnordered : public Spans { +public: + NearSpansUnordered(const SpanNearQueryPtr& query, const IndexReaderPtr& reader); + virtual ~NearSpansUnordered(); + + LUCENE_CLASS(NearSpansUnordered); + +protected: + SpanNearQueryPtr query; + IndexReaderPtr reader; + + Collection ordered; // spans in query order + Collection subSpans; + int32_t slop; // from query + + SpansCellPtr first; // linked list of spans + SpansCellPtr last; // sorted by doc only + + int32_t totalLength; // sum of current lengths + + CellQueuePtr queue; // sorted queue of spans + SpansCellPtr max; // max element in queue + + bool more; // true if not done + bool firstTime; // true before first next() + +public: + virtual void initialize(); + + Collection getSubSpans(); + + virtual bool next(); + virtual bool skipTo(int32_t target); + virtual int32_t doc(); + virtual int32_t start(); + virtual int32_t end(); + virtual Collection getPayload(); + virtual bool isPayloadAvailable(); + virtual 
String toString(); + +protected: + SpansCellPtr min(); + void initList(bool next); + void addToList(const SpansCellPtr& cell); + void firstToLast(); + void queueToList(); + void listToQueue(); + bool atMatch(); + + friend class SpansCell; +}; + +} + +#endif diff --git a/include/lucene++/NoLockFactory.h b/include/lucene++/NoLockFactory.h new file mode 100644 index 00000000..39195ff7 --- /dev/null +++ b/include/lucene++/NoLockFactory.h @@ -0,0 +1,40 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NOLOCKFACTORY_H +#define NOLOCKFACTORY_H + +#include "LockFactory.h" + +namespace Lucene { + +/// Use this {@link LockFactory} to disable locking entirely. Only one instance of this lock is created. +/// You should call {@link #getNoLockFactory()} to get the instance. +/// +/// @see LockFactory +class LPPAPI NoLockFactory : public LockFactory { +public: + virtual ~NoLockFactory(); + + LUCENE_CLASS(NoLockFactory); + +private: + static NoLockPtr getSingletonLock(); + +public: + static NoLockFactoryPtr getNoLockFactory(); + + /// Return a new Lock instance identified by lockName. + virtual LockPtr makeLock(const String& lockName); + + /// Attempt to clear (forcefully unlock and remove) the specified lock. Only call this at a time when you + /// are certain this lock is no longer in use. + virtual void clearLock(const String& lockName); +}; + +} + +#endif diff --git a/include/lucene++/NormalizeCharMap.h b/include/lucene++/NormalizeCharMap.h new file mode 100644 index 00000000..470456da --- /dev/null +++ b/include/lucene++/NormalizeCharMap.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NORMALIZECHARMAP_H +#define NORMALIZECHARMAP_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Holds a map of String input to String output, to be used with {@link MappingCharFilter}. +class LPPAPI NormalizeCharMap : public LuceneObject { +public: + NormalizeCharMap(); + virtual ~NormalizeCharMap(); + + LUCENE_CLASS(NormalizeCharMap); + +public: + MapCharNormalizeCharMap submap; + String normStr; + int32_t diff; + +public: + /// Records a replacement to be applied to the inputs stream. Whenever singleMatch occurs in the input, it + /// will be replaced with replacement. + /// + /// @param singleMatch input String to be replaced + /// @param replacement output String + void add(const String& singleMatch, const String& replacement); +}; + +} + +#endif diff --git a/include/lucene++/NormsWriter.h b/include/lucene++/NormsWriter.h new file mode 100644 index 00000000..836774d6 --- /dev/null +++ b/include/lucene++/NormsWriter.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NORMSWRITER_H +#define NORMSWRITER_H + +#include "InvertedDocEndConsumer.h" + +namespace Lucene { + +/// Writes norms. Each thread X field accumulates the norms for the doc/fields it saw, then the flush method +/// below merges all of these together into a single _X.nrm file. 
+class NormsWriter : public InvertedDocEndConsumer { +public: + NormsWriter(); + virtual ~NormsWriter(); + + LUCENE_CLASS(NormsWriter); + +protected: + FieldInfosPtr fieldInfos; + +public: + virtual InvertedDocEndConsumerPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread); + virtual void abort(); + + // We only write the _X.nrm file at flush + virtual void files(HashSet files); + + virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); + + /// Produce _X.nrm if any document had a field with norms not disabled + virtual void flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); + virtual void closeDocStore(const SegmentWriteStatePtr& state); + +protected: + static uint8_t getDefaultNorm(); +}; + +} + +#endif diff --git a/include/lucene++/NormsWriterPerField.h b/include/lucene++/NormsWriterPerField.h new file mode 100644 index 00000000..1560c0f2 --- /dev/null +++ b/include/lucene++/NormsWriterPerField.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NORMSWRITERPERFIELD_H +#define NORMSWRITERPERFIELD_H + +#include "InvertedDocEndConsumerPerField.h" + +namespace Lucene { + +/// Taps into DocInverter, as an InvertedDocEndConsumer, which is called at the end of inverting each field. +/// We just look at the length for the field (docState.length) and record the norm. 
+class NormsWriterPerField : public InvertedDocEndConsumerPerField { +public: + NormsWriterPerField(const DocInverterPerFieldPtr& docInverterPerField, const NormsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); + virtual ~NormsWriterPerField(); + + LUCENE_CLASS(NormsWriterPerField); + +public: + NormsWriterPerThreadWeakPtr _perThread; + FieldInfoPtr fieldInfo; + DocStatePtr docState; + + // Holds all docID/norm pairs we've seen + Collection docIDs; + ByteArray norms; + int32_t upto; + + FieldInvertStatePtr fieldState; + +public: + void reset(); + virtual void abort(); + + /// Compare two objects + virtual int32_t compareTo(const LuceneObjectPtr& other); + + virtual void finish(); +}; + +} + +#endif diff --git a/include/lucene++/NormsWriterPerThread.h b/include/lucene++/NormsWriterPerThread.h new file mode 100644 index 00000000..e020bd3a --- /dev/null +++ b/include/lucene++/NormsWriterPerThread.h @@ -0,0 +1,36 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef NORMSWRITERPERTHREAD_H +#define NORMSWRITERPERTHREAD_H + +#include "InvertedDocEndConsumerPerThread.h" + +namespace Lucene { + +class NormsWriterPerThread : public InvertedDocEndConsumerPerThread { +public: + NormsWriterPerThread(const DocInverterPerThreadPtr& docInverterPerThread, const NormsWriterPtr& normsWriter); + virtual ~NormsWriterPerThread(); + + LUCENE_CLASS(NormsWriterPerThread); + +public: + NormsWriterWeakPtr _normsWriter; + DocStatePtr docState; + +public: + virtual InvertedDocEndConsumerPerFieldPtr addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo); + virtual void abort(); + virtual void startDocument(); + virtual void finishDocument(); + + bool freeRAM(); +}; + +} + +#endif diff --git a/include/lucene++/NumberTools.h b/include/lucene++/NumberTools.h new file mode 100644 index 00000000..fc47365e --- /dev/null +++ b/include/lucene++/NumberTools.h @@ -0,0 +1,59 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NUMBERTOOLS_H +#define NUMBERTOOLS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Provides support for converting longs to Strings, and back again. The strings are structured so that +/// lexicographic sorting order is preserved. +/// +/// That is, if l1 is less than l2 for any two longs l1 and l2, then NumberTools.longToString(l1) is +/// lexicographically less than NumberTools.longToString(l2). (Similarly for "greater than" and "equals".) +/// +/// This class handles all long values (unlike {@link DateField}). 
+/// +/// @deprecated For new indexes use {@link NumericUtils} instead, which provides a sortable binary representation +/// (prefix encoded) of numeric values. +/// To index and efficiently query numeric values use {@link NumericField} and {@link NumericRangeQuery}. This +/// class is included for use with existing indices and will be removed in a future release (possibly Lucene 4.0). +class LPPAPI NumberTools : public LuceneObject { +public: + virtual ~NumberTools(); + + LUCENE_CLASS(NumberTools); + +protected: + static const int32_t RADIX; + + static const wchar_t NEGATIVE_PREFIX; + + // NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX + static const wchar_t POSITIVE_PREFIX; + +public: + /// Equivalent to longToString(LLONG_MIN) + static const String& MIN_STRING_VALUE(); + + /// Equivalent to longToString(LLONG_MAX) + static const String& MAX_STRING_VALUE(); + + /// The length of (all) strings returned by {@link #longToString} + static int32_t STR_SIZE(); + + /// Converts a long to a String suitable for indexing. + static String longToString(int64_t l); + + /// Converts a String that was returned by {@link #longToString} back to a long. + static int64_t stringToLong(const String& str); +}; + +} + +#endif diff --git a/include/lucene++/NumericField.h b/include/lucene++/NumericField.h new file mode 100644 index 00000000..e8ecd538 --- /dev/null +++ b/include/lucene++/NumericField.h @@ -0,0 +1,133 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NUMERICFIELD_H +#define NUMERICFIELD_H + +#include "Field.h" + +namespace Lucene { + +/// This class provides a {@link Field} that enables indexing of numeric values for efficient range filtering and +/// sorting. 
The native types int32_t, int64_t and double are directly supported. However, any value that can be +/// converted into these native types can also be indexed. For example, date/time values represented by a {@link +/// Date} can be translated into a int64_t value. If you don't need millisecond precision, you can quantize the +/// value, either by dividing the result or using the separate getters (for year, month, etc.) to construct an int32_t +/// or int64_t value. +/// +/// To perform range querying or filtering against a NumericField, use {@link NumericRangeQuery} or {@link +/// NumericRangeFilter}. To sort according to a NumericField, use the normal numeric sort types, eg {@link +/// SortField#INT}. NumericField values can also be loaded directly from {@link FieldCache}. +/// +/// By default, a NumericField's value is not stored but is indexed for range filtering and sorting. You can use the +/// {@link #NumericField(String,Field.Store,boolean)} constructor if you need to change these defaults. +/// +/// You may add the same field name as a NumericField to the same document more than once. Range querying and +/// filtering will be the logical OR of all values; so a range query will hit all documents that have at least one +/// value in the range. However sort behavior is not defined. If you need to sort, you should separately index a +/// single-valued NumericField. +/// +/// A NumericField will consume somewhat more disk space in the index than an ordinary single-valued field. However, +/// for a typical index that includes substantial textual content per document, this increase will likely be in the +/// noise. +/// +/// Within Lucene, each numeric value is indexed as a trie structure, where each term is logically assigned to larger +/// and larger pre-defined brackets (which are simply lower-precision representations of the value). The step size +/// between each successive bracket is called the precisionStep, measured in bits. 
Smaller precisionStep values +/// result in larger number of brackets, which consumes more disk space in the index but may result in faster range +/// search performance. The default value 4 was selected for a reasonable trade off of disk space consumption versus +/// performance. You can use the expert constructor {@link #NumericField(String,int,Field.Store,boolean)} if you'd +/// like to change the value. Note that you must also specify a congruent value when creating {@link NumericRangeQuery} +/// or {@link NumericRangeFilter}. For low cardinality fields larger precision steps are good. If the cardinality +/// is < 100, it is fair to use {@link INT_MAX}, which produces one term per value. +/// +/// For more information on the internals of numeric trie indexing, including the precisionStep configuration, see +/// {@link NumericRangeQuery}. The format of indexed values is described in {@link NumericUtils}. +/// +/// If you only need to sort by numeric value, and never run range querying/filtering, you can index using a +/// precisionStep of {@link MAX_INT}. This will minimize disk space consumed. +/// +/// More advanced users can instead use {@link NumericTokenStream} directly, when indexing numbers. This class is a +/// wrapper around this token stream type for easier, more intuitive usage. +/// +/// NOTE: This class is only used during indexing. When retrieving the stored field value from a {@link Document} +/// instance after search, you will get a conventional {@link Fieldable} instance where the numeric values are +/// returned as strings (according to toString(value) of the used data type). +class LPPAPI NumericField : public AbstractField { +public: + /// Creates a field for numeric values using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} + /// (4). The instance is not yet initialized with a numeric value, before indexing a document containing this field, + /// set a value using the various set???Value() methods. 
+ /// This constructor creates an indexed, but not stored field. + /// @param name the field name + NumericField(const String& name); + + /// Creates a field for numeric values using the default precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} + /// (4). The instance is not yet initialized with a numeric value, before indexing a document containing this field, + /// set a value using the various set???Value() methods. + /// This constructor creates an indexed, but not stored field. + /// @param name the field name + /// @param store if the field should be stored in plain text form (according to toString(value) of the used + /// data type) + /// @param index if the field should be indexed using {@link NumericTokenStream} + NumericField(const String& name, Field::Store store, bool index); + + /// Creates a field for numeric values with the specified precisionStep. The instance is not yet initialized with + /// a numeric value, before indexing a document containing this field, set a value using the various set???Value() + /// methods. This constructor creates an indexed, but not stored field. + /// @param name the field name + /// @param precisionStep the used precision step + NumericField(const String& name, int32_t precisionStep); + + /// Creates a field for numeric values with the specified precisionStep. The instance is not yet initialized with + /// a numeric value, before indexing a document containing this field, set a value using the various set???Value() + /// methods. This constructor creates an indexed, but not stored field. 
+ /// @param name the field name + /// @param precisionStep the used precision step + /// @param store if the field should be stored in plain text form (according to toString(value) of the used + /// data type) + /// @param index if the field should be indexed using {@link NumericTokenStream} + NumericField(const String& name, int32_t precisionStep, Field::Store store, bool index); + + virtual ~NumericField(); + + LUCENE_CLASS(NumericField); + +protected: + NumericTokenStreamPtr tokenStream; + +public: + /// Returns a {@link NumericTokenStream} for indexing the numeric value. + virtual TokenStreamPtr tokenStreamValue(); + + /// Returns always null for numeric fields + virtual ByteArray getBinaryValue(ByteArray result); + + /// Returns always null for numeric fields + virtual ReaderPtr readerValue(); + + /// Returns the numeric value as a string (how it is stored, when {@link Field.Store#YES} is chosen). + virtual String stringValue(); + + /// Returns the current numeric value. + virtual int64_t getNumericValue(); + + /// Initializes the field with the supplied long value. + /// @param value the numeric value + virtual NumericFieldPtr setLongValue(int64_t value); + + /// Initializes the field with the supplied int value. + /// @param value the numeric value + virtual NumericFieldPtr setIntValue(int32_t value); + + /// Initializes the field with the supplied double value. + /// @param value the numeric value + virtual NumericFieldPtr setDoubleValue(double value); +}; + +} + +#endif diff --git a/include/lucene++/NumericRangeFilter.h b/include/lucene++/NumericRangeFilter.h new file mode 100644 index 00000000..22291f4f --- /dev/null +++ b/include/lucene++/NumericRangeFilter.h @@ -0,0 +1,83 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef NUMERICRANGEFILTER_H +#define NUMERICRANGEFILTER_H + +#include "MultiTermQueryWrapperFilter.h" + +namespace Lucene { + +/// A {@link Filter} that only accepts numeric values within a specified range. To use this, you must first +/// index the numeric values using {@link NumericField} ({@link NumericTokenStream}). +/// +/// You create a new NumericRangeFilter with the static factory methods, eg: +///
+/// FilterPtr f = NumericRangeFilter::newDoubleRange(L"weight", 0.3, 0.10, true, true);
+/// 
+/// accepts all documents whose double valued "weight" field ranges from 0.3 to 0.10, inclusive. +/// +/// See {@link NumericRangeQuery} for details on how Lucene indexes and searches numeric valued fields. +class LPPAPI NumericRangeFilter : public MultiTermQueryWrapperFilter { +public: + NumericRangeFilter(const NumericRangeQueryPtr& query); + virtual ~NumericRangeFilter(); + + LUCENE_CLASS(NumericRangeFilter); + +public: + /// Factory that creates a NumericRangeFilter, that filters a long range using the given precisionStep. + static NumericRangeFilterPtr newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a long range using the default precisionStep + /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + static NumericRangeFilterPtr newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a int range using the given precisionStep. + static NumericRangeFilterPtr newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a int range using the default precisionStep + /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + static NumericRangeFilterPtr newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a double range using the given precisionStep. + static NumericRangeFilterPtr newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a double range using the default precisionStep + /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). 
+ static NumericRangeFilterPtr newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a int, long or double range using the given + /// precisionStep. You can have half-open ranges (which are in fact <= or >= queries) by setting the min + /// or max value to VariantUtils::null(). By setting inclusive to false it will match all documents + /// excluding the bounds, with inclusive on the boundaries are hits, too. + static NumericRangeFilterPtr newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a int, long or double range range using the default + /// precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). You can have half-open ranges (which are in + /// fact <= or >= queries) by setting the min or max value to VariantUtils::null(). By setting inclusive to false + /// it will match all documents excluding the bounds, with inclusive on the boundaries are hits, too. 
+ static NumericRangeFilterPtr newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); + + /// Returns the field name for this filter + String getField(); + + /// Returns true if the lower endpoint is inclusive + bool includesMin(); + + /// Returns true if the upper endpoint is inclusive + bool includesMax(); + + /// Returns the lower value of this range filter + NumericValue getMin(); + + /// Returns the upper value of this range filter + NumericValue getMax(); +}; + +} + +#endif diff --git a/include/lucene++/NumericRangeQuery.h b/include/lucene++/NumericRangeQuery.h new file mode 100644 index 00000000..e4c7814b --- /dev/null +++ b/include/lucene++/NumericRangeQuery.h @@ -0,0 +1,189 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NUMERICRANGEQUERY_H +#define NUMERICRANGEQUERY_H + +#include "MultiTermQuery.h" +#include "FilteredTermEnum.h" +#include "NumericUtils.h" + +namespace Lucene { + +/// A {@link Query} that matches numeric values within a specified range. To use this, you must first +/// index the numeric values using {@link NumericField} (expert: {@link NumericTokenStream}). If your +/// terms are instead textual, you should use {@link TermRangeQuery}. {@link NumericRangeFilter} is the +/// filter equivalent of this query. +/// +/// You create a new NumericRangeQuery with the static factory methods, eg: +///
+/// QueryPtr q = NumericRangeQuery::newDoubleRange(L"weight", 0.3, 0.10, true, true);
+/// 
+/// matches all documents whose double valued "weight" field ranges from 0.3 to 0.10, inclusive. +/// +/// The performance of NumericRangeQuery is much better than the corresponding {@link TermRangeQuery} +/// because the number of terms that must be searched is usually far fewer, thanks to trie indexing, +/// described below. +/// +/// You can optionally specify a precisionStep when creating this query. This is necessary if you've +/// changed this configuration from its default (4) during indexing. Lower values consume more disk +/// space but speed up searching. Suitable values are between 1 and 8. A good starting point to test +/// is 4, which is the default value for all Numeric* classes. See below for details. +/// +/// This query defaults to {@linkplain MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} for 32 bit +/// integer ranges with precisionStep <=8 and 64 bit (long/double) ranges with precisionStep <=6. +/// Otherwise it uses {@linkplain MultiTermQuery#CONSTANT_SCORE_FILTER_REWRITE} as the number of terms +/// is likely to be high. With precision steps of <=4, this query can be run with one of the BooleanQuery +/// rewrite methods without changing BooleanQuery's default max clause count. +/// +/// How it works +/// +/// See the publication about panFMP, where this +/// algorithm was described (referred to as TrieRangeQuery): +///
Schindler, U, Diepenbroek, M, 2008. +/// Generic XML-based Framework for Metadata Portals. +/// Computers & Geosciences 34 (12), 1947-1955. +/// doi:10.1016/j.cageo.2008.02.023
+/// +/// A quote from this paper: Because Apache Lucene is a full-text search engine and not a conventional +/// database, it cannot handle numerical ranges (eg., field value is inside user defined bounds, even +/// dates are numerical values). We have developed an extension to Apache Lucene that stores the +/// numerical values in a special string-encoded format with variable precision (all numerical values like +/// doubles, longs, and ints are converted to lexicographic sortable string representations and stored +/// with different precisions (for a more detailed description of how the values are stored, see {@link +/// NumericUtils}). A range is then divided recursively into multiple intervals for searching: +/// The center of the range is searched only with the lowest possible precision in the trie, while the +/// boundaries are matched more exactly. This reduces the number of terms dramatically. +/// +/// For the variant that stores long values in 8 different precisions (each reduced by 8 bits) that uses a +/// lowest precision of 1 byte, the index contains only a maximum of 256 distinct values in the lowest +/// precision. Overall, a range could consist of a theoretical maximum of 7*255*2 + 255 = 3825 distinct +/// terms (when there is a term for every distinct value of an 8-byte-number in the index and the range +/// covers almost all of them; a maximum of 255 distinct values is used because it would always be possible +/// to reduce the full 256 values to one term with degraded precision). In practice, we have seen up to +/// 300 terms in most cases (index with 500,000 metadata records and a uniform value distribution). +/// +/// Precision Step: +/// You can choose any precisionStep when encoding values. Lower step values mean more precisions and so +/// more terms in index (and index gets larger). On the other hand, the maximum number of terms to match +/// reduces, which optimized query speed. The formula to calculate the maximum term count is: +///
+/// n = [ (bitsPerValue/precisionStep - 1) * (2 ^ precisionStep - 1 ) * 2 ] + (2 ^ precisionStep - 1 )
+/// 
+/// +/// (this formula is only correct, when bitsPerValue/precisionStep is an integer; in other cases, the value +/// must be rounded up and the last summand must contain the modulo of the division as precision step). +/// For longs stored using a precision step of 4, n = 15*15*2 + 15 = 465, and for a precision step of 2, +/// n = 31*3*2 + 3 = 189. But the faster search speed is reduced by more seeking in the term enum of the +/// index. Because of this, the ideal precisionStep value can only be found out by testing. Important: You +/// can index with a lower precision step value and test search speed using a multiple of the original step +/// value. +/// +/// Good values for precisionStep are depending on usage and data type: +///
    +///
  • The default for all data types is 4, which is used, when no precisionStep is given. +///
  • Ideal value in most cases for 64 bit data types (long, double) is 6 or 8. +///
  • Ideal value in most cases for 32 bit data types (int) is 4. +///
  • For low cardinality fields larger precision steps are good. If the cardinality is < 100, it is +/// fair to use {@link Integer#MAX_VALUE} (see below). +///
  • Steps >=64 for long/double and >=32 for int/float produces one token per value in the index and +/// querying is as slow as a conventional {@link TermRangeQuery}. But it can be used to produce fields, +/// that are solely used for sorting (in this case simply use {@link Integer#MAX_VALUE} as precisionStep). +/// Using {@link NumericField NumericFields} for sorting is ideal, because building the field cache is much +/// faster than with text-only numbers. These fields have one term per value and therefore also work with +/// term enumeration for building distinct lists (eg. facets / preselected values to search for). +/// Sorting is also possible with range query optimized fields using one of the above precisionSteps. +///
+/// +/// Comparisons of the different types of RangeQueries on an index with about 500,000 docs showed that +/// {@link TermRangeQuery} in boolean rewrite mode (with raised {@link BooleanQuery} clause count) took +/// about 30-40 secs to complete, {@link TermRangeQuery} in constant score filter rewrite mode took 5 secs +/// and executing this class took <100ms to complete (on an Opteron64 machine, 8 bit precision step). This +/// query type was developed for a geographic portal, where the performance for eg. bounding boxes or exact +/// date/time stamps is important. +class LPPAPI NumericRangeQuery : public MultiTermQuery { +public: + NumericRangeQuery(const String& field, int32_t precisionStep, int32_t valSize, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); + virtual ~NumericRangeQuery(); + + LUCENE_CLASS(NumericRangeQuery); + +INTERNAL: + String field; + int32_t precisionStep; + int32_t valSize; + NumericValue min; + NumericValue max; + bool minInclusive; + bool maxInclusive; + +public: + using MultiTermQuery::toString; + + /// Factory that creates a NumericRangeFilter, that filters a long range using the given precisionStep. + static NumericRangeQueryPtr newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a long range using the default precisionStep + /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + static NumericRangeQueryPtr newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a int range using the given precisionStep. 
+ static NumericRangeQueryPtr newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a int range using the default precisionStep + /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + static NumericRangeQueryPtr newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a double range using the given precisionStep. + static NumericRangeQueryPtr newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeFilter, that filters a double range using the default precisionStep + /// {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). + static NumericRangeQueryPtr newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeQuery, that queries a int, long or double range using the given + /// precisionStep. You can have half-open ranges (which are in fact <= or >= queries) by setting the min + /// or max value to VariantUtils::null(). By setting inclusive to false it will match all documents + /// excluding the bounds, with inclusive on the boundaries are hits, too. + static NumericRangeQueryPtr newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); + + /// Factory that creates a NumericRangeQuery, that queries a int, long or double range using the default + /// precisionStep {@link NumericUtils#PRECISION_STEP_DEFAULT} (4). You can have half-open ranges (which + /// are in fact <= or >= queries) by setting the min or max value to VariantUtils::null(). 
By setting + /// inclusive to false it will match all documents excluding the bounds, with inclusive on the boundaries + /// are hits, too. + static NumericRangeQueryPtr newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive); + + /// Returns the field name for this query + String getField(); + + /// Returns true if the lower endpoint is inclusive + bool includesMin(); + + /// Returns true if the upper endpoint is inclusive + bool includesMax(); + + /// Returns the lower value of this range query + NumericValue getMin(); + + /// Returns the upper value of this range query + NumericValue getMax(); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual String toString(const String& field); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + +protected: + virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); + + friend class NumericRangeTermEnum; +}; + +} + +#endif diff --git a/include/lucene++/NumericTokenStream.h b/include/lucene++/NumericTokenStream.h new file mode 100644 index 00000000..3d8970b1 --- /dev/null +++ b/include/lucene++/NumericTokenStream.h @@ -0,0 +1,120 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NUMERICTOKENSTREAM_H +#define NUMERICTOKENSTREAM_H + +#include "TokenStream.h" + +namespace Lucene { + +/// This class provides a {@link TokenStream} for indexing numeric values that can be used by {@link NumericRangeQuery} +/// or {@link NumericRangeFilter}. +/// +/// Note that for simple usage, {@link NumericField} is recommended. 
{@link NumericField} disables norms and term freqs, +/// as they are not usually needed during searching. If you need to change these settings, you should use this class. +/// +/// See {@link NumericField} for capabilities of fields indexed numerically. +/// +/// Here's an example usage, for an int field: +/// +/// FieldPtr field = newLucene(name, newLucene(precisionStep)->setIntValue(value)); +/// field->setOmitNorms(true); +/// field->setOmitTermFreqAndPositions(true); +/// document->add(field); +/// +/// For optimal performance, re-use the TokenStream and Field instance for more than one document: +/// +/// NumericTokenStreamPtr stream = newLucene(precisionStep); +/// FieldPtr field = newLucene(name, stream); +/// field->setOmitNorms(true); +/// field->setOmitTermFreqAndPositions(true); +/// DocumentPtr document = newLucene(); +/// document->add(field); +/// +/// for (all documents) +/// { +/// stream->setIntValue(value); +/// writer->addDocument(document); +/// } +/// +/// This stream is not intended to be used in analyzers; it's more for iterating the different precisions during +/// indexing a specific numeric value. +/// +/// NOTE: as token streams are only consumed once the document is added to the index, if you index more than one +/// numeric field, use a separate NumericTokenStream * instance for each. +/// +/// See {@link NumericRangeQuery} for more details on the precisionStep +/// parameter as well as how numeric fields work under the hood. +class LPPAPI NumericTokenStream : public TokenStream { +public: + /// Creates a token stream for numeric values using the default precisionStep {@link + /// NumericUtils#PRECISION_STEP_DEFAULT} (4). The stream is not yet initialized, before using set a + /// value using the various setValue() methods. + NumericTokenStream(); + + /// Creates a token stream for numeric values with the specified precisionStep. The stream is not yet + /// initialized, before using set a value using the various setValue() methods. 
+ NumericTokenStream(int32_t precisionStep); + + /// Creates a token stream for numeric values with the specified precisionStep using the given {@link + /// AttributeSource}. The stream is not yet initialized, before using set a value using the various + /// setValue() methods. + NumericTokenStream(const AttributeSourcePtr& source, int32_t precisionStep); + + /// Creates a token stream for numeric values with the specified precisionStep using the given {@link + /// AttributeFactory}. The stream is not yet initialized, before using set a value using the various + /// setValue() methods. + NumericTokenStream(const AttributeFactoryPtr& factory, int32_t precisionStep); + + virtual ~NumericTokenStream(); + + LUCENE_CLASS(NumericTokenStream); + +protected: + TermAttributePtr termAtt; + TypeAttributePtr typeAtt; + PositionIncrementAttributePtr posIncrAtt; + + int32_t shift; + int32_t valSize; // valSize == 0 means not initialized + int32_t precisionStep; + + int64_t value; + +public: + /// The full precision token gets this token type assigned. + static const String& TOKEN_TYPE_FULL_PREC(); + + /// The lower precision tokens gets this token type assigned. + static const String& TOKEN_TYPE_LOWER_PREC(); + + /// Initializes the token stream with the supplied long value. + /// @param value the value, for which this TokenStream should enumerate tokens. + /// @return this instance, because of this you can use it the following way: + /// newLucene(name, newLucene(precisionStep)->setLongValue(value)) + NumericTokenStreamPtr setLongValue(int64_t value); + + /// Initializes the token stream with the supplied int value. + /// @param value the value, for which this TokenStream should enumerate tokens. + /// @return this instance, because of this you can use it the following way: + /// newLucene(name, newLucene(precisionStep)->setIntValue(value)) + NumericTokenStreamPtr setIntValue(int32_t value); + + /// Initializes the token stream with the supplied double value. 
+ /// @param value the value, for which this TokenStream should enumerate tokens. + /// @return this instance, because of this you can use it the following way: + /// newLucene(name, newLucene(precisionStep)->setDoubleValue(value)) + NumericTokenStreamPtr setDoubleValue(double value); + + virtual void reset(); + virtual bool incrementToken(); + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/NumericUtils.h b/include/lucene++/NumericUtils.h new file mode 100644 index 00000000..28603586 --- /dev/null +++ b/include/lucene++/NumericUtils.h @@ -0,0 +1,178 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef NUMERICUTILS_H +#define NUMERICUTILS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// This is a helper class to generate prefix-encoded representations for numerical values and supplies converters +/// to represent double values as sortable integers/longs. +/// +/// To quickly execute range queries in Apache Lucene, a range is divided recursively into multiple intervals for +/// searching: The center of the range is searched only with the lowest possible precision in the trie, while the +/// boundaries are matched more exactly. This reduces the number of terms dramatically. +/// +/// This class generates terms to achieve this: First the numerical integer values need to be converted to strings. +/// For that integer values (32 bit or 64 bit) are made unsigned and the bits are converted to ASCII chars with each +/// 7 bit. The resulting string is sortable like the original integer value. Each value is also prefixed (in the +/// first char) by the shift value (number of bits removed) used during encoding. 
+/// +/// To also index floating point numbers, this class supplies two methods to convert them to integer values by +/// changing their bit layout: {@link #doubleToSortableLong}, {@link #doubleToSortableInt}. You will have no precision +/// loss by converting floating point numbers to integers and back (only that the integer form is not usable). Other +/// data types like dates can easily converted to longs or ints (eg. date to long). +/// +/// For easy usage, the trie algorithm is implemented for indexing inside {@link NumericTokenStream} that can index +/// int, long, and double. For querying, {@link NumericRangeQuery} and {@link NumericRangeFilter} implement the query +/// part for the same data types. +/// +/// This class can also be used, to generate lexicographically sortable (according {@link std::string#compare}) +/// representations of numeric data types for other usages (eg. sorting). +class LPPAPI NumericUtils : public LuceneObject { +public: + virtual ~NumericUtils(); + + LUCENE_CLASS(NumericUtils); + +public: + /// The default precision step used by {@link NumericField}, {@link NumericTokenStream}, {@link NumericRangeQuery}, + /// and {@link NumericRangeFilter} as default. + static const int32_t PRECISION_STEP_DEFAULT; + + /// Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + + /// shift in the first character. + static const wchar_t SHIFT_START_LONG; + + /// The maximum term length (used for char[] buffer size) for encoding long values. + /// @see #longToPrefixCoded(long,int,char[]) + static const int32_t BUF_SIZE_LONG; + + /// Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + + /// shift in the first character. + static const wchar_t SHIFT_START_INT; + + /// The maximum term length (used for char[] buffer size) for encoding int values. 
+ /// @see #intToPrefixCoded(int,int,char[]) + static const int32_t BUF_SIZE_INT; + +public: + /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by + /// {@link NumericTokenStream}. + /// @param val the numeric value + /// @param shift how many bits to strip from the right + /// @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_LONG} length + /// @return number of chars written to buffer + static int32_t longToPrefixCoded(int64_t val, int32_t shift, CharArray buffer); + + /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by + /// {@link LongRangeBuilder}. + /// @param val the numeric value + /// @param shift how many bits to strip from the right + static String longToPrefixCoded(int64_t val, int32_t shift); + + /// This is a convenience method, that returns prefix coded bits of a long without reducing the precision. + /// It can be used to store the full precision value as a stored field in index. + /// To decode, use {@link #prefixCodedToLong}. + static String longToPrefixCoded(int64_t val); + + /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by {@link + /// NumericTokenStream}. + /// @param val the numeric value + /// @param shift how many bits to strip from the right + /// @param buffer that will contain the encoded chars, must be at least of {@link #BUF_SIZE_INT} length + /// @return number of chars written to buffer + static int32_t intToPrefixCoded(int32_t val, int32_t shift, CharArray buffer); + + /// Returns prefix coded bits after reducing the precision by shift bits. This is method is used by {@link + /// IntRangeBuilder}. 
+ /// @param val the numeric value + /// @param shift how many bits to strip from the right + static String intToPrefixCoded(int32_t val, int32_t shift); + + /// This is a convenience method, that returns prefix coded bits of an int without reducing the precision. + /// It can be used to store the full precision value as a stored field in index. + /// To decode, use {@link #prefixCodedToInt}. + static String intToPrefixCoded(int32_t val); + + /// Returns a long from prefixCoded characters. Rightmost bits will be zero for lower precision codes. + /// This method can be used to decode eg. a stored field. + /// @see #longToPrefixCoded(int64_t) + static int64_t prefixCodedToLong(const String& prefixCoded); + + /// Returns an int from prefixCoded characters. Rightmost bits will be zero for lower precision codes. + /// This method can be used to decode eg. a stored field. + /// @see #intToPrefixCoded(int32_t) + static int32_t prefixCodedToInt(const String& prefixCoded); + + /// Converts a double value to a sortable signed long. The value is converted by getting their IEEE 754 + /// floating-point "double format" bit layout and then some bits are swapped, to be able to compare the + /// result as int64_t. By this the precision is not reduced, but the value can easily used as a int64_t. + /// @see #sortableLongToDouble + static int64_t doubleToSortableLong(double val); + + /// Convenience method: this just returns: longToPrefixCoded(doubleToSortableLong(val)) + static String doubleToPrefixCoded(double val); + + /// Converts a sortable long back to a double. + /// @see #doubleToSortableLong + static double sortableLongToDouble(int64_t val); + + /// Convenience method: this just returns: sortableLongToDouble(prefixCodedToLong(val)) + static double prefixCodedToDouble(const String& val); + + /// Splits a int64_t range recursively. 
You may implement a builder that adds clauses to a {@link BooleanQuery} + /// for each call to its {@link LongRangeBuilder#addRange(String,String)} method. + /// This method is used by {@link NumericRangeQuery}. + static void splitLongRange(const LongRangeBuilderPtr& builder, int32_t precisionStep, int64_t minBound, int64_t maxBound); + + /// Splits an int32_t range recursively. You may implement a builder that adds clauses to a {@link BooleanQuery} + /// for each call to its {@link IntRangeBuilder#addRange(String,String)} method. + /// This method is used by {@link NumericRangeQuery}. + static void splitIntRange(const IntRangeBuilderPtr& builder, int32_t precisionStep, int32_t minBound, int32_t maxBound); + + /// This helper does the splitting for both 32 and 64 bit. + static void splitRange(const LuceneObjectPtr& builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound); + + /// Helper that delegates to correct range builder + static void addRange(const LuceneObjectPtr& builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift); +}; + +/// Callback for {@link #splitLongRange}. You need to overwrite only one of the methods. +/// NOTE: This is a very low-level interface, the method signatures may change in later versions. +class LPPAPI LongRangeBuilder : public LuceneObject { +public: + virtual ~LongRangeBuilder(); + +public: + /// Overwrite this method, if you like to receive the already prefix encoded range bounds. You can directly build + /// classical (inclusive) range queries from them. + virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); + + /// Overwrite this method, if you like to receive the raw long range bounds. You can use this for eg. debugging + /// purposes (print out range bounds). 
+ virtual void addRange(int64_t min, int64_t max, int32_t shift); +}; + +class LPPAPI IntRangeBuilder : public LuceneObject { +public: + virtual ~IntRangeBuilder(); + +public: + /// Overwrite this method, if you like to receive the already prefix encoded range bounds. You can directly build + /// classical range (inclusive) queries from them. + virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); + + /// Overwrite this method, if you like to receive the raw int range bounds. You can use this for eg. debugging + /// purposes (print out range bounds). + virtual void addRange(int32_t min, int32_t max, int32_t shift); +}; + +} + +#endif diff --git a/include/lucene++/OffsetAttribute.h b/include/lucene++/OffsetAttribute.h new file mode 100644 index 00000000..63ffb4b3 --- /dev/null +++ b/include/lucene++/OffsetAttribute.h @@ -0,0 +1,53 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef OFFSETATTRIBUTE_H +#define OFFSETATTRIBUTE_H + +#include "Attribute.h" + +namespace Lucene { + +/// The start and end character offset of a Token. +class LPPAPI OffsetAttribute : public Attribute { +public: + OffsetAttribute(); + virtual ~OffsetAttribute(); + + LUCENE_CLASS(OffsetAttribute); + +protected: + int32_t _startOffset; + int32_t _endOffset; + +public: + virtual String toString(); + + /// Returns this Token's starting offset, the position of the first character corresponding to this token + /// in the source text. + /// + /// Note that the difference between endOffset() and startOffset() may not be equal to termText.length(), + /// as the term text may have been altered by a stemmer or some other filter. 
+ virtual int32_t startOffset(); + + /// Set the starting and ending offset. + /// @see #startOffset() and #endOffset() + virtual void setOffset(int32_t startOffset, int32_t endOffset); + + /// Returns this Token's ending offset, one greater than the position of the last character corresponding + /// to this token in the source text. The length of the token in the source text is (endOffset - startOffset). + virtual int32_t endOffset(); + + virtual void clear(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual void copyTo(const AttributePtr& target); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/OpenBitSet.h b/include/lucene++/OpenBitSet.h new file mode 100644 index 00000000..b1dc6960 --- /dev/null +++ b/include/lucene++/OpenBitSet.h @@ -0,0 +1,236 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef OPENBITSET_H +#define OPENBITSET_H + +#include "DocIdSet.h" + +namespace Lucene { + +/// An "open" BitSet implementation that allows direct access to the array of words storing the bits. +/// +/// The goals of OpenBitSet are the fastest implementation possible, and maximum code reuse. Extra +/// safety and encapsulation may always be built on top, but if that's built in, the cost can never +/// be removed (and hence people re-implement their own version in order to get better performance). +class LPPAPI OpenBitSet : public DocIdSet { +public: + /// Constructs an OpenBitSet large enough to hold numBits. + OpenBitSet(int64_t numBits = 64); + + /// Constructs an OpenBitSet from an existing LongArray. 
+ /// + /// The first 64 bits are in long[0], with bit index 0 at the least significant bit, and bit + /// index 63 at the most significant. Given a bit index, the word containing it is long[index/64], + /// and it is at bit number index%64 within that word. + /// + /// numWords are the number of elements in the array that contain set bits (non-zero longs). + /// numWords should be <= bits.length(), and any existing words in the array at position >= + /// numWords should be zero. + OpenBitSet(LongArray bits, int32_t numWords); + + virtual ~OpenBitSet(); + + LUCENE_CLASS(OpenBitSet); + +protected: + LongArray bits; + int32_t wlen; // number of words (elements) used in the array + +public: + virtual DocIdSetIteratorPtr iterator(); + + /// This DocIdSet implementation is cacheable. + virtual bool isCacheable(); + + /// Returns the current capacity in bits (1 greater than the index of the last bit) + int64_t capacity(); + + /// Returns the current capacity of this set. Included for compatibility. This is *not* + /// equal to {@link #cardinality} + int64_t size(); + + /// Returns true if there are no set bits + bool isEmpty(); + + /// Returns the long[] storing the bits + LongArray getBits(); + + /// Sets a new long[] to use as the bit storage + void setBits(LongArray bits); + + /// Gets the number of longs in the array that are in use + int32_t getNumWords(); + + /// Sets the number of longs in the array that are in use + void setNumWords(int32_t numWords); + + /// Returns true or false for the specified bit index. + bool get(int32_t index); + + /// Returns true or false for the specified bit index. + /// The index should be less than the OpenBitSet size + bool fastGet(int32_t index); + + /// Returns true or false for the specified bit index + bool get(int64_t index); + + /// Returns true or false for the specified bit index. + /// The index should be less than the OpenBitSet size. + bool fastGet(int64_t index); + + /// Returns 1 if the bit is set, 0 if not. 
+ /// The index should be less than the OpenBitSet size + int32_t getBit(int32_t index); + + /// Sets a bit, expanding the set size if necessary + void set(int64_t index); + + /// Sets the bit at the specified index. + /// The index should be less than the OpenBitSet size. + void fastSet(int32_t index); + + /// Sets the bit at the specified index. + /// The index should be less than the OpenBitSet size. + void fastSet(int64_t index); + + /// Sets a range of bits, expanding the set size if necessary + /// @param startIndex lower index + /// @param endIndex one-past the last bit to set + void set(int64_t startIndex, int64_t endIndex); + + /// Clears a bit. + /// The index should be less than the OpenBitSet size. + void fastClear(int32_t index); + + /// Clears a bit. + /// The index should be less than the OpenBitSet size. + void fastClear(int64_t index); + + /// Clears a bit, allowing access beyond the current set size without changing the size. + void clear(int64_t index); + + /// Clears a range of bits. Clearing past the end does not change the size of the set. + /// @param startIndex lower index + /// @param endIndex one-past the last bit to clear + void clear(int32_t startIndex, int32_t endIndex); + + /// Clears a range of bits. Clearing past the end does not change the size of the set. + /// @param startIndex lower index + /// @param endIndex one-past the last bit to clear + void clear(int64_t startIndex, int64_t endIndex); + + /// Sets a bit and returns the previous value. + /// The index should be less than the OpenBitSet size. + bool getAndSet(int32_t index); + + /// Sets a bit and returns the previous value. + /// The index should be less than the OpenBitSet size. + bool getAndSet(int64_t index); + + /// Flips a bit. + /// The index should be less than the OpenBitSet size. + void fastFlip(int32_t index); + + /// Flips a bit. + /// The index should be less than the OpenBitSet size. 
+ void fastFlip(int64_t index); + + /// Flips a bit, expanding the set size if necessary + void flip(int64_t index); + + /// Flips a bit and returns the resulting bit value. + /// The index should be less than the OpenBitSet size. + bool flipAndGet(int32_t index); + + /// Flips a bit and returns the resulting bit value. + /// The index should be less than the OpenBitSet size. + bool flipAndGet(int64_t index); + + /// Flips a range of bits, expanding the set size if necessary + /// @param startIndex lower index + /// @param endIndex one-past the last bit to flip + void flip(int64_t startIndex, int64_t endIndex); + + /// @return the number of set bits + int64_t cardinality(); + + /// Returns the popcount or cardinality of the intersection of the two sets. + /// Neither set is modified. + static int64_t intersectionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b); + + /// Returns the popcount or cardinality of the union of the two sets. + /// Neither set is modified. + static int64_t unionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b); + + /// Returns the popcount or cardinality of "a and not b" or "intersection(a, not(b))". + /// Neither set is modified. + static int64_t andNotCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b); + + /// Returns the popcount or cardinality of the exclusive-or of the two sets. + /// Neither set is modified. + static int64_t xorCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b); + + /// Returns the index of the first set bit starting at the index specified. + /// -1 is returned if there are no more set bits. + int32_t nextSetBit(int32_t index); + + /// Returns the index of the first set bit starting at the index specified. + /// -1 is returned if there are no more set bits. 
+ int64_t nextSetBit(int64_t index); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// this = this AND other + void intersect(const OpenBitSetPtr& other); + + /// this = this OR other + void _union(const OpenBitSetPtr& other); + + /// Remove all elements set in other. this = this AND_NOT other + void remove(const OpenBitSetPtr& other); + + /// this = this XOR other + void _xor(const OpenBitSetPtr& other); + + /// see {@link intersect} + void _and(const OpenBitSetPtr& other); + + /// see {@link union} + void _or(const OpenBitSetPtr& other); + + /// see {@link remove} + void andNot(const OpenBitSetPtr& other); + + /// Returns true if the sets have any elements in common + bool intersects(const OpenBitSetPtr& other); + + /// Expand the LongArray with the size given as a number of words (64 bit longs). + /// getNumWords() is unchanged by this call. + void ensureCapacityWords(int32_t numWords); + + /// Ensure that the LongArray is big enough to hold numBits, expanding it if necessary. + /// getNumWords() is unchanged by this call. + void ensureCapacity(int64_t numBits); + + /// Lowers numWords, the number of words in use, by checking for trailing zero words. + void trimTrailingZeros(); + + /// Returns the number of 64 bit words it would take to hold numBits. + static int32_t bits2words(int64_t numBits); + + /// Returns true if both sets have the same bits set + virtual bool equals(const LuceneObjectPtr& other); + + virtual int32_t hashCode(); + +protected: + int32_t expandingWordNum(int64_t index); +}; + +} + +#endif diff --git a/include/lucene++/OpenBitSetDISI.h b/include/lucene++/OpenBitSetDISI.h new file mode 100644 index 00000000..b086931e --- /dev/null +++ b/include/lucene++/OpenBitSetDISI.h @@ -0,0 +1,50 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef OPENBITSETDISI_H +#define OPENBITSETDISI_H + +#include "OpenBitSet.h" + +namespace Lucene { + +class LPPAPI OpenBitSetDISI : public OpenBitSet { +public: + /// Construct an OpenBitSetDISI with its bits set from the doc ids of the given DocIdSetIterator. + /// Also give a maximum size one larger than the largest doc id for which a bit may ever be set on + /// this OpenBitSetDISI. + OpenBitSetDISI(const DocIdSetIteratorPtr& disi, int32_t maxSize); + + /// Construct an OpenBitSetDISI with no bits set, and a given maximum size one larger than the largest + /// doc id for which a bit may ever be set on this OpenBitSetDISI. + OpenBitSetDISI(int32_t maxSize); + + virtual ~OpenBitSetDISI(); + + LUCENE_CLASS(OpenBitSetDISI); + +public: + /// Perform an in-place OR with the doc ids from a given DocIdSetIterator, setting the bit for each + /// such doc id. These doc ids should be smaller than the maximum size passed to the constructor. + void inPlaceOr(const DocIdSetIteratorPtr& disi); + + /// Perform an in-place AND with the doc ids from a given DocIdSetIterator, leaving only the bits set + /// for which the doc ids are in common. These doc ids should be smaller than the maximum size passed + /// to the constructor. + void inPlaceAnd(const DocIdSetIteratorPtr& disi); + + /// Perform an in-place NOT with the doc ids from a given DocIdSetIterator, clearing all the bits for + /// each such doc id. These doc ids should be smaller than the maximum size passed to the constructor. + void inPlaceNot(const DocIdSetIteratorPtr& disi); + + /// Perform an inplace XOR with the doc ids from a given DocIdSetIterator, flipping all the bits for + /// each such doc id. These doc ids should be smaller than the maximum size passed to the constructor. 
+ void inPlaceXor(const DocIdSetIteratorPtr& disi); +}; + +} + +#endif diff --git a/include/lucene++/OpenBitSetIterator.h b/include/lucene++/OpenBitSetIterator.h new file mode 100644 index 00000000..15c7b112 --- /dev/null +++ b/include/lucene++/OpenBitSetIterator.h @@ -0,0 +1,52 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef OPENBITSETITERATOR_H +#define OPENBITSETITERATOR_H + +#include "DocIdSetIterator.h" + +namespace Lucene { + +/// An iterator to iterate over set bits in an OpenBitSet. +/// This is faster than nextSetBit() for iterating over the complete set of bits, +/// especially when the density of the bits set is high. +class LPPAPI OpenBitSetIterator : public DocIdSetIterator { +public: + OpenBitSetIterator(const OpenBitSetPtr& bitSet); + OpenBitSetIterator(LongArray bits, int32_t numWords); + virtual ~OpenBitSetIterator(); + + LUCENE_CLASS(OpenBitSetIterator); + +protected: + LongArray arr; + int32_t words; + int32_t i; + int64_t word; + int32_t wordShift; + int32_t indexArray; + int32_t curDocId; + + /// The General Idea: instead of having an array per byte that has the offsets of the + /// next set bit, that array could be packed inside a 32 bit integer (8 4 bit numbers). 
+ /// That should be faster than accessing an array for each index, and the total array + /// size is kept smaller (256*sizeof(int32_t))=1K + static const int32_t bitlist[]; + +public: + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); + virtual int32_t docID(); + +protected: + /// 64 bit shifts + void shift(); +}; + +} + +#endif diff --git a/include/lucene++/OrdFieldSource.h b/include/lucene++/OrdFieldSource.h new file mode 100644 index 00000000..2fc43b4d --- /dev/null +++ b/include/lucene++/OrdFieldSource.h @@ -0,0 +1,52 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef ORDFIELDSOURCE_H +#define ORDFIELDSOURCE_H + +#include "ValueSource.h" + +namespace Lucene { + +/// Obtains the ordinal of the field value from the default Lucene {@link FieldCache} using getStringIndex(). +/// +/// The native lucene index order is used to assign an ordinal value for each field value. +/// +/// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. +/// Example: +/// If there were only three field values: "apple","banana","pear" then ord("apple")=1, ord("banana")=2, +/// ord("pear")=3 +/// +/// WARNING: ord() depends on the position in an index and can thus change when other documents are inserted +/// or deleted, or if a MultiSearcher is used. +/// +/// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite +/// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's +/// best to switch your application to pass only atomic (single segment) readers to this API. 
Alternatively, +/// for a short-term fix, you could wrap your ValueSource using {@link MultiValueSource}, which costs more CPU +/// per lookup but will not consume double the FieldCache RAM. +class LPPAPI OrdFieldSource : public ValueSource { +public: + /// Constructor for a certain field. + ///@param field field whose values order is used. + OrdFieldSource(const String& field); + virtual ~OrdFieldSource(); + + LUCENE_CLASS(OrdFieldSource); + +protected: + String field; + +public: + virtual String description(); + virtual DocValuesPtr getValues(const IndexReaderPtr& reader); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/ParallelMultiSearcher.h b/include/lucene++/ParallelMultiSearcher.h new file mode 100644 index 00000000..a22bb0a6 --- /dev/null +++ b/include/lucene++/ParallelMultiSearcher.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PARALLELMULTISEARCHER_H +#define PARALLELMULTISEARCHER_H + +#include "MultiSearcher.h" + +namespace Lucene { + +/// Implements parallel search over a set of Searchables. +/// +/// Applications usually need only call the inherited {@link #search(QueryPtr, int32_t)} or +/// {@link #search(QueryPtr, FilterPtr, int32_t)} methods. +class LPPAPI ParallelMultiSearcher : public MultiSearcher { +public: + /// Creates a {@link Searchable} which searches searchables. 
+ ParallelMultiSearcher(Collection searchables); + virtual ~ParallelMultiSearcher(); + + LUCENE_CLASS(ParallelMultiSearcher); + +public: + /// Executes each {@link Searchable}'s docFreq() in its own thread and waits for each search to + /// complete and merge the results back together. + virtual int32_t docFreq(const TermPtr& term); + + /// A search implementation which executes each {@link Searchable} in its own thread and waits + /// for each search to complete and merge the results back together. + virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n); + + /// A search implementation allowing sorting which spawns a new thread for each Searchable, waits + /// for each search to complete and merges the results back together. + virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort); +}; + +} + +#endif diff --git a/include/lucene++/ParallelReader.h b/include/lucene++/ParallelReader.h new file mode 100644 index 00000000..d01b8b54 --- /dev/null +++ b/include/lucene++/ParallelReader.h @@ -0,0 +1,181 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PARALLELREADER_H +#define PARALLELREADER_H + +#include "IndexReader.h" + +namespace Lucene { + +/// An IndexReader which reads multiple, parallel indexes. Each index added must have the same number of +/// documents, but typically each contains different fields. Each document contains the union of the fields +/// of all documents with the same document number. When searching, matches for a query term are from the +/// first index added that has the field. 
+/// +/// This is useful, eg., with collections that have large fields which change rarely and small fields that +/// change more frequently. The smaller fields may be re-indexed in a new index and both indexes may be +/// searched together. +/// +/// Warning: It is up to you to make sure all indexes are created and modified the same way. For example, +/// if you add documents to one index, you need to add the same documents in the same order to the other +/// indexes. Failure to do so will result in undefined behavior +class LPPAPI ParallelReader : public IndexReader { +public: + /// Construct a ParallelReader. + /// @param closeSubReaders indicates whether the subreaders should be closed when this ParallelReader + /// is closed + ParallelReader(bool closeSubReaders = true); + + virtual ~ParallelReader(); + + LUCENE_CLASS(ParallelReader); + +protected: + Collection readers; + Collection decrefOnClose; // remember which subreaders to decRef on close + bool incRefReaders; + MapStringIndexReader fieldToReader; + MapIndexReaderSetString readerToFields; + Collection storedFieldReaders; + + int32_t _maxDoc; + int32_t _numDocs; + bool _hasDeletions; + +public: + /// Add an IndexReader. + void add(const IndexReaderPtr& reader); + + /// Add an IndexReader whose stored fields will not be returned. This can accelerate search when stored + /// fields are only needed from a subset of the IndexReaders. + void add(const IndexReaderPtr& reader, bool ignoreStoredFields); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Tries to reopen the subreaders. + /// + /// If one or more subreaders could be re-opened (ie. subReader.reopen() returned a new instance != subReader), + /// then a new ParallelReader instance is returned, otherwise this instance is returned. + /// + /// A re-opened instance might share one or more subreaders with the old instance. 
Index modification + /// operations result in undefined behavior when performed before the old instance is closed. + /// (see {@link IndexReader#reopen()}). + /// + /// If subreaders are shared, then the reference count of those readers is increased to ensure that the + /// subreaders remain open until the last referring reader is closed. + virtual IndexReaderPtr reopen(); + + /// Returns the number of documents in this index. + virtual int32_t numDocs(); + + /// Returns one greater than the largest possible document number. This may be used to, eg., determine + /// how big to allocate an array which will have an element for every document number in an index. + virtual int32_t maxDoc(); + + /// Returns true if any documents have been deleted + virtual bool hasDeletions(); + + /// Returns true if document n has been deleted + virtual bool isDeleted(int32_t n); + + /// Get the {@link Document} at the n'th position. + virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); + + /// Return an array of term frequency vectors for the specified document. + virtual Collection getTermFreqVectors(int32_t docNumber); + + /// Return a term frequency vector for the specified document and field. + virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); + + /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays + /// of the {@link TermFreqVector}. + virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); + + /// Map all the term vectors for all fields in a Document + virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); + + /// Returns true if there are norms stored for this field. + virtual bool hasNorms(const String& field); + + /// Returns the byte-encoded normalization factor for the named field of every document. 
+ virtual ByteArray norms(const String& field); + + /// Reads the byte-encoded normalization factor for the named field of every document. + virtual void norms(const String& field, ByteArray norms, int32_t offset); + + /// Returns an enumeration of all the terms in the index. The enumeration is ordered by + /// Term::compareTo(). Each term is greater than all that precede it in the enumeration. + /// Note that after calling terms(), {@link TermEnum#next()} must be called on the resulting + /// enumeration before calling other methods such as {@link TermEnum#term()}. + virtual TermEnumPtr terms(); + + /// Returns an enumeration of all terms starting at a given term. If the given term does not + /// exist, the enumeration is positioned at the first term greater than the supplied term. + /// The enumeration is ordered by Term::compareTo(). Each term is greater than all that precede + /// it in the enumeration. + virtual TermEnumPtr terms(const TermPtr& t); + + /// Returns the number of documents containing the term t. + virtual int32_t docFreq(const TermPtr& t); + + /// Returns an enumeration of all the documents which contain term. For each document, the + /// document number, the frequency of the term in that document is also provided, for use in + /// search scoring. If term is null, then all non-deleted docs are returned with freq=1. + /// The enumeration is ordered by document number. Each document number is greater than all + /// that precede it in the enumeration. + virtual TermDocsPtr termDocs(const TermPtr& term); + + /// Returns an unpositioned {@link TermDocs} enumerator. + virtual TermDocsPtr termDocs(); + + /// Returns an enumeration of all the documents which contain term. + virtual TermPositionsPtr termPositions(const TermPtr& term); + + /// Returns an unpositioned {@link TermPositions} enumerator. + virtual TermPositionsPtr termPositions(); + + /// Checks recursively if all subreaders are up to date. 
+ virtual bool isCurrent(); + + /// Checks recursively if all subindexes are optimized + virtual bool isOptimized(); + + /// Not implemented. + virtual int64_t getVersion(); + + Collection getSubReaders(); + + /// Get a list of unique field names that exist in this index and have the specified field option + /// information. + virtual HashSet getFieldNames(FieldOption fieldOption); + +protected: + IndexReaderPtr doReopen(bool doClone); + + /// Implements deletion of the document numbered docNum. + virtual void doDelete(int32_t docNum); + + /// Implements actual undeleteAll(). + virtual void doUndeleteAll(); + + /// Implements setNorm in subclass. + virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); + + /// Implements commit. + virtual void doCommit(MapStringString commitUserData); + + /// Implements close. + virtual void doClose(); + + friend class ParallelTermEnum; + friend class ParallelTermDocs; + friend class ParallelTermPositions; +}; + +} + +#endif diff --git a/include/lucene++/Payload.h b/include/lucene++/Payload.h new file mode 100644 index 00000000..7b28c77b --- /dev/null +++ b/include/lucene++/Payload.h @@ -0,0 +1,88 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PAYLOAD_H +#define PAYLOAD_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A Payload is metadata that can be stored together with each occurrence of a term. This metadata is stored +/// inline in the posting list of the specific term. +/// +/// To store payloads in the index a {@link TokenStream} has to be used that produces payload data. 
+/// +/// Use {@link TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} to retrieve +/// the payloads from the index. +class LPPAPI Payload : public LuceneObject { +public: + /// Creates an empty payload and does not allocate a byte array. + Payload(); + + /// Creates a new payload with the given array as data. A reference to the passed-in array is held, + /// ie. no copy is made. + /// @param data the data of this payload + Payload(ByteArray data); + + /// Creates a new payload with the given array as data. A reference to the passed-in array is held, + /// ie. no copy is made. + /// @param data the data of this payload + /// @param offset the offset in the data byte array + /// @param length the length of the data + Payload(ByteArray data, int32_t offset, int32_t length); + + virtual ~Payload(); + + LUCENE_CLASS(Payload); + +protected: + /// the byte array containing the payload data + ByteArray data; + + /// the offset within the byte array + int32_t offset; + + /// the length of the payload data + int32_t _length; + +public: + /// Sets this payloads data. A reference to the passed-in array is held, ie. no copy is made. + void setData(ByteArray data); + + /// Sets this payloads data. A reference to the passed-in array is held, ie. no copy is made. + void setData(ByteArray data, int32_t offset, int32_t length); + + /// Returns a reference to the underlying byte array that holds this payloads data. + ByteArray getData(); + + /// Returns the offset in the underlying byte array + int32_t getOffset(); + + /// Returns the length of the payload data. + int32_t length(); + + /// Returns the byte at the given index. + uint8_t byteAt(int32_t index); + + /// Allocates a new byte array, copies the payload data into it and returns it. + ByteArray toByteArray(); + + /// Copies the payload data to a byte array. 
+ /// @param target the target byte array + /// @param targetOffset the offset in the target byte array + void copyTo(ByteArray target, int32_t targetOffset); + + /// Clones this payload by creating a copy of the underlying byte array. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/PayloadAttribute.h b/include/lucene++/PayloadAttribute.h new file mode 100644 index 00000000..2bbe7dc1 --- /dev/null +++ b/include/lucene++/PayloadAttribute.h @@ -0,0 +1,48 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PAYLOADATTRIBUTE_H +#define PAYLOADATTRIBUTE_H + +#include "Attribute.h" + +namespace Lucene { + +/// The payload of a Token. +class LPPAPI PayloadAttribute : public Attribute { +public: + /// Initialize this attribute with no payload. + PayloadAttribute(); + + /// Initialize this attribute with the given payload. + PayloadAttribute(const PayloadPtr& payload); + + virtual ~PayloadAttribute(); + + LUCENE_CLASS(PayloadAttribute); + +protected: + PayloadPtr payload; + +public: + virtual String toString(); + + /// Returns this Token's payload. + virtual PayloadPtr getPayload(); + + /// Sets this Token's payload. 
+ virtual void setPayload(const PayloadPtr& payload); + + virtual void clear(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual void copyTo(const AttributePtr& target); +}; + +} + +#endif diff --git a/include/lucene++/PayloadFunction.h b/include/lucene++/PayloadFunction.h new file mode 100644 index 00000000..5fa6a0b8 --- /dev/null +++ b/include/lucene++/PayloadFunction.h @@ -0,0 +1,58 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PAYLOADFUNCTION_H +#define PAYLOADFUNCTION_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// An abstract class that defines a way for Payload*Query instances to transform the cumulative +/// effects of payload scores for a document. 
+/// +/// @see PayloadTermQuery for more information +class LPPAPI PayloadFunction : public LuceneObject { +protected: + PayloadFunction(); + +public: + virtual ~PayloadFunction(); + LUCENE_CLASS(PayloadFunction); + +public: + /// Calculate the score up to this point for this doc and field + /// @param docId The current doc + /// @param field The field + /// @param start The start position of the matching Span + /// @param end The end position of the matching Span + /// @param numPayloadsSeen The number of payloads seen so far + /// @param currentScore The current score so far + /// @param currentPayloadScore The score for the current payload + /// @return The new current Score + /// + /// @see Spans + virtual double currentScore(int32_t docId, const String& field, int32_t start, int32_t end, int32_t numPayloadsSeen, + double currentScore, double currentPayloadScore) = 0; + + /// Calculate the final score for all the payloads seen so far for this doc/field + /// @param docId The current doc + /// @param field The current field + /// @param numPayloadsSeen The total number of payloads seen on this document + /// @param payloadScore The raw score for those payloads + /// @return The final score for the payloads + virtual double docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) = 0; + + /// Return hash code for this object. + virtual int32_t hashCode() = 0; + + /// Return whether two objects are equal + virtual bool equals(const LuceneObjectPtr& other) = 0; +}; + +} + +#endif diff --git a/include/lucene++/PayloadNearQuery.h b/include/lucene++/PayloadNearQuery.h new file mode 100644 index 00000000..0b7f54ef --- /dev/null +++ b/include/lucene++/PayloadNearQuery.h @@ -0,0 +1,99 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PAYLOADNEARQUERY_H +#define PAYLOADNEARQUERY_H + +#include "SpanNearQuery.h" +#include "SpanWeight.h" +#include "SpanScorer.h" + +namespace Lucene { + +/// This class is very similar to {@link SpanNearQuery} except that it factors in the value of the payloads +/// located at each of the positions where the {@link TermSpans} occurs. +/// +/// In order to take advantage of this, you must override {@link Similarity#scorePayload} which returns 1 +/// by default. +/// +/// Payload scores are aggregated using a pluggable {@link PayloadFunction}. +/// +/// @see Similarity#scorePayload +class LPPAPI PayloadNearQuery : public SpanNearQuery { +public: + PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder); + PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder, const PayloadFunctionPtr& function); + + virtual ~PayloadNearQuery(); + + LUCENE_CLASS(PayloadNearQuery); + +protected: + String fieldName; + PayloadFunctionPtr function; + +public: + using SpanNearQuery::toString; + + virtual WeightPtr createWeight(const SearcherPtr& searcher); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual String toString(const String& field); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + friend class PayloadNearSpanWeight; + friend class PayloadNearSpanScorer; +}; + +class LPPAPI PayloadNearSpanWeight : public SpanWeight { +public: + PayloadNearSpanWeight(const SpanQueryPtr& query, const SearcherPtr& searcher); + virtual ~PayloadNearSpanWeight(); + + LUCENE_CLASS(PayloadNearSpanWeight); + +public: + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); +}; + +class LPPAPI PayloadNearSpanScorer : public SpanScorer { +public: + 
PayloadNearSpanScorer(const SpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms); + virtual ~PayloadNearSpanScorer(); + + LUCENE_CLASS(PayloadNearSpanScorer); + +public: + SpansPtr spans; + SimilarityPtr similarity; + +protected: + double payloadScore; + int32_t payloadsSeen; + +public: + /// Get the payloads associated with all underlying subspans + void getPayloads(Collection subSpans); + + virtual double score(); + +protected: + /// By default, uses the {@link PayloadFunction} to score the payloads, but can be overridden to do + /// other things. + /// @param payLoads The payloads + /// @param start The start position of the span being scored + /// @param end The end position of the span being scored + /// @see Spans + void processPayloads(Collection payLoads, int32_t start, int32_t end); + + virtual bool setFreqCurrentDoc(); + virtual ExplanationPtr explain(int32_t doc); +}; + +} + +#endif diff --git a/include/lucene++/PayloadSpanUtil.h b/include/lucene++/PayloadSpanUtil.h new file mode 100644 index 00000000..282c6093 --- /dev/null +++ b/include/lucene++/PayloadSpanUtil.h @@ -0,0 +1,40 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PAYLOADSPANUTIL_H +#define PAYLOADSPANUTIL_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Experimental class to get set of payloads for most standard Lucene queries. Operates like Highlighter - +/// IndexReader should only contain doc of interest, best to use MemoryIndex. 
+class LPPAPI PayloadSpanUtil : public LuceneObject { +public: + /// @param reader That contains doc with payloads to extract + PayloadSpanUtil(const IndexReaderPtr& reader); + + virtual ~PayloadSpanUtil(); + + LUCENE_CLASS(PayloadSpanUtil); + +protected: + IndexReaderPtr reader; + +public: + /// Query should be rewritten for wild/fuzzy support. + /// @return payloads Collection + Collection getPayloadsForQuery(const QueryPtr& query); + +protected: + void queryToSpanQuery(const QueryPtr& query, Collection payloads); + void getPayloads(Collection payloads, const SpanQueryPtr& query); +}; + +} + +#endif diff --git a/include/lucene++/PayloadTermQuery.h b/include/lucene++/PayloadTermQuery.h new file mode 100644 index 00000000..3862e0d0 --- /dev/null +++ b/include/lucene++/PayloadTermQuery.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PAYLOADTERMQUERY_H +#define PAYLOADTERMQUERY_H + +#include "SpanTermQuery.h" + +namespace Lucene { + +/// This class is very similar to {@link SpanTermQuery} except that it factors in the value of the payload +/// located at each of the positions where the {@link Term} occurs. +/// +/// In order to take advantage of this, you must override {@link Similarity#scorePayload(int32_t, const String&, +/// int32_t, int32_t, ByteArray, int32_t, int32_t)} which returns 1 by default. +/// +/// Payload scores are aggregated using a pluggable {@link PayloadFunction}. 
+class LPPAPI PayloadTermQuery : public SpanTermQuery { +public: + PayloadTermQuery(const TermPtr& term, const PayloadFunctionPtr& function, bool includeSpanScore = true); + virtual ~PayloadTermQuery(); + + LUCENE_CLASS(PayloadTermQuery); + +protected: + PayloadFunctionPtr function; + bool includeSpanScore; + +public: + virtual WeightPtr createWeight(const SearcherPtr& searcher); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + friend class PayloadTermWeight; + friend class PayloadTermSpanScorer; +}; + +} + +#endif diff --git a/include/lucene++/PerFieldAnalyzerWrapper.h b/include/lucene++/PerFieldAnalyzerWrapper.h new file mode 100644 index 00000000..14a7ff4d --- /dev/null +++ b/include/lucene++/PerFieldAnalyzerWrapper.h @@ -0,0 +1,69 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PERFIELDANALYZERWRAPPER_H +#define PERFIELDANALYZERWRAPPER_H + +#include "Analyzer.h" + +namespace Lucene { + +/// This analyzer is used to facilitate scenarios where different fields require different analysis techniques. +/// Use {@link #addAnalyzer} to add a non-default analyzer on a field name basis. +/// +/// Example usage: +/// +///
+/// PerFieldAnalyzerWrapperPtr aWrapper = newLucene<PerFieldAnalyzerWrapper>(newLucene<StandardAnalyzer>());
+/// aWrapper->addAnalyzer(L"firstname", newLucene<KeywordAnalyzer>());
+/// aWrapper->addAnalyzer(L"lastname", newLucene<KeywordAnalyzer>());
+/// 
+/// +/// In this example, StandardAnalyzer will be used for all fields except "firstname" and "lastname", for which +/// KeywordAnalyzer will be used. +/// +/// A PerFieldAnalyzerWrapper can be used like any other analyzer, for both indexing and query parsing. +class LPPAPI PerFieldAnalyzerWrapper : public Analyzer { +public: + /// Constructs with default analyzer. + /// @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use the + /// one provided here. + PerFieldAnalyzerWrapper(const AnalyzerPtr& defaultAnalyzer); + + /// Constructs with default analyzer and a map of analyzers to use for specific fields. + /// @param defaultAnalyzer Any fields not specifically defined to use a different analyzer will use the one provided here. + /// @param fieldAnalyzers a Map (String field name to the Analyzer) to be used for those fields + PerFieldAnalyzerWrapper(const AnalyzerPtr& defaultAnalyzer, MapStringAnalyzer fieldAnalyzers); + + virtual ~PerFieldAnalyzerWrapper(); + + LUCENE_CLASS(PerFieldAnalyzerWrapper); + +protected: + AnalyzerPtr defaultAnalyzer; + MapStringAnalyzer analyzerMap; + +public: + /// Defines an analyzer to use for the specified field. + /// @param fieldName field name requiring a non-default analyzer + /// @param analyzer non-default analyzer to use for field + void addAnalyzer(const String& fieldName, const AnalyzerPtr& analyzer); + + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Return the positionIncrementGap from the analyzer assigned to fieldName. 
+ virtual int32_t getPositionIncrementGap(const String& fieldName); + + /// Return the offsetGap from the analyzer assigned to field + virtual int32_t getOffsetGap(const FieldablePtr& field); + + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/PhrasePositions.h b/include/lucene++/PhrasePositions.h new file mode 100644 index 00000000..84c5d058 --- /dev/null +++ b/include/lucene++/PhrasePositions.h @@ -0,0 +1,40 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PHRASEPOSITIONS_H +#define PHRASEPOSITIONS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Position of a term in a document that takes into account the term offset within the phrase. +class PhrasePositions : public LuceneObject { +public: + PhrasePositions(const TermPositionsPtr& t, int32_t o); + virtual ~PhrasePositions(); + + LUCENE_CLASS(PhrasePositions); + +public: + int32_t doc; // current doc + int32_t position; // position in doc + int32_t count; // remaining pos in this doc + int32_t offset; // position in phrase + TermPositionsPtr tp; // stream of positions + PhrasePositions* __next = nullptr; // used to make lists + bool repeats; // there's other pp for same term (eg. query="1st word 2nd word"~1) + +public: + bool next(); + bool skipTo(int32_t target); + void firstPosition(); + bool nextPosition(); +}; + +} + +#endif diff --git a/include/lucene++/PhraseQuery.h b/include/lucene++/PhraseQuery.h new file mode 100644 index 00000000..da94df68 --- /dev/null +++ b/include/lucene++/PhraseQuery.h @@ -0,0 +1,82 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PHRASEQUERY_H +#define PHRASEQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// A Query that matches documents containing a particular sequence of terms. A PhraseQuery is built by +/// QueryParser for input like "new york". +/// +/// This query may be combined with other terms or queries with a {@link BooleanQuery}. +class LPPAPI PhraseQuery : public Query { +public: + /// Constructs an empty phrase query. + PhraseQuery(); + virtual ~PhraseQuery(); + + LUCENE_CLASS(PhraseQuery); + +protected: + String field; + Collection terms; + Collection positions; + int32_t maxPosition; + int32_t slop; + +public: + using Query::toString; + + /// Sets the number of other words permitted between words in query phrase. If zero, then this is an + /// exact phrase search. For larger values this works like a WITHIN or NEAR operator. + /// + /// The slop is in fact an edit-distance, where the units correspond to moves of terms in the query phrase + /// out of position. For example, to switch the order of two words requires two moves (the first move + /// places the words atop one another), so to permit re-orderings of phrases, the slop must be at least two. + /// + /// More exact matches are scored higher than sloppier matches, thus search results are sorted by exactness. + /// + /// The slop is zero by default, requiring exact matches. + void setSlop(int32_t slop); + + /// Returns the slop. + /// @see #setSlop() + int32_t getSlop(); + + /// Adds a term to the end of the query phrase. + /// The relative position of the term is the one immediately after the last term added. + void add(const TermPtr& term); + + /// Adds a term to the end of the query phrase. + /// The relative position of the term within the phrase is specified explicitly. This allows eg. 
phrases + /// with more than one term at the same position or phrases with gaps (eg. in connection with stopwords). + void add(const TermPtr& term, int32_t position); + + /// Returns the set of terms in this phrase. + Collection getTerms(); + + /// Returns the relative positions of terms in this phrase. + Collection getPositions(); + + virtual WeightPtr createWeight(const SearcherPtr& searcher); + virtual void extractTerms(SetTerm terms); + + /// Prints a user-readable version of this query. + virtual String toString(const String& field); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + friend class PhraseWeight; +}; + +} + +#endif diff --git a/include/lucene++/PhraseQueue.h b/include/lucene++/PhraseQueue.h new file mode 100644 index 00000000..c2c00f0e --- /dev/null +++ b/include/lucene++/PhraseQueue.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef PHRASEQUEUE_H +#define PHRASEQUEUE_H + +#include "PriorityQueue.h" + +namespace Lucene { +// raw pointer +typedef PhrasePositions* PhrasePositionsStar; + +class PhraseQueue : public PriorityQueue { +public: + PhraseQueue(int32_t size); + virtual ~PhraseQueue(); + + LUCENE_CLASS(PhraseQueue); + +protected: + virtual bool lessThan(const PhrasePositionsStar& first, const PhrasePositionsStar& second); + +}; + +} + +#endif diff --git a/include/lucene++/PhraseScorer.h b/include/lucene++/PhraseScorer.h new file mode 100644 index 00000000..cddb5701 --- /dev/null +++ b/include/lucene++/PhraseScorer.h @@ -0,0 +1,77 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PHRASESCORER_H +#define PHRASESCORER_H + +#include "Scorer.h" +#include + +namespace Lucene { + +/// Scoring functionality for phrase queries. A document is considered matching if it contains the +/// phrase-query terms at "valid" positions. What "valid positions" are depends on the type of the +/// phrase query: for an exact phrase query terms are required to appear in adjacent locations, while +/// for a sloppy phrase query some distance between the terms is allowed. The abstract method {@link +/// #phraseFreq()} of extending classes is invoked for each document containing all the phrase query +/// terms, in order to compute the frequency of the phrase query in that document. A non zero frequency +/// means a match. 
+class PhraseScorer : public Scorer { +public: + PhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, ByteArray norms); + virtual ~PhraseScorer(); + + LUCENE_CLASS(PhraseScorer); + +protected: + WeightPtr weight; + Weight* __weight = nullptr; + ByteArray norms; + double value; + + bool firstTime; + bool more; + PhraseQueuePtr pq; + std::vector _holds; + PhrasePositions* __first = nullptr; + PhrasePositions* __last = nullptr; + + double freq; // phrase frequency in current doc as computed by phraseFreq(). + +public: + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual double score(); + virtual int32_t advance(int32_t target); + + /// Phrase frequency in current doc as computed by phraseFreq(). + double currentFreq(); + virtual float termFreq(){ + return currentFreq(); + } + + virtual String toString(); + +protected: + /// Next without initial increment + bool doNext(); + + /// For a document containing all the phrase query terms, compute the frequency of the phrase in + /// that document. A non zero frequency means a match. + /// Note, that containing all phrase terms does not guarantee a match - they have to be found in + /// matching locations. + /// @return frequency of the phrase in current doc, 0 if not found. + virtual double phraseFreq() = 0; + + void init(); + void sort(); + void pqToList(); + void firstToLast(); +}; + +} + +#endif diff --git a/include/lucene++/PorterStemFilter.h b/include/lucene++/PorterStemFilter.h new file mode 100644 index 00000000..ec457aea --- /dev/null +++ b/include/lucene++/PorterStemFilter.h @@ -0,0 +1,48 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef PORTERSTEMFILTER_H +#define PORTERSTEMFILTER_H + +#include "TokenFilter.h" + +namespace Lucene { + +/// Transforms the token stream as per the Porter stemming algorithm. Note: the input to the stemming filter must +/// already be in lower case, so you will need to use LowerCaseFilter or LowerCaseTokenizer further down the Tokenizer +/// chain in order for this to work properly. +/// +/// To use this filter with other analyzers, you'll want to write an Analyzer class that sets up the TokenStream chain +/// as you want it. To use this with LowerCaseTokenizer, for example, you'd write an analyzer like this: +/// +///
+/// class MyAnalyzer : public Analyzer
+/// {
+/// public:
+///     virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader)
+///     {
+///         return newLucene<PorterStemFilter>(newLucene<LowerCaseTokenizer>(reader));
+///     }
+/// };
+/// 
+class LPPAPI PorterStemFilter : public TokenFilter { +public: + PorterStemFilter(const TokenStreamPtr& input); + virtual ~PorterStemFilter(); + + LUCENE_CLASS(PorterStemFilter); + +protected: + PorterStemmerPtr stemmer; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + +} + +#endif diff --git a/include/lucene++/PorterStemmer.h b/include/lucene++/PorterStemmer.h new file mode 100644 index 00000000..61461452 --- /dev/null +++ b/include/lucene++/PorterStemmer.h @@ -0,0 +1,126 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PORTERSTEMMER_H +#define PORTERSTEMMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// This is the Porter stemming algorithm, coded up as thread-safe ANSI C by the author. +/// +/// It may be be regarded as canonical, in that it follows the algorithm presented in Porter, 1980, An algorithm +/// for suffix stripping, Program, Vol. 14, no. 3, pp 130-137, only differing from it at the points marked DEPARTURE. +/// +/// See also http://www.tartarus.org/~martin/PorterStemmer +/// +/// The algorithm as described in the paper could be exactly replicated by adjusting the points of DEPARTURE, but +/// this is barely necessary, because (a) the points of DEPARTURE are definitely improvements, and (b) no encoding +/// of the Porter stemmer I have seen is anything like as exact as this version, even with the points of DEPARTURE! +/// +/// Release 2 (the more old-fashioned, non-thread-safe version may be regarded as release 1.) 
+class PorterStemmer : public LuceneObject { +public: + PorterStemmer(); + virtual ~PorterStemmer(); + + LUCENE_CLASS(PorterStemmer); + +protected: + wchar_t* b; // buffer for word to be stemmed + int32_t k; // offset to the end of the string + int32_t j; // a general offset into the string + int32_t i; // initial length of word + bool dirty; + +public: + bool stem(CharArray word); + + /// In stem(b, k), b is a char pointer, and the string to be stemmed is from b[0] to b[k] inclusive. + /// Possibly b[k+1] == '\0', but it is not important. The stemmer adjusts the characters b[0] ... b[k] and + /// stores the new end-point of the string, k'. Stemming never increases word length, so 0 <= k' <= k. + bool stem(wchar_t* b, int32_t k); + + wchar_t* getResultBuffer(); + int32_t getResultLength(); + +protected: + /// Returns true if b[i] is a consonant. ('b' means 'z->b', but here and below we drop 'z->' in comments. + bool cons(int32_t i); + + /// Measures the number of consonant sequences between 0 and j. If c is a consonant sequence and v a vowel + /// sequence, and <..> indicates arbitrary presence, + /// + /// gives 0 + /// vc gives 1 + /// vcvc gives 2 + /// vcvcvc gives 3 + /// ... + int32_t m(); + + /// Return true if 0,...j contains a vowel + bool vowelinstem(); + + /// Return true if j,(j-1) contain a double consonant. + bool doublec(int32_t j); + + /// Return true if i-2,i-1,i has the form consonant - vowel - consonant and also if the second c is not w,x or y. + /// This is used when trying to restore an e at the end of a short word. + /// + /// eg. cav(e), lov(e), hop(e), crim(e), but + /// snow, box, tray. + bool cvc(int32_t i); + + /// Returns true if 0,...k ends with the string s. + bool ends(const wchar_t* s); + + /// Sets (j+1),...k to the characters in the string s, readjusting k. + void setto(const wchar_t* s); + + void r(const wchar_t* s); + + /// step1ab() gets rid of plurals and -ed or -ing. eg. 
+ /// + /// caresses -> caress + /// ponies -> poni + /// ties -> ti + /// caress -> caress + /// cats -> cat + /// + /// feed -> feed + /// agreed -> agree + /// disabled -> disable + /// + /// matting -> mat + /// mating -> mate + /// meeting -> meet + /// milling -> mill + /// messing -> mess + /// + /// meetings -> meet + void step1ab(); + + /// Turns terminal y to i when there is another vowel in the stem. + void step1c(); + + /// Maps double suffices to single ones. so -ization ( = -ize plus -ation) maps to -ize etc. note that the + /// string before the suffix must give m() > 0. + void step2(); + + /// Deals with -ic-, -full, -ness etc. similar strategy to step2. + void step3(); + + /// Takes off -ant, -ence etc., in context vcvc. + void step4(); + + /// Removes a final -e if m() > 1, and changes -ll to -l if m() > 1. + void step5(); +}; + +} + +#endif diff --git a/include/lucene++/PositionBasedTermVectorMapper.h b/include/lucene++/PositionBasedTermVectorMapper.h new file mode 100644 index 00000000..5576af7b --- /dev/null +++ b/include/lucene++/PositionBasedTermVectorMapper.h @@ -0,0 +1,79 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef POSITIONBASEDTERMVECTORMAPPER_H +#define POSITIONBASEDTERMVECTORMAPPER_H + +#include "TermVectorMapper.h" + +namespace Lucene { + +class LPPAPI PositionBasedTermVectorMapper : public TermVectorMapper { +public: + PositionBasedTermVectorMapper(bool ignoringOffsets = false); + virtual ~PositionBasedTermVectorMapper(); + + LUCENE_CLASS(PositionBasedTermVectorMapper); + +protected: + MapStringMapIntTermVectorsPositionInfo fieldToTerms; + + String currentField; + + /// A Map of Integer and TermVectorsPositionInfo + MapIntTermVectorsPositionInfo currentPositions; + + bool storeOffsets; + +public: + /// Never ignores positions. This mapper doesn't make much sense unless there are positions. + /// @return false + virtual bool isIgnoringPositions(); + + /// Callback for the TermVectorReader. + virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); + + /// Callback mechanism used by the TermVectorReader. + virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); + + /// Get the mapping between fields and terms, sorted by the comparator + /// @return A map between field names and a Map. The sub-Map key is the position as the integer, the value is + /// {@link PositionBasedTermVectorMapper}. 
+ MapStringMapIntTermVectorsPositionInfo getFieldToTerms(); +}; + +/// Container for a term at a position +class LPPAPI TermVectorsPositionInfo : public LuceneObject { +public: + TermVectorsPositionInfo(int32_t position, bool storeOffsets); + virtual ~TermVectorsPositionInfo(); + + LUCENE_CLASS(TermVectorsPositionInfo); + +protected: + int32_t position; + Collection terms; + Collection offsets; + +public: + void addTerm(const String& term, const TermVectorOffsetInfoPtr& info); + + /// @return The position of the term + int32_t getPosition(); + + /// Note, there may be multiple terms at the same position + /// @return A List of Strings + Collection getTerms(); + + /// Parallel list (to {@link #getTerms()}) of TermVectorOffsetInfo objects. There may be multiple + /// entries since there may be multiple terms at a position. + /// @return A List of TermVectorOffsetInfo objects, if offsets are stored. + Collection getOffsets(); +}; + +} + +#endif diff --git a/include/lucene++/PositionIncrementAttribute.h b/include/lucene++/PositionIncrementAttribute.h new file mode 100644 index 00000000..13194116 --- /dev/null +++ b/include/lucene++/PositionIncrementAttribute.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef POSITIONINCREMENTATTRIBUTE_H +#define POSITIONINCREMENTATTRIBUTE_H + +#include "Attribute.h" + +namespace Lucene { + +/// The positionIncrement determines the position of this token relative to the previous Token in a +/// TokenStream, used in phrase searching. +/// +/// The default value is one. +/// +/// Some common uses for this are: +/// +/// Set it to zero to put multiple terms in the same position. 
This is useful if, eg., a word has multiple +/// stems. Searches for phrases including either stem will match. In this case, all but the first stem's +/// increment should be set to zero: the increment of the first instance should be one. Repeating a token +/// with an increment of zero can also be used to boost the scores of matches on that token. +/// +/// Set it to values greater than one to inhibit exact phrase matches. If, for example, one does not want +/// phrases to match across removed stop words, then one could build a stop word filter that removes stop +/// words and also sets the increment to the number of stop words removed before each non-stop word. Then +/// exact phrase queries will only match when the terms occur with no intervening stop words. +/// +/// @see TermPositions +class LPPAPI PositionIncrementAttribute : public Attribute { +public: + PositionIncrementAttribute(); + virtual ~PositionIncrementAttribute(); + + LUCENE_CLASS(PositionIncrementAttribute); + +protected: + int32_t positionIncrement; + +public: + virtual String toString(); + + /// Set the position increment. The default value is one. + /// @param positionIncrement the distance from the prior term + virtual void setPositionIncrement(int32_t positionIncrement); + + /// Returns the position increment of this Token. 
+ /// @see #setPositionIncrement + virtual int32_t getPositionIncrement(); + + virtual void clear(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual void copyTo(const AttributePtr& target); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/PositiveScoresOnlyCollector.h b/include/lucene++/PositiveScoresOnlyCollector.h new file mode 100644 index 00000000..2c368919 --- /dev/null +++ b/include/lucene++/PositiveScoresOnlyCollector.h @@ -0,0 +1,36 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef POSITIVESCORESONLYCOLLECTOR_H +#define POSITIVESCORESONLYCOLLECTOR_H + +#include "Collector.h" + +namespace Lucene { + +/// A {@link Collector} implementation which wraps another {@link Collector} and makes sure only +/// documents with scores > 0 are collected. 
+class LPPAPI PositiveScoresOnlyCollector : public Collector { +public: + PositiveScoresOnlyCollector(const CollectorPtr& c); + virtual ~PositiveScoresOnlyCollector(); + + LUCENE_CLASS(PositiveScoresOnlyCollector); + +protected: + CollectorPtr collector; + ScorerPtr scorer; + +public: + virtual void collect(int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setScorer(const ScorerPtr& scorer); + virtual bool acceptsDocsOutOfOrder(); +}; + +} + +#endif diff --git a/include/lucene++/PrefixFilter.h b/include/lucene++/PrefixFilter.h new file mode 100644 index 00000000..b4a4318d --- /dev/null +++ b/include/lucene++/PrefixFilter.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PREFIXFILTER_H +#define PREFIXFILTER_H + +#include "MultiTermQueryWrapperFilter.h" + +namespace Lucene { + +/// A Filter that restricts search results to values that have a matching prefix in a given field. +class LPPAPI PrefixFilter : public MultiTermQueryWrapperFilter { +public: + PrefixFilter(const TermPtr& prefix); + virtual ~PrefixFilter(); + + LUCENE_CLASS(PrefixFilter); + +public: + TermPtr getPrefix(); + + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/PrefixQuery.h b/include/lucene++/PrefixQuery.h new file mode 100644 index 00000000..0c45bdf9 --- /dev/null +++ b/include/lucene++/PrefixQuery.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef PREFIXQUERY_H +#define PREFIXQUERY_H + +#include "MultiTermQuery.h" + +namespace Lucene { + +/// A Query that matches documents containing terms with a specified prefix. A PrefixQuery is built by +/// QueryParser for input like app*. +/// +/// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. +class LPPAPI PrefixQuery : public MultiTermQuery { +public: + /// Constructs a query for terms starting with prefix. + PrefixQuery(const TermPtr& prefix); + + virtual ~PrefixQuery(); + + LUCENE_CLASS(PrefixQuery); + +protected: + TermPtr prefix; + +public: + using MultiTermQuery::toString; + + /// Returns the prefix of this query. + TermPtr getPrefix(); + + /// Prints a user-readable version of this query. + virtual String toString(const String& field); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); + +protected: + virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/PrefixTermEnum.h b/include/lucene++/PrefixTermEnum.h new file mode 100644 index 00000000..93f658bb --- /dev/null +++ b/include/lucene++/PrefixTermEnum.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PREFIXTERMENUM_H +#define PREFIXTERMENUM_H + +#include "FilteredTermEnum.h" + +namespace Lucene { + +/// Subclass of FilteredTermEnum for enumerating all terms that match the specified prefix filter term. +/// +/// Term enumerations are always ordered by Term.compareTo(). 
Each term in the enumeration is greater than +/// all that precede it. +class LPPAPI PrefixTermEnum : public FilteredTermEnum { +public: + PrefixTermEnum(const IndexReaderPtr& reader, const TermPtr& prefix); + virtual ~PrefixTermEnum(); + + LUCENE_CLASS(PrefixTermEnum); + +protected: + TermPtr prefix; + bool _endEnum; + +public: + virtual double difference(); + +protected: + virtual bool endEnum(); + virtual bool termCompare(const TermPtr& term); + + TermPtr getPrefixTerm(); +}; + +} + +#endif diff --git a/include/lucene++/PriorityQueue.h b/include/lucene++/PriorityQueue.h new file mode 100644 index 00000000..ec7e71bb --- /dev/null +++ b/include/lucene++/PriorityQueue.h @@ -0,0 +1,201 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef PRIORITYQUEUE_H +#define PRIORITYQUEUE_H + +#include "LuceneObject.h" +#include "MiscUtils.h" + +namespace Lucene { + +/// A PriorityQueue maintains a partial ordering of its elements such that the least element can always +/// be found in constant time. Put()'s and pop()'s require log(size) time. +/// +/// NOTE: This class pre-allocates a full array of length maxSize + 1. 
+template +class PriorityQueue : public LuceneObject { +public: + typedef typename std::vector heap_type; + + PriorityQueue(int32_t maxSize) { + this->_size = 0; + this->_maxSize = maxSize; + } + + virtual ~PriorityQueue() { + } + +protected: + heap_type heap; + int32_t _size; + int32_t _maxSize; + +public: + virtual void initialize() { + bool empty = heap.empty(); + + if (empty) { + int32_t heapSize = 0; + if (_maxSize == 0) { + // We allocate 1 extra to avoid if statement in top() + heapSize = 2; + } else if (_maxSize == INT_MAX) { + // Don't wrap heapSize to -1, in this case, which causes a confusing NegativeArraySizeException. + // Note that very likely this will simply then hit an OOME, but at least that's more indicative + // to caller that this values is too big. We don't +1 in this case, but it's very unlikely in + // practice one will actually insert this many objects into the PQ + heapSize = INT_MAX; + } else { + // NOTE: we add +1 because all access to heap is 1-based not 0-based. heap[0] is unused. + heapSize = _maxSize + 1; + } + this->heap.resize(heapSize); + } + + // If sentinel objects are supported, populate the queue with them + TYPE sentinel = getSentinelObject(); + if (empty && sentinel) { + heap[1] = sentinel; + for (int32_t i = 2; i < (int32_t)heap.size(); ++i) { + heap[i] = getSentinelObject(); + } + _size = _maxSize; + } + } + + /// Return maximum size of queue + int32_t maxSize() { + return _maxSize; + } + + /// Adds an Object to a PriorityQueue in log(size) time. If one tries to add more objects + /// than maxSize from initialize an {@link IndexOutOfBoundsException} is thrown. + TYPE add(const TYPE& type) { + ++_size; + if (_size < 0 || _size >= (int32_t)heap.size()) { + boost::throw_exception(IndexOutOfBoundsException()); + } + heap[_size] = type; + upHeap(); + return heap[1]; + } + + /// Adds an Object to a PriorityQueue in log(size) time. It returns the object (if any) that was + /// dropped off the heap because it was full. 
This can be the given parameter (in case it is + /// smaller than the full heap's minimum, and couldn't be added), or another object that was + /// previously the smallest value in the heap and now has been replaced by a larger one, or null + /// if the queue wasn't yet full with maxSize elements. + TYPE addOverflow(const TYPE& type) { + if (_size < _maxSize) { + add(type); + return TYPE(); + } else if (_size > 0 && !lessThan(type, heap[1])) { + TYPE result = heap[1]; + heap[1] = type; + updateTop(); + return result; + } else { + return type; + } + } + + /// Returns the least element of the PriorityQueue. + TYPE top() { + // We don't need to check size here: if maxSize is 0, then heap is length 2 array with both + // entries null. If size is 0 then heap[1] is already null. + return heap[1]; + } + + /// Removes and returns the least element of the PriorityQueue. + TYPE pop() { + if (_size > 0) { + TYPE result = heap[1]; // save first value + heap[1] = heap[_size]; // move last to first + heap[_size--] = TYPE(); + downHeap(); // adjust heap + return result; + } else { + return TYPE(); + } + } + + /// Should be called when the Object at top changes values. + TYPE updateTop() { + downHeap(); + return heap[1]; + } + + /// Returns the number of elements currently stored in the PriorityQueue. + int32_t size() const { + return _size; + } + + /// Returns whether PriorityQueue is currently empty. + bool empty() const { + return (_size == 0); + } + + /// Removes all entries from the PriorityQueue. 
+ void clear() { + for (int32_t i = 0; i <= _size; ++i) { + heap[i] = TYPE(); + } + _size = 0; + } + +protected: + void upHeap() { + int32_t i = _size; + TYPE node = heap[i]; // save bottom node + int32_t j = MiscUtils::unsignedShift(i, 1); + while (j > 0 && lessThan(node, heap[j])) { + heap[i] = heap[j]; // shift parents down + i = j; + j = MiscUtils::unsignedShift(j, 1); + } + heap[i] = node; // install saved node + } + + void downHeap() { + int32_t i = 1; + TYPE node = heap[i]; // save top node + int32_t j = i << 1; // find smaller child + int32_t k = j + 1; + if (k <= _size && lessThan(heap[k], heap[j])) { + j = k; + } + while (j <= _size && lessThan(heap[j], node)) { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= _size && lessThan(heap[k], heap[j])) { + j = k; + } + } + heap[i] = node; // install saved node + } + + /// Determines the ordering of objects in this priority queue. Subclasses must define this one method. + virtual bool lessThan(const TYPE& first, const TYPE& second) { + return std::less()(first, second); + } + + /// This method can be overridden by extending classes to return a sentinel object which will be used by + /// {@link #initialize} to fill the queue, so that the code which uses that queue can always assume it's + /// full and only change the top without attempting to insert any new object. + /// + /// Those sentinel values should always compare worse than any non-sentinel value (ie., {@link #lessThan} + /// should always favour the non-sentinel values). + virtual TYPE getSentinelObject() { + return TYPE(); + } +}; + +} + +#endif diff --git a/include/lucene++/Query.h b/include/lucene++/Query.h new file mode 100644 index 00000000..4337b4b1 --- /dev/null +++ b/include/lucene++/Query.h @@ -0,0 +1,112 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERY_H +#define QUERY_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// The abstract base class for queries. +/// +/// Instantiable subclasses are: +/// +/// {@link TermQuery} +/// {@link MultiTermQuery} +/// {@link BooleanQuery} +/// {@link WildcardQuery} +/// {@link PhraseQuery} +/// {@link PrefixQuery} +/// {@link MultiPhraseQuery} +/// {@link FuzzyQuery} +/// {@link TermRangeQuery} +/// {@link NumericRangeQuery} +/// {@link org.apache.lucene.search.spans.SpanQuery} +/// +/// A parser for queries is contained in: {@link QueryParser} +class LPPAPI Query : public LuceneObject { +public: + Query(); + virtual ~Query(); + + LUCENE_CLASS(Query); + +protected: + double boost; // query boost factor + +public: + /// Sets the boost for this query clause to b. Documents matching this clause will (in addition to + /// the normal weightings) have their score multiplied by b. + virtual void setBoost(double b); + + /// Gets the boost for this clause. Documents matching this clause will (in addition to the normal + /// weightings) have their score multiplied by b. The boost is 1.0 by default. + virtual double getBoost(); + + /// Prints a query to a string, with field assumed to be the default field and omitted. + /// + /// The representation used is one that is supposed to be readable by {@link QueryParser}. However, + /// there are the following limitations: + /// + /// If the query was created by the parser, the printed representation may not be exactly what was + /// parsed. For example, characters that need to be escaped will be represented without the required + /// backslash. + /// + /// Some of the more complicated queries (eg. span queries) don't have a representation that can be + /// parsed by QueryParser. 
+ virtual String toString(const String& field); + + /// Prints a query to a string. + virtual String toString(); + + /// Constructs an appropriate Weight implementation for this query. + /// Only implemented by primitive queries, which re-write to themselves. + virtual WeightPtr createWeight(const SearcherPtr& searcher); + + /// Constructs and initializes a Weight for a top-level query. + virtual WeightPtr weight(const SearcherPtr& searcher); + + /// Called to re-write queries into primitive queries. For example, a PrefixQuery will be rewritten + /// into a BooleanQuery that consists of TermQuerys. + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + /// Called when re-writing queries under MultiSearcher. + /// + /// Create a single query suitable for use by all subsearchers (in 1-1 correspondence with queries). + /// This is an optimization of the OR of all queries. We handle the common optimization cases of equal + /// queries and overlapping clauses of boolean OR queries (as generated by MultiTermQuery.rewrite()). + /// Be careful overriding this method as queries[0] determines which method will be called and is not + /// necessarily of the same type as the other queries. + virtual QueryPtr combine(Collection queries); + + /// Adds all terms occurring in this query to the terms set. Only works if this query is in its + /// {@link #rewrite rewritten} form. + virtual void extractTerms(SetTerm terms); + + /// Merges the clauses of a set of BooleanQuery's into a single BooleanQuery. + /// + /// A utility for use by {@link #combine(Query[])} implementations. + static QueryPtr mergeBooleanQueries(Collection queries); + + /// Returns the Similarity implementation to be used for this query. Subclasses may override this method + /// to specify their own Similarity implementation, perhaps one that delegates through that of the Searcher. + /// By default the Searcher's Similarity implementation is returned. 
+ virtual SimilarityPtr getSimilarity(const SearcherPtr& searcher); + + /// Returns a clone of this query. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); + + /// Return given boost value as a string. + String boostString(); +}; + +} + +#endif diff --git a/include/lucene++/QueryParseError.h b/include/lucene++/QueryParseError.h new file mode 100644 index 00000000..8e05d31f --- /dev/null +++ b/include/lucene++/QueryParseError.h @@ -0,0 +1,53 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYPARSEERROR_H +#define QUERYPARSEERROR_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Utility class to handle query parse errors +class QueryParseError : public LuceneObject { +public: + virtual ~QueryParseError(); + LUCENE_CLASS(QueryParseError); + +public: + /// Returns a detailed message for the Error when it is thrown by the token manager to indicate a + /// lexical error. + /// @param EOFSeen Indicates if EOF caused the lexical error + /// @param curLexState Lexical state in which this error occurred + /// @param errorLine Line number when the error occurred + /// @param errorColumn Column number when the error occurred + /// @param errorAfter Prefix that was seen before this error occurred + /// @param curChar The offending character + static String lexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, + const String& errorAfter, wchar_t curChar); + + /// Generate a parse error message and returns it. + /// @param currentToken This is the last token that has been consumed successfully. 
If this object + /// has been created due to a parse error, the token following this token will (therefore) be the first + /// error token. + /// @param expectedTokenSequences Each entry in this array is an array of integers. Each array of + /// integers represents a sequence of tokens (by their ordinal values) that is expected at this point + /// of the parse. + /// @param tokenImage This is a reference to the "tokenImage" array of the generated parser within + /// which the parse error occurred. + static String parseError(const QueryParserTokenPtr& currentToken, Collection< Collection > expectedTokenSequences, + Collection tokenImage); + + +protected: + /// Replaces unprintable characters by their escaped (or unicode escaped) equivalents in the + /// given string + static String addEscapes(const String& str); +}; + +} + +#endif diff --git a/include/lucene++/QueryParser.h b/include/lucene++/QueryParser.h new file mode 100644 index 00000000..59f8d8d7 --- /dev/null +++ b/include/lucene++/QueryParser.h @@ -0,0 +1,473 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYPARSER_H +#define QUERYPARSER_H + +#include "QueryParserConstants.h" +#include "DateTools.h" +#include "BooleanClause.h" + +namespace Lucene { + +typedef HashMap MapStringResolution; + +/// The most important method is {@link #parse(const String&)}. +/// +/// The syntax for query strings is as follows: +/// A Query is a series of clauses. +/// A clause may be prefixed by: +///
    +///
  • a plus (+) or a minus (-) sign, indicating that the clause is required or prohibited respectively; or +///
  • a term followed by a colon, indicating the field to be searched. This enables one to construct queries +/// which search multiple fields. +///
+/// +/// A clause may be either: +///
    +///
  • a term, indicating all the documents that contain this term; or +///
  • a nested query, enclosed in parentheses. Note that this may be used with a +/- prefix to require any +/// of a set of terms. +///
+/// +/// Thus, in BNF, the query grammar is: +///
+/// Query  ::= ( Clause )*
+/// Clause ::= ["+", "-"] [ ":"] (  | "(" Query ")" )
+/// 
+/// +/// Examples of appropriately formatted queries can be found in the query syntax documentation. +/// +/// In {@link TermRangeQuery}s, QueryParser tries to detect date values, eg. +/// date:[6/1/2005 TO 6/4/2005] produces a range query that searches for "date" fields between +/// 2005-06-01 and 2005-06-04. Note that the format of the accepted input depends on {@link #setLocale(Locale) +/// the locale}. +/// +/// By default a date is converted into a search term using the deprecated {@link DateField} for compatibility +/// reasons. To use the new {@link DateTools} to convert dates, a {@link Resolution} has to be set. +/// +/// The date resolution that shall be used for RangeQueries can be set using {@link #setDateResolution(Resolution)} +/// or {@link #setDateResolution(const String&, Resolution)}. The former sets the default date resolution for +/// all fields, whereas the latter can be used to set field specific date resolutions. Field specific date +/// resolutions take, if set, precedence over the default date resolution. +/// +/// If you use neither {@link DateField} nor {@link DateTools} in your index, you can create your own query +/// parser that inherits QueryParser and overwrites {@link #getRangeQuery(const String&, const String&, +/// const String&, bool)} to use a different method for date conversion. +/// +/// Note that QueryParser is not thread-safe. +/// +/// NOTE: there is a new QueryParser in contrib, which matches the same syntax as this class, but is more modular, +/// enabling substantial customization to how a query is created. +/// +/// NOTE: You must specify the required {@link Version} compatibility when creating QueryParser: +///
    +///
  • As of 2.9, {@link #setEnablePositionIncrements} is true by default. +///
+class LPPAPI QueryParser : public QueryParserConstants, public LuceneObject { +public: + /// Constructs a query parser. + /// @param matchVersion Lucene version to match. + /// @param field The default field for query terms. + /// @param analyzer Used to find terms in the query text. + QueryParser(LuceneVersion::Version matchVersion, const String& field, const AnalyzerPtr& analyzer); + + /// Constructor with user supplied QueryParserCharStream. + QueryParser(const QueryParserCharStreamPtr& stream); + + /// Constructor with generated Token Manager. + QueryParser(const QueryParserTokenManagerPtr& tokenMgr); + + virtual ~QueryParser(); + + LUCENE_CLASS(QueryParser); + + /// The default operator for parsing queries. Use {@link QueryParser#setDefaultOperator} to change it. + enum Operator { OR_OPERATOR, AND_OPERATOR }; + +protected: + static const int32_t CONJ_NONE; + static const int32_t CONJ_AND; + static const int32_t CONJ_OR; + + static const int32_t MOD_NONE; + static const int32_t MOD_NOT; + static const int32_t MOD_REQ; + + /// The actual operator that parser uses to combine query terms + Operator _operator; + + /// Next token. 
+ int32_t _jj_ntk; + QueryParserTokenPtr jj_scanpos; + QueryParserTokenPtr jj_lastpos; + + int32_t jj_la; + int32_t jj_gen; + Collection jj_la1; + + static const int32_t jj_la1_0[]; + static const int32_t jj_la1_1[]; + + struct JJCalls; + typedef boost::shared_ptr JJCallsPtr; + + struct JJCalls { + JJCalls() { + gen = 0; + arg = 0; + } + + int32_t gen; + QueryParserTokenPtr first; + int32_t arg; + JJCallsPtr next; + }; + + Collection jj_2_rtns; + bool jj_rescan; + int32_t jj_gc; + + Collection< Collection > jj_expentries; + Collection jj_expentry; + int32_t jj_kind; + Collection jj_lasttokens; + int32_t jj_endpos; + +public: + bool lowercaseExpandedTerms; + RewriteMethodPtr multiTermRewriteMethod; + bool allowLeadingWildcard; + bool enablePositionIncrements; + + AnalyzerPtr analyzer; + String field; + int32_t phraseSlop; + double fuzzyMinSim; + int32_t fuzzyPrefixLength; + std::locale locale; + + // the default date resolution + DateTools::Resolution dateResolution; + + // maps field names to date resolutions + MapStringResolution fieldToDateResolution; + + // The collator to use when determining range inclusion, for use when constructing RangeQuerys + CollatorPtr rangeCollator; + + /// Generated Token Manager. + QueryParserTokenManagerPtr token_source; + + /// Current token. + QueryParserTokenPtr token; + + /// Next token. + QueryParserTokenPtr jj_nt; + +public: + /// Parses a query string, returning a {@link Query}. + /// @param query The query string to be parsed. + QueryPtr parse(const String& query); + + /// @return Returns the analyzer. + AnalyzerPtr getAnalyzer(); + + /// @return Returns the field. + String getField(); + + /// Get the minimal similarity for fuzzy queries. + double getFuzzyMinSim(); + + /// Set the minimum similarity for fuzzy queries. Default is 0.5. + void setFuzzyMinSim(double fuzzyMinSim); + + /// Get the prefix length for fuzzy queries. + /// @return Returns the fuzzyPrefixLength. 
+ int32_t getFuzzyPrefixLength(); + + /// Set the prefix length for fuzzy queries. Default is 0. + /// @param fuzzyPrefixLength The fuzzyPrefixLength to set. + void setFuzzyPrefixLength(int32_t fuzzyPrefixLength); + + /// Sets the default slop for phrases. If zero, then exact phrase matches are required. + /// Default value is zero. + void setPhraseSlop(int32_t phraseSlop); + + /// Gets the default slop for phrases. + int32_t getPhraseSlop(); + + /// Set to true to allow leading wildcard characters. + /// + /// When set, * or ? are allowed as the first character of a PrefixQuery and WildcardQuery. + /// Note that this can produce very slow queries on big indexes. Default: false. + void setAllowLeadingWildcard(bool allowLeadingWildcard); + + /// @see #setAllowLeadingWildcard(bool) + bool getAllowLeadingWildcard(); + + /// Set to true to enable position increments in result query. + /// + /// When set, result phrase and multi-phrase queries will be aware of position increments. + /// Useful when eg. a StopFilter increases the position increment of the token that follows an + /// omitted token. Default: false. + void setEnablePositionIncrements(bool enable); + + /// @see #setEnablePositionIncrements(bool) + bool getEnablePositionIncrements(); + + /// Sets the boolean operator of the QueryParser. In default mode (OR_OPERATOR) terms without + /// any modifiers are considered optional: for example capital of Hungary is equal to capital + /// OR of OR Hungary. + /// In AND_OPERATOR mode terms are considered to be in conjunction: the above mentioned query is + /// parsed as capital AND of AND Hungary + void setDefaultOperator(Operator op); + + /// Gets implicit operator setting, which will be either AND_OPERATOR or OR_OPERATOR. + Operator getDefaultOperator(); + + /// Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically lower-cased + /// or not. Default is true. 
+ void setLowercaseExpandedTerms(bool lowercaseExpandedTerms); + + /// @see #setLowercaseExpandedTerms(bool) + bool getLowercaseExpandedTerms(); + + /// By default QueryParser uses {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} when + /// creating a PrefixQuery, WildcardQuery or RangeQuery. This implementation is generally preferable + /// because it a) Runs faster b) Does not have the scarcity of terms unduly influence score c) avoids + /// any "TooManyClauses" exception. However, if your application really needs to use the old- + /// fashioned BooleanQuery expansion rewriting and the above points are not relevant then use this + /// to change the rewrite method. + void setMultiTermRewriteMethod(const RewriteMethodPtr& method); + + /// @see #setMultiTermRewriteMethod + RewriteMethodPtr getMultiTermRewriteMethod(); + + /// Set locale used by date range parsing. + void setLocale(std::locale locale); + + /// Returns current locale, allowing access by subclasses. + std::locale getLocale(); + + /// Sets the default date resolution used by RangeQueries for fields for which no specific date + /// resolutions has been set. Field specific resolutions can be set with {@link + /// #setDateResolution(const String&, DateTools::Resolution)}. + /// @param dateResolution The default date resolution to set + void setDateResolution(DateTools::Resolution dateResolution); + + /// Sets the date resolution used by RangeQueries for a specific field. + /// @param fieldName Field for which the date resolution is to be set + /// @param dateResolution Date resolution to set + void setDateResolution(const String& fieldName, DateTools::Resolution dateResolution); + + /// Returns the date resolution that is used by RangeQueries for the given field. Returns null, if + /// no default or field specific date resolution has been set for the given field. 
+ DateTools::Resolution getDateResolution(const String& fieldName); + + /// Sets the collator used to determine index term inclusion in ranges for RangeQuerys. + /// + /// WARNING: Setting the rangeCollator to a non-null collator using this method will cause every + /// single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending + /// on the number of index Terms in this Field, the operation could be very slow. + /// @param rc The collator to use when constructing RangeQuerys + void setRangeCollator(const CollatorPtr& rc); + + /// @return the collator used to determine index term inclusion in ranges for RangeQuerys. + CollatorPtr getRangeCollator(); + + /// Command line tool to test QueryParser, using {@link SimpleAnalyzer}. + static int main(Collection args); + + /// Query ::= ( Clause )* + /// Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) + int32_t Conjunction(); + int32_t Modifiers(); + + /// This makes sure that there is no garbage after the query string + virtual QueryPtr TopLevelQuery(const String& field); + + virtual QueryPtr ParseQuery(const String& field); + virtual QueryPtr ParseClause(const String& field); + virtual QueryPtr ParseTerm(const String& field); + + /// Reinitialise. + virtual void ReInit(const QueryParserCharStreamPtr& stream); + + /// Reinitialise. + virtual void ReInit(const QueryParserTokenManagerPtr& tokenMgr); + + /// Get the next Token. + virtual QueryParserTokenPtr getNextToken(); + + /// Get the specific Token. + virtual QueryParserTokenPtr getToken(int32_t index); + + /// Generate QueryParserError exception. + virtual void generateParseException(); + + /// Enable tracing. + virtual void enable_tracing(); + + /// Disable tracing. 
+ virtual void disable_tracing(); + +protected: + /// Construct query parser with supplied QueryParserCharStream or TokenManager + void ConstructParser(const QueryParserCharStreamPtr& stream, const QueryParserTokenManagerPtr& tokenMgr); + + virtual void addClause(Collection clauses, int32_t conj, int32_t mods, const QueryPtr& q); + + /// Use the analyzer to get all the tokens, and then build a TermQuery, PhraseQuery, or nothing + /// based on the term count. + virtual QueryPtr getFieldQuery(const String& field, const String& queryText); + + /// Base implementation delegates to {@link #getFieldQuery(const String&, const String&)}. + /// This method may be overridden, for example, to return a SpanNearQuery instead of a PhraseQuery. + virtual QueryPtr getFieldQuery(const String& field, const String& queryText, int32_t slop); + + /// Builds a new TermRangeQuery instance for given min/max parts + virtual QueryPtr getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); + + /// Builds a new BooleanQuery instance + /// @param disableCoord disable coord + BooleanQueryPtr newBooleanQuery(bool disableCoord); + + /// Builds a new BooleanClause instance + /// @param q sub query + /// @param occur how this clause should occur when matching documents + /// @return new BooleanClause instance + BooleanClausePtr newBooleanClause(const QueryPtr& q, BooleanClause::Occur occur); + + /// Builds a new TermQuery instance + /// @param term term + /// @return new TermQuery instance + QueryPtr newTermQuery(const TermPtr& term); + + /// Builds a new PhraseQuery instance + /// @return new PhraseQuery instance + PhraseQueryPtr newPhraseQuery(); + + /// Builds a new MultiPhraseQuery instance + /// @return new MultiPhraseQuery instance + MultiPhraseQueryPtr newMultiPhraseQuery(); + + /// Builds a new PrefixQuery instance + /// @param prefix Prefix term + /// @return new PrefixQuery instance + QueryPtr newPrefixQuery(const TermPtr& prefix); + + /// Builds a 
new FuzzyQuery instance + /// @param term Term + /// @param minimumSimilarity minimum similarity + /// @param prefixLength prefix length + /// @return new FuzzyQuery Instance + QueryPtr newFuzzyQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength); + + /// Builds a new TermRangeQuery instance + /// @param field Field + /// @param part1 min + /// @param part2 max + /// @param inclusive true if range is inclusive + /// @return new TermRangeQuery instance + QueryPtr newRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive); + + /// Builds a new MatchAllDocsQuery instance + /// @return new MatchAllDocsQuery instance + QueryPtr newMatchAllDocsQuery(); + + /// Builds a new WildcardQuery instance + /// @param t wildcard term + /// @return new WildcardQuery instance + QueryPtr newWildcardQuery(const TermPtr& term); + + /// Factory method for generating query, given a set of clauses. By default creates a boolean query + /// composed of clauses passed in. + /// + /// Can be overridden by extending classes, to modify query being returned. + /// + /// @param clauses List that contains {@link BooleanClause} instances to join. + /// @return Resulting {@link Query} object. + virtual QueryPtr getBooleanQuery(Collection clauses); + + /// Factory method for generating query, given a set of clauses. By default creates a boolean query + /// composed of clauses passed in. + /// + /// Can be overridden by extending classes, to modify query being returned. + /// + /// @param clauses List that contains {@link BooleanClause} instances to join. + /// @param disableCoord true if coord scoring should be disabled. + /// @return Resulting {@link Query} object. + virtual QueryPtr getBooleanQuery(Collection clauses, bool disableCoord); + + /// Factory method for generating a query. Called when parser parses an input term token that contains + /// one or more wildcard characters (? 
and *), but is not a prefix term token (one that has just a + /// single * character at the end) + /// + /// Depending on settings, prefix term may be lower-cased automatically. It will not go through the + /// default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard + /// templates. + /// + /// Can be overridden by extending classes, to provide custom handling for wildcard queries, which may + /// be necessary due to missing analyzer calls. + /// + /// @param field Name of the field query will use. + /// @param termStr Term token that contains one or more wild card characters (? or *), but is not simple + /// prefix term + /// @return Resulting {@link Query} built for the term + virtual QueryPtr getWildcardQuery(const String& field, const String& termStr); + + /// Factory method for generating a query (similar to {@link #getWildcardQuery}). Called when parser + /// parses an input term token that uses prefix notation; that is, contains a single '*' wildcard + /// character as its last character. Since this is a special case of generic wildcard term, and such + /// a query can be optimized easily, this usually results in a different query object. + /// + /// Depending on settings, a prefix term may be lower-cased automatically. It will not go through the + /// default Analyzer, however, since normal Analyzers are unlikely to work properly with wildcard templates. + /// + /// Can be overridden by extending classes, to provide custom handling for wild card queries, which may be + /// necessary due to missing analyzer calls. + /// + /// @param field Name of the field query will use. + /// @param termStr Term token to use for building term for the query (without trailing '*' character) + /// @return Resulting {@link Query} built for the term + virtual QueryPtr getPrefixQuery(const String& field, const String& termStr); + + /// Factory method for generating a query (similar to {@link #getWildcardQuery}). 
Called when parser + /// parses an input term token that has the fuzzy suffix (~) appended. + /// + /// @param field Name of the field query will use. + /// @param termStr Term token to use for building term for the query + /// @return Resulting {@link Query} built for the term + virtual QueryPtr getFuzzyQuery(const String& field, const String& termStr, double minSimilarity); + + /// Returns a String where the escape char has been removed, or kept only once if there was a double + /// escape. Supports escaped unicode characters, eg. translates \\u0041 to A. + String discardEscapeChar(const String& input); + + /// Returns the numeric value of the hexadecimal character + static int32_t hexToInt(wchar_t c); + + /// Returns a String where those characters that QueryParser expects to be escaped are escaped by + /// a preceding \. + static String escape(const String& s); + + bool jj_2_1(int32_t xla); + bool jj_3R_2(); + bool jj_3_1(); + bool jj_3R_3(); + + QueryParserTokenPtr jj_consume_token(int32_t kind); + bool jj_scan_token(int32_t kind); + int32_t jj_ntk(); + void jj_add_error_token(int32_t kind, int32_t pos); + void jj_rescan_token(); + void jj_save(int32_t index, int32_t xla); +}; + +} + +#endif diff --git a/include/lucene++/QueryParserCharStream.h b/include/lucene++/QueryParserCharStream.h new file mode 100644 index 00000000..771e36bc --- /dev/null +++ b/include/lucene++/QueryParserCharStream.h @@ -0,0 +1,83 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYPARSERCHARSTREAM_H +#define QUERYPARSERCHARSTREAM_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// This interface describes a character stream that maintains line and column number positions of +/// the characters. It also has the capability to backup the stream to some extent. An implementation +/// of this interface is used in the QueryParserTokenManager. +/// +/// All the methods except backup can be implemented in any fashion. backup needs to be implemented +/// correctly for the correct operation of the lexer. Rest of the methods are all used to get information +/// like line number, column number and the String that constitutes a token and are not used by the lexer. +/// Hence their implementation won't affect the generated lexer's operation. +class LPPAPI QueryParserCharStream { +public: + LUCENE_INTERFACE(QueryParserCharStream); + +public: + /// Returns the next character from the selected input. The method of selecting the input is the + /// responsibility of the class implementing this interface. + virtual wchar_t readChar() = 0; + + /// Returns the column position of the character last read. + /// @deprecated + /// @see #getEndColumn + virtual int32_t getColumn() = 0; + + /// Returns the line number of the character last read. + /// @deprecated + /// @see #getEndLine + virtual int32_t getLine() = 0; + + /// Returns the column number of the last character for current token (being matched after the last + /// call to BeginToken). + virtual int32_t getEndColumn() = 0; + + /// Returns the line number of the last character for current token (being matched after the last call + /// to BeginToken). + virtual int32_t getEndLine() = 0; + + /// Returns the column number of the first character for current token (being matched after the last + /// call to BeginToken). 
+ virtual int32_t getBeginColumn() = 0; + + /// Returns the line number of the first character for current token (being matched after the last call + /// to BeginToken). + virtual int32_t getBeginLine() = 0; + + /// Backs up the input stream by amount steps. Lexer calls this method if it had already read some + /// characters, but could not use them to match a (longer) token. So, they will be used again as the + /// prefix of the next token and it is the implementation's's responsibility to do this right. + virtual void backup(int32_t amount) = 0; + + /// Returns the next character that marks the beginning of the next token. All characters must remain + /// in the buffer between two successive calls to this method to implement backup correctly. + virtual wchar_t BeginToken() = 0; + + /// Returns a string made up of characters from the marked token beginning to the current buffer position. + /// Implementations have the choice of returning anything that they want to. For example, for efficiency, + /// one might decide to just return null, which is a valid implementation. + virtual String GetImage() = 0; + + /// Returns an array of characters that make up the suffix of length for the currently matched token. + /// This is used to build up the matched string for use in actions in the case of MORE. + virtual CharArray GetSuffix(int32_t length) = 0; + + /// The lexer calls this function to indicate that it is done with the stream and hence implementations + /// can free any resources held by this class. Again, the body of this function can be just empty and it + /// will not affect the lexer's operation. 
+ virtual void Done() = 0; +}; + +} + +#endif diff --git a/include/lucene++/QueryParserConstants.h b/include/lucene++/QueryParserConstants.h new file mode 100644 index 00000000..c911c915 --- /dev/null +++ b/include/lucene++/QueryParserConstants.h @@ -0,0 +1,77 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYPARSERCONSTANTS_H +#define QUERYPARSERCONSTANTS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Token literal values and constants. +class LPPAPI QueryParserConstants { +protected: + QueryParserConstants(); + +public: + virtual ~QueryParserConstants(); + LUCENE_INTERFACE(QueryParserConstants); + +public: + enum RegularExpressionId { + _EOF = 0, + _NUM_CHAR = 1, + _ESCAPED_CHAR = 2, + _TERM_START_CHAR = 3, + _TERM_CHAR = 4, + _WHITESPACE = 5, + _QUOTED_CHAR = 6, + AND = 8, + OR = 9, + NOT = 10, + PLUS = 11, + MINUS = 12, + LPAREN = 13, + RPAREN = 14, + COLON = 15, + STAR = 16, + CARAT = 17, + QUOTED = 18, + TERM = 19, + FUZZY_SLOP = 20, + PREFIXTERM = 21, + WILDTERM = 22, + RANGEIN_START = 23, + RANGEEX_START = 24, + NUMBER = 25, + RANGEIN_TO = 26, + RANGEIN_END = 27, + RANGEIN_QUOTED = 28, + RANGEIN_GOOP = 29, + RANGEEX_TO = 30, + RANGEEX_END = 31, + RANGEEX_QUOTED = 32, + RANGEEX_GOOP = 33 + }; + + enum LexicalState { + Boost = 0, + RangeEx = 1, + RangeIn = 2, + DEFAULT = 3 + }; + + /// Literal token values. + static Collection tokenImage; + +protected: + /// Literal token values. 
+ static const wchar_t* _tokenImage[]; +}; + +} + +#endif diff --git a/include/lucene++/QueryParserToken.h b/include/lucene++/QueryParserToken.h new file mode 100644 index 00000000..568a9203 --- /dev/null +++ b/include/lucene++/QueryParserToken.h @@ -0,0 +1,70 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYPARSERTOKEN_H +#define QUERYPARSERTOKEN_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Describes the input token stream. +class LPPAPI QueryParserToken : public LuceneObject { +public: + /// Constructs a new token for the specified Image and Kind. + QueryParserToken(int32_t kind = 0, const String& image = EmptyString); + + virtual ~QueryParserToken(); + + LUCENE_CLASS(QueryParserToken); + +public: + /// An integer that describes the kind of this token. + int32_t kind; + + /// The line number of the first character of this Token. + int32_t beginLine; + + /// The column number of the first character of this Token. + int32_t beginColumn; + + /// The line number of the last character of this Token. + int32_t endLine; + + /// The column number of the last character of this Token. + int32_t endColumn; + + /// The string image of the token. + String image; + + /// A reference to the next regular (non-special) token from the input stream. If this is the last + /// token from the input stream, or if the token manager has not read tokens beyond this one, this + /// field is set to null. This is true only if this token is also a regular token. Otherwise, see + /// below for a description of the contents of this field. 
+ QueryParserTokenPtr next; + + /// This field is used to access special tokens that occur prior to this token, but after the + /// immediately preceding regular (non-special) token. If there are no such special tokens, this + /// field is set to null. When there are more than one such special token, this field refers to the + /// last of these special tokens, which in turn refers to the next previous special token through + /// its specialToken field, and so on until the first special token (whose specialToken field is + /// null). The next fields of special tokens refer to other special tokens that immediately follow + /// it (without an intervening regular token). If there is no such token, this field is null. + QueryParserTokenPtr specialToken; + +public: + /// Returns the image. + virtual String toString(); + + /// Returns a new Token object, by default. However, if you want, you can create and return subclass + /// objects based on the value of ofKind. Simply add the cases to the switch for all those special + /// cases. + static QueryParserTokenPtr newToken(int32_t ofKind, const String& image = EmptyString); +}; + +} + +#endif diff --git a/include/lucene++/QueryParserTokenManager.h b/include/lucene++/QueryParserTokenManager.h new file mode 100644 index 00000000..397f66dc --- /dev/null +++ b/include/lucene++/QueryParserTokenManager.h @@ -0,0 +1,111 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYPARSERTOKENMANAGER_H +#define QUERYPARSERTOKENMANAGER_H + +#include "QueryParserConstants.h" + +namespace Lucene { + +/// Token Manager. 
+class LPPAPI QueryParserTokenManager : public QueryParserConstants, public LuceneObject { +public: + QueryParserTokenManager(const QueryParserCharStreamPtr& stream); + QueryParserTokenManager(const QueryParserCharStreamPtr& stream, int32_t lexState); + + virtual ~QueryParserTokenManager(); + + LUCENE_CLASS(QueryParserTokenManager); + +public: + /// Debug output. + InfoStreamPtr debugStream; + +protected: + static const int64_t jjbitVec0[]; + static const int64_t jjbitVec1[]; + static const int64_t jjbitVec3[]; + static const int64_t jjbitVec4[]; + static const int32_t jjnextStates[]; + + /// Token literal values. + static const wchar_t* jjstrLiteralImages[]; + + /// Lexer state names. + static const wchar_t* lexStateNames[]; + + /// Lex State array. + static const int32_t jjnewLexState[]; + static const int64_t jjtoToken[]; + static const int64_t jjtoSkip[]; + + int32_t curLexState; + int32_t defaultLexState; + int32_t jjnewStateCnt; + int32_t jjround; + int32_t jjmatchedPos; + int32_t jjmatchedKind; + + QueryParserCharStreamPtr input_stream; + IntArray jjrounds; + IntArray jjstateSet; + wchar_t curChar; + +public: + /// Set debug output. + void setDebugStream(const InfoStreamPtr& debugStream); + + /// Reinitialise parser. + void ReInit(const QueryParserCharStreamPtr& stream); + + /// Reinitialise parser. + void ReInit(const QueryParserCharStreamPtr& stream, int32_t lexState); + + /// Switch to specified lex state. + void SwitchTo(int32_t lexState); + + /// Get the next Token. 
+ QueryParserTokenPtr getNextToken(); + +protected: + int32_t jjStopStringLiteralDfa_3(int32_t pos, int64_t active0); + int32_t jjStartNfa_3(int32_t pos, int64_t active0); + int32_t jjStopAtPos(int32_t pos, int32_t kind); + int32_t jjMoveStringLiteralDfa0_3(); + int32_t jjStartNfaWithStates_3(int32_t pos, int32_t kind, int32_t state); + int32_t jjMoveNfa_3(int32_t startState, int32_t curPos); + int32_t jjStopStringLiteralDfa_1(int32_t pos, int64_t active0); + int32_t jjStartNfa_1(int32_t pos, int64_t active0); + int32_t jjMoveStringLiteralDfa0_1(); + int32_t jjMoveStringLiteralDfa1_1(int64_t active0); + int32_t jjStartNfaWithStates_1(int32_t pos, int32_t kind, int32_t state); + int32_t jjMoveNfa_1(int32_t startState, int32_t curPos); + int32_t jjMoveStringLiteralDfa0_0(); + int32_t jjMoveNfa_0(int32_t startState, int32_t curPos); + int32_t jjStopStringLiteralDfa_2(int32_t pos, int64_t active0); + int32_t jjStartNfa_2(int32_t pos, int64_t active0); + int32_t jjMoveStringLiteralDfa0_2(); + int32_t jjMoveStringLiteralDfa1_2(int64_t active0); + int32_t jjStartNfaWithStates_2(int32_t pos, int32_t kind, int32_t state); + int32_t jjMoveNfa_2(int32_t startState, int32_t curPos); + + static bool jjCanMove_0(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); + static bool jjCanMove_1(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); + static bool jjCanMove_2(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2); + + void ReInitRounds(); + QueryParserTokenPtr jjFillToken(); + + void jjCheckNAdd(int32_t state); + void jjAddStates(int32_t start, int32_t end); + void jjCheckNAddTwoStates(int32_t state1, int32_t state2); + void jjCheckNAddStates(int32_t start, int32_t end); +}; + +} + +#endif diff --git a/include/lucene++/QueryTermVector.h b/include/lucene++/QueryTermVector.h new file mode 100644 index 00000000..d482b998 --- /dev/null +++ b/include/lucene++/QueryTermVector.h @@ -0,0 +1,43 @@ 
+///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYTERMVECTOR_H +#define QUERYTERMVECTOR_H + +#include "TermFreqVector.h" + +namespace Lucene { + +class LPPAPI QueryTermVector : public TermFreqVector, public LuceneObject { +public: + /// @param queryTerms The original list of terms from the query, can contain duplicates + QueryTermVector(Collection queryTerms); + QueryTermVector(const String& queryString, const AnalyzerPtr& analyzer); + + virtual ~QueryTermVector(); + + LUCENE_CLASS(QueryTermVector); + +protected: + Collection terms; + Collection termFreqs; + +public: + virtual String toString(); + + int32_t size(); + Collection getTerms(); + Collection getTermFrequencies(); + int32_t indexOf(const String& term); + Collection indexesOf(Collection terms, int32_t start, int32_t length); + +protected: + void processTerms(Collection queryTerms); +}; + +} + +#endif diff --git a/include/lucene++/QueryWrapperFilter.h b/include/lucene++/QueryWrapperFilter.h new file mode 100644 index 00000000..b898a9ea --- /dev/null +++ b/include/lucene++/QueryWrapperFilter.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef QUERYWRAPPERFILTER_H +#define QUERYWRAPPERFILTER_H + +#include "Filter.h" + +namespace Lucene { + +/// Constrains search results to only match those which also match a provided query. 
+/// +/// This could be used, for example, with a {@link TermRangeQuery} on a suitably formatted date field to +/// implement date filtering. One could re-use a single QueryFilter that matches, eg., only documents +/// modified within the last week. The QueryFilter and TermRangeQuery would only need to be reconstructed +/// once per day. +class LPPAPI QueryWrapperFilter : public Filter { +public: + /// Constructs a filter which only matches documents matching query. + QueryWrapperFilter(const QueryPtr& query); + + virtual ~QueryWrapperFilter(); + + LUCENE_CLASS(QueryWrapperFilter); + +protected: + QueryPtr query; + +public: + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); + virtual String toString(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/RAMDirectory.h b/include/lucene++/RAMDirectory.h new file mode 100644 index 00000000..6e0dc7f7 --- /dev/null +++ b/include/lucene++/RAMDirectory.h @@ -0,0 +1,86 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef RAMDIRECTORY_H +#define RAMDIRECTORY_H + +#include "Directory.h" + +namespace Lucene { + +/// A memory-resident {@link Directory} implementation. Locking implementation is by default the +/// {@link SingleInstanceLockFactory} but can be changed with {@link #setLockFactory}. +/// Lock acquisition sequence: RAMDirectory, then RAMFile +class LPPAPI RAMDirectory : public Directory { +public: + /// Constructs an empty {@link Directory}. + RAMDirectory(); + + /// Creates a new RAMDirectory instance from a different Directory implementation. + /// This can be used to load a disk-based index into memory. 
+ /// + /// This should be used only with indices that can fit into memory. + /// + /// Note that the resulting RAMDirectory instance is fully independent from the + /// original Directory (it is a complete copy). Any subsequent changes to the + /// original Directory will not be visible in the RAMDirectory instance. + /// @param dir a Directory value + RAMDirectory(const DirectoryPtr& dir); + + RAMDirectory(const DirectoryPtr& dir, bool closeDir); + + virtual ~RAMDirectory(); + + LUCENE_CLASS(RAMDirectory); + +INTERNAL: + int64_t _sizeInBytes; + MapStringRAMFile fileMap; + +protected: + DirectoryWeakPtr _dirSource; + bool copyDirectory; + bool closeDir; + +public: + virtual void initialize(); + + /// Returns an array of strings, one for each file in the directory. + virtual HashSet listAll(); + + /// Returns true if a file with the given name exists. + virtual bool fileExists(const String& name); + + /// Returns the time the named file was last modified. + virtual uint64_t fileModified(const String& name); + + /// Set the modified time of an existing file to now. + virtual void touchFile(const String& name); + + /// Returns the length of a file in the directory. + virtual int64_t fileLength(const String& name); + + /// Return total size in bytes of all files in this directory. + /// This is currently quantized to RAMOutputStream::BUFFER_SIZE. + int64_t sizeInBytes(); + + /// Removes an existing file in the directory. + virtual void deleteFile(const String& name); + + /// Creates a new, empty file in the directory with the given name. + /// Returns a stream writing this file. + virtual IndexOutputPtr createOutput(const String& name); + + /// Returns a stream reading an existing file. + virtual IndexInputPtr openInput(const String& name); + + /// Closes the store. 
+ virtual void close(); +}; + +} + +#endif diff --git a/include/lucene++/RAMFile.h b/include/lucene++/RAMFile.h new file mode 100644 index 00000000..798ba5ae --- /dev/null +++ b/include/lucene++/RAMFile.h @@ -0,0 +1,57 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef RAMFILE_H +#define RAMFILE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// File used as buffer in RAMDirectory +class LPPAPI RAMFile : public LuceneObject { +public: + RAMFile(); // File used as buffer, in no RAMDirectory + RAMFile(const RAMDirectoryPtr& directory); + virtual ~RAMFile(); + + LUCENE_CLASS(RAMFile); + +INTERNAL: + int64_t length; + RAMDirectoryWeakPtr _directory; + +protected: + Collection buffers; + + int64_t sizeInBytes; + + /// This is publicly modifiable via Directory.touchFile(), so direct access not supported + int64_t lastModified; + +public: + /// For non-stream access from thread that might be concurrent with writing. + int64_t getLength(); + void setLength(int64_t length); + + /// For non-stream access from thread that might be concurrent with writing + int64_t getLastModified(); + void setLastModified(int64_t lastModified); + + int64_t getSizeInBytes(); + + ByteArray addBuffer(int32_t size); + ByteArray getBuffer(int32_t index); + int32_t numBuffers(); + +protected: + /// Allocate a new buffer. Subclasses can allocate differently. 
+ virtual ByteArray newBuffer(int32_t size); +}; + +} + +#endif diff --git a/include/lucene++/RAMInputStream.h b/include/lucene++/RAMInputStream.h new file mode 100644 index 00000000..142385b3 --- /dev/null +++ b/include/lucene++/RAMInputStream.h @@ -0,0 +1,70 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef RAMINPUTSTREAM_H +#define RAMINPUTSTREAM_H + +#include "IndexInput.h" + +namespace Lucene { + +/// A memory-resident {@link IndexInput} implementation. +class LPPAPI RAMInputStream : public IndexInput { +public: + RAMInputStream(); + RAMInputStream(const RAMFilePtr& f); + virtual ~RAMInputStream(); + + LUCENE_CLASS(RAMInputStream); + +public: + static const int32_t BUFFER_SIZE; + +protected: + RAMFilePtr file; + int64_t _length; + ByteArray currentBuffer; + int32_t currentBufferIndex; + int32_t bufferPosition; + int64_t bufferStart; + int32_t bufferLength; + +public: + /// Closes the stream to further operations. + virtual void close(); + + /// The number of bytes in the file. + virtual int64_t length(); + + /// Reads and returns a single byte. + /// @see IndexOutput#writeByte(uint8_t) + virtual uint8_t readByte(); + + /// Reads a specified number of bytes into an array at the specified offset. + /// @param b the array to read bytes into. + /// @param offset the offset in the array to start storing bytes. + /// @param length the number of bytes to read. + /// @see IndexOutput#writeBytes(const uint8_t*,int) + virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); + + /// Returns the current position in this file, where the next read will occur. 
+ /// @see #seek(int64_t) + virtual int64_t getFilePointer(); + + /// Sets current position in this file, where the next read will occur. + /// @see #getFilePointer() + virtual void seek(int64_t pos); + + /// Returns a clone of this stream. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + +protected: + void switchCurrentBuffer(bool enforceEOF); +}; + +} + +#endif diff --git a/include/lucene++/RAMOutputStream.h b/include/lucene++/RAMOutputStream.h new file mode 100644 index 00000000..23814aa9 --- /dev/null +++ b/include/lucene++/RAMOutputStream.h @@ -0,0 +1,78 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef RAMOUTPUTSTREAM_H +#define RAMOUTPUTSTREAM_H + +#include "IndexOutput.h" + +namespace Lucene { + +/// A memory-resident {@link IndexOutput} implementation. +class LPPAPI RAMOutputStream : public IndexOutput { +public: + /// Construct an empty output buffer. + RAMOutputStream(); + RAMOutputStream(const RAMFilePtr& f); + virtual ~RAMOutputStream(); + + LUCENE_CLASS(RAMOutputStream); + +public: + static const int32_t BUFFER_SIZE; + +protected: + RAMFilePtr file; + ByteArray currentBuffer; + int32_t currentBufferIndex; + int32_t bufferPosition; + int64_t bufferStart; + int32_t bufferLength; + +public: + /// Copy the current contents of this buffer to the named output. + void writeTo(const IndexOutputPtr& out); + + /// Resets this to an empty file. + void reset(); + + /// Closes this stream to further operations. + virtual void close(); + + /// Sets current position in this file, where the next write will occur. + /// @see #getFilePointer() + virtual void seek(int64_t pos); + + /// The number of bytes in the file. 
+ virtual int64_t length(); + + /// Writes a single byte. + /// @see IndexInput#readByte() + virtual void writeByte(uint8_t b); + + /// Writes an array of bytes. + /// @param b the bytes to write. + /// @param length the number of bytes to write. + /// @see IndexInput#readBytes(uint8_t*, int32_t, int32_t) + virtual void writeBytes(const uint8_t* b, int32_t offset, int32_t length); + + /// Forces any buffered output to be written. + virtual void flush(); + + /// Returns the current position in this file, where the next write will occur. + virtual int64_t getFilePointer(); + + /// Returns byte usage of all buffers. + int64_t sizeInBytes(); + +protected: + void switchCurrentBuffer(); + void setFileLength(); +}; + +} + +#endif diff --git a/include/lucene++/Random.h b/include/lucene++/Random.h new file mode 100644 index 00000000..1488af80 --- /dev/null +++ b/include/lucene++/Random.h @@ -0,0 +1,36 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef RANDOM_H +#define RANDOM_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Utility class to generate a stream of pseudorandom numbers. 
+class LPPAPI Random : public LuceneObject { +public: + Random(); + Random(int64_t seed); + + virtual ~Random(); + +protected: + int64_t seed; + +public: + void setSeed(int64_t seed); + int32_t nextInt(int32_t limit = INT_MAX); + double nextDouble(); + +protected: + int32_t next(int32_t bits); +}; + +} + +#endif diff --git a/include/lucene++/RawPostingList.h b/include/lucene++/RawPostingList.h new file mode 100644 index 00000000..76d9587b --- /dev/null +++ b/include/lucene++/RawPostingList.h @@ -0,0 +1,36 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef RAWPOSTINGLIST_H +#define RAWPOSTINGLIST_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// This is the base class for an in-memory posting list, keyed by a Token. {@link TermsHash} maintains a +/// hash table holding one instance of this per unique Token. Consumers of TermsHash ({@link TermsHashConsumer}) +/// must subclass this class with its own concrete class. FreqProxTermsWriterPostingList is a private inner +/// class used for the freq/prox postings, and TermVectorsTermsWriterPostingList is a private inner class used +/// to hold TermVectors postings. 
+class RawPostingList : public LuceneObject { +public: + RawPostingList(); + virtual ~RawPostingList(); + + LUCENE_CLASS(RawPostingList); + +public: + static const int32_t BYTES_SIZE; + + int32_t textStart; + int32_t intStart; + int32_t byteStart; +}; + +} + +#endif diff --git a/include/lucene++/ReadOnlyDirectoryReader.h b/include/lucene++/ReadOnlyDirectoryReader.h new file mode 100644 index 00000000..eff9715d --- /dev/null +++ b/include/lucene++/ReadOnlyDirectoryReader.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef READONLYDIRECTORYREADER_H +#define READONLYDIRECTORYREADER_H + +#include "DirectoryReader.h" + +namespace Lucene { + +class LPPAPI ReadOnlyDirectoryReader : public DirectoryReader { +public: + ReadOnlyDirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, const IndexDeletionPolicyPtr& deletionPolicy, int32_t termInfosIndexDivisor); + ReadOnlyDirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection oldReaders, + Collection oldStarts, MapStringByteArray oldNormsCache, bool doClone, int32_t termInfosIndexDivisor); + ReadOnlyDirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor); + virtual ~ReadOnlyDirectoryReader(); + + LUCENE_CLASS(ReadOnlyDirectoryReader); + +public: + /// Tries to acquire the WriteLock on this directory. this method is only valid if this + /// IndexReader is directory owner. 
+ virtual void acquireWriteLock(); +}; + +} + +#endif diff --git a/include/lucene++/ReadOnlySegmentReader.h b/include/lucene++/ReadOnlySegmentReader.h new file mode 100644 index 00000000..10a7dbd0 --- /dev/null +++ b/include/lucene++/ReadOnlySegmentReader.h @@ -0,0 +1,29 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef READONLYSEGMENTREADER_H +#define READONLYSEGMENTREADER_H + +#include "SegmentReader.h" + +namespace Lucene { + +class LPPAPI ReadOnlySegmentReader : public SegmentReader { +public: + virtual ~ReadOnlySegmentReader(); + + LUCENE_CLASS(ReadOnlySegmentReader); + +public: + static void noWrite(); + + virtual void acquireWriteLock(); + virtual bool isDeleted(int32_t n); +}; + +} + +#endif diff --git a/include/lucene++/Reader.h b/include/lucene++/Reader.h new file mode 100644 index 00000000..150a4f23 --- /dev/null +++ b/include/lucene++/Reader.h @@ -0,0 +1,56 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef READER_H +#define READER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Abstract class for reading character streams. +class LPPAPI Reader : public LuceneObject { +protected: + Reader(); + +public: + virtual ~Reader(); + LUCENE_CLASS(Reader); + +public: + static const int32_t READER_EOF; + + /// Read a single character. + virtual int32_t read(); + + /// Read characters into a portion of an array. 
+ virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length) = 0; + + /// Skip characters. + virtual int64_t skip(int64_t n); + + /// Close the stream. + virtual void close() = 0; + + /// Tell whether this stream supports the mark() operation + virtual bool markSupported(); + + /// Mark the present position in the stream. Subsequent calls to reset() will attempt to reposition the + /// stream to this point. + virtual void mark(int32_t readAheadLimit); + + /// Reset the stream. If the stream has been marked, then attempt to reposition it at the mark. If the stream + /// has not been marked, then attempt to reset it in some way appropriate to the particular stream, for example + /// by repositioning it to its starting point. + virtual void reset(); + + /// The number of bytes in the stream. + virtual int64_t length(); +}; + +} + +#endif diff --git a/include/lucene++/ReaderUtil.h b/include/lucene++/ReaderUtil.h new file mode 100644 index 00000000..97ddf36b --- /dev/null +++ b/include/lucene++/ReaderUtil.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef READERUTIL_H +#define READERUTIL_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Common util methods for dealing with {@link IndexReader}s. +class LPPAPI ReaderUtil : public LuceneObject { +public: + virtual ~ReaderUtil(); + LUCENE_CLASS(ReaderUtil); + +public: + /// Gathers sub-readers from reader into a List. + static void gatherSubReaders(Collection allSubReaders, const IndexReaderPtr& reader); + + /// Returns sub IndexReader that contains the given document id. 
+ /// + /// @param doc Id of document + /// @param reader Parent reader + /// @return Sub reader of parent which contains the specified doc id + static IndexReaderPtr subReader(int32_t doc, const IndexReaderPtr& reader); + + /// Returns sub-reader subIndex from reader. + /// + /// @param reader Parent reader + /// @param subIndex Index of desired sub reader + /// @return The subreader at subIndex + static IndexReaderPtr subReader(const IndexReaderPtr& reader, int32_t subIndex); + + /// Returns index of the searcher/reader for document n in the array used to construct this + /// searcher/reader. + static int32_t subIndex(int32_t n, Collection docStarts); +}; + +} + +#endif diff --git a/include/lucene++/ReqExclScorer.h b/include/lucene++/ReqExclScorer.h new file mode 100644 index 00000000..be653c09 --- /dev/null +++ b/include/lucene++/ReqExclScorer.h @@ -0,0 +1,58 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef REQEXCLSCORER_H +#define REQEXCLSCORER_H + +#include "Scorer.h" + +namespace Lucene { + +/// A Scorer for queries with a required subscorer and an excluding (prohibited) sub DocIdSetIterator. +/// This Scorer implements {@link Scorer#skipTo(int32_t)}, and it uses the skipTo() on the given scorers. +class ReqExclScorer : public Scorer { +public: + /// Construct a ReqExclScorer. + /// @param reqScorer The scorer that must match, except where + /// @param exclDisi indicates exclusion. 
+ ReqExclScorer(const ScorerPtr& reqScorer, const DocIdSetIteratorPtr& exclDisi); + virtual ~ReqExclScorer(); + + LUCENE_CLASS(ReqExclScorer); + +protected: + ScorerPtr reqScorer; + DocIdSetIteratorPtr exclDisi; + int32_t doc; + +public: + virtual int32_t nextDoc(); + virtual int32_t docID(); + + /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} + /// is called the first time. + /// @return The score of the required scorer. + virtual double score(); + + virtual int32_t advance(int32_t target); + +protected: + /// Advance to non excluded doc. + /// + /// On entry: + ///
+    /// <ul>
+    /// <li>reqScorer != null,
+    /// <li>exclScorer != null,
+    /// <li>reqScorer was advanced once via next() or skipTo() and reqScorer.doc() may still be excluded.
+    /// </ul>
+ /// Advances reqScorer a non excluded required doc, if any. + /// @return true iff there is a non excluded required doc. + int32_t toNonExcluded(); +}; + +} + +#endif diff --git a/include/lucene++/ReqOptSumScorer.h b/include/lucene++/ReqOptSumScorer.h new file mode 100644 index 00000000..9af9380b --- /dev/null +++ b/include/lucene++/ReqOptSumScorer.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef REQOPTSUMSCORER_H +#define REQOPTSUMSCORER_H + +#include "Scorer.h" + +namespace Lucene { + +/// A Scorer for queries with a required part and an optional part. Delays skipTo() on the optional part +/// until a score() is needed. This Scorer implements {@link Scorer#skipTo(int32_t)}. +class ReqOptSumScorer : public Scorer { +public: + ReqOptSumScorer(const ScorerPtr& reqScorer, const ScorerPtr& optScorer); + virtual ~ReqOptSumScorer(); + + LUCENE_CLASS(ReqOptSumScorer); + +protected: + ScorerPtr reqScorer; + ScorerPtr optScorer; + +public: + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); + virtual int32_t docID(); + + /// Returns the score of the current document matching the query. Initially invalid, until {@link #next()} + /// is called the first time. + /// @return The score of the required scorer, eventually increased by the score of the optional scorer when + /// it also matches the current document. 
+ virtual double score(); +}; + +} + +#endif diff --git a/include/lucene++/ReusableStringReader.h b/include/lucene++/ReusableStringReader.h new file mode 100644 index 00000000..ce4879bf --- /dev/null +++ b/include/lucene++/ReusableStringReader.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef REUSABLESTRINGREADER_H +#define REUSABLESTRINGREADER_H + +#include "Reader.h" + +namespace Lucene { + +/// Used by DocumentsWriter to implemented a StringReader that can be reset to a new string; we use this +/// when tokenizing the string value from a Field. +class ReusableStringReader : public Reader { +public: + ReusableStringReader(); + virtual ~ReusableStringReader(); + + LUCENE_CLASS(ReusableStringReader); + +public: + int32_t upto; + int32_t left; + String s; + +public: + virtual void init(const String& s); + + using Reader::read; + + /// Read characters into a portion of an array. + virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); + + /// Close the stream. + virtual void close(); +}; + +} + +#endif diff --git a/include/lucene++/ReverseOrdFieldSource.h b/include/lucene++/ReverseOrdFieldSource.h new file mode 100644 index 00000000..57491aa2 --- /dev/null +++ b/include/lucene++/ReverseOrdFieldSource.h @@ -0,0 +1,52 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef REVERSEORDFIELDSOURCE_H +#define REVERSEORDFIELDSOURCE_H + +#include "ValueSource.h" + +namespace Lucene { + +/// Obtains the ordinal of the field value from the default Lucene {@link FieldCache} using getStringIndex() +/// and reverses the order. +/// +/// The native lucene index order is used to assign an ordinal value for each field value. +/// +/// Field values (terms) are lexicographically ordered by unicode value, and numbered starting at 1. Example +/// of reverse ordinal (rord): +/// +/// If there were only three field values: "apple","banana","pear" then rord("apple")=3, rord("banana")=2, +/// ord("pear")=1 +/// +/// WARNING: rord() depends on the position in an index and can thus change when other documents are inserted +/// or deleted, or if a MultiSearcher is used. +/// +/// NOTE: with the switch in 2.9 to segment-based searching, if {@link #getValues} is invoked with a composite +/// (multi-segment) reader, this can easily cause double RAM usage for the values in the FieldCache. It's +/// best to switch your application to pass only atomic (single segment) readers to this API. +class LPPAPI ReverseOrdFieldSource : public ValueSource { +public: + /// Constructor for a certain field. + /// @param field field whose values reverse order is used. 
+ ReverseOrdFieldSource(const String& field); + virtual ~ReverseOrdFieldSource(); + + LUCENE_CLASS(ReverseOrdFieldSource); + +protected: + String field; + +public: + virtual String description(); + virtual DocValuesPtr getValues(const IndexReaderPtr& reader); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/ScoreCachingWrappingScorer.h b/include/lucene++/ScoreCachingWrappingScorer.h new file mode 100644 index 00000000..5f1d4b79 --- /dev/null +++ b/include/lucene++/ScoreCachingWrappingScorer.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SCORECACHINGWRAPPINGSCORER_H +#define SCORECACHINGWRAPPINGSCORER_H + +#include "Scorer.h" + +namespace Lucene { + +/// A {@link Scorer} which wraps another scorer and caches the score of the current document. Successive +/// calls to {@link #score()} will return the same result and will not invoke the wrapped Scorer's score() +/// method, unless the current document has changed. +/// +/// This class might be useful due to the changes done to the {@link Collector} interface, in which the +/// score is not computed for a document by default, only if the collector requests it. Some collectors +/// may need to use the score in several places, however all they have in hand is a {@link Scorer} object, +/// and might end up computing the score of a document more than once. +class LPPAPI ScoreCachingWrappingScorer : public Scorer { +public: + /// Creates a new instance by wrapping the given scorer. 
+ ScoreCachingWrappingScorer(const ScorerPtr& scorer); + virtual ~ScoreCachingWrappingScorer(); + + LUCENE_CLASS(ScoreCachingWrappingScorer); + +protected: + ScorerWeakPtr _scorer; + int32_t curDoc; + double curScore; + +public: + SimilarityPtr getSimilarity(); + virtual double score(); + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual void score(const CollectorPtr& collector); + virtual int32_t advance(int32_t target); + +protected: + virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); +}; + +} + +#endif diff --git a/include/lucene++/ScoreDoc.h b/include/lucene++/ScoreDoc.h new file mode 100644 index 00000000..64ce8385 --- /dev/null +++ b/include/lucene++/ScoreDoc.h @@ -0,0 +1,37 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SCOREDOC_H +#define SCOREDOC_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Expert: Returned by low-level search implementations. +/// @see TopDocs +class LPPAPI ScoreDoc : public LuceneObject { +public: + ScoreDoc(int32_t doc, double score); + virtual ~ScoreDoc(); + + LUCENE_CLASS(ScoreDoc); + +public: + /// The score of this document for the query. + double score; + + /// A hit document's number. + /// @see Searcher#doc(int32_t) + int32_t doc; + +public: + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/Scorer.h b/include/lucene++/Scorer.h new file mode 100644 index 00000000..181ac392 --- /dev/null +++ b/include/lucene++/Scorer.h @@ -0,0 +1,86 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SCORER_H +#define SCORER_H + +#include "DocIdSetIterator.h" +#include "BooleanClause.h" +#include "Weight.h" + +namespace Lucene { + + + class LPPAPI ScorerVisitor{ + public: + virtual void visitOptional(QueryPtr parent,QueryPtr child,ScorerPtr scorer)=0; + virtual void visitRequired(QueryPtr parent,QueryPtr child,ScorerPtr scorer)=0; + virtual void visitProhibited(QueryPtr parent,QueryPtr child,ScorerPtr scorer)=0; + + }; + +/// Common scoring functionality for different types of queries. +/// +/// A Scorer iterates over documents matching a query in increasing order of doc Id. +/// +/// Document scores are computed using a given Similarity implementation. +/// +/// NOTE: The values NEGATIVE_INFINITY and POSITIVE_INFINITY are not valid scores. Certain collectors +/// (eg {@link TopScoreDocCollector}) will not properly collect hits with these scores. +class LPPAPI Scorer : public DocIdSetIterator { +public: + /// Constructs a Scorer. + /// @param similarity The Similarity implementation used by this scorer. + Scorer(const SimilarityPtr& similarity); + Scorer(const WeightPtr& weight); + virtual ~Scorer(); + + LUCENE_CLASS(Scorer); + WeightPtr weight; + +protected: + SimilarityPtr similarity; + +public: + /// Returns the Similarity implementation used by this scorer. + SimilarityPtr getSimilarity(); + + /// Scores and collects all matching documents. + /// @param collector The collector to which all matching documents are passed. + virtual void score(const CollectorPtr& collector); + + /// Returns the score of the current document matching the query. Initially invalid, until {@link + /// #nextDoc()} or {@link #advance(int32_t)} is called the first time, or when called from within + /// {@link Collector#collect}. 
+ virtual double score() = 0; + + void visitSubScorers(QueryPtr parent, BooleanClause::Occur relationship, + ScorerVisitor *visitor); + + void visitScorers(ScorerVisitor *visitor); + + virtual float termFreq(){ + boost::throw_exception(RuntimeException(L"Freq not implemented")); + } + +protected: + /// Collects matching documents in a range. Hook for optimization. + /// Note, firstDocID is added to ensure that {@link #nextDoc()} was called before this method. + /// + /// @param collector The collector to which all matching documents are passed. + /// @param max Do not score documents past this. + /// @param firstDocID The first document ID (ensures {@link #nextDoc()} is called before this method. + /// @return true if more matching documents may remain. + virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); + + friend class BooleanScorer; + friend class ScoreCachingWrappingScorer; +}; + + +} + +#endif diff --git a/include/lucene++/ScorerDocQueue.h b/include/lucene++/ScorerDocQueue.h new file mode 100644 index 00000000..26724749 --- /dev/null +++ b/include/lucene++/ScorerDocQueue.h @@ -0,0 +1,77 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SCORERDOCQUEUE_H +#define SCORERDOCQUEUE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A ScorerDocQueue maintains a partial ordering of its Scorers such that the least Scorer can always be +/// found in constant time. Put()'s and pop()'s require log(size) time. The ordering is by Scorer::doc(). 
+class LPPAPI ScorerDocQueue : public LuceneObject { +public: + ScorerDocQueue(int32_t maxSize); + virtual ~ScorerDocQueue(); + + LUCENE_CLASS(ScorerDocQueue); + +protected: + Collection heap; + int32_t maxSize; + int32_t _size; + HeapedScorerDocPtr topHSD; // same as heap[1], only for speed + +public: + /// Adds a Scorer to a ScorerDocQueue in log(size) time. If one tries to add more Scorers than maxSize + /// ArrayIndexOutOfBound exception is thrown. + void put(const ScorerPtr& scorer); + + /// Adds a Scorer to the ScorerDocQueue in log(size) time if either the ScorerDocQueue is not full, or + /// not lessThan(scorer, top()). + /// @return true if scorer is added, false otherwise. + bool insert(const ScorerPtr& scorer); + + /// Returns the least Scorer of the ScorerDocQueue in constant time. Should not be used when the queue + /// is empty. + ScorerPtr top(); + + /// Returns document number of the least Scorer of the ScorerDocQueue in constant time. + /// Should not be used when the queue is empty. + int32_t topDoc(); + + double topScore(); + bool topNextAndAdjustElsePop(); + bool topSkipToAndAdjustElsePop(int32_t target); + + /// Removes and returns the least scorer of the ScorerDocQueue in log(size) time. Should not be used + /// when the queue is empty. + ScorerPtr pop(); + + /// Should be called when the scorer at top changes doc() value. + void adjustTop(); + + /// Returns the number of scorers currently stored in the ScorerDocQueue. + int32_t size(); + + /// Removes all entries from the ScorerDocQueue. + void clear(); + +protected: + bool checkAdjustElsePop(bool cond); + + /// Removes the least scorer of the ScorerDocQueue in log(size) time. Should not be used when the + /// queue is empty. 
+ void popNoResult(); + + void upHeap(); + void downHeap(); +}; + +} + +#endif diff --git a/include/lucene++/Searchable.h b/include/lucene++/Searchable.h new file mode 100644 index 00000000..7035a76b --- /dev/null +++ b/include/lucene++/Searchable.h @@ -0,0 +1,109 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEARCHABLE_H +#define SEARCHABLE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// The interface for search implementations. +/// +/// Searchable is the abstract network protocol for searching. Implementations provide search over a single +/// index, over multiple indices, and over indices on remote servers. +/// +/// Queries, filters and sort criteria are designed to be compact so that they may be efficiently passed to a +/// remote index, with only the top-scoring hits being returned, rather than every matching hit. +/// +/// NOTE: this interface is kept public for convenience. Since it is not expected to be implemented directly, +/// it may be changed unexpectedly between releases. +class LPPAPI Searchable { +public: + LUCENE_INTERFACE(Searchable); + virtual ~Searchable() {} + +public: + /// Lower-level search API. + /// + /// {@link Collector#collect(int32_t)} is called for every document. Collector-based access to remote + /// indexes is discouraged. + /// + /// Applications should only use this if they need all of the matching documents. The high-level search + /// API ({@link Searcher#search(QueryPtr, int32_t)}) is usually more efficient, as it skips non-high-scoring + /// hits. + /// + /// @param weight To match documents + /// @param filter If non-null, used to permit documents to be collected. 
+ /// @param collector To receive hits + virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& collector) = 0; + + /// Frees resources associated with this Searcher. Be careful not to call this method while you are still + /// using objects that reference this Searchable. + virtual void close() = 0; + + /// Returns the number of documents containing term. + /// @see IndexReader#docFreq(TermPtr) + virtual int32_t docFreq(const TermPtr& term) = 0; + + /// For each term in the terms array, calculates the number of documents containing term. Returns an array + /// with these document frequencies. Used to minimize number of remote calls. + virtual Collection docFreqs(Collection terms) = 0; + + /// Returns one greater than the largest possible document number. + /// @see IndexReader#maxDoc() + virtual int32_t maxDoc() = 0; + + /// Low-level search implementation. Finds the top n hits for query, applying filter if non-null. + /// Applications should usually call {@link Searcher#search(QueryPtr, int32_t)} or {@link + /// Searcher#search(QueryPtr, FilterPtr, int32_t)} instead. + virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) = 0; + + /// Returns the stored fields of document i. + /// @see IndexReader#document(int32_t) + virtual DocumentPtr doc(int32_t n) = 0; + + /// Get the {@link Document} at the n'th position. The {@link FieldSelector} may be used to determine what + /// {@link Field}s to load and how they should be loaded. + /// + /// NOTE: If the underlying Reader (more specifically, the underlying FieldsReader) is closed before the + /// lazy {@link Field} is loaded an exception may be thrown. If you want the value of a lazy {@link Field} + /// to be available after closing you must explicitly load it or fetch the Document again with a new loader. 
+ /// + /// @param n Get the document at the n'th position + /// @param fieldSelector The {@link FieldSelector} to use to determine what Fields should be loaded on the + /// Document. May be null, in which case all Fields will be loaded. + /// @return The stored fields of the {@link Document} at the n'th position + /// + /// @see IndexReader#document(int32_t, FieldSelectorPtr) + /// @see Fieldable + /// @see FieldSelector + /// @see SetBasedFieldSelector + /// @see LoadFirstFieldSelector + virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector) = 0; + + /// Called to re-write queries into primitive queries. + virtual QueryPtr rewrite(const QueryPtr& query) = 0; + + /// Low-level implementation method. Returns an Explanation that describes how doc scored against weight. + /// + /// This is intended to be used in developing Similarity implementations, and for good performance, should + /// not be displayed with every hit. Computing an explanation is as expensive as executing the query over + /// the entire index. + /// + /// Applications should call {@link Searcher#explain(QueryPtr, int32_t)}. + virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc) = 0; + + /// Low-level search implementation with arbitrary sorting. Finds the top n hits for query, applying filter + /// if non-null, and sorting the hits by the criteria in sort. + /// + /// Applications should usually call {@link Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr)} instead. + virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) = 0; +}; + +} + +#endif diff --git a/include/lucene++/Searcher.h b/include/lucene++/Searcher.h new file mode 100644 index 00000000..91b09f43 --- /dev/null +++ b/include/lucene++/Searcher.h @@ -0,0 +1,105 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEARCHER_H +#define SEARCHER_H + +#include "Searchable.h" + +namespace Lucene { + +/// An abstract base class for search implementations. Implements the main search methods. +/// +/// Note that you can only access hits from a Searcher as long as it is not yet closed, otherwise an IO +/// exception will be thrown. +class LPPAPI Searcher : public Searchable, public LuceneObject { +public: + Searcher(); + virtual ~Searcher(); + + LUCENE_CLASS(Searcher); + +protected: + /// The Similarity implementation used by this searcher. + SimilarityPtr similarity; + +public: + /// Search implementation with arbitrary sorting. Finds the top n hits for query, applying filter if + /// non-null, and sorting the hits by the criteria in sort. + /// + /// NOTE: this does not compute scores by default; use {@link IndexSearcher#setDefaultFieldSortScoring} + /// to enable scoring. + virtual TopFieldDocsPtr search(const QueryPtr& query, const FilterPtr& filter, int32_t n, const SortPtr& sort); + + /// Lower-level search API. + /// + /// {@link Collector#collect(int32_t)} is called for every matching document. + /// + /// Applications should only use this if they need all of the matching documents. The high-level + /// search API ({@link Searcher#search(QueryPtr, int32_t)}) is usually more efficient, as it skips + /// non-high-scoring hits. + /// + /// Note: The score passed to this method is a raw score. In other words, the score will not necessarily + /// be a double whose value is between 0 and 1. + virtual void search(const QueryPtr& query, const CollectorPtr& results); + + /// Lower-level search API. + /// + /// {@link Collector#collect(int32_t)} is called for every matching document. Collector-based access to + /// remote indexes is discouraged. 
+ /// + /// Applications should only use this if they need all of the matching documents. The high-level search + /// API ({@link Searcher#search(QueryPtr, FilterPtr, int32_t)}) is usually more efficient, as it skips + /// non-high-scoring hits. + /// + /// @param query To match documents + /// @param filter If non-null, used to permit documents to be collected. + /// @param results To receive hits + virtual void search(const QueryPtr& query, const FilterPtr& filter, const CollectorPtr& results); + + /// Finds the top n hits for query, applying filter if non-null. + virtual TopDocsPtr search(const QueryPtr& query, const FilterPtr& filter, int32_t n); + + /// Finds the top n hits for query. + virtual TopDocsPtr search(const QueryPtr& query, int32_t n); + + /// Returns an Explanation that describes how doc scored against query. + /// + /// This is intended to be used in developing Similarity implementations, and for good performance, + /// should not be displayed with every hit. Computing an explanation is as expensive as executing the + /// query over the entire index. + virtual ExplanationPtr explain(const QueryPtr& query, int32_t doc); + + /// Set the Similarity implementation used by this Searcher. + virtual void setSimilarity(const SimilarityPtr& similarity); + + /// Return the Similarity implementation used by this Searcher. + /// + /// This defaults to the current value of {@link Similarity#getDefault()}. 
+ virtual SimilarityPtr getSimilarity(); + + virtual Collection docFreqs(Collection terms); + + virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results) = 0; + virtual void close() = 0; + virtual int32_t docFreq(const TermPtr& term) = 0; + virtual int32_t maxDoc() = 0; + virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) = 0; + virtual DocumentPtr doc(int32_t n) = 0; + virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector) = 0; + virtual QueryPtr rewrite(const QueryPtr& query) = 0; + virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc) = 0; + virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) = 0; + +protected: + /// Creates a weight for query. + /// @return New weight + virtual WeightPtr createWeight(const QueryPtr& query); +}; + +} + +#endif diff --git a/include/lucene++/SegmentInfo.h b/include/lucene++/SegmentInfo.h new file mode 100644 index 00000000..d1706d14 --- /dev/null +++ b/include/lucene++/SegmentInfo.h @@ -0,0 +1,173 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTINFO_H +#define SEGMENTINFO_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Information about a segment such as it's name, directory, and files +/// related to the segment. 
+class LPPAPI SegmentInfo : public LuceneObject { +public: + SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir); + + SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir, bool isCompoundFile, bool hasSingleNormFile); + + SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir, bool isCompoundFile, + bool hasSingleNormFile, int32_t docStoreOffset, const String& docStoreSegment, + bool docStoreIsCompoundFile, bool hasProx); + + /// Construct a new SegmentInfo instance by reading a previously saved SegmentInfo from input. + /// @param dir directory to load from. + /// @param format format of the segments info file. + /// @param input input handle to read segment info from. + SegmentInfo(const DirectoryPtr& dir, int32_t format, const IndexInputPtr& input); + + virtual ~SegmentInfo(); + + LUCENE_CLASS(SegmentInfo); + +public: + static const int32_t NO; // no norms; no deletes; + static const int32_t YES; // have norms; have deletes; + static const int32_t CHECK_DIR; // must check dir to see if there are norms/deletions + static const int32_t WITHOUT_GEN; // a file name that has no GEN in it. + +protected: + // true if this is a segments file written before lock-less commits (2.1) + bool preLockless; + + // current generation of del file; NO if there are no deletes; CHECK_DIR if it's a pre-2.1 segment + // (and we must check filesystem); YES or higher if there are deletes at generation N + int64_t delGen; + + // current generation of each field's norm file. If this array is null, for lockLess this means no + // separate norms. For preLockLess this means we must check filesystem. 
If this array is not null, + // its values mean: NO says this field has no separate norms; CHECK_DIR says it is a preLockLess + // segment and filesystem must be checked; >= YES says this field has separate norms with the + // specified generation + Collection normGen; + + // NO if it is not; YES if it is; CHECK_DIR if it's pre-2.1 (ie, must check file system to see if + // .cfs and .nrm exist) + uint8_t isCompoundFile; + + // true if this segment maintains norms in a single file; false otherwise this is currently false for + // segments populated by DocumentWriter and true for newly created merged segments (both compound and + // non compound). + bool hasSingleNormFile; + + // cached list of files that this segment uses in the Directory + HashSet _files; + + // total byte size of all of our files (computed on demand) + int64_t _sizeInBytes; + + // if this segment shares stored fields & vectors, this offset is where in that file this segment's + // docs begin + int32_t docStoreOffset; + + // name used to derive fields/vectors file we share with other segments + String docStoreSegment; + + // whether doc store files are stored in compound file (*.cfx) + bool docStoreIsCompoundFile; + + // How many deleted docs in this segment, or -1 if not yet known (if it's an older index) + int32_t delCount; + + // True if this segment has any fields with omitTermFreqAndPositions == false + bool hasProx; + + MapStringString diagnostics; + +public: + String name; // unique name in dir + int32_t docCount; // number of docs in seg + DirectoryPtr dir; // where segment resides + +public: + /// Copy everything from src SegmentInfo into our instance. + void reset(const SegmentInfoPtr& src); + + void setDiagnostics(MapStringString diagnostics); + MapStringString getDiagnostics(); + + void setNumFields(int32_t numFields); + + /// Returns total size in bytes of all of files used by this segment. 
+ int64_t sizeInBytes(); + + bool hasDeletions(); + void advanceDelGen(); + void clearDelGen(); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + String getDelFileName(); + + /// Returns true if this field for this segment has saved a separate norms file (__N.sX). + /// @param fieldNumber the field index to check + bool hasSeparateNorms(int32_t fieldNumber); + + /// Returns true if any fields in this segment have separate norms. + bool hasSeparateNorms(); + + /// Increment the generation count for the norms file for this field. + /// @param fieldIndex field whose norm file will be rewritten + void advanceNormGen(int32_t fieldIndex); + + /// Get the file name for the norms file for this field. + /// @param number field index + String getNormFileName(int32_t number); + + /// Mark whether this segment is stored as a compound file. + /// @param isCompoundFile true if this is a compound file; else, false + void setUseCompoundFile(bool isCompoundFile); + + /// Returns true if this segment is stored as a compound file; else, false. + bool getUseCompoundFile(); + + int32_t getDelCount(); + void setDelCount(int32_t delCount); + int32_t getDocStoreOffset(); + bool getDocStoreIsCompoundFile(); + void setDocStoreIsCompoundFile(bool v); + String getDocStoreSegment(); + void setDocStoreOffset(int32_t offset); + void setDocStore(int32_t offset, const String& segment, bool isCompoundFile); + + /// Save this segment's info. + void write(const IndexOutputPtr& output); + + void setHasProx(bool hasProx); + bool getHasProx(); + + /// Return all files referenced by this SegmentInfo. The returns List is a locally cached List so + /// you should not modify it. + HashSet files(); + + /// Used for debugging. + String segString(const DirectoryPtr& dir); + + /// We consider another SegmentInfo instance equal if it has the same dir and same name. 
+ virtual bool equals(const LuceneObjectPtr& other); + + virtual int32_t hashCode(); + +protected: + void addIfExists(HashSet files, const String& fileName); + + /// Called whenever any change is made that affects which files this segment has. + void clearFiles(); +}; + +} + +#endif diff --git a/include/lucene++/SegmentInfoCollection.h b/include/lucene++/SegmentInfoCollection.h new file mode 100644 index 00000000..58819d07 --- /dev/null +++ b/include/lucene++/SegmentInfoCollection.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTINFOCOLLECTION_H +#define SEGMENTINFOCOLLECTION_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A collection of SegmentInfo objects to be used as a base class for {@link SegmentInfos} +class LPPAPI SegmentInfoCollection : public LuceneObject { +public: + SegmentInfoCollection(); + virtual ~SegmentInfoCollection(); + + LUCENE_CLASS(SegmentInfoCollection); + +protected: + Collection segmentInfos; + +public: + int32_t size(); + bool empty(); + void clear(); + void add(const SegmentInfoPtr& info); + void add(int32_t pos, const SegmentInfoPtr& info); + void addAll(const SegmentInfoCollectionPtr& segmentInfos); + bool equals(const LuceneObjectPtr& other); + int32_t find(const SegmentInfoPtr& info); + bool contains(const SegmentInfoPtr& info); + void remove(int32_t pos); + void remove(int32_t start, int32_t end); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/SegmentInfos.h b/include/lucene++/SegmentInfos.h new file mode 100644 index 00000000..d883206e --- /dev/null +++ b/include/lucene++/SegmentInfos.h @@ -0,0 +1,183 @@ 
+///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTINFOS_H +#define SEGMENTINFOS_H + +#include "SegmentInfoCollection.h" + +namespace Lucene { + +/// A collection of SegmentInfo objects with methods for operating on those segments in relation to the file system. +class LPPAPI SegmentInfos : public SegmentInfoCollection { +public: + SegmentInfos(); + virtual ~SegmentInfos(); + + LUCENE_CLASS(SegmentInfos); + +public: + /// The file format version, a negative number. Works since counter, the old 1st entry, is always >= 0 + static const int32_t FORMAT; + + /// This format adds details used for lockless commits. It differs slightly from the previous format in that file names + /// are never re-used (write once). Instead, each file is written to the next generation. For example, segments_1, + /// segments_2, etc. This allows us to not use a commit lock. + /// See fileformats for details. + static const int32_t FORMAT_LOCKLESS; + + /// This format adds a "hasSingleNormFile" flag into each segment info. + static const int32_t FORMAT_SINGLE_NORM_FILE; + + /// This format allows multiple segments to share a single vectors and stored fields file. + static const int32_t FORMAT_SHARED_DOC_STORE; + + /// This format adds a checksum at the end of the file to ensure all bytes were successfully written. + static const int32_t FORMAT_CHECKSUM; + + /// This format adds the deletion count for each segment. This way IndexWriter can efficiently report numDocs(). 
+ static const int32_t FORMAT_DEL_COUNT; + + /// This format adds the boolean hasProx to record if any fields in the segment store prox information (ie, have + /// omitTermFreqAndPositions == false) + static const int32_t FORMAT_HAS_PROX; + + /// This format adds optional commit userData storage. + static const int32_t FORMAT_USER_DATA; + + /// This format adds optional per-segment string diagnostics storage, and switches userData to Map + static const int32_t FORMAT_DIAGNOSTICS; + + /// This must always point to the most recent file format. + static const int32_t CURRENT_FORMAT; + + int32_t counter; // used to name new segments + +private: + /// Advanced configuration of retry logic in loading segments_N file. + static int32_t defaultGenFileRetryCount; + static int32_t defaultGenFileRetryPauseMsec; + static int32_t defaultGenLookaheadCount; + + /// Counts how often the index has been changed by adding or deleting docs. + /// Starting with the current time in milliseconds forces to create unique version numbers. + int64_t version; + + int64_t generation; // generation of the "segments_N" for the next commit + + int64_t lastGeneration; // generation of the "segments_N" file we last successfully read + // or wrote; this is normally the same as generation except if + // there was an exception that had interrupted a commit + + MapStringString userData; // Opaque map that user can specify during IndexWriter::commit + + static MapStringString singletonUserData; + + static InfoStreamPtr infoStream; + ChecksumIndexOutputPtr pendingSegnOutput; + +public: + SegmentInfoPtr info(int32_t i); + String getCurrentSegmentFileName(); + String getNextSegmentFileName(); + + /// Read a particular segmentFileName. Note that this may throw an IOException if a commit is in process. + void read(const DirectoryPtr& directory, const String& segmentFileName); + + /// This version of read uses the retry logic (for lock-less commits) to find the right segments file to load. 
+ void read(const DirectoryPtr& directory); + + /// Returns a copy of this instance, also copying each SegmentInfo. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Version number when this SegmentInfos was generated. + int64_t getVersion(); + int64_t getGeneration(); + int64_t getLastGeneration(); + + /// Returns a new SegmentInfos containing the SegmentInfo instances in the specified range first (inclusive) to + /// last (exclusive), so total number of segments returned is last-first. + SegmentInfosPtr range(int32_t first, int32_t last); + + /// Carry over generation numbers from another SegmentInfos. + void updateGeneration(const SegmentInfosPtr& other); + + void rollbackCommit(const DirectoryPtr& dir); + + /// Call this to start a commit. This writes the new segments file, but writes an invalid checksum at the end, so + /// that it is not visible to readers. Once this is called you must call + /// {@link #finishCommit} to complete the commit or + /// {@link #rollbackCommit} to abort it. + void prepareCommit(const DirectoryPtr& dir); + + /// Returns all file names referenced by SegmentInfo instances matching the provided Directory (ie files associated + /// with any "external" segments are skipped). The returned collection is recomputed on each invocation. + HashSet files(const DirectoryPtr& dir, bool includeSegmentsFile); + + void finishCommit(const DirectoryPtr& dir); + + /// Writes & syncs to the Directory dir, taking care to remove the segments file on exception. + void commit(const DirectoryPtr& dir); + + String segString(const DirectoryPtr& directory); + MapStringString getUserData(); + void setUserData(MapStringString data); + + /// Replaces all segments in this instance, but keeps generation, version, counter so that future commits remain + /// write once. 
+ void replace(const SegmentInfosPtr& other); + + bool hasExternalSegments(const DirectoryPtr& dir); + + static int64_t getCurrentSegmentGeneration(HashSet files); + static int64_t getCurrentSegmentGeneration(const DirectoryPtr& directory); + static String getCurrentSegmentFileName(HashSet files); + static String getCurrentSegmentFileName(const DirectoryPtr& directory); + static int64_t generationFromSegmentsFileName(const String& fileName); + + /// Current version number from segments file. + static int64_t readCurrentVersion(const DirectoryPtr& directory); + + /// Returns userData from latest segments file. + static MapStringString readCurrentUserData(const DirectoryPtr& directory); + + /// If non-null, information about retries when loading the segments file will be printed to this. + static void setInfoStream(const InfoStreamPtr& infoStream); + + /// Set how many times to try loading the segments.gen file contents to determine current segment generation. This file + /// is only referenced when the primary method (listing the directory) fails. + static void setDefaultGenFileRetryCount(int32_t count); + + /// @see #setDefaultGenFileRetryCount + static int32_t getDefaultGenFileRetryCount(); + + /// Set how many milliseconds to pause in between attempts to load the segments.gen file. + static void setDefaultGenFileRetryPauseMsec(int32_t msec); + + /// @see #setDefaultGenFileRetryPauseMsec + static int32_t getDefaultGenFileRetryPauseMsec(); + + /// Set how many times to try incrementing the gen when loading the segments file. This only runs if the primary + /// (listing directory) and secondary (opening segments.gen file) methods fail to find the segments file. 
+ static void setDefaultGenLookaheadCount(int32_t count); + + /// @see #setDefaultGenLookaheadCount + static int32_t getDefaultGenLookahedCount(); + + /// @see #setInfoStream + static InfoStreamPtr getInfoStream(); + + static void message(const String& message); + +protected: + void write(const DirectoryPtr& directory); + + friend class FindSegmentsFile; +}; + +} + +#endif diff --git a/include/lucene++/SegmentMergeInfo.h b/include/lucene++/SegmentMergeInfo.h new file mode 100644 index 00000000..d8b9bfde --- /dev/null +++ b/include/lucene++/SegmentMergeInfo.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTMERGEINFO_H +#define SEGMENTMERGEINFO_H + +#include "Term.h" + +namespace Lucene { + +class SegmentMergeInfo : public LuceneObject { +public: + SegmentMergeInfo(int32_t b, const TermEnumPtr& te, const IndexReaderPtr& r); + virtual ~SegmentMergeInfo(); + + LUCENE_CLASS(SegmentMergeInfo); + +protected: + TermPositionsPtr postings; // use getPositions() + Collection docMap; // use getDocMap() + +public: + TermPtr term; + int32_t base; + int32_t ord; // the position of the segment in a MultiReader + TermEnumPtr termEnum; + IndexReaderWeakPtr _reader; + int32_t delCount; + +public: + Collection getDocMap(); + TermPositionsPtr getPositions(); + bool next(); + void close(); +}; + +} + +#endif diff --git a/include/lucene++/SegmentMergeQueue.h b/include/lucene++/SegmentMergeQueue.h new file mode 100644 index 00000000..b8817fb0 --- /dev/null +++ b/include/lucene++/SegmentMergeQueue.h @@ -0,0 +1,30 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTMERGEQUEUE_H +#define SEGMENTMERGEQUEUE_H + +#include "PriorityQueue.h" + +namespace Lucene { + +class SegmentMergeQueue : public PriorityQueue { +public: + SegmentMergeQueue(int32_t size); + virtual ~SegmentMergeQueue(); + + LUCENE_CLASS(SegmentMergeQueue); + +public: + void close(); + +protected: + virtual bool lessThan(const SegmentMergeInfoPtr& first, const SegmentMergeInfoPtr& second); +}; + +} + +#endif diff --git a/include/lucene++/SegmentMerger.h b/include/lucene++/SegmentMerger.h new file mode 100644 index 00000000..e6267800 --- /dev/null +++ b/include/lucene++/SegmentMerger.h @@ -0,0 +1,155 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTMERGER_H +#define SEGMENTMERGER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, into a single +/// Segment. After adding the appropriate readers, call the merge method to combine the segments. +/// +/// If the compoundFile flag is set, then the segments will be merged into a compound file. 
+/// @see #merge +/// @see #add +class LPPAPI SegmentMerger : public LuceneObject { +public: + SegmentMerger(const DirectoryPtr& dir, const String& name); + SegmentMerger(const IndexWriterPtr& writer, const String& name, const OneMergePtr& merge); + virtual ~SegmentMerger(); + + LUCENE_CLASS(SegmentMerger); + +protected: + DirectoryPtr directory; + String segment; + int32_t termIndexInterval; + + Collection readers; + FieldInfosPtr fieldInfos; + + int32_t mergedDocs; + CheckAbortPtr checkAbort; + + /// Whether we should merge doc stores (stored fields and vectors files). When all segments we + /// are merging already share the same doc store files, we don't need to merge the doc stores. + bool mergeDocStores; + + /// Maximum number of contiguous documents to bulk-copy when merging stored fields + static const int32_t MAX_RAW_MERGE_DOCS; + + Collection matchingSegmentReaders; + Collection rawDocLengths; + Collection rawDocLengths2; + + SegmentMergeQueuePtr queue; + bool omitTermFreqAndPositions; + + ByteArray payloadBuffer; + Collection< Collection > docMaps; + Collection delCounts; + +public: + /// norms header placeholder + static const uint8_t NORMS_HEADER[]; + static const int32_t NORMS_HEADER_LENGTH; + +public: + bool hasProx(); + + /// Add an IndexReader to the collection of readers that are to be merged + void add(const IndexReaderPtr& reader); + + /// @param i The index of the reader to return + /// @return The i'th reader to be merged + IndexReaderPtr segmentReader(int32_t i); + + /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor. + /// @return The number of documents that were merged + int32_t merge(); + + /// Merges the readers specified by the {@link #add} method into the directory passed to the constructor. 
+ /// @param mergeDocStores if false, we will not merge the stored fields nor vectors files + /// @return The number of documents that were merged + int32_t merge(bool mergeDocStores); + + /// close all IndexReaders that have been added. Should not be called before merge(). + void closeReaders(); + + HashSet getMergedFiles(); + HashSet createCompoundFile(const String& fileName); + + /// @return The number of documents in all of the readers + int32_t mergeFields(); + + Collection< Collection > getDocMaps(); + Collection getDelCounts(); + +protected: + void addIndexed(const IndexReaderPtr& reader, const FieldInfosPtr& fInfos, HashSet names, bool storeTermVectors, + bool storePositionWithTermVector, bool storeOffsetWithTermVector, bool storePayloads, + bool omitTFAndPositions); + + void setMatchingSegmentReaders(); + int32_t copyFieldsWithDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader); + int32_t copyFieldsNoDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader); + + /// Merge the TermVectors from each of the segments into the new one. + void mergeVectors(); + + void copyVectorsWithDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader); + void copyVectorsNoDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader); + + void mergeTerms(); + + void mergeTermInfos(const FormatPostingsFieldsConsumerPtr& consumer); + + /// Process postings from multiple segments all positioned on the same term. Writes out merged entries + /// into freqOutput and the proxOutput streams. 
+ /// @param smis array of segments + /// @param n number of cells in the array actually occupied + /// @return number of documents across all segments where this term was found + int32_t appendPostings(const FormatPostingsTermsConsumerPtr& termsConsumer, Collection smis, int32_t n); + + void mergeNorms(); +}; + +class CheckAbort : public LuceneObject { +public: + CheckAbort(const OneMergePtr& merge, const DirectoryPtr& dir); + virtual ~CheckAbort(); + + LUCENE_CLASS(CheckAbort); + +protected: + double workCount; + OneMergePtr merge; + DirectoryWeakPtr _dir; + +public: + /// Records the fact that roughly units amount of work have been done since this method was last called. + /// When adding time-consuming code into SegmentMerger, you should test different values for units to + /// ensure that the time in between calls to merge.checkAborted is up to ~ 1 second. + virtual void work(double units); +}; + +class CheckAbortNull : public CheckAbort { +public: + CheckAbortNull(); + virtual ~CheckAbortNull(); + + LUCENE_CLASS(CheckAbortNull); + +public: + /// do nothing + virtual void work(double units); +}; + +} + +#endif diff --git a/include/lucene++/SegmentReader.h b/include/lucene++/SegmentReader.h new file mode 100644 index 00000000..445262ac --- /dev/null +++ b/include/lucene++/SegmentReader.h @@ -0,0 +1,221 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTREADER_H +#define SEGMENTREADER_H + +#include "IndexReader.h" +#include "CloseableThreadLocal.h" + +namespace Lucene { + +class LPPAPI SegmentReader : public IndexReader { +public: + SegmentReader(); + virtual ~SegmentReader(); + + LUCENE_CLASS(SegmentReader); + +protected: + bool readOnly; + +INTERNAL: + BitVectorPtr deletedDocs; + SegmentReaderRefPtr deletedDocsRef; + CoreReadersPtr core; + FieldsReaderLocalPtr fieldsReaderLocal; + SegmentInfoPtr rollbackSegmentInfo; + CloseableThreadLocal termVectorsLocal; + FieldInfosPtr fieldInfos(); + + /// Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. + /// @return TermVectorsReader + TermVectorsReaderPtr getTermVectorsReader(); + + TermVectorsReaderPtr getTermVectorsReaderOrig(); + FieldsReaderPtr getFieldsReader(); + MapStringNorm _norms; + +private: + SegmentInfoPtr si; + int32_t readBufferSize; + bool deletedDocsDirty; + bool normsDirty; + int32_t pendingDeleteCount; + + bool rollbackHasChanges; + bool rollbackDeletedDocsDirty; + bool rollbackNormsDirty; + int32_t rollbackPendingDeleteCount; + + // optionally used for the .nrm file shared by multiple norms + IndexInputPtr singleNormStream; + SegmentReaderRefPtr singleNormRef; + +public: + virtual void initialize(); + + using IndexReader::document; + using IndexReader::termPositions; + + static SegmentReaderPtr get(bool readOnly, const SegmentInfoPtr& si, int32_t termInfosIndexDivisor); + static SegmentReaderPtr get(bool readOnly, const DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor); + + void openDocStores(); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual LuceneObjectPtr clone(bool openReadOnly, const LuceneObjectPtr& other = LuceneObjectPtr()); + SegmentReaderPtr reopenSegment(const SegmentInfoPtr& si, bool doClone, bool 
openReadOnly); + + static bool hasDeletions(const SegmentInfoPtr& si); + + /// Returns true if any documents have been deleted + virtual bool hasDeletions(); + + static bool usesCompoundFile(const SegmentInfoPtr& si); + static bool hasSeparateNorms(const SegmentInfoPtr& si); + + HashSet files(); + + /// Returns an enumeration of all the terms in the index. + virtual TermEnumPtr terms(); + + /// Returns an enumeration of all terms starting at a given term. + virtual TermEnumPtr terms(const TermPtr& t); + + /// Get the {@link Document} at the n'th position. + virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); + + /// Returns true if document n has been deleted + virtual bool isDeleted(int32_t n); + + /// Returns an enumeration of all the documents which contain term. + virtual TermDocsPtr termDocs(const TermPtr& term); + + /// Returns an unpositioned {@link TermDocs} enumerator. + virtual TermDocsPtr termDocs(); + + /// Returns an unpositioned {@link TermPositions} enumerator. + virtual TermPositionsPtr termPositions(); + + /// Returns the number of documents containing the term t. + virtual int32_t docFreq(const TermPtr& t); + + /// Returns the number of documents in this index. + virtual int32_t numDocs(); + + /// Returns one greater than the largest possible document number. + virtual int32_t maxDoc(); + + /// Get a list of unique field names that exist in this index and have the specified field option information. + virtual HashSet getFieldNames(FieldOption fieldOption); + + /// Returns true if there are norms stored for this field. + virtual bool hasNorms(const String& field); + + /// Returns the byte-encoded normalization factor for the named field of every document. + virtual ByteArray norms(const String& field); + + /// Read norms into a pre-allocated array. 
+ virtual void norms(const String& field, ByteArray norms, int32_t offset); + + bool termsIndexLoaded(); + + /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run, sharing a + /// segment that's still being merged. This method is not thread safe, and relies on the synchronization in IndexWriter + void loadTermsIndex(int32_t termsIndexDivisor); + + bool normsClosed(); // for testing only + bool normsClosed(const String& field); // for testing only + + /// Return a term frequency vector for the specified document and field. The vector returned contains term + /// numbers and frequencies for all terms in the specified field of this document, if the field had + /// storeTermVector flag set. If the flag was not set, the method returns null. + virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); + + /// Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays + /// of the {@link TermFreqVector}. + virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); + + /// Map all the term vectors for all fields in a Document + virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); + + /// Return an array of term frequency vectors for the specified document. The array contains a vector for + /// each vectorized field in the document. Each vector contains term numbers and frequencies for all + /// terms in a given vectorized field. If no such fields existed, the method returns null. + virtual Collection getTermFreqVectors(int32_t docNumber); + + /// Return the name of the segment this reader is reading. + String getSegmentName(); + + /// Return the SegmentInfo of the segment this reader is reading. 
+ SegmentInfoPtr getSegmentInfo(); + void setSegmentInfo(const SegmentInfoPtr& info); + + void startCommit(); + void rollbackCommit(); + + /// Returns the directory this index resides in. + virtual DirectoryPtr directory(); + + /// This is necessary so that cloned SegmentReaders (which share the underlying postings data) + /// will map to the same entry in the FieldCache. + virtual LuceneObjectPtr getFieldCacheKey(); + virtual LuceneObjectPtr getDeletesCacheKey(); + + /// Returns the number of unique terms (across all fields) in this reader. + virtual int64_t getUniqueTermCount(); + + static SegmentReaderPtr getOnlySegmentReader(const DirectoryPtr& dir); + static SegmentReaderPtr getOnlySegmentReader(const IndexReaderPtr& reader); + + virtual int32_t getTermInfosIndexDivisor(); + +protected: + bool checkDeletedCounts(); + void loadDeletedDocs(); + + /// Clones the norm bytes. May be overridden by subclasses. + /// @param bytes Byte array to clone + /// @return New BitVector + virtual ByteArray cloneNormBytes(ByteArray bytes); + + /// Clones the deleteDocs BitVector. May be overridden by subclasses. + /// @param bv BitVector to clone + /// @return New BitVector + virtual BitVectorPtr cloneDeletedDocs(const BitVectorPtr& bv); + + /// Implements commit. + virtual void doCommit(MapStringString commitUserData); + + virtual void commitChanges(MapStringString commitUserData); + + /// Implements close. + virtual void doClose(); + + /// Implements deletion of the document numbered docNum. + /// Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. + virtual void doDelete(int32_t docNum); + + /// Implements actual undeleteAll() in subclass. + virtual void doUndeleteAll(); + + /// can return null if norms aren't stored + ByteArray getNorms(const String& field); + + /// Implements setNorm in subclass. 
+ virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); + + void openNorms(const DirectoryPtr& cfsDir, int32_t readBufferSize); + + friend class ReaderPool; + friend class IndexWriter; + friend class Norm; +}; + +} + +#endif diff --git a/include/lucene++/SegmentTermDocs.h b/include/lucene++/SegmentTermDocs.h new file mode 100644 index 00000000..0eb92346 --- /dev/null +++ b/include/lucene++/SegmentTermDocs.h @@ -0,0 +1,86 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTTERMDOCS_H +#define SEGMENTTERMDOCS_H + +#include "TermPositions.h" + +namespace Lucene { + +class LPPAPI SegmentTermDocs : public TermPositions, public LuceneObject { +public: + SegmentTermDocs(const SegmentReaderPtr& parent); + virtual ~SegmentTermDocs(); + + LUCENE_CLASS(SegmentTermDocs); + +protected: + SegmentReaderWeakPtr _parent; + SegmentReader* __parent; + IndexInputPtr _freqStream; + IndexInput* __freqStream; + int32_t count; + int32_t df; + BitVectorPtr deletedDocs; + BitVector* __deletedDocs; + int32_t _doc; + int32_t _freq; + + int32_t skipInterval; + int32_t maxSkipLevels; + DefaultSkipListReaderPtr skipListReader; + + int64_t freqBasePointer; + int64_t proxBasePointer; + + int64_t skipPointer; + bool haveSkipped; + + bool currentFieldStoresPayloads; + bool currentFieldOmitTermFreqAndPositions; + +public: + /// Sets this to the data for a term. + virtual void seek(const TermPtr& term); + + /// Sets this to the data for the current term in a {@link TermEnum}. + virtual void seek(const TermEnumPtr& termEnum); + + virtual void seek(const TermInfoPtr& ti, const TermPtr& term); + + virtual void close(); + + /// Returns the current document number. 
+ virtual int32_t doc(); + + /// Returns the frequency of the term within the current document. + virtual int32_t freq(); + + /// Moves to the next pair in the enumeration. + virtual bool next(); + + /// Optimized implementation. + virtual int32_t read(Collection& docs, Collection& freqs); + + /// Optimized implementation. + virtual bool skipTo(int32_t target); + + /// Used for testing + virtual IndexInputPtr freqStream(); + virtual void freqStream(const IndexInputPtr& freqStream); + +protected: + virtual void skippingDoc(); + virtual int32_t readNoTf(Collection& docs, Collection& freqs, int32_t length); + + /// Overridden by SegmentTermPositions to skip in prox stream. + virtual void skipProx(int64_t proxPointer, int32_t payloadLength); +}; + +} + +#endif diff --git a/include/lucene++/SegmentTermEnum.h b/include/lucene++/SegmentTermEnum.h new file mode 100644 index 00000000..e943697e --- /dev/null +++ b/include/lucene++/SegmentTermEnum.h @@ -0,0 +1,88 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTTERMENUM_H +#define SEGMENTTERMENUM_H + +#include "TermEnum.h" + +namespace Lucene { + +class LPPAPI SegmentTermEnum : public TermEnum { +public: + SegmentTermEnum(); + SegmentTermEnum(const IndexInputPtr& i, const FieldInfosPtr& fis, bool isi); + virtual ~SegmentTermEnum(); + + LUCENE_CLASS(SegmentTermEnum); + +protected: + IndexInputPtr input; + TermBufferPtr termBuffer; + TermBufferPtr prevBuffer; + TermBufferPtr scanBuffer; // used for scanning + + TermInfoPtr _termInfo; + + int32_t format; + bool isIndex; + int32_t formatM1SkipInterval; + +public: + FieldInfosPtr fieldInfos; + int64_t size; + int64_t position; + + int64_t indexPointer; + int32_t indexInterval; + int32_t skipInterval; + int32_t maxSkipLevels; + +public: + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + void seek(int64_t pointer, int64_t p, const TermPtr& t, const TermInfoPtr& ti); + + /// Increments the enumeration to the next element. True if one exists. + virtual bool next(); + + /// Optimized scan, without allocating new terms. Return number of invocations to next(). + int32_t scanTo(const TermPtr& term); + + /// Returns the current Term in the enumeration. + /// Initially invalid, valid after next() called for the first time. + virtual TermPtr term(); + + /// Returns the previous Term enumerated. Initially null. + TermPtr prev(); + + /// Returns the current TermInfo in the enumeration. + /// Initially invalid, valid after next() called for the first time. + TermInfoPtr termInfo(); + + /// Sets the argument to the current TermInfo in the enumeration. + /// Initially invalid, valid after next() called for the first time. + void termInfo(const TermInfoPtr& ti); + + /// Returns the docFreq of the current Term in the enumeration. + /// Initially invalid, valid after next() called for the first time. 
+ virtual int32_t docFreq(); + + /// Returns the freqPointer from the current TermInfo in the enumeration. + /// Initially invalid, valid after next() called for the first time. + int64_t freqPointer(); + + /// Returns the proxPointer from the current TermInfo in the enumeration. + /// Initially invalid, valid after next() called for the first time. + int64_t proxPointer(); + + /// Closes the enumeration to further activity, freeing resources. + virtual void close(); +}; + +} + +#endif diff --git a/include/lucene++/SegmentTermPositionVector.h b/include/lucene++/SegmentTermPositionVector.h new file mode 100644 index 00000000..fb9cc6ab --- /dev/null +++ b/include/lucene++/SegmentTermPositionVector.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTTERMPOSITIONVECTOR_H +#define SEGMENTTERMPOSITIONVECTOR_H + +#include "SegmentTermVector.h" + +namespace Lucene { + +class LPPAPI SegmentTermPositionVector : public SegmentTermVector { +public: + SegmentTermPositionVector(const String& field, Collection terms, Collection termFreqs, + Collection< Collection > positions, Collection< Collection > offsets); + virtual ~SegmentTermPositionVector(); + + LUCENE_CLASS(SegmentTermPositionVector); + +protected: + Collection< Collection > positions; + Collection< Collection > offsets; + +protected: + static const Collection EMPTY_TERM_POS(); + +public: + /// Returns an array of TermVectorOffsetInfo in which the term is found. 
+ /// @param index The position in the array to get the offsets from + /// @return An array of TermVectorOffsetInfo objects or the empty list + virtual Collection getOffsets(int32_t index); + + /// Returns an array of positions in which the term is found. + /// Terms are identified by the index at which its number appears in the term String array obtained from the indexOf method. + virtual Collection getTermPositions(int32_t index); +}; + +} + +#endif diff --git a/include/lucene++/SegmentTermPositions.h b/include/lucene++/SegmentTermPositions.h new file mode 100644 index 00000000..2e10d15b --- /dev/null +++ b/include/lucene++/SegmentTermPositions.h @@ -0,0 +1,80 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTTERMPOSITIONS_H +#define SEGMENTTERMPOSITIONS_H + +#include "SegmentTermDocs.h" + +namespace Lucene { + +class LPPAPI SegmentTermPositions : public SegmentTermDocs { +public: + SegmentTermPositions(const SegmentReaderPtr& parent); + virtual ~SegmentTermPositions(); + + LUCENE_CLASS(SegmentTermPositions); + +protected: + IndexInputPtr proxStream; + int32_t proxCount; + int32_t position; + + /// The current payload length + int32_t payloadLength; + + /// Indicates whether the payload of the current position has been read from the proxStream yet + bool needToLoadPayload; + + // these variables are being used to remember information for a lazy skip + int64_t lazySkipPointer; + int32_t lazySkipProxCount; + +public: + using SegmentTermDocs::seek; + + virtual void seek(const TermInfoPtr& ti, const TermPtr& term); + virtual void close(); + + /// Returns next position in the current document. 
+ virtual int32_t nextPosition(); + + /// Moves to the next pair in the enumeration. + virtual bool next(); + + /// Not supported + virtual int32_t read(Collection& docs, Collection& freqs); + + /// Returns the length of the payload at the current term position. + virtual int32_t getPayloadLength(); + + /// Returns the payload data at the current term position. + virtual ByteArray getPayload(ByteArray data, int32_t offset); + + /// Checks if a payload can be loaded at this position. + virtual bool isPayloadAvailable(); + +protected: + int32_t readDeltaPosition(); + + virtual void skippingDoc(); + + virtual void skipProx(int64_t proxPointer, int32_t payloadLength); + virtual void skipPositions(int32_t n); + virtual void skipPayload(); + + /// It is not always necessary to move the prox pointer to a new document after the freq pointer has + /// been moved. Consider for example a phrase query with two terms: the freq pointer for term 1 has to + /// move to document x to answer the question if the term occurs in that document. But only if term 2 + /// also matches document x, the positions have to be read to figure out if term 1 and term 2 appear next + /// to each other in document x and thus satisfy the query. So we move the prox pointer lazily to the + /// document as soon as positions are requested. + virtual void lazySkip(); +}; + +} + +#endif diff --git a/include/lucene++/SegmentTermVector.h b/include/lucene++/SegmentTermVector.h new file mode 100644 index 00000000..d50a603c --- /dev/null +++ b/include/lucene++/SegmentTermVector.h @@ -0,0 +1,51 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTTERMVECTOR_H +#define SEGMENTTERMVECTOR_H + +#include "TermPositionVector.h" + +namespace Lucene { + +class LPPAPI SegmentTermVector : public TermPositionVector, public LuceneObject { +public: + SegmentTermVector(const String& field, Collection terms, Collection termFreqs); + virtual ~SegmentTermVector(); + + LUCENE_CLASS(SegmentTermVector); + +protected: + String field; + Collection terms; + Collection termFreqs; + +public: + /// @return The number of the field this vector is associated with + virtual String getField(); + + virtual String toString(); + + /// @return The number of terms in the term vector. + virtual int32_t size(); + + /// @return An Array of term texts in ascending order. + virtual Collection getTerms(); + + /// @return Array of term frequencies. + virtual Collection getTermFrequencies(); + + /// Return an index in the term numbers array returned from getTerms at which the term with the + /// specified term appears. + virtual int32_t indexOf(const String& term); + + /// Just like indexOf(int) but searches for a number of terms at the same time. + virtual Collection indexesOf(Collection termNumbers, int32_t start, int32_t length); +}; + +} + +#endif diff --git a/include/lucene++/SegmentWriteState.h b/include/lucene++/SegmentWriteState.h new file mode 100644 index 00000000..26000787 --- /dev/null +++ b/include/lucene++/SegmentWriteState.h @@ -0,0 +1,39 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SEGMENTWRITESTATE_H +#define SEGMENTWRITESTATE_H + +#include "LuceneObject.h" + +namespace Lucene { + +class SegmentWriteState : public LuceneObject { +public: + SegmentWriteState(const DocumentsWriterPtr& docWriter, const DirectoryPtr& directory, const String& segmentName, + const String& docStoreSegmentName, int32_t numDocs, int32_t numDocsInStore, + int32_t termIndexInterval); + virtual ~SegmentWriteState(); + + LUCENE_CLASS(SegmentWriteState); + +public: + DocumentsWriterWeakPtr _docWriter; + DirectoryPtr directory; + String segmentName; + String docStoreSegmentName; + int32_t numDocs; + int32_t termIndexInterval; + int32_t numDocsInStore; + HashSet flushedFiles; + +public: + String segmentFileName(const String& ext); +}; + +} + +#endif diff --git a/include/lucene++/SerialMergeScheduler.h b/include/lucene++/SerialMergeScheduler.h new file mode 100644 index 00000000..484866cf --- /dev/null +++ b/include/lucene++/SerialMergeScheduler.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SERIALMERGESCHEDULER_H +#define SERIALMERGESCHEDULER_H + +#include "MergeScheduler.h" + +namespace Lucene { + +/// A {@link MergeScheduler} that simply does each merge sequentially, using the current thread. +class LPPAPI SerialMergeScheduler : public MergeScheduler { +public: + virtual ~SerialMergeScheduler(); + + LUCENE_CLASS(SerialMergeScheduler); + +public: + /// Just do the merges in sequence. We do this "synchronized" so that even if the application is using + /// multiple threads, only one merge may run at a time. 
+ virtual void merge(const IndexWriterPtr& writer); + + /// Close this MergeScheduler. + virtual void close(); +}; + +} + +#endif diff --git a/include/lucene++/Set.h b/include/lucene++/Set.h new file mode 100644 index 00000000..7282cc96 --- /dev/null +++ b/include/lucene++/Set.h @@ -0,0 +1,133 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SET_H +#define SET_H + +#include +#include "LuceneSync.h" + +namespace Lucene { + +/// Utility template class to handle set based collections that can be safely copied and shared +template < class TYPE, class LESS = std::less > +class Set : public LuceneSync { +public: + typedef Set this_type; + typedef std::set set_type; + typedef typename set_type::iterator iterator; + typedef typename set_type::const_iterator const_iterator; + typedef TYPE value_type; + + virtual ~Set() { + } + +protected: + boost::shared_ptr setContainer; + +public: + static this_type newInstance() { + this_type instance; + instance.setContainer = Lucene::newInstance(); + return instance; + } + + template + static this_type newInstance(ITER first, ITER last) { + this_type instance; + instance.setContainer = Lucene::newInstance(first, last); + return instance; + } + + void reset() { + setContainer.reset(); + } + + int32_t size() const { + return (int32_t)setContainer->size(); + } + + bool empty() const { + return setContainer->empty(); + } + + void clear() { + setContainer->clear(); + } + + iterator begin() { + return setContainer->begin(); + } + + iterator end() { + return setContainer->end(); + } + + const_iterator begin() const { + return setContainer->begin(); + } + + const_iterator end() const { + return setContainer->end(); + } + + bool add(const 
TYPE& type) { + return setContainer->insert(type).second; + } + + template + void addAll(ITER first, ITER last) { + setContainer->insert(first, last); + } + + bool remove(const TYPE& type) { + return (setContainer->erase(type) > 0); + } + + iterator find(const TYPE& type) { + return setContainer->find(type); + } + + bool contains(const TYPE& type) const { + return (setContainer->find(type) != setContainer->end()); + } + + bool equals(const this_type& other) const { + return equals(other, std::equal_to()); + } + + template + bool equals(const this_type& other, PRED comp) const { + if (setContainer->size() != other.setContainer->size()) { + return false; + } + return std::equal(setContainer->begin(), setContainer->end(), other.setContainer->begin(), comp); + } + + void swap(this_type& other) { + setContainer.swap(other->setContainer); + } + + operator bool() const { + return setContainer.get() != NULL; + } + + bool operator! () const { + return !setContainer; + } + + bool operator== (const this_type& other) { + return (setContainer == other.setContainer); + } + + bool operator!= (const this_type& other) { + return (setContainer != other.setContainer); + } +}; + +} + +#endif diff --git a/include/lucene++/SetBasedFieldSelector.h b/include/lucene++/SetBasedFieldSelector.h new file mode 100644 index 00000000..db4ed5e5 --- /dev/null +++ b/include/lucene++/SetBasedFieldSelector.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SETBASEDFIELDSELECTOR_H +#define SETBASEDFIELDSELECTOR_H + +#include "FieldSelector.h" + +namespace Lucene { + +/// Declare what fields to load normally and what fields to load lazily +class LPPAPI SetBasedFieldSelector : public FieldSelector { +public: + /// Pass in the Set of {@link Field} names to load and the Set of {@link Field} names to load lazily. + /// If both are null, the Document will not have any {@link Field} on it. + /// @param fieldsToLoad A Set of {@link String} field names to load. May be empty, but not null + /// @param lazyFieldsToLoad A Set of {@link String} field names to load lazily. May be empty, but not null + SetBasedFieldSelector(HashSet fieldsToLoad, HashSet lazyFieldsToLoad); + + virtual ~SetBasedFieldSelector(); + + LUCENE_CLASS(SetBasedFieldSelector); + +protected: + HashSet fieldsToLoad; + HashSet lazyFieldsToLoad; + +public: + /// Indicate whether to load the field with the given name or not. If the {@link Field#name()} is not in + /// either of the initializing Sets, then {@link FieldSelectorResult#NO_LOAD} is returned. If a Field name + /// is in both fieldsToLoad and lazyFieldsToLoad, lazy has precedence. + /// @param fieldName The {@link Field} name to check + /// @return The {@link FieldSelectorResult} + virtual FieldSelectorResult accept(const String& fieldName); +}; + +} + +#endif diff --git a/include/lucene++/Similarity.h b/include/lucene++/Similarity.h new file mode 100644 index 00000000..ac55bff3 --- /dev/null +++ b/include/lucene++/Similarity.h @@ -0,0 +1,616 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SIMILARITY_H +#define SIMILARITY_H + +#include "Explanation.h" + +namespace Lucene { + +/// Scoring API. +/// +/// Similarity defines the components of Lucene scoring. Overriding computation of these components is +/// a convenient way to alter Lucene scoring. +/// +/// Suggested reading: +/// Introduction To Information Retrieval, Chapter 6. +/// +/// The following describes how Lucene scoring evolves from underlying information retrieval models to +/// (efficient) implementation. We first brief on VSM Score, then derive from it Lucene's Conceptual Scoring +/// Formula, from which, finally, evolves Lucene's Practical Scoring Function (the latter is connected directly +/// with Lucene classes and methods). +/// +/// Lucene combines Boolean model (BM) of +/// Information Retrieval with Vector Space Model +/// (VSM) of Information Retrieval - documents "approved" by BM are scored by VSM. +/// +/// In VSM, documents and queries are represented as weighted vectors in a multi-dimensional space, where each +/// distinct index term is a dimension, and weights are Tf-idf +/// values. +/// +/// VSM does not require weights to be Tf-idf values, but Tf-idf values are believed to produce search results +/// of high quality, and so Lucene is using Tf-idf. Tf and Idf are described in more detail below, but for now, +/// for completion, let's just say that for given term t and document (or query) x, Tf(t,x) varies with the +/// number of occurrences of term t in x (when one increases so does the other) and idf(t) similarly varies with +/// the inverse of the number of index documents containing term t. +/// +/// VSM score of document d for query q is the Cosine +/// Similarity of the weighted query vectors V(q) and V(d): +/// +///
 
+/// +/// +/// +///
+/// +/// +///
+/// +/// +/// +/// +/// +///
+/// cosine-similarity(q,d)   =   +/// +/// +/// +/// +/// +///
V(q) · V(d)
–––––––––
|V(q)| |V(d)|
+///
+///
+///
+///
VSM Score
+///
+///
 
+/// +/// Where V(q) · V(d) is the dot product of the +/// weighted vectors, and |V(q)| and |V(d)| are their +/// Euclidean norms. +/// +/// Note: the above equation can be viewed as the dot product of the normalized weighted vectors, in the sense +/// that dividing V(q) by its euclidean norm is normalizing it to a unit vector. +/// +/// Lucene refines VSM score for both search quality and usability: +///
    +///
  • Normalizing V(d) to the unit vector is known to be problematic in that it removes all document length +/// information. For some documents removing this info is probably ok, eg. a document made by duplicating a +/// certain paragraph 10 times, especially if that paragraph is made of distinct terms. But for a document which +/// contains no duplicated paragraphs, this might be wrong. To avoid this problem, a different document length +/// normalization factor is used, which normalizes to a vector equal to or larger than the unit vector: +/// doc-len-norm(d). +///
  • +///
  • At indexing, users can specify that certain documents are more important than others, by assigning a +/// document boost. For this, the score of each document is also multiplied by its boost value doc-boost(d). +///
  • +///
  • Lucene is field based, hence each query term applies to a single field, document length normalization +/// is by the length of the certain field, and in addition to document boost there are also document fields +/// boosts. +///
  • +///
  • The same field can be added to a document during indexing several times, and so the boost of that field +/// is the multiplication of the boosts of the separate additions (or parts) of that field within the document. +///
  • +///
  • At search time users can specify boosts to each query, sub-query, and each query term, hence the +/// contribution of a query term to the score of a document is multiplied by the boost of that query term +/// query-boost(q). +///
  • +///
  • A document may match a multi term query without containing all the terms of that query (this is correct +/// for some of the queries), and users can further reward documents matching more query terms through a +/// coordination factor, which is usually larger when more terms are matched: coord-factor(q,d). +///
  • +///
+/// +/// Under the simplifying assumption of a single field in the index, we get Lucene's Conceptual scoring formula: +/// +///
 
+/// +/// +/// +///
+/// +/// +///
+/// +/// +/// +/// +/// +/// +///
+/// score(q,d)   =   +/// coord-factor(q,d) ·   +/// query-boost(q) ·   +/// +/// +/// +/// +/// +///
V(q) · V(d)
–––––––––
|V(q)|
+///
+///   ·   doc-len-norm(d) +///   ·   doc-boost(d) +///
+///
+///
+///
Lucene Conceptual Scoring Formula
+///
+///
 
+/// +/// The conceptual formula is a simplification in the sense that (1) terms and documents are fielded and (2) +/// boosts are usually per query term rather than per query. +/// +/// We now describe how Lucene implements this conceptual scoring formula, and derive from it Lucene's Practical +/// Scoring Function. +/// +/// For efficient score computation some scoring components are computed and aggregated in advance: +///
    +///
  • Query-boost for the query (actually for each query term) is known when search starts. +///
  • +///
  • Query Euclidean norm |V(q)| can be computed when search starts, as it is independent of the document +/// being scored. From search optimization perspective, it is a valid question why bother to normalize the +/// query at all, because all scored documents will be multiplied by the same |V(q)|, and hence documents ranks +/// (their order by score) will not be affected by this normalization. There are two good reasons to keep this +/// normalization: +///
      +///
    • Recall that Cosine Similarity can be used +/// find how similar two documents are. One can use Lucene for eg. clustering, and use a document as a query to +/// compute its similarity to other documents. In this use case it is important that the score of document d3 +/// for query d1 is comparable to the score of document d3 for query d2. In other words, scores of a document for +/// two distinct queries should be comparable. There are other applications that may require this. And this is +/// exactly what normalizing the query vector V(q) provides: comparability (to a certain extent) of two or more +/// queries. +///
    • +///
    • Applying query normalization on the scores helps to keep the scores around the unit vector, hence preventing +/// loss of score data because of floating point precision limitations. +///
    • +///
    +///
  • +///
  • Document length norm doc-len-norm(d) and document boost doc-boost(d) are known at indexing time. They are +/// computed in advance and their multiplication is saved as a single value in the index: norm(d). (In the equations +/// below, norm(t in d) means norm(field(t) in doc d) where field(t) is the field associated with term t.) +///
  • +///
+/// +/// Lucene's Practical Scoring Function is derived from the above. The color codes demonstrate how it relates to +/// those of the conceptual formula: +/// +/// +/// +/// +///
+/// +/// +///
+/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +///
+/// score(q,d)   =   +/// coord(q,d)  ·  +/// queryNorm(q)  ·  +/// +/// +/// +/// ( +/// tf(t in d)  ·  +/// idf(t)2  ·  +/// t.getBoost() ·  +/// norm(t,d) +/// ) +///
t in q
+///
+///
+///
Lucene Practical Scoring Function
+///
+/// +/// where +///
    +///
  1. +/// +/// tf(t in d) +/// correlates to the term's frequency, defined as the number of times term t appears in the currently +/// scored document d. Documents that have more occurrences of a given term receive a higher score. +/// Note that tf(t in q) is assumed to be 1 and therefore it does not appear in this equation, +/// However if a query contains twice the same term, there will be two term-queries with that same term +/// and hence the computation would still be correct (although not very efficient). +/// The default computation for tf(t in d) in {@link DefaultSimilarity#tf(float) DefaultSimilarity} is: +/// +///
     
    +/// +/// +/// +/// +/// +///
    +/// {@link DefaultSimilarity#tf(float) tf(t in d)}   =   +/// +/// frequency½ +///
    +///
     
    +///
  2. +/// +///
  3. +/// +/// idf(t) stands for Inverse Document Frequency. This value correlates to the inverse of docFreq +/// (the number of documents in which the term t appears). This means rarer terms give higher contribution +/// to the total score. idf(t) appears for t in both the query and the document, hence it is squared in +/// the equation. The default computation for idf(t) in {@link DefaultSimilarity#idf(int, int) DefaultSimilarity} is: +/// +///
     
    +/// +/// +/// +/// +/// +/// +/// +///
    +/// {@link DefaultSimilarity#idf(int, int) idf(t)}  =   +/// +/// 1 + log ( +/// +/// +/// +/// +/// +///
    numDocs
    –––––––––
    docFreq+1
    +///
    +/// ) +///
    +///
     
    +///
  4. +/// +///
  5. +/// +/// coord(q,d) +/// is a score factor based on how many of the query terms are found in the specified document. Typically, a +/// document that contains more of the query's terms will receive a higher score than another document with +/// fewer query terms. This is a search time factor computed in {@link #coord(int, int) coord(q,d)} by the +/// Similarity in effect at search time. +///
     
    +///
  6. +/// +///
  7. +/// +/// queryNorm(q) +/// +/// is a normalizing factor used to make scores between queries comparable. This factor does not affect +/// document ranking (since all ranked documents are multiplied by the same factor), but rather just attempts +/// to make scores from different queries (or even different indexes) comparable. This is a search time +/// factor computed by the Similarity in effect at search time. +/// +/// The default computation in {@link DefaultSimilarity#queryNorm(float) DefaultSimilarity} +/// produces a Euclidean norm: +///
     
    +/// +/// +/// +/// +/// +///
    +/// queryNorm(q)   =   +/// {@link DefaultSimilarity#queryNorm(float) queryNorm(sumOfSquaredWeights)} +///   =   +/// +/// +/// +/// +/// +///
    1
    +/// –––––––––––––– +///
    sumOfSquaredWeights½
    +///
    +///
     
    +/// +/// The sum of squared weights (of the query terms) is computed by the query {@link Weight} object. For example, +/// a {@link BooleanQuery boolean query} computes this value as: +/// +///
     
    +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +///
    +/// {@link Weight#sumOfSquaredWeights() sumOfSquaredWeights}   =   +/// {@link Query#getBoost() q.getBoost()} 2 +///  ·  +/// +/// +/// +/// ( +/// idf(t)  ·  +/// t.getBoost() +/// ) 2 +///
    t in q
    +///
     
    +/// +///
  8. +/// +///
  9. +/// +/// t.getBoost() +/// is a search time boost of term t in the query q as specified in the query text or as set by application +/// calls to {@link Query#setBoost(float) setBoost()}. Notice that there is really no direct API for accessing +/// a boost of one term in a multi term query, but rather multi terms are represented in a query as multi +/// {@link TermQuery TermQuery} objects, and so the boost of a term in the query is accessible by calling +/// the sub-query {@link Query#getBoost() getBoost()}. +///
     
    +///
  10. +/// +///
  11. +/// +/// norm(t,d) encapsulates a few (indexing time) boost and length factors: +/// +///
      +///
    • Document boost - set by calling +/// {@link Document#setBoost(float) doc.setBoost()} +/// before adding the document to the index. +///
    • +///
    • Field boost - set by calling +/// {@link Fieldable#setBoost(float) field.setBoost()} +/// before adding the field to a document. +///
    • +///
    • {@link #lengthNorm(String, int) lengthNorm(field)} - computed when the document is added to +/// the index in accordance with the number of tokens of this field in the document, so that shorter fields +/// contribute more to the score. LengthNorm is computed by the Similarity class in effect at indexing. +///
    • +///
    +/// +/// When a document is added to the index, all the above factors are multiplied. +/// If the document has multiple fields with the same name, all their boosts are multiplied together: +/// +///
     
    +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +/// +///
    +/// norm(t,d)   =   +/// {@link Document#getBoost() doc.getBoost()} +///  ·  +/// {@link #lengthNorm(String, int) lengthNorm(field)} +///  ·  +/// +/// +/// +/// {@link Fieldable#getBoost() f.getBoost}() +///
    field f in d named as t
    +///
     
    +/// However the resulted norm value is {@link #encodeNorm(float) encoded} as a single byte before being stored. +/// At search time, the norm byte value is read from the index {@link Directory directory} and {@link +/// #decodeNorm(byte) decoded} back to a float norm value. This encoding/decoding, while reducing index size, +/// comes with the price of precision loss - it is not guaranteed that decode(encode(x)) = x. For instance, +/// decode(encode(0.89)) = 0.75. +///
     
    +/// Compression of norm values to a single byte saves memory at search time, because once a field is referenced +/// at search time, its norms - for all documents - are maintained in memory. +///
     
    +/// The rationale supporting such lossy compression of norm values is that given the difficulty (and inaccuracy) +/// of users to express their true information need by a query, only big differences matter. +///
     
    +/// Last, note that search time is too late to modify this norm part of scoring, eg. by using a different +/// {@link Similarity} for search. +///
     
    +///
  12. +///
+/// +/// @see #setDefault(SimilarityPtr) +/// @see IndexWriter#setSimilarity(SimilarityPtr) +/// @see Searcher#setSimilarity(SimilarityPtr) +class LPPAPI Similarity : public LuceneObject { +public: + Similarity(); + virtual ~Similarity(); + + LUCENE_CLASS(Similarity); + +protected: + static const int32_t NO_DOC_ID_PROVIDED; + +public: + static const Collection NORM_TABLE; + +public: + /// Return the default Similarity implementation used by indexing and search code. + /// This is initially an instance of {@link DefaultSimilarity}. + /// @see Searcher#setSimilarity(SimilarityPtr) + /// @see IndexWriter#setSimilarity(SimilarityPtr) + static SimilarityPtr getDefault(); + + /// Decodes a normalization factor stored in an index. + /// @see #encodeNorm(double) + static double decodeNorm(uint8_t b); + + /// Returns a table for decoding normalization bytes. + /// @see #encodeNorm(double) + static const Collection& getNormDecoder(); + + /// Compute the normalization value for a field, given the accumulated state of term processing for this + /// field (see {@link FieldInvertState}). + /// + /// Implementations should calculate a float value based on the field state and then return that value. + /// + /// For backward compatibility this method by default calls {@link #lengthNorm(String, int32_t)} passing + /// {@link FieldInvertState#getLength()} as the second argument, and then multiplies this value by {@link + /// FieldInvertState#getBoost()}. + /// + /// @param field Field name + /// @param state Current processing state for this field + /// @return The calculated float norm + virtual double computeNorm(const String& fieldName, const FieldInvertStatePtr& state); + + /// Computes the normalization value for a field given the total number of terms contained in a field. + /// These values, together with field boosts, are stored in an index and multiplied into scores for hits + /// on each field by the search code. 
+ /// + /// Matches in longer fields are less precise, so implementations of this method usually return smaller + /// values when numTokens is large, and larger values when numTokens is small. + /// + /// Note that the return values are computed under {@link IndexWriter#addDocument(DocumentPtr)} and then + /// stored using {@link #encodeNorm(double)}. Thus they have limited precision, and documents must be + /// re-indexed if this method is altered. + /// + /// @param fieldName The name of the field + /// @param numTokens The total number of tokens contained in fields named fieldName of doc. + /// @return A normalization factor for hits on this field of this document + /// @see Field#setBoost(double) + virtual double lengthNorm(const String& fieldName, int32_t numTokens) = 0; + + /// Computes the normalization value for a query given the sum of the squared weights of each of the query + /// terms. This value is multiplied into the weight of each query term. While the classic query + /// normalization factor is computed as 1/sqrt(sumOfSquaredWeights), other implementations might completely + /// ignore sumOfSquaredWeights (ie return 1). + /// + /// This does not affect ranking, but the default implementation does make scores from different queries + /// more comparable than they would be by eliminating the magnitude of the Query vector as a factor in the + /// score. + /// + /// @param sumOfSquaredWeights The sum of the squares of query term weights + /// @return a normalization factor for query weights + virtual double queryNorm(double sumOfSquaredWeights) = 0; + + /// Encodes a normalization factor for storage in an index. + /// + /// The encoding uses a three-bit mantissa, a five-bit exponent, and the zero-exponent point at 15, thus + /// representing values from around 7x10^9 to 2x10^-9 with about one significant decimal digit of accuracy. + /// Zero is also represented. Negative numbers are rounded up to zero. 
Values too large to represent + /// are rounded down to the largest representable value. Positive values too small to represent are rounded + /// up to the smallest positive representable value. + /// + /// @see Field#setBoost(double) + static uint8_t encodeNorm(double f); + + /// Computes a score factor based on a term or phrase's frequency in a document. This value is multiplied + /// by the {@link #idf(int32_t, int32_t)} factor for each term in the query and these products are then + /// summed to form the initial score for a document. + /// + /// Terms and phrases repeated in a document indicate the topic of the document, so implementations of this + /// method usually return larger values when freq is large, and smaller values when freq is small. + /// + /// The default implementation calls {@link #tf(double)}. + /// + /// @param freq The frequency of a term within a document + /// @return A score factor based on a term's within-document frequency + virtual double tf(int32_t freq); + + /// Computes the amount of a sloppy phrase match, based on an edit distance. This value is summed for + /// each sloppy phrase match in a document to form the frequency that is passed to {@link #tf(double)}. + /// + /// A phrase match with a small edit distance to a document passage more closely matches the document, so + /// implementations of this method usually return larger values when the edit distance is small and + /// smaller values when it is large. + /// + /// @see PhraseQuery#setSlop(int32_t) + /// @param distance The edit distance of this sloppy phrase match + /// @return The frequency increment for this match + virtual double sloppyFreq(int32_t distance) = 0; + + /// Computes a score factor based on a term or phrase's frequency in a document. This value is multiplied + /// by the {@link #idf(int32_t, int32_t)} factor for each term in the query and these products are then + /// summed to form the initial score for a document. 
+ /// + /// Terms and phrases repeated in a document indicate the topic of the document, so implementations of this + /// method usually return larger values when freq is large, and smaller values when freq is small. + /// + /// @param freq The frequency of a term within a document + /// @return A score factor based on a term's within-document frequency + virtual double tf(double freq) = 0; + + /// Computes a score factor for a simple term and returns an explanation for that score factor. + /// + /// The default implementation uses: + ///
+    /// idf(searcher->docFreq(term), searcher->maxDoc());
+    /// 
+ /// + /// Note that {@link Searcher#maxDoc()} is used instead of {@link IndexReader#numDocs() IndexReader#numDocs()} + /// because also {@link Searcher#docFreq(TermPtr)} is used, and when the latter is inaccurate, so is {@link + /// Searcher#maxDoc()}, and in the same direction. In addition, {@link Searcher#maxDoc()} is more efficient + /// to compute. + /// + /// @param term The term in question + /// @param searcher The document collection being searched + /// @return An IDFExplain object that includes both an idf score factor and an explanation for the term. + virtual IDFExplanationPtr idfExplain(const TermPtr& term, const SearcherPtr& searcher); + + /// Computes a score factor for a phrase. + /// + /// The default implementation sums the idf factor for each term in the phrase. + /// + /// @param terms The terms in the phrase + /// @param searcher The document collection being searched + /// @return An IDFExplain object that includes both an idf score factor for the phrase and an explanation + /// for each term. + virtual IDFExplanationPtr idfExplain(Collection terms, const SearcherPtr& searcher); + + /// Computes a score factor based on a term's document frequency (the number of documents which contain the + /// term). This value is multiplied by the {@link #tf(int32_t)} factor for each term in the query and these + /// products are then summed to form the initial score for a document. + /// + /// Terms that occur in fewer documents are better indicators of topic, so implementations of this method + /// usually return larger values for rare terms, and smaller values for common terms. + /// + /// @param docFreq The number of documents which contain the term + /// @param numDocs The total number of documents in the collection + /// @return A score factor based on the term's document frequency + virtual double idf(int32_t docFreq, int32_t numDocs) = 0; + + /// Computes a score factor based on the fraction of all query terms that a document contains. 
This value + /// is multiplied into scores. + /// + /// The presence of a large portion of the query terms indicates a better match with the query, so + /// implementations of this method usually return larger values when the ratio between these parameters is + /// large and smaller values when the ratio between them is small. + /// + /// @param overlap The number of query terms matched in the document + /// @param maxOverlap The total number of terms in the query + /// @return A score factor based on term overlap with the query + virtual double coord(int32_t overlap, int32_t maxOverlap) = 0; + + /// Calculate a scoring factor based on the data in the payload. Overriding implementations are responsible + /// for interpreting what is in the payload. Lucene makes no assumptions about what is in the byte array. + /// + /// The default implementation returns 1. + /// + /// @param docId The docId currently being scored. If this value is {@link #NO_DOC_ID_PROVIDED}, then it + /// should be assumed that the PayloadQuery implementation does not provide document information + /// @param fieldName The fieldName of the term this payload belongs to + /// @param start The start position of the payload + /// @param end The end position of the payload + /// @param payload The payload byte array to be scored + /// @param offset The offset into the payload array + /// @param length The length in the array + /// @return An implementation dependent float to be used as a scoring factor + virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length); +}; + +} + +#endif diff --git a/include/lucene++/SimilarityDelegator.h b/include/lucene++/SimilarityDelegator.h new file mode 100644 index 00000000..a4c4f4e1 --- /dev/null +++ b/include/lucene++/SimilarityDelegator.h @@ -0,0 +1,39 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SIMILARITYDELEGATOR_H +#define SIMILARITYDELEGATOR_H + +#include "Similarity.h" + +namespace Lucene { + +/// Delegating scoring implementation. Useful in {@link Query#getSimilarity(Searcher)} implementations, +/// to override only certain methods of a Searcher's Similarity implementation. +class LPPAPI SimilarityDelegator : public Similarity { +public: + SimilarityDelegator(const SimilarityPtr& delegee); + virtual ~SimilarityDelegator(); + + LUCENE_CLASS(SimilarityDelegator); + +protected: + SimilarityPtr delegee; + +public: + virtual double computeNorm(const String& field, const FieldInvertStatePtr& state); + virtual double lengthNorm(const String& fieldName, int32_t numTokens); + virtual double queryNorm(double sumOfSquaredWeights); + virtual double tf(double freq); + virtual double sloppyFreq(int32_t distance); + virtual double idf(int32_t docFreq, int32_t numDocs); + virtual double coord(int32_t overlap, int32_t maxOverlap); + virtual double scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length); +}; + +} + +#endif diff --git a/include/lucene++/SimpleAnalyzer.h b/include/lucene++/SimpleAnalyzer.h new file mode 100644 index 00000000..b648da8e --- /dev/null +++ b/include/lucene++/SimpleAnalyzer.h @@ -0,0 +1,28 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SIMPLEANALYZER_H +#define SIMPLEANALYZER_H + +#include "Analyzer.h" + +namespace Lucene { + +/// An {@link Analyzer} that filters {@link LetterTokenizer} with {@link LowerCaseFilter} +class LPPAPI SimpleAnalyzer : public Analyzer { +public: + virtual ~SimpleAnalyzer(); + + LUCENE_CLASS(SimpleAnalyzer); + +public: + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/SimpleFSDirectory.h b/include/lucene++/SimpleFSDirectory.h new file mode 100644 index 00000000..b746fffb --- /dev/null +++ b/include/lucene++/SimpleFSDirectory.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SIMPLEFSDIRECTORY_H +#define SIMPLEFSDIRECTORY_H + +#include "FSDirectory.h" + +namespace Lucene { + +/// A straightforward implementation of {@link FSDirectory} using std::ofstream and std::ifstream. +class LPPAPI SimpleFSDirectory : public FSDirectory { +public: + /// Create a new SimpleFSDirectory for the named location and {@link NativeFSLockFactory}. + /// @param path the path of the directory. + /// @param lockFactory the lock factory to use, or null for the default ({@link NativeFSLockFactory}) + SimpleFSDirectory(const String& path, const LockFactoryPtr& lockFactory = LockFactoryPtr()); + virtual ~SimpleFSDirectory(); + + LUCENE_CLASS(SimpleFSDirectory); + +public: + /// Creates an IndexOutput for the file with the given name. 
+ virtual IndexOutputPtr createOutput(const String& name); + + /// Returns a stream reading an existing file, with the specified read buffer size. The particular Directory implementation may ignore the buffer size. + virtual IndexInputPtr openInput(const String& name); + + /// Creates an IndexInput for the file with the given name. + virtual IndexInputPtr openInput(const String& name, int32_t bufferSize); +}; + +} + +#endif diff --git a/include/lucene++/SimpleFSLockFactory.h b/include/lucene++/SimpleFSLockFactory.h new file mode 100644 index 00000000..6d9fa7ea --- /dev/null +++ b/include/lucene++/SimpleFSLockFactory.h @@ -0,0 +1,42 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SIMPLEFSLOCKFACTORY_H +#define SIMPLEFSLOCKFACTORY_H + +#include "FSLockFactory.h" +#include "Lock.h" + +namespace Lucene { + +/// Implements {@link LockFactory} using {@link File#createNewFile()}. +/// @see LockFactory +class LPPAPI SimpleFSLockFactory : public FSLockFactory { +public: + /// Create a SimpleFSLockFactory instance, with null (unset) lock directory. When you pass this factory + /// to a {@link FSDirectory} subclass, the lock directory is automatically set to the directory itself. + /// Be sure to create one instance for each directory your create! + SimpleFSLockFactory(); + + /// Instantiate using the provided directory name. + /// @param lockDir where lock files should be created. + SimpleFSLockFactory(const String& lockDir); + + virtual ~SimpleFSLockFactory(); + + LUCENE_CLASS(SimpleFSLockFactory); + +public: + /// Return a new Lock instance identified by lockName. 
+ virtual LockPtr makeLock(const String& lockName); + + /// Attempt to clear (forcefully unlock and remove) the specified lock. + virtual void clearLock(const String& lockName); +}; + +} + +#endif diff --git a/include/lucene++/SimpleLRUCache.h b/include/lucene++/SimpleLRUCache.h new file mode 100644 index 00000000..8db67077 --- /dev/null +++ b/include/lucene++/SimpleLRUCache.h @@ -0,0 +1,83 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SIMPLELRUCACHE_H +#define SIMPLELRUCACHE_H + +#include +#include "LuceneObject.h" + +namespace Lucene { + +/// General purpose LRU cache map. +/// Accessing an entry will keep the entry cached. {@link #get(const KEY&)} and +/// {@link #put(const KEY&, const VALUE&)} results in an access to the corresponding entry. 
+template +class SimpleLRUCache : public LuceneObject { +public: + typedef std::pair key_value; + typedef std::list< key_value > key_list; + typedef typename key_list::const_iterator const_iterator; + typedef boost::unordered_map map_type; + typedef typename map_type::const_iterator map_iterator; + + SimpleLRUCache(int32_t cacheSize) { + this->cacheSize = cacheSize; + } + + virtual ~SimpleLRUCache() { + } + +protected: + int32_t cacheSize; + key_list cacheList; + map_type cacheMap; + +public: + void put(const KEY& key, const VALUE& value) { + cacheList.push_front(std::make_pair(key, value)); + cacheMap[key] = cacheList.begin(); + + if ((int32_t)cacheList.size() > cacheSize) { + cacheMap.erase(cacheList.back().first); + cacheList.pop_back(); + } + } + + VALUE get(const KEY& key) { + map_iterator find = cacheMap.find(key); + if (find == cacheMap.end()) { + return VALUE(); + } + + VALUE value(find->second->second); + cacheList.erase(find->second); + cacheList.push_front(std::make_pair(key, value)); + cacheMap[key] = cacheList.begin(); + + return value; + } + + bool contains(const KEY& key) const { + return (cacheMap.find(key) != cacheMap.end()); + } + + int32_t size() const { + return (int32_t)cacheList.size(); + } + + const_iterator begin() const { + return cacheList.begin(); + } + + const_iterator end() const { + return cacheList.end(); + } +}; + +}; + +#endif diff --git a/include/lucene++/SingleInstanceLockFactory.h b/include/lucene++/SingleInstanceLockFactory.h new file mode 100644 index 00000000..df19d270 --- /dev/null +++ b/include/lucene++/SingleInstanceLockFactory.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SINGLEINSTANCELOCKFACTORY_H +#define SINGLEINSTANCELOCKFACTORY_H + +#include "LockFactory.h" + +namespace Lucene { + +/// Implements {@link LockFactory} for a single in-process instance, meaning all +/// locking will take place through this one instance. Only use this {@link LockFactory} +/// when you are certain all IndexReaders and IndexWriters for a given index are running +/// against a single shared in-process Directory instance. This is currently the +/// default locking for RAMDirectory. +/// @see LockFactory +class LPPAPI SingleInstanceLockFactory : public LockFactory { +public: + SingleInstanceLockFactory(); + virtual ~SingleInstanceLockFactory(); + + LUCENE_CLASS(SingleInstanceLockFactory); + +protected: + HashSet locks; + +public: + /// Return a new Lock instance identified by lockName. + /// @param lockName name of the lock to be created. + virtual LockPtr makeLock(const String& lockName); + + /// Attempt to clear (forcefully unlock and remove) the + /// specified lock. Only call this at a time when you are + /// certain this lock is no longer in use. + /// @param lockName name of the lock to be cleared. + virtual void clearLock(const String& lockName); +}; + +} + +#endif diff --git a/include/lucene++/SingleTermEnum.h b/include/lucene++/SingleTermEnum.h new file mode 100644 index 00000000..115078fb --- /dev/null +++ b/include/lucene++/SingleTermEnum.h @@ -0,0 +1,39 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SINGLETERMENUM_H +#define SINGLETERMENUM_H + +#include "FilteredTermEnum.h" + +namespace Lucene { + +/// Subclass of FilteredTermEnum for enumerating a single term. +/// +/// This can be used by {@link MultiTermQuery}s that need only visit one term, but want to preserve +/// MultiTermQuery semantics such as {@link MultiTermQuery#rewriteMethod}. +class LPPAPI SingleTermEnum : public FilteredTermEnum { +public: + SingleTermEnum(const IndexReaderPtr& reader, const TermPtr& singleTerm); + virtual ~SingleTermEnum(); + + LUCENE_CLASS(SingleTermEnum); + +protected: + TermPtr singleTerm; + bool _endEnum; + +public: + virtual double difference(); + +protected: + virtual bool endEnum(); + virtual bool termCompare(const TermPtr& term); +}; + +} + +#endif diff --git a/include/lucene++/SloppyPhraseScorer.h b/include/lucene++/SloppyPhraseScorer.h new file mode 100644 index 00000000..eff7aabc --- /dev/null +++ b/include/lucene++/SloppyPhraseScorer.h @@ -0,0 +1,69 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SLOPPYPHRASESCORER_H +#define SLOPPYPHRASESCORER_H + +#include "PhraseScorer.h" + +namespace Lucene { + +class SloppyPhraseScorer : public PhraseScorer { +public: + SloppyPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, int32_t slop, ByteArray norms); + virtual ~SloppyPhraseScorer(); + + LUCENE_CLASS(SloppyPhraseScorer); + +protected: + int32_t slop; + Collection repeats; + Collection tmpPos; // for flipping repeating pps + bool checkedRepeats; + +public: + /// Score a candidate doc for all slop-valid position-combinations (matches) encountered while + /// traversing/hopping the PhrasePositions. The score contribution of a match depends on the distance: + /// - highest score for distance=0 (exact match). + /// - score gets lower as distance gets higher. + /// Example: for query "a b"~2, a document "x a b a y" can be scored twice: once for "a b" (distance=0), + /// and once for "b a" (distance=2). + /// Possibly not all valid combinations are encountered, because for efficiency we always propagate the + /// least PhrasePosition. This allows to base on PriorityQueue and move forward faster. + /// As result, for example, document "a b c b a" would score differently for queries "a b c"~4 and + /// "c b a"~4, although they really are equivalent. Similarly, for doc "a b c b a f g", query "c b"~2 + /// would get same score as "g f"~2, although "c b"~2 could be matched twice. We may want to fix this + /// in the future (currently not, for performance reasons). + virtual double phraseFreq(); + +protected: + /// Flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back. + /// Assumes: pp!=pp2, pp2 in pq, pp not in pq. Called only when there are repeating pps. + PhrasePositions* flip(PhrasePositions* pp, PhrasePositions* pp2); + + /// Init PhrasePositions in place. 
+ /// There is a one time initialization for this scorer: + /// - Put in repeats[] each pp that has another pp with same position in the doc. + /// - Also mark each such pp by pp.repeats = true. + /// Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient. + /// In particular, this allows to score queries with no repetitions with no overhead due to this computation. + /// - Example 1 - query with no repetitions: "ho my"~2 + /// - Example 2 - query with repetitions: "ho my my"~2 + /// - Example 3 - query with repetitions: "my ho my"~2 + /// Init per doc with repeats in query, includes propagating some repeating pp's to avoid false phrase detection. + /// @return end (max position), or -1 if any term ran out (ie. done) + int32_t initPhrasePositions(); + + /// We disallow two pp's to have the same TermPosition, thereby verifying multiple occurrences in the query + /// of the same word would go elsewhere in the matched doc. + /// @return null if differ (i.e. valid) otherwise return the higher offset PhrasePositions out of the first + /// two PPs found to not differ. + PhrasePositions* termPositionsDiffer(PhrasePositions* pp); +}; + +} + +#endif diff --git a/include/lucene++/SmallDouble.h b/include/lucene++/SmallDouble.h new file mode 100644 index 00000000..5cf72440 --- /dev/null +++ b/include/lucene++/SmallDouble.h @@ -0,0 +1,33 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SMALLDOUBLE_H +#define SMALLDOUBLE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Floating point numbers smaller than 32 bits. 
+class SmallDouble : public LuceneObject { +public: + virtual ~SmallDouble(); + LUCENE_CLASS(SmallDouble); + +public: + /// Converts a floating point number to an 8 bit float. + /// Values less than zero are all mapped to zero. + /// Values are truncated (rounded down) to the nearest 8 bit value. + /// Values between zero and the smallest representable value are rounded up. + static uint8_t doubleToByte(double f); + + /// Converts an 8 bit floating point number to a double. + static double byteToDouble(uint8_t b); +}; + +} + +#endif diff --git a/include/lucene++/SnapshotDeletionPolicy.h b/include/lucene++/SnapshotDeletionPolicy.h new file mode 100644 index 00000000..1f3ef138 --- /dev/null +++ b/include/lucene++/SnapshotDeletionPolicy.h @@ -0,0 +1,53 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SNAPSHOTDELETIONPOLICY_H +#define SNAPSHOTDELETIONPOLICY_H + +#include "IndexDeletionPolicy.h" + +namespace Lucene { + +class LPPAPI SnapshotDeletionPolicy : public IndexDeletionPolicy { +public: + SnapshotDeletionPolicy(const IndexDeletionPolicyPtr& primary); + virtual ~SnapshotDeletionPolicy(); + + LUCENE_CLASS(SnapshotDeletionPolicy); + +protected: + IndexCommitPtr lastCommit; + IndexDeletionPolicyPtr primary; + String _snapshot; + +public: + /// This is called once when a writer is first instantiated to give the policy a chance to remove old + /// commit points. + virtual void onInit(Collection commits); + + /// This is called each time the writer completed a commit. This gives the policy a chance to remove + /// old commit points with each commit. + virtual void onCommit(Collection commits); + + /// Take a snapshot of the most recent commit to the index. 
You must call release() to free this snapshot. + /// Note that while the snapshot is held, the files it references will not be deleted, which will consume + /// additional disk space in your index. If you take a snapshot at a particularly bad time (say just before + /// you call optimize()) then in the worst case this could consume an extra 1X of your total index size, + /// until you release the snapshot. + virtual IndexCommitPtr snapshot(); + + /// Release the currently held snapshot. + virtual void release(); + +protected: + Collection wrapCommits(Collection commits); + + friend class MyCommitPoint; +}; + +} + +#endif diff --git a/include/lucene++/Sort.h b/include/lucene++/Sort.h new file mode 100644 index 00000000..e51d5320 --- /dev/null +++ b/include/lucene++/Sort.h @@ -0,0 +1,115 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SORT_H +#define SORT_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Encapsulates sort criteria for returned hits. +/// +/// The fields used to determine sort order must be carefully chosen. Documents must contain a single term +/// in such a field, and the value of the term should indicate the document's relative position in a given +/// sort order. The field must be indexed, but should not be tokenized, and does not need to be stored +/// (unless you happen to want it back with the rest of your document data). In other words: +/// +///
+/// document->add(newLucene(L"byNumber", StringUtils::toString(x), Field::STORE_NO, Field::INDEX_NOT_ANALYZED));
+/// 
+/// +/// Valid Types of Values +/// +/// There are four possible kinds of term values which may be put into sorting fields: Integers, Longs, Doubles, +/// or Strings. Unless {@link SortField SortField} objects are specified, the type of value in the field is +/// determined by parsing the first term in the field. +/// +/// Integer term values should contain only digits and an optional preceding negative sign. Values must be base +/// 10 and in the range INT_MIN and INT_MAX inclusive. Documents which should appear first in the sort should +/// have low value integers, later documents high values (ie. the documents should be numbered 1..n where 1 is +/// the first and n the last). +/// +/// Long term values should contain only digits and an optional preceding negative sign. Values must be base 10 +/// and in the range LLONG_MIN and LLONG_MAX inclusive. Documents which should appear first in the sort should +/// have low value integers, later documents high values. +/// +/// Double term values should conform to values accepted by Double (except that NaN and Infinity are not +/// supported). Documents which should appear first in the sort should have low values, later documents high +/// values. +/// +/// String term values can contain any valid String, but should not be tokenized. The values are sorted according +/// to their comparable natural order. Note that using this type of term value has higher memory requirements +/// than the other two types. +/// +/// Object Reuse +/// +/// One of these objects can be used multiple times and the sort order changed between usages. +/// This class is thread safe. +/// +/// Memory Usage +/// +/// Sorting uses of caches of term values maintained by the internal HitQueue(s). The cache is static and +/// contains an integer or double array of length IndexReader::maxDoc() for each field name for which a sort is +/// performed. In other words, the size of the cache in bytes is: +/// +///
+/// 4 * IndexReader::maxDoc() * (# of different fields actually used to sort)
+/// 
+/// +/// For String fields, the cache is larger: in addition to the above array, the value of every term in the +/// field is kept in memory. If there are many unique terms in the field, this could be quite large. +/// +/// Note that the size of the cache is not affected by how many fields are in the index and might be used to +/// sort - only by the ones actually used to sort a result set. +class LPPAPI Sort : public LuceneObject { +public: + /// Sorts by computed relevance. This is the same sort criteria as calling {@link + /// Searcher#search(QueryPtr, int32_t) Searcher#search()} without a sort criteria, only with slightly more + /// overhead. + Sort(); + + /// Sorts by the criteria in the given SortField. + Sort(const SortFieldPtr& field); + + /// Sorts in succession by the criteria in each SortField. + Sort(Collection fields); + + virtual ~Sort(); + + LUCENE_CLASS(Sort); + +public: + /// Internal representation of the sort criteria + Collection fields; + +public: + /// Represents sorting by computed relevance. Using this sort criteria returns the same results as calling + /// {@link Searcher#search(QueryPtr, int32_t) Searcher#search()} without a sort criteria, only with slightly + /// more overhead. + static SortPtr RELEVANCE(); + + /// Represents sorting by index order. + static SortPtr INDEXORDER(); + + /// Sets the sort to the given criteria. + void setSort(const SortFieldPtr& field); + + /// Sets the sort to the given criteria in succession. + void setSort(Collection fields); + + /// Representation of the sort criteria. 
+ /// @return Array of SortField objects used in this sort criteria + Collection getSort(); + + virtual String toString(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/SortField.h b/include/lucene++/SortField.h new file mode 100644 index 00000000..893e7450 --- /dev/null +++ b/include/lucene++/SortField.h @@ -0,0 +1,148 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SORTFIELD_H +#define SORTFIELD_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Stores information about how to sort documents by terms in an individual field. Fields must be indexed +/// in order to sort by them. +class LPPAPI SortField : public LuceneObject { +public: + /// Creates a sort by terms in the given field with the type of term values explicitly given. + /// @param field Name of field to sort by. Can be null if type is SCORE or DOC. + /// @param type Type of values in the terms. + /// @param reverse True if natural order should be reversed. + SortField(const String& field, int32_t type, bool reverse = false); + + /// Creates a sort, possibly in reverse, by terms in the given field, parsed to numeric values using a + /// custom {@link Parser}. + /// @param field Name of field to sort by + /// @param parser Instance of a {@link Parser}, which must subclass one of the existing numeric parsers from + /// {@link FieldCache}. Sort type is inferred by testing which numeric parser the parser subclasses. + /// @param reverse True if natural order should be reversed. 
+ SortField(const String& field, const ParserPtr& parser, bool reverse = false); + + /// Creates a sort, possibly in reverse, by terms in the given field sorted according to the given locale. + /// @param field Name of field to sort by, cannot be null. + /// @param locale Locale of values in the field. + /// @param reverse True if natural order should be reversed. + SortField(const String& field, const std::locale& locale, bool reverse = false); + + /// Creates a sort, possibly in reverse, with a custom comparison function. + /// @param field Name of field to sort by; cannot be null. + /// @param comparator Returns a comparator for sorting hits. + /// @param reverse True if natural order should be reversed. + SortField(const String& field, const FieldComparatorSourcePtr& comparator, bool reverse = false); + + virtual ~SortField(); + + LUCENE_CLASS(SortField); + +public: + /// Sort by document score (relevancy). Sort values are Double and higher values are at the front. + static const int32_t SCORE; + + /// Sort by document number (index order). Sort values are Integer and lower values are at the front. + static const int32_t DOC; + + /// Sort using term values as Strings. Sort values are String and lower values are at the front. + static const int32_t STRING; + + /// Sort using term values as Integers. Sort values are Integer and lower values are at the front. + static const int32_t INT; + + /// Sort using term values as Floats. Sort values are Float and lower values are at the front. + static const int32_t FLOAT; + + /// Sort using term values as Longs. Sort values are Long and lower values are at the front. + static const int32_t LONG; + + /// Sort using term values as Doubles. Sort values are Double and lower values are at the front. + static const int32_t DOUBLE; + + /// Sort using term values as Shorts. Sort values are Short and lower values are at the front. + static const int32_t SHORT; + + /// Sort using a custom Comparator. 
Sort values are any ComparableValue and sorting is done according + /// to natural order. + static const int32_t CUSTOM; + + /// Sort using term values as Bytes. Sort values are Byte and lower values are at the front. + static const int32_t BYTE; + + /// Sort using term values as Strings, but comparing by value (using String::compare) for all comparisons. + /// This is typically slower than {@link #STRING}, which uses ordinals to do the sorting. + static const int32_t STRING_VAL; + +INTERNAL: + bool reverse; // defaults to natural order + + String field; + int32_t type; // defaults to determining type dynamically + localePtr locale; // defaults to "natural order" (no Locale) + ParserPtr parser; + +private: + /// Used for CUSTOM sort + FieldComparatorSourcePtr comparatorSource; + +public: + /// Represents sorting by document score (relevancy). + static SortFieldPtr FIELD_SCORE(); + + /// Represents sorting by document number (index order). + static SortFieldPtr FIELD_DOC(); + + /// Returns the name of the field. Could return null if the sort is by SCORE or DOC. + /// @return Name of field, possibly null. + String getField(); + + /// Returns the type of contents in the field. + /// @return One of the constants SCORE, DOC, STRING, INT or DOUBLE. + int32_t getType(); + + /// Returns the Locale by which term values are interpreted. + localePtr getLocale(); + + /// Returns the instance of a {@link FieldCache} parser that fits to the given sort type. May return null + /// if no parser was specified. Sorting is using the default parser then. + /// @return An instance of a parser, or null. + ParserPtr getParser(); + + /// Returns whether the sort should be reversed. + /// @return True if natural order should be reversed. + bool getReverse(); + + /// Returns the {@link FieldComparatorSource} used for custom sorting + FieldComparatorSourcePtr getComparatorSource(); + + virtual String toString(); + + /// Returns true if other is equal to this. 
If a {@link FieldComparatorSource} or {@link Parser} was provided, + /// it must properly implement equals (unless a singleton is always used). + virtual bool equals(const LuceneObjectPtr& other); + + virtual int32_t hashCode(); + + /// Returns the {@link FieldComparator} to use for sorting. + /// @param numHits number of top hits the queue will store + /// @param sortPos position of this SortField within {@link Sort}. The comparator is primary if sortPos == 0, + /// secondary if sortPos == 1, etc. Some comparators can optimize themselves when they are the primary sort. + /// @return {@link FieldComparator} to use when sorting + FieldComparatorPtr getComparator(int32_t numHits, int32_t sortPos); + +protected: + /// Sets field and type, and ensures field is not NULL unless type is SCORE or DOC + void initFieldType(const String& field, int32_t type); +}; + +} + +#endif diff --git a/include/lucene++/SortedTermVectorMapper.h b/include/lucene++/SortedTermVectorMapper.h new file mode 100644 index 00000000..ecb0851d --- /dev/null +++ b/include/lucene++/SortedTermVectorMapper.h @@ -0,0 +1,61 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SORTEDTERMVECTORMAPPER_H +#define SORTEDTERMVECTORMAPPER_H + +#include +#include "TermVectorMapper.h" + +namespace Lucene { + +/// Store a sorted collection of {@link TermVectorEntry}s. Collects all term information into a single, +/// sorted set. +/// +/// NOTE: This Mapper ignores all Field information for the Document. This means that if you are using offset/ +/// positions you will not know what Fields they correlate with. 
+/// +/// This is not thread-safe +class LPPAPI SortedTermVectorMapper : public TermVectorMapper { +public: + /// @param comparator A Comparator for sorting {@link TermVectorEntry}s + SortedTermVectorMapper(TermVectorEntryComparator comparator); + + SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator); + + virtual ~SortedTermVectorMapper(); + + LUCENE_CLASS(SortedTermVectorMapper); + +protected: + Collection currentSet; + MapStringTermVectorEntry termToTVE; + bool storeOffsets; + bool storePositions; + TermVectorEntryComparator comparator; + +public: + static const wchar_t* ALL; + +public: + /// Map the Term Vector information into your own structure + virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); + + /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. + virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); + + /// The TermVectorEntrySet. A SortedSet of {@link TermVectorEntry} objects. Sort is by the comparator passed + /// into the constructor. + /// + /// This set will be empty until after the mapping process takes place. + /// + /// @return The sorted set of {@link TermVectorEntry}. + Collection getTermVectorEntrySet(); +}; + +} + +#endif diff --git a/include/lucene++/SortedVIntList.h b/include/lucene++/SortedVIntList.h new file mode 100644 index 00000000..02a9a98f --- /dev/null +++ b/include/lucene++/SortedVIntList.h @@ -0,0 +1,88 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SORTEDVINTLIST_H +#define SORTEDVINTLIST_H + +#include "DocIdSet.h" + +namespace Lucene { + +/// Stores and iterate on sorted integers in compressed form in RAM. +/// +/// The code for compressing the differences between ascending integers was borrowed from {@link IndexInput} +/// and {@link IndexOutput}. +/// +/// NOTE: this class assumes the stored integers are doc Ids (hence why it extends {@link DocIdSet}). Therefore +/// its {@link #iterator()} assumes {@link DocIdSetIterator#NO_MORE_DOCS} can be used as sentinel. If you +/// intend to use this value, then make sure it's not used during search flow. +class LPPAPI SortedVIntList : public DocIdSet { +public: + /// Create a SortedVIntList from all elements of an array of integers. + /// @param sortedInts A sorted array of non negative integers. + SortedVIntList(Collection sortedInts); + + /// Create a SortedVIntList from an array of integers. + /// @param sortedInts A sorted array of non negative integers. + /// @param inputSize The number of integers to be used from the array. + SortedVIntList(Collection sortedInts, int32_t inputSize); + + /// Create a SortedVIntList from a BitSet. + /// @param bits A bit set representing a set of integers. + SortedVIntList(const BitSetPtr& bits); + + /// Create a SortedVIntList from an OpenBitSet. + /// @param bits A bit set representing a set of integers. + SortedVIntList(const OpenBitSetPtr& bits); + + /// Create a SortedVIntList. + /// @param docIdSetIterator An iterator providing document numbers as a set of integers. + /// This DocIdSetIterator is iterated completely when this constructor is called and it must provide the + /// integers in non decreasing order. 
+ SortedVIntList(const DocIdSetIteratorPtr& docIdSetIterator); + + virtual ~SortedVIntList(); + + LUCENE_CLASS(SortedVIntList); + +public: + /// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, a SortedVIntList representing the + /// index numbers of the set bits will be smaller than that BitSet. + static const int32_t BITS2VINTLIST_SIZE; + +protected: + static const int32_t VB1; + static const int32_t BIT_SHIFT; + static const int32_t MAX_BYTES_PER_INT; + + int32_t _size; + ByteArray bytes; + int32_t lastBytePos; + int32_t lastInt; + +public: + /// @return The total number of sorted integers. + int32_t size(); + + /// @return The size of the byte array storing the compressed sorted integers. + int32_t getByteSize(); + + /// This DocIdSet implementation is cacheable. + virtual bool isCacheable(); + + /// @return An iterator over the sorted integers. + virtual DocIdSetIteratorPtr iterator(); + +protected: + void initBytes(); + void addInt(int32_t nextInt); + + friend class SortedDocIdSetIterator; +}; + +} + +#endif diff --git a/include/lucene++/SpanFilter.h b/include/lucene++/SpanFilter.h new file mode 100644 index 00000000..f55f6d9f --- /dev/null +++ b/include/lucene++/SpanFilter.h @@ -0,0 +1,36 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANFILTER_H +#define SPANFILTER_H + +#include "Filter.h" + +namespace Lucene { + +/// Abstract base class providing a mechanism to restrict searches to a subset of an index and also maintains +/// and returns position information. +/// +/// This is useful if you want to compare the positions from a SpanQuery with the positions of items in a filter. 
+/// For instance, if you had a SpanFilter that marked all the occurrences of the word "foo" in documents, and +/// then you entered a new SpanQuery containing bar, you could not only filter by the word foo, but you could +/// then compare position information for post processing. +class LPPAPI SpanFilter : public Filter { +public: + virtual ~SpanFilter(); + LUCENE_CLASS(SpanFilter); + +public: + /// Returns a SpanFilterResult with true for documents which should be permitted in search results, and + /// false for those that should not and Spans for where the true docs match. + /// @param reader The {@link IndexReader} to load position and DocIdSet information from + /// @return A {@link SpanFilterResult} + virtual SpanFilterResultPtr bitSpans(const IndexReaderPtr& reader) = 0; +}; + +} + +#endif diff --git a/include/lucene++/SpanFilterResult.h b/include/lucene++/SpanFilterResult.h new file mode 100644 index 00000000..37b87ca3 --- /dev/null +++ b/include/lucene++/SpanFilterResult.h @@ -0,0 +1,77 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANFILTERRESULT_H +#define SPANFILTERRESULT_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// The results of a SpanQueryFilter. 
Wraps the BitSet and the position information from the SpanQuery +class LPPAPI SpanFilterResult : public LuceneObject { +public: + /// @param docIdSet The DocIdSet for the Filter + /// @param positions A List of {@link PositionInfo} objects + SpanFilterResult(const DocIdSetPtr& docIdSet, Collection positions); + + virtual ~SpanFilterResult(); + + LUCENE_CLASS(SpanFilterResult); + +protected: + DocIdSetPtr docIdSet; + Collection positions; // Spans spans + +public: + /// The first entry in the array corresponds to the first "on" bit. Entries are increasing by + /// document order. + /// @return A List of PositionInfo objects + Collection getPositions(); + + /// Returns the docIdSet + DocIdSetPtr getDocIdSet(); +}; + +class LPPAPI PositionInfo : public LuceneObject { +public: + PositionInfo(int32_t doc); + virtual ~PositionInfo(); + + LUCENE_CLASS(PositionInfo); + +protected: + int32_t doc; + Collection positions; + +public: + void addPosition(int32_t start, int32_t end); + int32_t getDoc(); + Collection getPositions(); +}; + +class LPPAPI StartEnd : public LuceneObject { +public: + StartEnd(int32_t start, int32_t end); + virtual ~StartEnd(); + + LUCENE_CLASS(StartEnd); + +protected: + int32_t start; + int32_t end; + +public: + /// @return The end position of this match + int32_t getEnd(); + + /// @return The start position of this match + int32_t getStart(); +}; + +} + +#endif diff --git a/include/lucene++/SpanFirstQuery.h b/include/lucene++/SpanFirstQuery.h new file mode 100644 index 00000000..7d264774 --- /dev/null +++ b/include/lucene++/SpanFirstQuery.h @@ -0,0 +1,52 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANFIRSTQUERY_H +#define SPANFIRSTQUERY_H + +#include "SpanQuery.h" +#include "Spans.h" + +namespace Lucene { + +/// Matches spans near the beginning of a field. +class LPPAPI SpanFirstQuery : public SpanQuery { +public: + /// Construct a SpanFirstQuery matching spans in match whose end position is less than or equal to end. + SpanFirstQuery(const SpanQueryPtr& match, int32_t end); + virtual ~SpanFirstQuery(); + + LUCENE_CLASS(SpanFirstQuery); + +protected: + SpanQueryPtr match; + int32_t end; + +public: + using SpanQuery::toString; + + /// Return the SpanQuery whose matches are filtered. + SpanQueryPtr getMatch(); + + /// Return the maximum end position permitted in a match. + int32_t getEnd(); + + virtual String getField(); + virtual String toString(const String& field); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual void extractTerms(SetTerm terms); + virtual SpansPtr getSpans(const IndexReaderPtr& reader); + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + friend class FirstSpans; +}; + +} + +#endif diff --git a/include/lucene++/SpanNearQuery.h b/include/lucene++/SpanNearQuery.h new file mode 100644 index 00000000..5c321259 --- /dev/null +++ b/include/lucene++/SpanNearQuery.h @@ -0,0 +1,58 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANNEARQUERY_H +#define SPANNEARQUERY_H + +#include "SpanQuery.h" + +namespace Lucene { + +/// Matches spans which are near one another. 
One can specify slop, the maximum number of intervening +/// unmatched positions, as well as whether matches are required to be in-order. +class LPPAPI SpanNearQuery : public SpanQuery { +public: + /// Construct a SpanNearQuery. Matches spans matching a span from each clause, with up to slop total + /// unmatched positions between them. * When inOrder is true, the spans from each clause must be + /// ordered as in clauses. + SpanNearQuery(Collection clauses, int32_t slop, bool inOrder, bool collectPayloads = true); + virtual ~SpanNearQuery(); + + LUCENE_CLASS(SpanNearQuery); + +protected: + Collection clauses; + int32_t slop; + bool inOrder; + + String field; + bool collectPayloads; + +public: + using SpanQuery::toString; + + /// Return the clauses whose spans are matched. + Collection getClauses(); + + /// Return the maximum number of intervening unmatched positions permitted. + int32_t getSlop(); + + /// Return true if matches are required to be in-order. + bool isInOrder(); + + virtual String getField(); + virtual void extractTerms(SetTerm terms); + virtual String toString(const String& field); + virtual SpansPtr getSpans(const IndexReaderPtr& reader); + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/SpanNotQuery.h b/include/lucene++/SpanNotQuery.h new file mode 100644 index 00000000..3b3976c5 --- /dev/null +++ b/include/lucene++/SpanNotQuery.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANNOTQUERY_H +#define SPANNOTQUERY_H + +#include "SpanQuery.h" + +namespace Lucene { + +/// Removes matches which overlap with another SpanQuery. +class LPPAPI SpanNotQuery : public SpanQuery { +public: + /// Construct a SpanNotQuery matching spans from include which have no overlap with spans from exclude. + SpanNotQuery(const SpanQueryPtr& include, const SpanQueryPtr& exclude); + virtual ~SpanNotQuery(); + + LUCENE_CLASS(SpanNotQuery); + +protected: + SpanQueryPtr include; + SpanQueryPtr exclude; + +public: + using SpanQuery::toString; + + /// Return the SpanQuery whose matches are filtered. + SpanQueryPtr getInclude(); + + /// Return the SpanQuery whose matches must not overlap those returned. + SpanQueryPtr getExclude(); + + virtual String getField(); + virtual void extractTerms(SetTerm terms); + virtual String toString(const String& field); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual SpansPtr getSpans(const IndexReaderPtr& reader); + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/SpanOrQuery.h b/include/lucene++/SpanOrQuery.h new file mode 100644 index 00000000..8bee7985 --- /dev/null +++ b/include/lucene++/SpanOrQuery.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANORQUERY_H +#define SPANORQUERY_H + +#include "SpanQuery.h" + +namespace Lucene { + +/// Matches the union of its clauses. 
+class LPPAPI SpanOrQuery : public SpanQuery { +public: + /// Construct a SpanOrQuery merging the provided clauses. + SpanOrQuery(Collection clauses); + virtual ~SpanOrQuery(); + + LUCENE_CLASS(SpanOrQuery); + +protected: + Collection clauses; + String field; + +public: + using SpanQuery::toString; + + /// Return the clauses whose spans are matched. + Collection getClauses(); + + virtual String getField(); + virtual void extractTerms(SetTerm terms); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + virtual String toString(const String& field); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual SpansPtr getSpans(const IndexReaderPtr& reader); + + friend class OrSpans; +}; + +} + +#endif diff --git a/include/lucene++/SpanQuery.h b/include/lucene++/SpanQuery.h new file mode 100644 index 00000000..8e0773c8 --- /dev/null +++ b/include/lucene++/SpanQuery.h @@ -0,0 +1,32 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANQUERY_H +#define SPANQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// Base class for span-based queries. +class LPPAPI SpanQuery : public Query { +public: + virtual ~SpanQuery(); + LUCENE_CLASS(SpanQuery); + +public: + /// Returns the matches for this query in an index. Used internally to search for spans. + virtual SpansPtr getSpans(const IndexReaderPtr& reader) = 0; + + /// Returns the name of the field matched by this query. 
+ virtual String getField() = 0; + + virtual WeightPtr createWeight(const SearcherPtr& searcher); +}; + +} + +#endif diff --git a/include/lucene++/SpanQueryFilter.h b/include/lucene++/SpanQueryFilter.h new file mode 100644 index 00000000..0ef18863 --- /dev/null +++ b/include/lucene++/SpanQueryFilter.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANQUERYFILTER_H +#define SPANQUERYFILTER_H + +#include "SpanFilter.h" + +namespace Lucene { + +/// Constrains search results to only match those which also match a provided query. Also provides position +/// information about where each document matches at the cost of extra space compared with the +/// QueryWrapperFilter. There is an added cost to this above what is stored in a {@link QueryWrapperFilter}. +/// Namely, the position information for each matching document is stored. +/// +/// This filter does not cache. See the {@link CachingSpanFilter} for a wrapper that caches. +class LPPAPI SpanQueryFilter : public SpanFilter { +public: + /// Constructs a filter which only matches documents matching query. + /// @param query The {@link SpanQuery} to use as the basis for the Filter. 
+ SpanQueryFilter(const SpanQueryPtr& query = SpanQueryPtr()); + + virtual ~SpanQueryFilter(); + + LUCENE_CLASS(SpanQueryFilter); + +protected: + SpanQueryPtr query; + +public: + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); + virtual SpanFilterResultPtr bitSpans(const IndexReaderPtr& reader); + + SpanQueryPtr getQuery(); + + virtual String toString(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/SpanScorer.h b/include/lucene++/SpanScorer.h new file mode 100644 index 00000000..a8685645 --- /dev/null +++ b/include/lucene++/SpanScorer.h @@ -0,0 +1,53 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANSCORER_H +#define SPANSCORER_H + +#include "Scorer.h" + +namespace Lucene { + +/// Public for extension only. +class LPPAPI SpanScorer : public Scorer { +public: + SpanScorer(const SpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms); + virtual ~SpanScorer(); + + LUCENE_CLASS(SpanScorer); + +protected: + SpansPtr spans; + WeightPtr weight; + ByteArray norms; + double value; + bool more; + int32_t doc; + double freq; + +public: + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); + virtual int32_t docID(); + virtual double score(); + virtual float termFreq(){ + return freq; + } + +protected: + virtual bool setFreqCurrentDoc(); + + /// This method is no longer an official member of {@link Scorer}, but it is needed by SpanWeight + /// to build an explanation. 
+ virtual ExplanationPtr explain(int32_t doc); + + friend class SpanWeight; + friend class PayloadNearSpanWeight; +}; + +} + +#endif diff --git a/include/lucene++/SpanTermQuery.h b/include/lucene++/SpanTermQuery.h new file mode 100644 index 00000000..2f11cd10 --- /dev/null +++ b/include/lucene++/SpanTermQuery.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANTERMQUERY_H +#define SPANTERMQUERY_H + +#include "SpanQuery.h" + +namespace Lucene { + +/// Matches spans containing a term. +class LPPAPI SpanTermQuery : public SpanQuery { +public: + /// Construct a SpanTermQuery matching the named term's spans. + SpanTermQuery(const TermPtr& term); + virtual ~SpanTermQuery(); + + LUCENE_CLASS(SpanTermQuery); + +protected: + TermPtr term; + +public: + using SpanQuery::toString; + + /// Return the term whose spans are matched. + TermPtr getTerm(); + + virtual String getField(); + virtual void extractTerms(SetTerm terms); + virtual String toString(const String& field); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual SpansPtr getSpans(const IndexReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/SpanWeight.h b/include/lucene++/SpanWeight.h new file mode 100644 index 00000000..11d895a7 --- /dev/null +++ b/include/lucene++/SpanWeight.h @@ -0,0 +1,47 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANWEIGHT_H +#define SPANWEIGHT_H + +#include "Weight.h" + +namespace Lucene { + +/// Public for use by other weight implementations +class LPPAPI SpanWeight : public Weight { +public: + SpanWeight(const SpanQueryPtr& query, const SearcherPtr& searcher); + virtual ~SpanWeight(); + + LUCENE_CLASS(SpanWeight); + +protected: + SimilarityPtr similarity; + double value; + double idf; + double queryNorm; + double queryWeight; + + SetTerm terms; + SpanQueryPtr query; + IDFExplanationPtr idfExp; + +public: + virtual QueryPtr getQuery(); + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); + + friend class PayloadNearSpanScorer; + friend class PayloadTermSpanScorer; +}; + +} + +#endif diff --git a/include/lucene++/Spans.h b/include/lucene++/Spans.h new file mode 100644 index 00000000..8def3dd3 --- /dev/null +++ b/include/lucene++/Spans.h @@ -0,0 +1,78 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SPANS_H +#define SPANS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// An enumeration of span matches. Used to implement span searching. Each span represents a range of term +/// positions within a document. Matches are enumerated in order, by increasing document number, within that +/// by increasing start position and finally by increasing end position. 
+class LPPAPI Spans : public LuceneObject { +public: + virtual ~Spans(); + LUCENE_CLASS(Spans); + +public: + /// Move to the next match, returning true if any such exists. + virtual bool next() = 0; + + /// Skips to the first match beyond the current, whose document number is greater than or equal to target. + /// + /// Returns true if there is such a match. + /// + /// Behaves as if written: + ///
+    /// bool skipTo(int32_t target)
+    /// {
+    ///     do
+    ///     {
+    ///         if (!next())
+    ///             return false;
+    ///     }
+    ///     while (target > doc());
+    ///     return true;
+    /// }
+    /// 
+ /// Most implementations are considerably more efficient than that. + virtual bool skipTo(int32_t target) = 0; + + /// Returns the document number of the current match. Initially invalid. + virtual int32_t doc() = 0; + + /// Returns the start position of the current match. Initially invalid. + virtual int32_t start() = 0; + + /// Returns the end position of the current match. Initially invalid. + virtual int32_t end() = 0; + + /// Returns the payload data for the current span. This is invalid until {@link #next()} is called for the + /// first time. This method must not be called more than once after each call of {@link #next()}. However, + /// most payloads are loaded lazily, so if the payload data for the current position is not needed, this + /// method may not be called at all for performance reasons. An ordered SpanQuery does not lazy load, so + /// if you have payloads in your index and you do not want ordered SpanNearQuerys to collect payloads, you + /// can disable collection with a constructor option. + /// + /// Note that the return type is a collection, thus the ordering should not be relied upon. + /// + /// @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable + /// is false + virtual Collection getPayload() = 0; + + /// Checks if a payload can be loaded at this position. + /// + /// Payloads can only be loaded once per call to {@link #next()}. + /// + /// @return true if there is a payload available at this position that can be loaded + virtual bool isPayloadAvailable() = 0; +}; + +} + +#endif diff --git a/include/lucene++/StandardAnalyzer.h b/include/lucene++/StandardAnalyzer.h new file mode 100644 index 00000000..58e28fca --- /dev/null +++ b/include/lucene++/StandardAnalyzer.h @@ -0,0 +1,86 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STANDARDANALYZER_H +#define STANDARDANALYZER_H + +#include "Analyzer.h" + +namespace Lucene { + +/// Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link LowerCaseFilter} and {@link StopFilter}, using +/// a list of English stop words. +/// +/// You must specify the required {@link Version} compatibility when creating StandardAnalyzer: +/// +///
    +///
  • As of 2.9, StopFilter preserves position increments +///
  • As of 2.4, Tokens incorrectly identified as acronyms are corrected +///
+class LPPAPI StandardAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words ({@link #STOP_WORDS_SET}). + /// @param matchVersion Lucene version to match. + StandardAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + /// @param matchVersion Lucene version to match. + /// @param stopWords stop words + StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords); + + /// Builds an analyzer with the stop words from the given file. + /// @see WordlistLoader#getWordSet(const String&, const String&) + /// @param matchVersion Lucene version to match. + /// @param stopwords File to read stop words from. + StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords); + + /// Builds an analyzer with the stop words from the given reader. + /// @see WordlistLoader#getWordSet(ReaderPtr, const String&) + /// @param matchVersion Lucene version to match. + /// @param stopwords Reader to read stop words from. + StandardAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords); + + virtual ~StandardAnalyzer(); + + LUCENE_CLASS(StandardAnalyzer); + +public: + /// Default maximum allowed token length + static const int32_t DEFAULT_MAX_TOKEN_LENGTH; + +protected: + HashSet stopSet; + + /// Specifies whether deprecated acronyms should be replaced with HOST type. + bool replaceInvalidAcronym; + bool enableStopPositionIncrements; + + LuceneVersion::Version matchVersion; + + int32_t maxTokenLength; + +protected: + /// Construct an analyzer with the given stop words. + void ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet stopWords); + +public: + /// Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter} + /// and a {@link StopFilter}. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Set maximum allowed token length. 
If a token is seen that exceeds this length then it is discarded. This setting + /// only takes effect the next time tokenStream or reusableTokenStream is called. + void setMaxTokenLength(int32_t length); + + /// @see #setMaxTokenLength + int32_t getMaxTokenLength(); + + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/StandardFilter.h b/include/lucene++/StandardFilter.h new file mode 100644 index 00000000..fa30bfab --- /dev/null +++ b/include/lucene++/StandardFilter.h @@ -0,0 +1,41 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STANDARDFILTER_H +#define STANDARDFILTER_H + +#include "TokenFilter.h" + +namespace Lucene { + +/// Normalizes tokens extracted with {@link StandardTokenizer}. +class LPPAPI StandardFilter : public TokenFilter { +public: + /// Construct filtering input. + StandardFilter(const TokenStreamPtr& input); + virtual ~StandardFilter(); + + LUCENE_CLASS(StandardFilter); + +protected: + TypeAttributePtr typeAtt; + TermAttributePtr termAtt; + +protected: + static const String& APOSTROPHE_TYPE(); + static const String& ACRONYM_TYPE(); + +public: + /// Returns the next token in the stream, or null at EOS. + /// + /// Removes 's from the end of words. + /// Removes dots from acronyms. 
+ virtual bool incrementToken(); +}; + +} + +#endif diff --git a/include/lucene++/StandardTokenizer.h b/include/lucene++/StandardTokenizer.h new file mode 100644 index 00000000..050eda67 --- /dev/null +++ b/include/lucene++/StandardTokenizer.h @@ -0,0 +1,107 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STANDARDTOKENIZER_H +#define STANDARDTOKENIZER_H + +#include "Tokenizer.h" + +namespace Lucene { + +/// A grammar-based tokenizer +/// +/// This should be a good tokenizer for most European-language documents: +/// +///
    +///
  • Splits words at punctuation characters, removing punctuation. However, a dot that's not followed by +/// whitespace is considered part of a token. +///
  • Splits words at hyphens, unless there's a number in the token, in which case the whole token is interpreted +/// as a product number and is not split. +///
  • Recognizes email addresses and internet hostnames as one token. +///
+/// +/// Many applications have specific tokenizer needs. If this tokenizer does not suit your application, please consider +/// copying this source code directory to your project and maintaining your own grammar-based tokenizer. +/// +/// You must specify the required {@link Version} compatibility when creating StandardAnalyzer: +/// +///
    +///
  • As of 2.4, Tokens incorrectly identified as acronyms are corrected +///
+class LPPAPI StandardTokenizer : public Tokenizer { +public: + /// Creates a new instance of the {@link StandardTokenizer}. Attaches the input to the newly created scanner. + /// @param input The input reader + StandardTokenizer(LuceneVersion::Version matchVersion, const ReaderPtr& input); + + /// Creates a new StandardTokenizer with a given {@link AttributeSource}. + StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeSourcePtr& source, const ReaderPtr& input); + + /// Creates a new StandardTokenizer with a given {@link AttributeSource.AttributeFactory} + StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeFactoryPtr& factory, const ReaderPtr& input); + + virtual ~StandardTokenizer(); + + LUCENE_CLASS(StandardTokenizer); + +protected: + /// A private instance of the scanner + StandardTokenizerImplPtr scanner; + + bool replaceInvalidAcronym; + int32_t maxTokenLength; + + // this tokenizer generates three attributes: offset, positionIncrement and type + TermAttributePtr termAtt; + OffsetAttributePtr offsetAtt; + PositionIncrementAttributePtr posIncrAtt; + TypeAttributePtr typeAtt; + +public: + static const int32_t ALPHANUM; + static const int32_t APOSTROPHE; + static const int32_t ACRONYM; + static const int32_t COMPANY; + static const int32_t EMAIL; + static const int32_t HOST; + static const int32_t NUM; + static const int32_t CJ; + + /// @deprecated this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. + static const int32_t ACRONYM_DEP; + + /// String token types that correspond to token type int constants + static const Collection TOKEN_TYPES(); + +protected: + void init(const ReaderPtr& input, LuceneVersion::Version matchVersion); + +public: + /// Set the max allowed token length. Any token longer than this is skipped. 
+ void setMaxTokenLength(int32_t length); + + /// @see #setMaxTokenLength + int32_t getMaxTokenLength(); + + /// @see TokenStream#next() + virtual bool incrementToken(); + + virtual void end(); + + virtual void reset(const ReaderPtr& input); + + /// @return true if StandardTokenizer now returns these tokens as Hosts, otherwise false + /// @deprecated Remove in 3.X and make true the only valid value + bool isReplaceInvalidAcronym(); + + /// @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms as HOST. + /// @deprecated Remove in 3.X and make true the only valid value + void setReplaceInvalidAcronym(bool replaceInvalidAcronym); +}; + +} + +#endif diff --git a/include/lucene++/StandardTokenizerImpl.h b/include/lucene++/StandardTokenizerImpl.h new file mode 100644 index 00000000..19e5321a --- /dev/null +++ b/include/lucene++/StandardTokenizerImpl.h @@ -0,0 +1,205 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STANDARDTOKENIZERIMPL_H +#define STANDARDTOKENIZERIMPL_H + +#include "LuceneObject.h" + +namespace Lucene { + +class StandardTokenizerImpl : public LuceneObject { +public: + /// Creates a new scanner + /// @param in the Reader to read input from. 
+ StandardTokenizerImpl(const ReaderPtr& in); + + virtual ~StandardTokenizerImpl(); + + LUCENE_CLASS(StandardTokenizerImpl); + +protected: + /// Initial size of the lookahead buffer + static const int32_t ZZ_BUFFERSIZE; + + /// Translates characters to character classes + static CharArray _ZZ_CMAP; + static const wchar_t ZZ_CMAP_PACKED[]; + static const int32_t ZZ_CMAP_LENGTH; + static const int32_t ZZ_CMAP_PACKED_LENGTH; + + /// Translates characters to character classes + static void ZZ_CMAP_INIT(); + static const wchar_t* ZZ_CMAP(); + + /// Translates DFA states to action switch labels. + static IntArray _ZZ_ACTION; + static const wchar_t ZZ_ACTION_PACKED_0[]; + static const int32_t ZZ_ACTION_LENGTH; + static const int32_t ZZ_ACTION_PACKED_LENGTH; + + /// Translates DFA states to action switch labels. + static void ZZ_ACTION_INIT(); + static const int32_t* ZZ_ACTION(); + + /// Translates a state to a row index in the transition table + static IntArray _ZZ_ROWMAP; + static const wchar_t ZZ_ROWMAP_PACKED_0[]; + static const int32_t ZZ_ROWMAP_LENGTH; + static const int32_t ZZ_ROWMAP_PACKED_LENGTH; + + /// Translates a state to a row index in the transition table + static void ZZ_ROWMAP_INIT(); + static const int32_t* ZZ_ROWMAP(); + + /// The transition table of the DFA + static IntArray _ZZ_TRANS; + static const wchar_t ZZ_TRANS_PACKED_0[]; + static const int32_t ZZ_TRANS_LENGTH; + static const int32_t ZZ_TRANS_PACKED_LENGTH; + + /// The transition table of the DFA + static void ZZ_TRANS_INIT(); + static const int32_t* ZZ_TRANS(); + + // error codes + static const int32_t ZZ_UNKNOWN_ERROR; + static const int32_t ZZ_NO_MATCH; + static const int32_t ZZ_PUSHBACK_2BIG; + + static const wchar_t* ZZ_ERROR_MSG[]; + + /// ZZ_ATTRIBUTE[aState] contains the attributes of state aState + static IntArray _ZZ_ATTRIBUTE; + static const wchar_t ZZ_ATTRIBUTE_PACKED_0[]; + static const int32_t ZZ_ATTRIBUTE_LENGTH; + static const int32_t ZZ_ATTRIBUTE_PACKED_LENGTH; + + /// 
ZZ_ATTRIBUTE[aState] contains the attributes of state aState + static void ZZ_ATTRIBUTE_INIT(); + static const int32_t* ZZ_ATTRIBUTE(); + + /// The input device + ReaderPtr zzReader; + + /// The current state of the DFA + int32_t zzState; + + /// The current lexical state + int32_t zzLexicalState; + + /// This buffer contains the current text to be matched and is the source of the yytext() string + CharArray zzBuffer; + + /// The text position at the last accepting state + int32_t zzMarkedPos; + + /// The text position at the last state to be included in yytext + int32_t zzPushbackPos; + + /// The current text position in the buffer + int32_t zzCurrentPos; + + /// StartRead marks the beginning of the yytext() string in the buffer + int32_t zzStartRead; + + /// EndRead marks the last character in the buffer, that has been read from input + int32_t zzEndRead; + + /// Number of newlines encountered up to the start of the matched text + int32_t yyline; + + /// The number of characters up to the start of the matched text + int32_t _yychar; + + /// The number of characters from the last newline up to the start of the matched text + int32_t yycolumn; + + /// zzAtBOL == true if the scanner is currently at the beginning of a line + bool zzAtBOL; + + /// zzAtEOF == true if the scanner is at the EOF + bool zzAtEOF; + +public: + /// This character denotes the end of file + static const int32_t YYEOF; + + /// Lexical states + static const int32_t YYINITIAL; + +public: + int32_t yychar(); + + /// Resets the Tokenizer to a new Reader. + void reset(const ReaderPtr& r); + + /// Fills Lucene token with the current token text. + void getText(const TokenPtr& t); + + /// Fills TermAttribute with the current token text. + void getText(const TermAttributePtr& t); + + /// Closes the input stream. + void yyclose(); + + /// Resets the scanner to read from a new input stream. Does not close the old reader. 
+ /// + /// All internal variables are reset, the old input stream cannot be reused (internal buffer is discarded and lost). + /// Lexical state is set to ZZ_INITIAL. + /// + /// @param reader the new input stream. + void yyreset(const ReaderPtr& reader); + + /// Returns the current lexical state. + int32_t yystate(); + + /// Enters a new lexical state + /// @param newState the new lexical state. + void yybegin(int32_t newState); + + /// Returns the text matched by the current regular expression. + String yytext(); + + /// Returns the character at position pos from the matched text. + /// + /// It is equivalent to yytext()[pos], but faster + /// @param pos the position of the character to fetch. A value from 0 to yylength() - 1. + /// @return the character at position pos. + wchar_t yycharat(int32_t pos); + + /// Returns the length of the matched text region. + int32_t yylength(); + + /// Pushes the specified amount of characters back into the input stream. + /// + /// They will be read again by then next call of the scanning method + /// @param number the number of characters to be read again. This number must not be greater than yylength() + void yypushback(int32_t number); + + /// Resumes scanning until the next regular expression is matched, the end of input is encountered or an I/O- + /// Error occurs. + int32_t getNextToken(); + +protected: + /// Refills the input buffer. + bool zzRefill(); + + /// Reports an error that occurred while scanning. + /// + /// In a well-formed scanner (no or only correct usage of yypushback(int32_t) and a match-all fallback rule) + /// this method will only be called with things that "Can't Possibly Happen". If this method is called, + /// something is seriously wrong. + /// + /// Usual syntax/scanner level error handling should be done in error fallback rules. + /// + /// @param errorCode The code of the errormessage to display. 
+ void zzScanError(int32_t errorCode); +}; + +} + +#endif diff --git a/include/lucene++/StopAnalyzer.h b/include/lucene++/StopAnalyzer.h new file mode 100644 index 00000000..179262d9 --- /dev/null +++ b/include/lucene++/StopAnalyzer.h @@ -0,0 +1,52 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STOPANALYZER_H +#define STOPANALYZER_H + +#include "Analyzer.h" + +namespace Lucene { + +/// Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. +/// +/// You must specify the required {@link Version} compatibility when creating StopAnalyzer: As of 2.9, position +/// increments are preserved +class LPPAPI StopAnalyzer : public Analyzer { +public: + /// Builds an analyzer which removes words in {@link #ENGLISH_STOP_WORDS_SET}. + StopAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the stop words from the given set. + StopAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords); + + /// Builds an analyzer with the stop words from the given file. + StopAnalyzer(LuceneVersion::Version matchVersion, const String& stopwordsFile); + + /// Builds an analyzer with the stop words from the given reader. + StopAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords); + + virtual ~StopAnalyzer(); + + LUCENE_CLASS(StopAnalyzer); + +protected: + HashSet stopWords; + bool enablePositionIncrements; + + static const wchar_t* _ENGLISH_STOP_WORDS_SET[]; + +public: + /// An unmodifiable set containing some common English words that are usually not useful for searching. 
+ static const HashSet ENGLISH_STOP_WORDS_SET(); + + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/StopFilter.h b/include/lucene++/StopFilter.h new file mode 100644 index 00000000..93c52a52 --- /dev/null +++ b/include/lucene++/StopFilter.h @@ -0,0 +1,68 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STOPFILTER_H +#define STOPFILTER_H + +#include "TokenFilter.h" + +namespace Lucene { + +/// Removes stop words from a token stream. +class LPPAPI StopFilter : public TokenFilter { +public: + /// Construct a token stream filtering the given input. If stopWords is an instance of {@link CharArraySet} + /// (true if makeStopSet() was used to construct the set) it will be directly used and ignoreCase will be + /// ignored since CharArraySet directly controls case sensitivity. + /// + /// If stopWords is not an instance of {@link CharArraySet}, a new CharArraySet will be constructed and + /// ignoreCase will be used to specify the case sensitivity of that set. 
+ /// + /// @param enablePositionIncrements true if token positions should record the removed stop words + /// @param input Input TokenStream + /// @param stopWords A Set of Strings or char[] or any other toString()-able set representing the stopwords + /// @param ignoreCase if true, all words are lower cased first + StopFilter(bool enablePositionIncrements, const TokenStreamPtr& input, HashSet stopWords, bool ignoreCase = false); + StopFilter(bool enablePositionIncrements, const TokenStreamPtr& input, const CharArraySetPtr& stopWords, bool ignoreCase = false); + + virtual ~StopFilter(); + + LUCENE_CLASS(StopFilter); + +protected: + CharArraySetPtr stopWords; + bool enablePositionIncrements; + + TermAttributePtr termAtt; + PositionIncrementAttributePtr posIncrAtt; + +public: + /// Builds a Set from an array of stop words, appropriate for passing into the StopFilter constructor. + static HashSet makeStopSet(Collection stopWords); + + /// Returns the next input Token whose term() is not a stop word. + virtual bool incrementToken(); + + /// Returns version-dependent default for enablePositionIncrements. Analyzers that embed StopFilter use this + /// method when creating the StopFilter. Prior to 2.9, this returns false. On 2.9 or later, it returns true. + static bool getEnablePositionIncrementsVersionDefault(LuceneVersion::Version matchVersion); + + /// @see #setEnablePositionIncrements(bool). + bool getEnablePositionIncrements(); + + /// If true, this StopFilter will preserve positions of the incoming tokens (ie, accumulate and set position + /// increments of the removed stop tokens). Generally, true is best as it does not lose information (positions + /// of the original tokens) during indexing. + /// + /// When set, when a token is stopped (omitted), the position increment of the following token is incremented. + /// + /// NOTE: be sure to also set {@link QueryParser#setEnablePositionIncrements} if you use QueryParser to create queries. 
+ void setEnablePositionIncrements(bool enable); +}; + +} + +#endif diff --git a/include/lucene++/StoredFieldsWriter.h b/include/lucene++/StoredFieldsWriter.h new file mode 100644 index 00000000..ca3c67bb --- /dev/null +++ b/include/lucene++/StoredFieldsWriter.h @@ -0,0 +1,74 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STOREDFIELDSWRITER_H +#define STOREDFIELDSWRITER_H + +#include "DocumentsWriter.h" + +namespace Lucene { + +/// This is a DocFieldConsumer that writes stored fields. +class StoredFieldsWriter : public LuceneObject { +public: + StoredFieldsWriter(const DocumentsWriterPtr& docWriter, const FieldInfosPtr& fieldInfos); + virtual ~StoredFieldsWriter(); + + LUCENE_CLASS(StoredFieldsWriter); + +public: + FieldsWriterPtr fieldsWriter; + DocumentsWriterWeakPtr _docWriter; + FieldInfosPtr fieldInfos; + int32_t lastDocID; + + Collection docFreeList; + int32_t freeCount; + int32_t allocCount; + +public: + StoredFieldsWriterPerThreadPtr addThread(const DocStatePtr& docState); + void flush(const SegmentWriteStatePtr& state); + void closeDocStore(const SegmentWriteStatePtr& state); + StoredFieldsWriterPerDocPtr getPerDoc(); + void abort(); + + /// Fills in any hole in the docIDs + void fill(int32_t docID); + + void finishDocument(const StoredFieldsWriterPerDocPtr& perDoc); + bool freeRAM(); + void free(const StoredFieldsWriterPerDocPtr& perDoc); + +protected: + void initFieldsWriter(); +}; + +class StoredFieldsWriterPerDoc : public DocWriter { +public: + StoredFieldsWriterPerDoc(const StoredFieldsWriterPtr& fieldsWriter); + virtual ~StoredFieldsWriterPerDoc(); + + LUCENE_CLASS(StoredFieldsWriterPerDoc); + +protected: + StoredFieldsWriterWeakPtr 
_fieldsWriter; + +public: + PerDocBufferPtr buffer; + RAMOutputStreamPtr fdt; + int32_t numStoredFields; + +public: + void reset(); + virtual void abort(); + virtual int64_t sizeInBytes(); + virtual void finish(); +}; + +} + +#endif diff --git a/include/lucene++/StoredFieldsWriterPerThread.h b/include/lucene++/StoredFieldsWriterPerThread.h new file mode 100644 index 00000000..a20e1dd7 --- /dev/null +++ b/include/lucene++/StoredFieldsWriterPerThread.h @@ -0,0 +1,37 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STOREDFIELDSWRITERPERTHREAD_H +#define STOREDFIELDSWRITERPERTHREAD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class StoredFieldsWriterPerThread : public LuceneObject { +public: + StoredFieldsWriterPerThread(const DocStatePtr& docState, const StoredFieldsWriterPtr& storedFieldsWriter); + virtual ~StoredFieldsWriterPerThread(); + + LUCENE_CLASS(StoredFieldsWriterPerThread); + +public: + FieldsWriterPtr localFieldsWriter; + StoredFieldsWriterWeakPtr _storedFieldsWriter; + DocStatePtr docState; + + StoredFieldsWriterPerDocPtr doc; + +public: + void startDocument(); + void addField(const FieldablePtr& field, const FieldInfoPtr& fieldInfo); + DocWriterPtr finishDocument(); + void abort(); +}; + +} + +#endif diff --git a/include/lucene++/StringReader.h b/include/lucene++/StringReader.h new file mode 100644 index 00000000..3684294a --- /dev/null +++ b/include/lucene++/StringReader.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STRINGREADER_H +#define STRINGREADER_H + +#include "Reader.h" + +namespace Lucene { + +/// Convenience class for reading strings. +class LPPAPI StringReader : public Reader { +public: + /// Creates a new StringReader, given the String to read from. + StringReader(const String& str); + virtual ~StringReader(); + + LUCENE_CLASS(StringReader); + +protected: + String str; + int32_t position; + +public: + /// Read a single character. + virtual int32_t read(); + + /// Read characters into a portion of an array. + virtual int32_t read(wchar_t* buffer, int32_t offset, int32_t length); + + /// Close the stream. + virtual void close(); + + /// Tell whether this stream supports the mark() operation + virtual bool markSupported(); + + /// Reset the stream. + virtual void reset(); + + /// The number of bytes in the stream. + virtual int64_t length(); +}; + +} + +#endif diff --git a/include/lucene++/StringUtils.h b/include/lucene++/StringUtils.h new file mode 100644 index 00000000..2b698d55 --- /dev/null +++ b/include/lucene++/StringUtils.h @@ -0,0 +1,95 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef STRINGUTILS_H +#define STRINGUTILS_H + +#include "Lucene.h" + +namespace Lucene { + +class LPPAPI StringUtils { +public: + /// Maximum length of UTF encoding. + static const int32_t MAX_ENCODING_UTF8_SIZE; + + /// Default character radix. + static const int32_t CHARACTER_MAX_RADIX; + +public: + /// Convert uft8 buffer into unicode. 
+ static int32_t toUnicode(const uint8_t* utf8, int32_t length, CharArray unicode); + + /// Convert uft8 buffer into unicode. + static int32_t toUnicode(const uint8_t* utf8, int32_t length, const UnicodeResultPtr& unicodeResult); + + /// Convert uft8 buffer into unicode. + static String toUnicode(const uint8_t* utf8, int32_t length); + + /// Convert uft8 string into unicode. + static String toUnicode(const SingleString& s); + + /// Convert unicode buffer into uft8. + static int32_t toUTF8(const wchar_t* unicode, int32_t length, ByteArray utf8); + + /// Convert unicode buffer into uft8. + static int32_t toUTF8(const wchar_t* unicode, int32_t length, const UTF8ResultPtr& utf8Result); + + /// Convert unicode buffer into uft8. + static SingleString toUTF8(const wchar_t* unicode, int32_t length); + + /// Convert unicode string into uft8. + static SingleString toUTF8(const String& s); + + /// Convert given string to lower case using current locale + static void toLower(String& str); + + /// Convert given string to lower case using current locale + static String toLower(const String& str); + + /// Convert given string to upper case using current locale + static void toUpper(String& str); + + /// Convert given string to upper case using current locale + static String toUpper(const String& str); + + /// Compare two strings ignoring case differences + static int32_t compareCase(const String& first, const String& second); + + /// Splits string using given delimiters + static Collection split(const String& str, const String& delim); + + /// Convert the given string to int32_t. + static int32_t toInt(const String& value); + + /// Convert the given string to int64_t. + static int64_t toLong(const String& value); + + /// Return given value as a long integer using base unit. + static int64_t toLong(const String& value, int32_t base); + + /// Convert the given string to double. + static double toDouble(const String& value); + + /// Compute the hash code from string. 
+ static int32_t hashCode(const String& value); + + /// Return given value as a string using base unit. + static String toString(int64_t value, int32_t base); + + /// Convert any given type to a {@link String}. + template + static String toString(const TYPE& value) { + StringStream os; + os << value; + return os.str(); + } +}; + +#define UTF8_TO_STRING(utf8) StringUtils::toUnicode(utf8, SIZEOF_ARRAY(utf8)) +} + +#endif diff --git a/include/lucene++/Synchronize.h b/include/lucene++/Synchronize.h new file mode 100644 index 00000000..6d546a65 --- /dev/null +++ b/include/lucene++/Synchronize.h @@ -0,0 +1,82 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef SYNCHRONIZE_H +#define SYNCHRONIZE_H + +#include +#include +#include +#include "Lucene.h" + +namespace Lucene { + +/// Utility class to support locking via a mutex. +class LPPAPI Synchronize { +public: + Synchronize(); + virtual ~Synchronize(); + +protected: + boost::recursive_timed_mutex mutexSynchronize; + int64_t lockThread; + int32_t recursionCount; + +public: + /// create a new Synchronize instance atomically. + static void createSync(SynchronizePtr& sync); + + /// Lock mutex using an optional timeout. + void lock(int32_t timeout = 0); + + /// Unlock mutex. + void unlock(); + + /// Unlock all recursive mutex. + int32_t unlockAll(); + + /// Returns true if mutex is currently locked by current thread. + bool holdsLock(); +}; + +/// Utility class to support scope locking. 
+class LPPAPI SyncLock { +public: + SyncLock(const SynchronizePtr& sync, int32_t timeout = 0); + + template + SyncLock(OBJECT object, int32_t timeout = 0) { + this->sync = object->getSync(); + lock(timeout); + } + + virtual ~SyncLock(); + +protected: + SynchronizePtr sync; + +protected: + void lock(int32_t timeout); +}; + + +#define LUCENE_RUN_ONCE(Command) \ + do { \ + static std::atomic RUN_ONCE_hasRun = {}; \ + if (!RUN_ONCE_hasRun) { \ + static boost::mutex RUN_ONCE_mutex; \ + boost::mutex::scoped_lock RUN_ONCE_lock(RUN_ONCE_mutex); \ + if (!RUN_ONCE_hasRun) { \ + Command; \ + RUN_ONCE_hasRun = true; \ + } \ + } \ + } while(0) + + +} + +#endif diff --git a/include/lucene++/TeeSinkTokenFilter.h b/include/lucene++/TeeSinkTokenFilter.h new file mode 100644 index 00000000..65530354 --- /dev/null +++ b/include/lucene++/TeeSinkTokenFilter.h @@ -0,0 +1,149 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TEESINKTOKENFILTER_H +#define TEESINKTOKENFILTER_H + +#include "TokenFilter.h" +#include "TokenStream.h" + +namespace Lucene { + +/// This TokenFilter provides the ability to set aside attribute states that have already been analyzed. This is +/// useful in situations where multiple fields share many common analysis steps and then go their separate ways. +/// +/// It is also useful for doing things like entity extraction or proper noun analysis as part of the analysis workflow +/// and saving off those tokens for use in another field. +/// +///
+/// TeeSinkTokenFilterPtr source1 = newLucene(newLucene(reader1));
+/// SinkTokenStreamPtr sink1 = source1->newSinkTokenStream();
+/// SinkTokenStreamPtr sink2 = source1->newSinkTokenStream();
+///
+/// TeeSinkTokenFilterPtr source2 = newLucene(newLucene(reader2));
+/// source2->addSinkTokenStream(sink1);
+/// source2->addSinkTokenStream(sink2);
+///
+/// TokenStreamPtr final1 = newLucene(source1);
+/// TokenStreamPtr final2 = source2;
+/// TokenStreamPtr final3 = newLucene(sink1);
+/// TokenStreamPtr final4 = newLucene(sink2);
+///
+/// d->add(newLucene(L"f1", final1));
+/// d->add(newLucene(L"f2", final2));
+/// d->add(newLucene(L"f3", final3));
+/// d->add(newLucene(L"f4", final4));
+/// 
+/// +/// In this example, sink1 and sink2 will both get tokens from both reader1 and reader2 after whitespace tokenizer +/// and now we can further wrap any of these in extra analysis, and more "sources" can be inserted if desired. +/// It is important, that tees are consumed before sinks (in the above example, the field names must be less the +/// sink's field names). If you are not sure, which stream is consumed first, you can simply add another sink and +/// then pass all tokens to the sinks at once using {@link #consumeAllTokens}. +/// +/// This TokenFilter is exhausted after this. In the above example, change the example above to: +/// +///
+/// ...
+/// TokenStreamPtr final1 = newLucene(source1->newSinkTokenStream());
+/// TokenStreamPtr final2 = source2->newSinkTokenStream();
+/// sink1->consumeAllTokens();
+/// sink2->consumeAllTokens();
+/// ...
+/// 
+/// +/// In this case, the fields can be added in any order, because the sources are not used anymore and all sinks are +/// ready. +/// +/// Note, the EntityDetect and URLDetect TokenStreams are for the example and do not currently exist in Lucene. +class LPPAPI TeeSinkTokenFilter : public TokenFilter { +public: + /// Instantiates a new TeeSinkTokenFilter. + TeeSinkTokenFilter(const TokenStreamPtr& input); + virtual ~TeeSinkTokenFilter(); + + LUCENE_CLASS(TeeSinkTokenFilter); + +protected: + Collection sinks; + +public: + /// Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream. + SinkTokenStreamPtr newSinkTokenStream(); + + /// Returns a new {@link SinkTokenStream} that receives all tokens consumed by this stream that pass + /// the supplied filter. + /// @see SinkFilter + SinkTokenStreamPtr newSinkTokenStream(const SinkFilterPtr& filter); + + /// Adds a {@link SinkTokenStream} created by another TeeSinkTokenFilter to this one. The supplied stream will + /// also receive all consumed tokens. This method can be used to pass tokens from two different tees to one sink. + void addSinkTokenStream(const SinkTokenStreamPtr& sink); + + /// TeeSinkTokenFilter passes all tokens to the added sinks when itself is consumed. To be sure, that all tokens + /// from the input stream are passed to the sinks, you can call this methods. This instance is exhausted after this, + /// but all sinks are instant available. + void consumeAllTokens(); + + virtual bool incrementToken(); + virtual void end(); +}; + +class LPPAPI SinkFilter : public LuceneObject { +public: + virtual ~SinkFilter(); + + LUCENE_CLASS(SinkFilter); + +public: + /// Returns true, if the current state of the passed-in {@link AttributeSource} shall be stored in the sink. + virtual bool accept(const AttributeSourcePtr& source) = 0; + + /// Called by {@link SinkTokenStream#reset()}. This method does nothing by default and can optionally be overridden. 
+ virtual void reset(); +}; + +class LPPAPI AcceptAllSinkFilter : public SinkFilter { +public: + virtual ~AcceptAllSinkFilter(); + + LUCENE_CLASS(AcceptAllSinkFilter); + +public: + virtual bool accept(const AttributeSourcePtr& source); +}; + +/// A filter that decides which {@link AttributeSource} states to store in the sink. +class LPPAPI SinkTokenStream : public TokenStream { +public: + SinkTokenStream(const AttributeSourcePtr& source, const SinkFilterPtr& filter); + virtual ~SinkTokenStream(); + + LUCENE_CLASS(SinkTokenStream); + +protected: + Collection cachedStates; + AttributeSourceStatePtr finalState; + bool initIterator; + Collection::iterator it; + SinkFilterPtr filter; + +protected: + bool accept(const AttributeSourcePtr& source); + void addState(const AttributeSourceStatePtr& state); + void setFinalState(const AttributeSourceStatePtr& finalState); + +public: + virtual bool incrementToken(); + virtual void end(); + virtual void reset(); + + friend class TeeSinkTokenFilter; +}; + +} + +#endif diff --git a/include/lucene++/Term.h b/include/lucene++/Term.h new file mode 100644 index 00000000..0ac80c31 --- /dev/null +++ b/include/lucene++/Term.h @@ -0,0 +1,62 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERM_H +#define TERM_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A Term represents a word from text. This is the unit of search. It is composed of two elements, +/// the text of the word, as a string, and the name of the field that the text occurred in, an interned +/// string. +/// +/// Note that terms may represent more than words from text fields, but also things like dates, email +/// addresses, urls, etc. 
+class LPPAPI Term : public LuceneObject { +public: + /// Constructs a Term with the given field and text. + Term(const String& fld, const String& txt = EmptyString); + virtual ~Term(); + + LUCENE_CLASS(Term); + +public: + String _field; + String _text; + +public: + /// Returns the field of this term, an interned string. The field indicates the part of a document + /// which this term came from. + String field(); + + /// Returns the text of this term. In the case of words, this is simply the text of the word. In + /// the case of dates and other types, this is an encoding of the object as a string. + String text(); + + /// Optimized construction of new Terms by reusing same field as this Term + /// @param text The text of the new term (field is implicitly same as this Term instance) + /// @return A new Term + TermPtr createTerm(const String& text); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + /// Compares two terms, returning a negative integer if this term belongs before the argument, zero + /// if this term is equal to the argument, and a positive integer if this term belongs after the argument. + /// + /// The ordering of terms is first by field, then by text. + virtual int32_t compareTo(const LuceneObjectPtr& other); + + void set(const String& fld, const String& txt); + + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/TermAttribute.h b/include/lucene++/TermAttribute.h new file mode 100644 index 00000000..48af610b --- /dev/null +++ b/include/lucene++/TermAttribute.h @@ -0,0 +1,92 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMATTRIBUTE_H +#define TERMATTRIBUTE_H + +#include "Attribute.h" + +namespace Lucene { + +/// The term text of a Token. +class LPPAPI TermAttribute : public Attribute { +public: + TermAttribute(); + virtual ~TermAttribute(); + + LUCENE_CLASS(TermAttribute); + +protected: + static const int32_t MIN_BUFFER_SIZE; + + CharArray _termBuffer; + int32_t _termLength; + +public: + virtual String toString(); + + /// Returns the Token's term text. + /// + /// This method has a performance penalty because the text is stored internally in a char[]. If possible, + /// use {@link #termBuffer()} and {@link #termLength()} directly instead. If you really need a String, use + /// this method, which is nothing more than a convenience call to new String(token.termBuffer(), 0, + /// token.termLength()) + virtual String term(); + + /// Copies the contents of buffer, starting at offset for length characters, into the termBuffer array. + /// @param buffer the buffer to copy + /// @param offset the index in the buffer of the first character to copy + /// @param length the number of characters to copy + virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length); + + /// Copies the contents of buffer into the termBuffer array. + /// @param buffer the buffer to copy + virtual void setTermBuffer(const String& buffer); + + /// Returns the internal termBuffer character array which you can then directly alter. If the array is + /// too small for your token, use {@link #resizeTermBuffer(int)} to increase it. After altering the buffer + /// be sure to call {@link #setTermLength} to record the number of valid characters that were placed into + /// the termBuffer. + virtual CharArray termBuffer(); + + /// Optimized implementation of termBuffer. + virtual wchar_t* termBufferArray(); + + /// Grows the termBuffer to at least size newSize, preserving the existing content. 
Note: If the next + /// operation is to change the contents of the term buffer use {@link #setTermBuffer(char[], int, int)}, + /// {@link #setTermBuffer(String)}, or {@link #setTermBuffer(String, int, int)} to optimally combine the + /// resize with the setting of the termBuffer. + /// @param newSize minimum size of the new termBuffer + /// @return newly created termBuffer with length >= newSize + virtual CharArray resizeTermBuffer(int32_t newSize); + + /// Return number of valid characters (length of the term) in the termBuffer array. + virtual int32_t termLength(); + + /// Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the + /// termBuffer or to synchronize with external manipulation of the termBuffer. Note: to grow the size of + /// the array, use {@link #resizeTermBuffer(int)} first. + /// @param length the truncated length + virtual void setTermLength(int32_t length); + + virtual int32_t hashCode(); + virtual void clear(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual bool equals(const LuceneObjectPtr& other); + virtual void copyTo(const AttributePtr& target); + +protected: + /// Allocates a buffer char[] of at least newSize, without preserving the existing content. Its always + /// used in places that set the content. + /// @param newSize minimum size of the buffer + void growTermBuffer(int32_t newSize); + + void initTermBuffer(); +}; + +} + +#endif diff --git a/include/lucene++/TermBuffer.h b/include/lucene++/TermBuffer.h new file mode 100644 index 00000000..d43df608 --- /dev/null +++ b/include/lucene++/TermBuffer.h @@ -0,0 +1,51 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMBUFFER_H +#define TERMBUFFER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class TermBuffer : public LuceneObject { +public: + TermBuffer(); + virtual ~TermBuffer(); + + LUCENE_CLASS(TermBuffer); + +protected: + String field; + TermPtr term; // cached + bool preUTF8Strings; // true if strings are stored in modified UTF8 encoding + + UnicodeResultPtr text; + UTF8ResultPtr bytes; + +public: + virtual int32_t compareTo(const LuceneObjectPtr& other); + + /// Call this if the IndexInput passed to {@link #read} stores terms in the "modified UTF8" format. + void setPreUTF8Strings(); + + void read(const IndexInputPtr& input, const FieldInfosPtr& fieldInfos); + + void set(const TermPtr& term); + void set(const TermBufferPtr& other); + void reset(); + + TermPtr toTerm(); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + +protected: + int32_t compareChars(wchar_t* chars1, int32_t len1, wchar_t* chars2, int32_t len2); +}; + +} + +#endif diff --git a/include/lucene++/TermDocs.h b/include/lucene++/TermDocs.h new file mode 100644 index 00000000..2bff90ea --- /dev/null +++ b/include/lucene++/TermDocs.h @@ -0,0 +1,59 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMDOCS_H +#define TERMDOCS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// TermDocs provides an interface for enumerating ; pairs for a term. The document +/// portion names each document containing the term. Documents are indicated by number. The frequency +/// portion gives the number of times the term occurred in each document. 
The pairs are ordered by document +/// number. +/// @see IndexReader#termDocs() +class LPPAPI TermDocs { +protected: + TermDocs(); + +public: + LUCENE_INTERFACE(TermDocs); + +public: + /// Sets this to the data for a term. The enumeration is reset to the start of the data for this term. + virtual void seek(const TermPtr& term) = 0; + + /// Sets this to the data for the current term in a {@link TermEnum}. + /// This may be optimized in some implementations. + virtual void seek(const TermEnumPtr& termEnum) = 0; + + /// Returns the current document number. This is invalid until {@link #next()} is called for the first time. + virtual int32_t doc() = 0; + + /// Returns the frequency of the term within the current document. This is invalid until {@link #next()} is + /// called for the first time. + virtual int32_t freq() = 0; + + /// Moves to the next pair in the enumeration. Returns true if there is such a next pair in the enumeration. + virtual bool next() = 0; + + /// Attempts to read multiple entries from the enumeration, up to length of docs. Document numbers are stored + /// in docs, and term frequencies are stored in freqs. Returns the number of entries read. Zero is only + /// returned when the stream has been exhausted. + virtual int32_t read(Collection& docs, Collection& freqs) = 0; + + /// Skips entries to the first beyond the current whose document number is greater than or equal to target. + /// Returns true if there is such an entry. + virtual bool skipTo(int32_t target) = 0; + + /// Frees associated resources. + virtual void close() = 0; +}; + +} + +#endif diff --git a/include/lucene++/TermEnum.h b/include/lucene++/TermEnum.h new file mode 100644 index 00000000..550bbc3b --- /dev/null +++ b/include/lucene++/TermEnum.h @@ -0,0 +1,39 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMENUM_H +#define TERMENUM_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Abstract class for enumerating terms. +/// +/// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater +/// than all that precede it. +class LPPAPI TermEnum : public LuceneObject { +public: + virtual ~TermEnum(); + LUCENE_CLASS(TermEnum); + +public: + /// Increments the enumeration to the next element. True if one exists. + virtual bool next() = 0; + + /// Returns the current Term in the enumeration. + virtual TermPtr term() = 0; + + /// Returns the docFreq of the current Term in the enumeration. + virtual int32_t docFreq() = 0; + + /// Closes the enumeration to further activity, freeing resources. + virtual void close() = 0; +}; + +} + +#endif diff --git a/include/lucene++/TermFreqVector.h b/include/lucene++/TermFreqVector.h new file mode 100644 index 00000000..5cbf4e67 --- /dev/null +++ b/include/lucene++/TermFreqVector.h @@ -0,0 +1,57 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMFREQVECTOR_H +#define TERMFREQVECTOR_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Provides access to stored term vector of a document field. The vector consists of the name of the field, an +/// array of the terms that occur in the field of the {@link Document} and a parallel array of frequencies. 
Thus, +/// getTermFrequencies()[5] corresponds with the frequency of getTerms()[5], assuming there are at least 5 terms +/// in the Document. +class LPPAPI TermFreqVector { +protected: + TermFreqVector(); + +public: + virtual ~TermFreqVector(); + LUCENE_INTERFACE(TermFreqVector); + +public: + /// The {@link Fieldable} name. + /// @return The name of the field this vector is associated with. + virtual String getField(); + + /// @return The number of terms in the term vector. + virtual int32_t size(); + + /// @return An Array of term texts in ascending order. + virtual Collection getTerms(); + + /// Array of term frequencies. Locations of the array correspond one to one to the terms in the array obtained from + /// getTerms method. Each location in the array contains the number of times this term occurs in the document or the + /// document field. + virtual Collection getTermFrequencies(); + + /// Return an index in the term numbers array returned from getTerms at which the term with the specified term appears. + /// If this term does not appear in the array, return -1. + virtual int32_t indexOf(const String& term); + + /// Just like indexOf(int) but searches for a number of terms at the same time. Returns an array that has the same size + /// as the number of terms searched for, each slot containing the result of searching for that term number. + /// + /// @param terms array containing terms to look for + /// @param start index in the array where the list of terms starts + /// @param length the number of terms in the list + virtual Collection indexesOf(Collection terms, int32_t start, int32_t length); +}; + +} + +#endif diff --git a/include/lucene++/TermInfo.h b/include/lucene++/TermInfo.h new file mode 100644 index 00000000..f2331eda --- /dev/null +++ b/include/lucene++/TermInfo.h @@ -0,0 +1,37 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMINFO_H +#define TERMINFO_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// A TermInfo is the record of information stored for a term. +class TermInfo : public LuceneObject { +public: + TermInfo(const TermInfoPtr& ti); + TermInfo(int32_t df = 0, int64_t fp = 0, int64_t pp = 0); + virtual ~TermInfo(); + + LUCENE_CLASS(TermInfo); + +public: + /// The number of documents which contain the term. + int32_t docFreq; + int64_t freqPointer; + int64_t proxPointer; + int32_t skipOffset; + +public: + void set(int32_t docFreq, int64_t freqPointer, int64_t proxPointer, int32_t skipOffset); + void set(const TermInfoPtr& ti); +}; + +} + +#endif diff --git a/include/lucene++/TermInfosReader.h b/include/lucene++/TermInfosReader.h new file mode 100644 index 00000000..bda7ec76 --- /dev/null +++ b/include/lucene++/TermInfosReader.h @@ -0,0 +1,89 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMINFOSREADER_H +#define TERMINFOSREADER_H + +#include "CloseableThreadLocal.h" +#include "SimpleLRUCache.h" + +namespace Lucene { + +/// This stores a monotonically increasing set of <Term, TermInfo> pairs in a Directory. Pairs are +/// accessed either by Term or by ordinal position in the set. 
+class TermInfosReader : public LuceneObject { +public: + TermInfosReader(const DirectoryPtr& dir, const String& seg, const FieldInfosPtr& fis, int32_t readBufferSize, int32_t indexDivisor); + virtual ~TermInfosReader(); + + LUCENE_CLASS(TermInfosReader); + +protected: + DirectoryPtr directory; + String segment; + FieldInfosPtr fieldInfos; + CloseableThreadLocal threadResources; + SegmentTermEnumPtr origEnum; + int64_t _size; + + Collection indexTerms; + Collection indexInfos; + Collection indexPointers; + + int32_t totalIndexInterval; + + static const int32_t DEFAULT_CACHE_SIZE; + +public: + int32_t getSkipInterval(); + int32_t getMaxSkipLevels(); + void close(); + + /// Returns the number of term/value pairs in the set. + int64_t size(); + + /// Returns the TermInfo for a Term in the set, or null. + TermInfoPtr get(const TermPtr& term); + + /// Returns the position of a Term in the set or -1. + int64_t getPosition(const TermPtr& term); + + /// Returns an enumeration of all the Terms and TermInfos in the set. + SegmentTermEnumPtr terms(); + + /// Returns an enumeration of terms starting at or after the named term. + SegmentTermEnumPtr terms(const TermPtr& term); + +protected: + TermInfosReaderThreadResourcesPtr getThreadResources(); + + /// Returns the offset of the greatest index entry which is less than or equal to term. + int32_t getIndexOffset(const TermPtr& term); + + void seekEnum(const SegmentTermEnumPtr& enumerator, int32_t indexOffset); + + /// Returns the TermInfo for a Term in the set, or null. 
+ TermInfoPtr get(const TermPtr& term, bool useCache); + + void ensureIndexIsRead(); +}; + +class TermInfosReaderThreadResources : public LuceneObject { +public: + virtual ~TermInfosReaderThreadResources(); + + LUCENE_CLASS(TermInfosReaderThreadResources); + +public: + SegmentTermEnumPtr termEnum; + + // Used for caching the least recently looked-up Terms + TermInfoCachePtr termInfoCache; +}; + +} + +#endif diff --git a/include/lucene++/TermInfosWriter.h b/include/lucene++/TermInfosWriter.h new file mode 100644 index 00000000..b19a2503 --- /dev/null +++ b/include/lucene++/TermInfosWriter.h @@ -0,0 +1,95 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMINFOSWRITER_H +#define TERMINFOSWRITER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// This stores a monotonically increasing set of pairs in a Directory. A TermInfos +/// can be written once, in order. +class TermInfosWriter : public LuceneObject { +public: + TermInfosWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval); + TermInfosWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval, bool isIndex); + virtual ~TermInfosWriter(); + + LUCENE_CLASS(TermInfosWriter); + +public: + /// The file format version, a negative number. + static const int32_t FORMAT; + + /// Changed strings to true utf8 with length-in-bytes not length-in-chars. + static const int32_t FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; + + /// NOTE: always change this if you switch to a new format. + static const int32_t FORMAT_CURRENT; + + /// The fraction of terms in the "dictionary" which should be stored in RAM. 
Smaller values use more memory, but + /// make searching slightly faster, while larger values use less memory and make searching slightly slower. + /// Searching is typically not dominated by dictionary lookup, so tweaking this is rarely useful. + int32_t indexInterval; + + /// The fraction of {@link TermDocs} entries stored in skip tables, used to accelerate {@link TermDocs#skipTo(int)}. + /// Larger values result in smaller indexes, greater acceleration, but fewer accelerable cases, while smaller values + /// result in bigger indexes, less acceleration and more accelerable cases. More detailed experiments would be useful + /// here. + int32_t skipInterval; + + /// The maximum number of skip levels. Smaller values result in slightly smaller indexes, but slower skipping + /// in big posting lists. + int32_t maxSkipLevels; + +protected: + FieldInfosPtr fieldInfos; + IndexOutputPtr output; + TermInfoPtr lastTi; + int64_t size; + + int64_t lastIndexPointer; + bool isIndex; + ByteArray lastTermBytes; + int32_t lastTermBytesLength; + int32_t lastFieldNumber; + + TermInfosWriterPtr otherWriter; + TermInfosWriterWeakPtr _other; + UTF8ResultPtr utf8Result; + + // Currently used only by assert statements + UnicodeResultPtr unicodeResult1; + UnicodeResultPtr unicodeResult2; + +public: + virtual void initialize(); + + void add(const TermPtr& term, const TermInfoPtr& ti); + + /// Adds a new <, TermInfo> pair to the set. Term must be lexicographically + /// greater than all previous Terms added. TermInfo pointers must be positive and greater than all previous. + void add(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength, const TermInfoPtr& ti); + + /// Called to complete TermInfos creation. 
+ void close(); + +protected: + void initialize(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval, bool isi); + + /// Currently used only by assert statements + bool initUnicodeResults(); + + /// Currently used only by assert statement + int32_t compareToLastTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength); + + void writeTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength); +}; + +} + +#endif diff --git a/include/lucene++/TermPositionVector.h b/include/lucene++/TermPositionVector.h new file mode 100644 index 00000000..e8d79ffd --- /dev/null +++ b/include/lucene++/TermPositionVector.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMPOSITIONVECTOR_H +#define TERMPOSITIONVECTOR_H + +#include "TermFreqVector.h" + +namespace Lucene { + +/// Extends TermFreqVector to provide additional information about positions in which each of the terms is found. A TermPositionVector not necessarily +/// contains both positions and offsets, but at least one of these arrays exists. +class LPPAPI TermPositionVector : public TermFreqVector { +protected: + TermPositionVector(); + +public: + virtual ~TermPositionVector(); + LUCENE_INTERFACE(TermPositionVector); + +public: + /// Returns an array of positions in which the term is found. Terms are identified by the index at which its number appears in the term String + /// array obtained from the indexOf method. May return null if positions have not been stored. + virtual Collection getTermPositions(int32_t index); + + /// Returns an array of TermVectorOffsetInfo in which the term is found. 
May return null if offsets have not been stored. + /// @see Token + /// @param index The position in the array to get the offsets from + /// @return An array of TermVectorOffsetInfo objects or the empty list + virtual Collection getOffsets(int32_t index); +}; + +} + +#endif diff --git a/include/lucene++/TermPositions.h b/include/lucene++/TermPositions.h new file mode 100644 index 00000000..cf899b61 --- /dev/null +++ b/include/lucene++/TermPositions.h @@ -0,0 +1,55 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMPOSITIONS_H +#define TERMPOSITIONS_H + +#include "TermDocs.h" + +namespace Lucene { + +/// TermPositions provides an interface for enumerating the *> +/// tuples for a term. The document and frequency are the same as for a TermDocs. The positions portion +/// lists the ordinal positions of each occurrence of a term in a document. +/// @see IndexReader#termPositions() +class LPPAPI TermPositions : public TermDocs { +protected: + TermPositions(); + +public: + virtual ~TermPositions(); + LUCENE_INTERFACE(TermPositions); + +public: + /// Returns next position in the current document. It is an error to call this more than {@link #freq()} + /// times without calling {@link #next()}. This is invalid until {@link #next()} is called for + // the first time. + virtual int32_t nextPosition(); + + /// Returns the length of the payload at the current term position. This is invalid until {@link + /// #nextPosition()} is called for the first time. + /// @return length of the current payload in number of bytes + virtual int32_t getPayloadLength(); + + /// Returns the payload data at the current term position. 
This is invalid until {@link #nextPosition()} + /// is called for the first time. + /// This method must not be called more than once after each call of {@link #nextPosition()}. However, + /// payloads are loaded lazily, so if the payload data for the current position is not needed, + /// this method may not be called at all for performance reasons. + /// @param data the array into which the data of this payload is to be stored + /// @param offset the offset in the array into which the data of this payload is to be stored. + /// @return a byte array containing the data of this payload + virtual ByteArray getPayload(ByteArray data, int32_t offset); + + /// Checks if a payload can be loaded at this position. + /// Payloads can only be loaded once per call to {@link #nextPosition()}. + /// @return true if there is a payload available at this position that can be loaded + virtual bool isPayloadAvailable(); +}; + +} + +#endif diff --git a/include/lucene++/TermQuery.h b/include/lucene++/TermQuery.h new file mode 100644 index 00000000..b5788aab --- /dev/null +++ b/include/lucene++/TermQuery.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMQUERY_H +#define TERMQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// A Query that matches documents containing a term. This may be combined with other terms with a +/// {@link BooleanQuery}. +class LPPAPI TermQuery : public Query { +public: + /// Constructs a query for the term. + TermQuery(const TermPtr& term); + + virtual ~TermQuery(); + + LUCENE_CLASS(TermQuery); + +protected: + TermPtr term; + +public: + using Query::toString; + + /// Returns the term of this query. 
+ TermPtr getTerm(); + + virtual WeightPtr createWeight(const SearcherPtr& searcher); + virtual void extractTerms(SetTerm terms); + + /// Prints a user-readable version of this query. + virtual String toString(const String& field); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + friend class TermWeight; +}; + +} + +#endif diff --git a/include/lucene++/TermRangeFilter.h b/include/lucene++/TermRangeFilter.h new file mode 100644 index 00000000..5c785b28 --- /dev/null +++ b/include/lucene++/TermRangeFilter.h @@ -0,0 +1,68 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMRANGEFILTER_H +#define TERMRANGEFILTER_H + +#include "MultiTermQueryWrapperFilter.h" + +namespace Lucene { + +/// A Filter that restricts search results to a range of term values in a given field. +/// +/// This filter matches the documents looking for terms that fall into the supplied range according to {@link +/// String#compare(String)}, unless a Collator is provided. It is not intended for numerical ranges; use {@link +/// NumericRangeFilter} instead. +/// +/// If you construct a large number of range filters with different ranges but on the same field, {@link +/// FieldCacheRangeFilter} may have significantly better performance. +class LPPAPI TermRangeFilter : public MultiTermQueryWrapperFilter { +public: + /// Warning: Using this constructor and supplying a non-null value in the collator parameter will cause + /// every single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. 
Depending + /// on the number of index Terms in this Field, the operation could be very slow. + /// @param lowerTerm The lower bound on this range + /// @param upperTerm The upper bound on this range + /// @param includeLower Does this range include the lower bound? + /// @param includeUpper Does this range include the upper bound? + /// @param collator The collator to use when determining range inclusion; set to null to use Unicode code + /// point ordering instead of collation. + TermRangeFilter(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, + bool includeUpper, CollatorPtr collator = CollatorPtr()); + + virtual ~TermRangeFilter(); + + LUCENE_CLASS(TermRangeFilter); + +public: + /// Constructs a filter for field fieldName matching less than or equal to upperTerm. + static TermRangeFilterPtr Less(const String& fieldName, StringValue upperTerm); + + /// Constructs a filter for field fieldName matching greater than or equal to lowerTerm. + static TermRangeFilterPtr More(const String& fieldName, StringValue lowerTerm); + + /// Returns the field name for this filter + String getField(); + + /// Returns the lower value of this range filter + String getLowerTerm(); + + /// Returns the upper value of this range filter + String getUpperTerm(); + + /// Returns true if the lower endpoint is inclusive + bool includesLower(); + + /// Returns true if the upper endpoint is inclusive + bool includesUpper(); + + /// Returns the collator used to determine range inclusion, if any. + CollatorPtr getCollator(); +}; + +} + +#endif diff --git a/include/lucene++/TermRangeQuery.h b/include/lucene++/TermRangeQuery.h new file mode 100644 index 00000000..cab13405 --- /dev/null +++ b/include/lucene++/TermRangeQuery.h @@ -0,0 +1,89 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMRANGEQUERY_H +#define TERMRANGEQUERY_H + +#include "MultiTermQuery.h" + +namespace Lucene { + +/// A Query that matches documents within an range of terms. +/// +/// This query matches the documents looking for terms that fall into the supplied range according to {@link +/// String#compare(String)}, unless a Collator is provided. It is not intended for numerical ranges; use {@link +/// NumericRangeQuery} instead. +/// +/// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. +class LPPAPI TermRangeQuery : public MultiTermQuery { +public: + /// Constructs a query selecting all terms greater/equal than lowerTerm but less/equal than upperTerm. + /// + /// If an endpoint is null, it is said to be "open". Either or both endpoints may be open. Open endpoints + /// may not be exclusive (you can't select all but the first or last term without explicitly specifying the + /// term to exclude.) + /// + /// If collator is not null, it will be used to decide whether index terms are within the given range, rather + /// than using the Unicode code point order in which index terms are stored. + /// + /// Warning: Using this constructor and supplying a non-null value in the collator parameter will cause every + /// single index Term in the Field referenced by lowerTerm and/or upperTerm to be examined. Depending on the + /// number of index Terms in this Field, the operation could be very slow. + /// + /// @param lowerTerm The Term text at the lower end of the range + /// @param upperTerm The Term text at the upper end of the range + /// @param includeLower If true, the lowerTerm is included in the range. + /// @param includeUpper If true, the upperTerm is included in the range. 
+ /// @param collator The collator to use to collate index Terms, to determine their membership in the range + /// bounded by lowerTerm and upperTerm. + TermRangeQuery(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, + bool includeUpper, CollatorPtr collator = CollatorPtr()); + + virtual ~TermRangeQuery(); + + LUCENE_CLASS(TermRangeQuery); + +protected: + StringValue lowerTerm; + StringValue upperTerm; + CollatorPtr collator; + String field; + bool includeLower; + bool includeUpper; + +public: + using MultiTermQuery::toString; + + /// Returns the field name for this query + String getField(); + + /// Returns the lower value of this range query + String getLowerTerm(); + + /// Returns the upper value of this range query + String getUpperTerm(); + + /// Returns true if the lower endpoint is inclusive + bool includesLower(); + + /// Returns true if the upper endpoint is inclusive + bool includesUpper(); + + /// Returns the collator used to determine range inclusion, if any. + CollatorPtr getCollator(); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual String toString(const String& field); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); + +protected: + virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/TermRangeTermEnum.h b/include/lucene++/TermRangeTermEnum.h new file mode 100644 index 00000000..1b1ad8e9 --- /dev/null +++ b/include/lucene++/TermRangeTermEnum.h @@ -0,0 +1,60 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMRANGETERMENUM_H +#define TERMRANGETERMENUM_H + +#include "FilteredTermEnum.h" + +namespace Lucene { + +/// Subclass of FilteredTermEnum for enumerating all terms that match the specified range parameters. +/// +/// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than +/// all that precede it. +class LPPAPI TermRangeTermEnum : public FilteredTermEnum { +public: + /// Enumerates all terms greater/equal than lowerTerm but less/equal than upperTerm. + /// + /// If an endpoint is null, it is said to be "open". Either or both endpoints may be open. Open endpoints + /// may not be exclusive (you can't select all but the first or last term without explicitly specifying + /// the term to exclude.) + /// + /// @param reader + /// @param field An interned field that holds both lower and upper terms. + /// @param lowerTermText The term text at the lower end of the range + /// @param upperTermText The term text at the upper end of the range + /// @param includeLower If true, the lowerTerm is included in the range. + /// @param includeUpper If true, the upperTerm is included in the range. + /// @param collator The collator to use to collate index Terms, to determine their membership in the range + /// bounded by lowerTerm and upperTerm. 
+ TermRangeTermEnum(const IndexReaderPtr& reader, const String& field, StringValue lowerTermText, StringValue upperTermText, + bool includeLower, bool includeUpper, const CollatorPtr& collator); + + virtual ~TermRangeTermEnum(); + + LUCENE_CLASS(TermRangeTermEnum); + +protected: + CollatorPtr collator; + bool _endEnum; + String field; + StringValue upperTermText; + StringValue lowerTermText; + bool includeLower; + bool includeUpper; + +public: + virtual double difference(); + +protected: + virtual bool endEnum(); + virtual bool termCompare(const TermPtr& term); +}; + +} + +#endif diff --git a/include/lucene++/TermScorer.h b/include/lucene++/TermScorer.h new file mode 100644 index 00000000..cfb9c00e --- /dev/null +++ b/include/lucene++/TermScorer.h @@ -0,0 +1,83 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMSCORER_H +#define TERMSCORER_H + +#include "Scorer.h" + +namespace Lucene { + +/// A Scorer for documents matching a Term. +class LPPAPI TermScorer : public Scorer { +public: + /// Construct a TermScorer. + /// @param weight The weight of the Term in the query. + /// @param td An iterator over the documents matching the Term. + /// @param similarity The Similarity implementation to be used for score computations. + /// @param norms The field norms of the document fields for the Term. 
+ TermScorer(const WeightPtr& weight, const TermDocsPtr& td, const SimilarityPtr& similarity, ByteArray norms); + + virtual ~TermScorer(); + + LUCENE_CLASS(TermScorer); + +protected: + WeightPtr weight; + TermDocsPtr termDocs; // for malloc and free + TermDocs* __termDocs; // for work, + ByteArray norms; + double weightValue; + int32_t doc; + + Collection docs; // buffered doc numbers + decltype(docs.get()) __docs; // + Collection freqs; // buffered term freqs + decltype(freqs.get()) __freqs; // + + int32_t freq; + int32_t pointer; + int32_t pointerMax; + + static const int32_t SCORE_CACHE_SIZE; + Collection scoreCache; + + + +public: + virtual void score(const CollectorPtr& collector); + virtual int32_t docID(); + + /// Advances to the next document matching the query. + /// The iterator over the matching documents is buffered using {@link + /// TermDocs#read(Collection, Collection)}. + /// @return the document matching the query or -1 if there are no more documents. + virtual int32_t nextDoc(); + + virtual double score(); + + /// Advances to the first match beyond the current whose document number is greater than or equal to a + /// given target. The implementation uses {@link TermDocs#skipTo(int32_t)}. + /// @param target The target document number. + /// @return the matching document or -1 if none exist. + virtual int32_t advance(int32_t target); + + /// Returns a string representation of this TermScorer. + virtual String toString(); + + virtual float termFreq(){ + return freq; + } + +protected: + static const Collection& SIM_NORM_DECODER(); + + virtual bool score(const CollectorPtr& collector, int32_t max, int32_t firstDocID); +}; + +} + +#endif diff --git a/include/lucene++/TermSpans.h b/include/lucene++/TermSpans.h new file mode 100644 index 00000000..50944e55 --- /dev/null +++ b/include/lucene++/TermSpans.h @@ -0,0 +1,45 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMSPANS_H +#define TERMSPANS_H + +#include "Spans.h" + +namespace Lucene { + +/// Public for extension only +class LPPAPI TermSpans : public Spans { +public: + TermSpans(const TermPositionsPtr& positions, const TermPtr& term); + virtual ~TermSpans(); + + LUCENE_CLASS(TermSpans); + +protected: + TermPositionsPtr positions; + TermPtr term; + int32_t _doc; + int32_t freq; + int32_t count; + int32_t position; + +public: + virtual bool next(); + virtual bool skipTo(int32_t target); + virtual int32_t doc(); + virtual int32_t start(); + virtual int32_t end(); + virtual Collection getPayload(); + virtual bool isPayloadAvailable(); + virtual String toString(); + + TermPositionsPtr getPositions(); +}; + +} + +#endif diff --git a/include/lucene++/TermVectorEntry.h b/include/lucene++/TermVectorEntry.h new file mode 100644 index 00000000..cf52c923 --- /dev/null +++ b/include/lucene++/TermVectorEntry.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTORENTRY_H +#define TERMVECTORENTRY_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Convenience class for holding TermVector information. 
+class LPPAPI TermVectorEntry : public LuceneObject { +public: + TermVectorEntry(const String& field = EmptyString, const String& term = EmptyString, int32_t frequency = 0, + Collection offsets = Collection(), + Collection positions = Collection()); + virtual ~TermVectorEntry(); + + LUCENE_CLASS(TermVectorEntry); + +protected: + String field; + String term; + int32_t frequency; + Collection offsets; + Collection positions; + +public: + String getField(); + int32_t getFrequency(); + Collection getOffsets(); + Collection getPositions(); + String getTerm(); + + void setFrequency(int32_t frequency); + void setOffsets(Collection offsets); + void setPositions(Collection positions); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual String toString(); +}; + +} + +#endif diff --git a/include/lucene++/TermVectorEntryFreqSortedComparator.h b/include/lucene++/TermVectorEntryFreqSortedComparator.h new file mode 100644 index 00000000..8cadb852 --- /dev/null +++ b/include/lucene++/TermVectorEntryFreqSortedComparator.h @@ -0,0 +1,27 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTORENTRYFREQSORTEDCOMPARATOR_H +#define TERMVECTORENTRYFREQSORTEDCOMPARATOR_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Compares {@link TermVectorEntry}s first by frequency and then by the term (case-sensitive) +class LPPAPI TermVectorEntryFreqSortedComparator : public LuceneObject { +public: + virtual ~TermVectorEntryFreqSortedComparator(); + + LUCENE_CLASS(TermVectorEntryFreqSortedComparator); + +public: + static bool compare(const TermVectorEntryPtr& first, const TermVectorEntryPtr& second); +}; + +} + +#endif diff --git a/include/lucene++/TermVectorMapper.h b/include/lucene++/TermVectorMapper.h new file mode 100644 index 00000000..3be7a305 --- /dev/null +++ b/include/lucene++/TermVectorMapper.h @@ -0,0 +1,73 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTORMAPPER_H +#define TERMVECTORMAPPER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// The TermVectorMapper can be used to map Term Vectors into your own structure instead of the parallel +/// array structure used by {@link IndexReader#getTermFreqVector(int,String)}. +/// +/// It is up to the implementation to make sure it is thread-safe. +class LPPAPI TermVectorMapper : public LuceneObject { +public: + /// @param ignoringPositions true if this mapper should tell Lucene to ignore positions even if + /// they are stored. 
+ /// @param ignoringOffsets similar to ignoringPositions + TermVectorMapper(bool ignoringPositions = false, bool ignoringOffsets = false); + + virtual ~TermVectorMapper(); + + LUCENE_CLASS(TermVectorMapper); + +protected: + bool ignoringPositions; + bool ignoringOffsets; + +public: + /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. + /// This method will be called once before retrieving the vector for a field. + /// + /// This method will be called before {@link #map(String,int,TermVectorOffsetInfo[],int[])}. + /// @param field The field the vector is for + /// @param numTerms The number of terms that need to be mapped + /// @param storeOffsets true if the mapper should expect offset information + /// @param storePositions true if the mapper should expect positions info + virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) = 0; + + /// Map the Term Vector information into your own structure + /// @param term The term to add to the vector + /// @param frequency The frequency of the term in the document + /// @param offsets null if the offset is not specified, otherwise the offset into the field of the term + /// @param positions null if the position is not specified, otherwise the position in the field of the term + virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions) = 0; + + /// Indicate to Lucene that even if there are positions stored, this mapper is not interested in them and + /// they can be skipped over. Derived classes should set this to true if they want to ignore positions. + /// The default is false, meaning positions will be loaded if they are stored. + virtual bool isIgnoringPositions(); + + /// @see #isIgnoringPositions() Same principal as {@link #isIgnoringPositions()}, but applied to offsets. 
+ virtual bool isIgnoringOffsets(); + + /// Passes down the index of the document whose term vector is currently being mapped, once for each top + /// level call to a term vector reader. + /// + /// Default implementation IGNORES the document number. Override if your implementation needs the document + /// number. + /// + /// NOTE: Document numbers are internal to Lucene and subject to change depending on indexing operations. + /// + /// @param documentNumber index of document currently being mapped + virtual void setDocumentNumber(int32_t documentNumber); +}; + +} + +#endif diff --git a/include/lucene++/TermVectorOffsetInfo.h b/include/lucene++/TermVectorOffsetInfo.h new file mode 100644 index 00000000..8ed0ff5f --- /dev/null +++ b/include/lucene++/TermVectorOffsetInfo.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTOROFFSETINFO_H +#define TERMVECTOROFFSETINFO_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// The TermVectorOffsetInfo class holds information pertaining to a Term in a {@link TermPositionVector}'s +/// offset information. This offset information is the character offset as set during the Analysis phase +/// (and thus may not be the actual offset in the original content). +class LPPAPI TermVectorOffsetInfo : public LuceneObject { +public: + TermVectorOffsetInfo(int32_t startOffset = 0, int32_t endOffset = 0); + virtual ~TermVectorOffsetInfo(); + + LUCENE_CLASS(TermVectorOffsetInfo); + +protected: + int32_t startOffset; + int32_t endOffset; + +public: + /// Convenience declaration when creating a {@link TermPositionVector} that stores only position information. 
+ static const Collection EMPTY_OFFSET_INFO(); + + /// The accessor for the ending offset for the term + int32_t getEndOffset(); + void setEndOffset(int32_t endOffset); + + /// The accessor for the starting offset of the term. + int32_t getStartOffset(); + void setStartOffset(int32_t startOffset); + + /// Two TermVectorOffsetInfos are equals if both the start and end offsets are the same. + /// @return true if both {@link #getStartOffset()} and {@link #getEndOffset()} are the same for both objects. + virtual bool equals(const LuceneObjectPtr& other); + + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/TermVectorsReader.h b/include/lucene++/TermVectorsReader.h new file mode 100644 index 00000000..bd2ca2bc --- /dev/null +++ b/include/lucene++/TermVectorsReader.h @@ -0,0 +1,150 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTORSREADER_H +#define TERMVECTORSREADER_H + +#include "TermVectorMapper.h" + +namespace Lucene { + +class LPPAPI TermVectorsReader : public LuceneObject { +public: + TermVectorsReader(); + TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos); + TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, + int32_t readBufferSize, int32_t docStoreOffset = -1, int32_t size = 0); + virtual ~TermVectorsReader(); + + LUCENE_CLASS(TermVectorsReader); + +public: + /// NOTE: if you make a new format, it must be larger than the current format + static const int32_t FORMAT_VERSION; + + /// Changes to speed up bulk merging of term vectors + static const int32_t FORMAT_VERSION2; + + /// Changed strings to UTF8 with length-in-bytes not length-in-chars + static const int32_t FORMAT_UTF8_LENGTH_IN_BYTES; + + /// NOTE: always change this if you switch to a new format. + static const int32_t FORMAT_CURRENT; + + /// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file + static const int32_t FORMAT_SIZE; + + static const uint8_t STORE_POSITIONS_WITH_TERMVECTOR; + static const uint8_t STORE_OFFSET_WITH_TERMVECTOR; + +protected: + FieldInfosPtr fieldInfos; + + IndexInputPtr tvx; + IndexInputPtr tvd; + IndexInputPtr tvf; + int32_t _size; + int32_t numTotalDocs; + + /// The docID offset where our docs begin in the index file. This will be 0 if we have our own private file. + int32_t docStoreOffset; + + int32_t format; + +public: + /// Used for bulk copy when merging + IndexInputPtr getTvdStream(); + + /// Used for bulk copy when merging + IndexInputPtr getTvfStream(); + + bool canReadRawDocs(); + + /// Retrieve the length (in bytes) of the tvd and tvf entries for the next numDocs starting with + /// startDocID. 
This is used for bulk copying when merging segments, if the field numbers are + /// congruent. Once this returns, the tvf & tvd streams are seeked to the startDocID. + void rawDocs(Collection tvdLengths, Collection tvfLengths, int32_t startDocID, int32_t numDocs); + + void close(); + + /// @return The number of documents in the reader + int32_t size(); + + void get(int32_t docNum, const String& field, const TermVectorMapperPtr& mapper); + + /// Retrieve the term vector for the given document and field + /// @param docNum The document number to retrieve the vector for + /// @param field The field within the document to retrieve + /// @return The TermFreqVector for the document and field or null if there is no termVector for + /// this field. + TermFreqVectorPtr get(int32_t docNum, const String& field); + + /// Return all term vectors stored for this document or null if the could not be read in. + /// + /// @param docNum The document number to retrieve the vector for + /// @return All term frequency vectors + Collection get(int32_t docNum); + + void get(int32_t docNumber, const TermVectorMapperPtr& mapper); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + +protected: + void ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size); + + void seekTvx(int32_t docNum); + + int32_t checkValidFormat(const IndexInputPtr& in); + + /// Reads the String[] fields; you have to pre-seek tvd to the right point + Collection readFields(int32_t fieldCount); + + /// Reads the long[] offsets into TVF; you have to pre-seek tvx/tvd to the right point + Collection readTvfPointers(int32_t fieldCount); + + Collection readTermVectors(int32_t docNum, Collection fields, Collection tvfPointers); + void readTermVectors(Collection fields, Collection tvfPointers, const TermVectorMapperPtr& mapper); + + /// @param field The field to read in + /// @param tvfPointer 
The pointer within the tvf file where we should start reading + /// @param mapper The mapper used to map the TermVector + void readTermVector(const String& field, int64_t tvfPointer, const TermVectorMapperPtr& mapper); +}; + +/// Models the existing parallel array structure +class ParallelArrayTermVectorMapper : public TermVectorMapper { +public: + ParallelArrayTermVectorMapper(); + virtual ~ParallelArrayTermVectorMapper(); + + LUCENE_CLASS(ParallelArrayTermVectorMapper); + +protected: + Collection terms; + Collection termFreqs; + Collection< Collection > positions; + Collection< Collection > offsets; + int32_t currentPosition; + bool storingOffsets; + bool storingPositions; + String field; + +public: + /// Tell the mapper what to expect in regards to field, number of terms, offset and position storage. + /// This method will be called once before retrieving the vector for a field. + virtual void setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions); + + /// Map the Term Vector information into your own structure + virtual void map(const String& term, int32_t frequency, Collection offsets, Collection positions); + + /// Construct the vector + /// @return The {@link TermFreqVector} based on the mappings. + TermFreqVectorPtr materializeVector(); +}; + +} + +#endif diff --git a/include/lucene++/TermVectorsTermsWriter.h b/include/lucene++/TermVectorsTermsWriter.h new file mode 100644 index 00000000..0c7303f2 --- /dev/null +++ b/include/lucene++/TermVectorsTermsWriter.h @@ -0,0 +1,95 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTORSTERMSWRITER_H +#define TERMVECTORSTERMSWRITER_H + +#include "TermsHashConsumer.h" +#include "DocumentsWriter.h" +#include "RawPostingList.h" + +namespace Lucene { + +class TermVectorsTermsWriter : public TermsHashConsumer { +public: + TermVectorsTermsWriter(const DocumentsWriterPtr& docWriter); + virtual ~TermVectorsTermsWriter(); + + LUCENE_CLASS(TermVectorsTermsWriter); + +public: + DocumentsWriterWeakPtr _docWriter; + TermVectorsWriterPtr termVectorsWriter; + Collection docFreeList; + int32_t freeCount; + IndexOutputPtr tvx; + IndexOutputPtr tvd; + IndexOutputPtr tvf; + int32_t lastDocID; + int32_t allocCount; + +public: + virtual TermsHashConsumerPerThreadPtr addThread(const TermsHashPerThreadPtr& perThread); + virtual void createPostings(Collection postings, int32_t start, int32_t count); + virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); + virtual void closeDocStore(const SegmentWriteStatePtr& state); + + TermVectorsTermsWriterPerDocPtr getPerDoc(); + + /// Fills in no-term-vectors for all docs we haven't seen since the last doc that had term vectors. 
+ void fill(int32_t docID); + + void initTermVectorsWriter(); + void finishDocument(const TermVectorsTermsWriterPerDocPtr& perDoc); + bool freeRAM(); + void free(const TermVectorsTermsWriterPerDocPtr& doc); + + virtual void abort(); + virtual int32_t bytesPerPosting(); +}; + +class TermVectorsTermsWriterPerDoc : public DocWriter { +public: + TermVectorsTermsWriterPerDoc(const TermVectorsTermsWriterPtr& termsWriter = TermVectorsTermsWriterPtr()); + virtual ~TermVectorsTermsWriterPerDoc(); + + LUCENE_CLASS(TermVectorsTermsWriterPerDoc); + +protected: + TermVectorsTermsWriterWeakPtr _termsWriter; + +public: + PerDocBufferPtr buffer; + RAMOutputStreamPtr perDocTvf; + int32_t numVectorFields; + + Collection fieldNumbers; + Collection fieldPointers; + +public: + void reset(); + virtual void abort(); + void addField(int32_t fieldNumber); + virtual int64_t sizeInBytes(); + virtual void finish(); +}; + +class TermVectorsTermsWriterPostingList : public RawPostingList { +public: + TermVectorsTermsWriterPostingList(); + virtual ~TermVectorsTermsWriterPostingList(); + + LUCENE_CLASS(TermVectorsTermsWriterPostingList); + +public: + int32_t freq; // How many times this term occurred in the current doc + int32_t lastOffset; // Last offset we saw + int32_t lastPosition; // Last position where this term occurred +}; + +} + +#endif diff --git a/include/lucene++/TermVectorsTermsWriterPerField.h b/include/lucene++/TermVectorsTermsWriterPerField.h new file mode 100644 index 00000000..68bd35c9 --- /dev/null +++ b/include/lucene++/TermVectorsTermsWriterPerField.h @@ -0,0 +1,55 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTORSTERMSWRITERPERFIELD_H +#define TERMVECTORSTERMSWRITERPERFIELD_H + +#include "TermsHashConsumerPerField.h" + +namespace Lucene { + +class TermVectorsTermsWriterPerField : public TermsHashConsumerPerField { +public: + TermVectorsTermsWriterPerField(const TermsHashPerFieldPtr& termsHashPerField, const TermVectorsTermsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo); + virtual ~TermVectorsTermsWriterPerField(); + + LUCENE_CLASS(TermVectorsTermsWriterPerField); + +public: + TermVectorsTermsWriterPerThreadWeakPtr _perThread; + TermsHashPerFieldWeakPtr _termsHashPerField; + TermVectorsTermsWriterWeakPtr _termsWriter; + FieldInfoPtr fieldInfo; + DocStateWeakPtr _docState; + FieldInvertStateWeakPtr _fieldState; + + bool doVectors; + bool doVectorPositions; + bool doVectorOffsets; + + int32_t maxNumPostings; + OffsetAttributePtr offsetAttribute; + +public: + virtual int32_t getStreamCount(); + virtual bool start(Collection fields, int32_t count); + virtual void abort(); + + /// Called once per field per document if term vectors are enabled, to write the vectors to RAMOutputStream, + /// which is then quickly flushed to the real term vectors files in the Directory. + virtual void finish(); + + void shrinkHash(); + + virtual void start(const FieldablePtr& field); + virtual void newTerm(const RawPostingListPtr& p0); + virtual void addTerm(const RawPostingListPtr& p0); + virtual void skippingLongTerm(); +}; + +} + +#endif diff --git a/include/lucene++/TermVectorsTermsWriterPerThread.h b/include/lucene++/TermVectorsTermsWriterPerThread.h new file mode 100644 index 00000000..e0ec6a54 --- /dev/null +++ b/include/lucene++/TermVectorsTermsWriterPerThread.h @@ -0,0 +1,44 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTORSTERMSWRITERPERTHREAD_H +#define TERMVECTORSTERMSWRITERPERTHREAD_H + +#include "TermsHashConsumerPerThread.h" + +namespace Lucene { + +class TermVectorsTermsWriterPerThread : public TermsHashConsumerPerThread { +public: + TermVectorsTermsWriterPerThread(const TermsHashPerThreadPtr& termsHashPerThread, const TermVectorsTermsWriterPtr& termsWriter); + virtual ~TermVectorsTermsWriterPerThread(); + + LUCENE_CLASS(TermVectorsTermsWriterPerThread); + +public: + TermVectorsTermsWriterWeakPtr _termsWriter; + TermsHashPerThreadWeakPtr _termsHashPerThread; + DocStateWeakPtr _docState; + + TermVectorsTermsWriterPerDocPtr doc; + ByteSliceReaderPtr vectorSliceReader; + Collection utf8Results; + String lastVectorFieldName; + +public: + virtual void startDocument(); + virtual DocWriterPtr finishDocument(); + virtual TermsHashConsumerPerFieldPtr addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo); + virtual void abort(); + + /// Called only by assert + bool clearLastVectorFieldName(); + bool vectorFieldsInOrder(const FieldInfoPtr& fi); +}; + +} + +#endif diff --git a/include/lucene++/TermVectorsWriter.h b/include/lucene++/TermVectorsWriter.h new file mode 100644 index 00000000..8e5a61e6 --- /dev/null +++ b/include/lucene++/TermVectorsWriter.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMVECTORSWRITER_H +#define TERMVECTORSWRITER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class TermVectorsWriter : public LuceneObject { +public: + TermVectorsWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fieldInfos); + virtual ~TermVectorsWriter(); + + LUCENE_CLASS(TermVectorsWriter); + +protected: + IndexOutputPtr tvx; + IndexOutputPtr tvd; + IndexOutputPtr tvf; + FieldInfosPtr fieldInfos; + Collection utf8Results; + +public: + /// Add a complete document specified by all its term vectors. If document has no term vectors, + /// add value for tvx. + void addAllDocVectors(Collection vectors); + + /// Do a bulk copy of numDocs documents from reader to our streams. This is used to expedite merging, + /// if the field numbers are congruent. + void addRawDocuments(const TermVectorsReaderPtr& reader, Collection tvdLengths, Collection tvfLengths, int32_t numDocs); + + /// Close all streams. + void close(); +}; + +} + +#endif diff --git a/include/lucene++/TermsHash.h b/include/lucene++/TermsHash.h new file mode 100644 index 00000000..f390bd8d --- /dev/null +++ b/include/lucene++/TermsHash.h @@ -0,0 +1,68 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMSHASH_H +#define TERMSHASH_H + +#include "InvertedDocConsumer.h" + +namespace Lucene { + +/// This class implements {@link InvertedDocConsumer}, which is passed each token produced by the analyzer on +/// each field. It stores these tokens in a hash table, and allocates separate byte streams per token. 
Consumers +/// of this class, eg {@link FreqProxTermsWriter} and {@link TermVectorsTermsWriter}, write their own byte streams +/// under each term. +class TermsHash : public InvertedDocConsumer { +public: + TermsHash(const DocumentsWriterPtr& docWriter, bool trackAllocations, const TermsHashConsumerPtr& consumer, const TermsHashPtr& nextTermsHash); + virtual ~TermsHash(); + + LUCENE_CLASS(TermsHash); + +public: + TermsHashConsumerPtr consumer; + TermsHashPtr nextTermsHash; + int32_t bytesPerPosting; + int32_t postingsFreeChunk; + DocumentsWriterWeakPtr _docWriter; + bool trackAllocations; + +protected: + Collection postingsFreeList; + int32_t postingsFreeCount; + int32_t postingsAllocCount; + +public: + /// Add a new thread + virtual InvertedDocConsumerPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread); + virtual TermsHashPerThreadPtr addThread(const DocInverterPerThreadPtr& docInverterPerThread, const TermsHashPerThreadPtr& primaryPerThread); + + virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); + + /// Abort (called after hitting AbortException) + /// NOTE: do not make this sync'd; it's not necessary (DW ensures all other threads are idle), and it + /// leads to deadlock + virtual void abort(); + + void shrinkFreePostings(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); + + /// Close doc stores + virtual void closeDocStore(const SegmentWriteStatePtr& state); + + /// Flush a new segment + virtual void flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state); + + /// Attempt to free RAM, returning true if any RAM was freed + virtual bool freeRAM(); + + void recyclePostings(Collection postings, int32_t numPostings); + + void getPostings(Collection postings); +}; + +} + +#endif diff --git a/include/lucene++/TermsHashConsumer.h b/include/lucene++/TermsHashConsumer.h new file mode 100644 
index 00000000..04e8356f --- /dev/null +++ b/include/lucene++/TermsHashConsumer.h @@ -0,0 +1,36 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMSHASHCONSUMER_H +#define TERMSHASHCONSUMER_H + +#include "LuceneObject.h" + +namespace Lucene { + +class TermsHashConsumer : public LuceneObject { +public: + virtual ~TermsHashConsumer(); + + LUCENE_CLASS(TermsHashConsumer); + +public: + FieldInfosPtr fieldInfos; + +public: + virtual int32_t bytesPerPosting() = 0; + virtual void createPostings(Collection postings, int32_t start, int32_t count) = 0; + virtual TermsHashConsumerPerThreadPtr addThread(const TermsHashPerThreadPtr& perThread) = 0; + virtual void flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) = 0; + virtual void abort() = 0; + virtual void closeDocStore(const SegmentWriteStatePtr& state) = 0; + + virtual void setFieldInfos(const FieldInfosPtr& fieldInfos); +}; + +} + +#endif diff --git a/include/lucene++/TermsHashConsumerPerField.h b/include/lucene++/TermsHashConsumerPerField.h new file mode 100644 index 00000000..b05dc6db --- /dev/null +++ b/include/lucene++/TermsHashConsumerPerField.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMSHASHCONSUMERPERFIELD_H +#define TERMSHASHCONSUMERPERFIELD_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Implement this class to plug into the TermsHash processor, which inverts & stores Tokens into a hash +/// table and provides an API for writing bytes into multiple streams for each unique Token. +class TermsHashConsumerPerField : public LuceneObject { +public: + virtual ~TermsHashConsumerPerField(); + + LUCENE_CLASS(TermsHashConsumerPerField); + +public: + virtual bool start(Collection fields, int32_t count) = 0; + virtual void finish() = 0; + virtual void skippingLongTerm() = 0; + virtual void start(const FieldablePtr& field) = 0; + virtual void newTerm(const RawPostingListPtr& p) = 0; + virtual void addTerm(const RawPostingListPtr& p) = 0; + virtual int32_t getStreamCount() = 0; +}; + +} + +#endif diff --git a/include/lucene++/TermsHashConsumerPerThread.h b/include/lucene++/TermsHashConsumerPerThread.h new file mode 100644 index 00000000..9e074b5b --- /dev/null +++ b/include/lucene++/TermsHashConsumerPerThread.h @@ -0,0 +1,29 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMSHASHCONSUMERPERTHREAD_H +#define TERMSHASHCONSUMERPERTHREAD_H + +#include "LuceneObject.h" + +namespace Lucene { + +class TermsHashConsumerPerThread : public LuceneObject { +public: + virtual ~TermsHashConsumerPerThread(); + + LUCENE_CLASS(TermsHashConsumerPerThread); + +public: + virtual void startDocument() = 0; + virtual DocWriterPtr finishDocument() = 0; + virtual TermsHashConsumerPerFieldPtr addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo) = 0; + virtual void abort() = 0; +}; + +} + +#endif diff --git a/include/lucene++/TermsHashPerField.h b/include/lucene++/TermsHashPerField.h new file mode 100644 index 00000000..bd6cff73 --- /dev/null +++ b/include/lucene++/TermsHashPerField.h @@ -0,0 +1,99 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMSHASHPERFIELD_H +#define TERMSHASHPERFIELD_H + +#include "InvertedDocConsumerPerField.h" + +namespace Lucene { + +class TermsHashPerField : public InvertedDocConsumerPerField { +public: + TermsHashPerField(const DocInverterPerFieldPtr& docInverterPerField, const TermsHashPerThreadPtr& perThread, const TermsHashPerThreadPtr& nextPerThread, const FieldInfoPtr& fieldInfo); + virtual ~TermsHashPerField(); + + LUCENE_CLASS(TermsHashPerField); + +public: + TermsHashConsumerPerFieldPtr consumer; + TermsHashPerFieldPtr nextPerField; + DocInverterPerFieldWeakPtr _docInverterPerField; + TermsHashPerThreadPtr nextPerThread; + TermsHashPerThreadWeakPtr _perThread; + DocStatePtr docState; + FieldInvertStatePtr fieldState; + TermAttributePtr termAtt; + + // Copied from our perThread + CharBlockPoolPtr charPool; + IntBlockPoolPtr intPool; + ByteBlockPoolPtr bytePool; + + int32_t streamCount; + int32_t numPostingInt; + + FieldInfoPtr fieldInfo; + + bool postingsCompacted; + int32_t numPostings; + + IntArray intUptos; + int32_t intUptoStart; + +protected: + int32_t postingsHashSize; + int32_t postingsHashHalfSize; + int32_t postingsHashMask; + Collection postingsHash; + RawPostingListPtr p; + bool doCall; + bool doNextCall; + +public: + virtual void initialize(); + void shrinkHash(int32_t targetSize); + void reset(); + + /// Called on hitting an aborting exception + virtual void abort(); + + void initReader(const ByteSliceReaderPtr& reader, const RawPostingListPtr& p, int32_t stream); + + /// Collapse the hash table and sort in-place. + Collection sortPostings(); + + /// Called before a field instance is being processed + virtual void start(const FieldablePtr& field); + + /// Called once per field, and is given all Fieldable occurrences for this field in the document. 
+ virtual bool start(Collection fields, int32_t count); + + void add(int32_t textStart); + + /// Primary entry point (for first TermsHash) + virtual void add(); + + void writeByte(int32_t stream, int8_t b); + void writeBytes(int32_t stream, const uint8_t* b, int32_t offset, int32_t length); + void writeVInt(int32_t stream, int32_t i); + + /// Called once per field per document, after all Fieldable occurrences are inverted + virtual void finish(); + + /// Called when postings hash is too small (> 50% occupied) or too large (< 20% occupied). + void rehashPostings(int32_t newSize); + +protected: + void compactPostings(); + + /// Test whether the text for current RawPostingList p equals current tokenText. + bool postingEquals(const wchar_t* tokenText, int32_t tokenTextLen); +}; + +} + +#endif diff --git a/include/lucene++/TermsHashPerThread.h b/include/lucene++/TermsHashPerThread.h new file mode 100644 index 00000000..7ae0fae9 --- /dev/null +++ b/include/lucene++/TermsHashPerThread.h @@ -0,0 +1,59 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TERMSHASHPERTHREAD_H +#define TERMSHASHPERTHREAD_H + +#include "InvertedDocConsumerPerThread.h" + +namespace Lucene { + +class TermsHashPerThread : public InvertedDocConsumerPerThread { +public: + TermsHashPerThread(const DocInverterPerThreadPtr& docInverterPerThread, const TermsHashPtr& termsHash, const TermsHashPtr& nextTermsHash, const TermsHashPerThreadPtr& primaryPerThread); + virtual ~TermsHashPerThread(); + + LUCENE_CLASS(TermsHashPerThread); + +public: + DocInverterPerThreadWeakPtr _docInverterPerThread; + TermsHashWeakPtr _termsHash; + TermsHashPtr nextTermsHash; + TermsHashPerThreadWeakPtr _primaryPerThread; + TermsHashConsumerPerThreadPtr consumer; + TermsHashPerThreadPtr nextPerThread; + + CharBlockPoolPtr charPool; + IntBlockPoolPtr intPool; + ByteBlockPoolPtr bytePool; + bool primary; + DocStatePtr docState; + + Collection freePostings; + int32_t freePostingsCount; + +public: + virtual void initialize(); + + virtual InvertedDocConsumerPerFieldPtr addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo); + virtual void abort(); + + /// perField calls this when it needs more postings + void morePostings(); + + virtual void startDocument(); + virtual DocWriterPtr finishDocument(); + + /// Clear all state + void reset(bool recyclePostings); + +protected: + static bool noNullPostings(Collection postings, int32_t count, const String& details); +}; + +} + +#endif diff --git a/include/lucene++/TestPoint.h b/include/lucene++/TestPoint.h new file mode 100644 index 00000000..f4e626da --- /dev/null +++ b/include/lucene++/TestPoint.h @@ -0,0 +1,43 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TESTPOINT_H +#define TESTPOINT_H + +#include "Lucene.h" + +namespace Lucene { + +/// Used for unit testing as a substitute for stack trace +class LPPAPI TestPoint { +public: + virtual ~TestPoint(); + +protected: + static MapStringInt testMethods; + static bool enable; + +public: + static void enableTestPoints(); + static void clear(); + static void setTestPoint(const String& object, const String& method, bool point); + static bool getTestPoint(const String& object, const String& method); + static bool getTestPoint(const String& method); +}; + +class LPPAPI TestScope { +public: + TestScope(const String& object, const String& method); + virtual ~TestScope(); + +protected: + String object; + String method; +}; + +} + +#endif diff --git a/include/lucene++/ThreadPool.h b/include/lucene++/ThreadPool.h new file mode 100644 index 00000000..dc6446ff --- /dev/null +++ b/include/lucene++/ThreadPool.h @@ -0,0 +1,82 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef THREADPOOL_H +#define THREADPOOL_H + +#include +#include +#include +#include "LuceneObject.h" + +namespace Lucene { + +typedef boost::shared_ptr workPtr; + +/// A Future represents the result of an asynchronous computation. Methods are provided to check if the computation +/// is complete, to wait for its completion, and to retrieve the result of the computation. The result can only be +/// retrieved using method get when the computation has completed, blocking if necessary until it is ready. 
+class Future : public LuceneObject { +public: + virtual ~Future(); + +protected: + boost::any value; + +public: + void set(const boost::any& value) { + SyncLock syncLock(this); + this->value = value; + } + + template + TYPE get() { + SyncLock syncLock(this); + while (value.empty()) { + wait(10); + } + return value.empty() ? TYPE() : boost::any_cast(value); + } +}; + +/// Utility class to handle a pool of threads. +class ThreadPool : public LuceneObject { +public: + ThreadPool(); + virtual ~ThreadPool(); + + LUCENE_CLASS(ThreadPool); + +protected: + boost::asio::io_service io_service; + workPtr work; + boost::thread_group threadGroup; + + static const int32_t THREADPOOL_SIZE; + +public: + /// Get singleton thread pool instance. + static ThreadPoolPtr getInstance(); + + template + FuturePtr scheduleTask(FUNC func) { + FuturePtr future(newInstance()); + io_service.post(boost::bind(&ThreadPool::execute, this, func, future)); + return future; + } + +protected: + // this will be executed when one of the threads is available + template + void execute(FUNC func, const FuturePtr& future) { + future->set(func()); + future->notifyAll(); + } +}; + +} + +#endif diff --git a/include/lucene++/TimeLimitingCollector.h b/include/lucene++/TimeLimitingCollector.h new file mode 100644 index 00000000..7f377b09 --- /dev/null +++ b/include/lucene++/TimeLimitingCollector.h @@ -0,0 +1,97 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TIMELIMITINGCOLLECTOR_H +#define TIMELIMITINGCOLLECTOR_H + +#include "Collector.h" + +namespace Lucene { + +/// The {@link TimeLimitingCollector} is used to timeout search requests that take longer than the maximum +/// allowed search time limit. 
After this time is exceeded, the search thread is stopped by throwing a +/// {@link TimeExceededException}. +class LPPAPI TimeLimitingCollector : public Collector { +public: + /// Create a TimeLimitedCollector wrapper over another {@link Collector} with a specified timeout. + /// @param collector the wrapped {@link Collector} + /// @param timeAllowed max time allowed for collecting hits after which TimeExceeded exception is thrown + TimeLimitingCollector(const CollectorPtr& collector, int64_t timeAllowed); + + virtual ~TimeLimitingCollector(); + + LUCENE_CLASS(TimeLimitingCollector); + +public: + /// Default timer resolution. + /// @see #setResolution(int64_t) + static const int32_t DEFAULT_RESOLUTION; + + /// Default for {@link #isGreedy()}. + /// @see #isGreedy() + bool DEFAULT_GREEDY; + +protected: + static int64_t resolution; + bool greedy; + + int64_t t0; + int64_t timeout; + CollectorPtr collector; + + int32_t docBase; + +public: + /// Return the timer resolution. + /// @see #setResolution(int64_t) + static int64_t getResolution(); + + /// Set the timer resolution. + /// The default timer resolution is 20 milliseconds. + /// This means that a search required to take no longer than 800 milliseconds may be stopped after + /// 780 to 820 milliseconds. Note that: + ///
    + ///
  • Finer (smaller) resolution is more accurate but less efficient. + ///
  • Setting resolution to less than 5 milliseconds will be silently modified to 5 milliseconds. + ///
  • Setting resolution smaller than current resolution might take effect only after current resolution. + /// (Assume current resolution of 20 milliseconds is modified to 5 milliseconds, then it can take up to 20 + /// milliseconds for the change to have effect.) + ///
+ static void setResolution(int64_t newResolution); + + /// Stop timer thread. + static void stopTimer(); + + /// Checks if this time limited collector is greedy in collecting the last hit. A non greedy collector, + /// upon a timeout, would throw a TimeExceeded without allowing the wrapped collector to collect current + /// doc. A greedy one would first allow the wrapped hit collector to collect current doc and only then + /// throw a TimeExceeded exception. + /// @see #setGreedy(boolean) + bool isGreedy(); + + /// Sets whether this time limited collector is greedy. + /// @param greedy true to make this time limited greedy + /// @see #isGreedy() + void setGreedy(bool greedy); + + /// Calls {@link Collector#collect(int)} on the decorated {@link Collector} unless the allowed time has + /// passed, in which case it throws an exception. + virtual void collect(int32_t doc); + + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setScorer(const ScorerPtr& scorer); + virtual bool acceptsDocsOutOfOrder(); + +protected: + /// Initialize a single static timer thread to be used by all TimeLimitedCollector instances. + static TimerThreadPtr TIMER_THREAD(); + + friend class TimerThread; +}; + +} + +#endif diff --git a/include/lucene++/Token.h b/include/lucene++/Token.h new file mode 100644 index 00000000..abc50d12 --- /dev/null +++ b/include/lucene++/Token.h @@ -0,0 +1,356 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TOKEN_H +#define TOKEN_H + +#include "Attribute.h" +#include "AttributeSource.h" + +namespace Lucene { + +/// A Token is an occurrence of a term from the text of a field. 
It consists of a term's text, the start and end +/// offset of the term in the text of the field and a type string. +/// +/// The start and end offsets permit applications to re-associate a token with its source text, eg., to display +/// highlighted query terms in a document browser, or to show matching text fragments in a +/// KWIC display, etc. +/// +/// The type is a string, assigned by a lexical analyzer (a.k.a. tokenizer), naming the lexical or syntactic class +/// that the token belongs to. For example an end of sentence marker token might be implemented with type "eos". +/// The default token type is "word". +/// +/// A Token can optionally have metadata (a.k.a. Payload) in the form of a variable length byte array. Use {@link +/// TermPositions#getPayloadLength()} and {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads +/// from the index. +/// +/// Tokenizers and TokenFilters should try to re-use a Token instance when possible for best performance, by implementing +/// the {@link TokenStream#incrementToken()} API. Failing that, to create a new Token you should first use one of +/// the constructors that starts with null text. To load the token from a char[] use +/// {@link #setTermBuffer(char[], int, int)}. To load from a String use {@link #setTermBuffer(String)} or {@link +/// #setTermBuffer(String, int, int)}. Alternatively you can get the Token's termBuffer by calling either {@link +/// #termBuffer()}, if you know that your text is shorter than the capacity of the termBuffer or {@link +/// #resizeTermBuffer(int)}, if there is any possibility that you may need to grow the buffer. Fill in the characters +/// of your term into this buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string, +/// or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to +/// set the length of the term text. 
+/// +/// Typical Token reuse patterns: +/// +/// Copying text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified): +///
+/// return reusableToken->reinit(string, startOffset, endOffset[, type]);
+/// 
+/// +/// Copying some text from a string (type is reset to {@link #DEFAULT_TYPE} if not specified): +///
+/// return reusableToken->reinit(string, 0, string.length(), startOffset, endOffset[, type]);
+/// 
+/// +/// Copying text from char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified): +///
+/// return reusableToken->reinit(buffer, 0, buffer.length, startOffset, endOffset[, type]);
+/// 
+/// +/// Copying some text from a char[] buffer (type is reset to {@link #DEFAULT_TYPE} if not specified): +///
+/// return reusableToken->reinit(buffer, start, end - start, startOffset, endOffset[, type]);
+/// 
+/// +/// Copying from one Token to another (type is reset to {@link #DEFAULT_TYPE} if not specified): +///
+/// return reusableToken.reinit(source.termBuffer(), 0, source.termLength(), source.startOffset(), source.endOffset()[, source.type()]);
+/// 
+/// +/// A few things to note: +/// clear() initializes all of the fields to default values. This was changed in contrast to Lucene 2.4, but +/// should affect no one. +/// Because TokenStreams can be chained, one cannot assume that the Token's current type is correct. The startOffset +/// and endOffset represent the start and offset in the source text, so be careful in adjusting them. When caching a +/// reusable token, clone it. When injecting a cached token into a stream that can be reset, clone it again. +/// +/// @see Payload +class LPPAPI Token : public Attribute { +public: + /// Constructs a Token will null text. + Token(); + + /// Constructs a Token with null text and start and end offsets. + /// @param start start offset in the source text + /// @param end end offset in the source text + Token(int32_t start, int32_t end); + + /// Constructs a Token with null text and start and end offsets plus the Token type. + /// @param start start offset in the source text + /// @param end end offset in the source text + /// @param type the lexical type of this Token + Token(int32_t start, int32_t end, const String& type); + + /// Constructs a Token with null text and start and end offsets plus flags. + /// @param start start offset in the source text + /// @param end end offset in the source text + /// @param flags The bits to set for this token + Token(int32_t start, int32_t end, int32_t flags); + + /// Constructs a Token with the given term text, start and end offsets. The type defaults to "word." + /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. + /// @param text term text + /// @param start start offset in the source text + /// @param end end offset in the source text + Token(const String& text, int32_t start, int32_t end); + + /// Constructs a Token with the given term text, start and end offsets and type. 
+ /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. + /// @param text term text + /// @param start start offset in the source text + /// @param end end offset in the source text + /// @param type the lexical type of this Token + Token(const String& text, int32_t start, int32_t end, const String& type); + + /// Constructs a Token with the given term text, start and end offsets and flags. + /// NOTE: for better indexing speed you should instead use the char[] termBuffer methods to set the term text. + /// @param text term text + /// @param start start offset in the source text + /// @param end end offset in the source text + /// @param flags The bits to set for this token + Token(const String& text, int32_t start, int32_t end, int32_t flags); + + /// Constructs a Token with the given term buffer (offset and length), start and end offsets + Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end); + + virtual ~Token(); + + LUCENE_CLASS(Token); + +public: + static const String& DEFAULT_TYPE(); + +protected: + static const int32_t MIN_BUFFER_SIZE; + + CharArray _termBuffer; + int32_t _termLength; + int32_t _startOffset; + int32_t _endOffset; + String _type; + int32_t flags; + PayloadPtr payload; + int32_t positionIncrement; + +public: + /// Set the position increment. This determines the position of this token relative to the previous Token + /// in a {@link TokenStream}, used in phrase searching. + /// + /// The default value is one. + /// + /// Some common uses for this are: + /// + /// Set it to zero to put multiple terms in the same position. This is useful if, eg., a word has multiple + /// stems. Searches for phrases including either stem will match. In this case, all but the first stem's + /// increment should be set to zero: the increment of the first instance should be one. 
Repeating a token + /// with an increment of zero can also be used to boost the scores of matches on that token. + /// + /// Set it to values greater than one to inhibit exact phrase matches. If, for example, one does not want + /// phrases to match across removed stop words, then one could build a stop word filter that removes stop + /// words and also sets the increment to the number of stop words removed before each non-stop word. Then + /// exact phrase queries will only match when the terms occur with no intervening stop words. + /// + /// @param positionIncrement the distance from the prior term + /// @see TermPositions + virtual void setPositionIncrement(int32_t positionIncrement); + + /// Returns the position increment of this Token. + /// @see #setPositionIncrement + virtual int32_t getPositionIncrement(); + + /// Returns the Token's term text. + /// + /// This method has a performance penalty because the text is stored internally in a char[]. If possible, + /// use {@link #termBuffer()} and {@link #termLength()} directly instead. If you really need a String, use + /// this method, which is nothing more than a convenience call to String(token->termBuffer(), token->termLength()) + virtual String term(); + + /// Copies the contents of buffer, starting at offset for length characters, into the termBuffer array. + /// @param buffer the buffer to copy + /// @param offset the index in the buffer of the first character to copy + /// @param length the number of characters to copy + virtual void setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length); + + /// Copies the contents of buffer into the termBuffer array. + /// @param buffer the buffer to copy + virtual void setTermBuffer(const String& buffer); + + /// Copies the contents of buffer, starting at offset and continuing for length characters, into the termBuffer array. 
+ /// @param buffer the buffer to copy + /// @param offset the index in the buffer of the first character to copy + /// @param length the number of characters to copy + virtual void setTermBuffer(const String& buffer, int32_t offset, int32_t length); + + /// Returns the internal termBuffer character array which you can then directly alter. If the array is too + /// small for your token, use {@link #resizeTermBuffer(int)} to increase it. After altering the buffer be sure + /// to call {@link #setTermLength} to record the number of valid characters that were placed into the termBuffer. + virtual CharArray termBuffer(); + + /// Optimized implementation of termBuffer. + virtual wchar_t* termBufferArray(); + + /// Grows the termBuffer to at least size newSize, preserving the existing content. Note: If the next operation is + /// to change the contents of the term buffer use {@link #setTermBuffer(char[], int, int)}, {@link + /// #setTermBuffer(String)}, or {@link #setTermBuffer(String, int, int)} to optimally combine the resize with the + /// setting of the termBuffer. + /// @param newSize minimum size of the new termBuffer + /// @return newly created termBuffer with length >= newSize + virtual CharArray resizeTermBuffer(int32_t newSize); + + /// Return number of valid characters (length of the term) in the termBuffer array. + virtual int32_t termLength(); + + /// Set number of valid characters (length of the term) in the termBuffer array. Use this to truncate the termBuffer + /// or to synchronize with external manipulation of the termBuffer. Note: to grow the size of the array, use {@link + /// #resizeTermBuffer(int)} first. + /// @param length the truncated length + virtual void setTermLength(int32_t length); + + /// Returns this Token's starting offset, the position of the first character corresponding to this token in the + /// source text. 
+ /// + /// Note that the difference between endOffset() and startOffset() may not be equal to {@link #termLength}, as the + /// term text may have been altered by a stemmer or some other filter. + virtual int32_t startOffset(); + + /// Set the starting offset. + /// @see #startOffset() + virtual void setStartOffset(int32_t offset); + + /// Returns this Token's ending offset, one greater than the position of the last character corresponding to this + /// token in the source text. The length of the token in the source text is (endOffset - startOffset). + virtual int32_t endOffset(); + + /// Set the ending offset. + /// @see #endOffset() + virtual void setEndOffset(int32_t offset); + + /// Set the starting and ending offset. + /// @see #startOffset() and #endOffset() + virtual void setOffset(int32_t startOffset, int32_t endOffset); + + /// Returns this Token's lexical type. Defaults to "word". + virtual String type(); + + /// Set the lexical type. + /// @see #type() + virtual void setType(const String& type); + + /// Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although + /// they do share similar purposes. The flags can be used to encode information about the token for use by other + /// {@link TokenFilter}s. + /// + /// @return The bits + virtual int32_t getFlags(); + + /// @see #getFlags() + virtual void setFlags(int32_t flags); + + /// Returns this Token's payload. + virtual PayloadPtr getPayload(); + + /// Sets this Token's payload. + virtual void setPayload(const PayloadPtr& payload); + + virtual String toString(); + + /// Resets the term text, payload, flags, and positionIncrement, startOffset, endOffset and token type to default. + virtual void clear(); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Makes a clone, but replaces the term buffer and start/end offset in the process. 
This is more efficient than + /// doing a full clone (and then calling setTermBuffer) because it saves a wasted copy of the old termBuffer. + TokenPtr clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); + + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + + /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(char[], int, int)}, {@link #setStartOffset}, + /// {@link #setEndOffset}, {@link #setType} + /// @return this Token instance + TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType); + + /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(char[], int, int)}, {@link #setStartOffset}, + /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE + /// @return this Token instance + TokenPtr reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); + + /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, + /// {@link #setEndOffset}, {@link #setType} + /// @return this Token instance + TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType); + + /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, + /// {@link #setEndOffset}, {@link #setType} + /// @return this Token instance + TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType); + + /// Shorthand for calling {@link #clear}, {@link #setTermBuffer(String)}, {@link #setStartOffset}, + /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE + /// @return this Token instance + TokenPtr reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset); + + /// 
Shorthand for calling {@link #clear}, {@link #setTermBuffer(String, int, int)}, {@link #setStartOffset}, + /// {@link #setEndOffset}, {@link #setType} on Token::DEFAULT_TYPE + /// @return this Token instance + TokenPtr reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset); + + /// Copy the prototype token's fields into this one. Note: Payloads are shared. + void reinit(const TokenPtr& prototype); + + /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + void reinit(const TokenPtr& prototype, const String& newTerm); + + /// Copy the prototype token's fields into this one, with a different term. Note: Payloads are shared. + void reinit(const TokenPtr& prototype, CharArray newTermBuffer, int32_t offset, int32_t length); + + virtual void copyTo(const AttributePtr& target); + + /// Convenience factory that returns Token as implementation for the basic attributes + static AttributeFactoryPtr TOKEN_ATTRIBUTE_FACTORY(); + +protected: + /// Construct Token and initialize values + void ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags); + + /// Allocates a buffer char[] of at least newSize, without preserving the existing content. Its always used in + /// places that set the content. + /// @param newSize minimum size of the buffer + void growTermBuffer(int32_t newSize); + + void initTermBuffer(); + + /// Like clear() but doesn't clear termBuffer/text + void clearNoTermBuffer(); +}; + +/// Creates a TokenAttributeFactory returning {@link Token} as instance for the basic attributes and for all other +/// attributes calls the given delegate factory. 
+class LPPAPI TokenAttributeFactory : public AttributeFactory { +public: + TokenAttributeFactory(const AttributeFactoryPtr& delegate); + virtual ~TokenAttributeFactory(); + + LUCENE_CLASS(TokenAttributeFactory); + +protected: + AttributeFactoryPtr delegate; + +public: + virtual AttributePtr createAttributeInstance(const String& className); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +} + +#endif diff --git a/include/lucene++/TokenFilter.h b/include/lucene++/TokenFilter.h new file mode 100644 index 00000000..513a4226 --- /dev/null +++ b/include/lucene++/TokenFilter.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TOKENFILTER_H +#define TOKENFILTER_H + +#include "TokenStream.h" + +namespace Lucene { + +/// A TokenFilter is a TokenStream whose input is another TokenStream. +/// +/// This is an abstract class; subclasses must override {@link #incrementToken()}. +/// @see TokenStream +class LPPAPI TokenFilter : public TokenStream { +protected: + /// Construct a token stream filtering the given input. + TokenFilter(const TokenStreamPtr& input); + +public: + virtual ~TokenFilter(); + + LUCENE_CLASS(TokenFilter); + +protected: + /// The source of tokens for this filter. + TokenStreamPtr input; + +public: + /// Performs end-of-stream operations, if any, and calls then end() on the input TokenStream. + /// NOTE: Be sure to call TokenFilter::end() first when overriding this method. + virtual void end(); + + /// Close the input TokenStream. + virtual void close(); + + /// Reset the filter as well as the input TokenStream. 
+ virtual void reset(); +}; + +} + +#endif diff --git a/include/lucene++/TokenStream.h b/include/lucene++/TokenStream.h new file mode 100644 index 00000000..86718746 --- /dev/null +++ b/include/lucene++/TokenStream.h @@ -0,0 +1,103 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TOKENSTREAM_H +#define TOKENSTREAM_H + +#include "AttributeSource.h" + +namespace Lucene { + +/// A TokenStream enumerates the sequence of tokens, either from {@link Field}s of a {@link Document} or from +/// query text. +/// +/// This is an abstract class; concrete subclasses are: {@link Tokenizer}, a TokenStream whose input is a Reader; +/// and {@link TokenFilter}, a TokenStream whose input is another TokenStream. +/// +/// A new TokenStream API has been introduced with Lucene 2.9. This API has moved from being {@link Token}-based +/// to {@link Attribute}-based. While {@link Token} still exists in 2.9 as a convenience class, the preferred way +/// to store the information of a {@link Token} is to use {@link Attribute}s. +/// +/// TokenStream now extends {@link AttributeSource}, which provides access to all of the token {@link Attribute}s +/// for the TokenStream. Note that only one instance per {@link Attribute} is created and reused for every +/// token. This approach reduces object creation and allows local caching of references to the {@link Attribute}s. +/// See {@link #incrementToken()} for further details. +/// +/// The workflow of the new TokenStream API is as follows: +/// - Instantiation of TokenStream/{@link TokenFilter}s which add/get attributes to/from the {@link AttributeSource}. +/// - The consumer calls {@link TokenStream#reset()}. 
+/// - The consumer retrieves attributes from the stream and stores local references to all attributes it wants to access. +/// - The consumer calls {@link #incrementToken()} until it returns false consuming the attributes after each call. +/// - The consumer calls {@link #end()} so that any end-of-stream operations can be performed. +/// - The consumer calls {@link #close()} to release any resource when finished using the TokenStream. +/// +/// To make sure that filters and consumers know which attributes are available, the attributes must be added during +/// instantiation. Filters and consumers are not required to check for availability of attributes in {@link +/// #incrementToken()}. +/// +/// Sometimes it is desirable to capture a current state of a TokenStream, eg., for buffering purposes (see {@link +/// CachingTokenFilter}, {@link TeeSinkTokenFilter}). For this use case {@link AttributeSource#captureState} and {@link +/// AttributeSource#restoreState} can be used. +class LPPAPI TokenStream : public AttributeSource { +protected: + /// A TokenStream using the default attribute factory. + TokenStream(); + + /// A TokenStream that uses the same attributes as the supplied one. + TokenStream(const AttributeSourcePtr& input); + + /// A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances. + TokenStream(const AttributeFactoryPtr& factory); + +public: + virtual ~TokenStream(); + + LUCENE_CLASS(TokenStream); + +public: + /// Consumers (ie., {@link IndexWriter}) use this method to advance the stream to the next token. Implementing + /// classes must implement this method and update the appropriate {@link Attribute}s with the attributes of + /// the next token. + /// + /// The producer must make no assumptions about the attributes after the method has been returned: the caller may + /// arbitrarily change it. 
If the producer needs to preserve the state for subsequent calls, it can use {@link + /// #captureState} to create a copy of the current attribute state. + /// + /// This method is called for every token of a document, so an efficient implementation is crucial for good + /// performance. To avoid calls to {@link #addAttribute(Class)} and {@link #getAttribute(Class)}, references to + /// all {@link Attribute}s that this stream uses should be retrieved during instantiation. + /// + /// To ensure that filters and consumers know which attributes are available, the attributes must be added during + /// instantiation. Filters and consumers are not required to check for availability of attributes in {@link + /// #incrementToken()}. + /// + /// @return false for end of stream; true otherwise + virtual bool incrementToken() = 0; + + /// This method is called by the consumer after the last token has been consumed, after {@link #incrementToken()} + /// returned false (using the new TokenStream API). Streams implementing the old API should upgrade to use this + /// feature. + /// + /// This method can be used to perform any end-of-stream operations, such as setting the final offset of a stream. + /// The final offset of a stream might differ from the offset of the last token eg in case one or more whitespaces + /// followed after the last token, but a {@link WhitespaceTokenizer} was used. + virtual void end(); + + /// Resets this stream to the beginning. This is an optional operation, so subclasses may or may not implement + /// this method. {@link #reset()} is not needed for the standard indexing process. However, if the tokens of a + /// TokenStream are intended to be consumed more than once, it is necessary to implement {@link #reset()}. 
Note that + /// if your TokenStream caches tokens and feeds them back again after a reset, it is imperative that you clone the + /// tokens when you store them away (on the first pass) as well as when you return them (on future passes after + /// {@link #reset()}). + virtual void reset(); + + /// Releases resources associated with this stream. + virtual void close(); +}; + +} + +#endif diff --git a/include/lucene++/Tokenizer.h b/include/lucene++/Tokenizer.h new file mode 100644 index 00000000..8b25e407 --- /dev/null +++ b/include/lucene++/Tokenizer.h @@ -0,0 +1,70 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TOKENIZER_H +#define TOKENIZER_H + +#include "TokenStream.h" + +namespace Lucene { + +/// A Tokenizer is a TokenStream whose input is a Reader. +/// +/// This is an abstract class; subclasses must override {@link #incrementToken()} +/// +/// Note: Subclasses overriding {@link #incrementToken()} must call {@link AttributeSource#clearAttributes()} +/// before setting attributes. +class LPPAPI Tokenizer : public TokenStream { +protected: + /// Construct a tokenizer with null input. + Tokenizer(); + + /// Construct a token stream processing the given input. + Tokenizer(const ReaderPtr& input); + + /// Construct a tokenizer with null input using the given AttributeFactory. + Tokenizer(const AttributeFactoryPtr& factory); + + /// Construct a token stream processing the given input using the given AttributeFactory. + Tokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + + /// Construct a token stream processing the given input using the given AttributeSource. 
+ Tokenizer(const AttributeSourcePtr& source); + + /// Construct a token stream processing the given input using the given AttributeSource. + Tokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + +public: + virtual ~Tokenizer(); + + LUCENE_CLASS(Tokenizer); + +protected: + /// The text source for this Tokenizer. + ReaderPtr input; + CharStreamPtr charStream; + +public: + /// By default, closes the input Reader. + virtual void close(); + + /// Return the corrected offset. If {@link #input} is a {@link CharStream} subclass this method calls + /// {@link CharStream#correctOffset}, else returns currentOff. + /// @param currentOff offset as seen in the output + /// @return corrected offset based on the input + /// @see CharStream#correctOffset + virtual int32_t correctOffset(int32_t currentOff); + + using TokenStream::reset; + + /// Reset the tokenizer to a new reader. Typically, an analyzer (in its reusableTokenStream method) will + /// use this to re-use a previously created tokenizer. + virtual void reset(const ReaderPtr& input); +}; + +} + +#endif diff --git a/include/lucene++/TopDocs.h b/include/lucene++/TopDocs.h new file mode 100644 index 00000000..56735de2 --- /dev/null +++ b/include/lucene++/TopDocs.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TOPDOCS_H +#define TOPDOCS_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Represents hits returned by {@link Searcher#search(QueryPtr, FilterPtr, int32_t)} and {@link +/// Searcher#search(QueryPtr, int32_t)}. +class LPPAPI TopDocs : public LuceneObject { +public: + /// Constructs a TopDocs with a default maxScore = double.NaN. 
+ TopDocs(int32_t totalHits, Collection scoreDocs); + + /// Constructs a TopDocs. + TopDocs(int32_t totalHits, Collection scoreDocs, double maxScore); + + virtual ~TopDocs(); + + LUCENE_CLASS(TopDocs); + +public: + /// The total number of hits for the query. + int32_t totalHits; + + /// The top hits for the query. + Collection scoreDocs; + + /// Stores the maximum score value encountered, needed for normalizing. + double maxScore; + +public: + /// Returns the maximum score value encountered. Note that in case scores are not tracked, + /// this returns NaN. + double getMaxScore(); + + /// Sets the maximum score value encountered. + void setMaxScore(double maxScore); +}; + +} + +#endif diff --git a/include/lucene++/TopDocsCollector.h b/include/lucene++/TopDocsCollector.h new file mode 100644 index 00000000..1d4389b7 --- /dev/null +++ b/include/lucene++/TopDocsCollector.h @@ -0,0 +1,83 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TOPDOCSCOLLECTOR_H +#define TOPDOCSCOLLECTOR_H + +#include "Collector.h" +#include "PriorityQueue.h" + +namespace Lucene { + +/// A base class for all collectors that return a {@link TopDocs} output. This collector allows easy extension +/// by providing a single constructor which accepts a {@link PriorityQueue} as well as protected members for +/// that priority queue and a counter of the number of total hits. +/// +/// Extending classes can override {@link #topDocs(int32_t, int32_t)} and {@link #getTotalHits()} in order to +/// provide their own implementation. 
+class LPPAPI TopDocsCollector : public Collector { +public: + TopDocsCollector(const HitQueueBasePtr& pq); + virtual ~TopDocsCollector(); + + LUCENE_CLASS(TopDocsCollector); + +protected: + /// The priority queue which holds the top documents. Note that different implementations of PriorityQueue + /// give different meaning to 'top documents'. HitQueue for example aggregates the top scoring documents, + /// while other PQ implementations may hold documents sorted by other criteria. + HitQueueBasePtr pq; + + /// The total number of documents that the collector encountered. + int32_t totalHits; + +public: + /// The total number of documents that matched this query. + virtual int32_t getTotalHits(); + + /// Returns the top docs that were collected by this collector. + virtual TopDocsPtr topDocs(); + + /// Returns the documents in the range [start .. pq.size()) that were collected by this collector. Note that + /// if start >= pq.size(), an empty TopDocs is returned. + /// + /// This method is convenient to call if the application always asks for the last results, starting from the + /// last 'page'. + /// + /// NOTE: you cannot call this method more than once for each search execution. If you need to call it more + /// than once, passing each time a different start, you should call {@link #topDocs()} and work with the + /// returned {@link TopDocs} object, which will contain all the results this search execution collected. + virtual TopDocsPtr topDocs(int32_t start); + + /// Returns the documents in the range [start .. start + howMany) that were collected by this collector. Note + /// that if start >= pq.size(), an empty TopDocs is returned, and if pq.size() - start < howMany, then only + /// the available documents in [start .. pq.size()) are returned. 
+ /// + /// This method is useful to call in case pagination of search results is allowed by the search application, + /// as well as it attempts to optimize the memory used by allocating only as much as requested by howMany. + /// + /// NOTE: you cannot call this method more than once for each search execution. If you need to call it more + /// than once, passing each time a different range, you should call {@link #topDocs()} and work with the + /// returned {@link TopDocs} object, which will contain all the results this search execution collected. + virtual TopDocsPtr topDocs(int32_t start, int32_t howMany); + +protected: + /// This is used in case topDocs() is called with illegal parameters, or there simply aren't (enough) results. + static TopDocsPtr EMPTY_TOPDOCS(); + + /// Populates the results array with the ScoreDoc instances. This can be overridden in case a different + /// ScoreDoc type should be returned. + virtual void populateResults(Collection results, int32_t howMany); + + /// Returns a {@link TopDocs} instance containing the given results. If results is null it means there are + /// no results to return, either because there were 0 calls to collect() or because the arguments to topDocs + /// were invalid. + virtual TopDocsPtr newTopDocs(Collection results, int32_t start); +}; + +} + +#endif diff --git a/include/lucene++/TopFieldCollector.h b/include/lucene++/TopFieldCollector.h new file mode 100644 index 00000000..115aa044 --- /dev/null +++ b/include/lucene++/TopFieldCollector.h @@ -0,0 +1,73 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#ifndef TOPFIELDCOLLECTOR_H +#define TOPFIELDCOLLECTOR_H + +#include "TopDocsCollector.h" + +namespace Lucene { + +/// A {@link Collector} that sorts by {@link SortField} using {@link FieldComparator}s. +/// +/// See the {@link #create(SortPtr, int32_t, bool, bool, bool, bool)} method for instantiating a TopFieldCollector. +class LPPAPI TopFieldCollector : public TopDocsCollector { +public: + TopFieldCollector(const HitQueueBasePtr& pq, int32_t numHits, bool fillFields); + virtual ~TopFieldCollector(); + + LUCENE_CLASS(TopFieldCollector); + +protected: + bool fillFields; + + /// Stores the maximum score value encountered, needed for normalizing. If document scores are not tracked, + /// this value is initialized to NaN. + double maxScore; + + int32_t numHits; + FieldValueHitQueueEntryPtr bottom; + bool queueFull; + int32_t docBase; + +public: + /// Creates a new {@link TopFieldCollector} from the given arguments. + /// + /// NOTE: The instances returned by this method pre-allocate a full array of length numHits. + /// + /// @param sort The sort criteria (SortFields). + /// @param numHits The number of results to collect. + /// @param fillFields Specifies whether the actual field values should be returned on the results (FieldDoc). + /// @param trackDocScores Specifies whether document scores should be tracked and set on the results. Note + /// that if set to false, then the results' scores will be set to NaN. Setting this to true affects + /// performance, as it incurs the score computation on each competitive result. Therefore if document scores + /// are not required by the application, it is recommended to set it to false. + /// @param trackMaxScore Specifies whether the query's maxScore should be tracked and set on the resulting + /// {@link TopDocs}. Note that if set to false, {@link TopDocs#getMaxScore()} returns NaN. 
Setting this to + /// true affects performance as it incurs the score computation on each result. Also, setting this true + /// automatically sets trackDocScores to true as well. + /// @param docsScoredInOrder Specifies whether documents are scored in doc Id order or not by the given + /// {@link Scorer} in {@link #setScorer(ScorerPtr)}. + /// @return a {@link TopFieldCollector} instance which will sort the results by the sort criteria. + static TopFieldCollectorPtr create(const SortPtr& sort, int32_t numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder); + + virtual void add(int32_t slot, int32_t doc, double score); + + virtual bool acceptsDocsOutOfOrder(); + +protected: + static const Collection EMPTY_SCOREDOCS(); + + /// Only the following callback methods need to be overridden since topDocs(int32_t, int32_t) calls them to + /// return the results. + virtual void populateResults(Collection results, int32_t howMany); + + virtual TopDocsPtr newTopDocs(Collection results, int32_t start); +}; + +} + +#endif diff --git a/include/lucene++/TopFieldDocs.h b/include/lucene++/TopFieldDocs.h new file mode 100644 index 00000000..96da0165 --- /dev/null +++ b/include/lucene++/TopFieldDocs.h @@ -0,0 +1,34 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TOPFIELDDOCS_H +#define TOPFIELDDOCS_H + +#include "TopDocs.h" + +namespace Lucene { + +/// Represents hits returned by {@link Searcher#search(QueryPtr, FilterPtr, int32_t, SortPtr)}. +class LPPAPI TopFieldDocs : public TopDocs { +public: + /// @param totalHits Total number of hits for the query. + /// @param scoreDocs The top hits for the query. 
+ /// @param fields The sort criteria used to find the top hits. + /// @param maxScore The maximum score encountered. + TopFieldDocs(int32_t totalHits, Collection scoreDocs, Collection fields, double maxScore); + + virtual ~TopFieldDocs(); + + LUCENE_CLASS(TopFieldDocs); + +public: + /// The fields which were used to sort results by. + Collection fields; +}; + +} + +#endif diff --git a/include/lucene++/TopScoreDocCollector.h b/include/lucene++/TopScoreDocCollector.h new file mode 100644 index 00000000..a43fe804 --- /dev/null +++ b/include/lucene++/TopScoreDocCollector.h @@ -0,0 +1,50 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TOPSCOREDOCCOLLECTOR_H +#define TOPSCOREDOCCOLLECTOR_H + +#include "TopDocsCollector.h" + +namespace Lucene { + +/// A {@link Collector} implementation that collects the top-scoring hits, returning them as a {@link TopDocs}. +/// This is used by {@link IndexSearcher} to implement {@link TopDocs}-based search. Hits are sorted by score +/// descending and then (when the scores are tied) docID ascending. When you create an instance of this +/// collector you should know in advance whether documents are going to be collected in doc Id order or not. +/// +/// NOTE: The values NaN, NEGATIVE_INFINITY and POSITIVE_INFINITY are not valid scores. This collector will +/// not properly collect hits with such scores. 
+class LPPAPI TopScoreDocCollector : public TopDocsCollector { +public: + TopScoreDocCollector(int32_t numHits); + virtual ~TopScoreDocCollector(); + + LUCENE_CLASS(TopScoreDocCollector); + +INTERNAL: + ScoreDocPtr pqTop; + int32_t docBase; + ScorerWeakPtr _scorer; + Scorer* __scorer; + +public: + /// Creates a new {@link TopScoreDocCollector} given the number of hits to collect and whether documents + /// are scored in order by the input {@link Scorer} to {@link #setScorer(ScorerPtr)}. + /// + /// NOTE: The instances returned by this method pre-allocate a full array of length numHits. + static TopScoreDocCollectorPtr create(int32_t numHits, bool docsScoredInOrder); + + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setScorer(const ScorerPtr& scorer); + +protected: + virtual TopDocsPtr newTopDocs(Collection results, int32_t start); +}; + +} + +#endif diff --git a/include/lucene++/TypeAttribute.h b/include/lucene++/TypeAttribute.h new file mode 100644 index 00000000..5eada91e --- /dev/null +++ b/include/lucene++/TypeAttribute.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef TYPEATTRIBUTE_H +#define TYPEATTRIBUTE_H + +#include "Attribute.h" + +namespace Lucene { + +/// A Token's lexical type. The Default value is "word". +class LPPAPI TypeAttribute : public Attribute { +public: + TypeAttribute(); + TypeAttribute(const String& type); + virtual ~TypeAttribute(); + + LUCENE_CLASS(TypeAttribute); + +protected: + String _type; + static const String& DEFAULT_TYPE(); + +public: + virtual String toString(); + + /// Returns this Token's lexical type. Defaults to "word". + String type(); + + /// Set the lexical type. 
+ /// @see #type() + void setType(const String& type); + + virtual void clear(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual void copyTo(const AttributePtr& target); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/UTF8Stream.h b/include/lucene++/UTF8Stream.h new file mode 100644 index 00000000..0a56f016 --- /dev/null +++ b/include/lucene++/UTF8Stream.h @@ -0,0 +1,143 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef UTF8STREAM_H +#define UTF8STREAM_H + +#include "LuceneObject.h" + +namespace Lucene { + +class LPPAPI UTF8Base : public LuceneObject { +public: + virtual ~UTF8Base(); + LUCENE_CLASS(UTF8Base); + +public: + static const uint16_t LEAD_SURROGATE_MIN; + static const uint16_t LEAD_SURROGATE_MAX; + static const uint16_t TRAIL_SURROGATE_MIN; + static const uint16_t TRAIL_SURROGATE_MAX; + static const uint16_t LEAD_OFFSET; + static const uint32_t SURROGATE_OFFSET; + static const uint32_t CODE_POINT_MAX; + + static const wchar_t UNICODE_REPLACEMENT_CHAR; + static const wchar_t UNICODE_TERMINATOR; + +protected: + virtual uint32_t readNext() = 0; + + uint8_t mask8(uint32_t b); + uint16_t mask16(uint32_t c); + bool isTrail(uint32_t b); + bool isSurrogate(uint32_t cp); + bool isLeadSurrogate(uint32_t cp); + bool isTrailSurrogate(uint32_t cp); + bool isValidCodePoint(uint32_t cp); + bool isOverlongSequence(uint32_t cp, int32_t length); +}; + +class UTF8Encoder : public UTF8Base { +public: + UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd); + virtual ~UTF8Encoder(); + + LUCENE_CLASS(UTF8Encoder); + +protected: + const 
wchar_t* unicodeBegin; + const wchar_t* unicodeEnd; + +public: + int32_t encode(uint8_t* utf8, int32_t length); + + int32_t utf16to8(uint8_t* utf8, int32_t length); + int32_t utf32to8(uint8_t* utf8, int32_t length); + +protected: + virtual uint32_t readNext(); + + uint8_t* appendChar(uint8_t* utf8, uint32_t cp); +}; + +class UTF8EncoderStream : public UTF8Encoder { +public: + UTF8EncoderStream(const ReaderPtr& reader); + virtual ~UTF8EncoderStream(); + + LUCENE_CLASS(UTF8EncoderStream); + +protected: + ReaderPtr reader; + +protected: + virtual uint32_t readNext(); +}; + +class UTF8Decoder : public UTF8Base { +public: + UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End); + virtual ~UTF8Decoder(); + + LUCENE_CLASS(UTF8Decoder); + +protected: + const uint8_t* utf8Begin; + const uint8_t* utf8End; + +public: + int32_t decode(wchar_t* unicode, int32_t length); + + int32_t utf8to16(wchar_t* unicode, int32_t length); + int32_t utf8to32(wchar_t* unicode, int32_t length); + +protected: + virtual uint32_t readNext(); + + int32_t sequenceLength(uint32_t cp); + bool getSequence(uint32_t& cp, int32_t length); + bool isValidNext(uint32_t& cp); +}; + +class UTF8DecoderStream : public UTF8Decoder { +public: + UTF8DecoderStream(const ReaderPtr& reader); + virtual ~UTF8DecoderStream(); + + LUCENE_CLASS(UTF8DecoderStream); + +protected: + ReaderPtr reader; + +protected: + virtual uint32_t readNext(); +}; + +class UTF16Decoder : public UTF8Base { +public: + UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End); + virtual ~UTF16Decoder(); + + LUCENE_CLASS(UTF16Decoder); + +protected: + const uint16_t* utf16Begin; + const uint16_t* utf16End; + +public: + int32_t decode(wchar_t* unicode, int32_t length); + + int32_t utf16to16(wchar_t* unicode, int32_t length); + int32_t utf16to32(wchar_t* unicode, int32_t length); + +protected: + virtual uint32_t readNext(); +}; + +} + +#endif diff --git a/include/lucene++/UnicodeUtils.h b/include/lucene++/UnicodeUtils.h new file 
mode 100644 index 00000000..768c949c --- /dev/null +++ b/include/lucene++/UnicodeUtils.h @@ -0,0 +1,96 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef UNICODEUTILS_H +#define UNICODEUTILS_H + +#include "LuceneObject.h" + +namespace Lucene { + +class LPPAPI UnicodeUtil { +public: + virtual ~UnicodeUtil(); + +public: + /// Return true if supplied character is alpha-numeric. + static bool isAlnum(wchar_t c); + + /// Return true if supplied character is alphabetic. + static bool isAlpha(wchar_t c); + + /// Return true if supplied character is numeric. + static bool isDigit(wchar_t c); + + /// Return true if supplied character is a space. + static bool isSpace(wchar_t c); + + /// Return true if supplied character is uppercase. + static bool isUpper(wchar_t c); + + /// Return true if supplied character is lowercase. + static bool isLower(wchar_t c); + + /// Return true if supplied character is other type of letter. + static bool isOther(wchar_t c); + + /// Return true if supplied character is non-spacing. + static bool isNonSpacing(wchar_t c); + + /// Return uppercase representation of a given character. + static wchar_t toUpper(wchar_t c); + + /// Return lowercase representation of a given character. + static wchar_t toLower(wchar_t c); +}; + +/// Utility class that contains utf8 and unicode translations. 
+template +class TranslationResult : public LuceneObject { +public: + TranslationResult() { + result = Array::newInstance(10); + length = 0; + } + +public: + Array result; + int32_t length; + +public: + void setLength(int32_t length) { + if (!result) { + result = Array::newInstance((int32_t)(1.5 * (double)length)); + } + if (result.size() < length) { + result.resize((int32_t)(1.5 * (double)length)); + } + this->length = length; + } + + void copyText(const TranslationResult& other) { + setLength(other.length); + MiscUtils::arrayCopy(other.result.get(), 0, result.get(), 0, other.length); + } + + void copyText(boost::shared_ptr< TranslationResult > other) { + copyText(*other); + } +}; + +class LPPAPI UTF8Result : public TranslationResult { +public: + virtual ~UTF8Result(); +}; + +class LPPAPI UnicodeResult : public TranslationResult { +public: + virtual ~UnicodeResult(); +}; + +} + +#endif diff --git a/include/lucene++/ValueSource.h b/include/lucene++/ValueSource.h new file mode 100644 index 00000000..6337be8e --- /dev/null +++ b/include/lucene++/ValueSource.h @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef VALUESOURCE_H +#define VALUESOURCE_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Source of values for basic function queries. +/// +/// At its default/simplest form, values - one per doc - are used as the score of that doc. +/// +/// Values are instantiated as {@link DocValues} for a particular reader. +/// ValueSource implementations differ in RAM requirements: it would always be a factor of the number of +/// documents, but for each document the number of bytes can be 1, 2, 4, or 8. 
+class LPPAPI ValueSource : public LuceneObject { +public: + virtual ~ValueSource(); + LUCENE_CLASS(ValueSource); + +public: + /// Return the DocValues used by the function query. + /// @param reader The IndexReader used to read these values. If any caching is involved, that caching + /// would also be IndexReader based. + virtual DocValuesPtr getValues(const IndexReaderPtr& reader) = 0; + + /// Description of field, used in explain() + virtual String description() = 0; + + virtual String toString(); + + /// Needed for possible caching of query results - used by {@link ValueSourceQuery#equals(LuceneObjectPtr)}. + virtual bool equals(const LuceneObjectPtr& other) = 0; + + /// Needed for possible caching of query results - used by {@link ValueSourceQuery#hashCode()}. + virtual int32_t hashCode() = 0; +}; + +} + +#endif diff --git a/include/lucene++/ValueSourceQuery.h b/include/lucene++/ValueSourceQuery.h new file mode 100644 index 00000000..c72e11d8 --- /dev/null +++ b/include/lucene++/ValueSourceQuery.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef VALUESOURCEQUERY_H +#define VALUESOURCEQUERY_H + +#include "Query.h" + +namespace Lucene { + +/// A Query that sets the scores of document to the values obtained from a {@link ValueSource}. +/// +/// This query provides a score for each and every undeleted document in the index. +/// +/// The value source can be based on a (cached) value of an indexed field, but it can also be based on an +/// external source, eg. values read from an external database. +/// +/// Score is set as: Score(doc,query) = (query.getBoost() * query.getBoost()) * valueSource(doc). 
+class LPPAPI ValueSourceQuery : public Query { +public: + /// Create a value source query + /// @param valSrc provides the values defines the function to be used for scoring + ValueSourceQuery(const ValueSourcePtr& valSrc); + + virtual ~ValueSourceQuery(); + + LUCENE_CLASS(ValueSourceQuery); + +public: + ValueSourcePtr valSrc; + +public: + using Query::toString; + + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + virtual void extractTerms(SetTerm terms); + virtual WeightPtr createWeight(const SearcherPtr& searcher); + virtual String toString(const String& field); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +} + +#endif diff --git a/include/lucene++/VariantUtils.h b/include/lucene++/VariantUtils.h new file mode 100644 index 00000000..5a72e592 --- /dev/null +++ b/include/lucene++/VariantUtils.h @@ -0,0 +1,105 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef VARIANTUTILS_H +#define VARIANTUTILS_H + +#include +#include +#include "Lucene.h" +#include "MiscUtils.h" + +namespace Lucene { + +class LPPAPI VariantUtils { +public: + template + static TYPE get(const boost::any& var) { + return var.type() == typeid(TYPE) ? boost::any_cast(var) : TYPE(); + } + + template + static TYPE get(VAR var) { +#if BOOST_VERSION < 105800 + return var.type() == typeid(TYPE) ? boost::get(var) : TYPE(); +#else + return var.type() == typeid(TYPE) ? 
boost::relaxed_get(var) : TYPE(); +#endif + } + + template + static bool typeOf(VAR var) { + return (var.type() == typeid(TYPE)); + } + + static VariantNull null() { + return VariantNull(); + } + + static bool isNull(const boost::any& var) { + return var.empty(); + } + + template + static bool isNull(VAR var) { + return typeOf(var); + } + + template + static int32_t hashCode(VAR var) { + if (typeOf(var)) { + return StringUtils::hashCode(get(var)); + } + if (typeOf(var)) { + return get(var); + } + if (typeOf(var)) { + return (int32_t)get(var); + } + if (typeOf(var)) { + int64_t longBits = MiscUtils::doubleToLongBits(get(var)); + return (int32_t)(longBits ^ (longBits >> 32)); + } + if (typeOf< Collection >(var)) { + return get< Collection >(var).hashCode(); + } + if (typeOf< Collection >(var)) { + return get< Collection >(var).hashCode(); + } + if (typeOf< Collection >(var)) { + return get< Collection >(var).hashCode(); + } + if (typeOf< Collection >(var)) { + return get< Collection >(var).hashCode(); + } + if (typeOf< Collection >(var)) { + return get< Collection >(var).hashCode(); + } + if (typeOf(var)) { + return get(var)->hashCode(); + } + return 0; + } + + template + static bool equalsType(FIRST first, SECOND second) { + return (first.type() == second.type()); + } + + template + static bool equals(FIRST first, SECOND second) { + return first.type() == second.type() ? (first == second) : false; + } + + template + static int32_t compareTo(VAR first, VAR second) { + return first < second ? -1 : (first == second ? 0 : 1); + } +}; + +} + +#endif diff --git a/include/lucene++/Weight.h b/include/lucene++/Weight.h new file mode 100644 index 00000000..ce733f7d --- /dev/null +++ b/include/lucene++/Weight.h @@ -0,0 +1,84 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
+// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef WEIGHT_H +#define WEIGHT_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Calculate query weights and build query scorers. +/// +/// The purpose of {@link Weight} is to ensure searching does not modify a {@link Query}, so that a +/// {@link Query} instance can be reused. +/// {@link Searcher} dependent state of the query should reside in the {@link Weight}. +/// {@link IndexReader} dependent state should reside in the {@link Scorer}. +/// +/// Weight is used in the following way: +///
    +///
  1. A Weight is constructed by a top-level query, given a Searcher ({@link Query#createWeight(Searcher)}). +///
  2. The {@link #sumOfSquaredWeights()} method is called on the Weight to compute the query normalization +/// factor {@link Similarity#queryNorm(float)} of the query clauses contained in the query. +///
  3. The query normalization factor is passed to {@link #normalize(float)}. At this point the weighting is +/// complete. +///
  4. A Scorer is constructed by {@link #scorer(IndexReaderPtr, bool, bool)}. +///
+class LPPAPI Weight : public LuceneObject { +public: + virtual ~Weight(); + LUCENE_CLASS(Weight); + +public: + /// An explanation of the score computation for the named document. + /// @param reader sub-reader containing the give doc + /// @param doc + /// @return an Explanation for the score + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc) = 0; + + /// The query that this concerns. + virtual QueryPtr getQuery() = 0; + + /// The weight for this query. + virtual double getValue() = 0; + + /// Assigns the query normalization factor to this. + virtual void normalize(double norm) = 0; + + /// Returns a {@link Scorer} which scores documents in/out-of order according to scoreDocsInOrder. + /// + /// NOTE: even if scoreDocsInOrder is false, it is recommended to check whether the returned Scorer + /// indeed scores documents out of order (ie., call {@link #scoresDocsOutOfOrder()}), as some Scorer + /// implementations will always return documents in-order. + /// + /// NOTE: null can be returned if no documents will be scored by this query. + /// + /// @param reader The {@link IndexReader} for which to return the {@link Scorer}. + /// @param scoreDocsInOrder Specifies whether in-order scoring of documents is required. Note that if + /// set to false (i.e., out-of-order scoring is required), this method can return whatever scoring mode + /// it supports, as every in-order scorer is also an out-of-order one. However, an out-of-order scorer + /// may not support {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)}, therefore it is + /// recommended to request an in-order scorer if use of these methods is required. + /// @param topScorer If true, {@link Scorer#score(CollectorPtr)} will be called; if false, {@link + /// Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will be called. + /// @return a {@link Scorer} which scores documents in/out-of order. 
+ virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) = 0; + + /// The sum of squared weights of contained query clauses. + virtual double sumOfSquaredWeights() = 0; + + /// Returns true if this implementation scores docs only out of order. This method is used in conjunction + /// with {@link Collector}'s {@link Collector#acceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and + /// {@link #scorer(IndexReaderPtr, bool, bool)} to create a matching {@link Scorer} instance for a given + /// {@link Collector}, or vice versa. + /// + /// NOTE: the default implementation returns false, ie. the Scorer scores documents in-order. + virtual bool scoresDocsOutOfOrder(); +}; + +} + +#endif diff --git a/include/lucene++/WhitespaceAnalyzer.h b/include/lucene++/WhitespaceAnalyzer.h new file mode 100644 index 00000000..fe92bee2 --- /dev/null +++ b/include/lucene++/WhitespaceAnalyzer.h @@ -0,0 +1,28 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef WHITESPACEANALYZER_H +#define WHITESPACEANALYZER_H + +#include "Analyzer.h" + +namespace Lucene { + +/// An Analyzer that uses {@link WhitespaceTokenizer}. 
+class LPPAPI WhitespaceAnalyzer : public Analyzer { +public: + virtual ~WhitespaceAnalyzer(); + + LUCENE_CLASS(WhitespaceAnalyzer); + +public: + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/WhitespaceTokenizer.h b/include/lucene++/WhitespaceTokenizer.h new file mode 100644 index 00000000..241becff --- /dev/null +++ b/include/lucene++/WhitespaceTokenizer.h @@ -0,0 +1,38 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef WHITESPACETOKENIZER_H +#define WHITESPACETOKENIZER_H + +#include "CharTokenizer.h" + +namespace Lucene { + +/// A WhitespaceTokenizer is a tokenizer that divides text at whitespace. Adjacent sequences of non-Whitespace +/// characters form tokens. +class LPPAPI WhitespaceTokenizer : public CharTokenizer { +public: + /// Construct a new WhitespaceTokenizer. + WhitespaceTokenizer(const ReaderPtr& input); + + /// Construct a new WhitespaceTokenizer using a given {@link AttributeSource}. + WhitespaceTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + + /// Construct a new WhitespaceTokenizer using a given {@link AttributeSource.AttributeFactory}. + WhitespaceTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + + virtual ~WhitespaceTokenizer(); + + LUCENE_CLASS(WhitespaceTokenizer); + +public: + /// Collects only characters which do not satisfy {@link Character#isWhitespace(char)}. 
+ virtual bool isTokenChar(wchar_t c); +}; + +} + +#endif diff --git a/include/lucene++/WildcardQuery.h b/include/lucene++/WildcardQuery.h new file mode 100644 index 00000000..05a9d181 --- /dev/null +++ b/include/lucene++/WildcardQuery.h @@ -0,0 +1,54 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef WILDCARDQUERY_H +#define WILDCARDQUERY_H + +#include "MultiTermQuery.h" + +namespace Lucene { + +/// Implements the wildcard search query. Supported wildcards are *, which matches any character sequence +/// (including the empty one), and ?, which matches any single character. Note this query can be slow, as +/// it needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, a Wildcard +/// term should not start with one of the wildcards * or ?. +/// +/// This query uses the {@link MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} rewrite method. +/// @see WildcardTermEnum +class LPPAPI WildcardQuery : public MultiTermQuery { +public: + WildcardQuery(const TermPtr& term); + virtual ~WildcardQuery(); + + LUCENE_CLASS(WildcardQuery); + +protected: + bool termContainsWildcard; + bool termIsPrefix; + TermPtr term; + +public: + using MultiTermQuery::toString; + + /// Returns the pattern term. + TermPtr getTerm(); + + virtual QueryPtr rewrite(const IndexReaderPtr& reader); + + /// Prints a user-readable version of this query. 
+ virtual String toString(const String& field); + + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); + +protected: + virtual FilteredTermEnumPtr getEnum(const IndexReaderPtr& reader); +}; + +} + +#endif diff --git a/include/lucene++/WildcardTermEnum.h b/include/lucene++/WildcardTermEnum.h new file mode 100644 index 00000000..596e76d6 --- /dev/null +++ b/include/lucene++/WildcardTermEnum.h @@ -0,0 +1,54 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef WILDCARDTERMENUM_H +#define WILDCARDTERMENUM_H + +#include "FilteredTermEnum.h" + +namespace Lucene { + +/// Subclass of FilteredTermEnum for enumerating all terms that match the specified wildcard filter term. +/// +/// Term enumerations are always ordered by Term.compareTo(). Each term in the enumeration is greater than +/// all that precede it. +class LPPAPI WildcardTermEnum : public FilteredTermEnum { +public: + /// Creates a new WildcardTermEnum. + /// + /// After calling the constructor the enumeration is already pointing to the first valid term if such + /// a term exists. + WildcardTermEnum(const IndexReaderPtr& reader, const TermPtr& term); + + virtual ~WildcardTermEnum(); + + LUCENE_CLASS(WildcardTermEnum); + +public: + static const wchar_t WILDCARD_STRING; + static const wchar_t WILDCARD_CHAR; + + TermPtr searchTerm; + String field; + String text; + String pre; + int32_t preLen; + bool _endEnum; + +public: + virtual double difference(); + + /// Determines if a word matches a wildcard pattern. 
+ static bool wildcardEquals(const String& pattern, int32_t patternIdx, const String& string, int32_t stringIdx); + +protected: + virtual bool termCompare(const TermPtr& term); + virtual bool endEnum(); +}; + +} + +#endif diff --git a/include/lucene++/WordlistLoader.h b/include/lucene++/WordlistLoader.h new file mode 100644 index 00000000..e111e129 --- /dev/null +++ b/include/lucene++/WordlistLoader.h @@ -0,0 +1,49 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. +///////////////////////////////////////////////////////////////////////////// + +#ifndef WORDLISTLOADER_H +#define WORDLISTLOADER_H + +#include "LuceneObject.h" + +namespace Lucene { + +/// Loader for text files that represent a list of stopwords. +class LPPAPI WordlistLoader : public LuceneObject { +public: + virtual ~WordlistLoader(); + + LUCENE_CLASS(WordlistLoader); + +public: + /// Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). + /// Every line of the file should contain only one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). + /// + /// @param wordfile File name containing the wordlist + /// @param comment The comment string to ignore + /// @return A set with the file's words + static HashSet getWordSet(const String& wordfile, const String& comment = EmptyString); + + /// Loads a text file and adds every line as an entry to a HashSet (omitting leading and trailing whitespace). + /// Every line of the file should contain only one word. The words need to be in lowercase if you make use of an + /// Analyzer which uses LowerCaseFilter (like StandardAnalyzer). 
+ /// + /// @param reader Reader containing the wordlist + /// @param comment The comment string to ignore + /// @return A set with the file's words + static HashSet getWordSet(const ReaderPtr& reader, const String& comment = EmptyString); + + /// Reads a stem dictionary. Each line contains: + ///
word\tstem
+ /// (ie. two tab separated words) + /// @return stem dictionary that overrules the stemming algorithm + static MapStringString getStemDict(const String& wordstemfile); +}; + +} + +#endif diff --git a/include/targetver.h b/include/lucene++/targetver.h similarity index 92% rename from include/targetver.h rename to include/lucene++/targetver.h index fd70e9ef..c7f57484 100644 --- a/include/targetver.h +++ b/include/lucene++/targetver.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// diff --git a/lib/.gitignore b/lib/.gitignore deleted file mode 100644 index f59ec20a..00000000 --- a/lib/.gitignore +++ /dev/null @@ -1 +0,0 @@ -* \ No newline at end of file diff --git a/liblucene++.pc.cmake b/liblucene++.pc.cmake deleted file mode 100644 index 3fc09176..00000000 --- a/liblucene++.pc.cmake +++ /dev/null @@ -1,13 +0,0 @@ -prefix=@CMAKE_INSTALL_PREFIX@ -exec_prefix=${prefix}/bin -libdir=${prefix}/@LIB_DESTINATION@ -includedir=${prefix}/include/lucene++ -lib=lucene++ - -Name: liblucene++ -Description: Lucene++ - a C++ search engine, ported from the popular Apache Lucene -Version: @LUCENE++_VERSION@ -Libs: -L${prefix}/@LIB_DESTINATION@/ -l${lib} -Cflags: -I${includedir} -~ - diff --git a/options.cmake b/options.cmake new file mode 100644 index 00000000..d9823146 --- /dev/null +++ b/options.cmake @@ -0,0 +1,100 @@ +# lucene++ project build options +#======================================================================== + + +# linker args +#======================================================================== + +option(LUCENE_BUILD_SHARED +"Build shared library" +ON +) + +option(ENABLE_PACKAGING +"Create build 
scripts for creating lucene++ packages" +OFF +) + +option(LUCENE_USE_STATIC_BOOST_LIBS +"Use static boost libraries" +OFF +) + +option(ENABLE_BOOST_INTEGER +"Enable boost integer types" +OFF +) + +option(ENABLE_CYCLIC_CHECK +"Enable cyclic checking" +OFF +) + + +# build options +#======================================================================== + +option( + ENABLE_TEST + "Enable the tests" + ON) + +option( + ENABLE_DEMO + "Enable building demo applications" + ON) + +OPTION( + ENABLE_DOCS + "Build the Lucene++ documentation." + OFF) + +OPTION( + ENABLE_CONTRIB + "Enable building contrib library" + ON) + +# documentation options +#======================================================================== + +mark_as_advanced(DOCS_HTML_HELP) +OPTION( + DOCS_HTML_HELP + "Doxygen should compile HTML into a Help file (CHM)." + NO ) + +mark_as_advanced(DOCS_HTML) +OPTION( + DOCS_HTML + "Doxygen should build HTML documentation." + YES ) + +mark_as_advanced(DOCS_XML) +OPTION( + DOCS_XML + "Doxygen should build XML documentation." + NO ) + +mark_as_advanced(DOCS_RTF) +OPTION( + DOCS_RTF + "Doxygen should build RTF documentation." + NO ) + +mark_as_advanced(DOCS_MAN) +OPTION( + DOCS_MAN + "Doxygen should build man documentation." + NO ) + +mark_as_advanced(DOCS_TAGFILE) +OPTION( + DOCS_TAGFILE + "Doxygen should build a tagfile." + NO ) + +mark_as_advanced(DOCS_LATEX) +OPTION( + DOCS_LATEX + "Doxygen should build Latex documentation." 
+ NO ) diff --git a/scripts/llvm/README b/scripts/llvm/README deleted file mode 100644 index ffd80db0..00000000 --- a/scripts/llvm/README +++ /dev/null @@ -1,12 +0,0 @@ -can almost get this to work: - -boost needs to be compiled in, so that we don't need native boost libs (DONE) -problems with linking with libstdc++: ntv.bc:(.text+0x1742): undefined reference to `std::ctype::_M_widen_init() const' - - seems to be a problem with libstdc++ (gcc 4.3 -> 4.4 regression) - - a solution is apparently to compile with lower optimisation levels, but that doesn't seem to help -waf script doesn't work all the way to the end yet... was still playing around with: - llvm-ld -native *.so target.bc -o ntv -lsupc++ -lstdc++ -L/usr/lib/llvm-2.8/gcc-4.2/lib64 -lpthread - - it worked better when linking to boost native libs -confused about how to compile c++ based bytecode to a runnable lib, seems kind of strange adding all the pthread, etc, which is native? -trying to convert the given code into C code doesn't work yet, due to a bug with large int sizes - diff --git a/scripts/llvm/build/clang.py b/scripts/llvm/build/clang.py deleted file mode 100644 index 6c998eec..00000000 --- a/scripts/llvm/build/clang.py +++ /dev/null @@ -1,72 +0,0 @@ -############################################################################# -## Copyright (c) 2009-2011 Alan Wright. All rights reserved. -## Distributable under the terms of either the Apache License (Version 2.0) -## or the GNU Lesser General Public License. -############################################################################# - -from TaskGen import feature -import Options -import sys - - -@feature('c') -def apply_clang(self): - if self.env['HAVE_LLVM'] == False: - return - ''' - Replaced the default compiler with clang if required. 
- ''' - if not getattr(self, 'clang', True) or Options.options.disable_clang: - return - self.env['CC'] = self.env['CLANG'] or self.env['CC'] - if sys.platform == "darwin": - # workaround problems with non-static inline functions - # http://clang.llvm.org/compatibility.html - self.env['CCFLAGS'] += ['-std=gnu89'] - -@feature('c') -def apply_clang_cpp(self): - if self.env['HAVE_LLVM'] == False: - return - ''' - Replaced the default compiler with clang if required. - ''' - if not getattr(self, 'clang', True) or Options.options.disable_clang: - return - self.env['CPP'] = self.env['CLANGPP'] or self.env['CXX'] - self.env['CXX'] = self.env['CLANGPP'] or self.env['CXX'] - if sys.platform == "darwin": - self.env['shlib_CXXFLAGS'] = ['-fPIC'] - -@feature('c') -def apply_clang_llvm(self): - if self.env['HAVE_LLVM'] == False: - return - #self.env['AR'] = self.env['LLVM-AR'] or self.env['AR'] - self.env['LINK_CC'] = self.env['LLVM-LD'] or self.env['LINK_CC'] - self.env['LINK_CXX'] = self.env['LLVM-LD'] or self.env['LINK_CXX'] - self.env['STLIB_MARKER'] = '' - self.env['SHLIB_MARKER'] = '' - -def options(opt): - """ - Add options specific the codehash tool - """ - opt.add_option('--noclang', - dest = 'disable_clang', - action = 'store_true', - default = False, - help = 'disable the clang compiler if it is available') - - -def configure(conf): - search_paths = ['/Xcode4/usr/bin/'] if sys.platform == "darwin" else [] - conf.find_program('clang', var='CLANG') - conf.find_program('clang++', var='CLANGPP', path_list = search_paths) - conf.find_program('llvm-ld', var='LLVM-LD', path_list = search_paths) - conf.find_program('llvm-ar', var='LLVM-AR', path_list = search_paths) - if conf.env['LLVM-LD'] == None or conf.env['LLVM-AR'] == None or conf.env['CLANG'] == None or conf.env['CLANGPP'] == None: - conf.env['HAVE_LLVM'] = False - else: - conf.env['HAVE_LLVM'] = True - diff --git a/scripts/llvm/waf b/scripts/llvm/waf deleted file mode 100644 index 73ad1d5d..00000000 Binary files 
a/scripts/llvm/waf and /dev/null differ diff --git a/scripts/llvm/wscript b/scripts/llvm/wscript deleted file mode 100644 index 3d1807e7..00000000 --- a/scripts/llvm/wscript +++ /dev/null @@ -1,268 +0,0 @@ -############################################################################# -## Copyright (c) 2009-2011 Ben van Klinken. All rights reserved. -## Distributable under the terms of either the Apache License (Version 2.0) -## or the GNU Lesser General Public License. -############################################################################# - -import sys -import os -from copy import copy -import Options -import TaskGen -from Configure import conf -from TaskGen import feature, after -#import Task, ccroot - -APPNAME='Lucene++' -VERSION='3.0.2' - -top = '../../' -out = 'bin' - -source_patterns = '**/*.(c|cpp)' - -lucene_source_dirs = [ - top + 'src/core/analysis', - top + 'src/core/document', - top + 'src/core/index', - top + 'src/core/queryparser', - top + 'src/core/search', - top + 'src/core/store', - top + 'src/core/util' -] - -boost_defines = [ - 'BOOST_BUILD_THREAD_DLL', - 'BOOST_BUILD_FILESYSTEM_DLL', - 'BOOST_BUILD_REGEX_DLL', - 'BOOST_BUILD_DATE_TIME_DLL', - 'BOOST_BUILD_IOSTREAMS_DLL', -] -boost_sources_dirs = [ - 'libs/thread/src', - 'libs/filesystem/src', - 'libs/regex/src', - 'libs/date_time/src', - 'libs/iostreams/src', - 'libs/system/src' -] - -lucene_contrib_source_dirs = [ - top + 'src/contrib' -] - -lucene_include_dirs = [ - top + 'include', - top + 'src/core/include', - top + 'src/contrib/include' -] - -tester_source_dirs = [ - top + 'src/test' -] - -tester_include_dirs = [ - top + 'include', - top + 'src/core/include', - top + 'src/contrib/include', - top + 'src/test/include' -] - - -def options(opt): - opt.tool_options("boost") - opt.tool_options('compiler_cxx') - opt.tool_options('clang', tooldir = 'build') - opt.add_option( - '--debug', - default = False, - action = "store_true", - help ='debug build no optimization, etc...', - dest = 
'debug') - - opt.add_option( - '--static', - default = False, - action = "store_true", - help ='fully static build', - dest = 'static') - - opt.add_option( - '--boost', - default = 'boost_1_42_0', - action = "store", - help ='boost path', - dest = 'BOOST_HOME') - - -def configure(conf): - conf.env['INCLUDES_BOOST'] = Options.options.BOOST_HOME - - conf.check_tool('g++') - conf.check_tool('gcc') - #now try with overridden clang... - conf.check_tool('clang', 'build') - conf.check_cc(lib = 'pthread', mandatory = True) - conf.check(header_name='bzlib.h', mandatory = True) - conf.env['LINKFLAGS_cshlib'] = '' - conf.env['LINKFLAGS_cxxshlib'] = '' - - conf.check_tool('boost') - conf.check_tool('clang', 'build') - conf.check_boost( - #static = 'onlystatic', - lib = ['filesystem', 'thread', 'regex', 'system', 'date_time', 'iostreams', 'unit_test_framework'] - ) - - if conf.env['HAVE_LLVM'] == False: - raise Exception("No clang found") - #if conf.path.find_dir(conf.env['INCLUDES_BOOST'] + "/libs") == None: - # raise Exception(conf.env['INCLUDES_BOOST'] + " does not have the libs directory or is not within the source path (" + top + ") - check that the path is correctly and points to a source distribution") - #if conf.path.find_dir(conf.env['INCLUDES_BOOST'] + "/boost") != None: - # raise Exception("Please remove the boost includes path, it causes problems for some unknown reason") - -def build(bld): - target_type = 'cxxstlib' - debug_define = '_DEBUG' if Options.options.debug else 'NDEBUG' - compile_flags = ['-emit-llvm'] - if Options.options.debug: - compile_flags = compile_flags + ['-O0', '-g', ] - else: - compile_flags = compile_flags + ['-O3'] - - dll_link_flags = ['-link-as-library'] - app_link_flags = ['-native', - 'scripts/llvm/liblucene++.a', - '-L/usr/lib/gcc/x86_64-linux-gnu/4.5/', - '-lsupc++', '-lstdc++', - '-lpthread', '-lm', '-lc' - ] - # 'scripts/llvm/liblucene_boost.a', - # - # - - ############### - #libraries... 
- ############### - - lucene_sources = [] - for source_dir in lucene_source_dirs: - source_dir = bld.path.find_dir(source_dir) - lucene_sources.extend(source_dir.ant_glob(source_patterns)) - bld( - name = 'lucene++', - features = ['cxx', 'c'] + [target_type], - source = [source.relpath_gen(bld.path) for source in lucene_sources], - target = 'lucene++', - includes = lucene_include_dirs + [bld.env["INCLUDES_BOOST"]], - cflags = compile_flags, - cxxflags = compile_flags, - linkflags = dll_link_flags, - defines = ['LPP_BUILDING_LIB', 'LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], - uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD' - ) - - lucene_contrib_sources = [] - for source_dir in lucene_contrib_source_dirs: - source_dir = bld.path.find_dir(source_dir) - lucene_contrib_sources.extend(source_dir.ant_glob(source_patterns)) - bld( - name = 'lucene_contrib', - features = ['cxx', 'c'] + [target_type], - source = [source.relpath_gen(bld.path) for source in lucene_contrib_sources], - target = 'lucene_contrib', - includes = lucene_include_dirs + [bld.env["INCLUDES_BOOST"]], - cflags = compile_flags, - cxxflags = compile_flags, - linkflags = dll_link_flags, - defines = ['LPP_BUILDING_LIB', 'LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], - ) - - - #lucene_boost_sources = [] - #for source_dir in boost_sources_dirs: - # if not bld.path.find_dir(bld.env["INCLUDES_BOOST"] + "/" + source_dir): - # raise Exception(source_dir + " was not found or is not inside the lucene path") - # source_dir = bld.path.find_dir(bld.env["INCLUDES_BOOST"] + "/" + source_dir) - # lucene_boost_sources.extend(source_dir.ant_glob(source_patterns, excl='win32')) - #bld( - # name = 'lucene_boost', - # features = ['cxx', 'c'] + [target_type], - # source = [source.relpath_gen(bld.path) for source in lucene_boost_sources], - # target = 'lucene_boost', - # includes = bld.env["INCLUDES_BOOST"], - # cflags = compile_flags, - # cxxflags = compile_flags, 
- # linkflags = dll_link_flags, - # defines = [debug_define] + boost_defines, - #) - - ########## - # applications - ########## - - tester_sources = [] - for source_dir in tester_source_dirs: - source_dir = bld.path.find_dir(source_dir) - tester_sources.extend(source_dir.ant_glob(source_patterns)) - - #bld( - # name = 'lucene_tester', - # features = ['cxx', 'c', 'cprogram'], - # #source = [source.relpath_gen(bld.path) for source in tester_sources], - # target = 'lucene_tester', - # includes = tester_include_dirs + [bld.env["INCLUDES_BOOST"]], - # cflags = compile_flags, - # cxxflags = compile_flags, - # linkflags = app_link_flags, - # defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + ['LPP_EXPOSE_INTERNAL'] + [debug_define], - # uselib = 'PTHREAD', - # use = 'lucene++ lucene_contrib' - # ) - - bld( - name = 'deletefiles', - features = ['cxx', 'c', 'cprogram'], - source = bld.path.find_resource(top + 'src/demo/deletefiles/main.cpp').relpath_gen(bld.path), - target = 'deletefiles', - includes = [top + 'include'] + [bld.env["INCLUDES_BOOST"]], - cflags = compile_flags, - cxxflags = compile_flags, - linkflags = app_link_flags, - defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], - uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD', - uselib_local = 'lucene++' - ) - - bld( - name = 'indexfiles', - features = ['cxx', 'c', 'cprogram'], - source = bld.path.find_resource(top + 'src/demo/indexfiles/main.cpp').relpath_gen(bld.path), - target = 'indexfiles', - includes = [top + 'include'] + [bld.env["INCLUDES_BOOST"]], - cflags = compile_flags, - cxxflags = compile_flags, - linkflags = app_link_flags, - defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], - uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD', - uselib_local = 'lucene++' - ) - - bld( - name = 'searchfiles', - features = ['cxx', 'c', 'cprogram'], - source = bld.path.find_resource(top + 
'src/demo/searchfiles/main.cpp').relpath_gen(bld.path), - target = 'searchfiles', - includes = [top + 'include'] + [bld.env["INCLUDES_BOOST"]], - cflags = compile_flags, - cxxflags = compile_flags, - linkflags = app_link_flags, - defines = ['LPP_HAVE_GXXCLASSVISIBILITY'] + [debug_define], - uselib = 'BOOST_FILESYSTEM BOOST_THREAD BOOST_REGEX BOOST_SYSTEM BOOST_DATE_TIME BOOST_IOSTREAMS PTHREAD', - uselib_local = 'lucene++' - ) - - - #Todo: - #llvm-ld -native *.so target.bc -o ntv -lsupc++ -lstdc++ -L/usr/lib/llvm-2.8/gcc-4.2/lib64 -lpthread - diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 00000000..fb042da0 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,16 @@ + +add_subdirectory(core) +add_subdirectory(config) + +if(ENABLE_CONTRIB) + add_subdirectory(contrib) +endif() + +if(ENABLE_DEMO) + add_subdirectory(demo) +endif() + +if(ENABLE_TEST) + enable_testing() + add_subdirectory(test) +endif() \ No newline at end of file diff --git a/src/config/CMakeLists.txt b/src/config/CMakeLists.txt new file mode 100644 index 00000000..fe8e8b89 --- /dev/null +++ b/src/config/CMakeLists.txt @@ -0,0 +1,26 @@ +#################################### +# Set config vars +#################################### +set(core_libname "lucene++") +set(contrib_libname "lucene++-contrib") + +set( + PACKAGE_CMAKE_INSTALL_INCLUDEDIR + "${CMAKE_INSTALL_INCLUDEDIR}/lucene++/") + +set( + PACKAGE_CMAKE_INSTALL_LIBDIR + "${LIB_DESTINATION}") + + +#################################### +# include CMakePackageConfigHelpers +#################################### +include(CMakePackageConfigHelpers) + + +#################################### +# include directories +#################################### +add_subdirectory(core) +add_subdirectory(contrib) \ No newline at end of file diff --git a/src/config/contrib/CMakeLists.txt b/src/config/contrib/CMakeLists.txt new file mode 100644 index 00000000..0dc66f26 --- /dev/null +++ b/src/config/contrib/CMakeLists.txt @@ -0,0 +1,34 @@ 
+################################# +# install pkconfig +################################# +if(NOT WIN32) + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++-contrib.pc.in" + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc" @ONLY) + + install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contrib.pc" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +endif() + + +################################# +# install config.cmake +################################# +configure_package_config_file( + "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++-contribConfig.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake" + INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++-contrib" + PATH_VARS contrib_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR) + +write_basic_package_version_file( + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake" + VERSION ${lucene++_VERSION} + COMPATIBILITY SameMajorVersion ) + +install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfig.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++-contribConfigVersion.cmake" + DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/liblucene++-contrib") diff --git a/src/config/contrib/liblucene++-contrib.pc.in b/src/config/contrib/liblucene++-contrib.pc.in new file mode 100644 index 00000000..64c3acac --- /dev/null +++ b/src/config/contrib/liblucene++-contrib.pc.in @@ -0,0 +1,13 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix}/bin +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/include/lucene++ +lib=lucene++-contrib + +Name: liblucene++-contrib +Description: Contributions for Lucene++ - a C++ search engine, ported from the popular Apache Lucene +Version: @lucene++_VERSION@ +Libs: -L${libdir} -l${lib} +Cflags: -I${includedir} +Requires: liblucene++ = @lucene++_VERSION@ + diff --git a/src/config/contrib/liblucene++-contribConfig.cmake.in b/src/config/contrib/liblucene++-contribConfig.cmake.in new file mode 100644 index 
00000000..85fdfd2e --- /dev/null +++ b/src/config/contrib/liblucene++-contribConfig.cmake.in @@ -0,0 +1,25 @@ +# Config module for Lucene++-contrib +# +# Provides the following variables +# liblucene++-contrib_INCLUDE_DIRS - Directories to include +# liblucene++-contrib_LIBRARIES - Libraries to link +# liblucene++-contrib_LIBRARY_DIRS - Library directories to search for link libraries + + +@PACKAGE_INIT@ + + +# This should only be used for meson +if (NOT DEFINED set_and_check) + macro(set_and_check _var _file) + set(${_var} "${_file}") + if(NOT EXISTS "${_file}") + message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") + endif() + endmacro() +endif() + + +set_and_check(liblucene++-contrib_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") +set_and_check(liblucene++-contrib_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@") +set(liblucene++-contrib_LIBRARIES "@contrib_libname@") diff --git a/src/config/core/CMakeLists.txt b/src/config/core/CMakeLists.txt new file mode 100644 index 00000000..69cfefcc --- /dev/null +++ b/src/config/core/CMakeLists.txt @@ -0,0 +1,34 @@ +################################# +# install pkconfig +################################# +if(NOT WIN32) + configure_file( + "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++.pc.in" + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc" @ONLY) + + install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++.pc" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig) +endif() + + +################################# +# install config.cmake +################################# +configure_package_config_file( + "${CMAKE_CURRENT_SOURCE_DIR}/liblucene++Config.cmake.in" + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake" + INSTALL_DESTINATION "${LIB_DESTINATION}/cmake/liblucene++" + PATH_VARS core_libname PACKAGE_CMAKE_INSTALL_INCLUDEDIR PACKAGE_CMAKE_INSTALL_LIBDIR) + +write_basic_package_version_file( + 
${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake + VERSION ${lucene++_VERSION} + COMPATIBILITY SameMajorVersion ) + +install( + FILES + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++Config.cmake" + "${CMAKE_CURRENT_BINARY_DIR}/liblucene++ConfigVersion.cmake" + DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/liblucene++) diff --git a/src/config/core/liblucene++.pc.in b/src/config/core/liblucene++.pc.in new file mode 100644 index 00000000..690f7d24 --- /dev/null +++ b/src/config/core/liblucene++.pc.in @@ -0,0 +1,12 @@ +prefix=@CMAKE_INSTALL_PREFIX@ +exec_prefix=${prefix}/bin +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/include/lucene++ +lib=lucene++ + +Name: liblucene++ +Description: Lucene++ - a C++ search engine, ported from the popular Apache Lucene +Version: @lucene++_VERSION@ +Libs: -L${libdir} -l${lib} +Cflags: -I${includedir} + diff --git a/src/config/core/liblucene++Config.cmake.in b/src/config/core/liblucene++Config.cmake.in new file mode 100644 index 00000000..574f8129 --- /dev/null +++ b/src/config/core/liblucene++Config.cmake.in @@ -0,0 +1,27 @@ +# Config module for Lucene++ +# +# Provides the following variables +# liblucene++_INCLUDE_DIRS - Directories to include +# liblucene++_LIBRARIES - Libraries to link +# liblucene++_LIBRARY_DIRS - Library directories to search for link libraries + + +@PACKAGE_INIT@ + + +# This should only be used for meson +if (NOT DEFINED set_and_check) + macro(set_and_check _var _file) + set(${_var} "${_file}") + if(NOT EXISTS "${_file}") + message(FATAL_ERROR "File or directory ${_file} referenced by variable ${_var} does not exist !") + endif() + endmacro() +endif() + + +set_and_check(liblucene++_INCLUDE_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_INCLUDEDIR@") +set_and_check(liblucene++_LIBRARY_DIRS "${PACKAGE_PREFIX_DIR}/@PACKAGE_CMAKE_INSTALL_LIBDIR@") +set(liblucene++_LIBRARIES "@core_libname@") + + diff --git a/src/contrib/CMakeLists.txt b/src/contrib/CMakeLists.txt index 96c7d5dd..7af68045 100644 
--- a/src/contrib/CMakeLists.txt +++ b/src/contrib/CMakeLists.txt @@ -1,54 +1,102 @@ -project(lucene++-contrib) - -#################################### -# THE lucene++-contrib library -#################################### -file(GLOB_RECURSE lucene_sources - ${lucene++-contrib_SOURCE_DIR}/*.cpp - ${lucene++-contrib_SOURCE_DIR}/snowball/libstemmer_c/libstemmer/libstemmer_utf8.c - ${lucene++-contrib_SOURCE_DIR}/snowball/libstemmer_c/src_c/*.c - ${lucene++-contrib_SOURCE_DIR}/snowball/libstemmer_c/runtime/*.c) -file(GLOB_RECURSE HEADERS ${lucene++-contrib_SOURCE_DIR}/include/*.h) - -ADD_DEFINITIONS(-DLPP_BUILDING_LIB) -INCLUDE_DIRECTORIES(${lucene++-base_SOURCE_DIR}/include) -INCLUDE_DIRECTORIES(${lucene++-lib_SOURCE_DIR}/include) -INCLUDE_DIRECTORIES(${lucene++-contrib_SOURCE_DIR}/include) -INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) -LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) - -install(FILES HEADERS - DESTINATION include/lucene++ - COMPONENT development-contrib) - -################################# -# lucene++ static library -################################# -ADD_LIBRARY(lucene++-contrib-static STATIC EXCLUDE_FROM_ALL - ${lucene_sources} ${HEADERS} -) -#set properties on the libraries -SET_TARGET_PROPERTIES(lucene++-contrib-static PROPERTIES - VERSION ${LUCENE++_VERSION} - SOVERSION ${LUCENE++_SOVERSION} -) +project(contrib) + +#################################### +# create library target +#################################### +add_subdirectory(include) + +if (LUCENE_BUILD_SHARED) + add_library(lucene++-contrib SHARED) +else() + add_library(lucene++-contrib STATIC) +endif() + +set(lucene++-contrib_SOVERSION "0") + +set(lucene++-contrib_VERSION + "${lucene++_VERSION_MAJOR}.${lucene++_VERSION_MINOR}.${lucene++_VERSION_PATCH}") -################################# -# lucene++ shared library -################################# -SET(PCH_ADDITIONAL_COMPILER_FLAGS_lucene++-contrib -DLPP_HAVE_DLL) -ADD_LIBRARY(lucene++-contrib SHARED - ${lucene_sources} ${HEADERS} 
+add_library(lucene++::lucene++-contrib ALIAS lucene++-contrib) + + +#################################### +# src +#################################### +file(GLOB_RECURSE contrib_sources + "analyzers/*.cpp" + "highlighter/*.cpp" + "memory/*.cpp" + "index/*.cpp" + "mevc/*.cpp" + "snowball/*.cpp" + "snowball/libstemmer_c/libstemmer/libstemmer_utf8.c" + "snowball/libstemmer_c/src_c/*.c" + "snowball/libstemmer_c/runtime/*.c" ) -#set properties on the libraries -SET_TARGET_PROPERTIES(lucene++-contrib PROPERTIES - VERSION ${LUCENE++_VERSION} - SOVERSION ${LUCENE++_SOVERSION} - COMPILE_FLAGS -DLPP_HAVE_DLL + +file(GLOB_RECURSE contrib_headers + "${lucene++-lib_SOURCE_DIR}/include/*.h") + + + +target_sources(lucene++-contrib + PRIVATE + ${contrib_sources}) + + +#################################### +# include directories +#################################### +target_include_directories(lucene++-contrib + PUBLIC + $ + $ + $ + $ + ${Boost_INCLUDE_DIRS} ) -TARGET_LINK_LIBRARIES(lucene++-contrib - ${CMAKE_THREAD_LIBS_INIT}) + + + +#################################### +# dependencies +#################################### +target_link_libraries(lucene++-contrib + Boost::boost + Boost::date_time + Boost::filesystem + Boost::iostreams + Boost::regex + Boost::system + Boost::thread + ZLIB::ZLIB + lucene++::lucene++) + +if(WIN32) + target_link_libraries(lucene++-contrib ws2_32) +endif() + + +#################################### +# link args +#################################### +target_compile_options(lucene++-contrib PRIVATE -DLPP_BUILDING_LIB) + +set_target_properties(lucene++-contrib + PROPERTIES + COTIRE_CXX_PREFIX_HEADER_INIT "include/ContribInc.h" + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN 1 + VERSION ${lucene++_VERSION} + SOVERSION ${lucene++_SOVERSION}) + +cotire(lucene++-contrib) + install(TARGETS lucene++-contrib - DESTINATION ${LIB_DESTINATION} - COMPONENT runtime ) + DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT runtime) +install( + FILES 
${contrib_headers} + DESTINATION "include/lucene++" + COMPONENT development-contrib) diff --git a/src/contrib/analyzers/common/analysis/ar/ArabicAnalyzer.cpp b/src/contrib/analyzers/common/analysis/ar/ArabicAnalyzer.cpp index 55479dfe..83bb7aae 100644 --- a/src/contrib/analyzers/common/analysis/ar/ArabicAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/ar/ArabicAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,127 +13,118 @@ #include "ArabicStemFilter.h" #include "StringUtils.h" -namespace Lucene -{ - /// Default Arabic stopwords in UTF-8 format. - /// - /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html - /// The stopword list is BSD-Licensed. 
- const uint8_t ArabicAnalyzer::DEFAULT_STOPWORD_FILE[] = - { - 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd9, 0x86, - 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, - 0x0a, 0xd9, 0x88, 0xd9, 0x81, 0xd9, 0x8a, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0xd9, 0x87, 0xd8, 0xa7, - 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0x0a, 0xd9, 0x81, 0x0a, 0xd8, 0xab, - 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0x0a, 0xd8, 0xa3, 0xd9, 0x88, 0x0a, 0xd8, 0xa8, 0x0a, - 0xd8, 0xa8, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0x0a, 0xd8, - 0xa3, 0x0a, 0xd8, 0xa7, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa3, 0xd9, 0x8a, - 0x0a, 0xd8, 0xa3, 0xd9, 0x89, 0x0a, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd8, - 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, - 0xd8, 0xa5, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd9, 0x83, 0xd9, 0x86, 0x0a, 0xd9, 0x85, - 0xd8, 0xa7, 0x0a, 0xd9, 0x88, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x85, 0xd8, 0xa7, - 0x0a, 0xd9, 0x81, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd8, - 0xb9, 0x0a, 0xd8, 0xa7, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, - 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0x0a, 0xd8, - 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, - 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, - 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, - 0xa8, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x81, 0xd8, - 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x81, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, - 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 
0xa5, 0xd9, 0x86, 0x0a, - 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xaa, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xaa, 0xd9, - 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, - 0xb0, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, - 0xa7, 0xd9, 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xa5, 0xd9, - 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xa5, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, - 0x89, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xb9, 0xd9, - 0x84, 0xd9, 0x8a, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, - 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, - 0xb6, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x8a, 0xd8, 0xb6, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, - 0x84, 0x0a, 0xd9, 0x88, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x84, 0xd9, 0x85, 0x0a, 0xd9, 0x88, - 0xd9, 0x84, 0xd9, 0x85, 0x0a, 0xd9, 0x84, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x86, - 0x0a, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x87, 0xd9, 0x88, 0x0a, - 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, - 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x81, 0xd9, 0x87, - 0xd9, 0x8a, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, - 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd9, 0x84, 0xd9, - 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb0, 0xd9, 0x87, 0x0a, - 0xd9, 0x87, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xaa, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd8, 0xb0, - 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x83, 0x0a, 0xd9, 0x83, - 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x8a, - 0xd9, 0x83, 
0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd9, 0x83, 0xd9, 0x88, 0xd9, 0x86, 0x0a, - 0xd9, 0x88, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x88, 0xd9, 0x83, 0xd8, - 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xba, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, - 0xb6, 0x0a, 0xd9, 0x82, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd8, 0xad, 0xd9, 0x88, 0x0a, 0xd8, 0xa8, - 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, - 0xd9, 0x85, 0xd9, 0x86, 0xd8, 0xb0, 0x0a, 0xd8, 0xb6, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd8, 0xad, - 0xd9, 0x8a, 0xd8, 0xab, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, - 0xd9, 0x84, 0xd8, 0xa2, 0xd9, 0x86, 0x0a, 0xd8, 0xae, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, - 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xaf, 0x0a, 0xd9, 0x82, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd8, 0xad, - 0xd8, 0xaa, 0xd9, 0x89, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, - 0xd8, 0xaf, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x89, 0x0a, 0xd8, 0xac, - 0xd9, 0x85, 0xd9, 0x8a, 0xd8, 0xb9, 0x0a - }; - - ArabicAnalyzer::ArabicAnalyzer(LuceneVersion::Version matchVersion) - { - this->stoptable = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - ArabicAnalyzer::ArabicAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stoptable = stopwords; - this->matchVersion = matchVersion; - } - - ArabicAnalyzer::~ArabicAnalyzer() - { - } - - const HashSet ArabicAnalyzer::getDefaultStopSet() - { - static HashSet stopSet; - if (!stopSet) - { - String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); - Collection words(StringUtils::split(stopWords, L"\n")); - stopSet = HashSet::newInstance(words.begin(), words.end()); - } - return stopSet; - } - - TokenStreamPtr ArabicAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(reader); - result = newLucene(result); +namespace Lucene { + +/// Default 
Arabic stopwords in UTF-8 format. +/// +/// Generated from http://members.unine.ch/jacques.savoy/clef/index.html +/// The stopword list is BSD-Licensed. +const uint8_t ArabicAnalyzer::DEFAULT_STOPWORD_FILE[] = { + 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd9, 0x86, + 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, + 0x0a, 0xd9, 0x88, 0xd9, 0x81, 0xd9, 0x8a, 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0xd9, 0x87, 0xd8, 0xa7, + 0x0a, 0xd9, 0x81, 0xd9, 0x8a, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0x0a, 0xd9, 0x81, 0x0a, 0xd8, 0xab, + 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0x0a, 0xd8, 0xa3, 0xd9, 0x88, 0x0a, 0xd8, 0xa8, 0x0a, + 0xd8, 0xa8, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0x0a, 0xd8, + 0xa3, 0x0a, 0xd8, 0xa7, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa3, 0xd9, 0x8a, + 0x0a, 0xd8, 0xa3, 0xd9, 0x89, 0x0a, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd8, + 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, + 0xd8, 0xa5, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd9, 0x83, 0xd9, 0x86, 0x0a, 0xd9, 0x85, + 0xd8, 0xa7, 0x0a, 0xd9, 0x88, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x85, 0xd8, 0xa7, + 0x0a, 0xd9, 0x81, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd8, + 0xb9, 0x0a, 0xd8, 0xa7, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, + 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0x0a, 0xd8, + 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, + 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, + 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa5, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, + 0xa8, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x81, 0xd8, + 0xa7, 0xd9, 0x86, 
0x0a, 0xd9, 0x81, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, + 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa3, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd8, 0xa5, 0xd9, 0x86, 0x0a, + 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xaa, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xaa, 0xd9, + 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, + 0xb0, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, + 0xa7, 0xd9, 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xa5, 0xd9, + 0x84, 0xd9, 0x89, 0x0a, 0xd8, 0xa5, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, + 0x89, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xb9, 0xd9, + 0x84, 0xd9, 0x8a, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, + 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa5, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, + 0xb6, 0xd8, 0xa7, 0x0a, 0xd8, 0xa3, 0xd9, 0x8a, 0xd8, 0xb6, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, + 0x84, 0x0a, 0xd9, 0x88, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x84, 0xd9, 0x85, 0x0a, 0xd9, 0x88, + 0xd9, 0x84, 0xd9, 0x85, 0x0a, 0xd9, 0x84, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x86, + 0x0a, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x87, 0xd9, 0x88, 0x0a, + 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, + 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x89, 0x0a, 0xd9, 0x81, 0xd9, 0x87, + 0xd9, 0x8a, 0x0a, 0xd9, 0x81, 0xd9, 0x87, 0xd9, 0x88, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, + 0x0a, 0xd8, 0xa3, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd9, 0x84, 0xd9, + 0x87, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb0, 0xd9, 0x87, 0x0a, + 0xd9, 0x87, 0xd8, 0xb0, 0xd8, 0xa7, 0x0a, 0xd8, 0xaa, 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd8, 0xb0, + 0xd9, 0x84, 0xd9, 0x83, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 
0xd8, 0xa7, 0xd9, 0x83, 0x0a, 0xd9, 0x83, + 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x8a, + 0xd9, 0x83, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd9, 0x83, 0xd9, 0x88, 0xd9, 0x86, 0x0a, + 0xd9, 0x88, 0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaa, 0x0a, 0xd9, 0x88, 0xd9, 0x83, 0xd8, + 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xba, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, + 0xb6, 0x0a, 0xd9, 0x82, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd8, 0xad, 0xd9, 0x88, 0x0a, 0xd8, 0xa8, + 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, + 0xd9, 0x85, 0xd9, 0x86, 0xd8, 0xb0, 0x0a, 0xd8, 0xb6, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd8, 0xad, + 0xd9, 0x8a, 0xd8, 0xab, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, + 0xd9, 0x84, 0xd8, 0xa2, 0xd9, 0x86, 0x0a, 0xd8, 0xae, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, + 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xaf, 0x0a, 0xd9, 0x82, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd8, 0xad, + 0xd8, 0xaa, 0xd9, 0x89, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, + 0xd8, 0xaf, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x89, 0x0a, 0xd8, 0xac, + 0xd9, 0x85, 0xd9, 0x8a, 0xd8, 0xb9, 0x0a +}; + +ArabicAnalyzer::ArabicAnalyzer(LuceneVersion::Version matchVersion) { + this->stoptable = getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +ArabicAnalyzer::ArabicAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stoptable = stopwords; + this->matchVersion = matchVersion; +} + +ArabicAnalyzer::~ArabicAnalyzer() { +} + +const HashSet ArabicAnalyzer::getDefaultStopSet() { + static HashSet stopSet; + LUCENE_RUN_ONCE( + String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); + Collection words(StringUtils::split(stopWords, L"\n")); + stopSet = HashSet::newInstance(words.begin(), words.end()); + ); + return stopSet; +} + +TokenStreamPtr ArabicAnalyzer::tokenStream(const String& fieldName, 
const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(reader); + result = newLucene(result); + // the order here is important: the stopword list is not normalized + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); + result = newLucene(result); + result = newLucene(result); + return result; +} + +TokenStreamPtr ArabicAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + ArabicAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(reader); + streams->result = newLucene(streams->source); // the order here is important: the stopword list is not normalized - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); - result = newLucene(result); - result = newLucene(result); - return result; - } - - TokenStreamPtr ArabicAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - ArabicAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(reader); - streams->result = newLucene(streams->source); - // the order here is important: the stopword list is not normalized - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); - streams->result = newLucene(streams->result); - streams->result = newLucene(streams->result); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - ArabicAnalyzerSavedStreams::~ArabicAnalyzerSavedStreams() - { + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); + streams->result = newLucene(streams->result); + streams->result = newLucene(streams->result); + 
setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +ArabicAnalyzerSavedStreams::~ArabicAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/ar/ArabicLetterTokenizer.cpp b/src/contrib/analyzers/common/analysis/ar/ArabicLetterTokenizer.cpp index 3888872c..e5f1ded0 100644 --- a/src/contrib/analyzers/common/analysis/ar/ArabicLetterTokenizer.cpp +++ b/src/contrib/analyzers/common/analysis/ar/ArabicLetterTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,26 +9,22 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - ArabicLetterTokenizer::ArabicLetterTokenizer(ReaderPtr input) : LetterTokenizer(input) - { - } - - ArabicLetterTokenizer::ArabicLetterTokenizer(AttributeSourcePtr source, ReaderPtr input) : LetterTokenizer(source, input) - { - } - - ArabicLetterTokenizer::ArabicLetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : LetterTokenizer(factory, input) - { - } - - ArabicLetterTokenizer::~ArabicLetterTokenizer() - { - } - - bool ArabicLetterTokenizer::isTokenChar(wchar_t c) - { - return LetterTokenizer::isTokenChar(c) || UnicodeUtil::isNonSpacing(c); - } +namespace Lucene { + +ArabicLetterTokenizer::ArabicLetterTokenizer(const ReaderPtr& input) : LetterTokenizer(input) { +} + +ArabicLetterTokenizer::ArabicLetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : LetterTokenizer(source, input) { +} + +ArabicLetterTokenizer::ArabicLetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : LetterTokenizer(factory, input) { +} + 
+ArabicLetterTokenizer::~ArabicLetterTokenizer() { +} + +bool ArabicLetterTokenizer::isTokenChar(wchar_t c) { + return LetterTokenizer::isTokenChar(c) || UnicodeUtil::isNonSpacing(c); +} + } diff --git a/src/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilter.cpp b/src/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilter.cpp index 4b7bd7fd..6482dce4 100644 --- a/src/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilter.cpp +++ b/src/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,27 +9,24 @@ #include "ArabicNormalizer.h" #include "TermAttribute.h" -namespace Lucene -{ - ArabicNormalizationFilter::ArabicNormalizationFilter(TokenStreamPtr input) : TokenFilter(input) - { - normalizer = newLucene(); - termAtt = addAttribute(); - } - - ArabicNormalizationFilter::~ArabicNormalizationFilter() - { - } - - bool ArabicNormalizationFilter::incrementToken() - { - if (input->incrementToken()) - { - int32_t newlen = normalizer->normalize(termAtt->termBuffer().get(), termAtt->termLength()); - termAtt->setTermLength(newlen); - return true; - } - else - return false; +namespace Lucene { + +ArabicNormalizationFilter::ArabicNormalizationFilter(const TokenStreamPtr& input) : TokenFilter(input) { + normalizer = newLucene(); + termAtt = addAttribute(); +} + +ArabicNormalizationFilter::~ArabicNormalizationFilter() { +} + +bool ArabicNormalizationFilter::incrementToken() { + if (input->incrementToken()) { + int32_t newlen = normalizer->normalize(termAtt->termBuffer().get(), termAtt->termLength()); + 
termAtt->setTermLength(newlen); + return true; + } else { + return false; } } + +} diff --git a/src/contrib/analyzers/common/analysis/ar/ArabicNormalizer.cpp b/src/contrib/analyzers/common/analysis/ar/ArabicNormalizer.cpp index 18685ca5..f94ea2ae 100644 --- a/src/contrib/analyzers/common/analysis/ar/ArabicNormalizer.cpp +++ b/src/contrib/analyzers/common/analysis/ar/ArabicNormalizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,73 +8,70 @@ #include "ArabicNormalizer.h" #include "MiscUtils.h" -namespace Lucene -{ - const wchar_t ArabicNormalizer::ALEF = (wchar_t)0x0627; - const wchar_t ArabicNormalizer::ALEF_MADDA = (wchar_t)0x0622; - const wchar_t ArabicNormalizer::ALEF_HAMZA_ABOVE = (wchar_t)0x0623; - const wchar_t ArabicNormalizer::ALEF_HAMZA_BELOW = (wchar_t)0x0625; - - const wchar_t ArabicNormalizer::YEH = (wchar_t)0x064a; - const wchar_t ArabicNormalizer::DOTLESS_YEH = (wchar_t)0x0649; - - const wchar_t ArabicNormalizer::TEH_MARBUTA = (wchar_t)0x0629; - const wchar_t ArabicNormalizer::HEH = (wchar_t)0x0647; - - const wchar_t ArabicNormalizer::TATWEEL = (wchar_t)0x0640; - - const wchar_t ArabicNormalizer::FATHATAN = (wchar_t)0x064b; - const wchar_t ArabicNormalizer::DAMMATAN = (wchar_t)0x064c; - const wchar_t ArabicNormalizer::KASRATAN = (wchar_t)0x064d; - const wchar_t ArabicNormalizer::FATHA = (wchar_t)0x064e; - const wchar_t ArabicNormalizer::DAMMA = (wchar_t)0x064f; - const wchar_t ArabicNormalizer::KASRA = (wchar_t)0x0650; - const wchar_t ArabicNormalizer::SHADDA = (wchar_t)0x0651; - const wchar_t ArabicNormalizer::SUKUN = (wchar_t)0x0652; - - ArabicNormalizer::~ArabicNormalizer() - 
{ - } - - int32_t ArabicNormalizer::normalize(wchar_t* s, int32_t len) - { - for (int32_t i = 0; i < len; ++i) - { - switch (s[i]) - { - case ALEF_MADDA: - case ALEF_HAMZA_ABOVE: - case ALEF_HAMZA_BELOW: - s[i] = ALEF; - break; - case DOTLESS_YEH: - s[i] = YEH; - break; - case TEH_MARBUTA: - s[i] = HEH; - break; - case TATWEEL: - case KASRATAN: - case DAMMATAN: - case FATHATAN: - case FATHA: - case DAMMA: - case KASRA: - case SHADDA: - case SUKUN: - len = deleteChar(s, i--, len); - break; - default: - break; - } +namespace Lucene { + +const wchar_t ArabicNormalizer::ALEF = (wchar_t)0x0627; +const wchar_t ArabicNormalizer::ALEF_MADDA = (wchar_t)0x0622; +const wchar_t ArabicNormalizer::ALEF_HAMZA_ABOVE = (wchar_t)0x0623; +const wchar_t ArabicNormalizer::ALEF_HAMZA_BELOW = (wchar_t)0x0625; + +const wchar_t ArabicNormalizer::YEH = (wchar_t)0x064a; +const wchar_t ArabicNormalizer::DOTLESS_YEH = (wchar_t)0x0649; + +const wchar_t ArabicNormalizer::TEH_MARBUTA = (wchar_t)0x0629; +const wchar_t ArabicNormalizer::HEH = (wchar_t)0x0647; + +const wchar_t ArabicNormalizer::TATWEEL = (wchar_t)0x0640; + +const wchar_t ArabicNormalizer::FATHATAN = (wchar_t)0x064b; +const wchar_t ArabicNormalizer::DAMMATAN = (wchar_t)0x064c; +const wchar_t ArabicNormalizer::KASRATAN = (wchar_t)0x064d; +const wchar_t ArabicNormalizer::FATHA = (wchar_t)0x064e; +const wchar_t ArabicNormalizer::DAMMA = (wchar_t)0x064f; +const wchar_t ArabicNormalizer::KASRA = (wchar_t)0x0650; +const wchar_t ArabicNormalizer::SHADDA = (wchar_t)0x0651; +const wchar_t ArabicNormalizer::SUKUN = (wchar_t)0x0652; + +ArabicNormalizer::~ArabicNormalizer() { +} + +int32_t ArabicNormalizer::normalize(wchar_t* s, int32_t len) { + for (int32_t i = 0; i < len; ++i) { + switch (s[i]) { + case ALEF_MADDA: + case ALEF_HAMZA_ABOVE: + case ALEF_HAMZA_BELOW: + s[i] = ALEF; + break; + case DOTLESS_YEH: + s[i] = YEH; + break; + case TEH_MARBUTA: + s[i] = HEH; + break; + case TATWEEL: + case KASRATAN: + case DAMMATAN: + case FATHATAN: + 
case FATHA: + case DAMMA: + case KASRA: + case SHADDA: + case SUKUN: + len = deleteChar(s, i--, len); + break; + default: + break; } - return len; } - - int32_t ArabicNormalizer::deleteChar(wchar_t* s, int32_t pos, int32_t len) - { - if (pos < len) - MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); - return len - 1; + return len; +} + +int32_t ArabicNormalizer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { + if (pos < len) { + MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); } + return len - 1; +} + } diff --git a/src/contrib/analyzers/common/analysis/ar/ArabicStemFilter.cpp b/src/contrib/analyzers/common/analysis/ar/ArabicStemFilter.cpp index 605847c2..241761b6 100644 --- a/src/contrib/analyzers/common/analysis/ar/ArabicStemFilter.cpp +++ b/src/contrib/analyzers/common/analysis/ar/ArabicStemFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,27 +9,24 @@ #include "ArabicStemmer.h" #include "TermAttribute.h" -namespace Lucene -{ - ArabicStemFilter::ArabicStemFilter(TokenStreamPtr input) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - } - - ArabicStemFilter::~ArabicStemFilter() - { - } - - bool ArabicStemFilter::incrementToken() - { - if (input->incrementToken()) - { - int32_t newlen = stemmer->stem(termAtt->termBuffer().get(), termAtt->termLength()); - termAtt->setTermLength(newlen); - return true; - } - else - return false; +namespace Lucene { + +ArabicStemFilter::ArabicStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); +} + +ArabicStemFilter::~ArabicStemFilter() { +} + +bool ArabicStemFilter::incrementToken() { + if (input->incrementToken()) { + int32_t newlen = stemmer->stem(termAtt->termBuffer().get(), termAtt->termLength()); + termAtt->setTermLength(newlen); + return true; + } else { + return false; } } + +} diff --git a/src/contrib/analyzers/common/analysis/ar/ArabicStemmer.cpp b/src/contrib/analyzers/common/analysis/ar/ArabicStemmer.cpp index ba5fd1d9..cb88eeca 100644 --- a/src/contrib/analyzers/common/analysis/ar/ArabicStemmer.cpp +++ b/src/contrib/analyzers/common/analysis/ar/ArabicStemmer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,133 +8,122 @@ #include "ArabicStemmer.h" #include "MiscUtils.h" -namespace Lucene -{ - const wchar_t ArabicStemmer::ALEF = (wchar_t)0x0627; - const wchar_t ArabicStemmer::BEH = (wchar_t)0x0628; - const wchar_t ArabicStemmer::TEH_MARBUTA = (wchar_t)0x0629; - const wchar_t ArabicStemmer::TEH = (wchar_t)0x062a; - const wchar_t ArabicStemmer::FEH = (wchar_t)0x0641; - const wchar_t ArabicStemmer::KAF = (wchar_t)0x0643; - const wchar_t ArabicStemmer::LAM = (wchar_t)0x0644; - const wchar_t ArabicStemmer::NOON = (wchar_t)0x0646; - const wchar_t ArabicStemmer::HEH = (wchar_t)0x0647; - const wchar_t ArabicStemmer::WAW = (wchar_t)0x0648; - const wchar_t ArabicStemmer::YEH = (wchar_t)0x064a; - - ArabicStemmer::~ArabicStemmer() - { - } - - const Collection ArabicStemmer::prefixes() - { - static Collection _prefixes; - if (!_prefixes) - { - _prefixes = Collection::newInstance(); - _prefixes.add(String(L"") + ALEF + LAM); - _prefixes.add(String(L"") + WAW + ALEF + LAM); - _prefixes.add(String(L"") + BEH + ALEF + LAM); - _prefixes.add(String(L"") + KAF + ALEF + LAM); - _prefixes.add(String(L"") + FEH + ALEF + LAM); - _prefixes.add(String(L"") + LAM + LAM); - _prefixes.add(String(L"") + WAW); - } - return _prefixes; - } - - const Collection ArabicStemmer::suffixes() - { - static Collection _suffixes; - if (!_suffixes) - { - _suffixes = Collection::newInstance(); - _suffixes.add(String(L"") + HEH + ALEF); - _suffixes.add(String(L"") + ALEF + NOON); - _suffixes.add(String(L"") + ALEF + TEH); - _suffixes.add(String(L"") + WAW + NOON); - _suffixes.add(String(L"") + YEH + NOON); - _suffixes.add(String(L"") + YEH + HEH); - _suffixes.add(String(L"") + YEH + TEH_MARBUTA); - _suffixes.add(String(L"") + HEH); - _suffixes.add(String(L"") + TEH_MARBUTA); - _suffixes.add(String(L"") + YEH); - } - return _suffixes; - } - - int32_t ArabicStemmer::stem(wchar_t* s, int32_t len) - { - len = stemPrefix(s, len); - len 
= stemSuffix(s, len); - return len; - } - - int32_t ArabicStemmer::stemPrefix(wchar_t* s, int32_t len) - { - Collection stemPrefixes(prefixes()); - for (int32_t i = 0; i < stemPrefixes.size(); ++i) - { - if (startsWith(s, len, stemPrefixes[i])) - return deleteChars(s, 0, len, (int32_t)stemPrefixes[i].length()); +namespace Lucene { + +const wchar_t ArabicStemmer::ALEF = (wchar_t)0x0627; +const wchar_t ArabicStemmer::BEH = (wchar_t)0x0628; +const wchar_t ArabicStemmer::TEH_MARBUTA = (wchar_t)0x0629; +const wchar_t ArabicStemmer::TEH = (wchar_t)0x062a; +const wchar_t ArabicStemmer::FEH = (wchar_t)0x0641; +const wchar_t ArabicStemmer::KAF = (wchar_t)0x0643; +const wchar_t ArabicStemmer::LAM = (wchar_t)0x0644; +const wchar_t ArabicStemmer::NOON = (wchar_t)0x0646; +const wchar_t ArabicStemmer::HEH = (wchar_t)0x0647; +const wchar_t ArabicStemmer::WAW = (wchar_t)0x0648; +const wchar_t ArabicStemmer::YEH = (wchar_t)0x064a; + +ArabicStemmer::~ArabicStemmer() { +} + +const Collection ArabicStemmer::prefixes() { + static Collection _prefixes; + LUCENE_RUN_ONCE( + _prefixes = Collection::newInstance(); + _prefixes.add(String(L"") + ALEF + LAM); + _prefixes.add(String(L"") + WAW + ALEF + LAM); + _prefixes.add(String(L"") + BEH + ALEF + LAM); + _prefixes.add(String(L"") + KAF + ALEF + LAM); + _prefixes.add(String(L"") + FEH + ALEF + LAM); + _prefixes.add(String(L"") + LAM + LAM); + _prefixes.add(String(L"") + WAW); + ); + return _prefixes; +} + +const Collection ArabicStemmer::suffixes() { + static Collection _suffixes; + LUCENE_RUN_ONCE( + _suffixes = Collection::newInstance(); + _suffixes.add(String(L"") + HEH + ALEF); + _suffixes.add(String(L"") + ALEF + NOON); + _suffixes.add(String(L"") + ALEF + TEH); + _suffixes.add(String(L"") + WAW + NOON); + _suffixes.add(String(L"") + YEH + NOON); + _suffixes.add(String(L"") + YEH + HEH); + _suffixes.add(String(L"") + YEH + TEH_MARBUTA); + _suffixes.add(String(L"") + HEH); + _suffixes.add(String(L"") + TEH_MARBUTA); + 
_suffixes.add(String(L"") + YEH); + ); + return _suffixes; +} + +int32_t ArabicStemmer::stem(wchar_t* s, int32_t len) { + len = stemPrefix(s, len); + len = stemSuffix(s, len); + return len; +} + +int32_t ArabicStemmer::stemPrefix(wchar_t* s, int32_t len) { + Collection stemPrefixes(prefixes()); + for (int32_t i = 0; i < stemPrefixes.size(); ++i) { + if (startsWith(s, len, stemPrefixes[i])) { + return deleteChars(s, 0, len, (int32_t)stemPrefixes[i].length()); } - return len; } - - int32_t ArabicStemmer::stemSuffix(wchar_t* s, int32_t len) - { - Collection stemSuffixes(suffixes()); - for (int32_t i = 0; i < stemSuffixes.size(); ++i) - { - if (endsWith(s, len, stemSuffixes[i])) - len = (int32_t)deleteChars(s, (int32_t)(len - stemSuffixes[i].length()), len, (int32_t)stemSuffixes[i].length()); + return len; +} + +int32_t ArabicStemmer::stemSuffix(wchar_t* s, int32_t len) { + Collection stemSuffixes(suffixes()); + for (int32_t i = 0; i < stemSuffixes.size(); ++i) { + if (endsWith(s, len, stemSuffixes[i])) { + len = (int32_t)deleteChars(s, (int32_t)(len - stemSuffixes[i].length()), len, (int32_t)stemSuffixes[i].length()); } - return len; } - - bool ArabicStemmer::startsWith(wchar_t* s, int32_t len, const String& prefix) - { - if (prefix.length() == 1 && len < 4) // wa- prefix requires at least 3 characters - return false; - else if (len < (int32_t)prefix.length() + 2) // other prefixes require only 2 - return false; - else - { - for (int32_t i = 0; i < (int32_t)prefix.length(); ++i) - { - if (s[i] != prefix[i]) - return false; + return len; +} + +bool ArabicStemmer::startsWith(wchar_t* s, int32_t len, const String& prefix) { + if (prefix.length() == 1 && len < 4) { // wa- prefix requires at least 3 characters + return false; + } else if (len < (int32_t)prefix.length() + 2) { // other prefixes require only 2 + return false; + } else { + for (int32_t i = 0; i < (int32_t)prefix.length(); ++i) { + if (s[i] != prefix[i]) { + return false; } - return true; } + return true; } - 
- bool ArabicStemmer::endsWith(wchar_t* s, int32_t len, const String& suffix) - { - if (len < (int32_t)suffix.length() + 2) // all suffixes require at least 2 characters after stemming - return false; - else - { - for (int32_t i = 0; i < (int32_t)suffix.length(); ++i) - { - if (s[len - suffix.length() + i] != suffix[i]) - return false; +} + +bool ArabicStemmer::endsWith(wchar_t* s, int32_t len, const String& suffix) { + if (len < (int32_t)suffix.length() + 2) { // all suffixes require at least 2 characters after stemming + return false; + } else { + for (int32_t i = 0; i < (int32_t)suffix.length(); ++i) { + if (s[len - suffix.length() + i] != suffix[i]) { + return false; } - return true; } + return true; } - - int32_t ArabicStemmer::deleteChars(wchar_t* s, int32_t pos, int32_t len, int32_t chars) - { - for (int32_t i = 0; i < chars; ++i) - len = deleteChar(s, pos, len); - return len; +} + +int32_t ArabicStemmer::deleteChars(wchar_t* s, int32_t pos, int32_t len, int32_t chars) { + for (int32_t i = 0; i < chars; ++i) { + len = deleteChar(s, pos, len); } - - int32_t ArabicStemmer::deleteChar(wchar_t* s, int32_t pos, int32_t len) - { - if (pos < len) - MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); - return len - 1; + return len; +} + +int32_t ArabicStemmer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { + if (pos < len) { + MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); } + return len - 1; +} + } diff --git a/src/contrib/analyzers/common/analysis/br/BrazilianAnalyzer.cpp b/src/contrib/analyzers/common/analysis/br/BrazilianAnalyzer.cpp index 6b0249cf..5eeb846e 100644 --- a/src/contrib/analyzers/common/analysis/br/BrazilianAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/br/BrazilianAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,99 +12,90 @@ #include "StopFilter.h" #include "BrazilianStemFilter.h" -namespace Lucene -{ - const wchar_t* BrazilianAnalyzer::_BRAZILIAN_STOP_WORDS[] = - { - L"a", L"ainda", L"alem", L"ambas", L"ambos", L"antes", - L"ao", L"aonde", L"aos", L"apos", L"aquele", L"aqueles", - L"as", L"assim", L"com", L"como", L"contra", L"contudo", - L"cuja", L"cujas", L"cujo", L"cujos", L"da", L"das", L"de", - L"dela", L"dele", L"deles", L"demais", L"depois", L"desde", - L"desta", L"deste", L"dispoe", L"dispoem", L"diversa", - L"diversas", L"diversos", L"do", L"dos", L"durante", L"e", - L"ela", L"elas", L"ele", L"eles", L"em", L"entao", L"entre", - L"essa", L"essas", L"esse", L"esses", L"esta", L"estas", - L"este", L"estes", L"ha", L"isso", L"isto", L"logo", L"mais", - L"mas", L"mediante", L"menos", L"mesma", L"mesmas", L"mesmo", - L"mesmos", L"na", L"nas", L"nao", L"nas", L"nem", L"nesse", - L"neste", L"nos", L"o", L"os", L"ou", L"outra", L"outras", - L"outro", L"outros", L"pelas", L"pelas", L"pelo", L"pelos", - L"perante", L"pois", L"por", L"porque", L"portanto", - L"proprio", L"propios", L"quais", L"qual", L"qualquer", - L"quando", L"quanto", L"que", L"quem", L"quer", L"se", L"seja", - L"sem", L"sendo", L"seu", L"seus", L"sob", L"sobre", L"sua", - L"suas", L"tal", L"tambem", L"teu", L"teus", L"toda", L"todas", - L"todo", L"todos", L"tua", L"tuas", L"tudo", L"um", L"uma", - L"umas", L"uns" - }; - - BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion) - { - this->stoptable = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stoptable = stopwords; - this->matchVersion = matchVersion; - } - - 
BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) - { - this->stoptable = stopwords; - this->excltable = exclusions; - this->matchVersion = matchVersion; - } - - BrazilianAnalyzer::~BrazilianAnalyzer() - { - } - - const HashSet BrazilianAnalyzer::getDefaultStopSet() - { - static HashSet stopSet; - if (!stopSet) - stopSet = HashSet::newInstance(_BRAZILIAN_STOP_WORDS, _BRAZILIAN_STOP_WORDS + SIZEOF_ARRAY(_BRAZILIAN_STOP_WORDS)); - return stopSet; - } - - void BrazilianAnalyzer::setStemExclusionTable(HashSet exclusions) - { - excltable = exclusions; - setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created - } - - TokenStreamPtr BrazilianAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(matchVersion, reader); - result = newLucene(result); - result = newLucene(result); - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); - result = newLucene(result, excltable); - return result; - } - - TokenStreamPtr BrazilianAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - BrazilianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(matchVersion, reader); - streams->result = newLucene(streams->source); - streams->result = newLucene(streams->result); - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); - streams->result = newLucene(streams->result, excltable); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - BrazilianAnalyzerSavedStreams::~BrazilianAnalyzerSavedStreams() - { +namespace Lucene { + +const wchar_t* BrazilianAnalyzer::_BRAZILIAN_STOP_WORDS[] = { + L"a", L"ainda", L"alem", L"ambas", L"ambos", L"antes", 
+ L"ao", L"aonde", L"aos", L"apos", L"aquele", L"aqueles", + L"as", L"assim", L"com", L"como", L"contra", L"contudo", + L"cuja", L"cujas", L"cujo", L"cujos", L"da", L"das", L"de", + L"dela", L"dele", L"deles", L"demais", L"depois", L"desde", + L"desta", L"deste", L"dispoe", L"dispoem", L"diversa", + L"diversas", L"diversos", L"do", L"dos", L"durante", L"e", + L"ela", L"elas", L"ele", L"eles", L"em", L"entao", L"entre", + L"essa", L"essas", L"esse", L"esses", L"esta", L"estas", + L"este", L"estes", L"ha", L"isso", L"isto", L"logo", L"mais", + L"mas", L"mediante", L"menos", L"mesma", L"mesmas", L"mesmo", + L"mesmos", L"na", L"nas", L"nao", L"nas", L"nem", L"nesse", + L"neste", L"nos", L"o", L"os", L"ou", L"outra", L"outras", + L"outro", L"outros", L"pelas", L"pelas", L"pelo", L"pelos", + L"perante", L"pois", L"por", L"porque", L"portanto", + L"proprio", L"propios", L"quais", L"qual", L"qualquer", + L"quando", L"quanto", L"que", L"quem", L"quer", L"se", L"seja", + L"sem", L"sendo", L"seu", L"seus", L"sob", L"sobre", L"sua", + L"suas", L"tal", L"tambem", L"teu", L"teus", L"toda", L"todas", + L"todo", L"todos", L"tua", L"tuas", L"tudo", L"um", L"uma", + L"umas", L"uns" +}; + +BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion) { + this->stoptable = getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stoptable = stopwords; + this->matchVersion = matchVersion; +} + +BrazilianAnalyzer::BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { + this->stoptable = stopwords; + this->excltable = exclusions; + this->matchVersion = matchVersion; +} + +BrazilianAnalyzer::~BrazilianAnalyzer() { +} + +const HashSet BrazilianAnalyzer::getDefaultStopSet() { + static HashSet stopSet; + LUCENE_RUN_ONCE( + stopSet = HashSet::newInstance(_BRAZILIAN_STOP_WORDS, _BRAZILIAN_STOP_WORDS + 
SIZEOF_ARRAY(_BRAZILIAN_STOP_WORDS)); + ); + return stopSet; +} + +void BrazilianAnalyzer::setStemExclusionTable(HashSet exclusions) { + excltable = exclusions; + setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created +} + +TokenStreamPtr BrazilianAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(matchVersion, reader); + result = newLucene(result); + result = newLucene(result); + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); + result = newLucene(result, excltable); + return result; +} + +TokenStreamPtr BrazilianAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + BrazilianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(matchVersion, reader); + streams->result = newLucene(streams->source); + streams->result = newLucene(streams->result); + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); + streams->result = newLucene(streams->result, excltable); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +BrazilianAnalyzerSavedStreams::~BrazilianAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/br/BrazilianStemFilter.cpp b/src/contrib/analyzers/common/analysis/br/BrazilianStemFilter.cpp index e2f80319..d6ae451e 100644 --- a/src/contrib/analyzers/common/analysis/br/BrazilianStemFilter.cpp +++ b/src/contrib/analyzers/common/analysis/br/BrazilianStemFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,41 +9,37 @@ #include "BrazilianStemmer.h" #include "TermAttribute.h" -namespace Lucene -{ - BrazilianStemFilter::BrazilianStemFilter(TokenStreamPtr input) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - } - - BrazilianStemFilter::BrazilianStemFilter(TokenStreamPtr input, HashSet exclusiontable) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - exclusions = exclusiontable; - } - - BrazilianStemFilter::~BrazilianStemFilter() - { - } - - bool BrazilianStemFilter::incrementToken() - { - if (input->incrementToken()) - { - String term(termAtt->term()); - // Check the exclusion table. - if (!exclusions || !exclusions.contains(term)) - { - String s(stemmer->stem(term)); - // If not stemmed, don't waste the time adjusting the token. - if (!s.empty() && s != term) - termAtt->setTermBuffer(s); +namespace Lucene { + +BrazilianStemFilter::BrazilianStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); +} + +BrazilianStemFilter::BrazilianStemFilter(const TokenStreamPtr& input, HashSet exclusiontable) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); + exclusions = exclusiontable; +} + +BrazilianStemFilter::~BrazilianStemFilter() { +} + +bool BrazilianStemFilter::incrementToken() { + if (input->incrementToken()) { + String term(termAtt->term()); + // Check the exclusion table. + if (!exclusions || !exclusions.contains(term)) { + String s(stemmer->stem(term)); + // If not stemmed, don't waste the time adjusting the token. 
+ if (!s.empty() && s != term) { + termAtt->setTermBuffer(s); } - return true; } - else - return false; + return true; + } else { + return false; } } + +} diff --git a/src/contrib/analyzers/common/analysis/br/BrazilianStemmer.cpp b/src/contrib/analyzers/common/analysis/br/BrazilianStemmer.cpp index 5f6307c2..e509ba21 100644 --- a/src/contrib/analyzers/common/analysis/br/BrazilianStemmer.cpp +++ b/src/contrib/analyzers/common/analysis/br/BrazilianStemmer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,1156 +10,984 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - BrazilianStemmer::~BrazilianStemmer() - { - } - - String BrazilianStemmer::stem(const String& term) - { - // creates CT - createCT(term); +namespace Lucene { - if (!isIndexable(CT)) - return L""; - if (!isStemmable(CT)) - return CT; +BrazilianStemmer::~BrazilianStemmer() { +} - R1 = getR1(CT); - R2 = getR1(R1); - RV = getRV(CT); - TERM = term + L";" + CT; +String BrazilianStemmer::stem(const String& term) { + // creates CT + createCT(term); - bool altered = step1(); - if (!altered) - altered = step2(); + if (!isIndexable(CT)) { + return L""; + } + if (!isStemmable(CT)) { + return CT; + } - if (altered) - step3(); - else - step4(); + R1 = getR1(CT); + R2 = getR1(R1); + RV = getRV(CT); + TERM = term + L";" + CT; - step5(); + bool altered = step1(); + if (!altered) { + altered = step2(); + } - return CT; + if (altered) { + step3(); + } else { + step4(); } - - bool BrazilianStemmer::isStemmable(const String& term) - { - for (int32_t c = 0; c < (int32_t)term.length(); ++c) - { - // Discard terms that contain 
non-letter characters. - if (!UnicodeUtil::isAlpha(term[c])) - return false; + + step5(); + + return CT; +} + +bool BrazilianStemmer::isStemmable(const String& term) { + for (int32_t c = 0; c < (int32_t)term.length(); ++c) { + // Discard terms that contain non-letter characters. + if (!UnicodeUtil::isAlpha(term[c])) { + return false; } - return true; } - - bool BrazilianStemmer::isIndexable(const String& term) - { - return (term.length() < 30) && (term.length() > 2); - } - - bool BrazilianStemmer::isVowel(wchar_t value) - { - return (value == L'a' || value == L'e' || value == L'i' || value == L'o' || value == L'u'); + return true; +} + +bool BrazilianStemmer::isIndexable(const String& term) { + return (term.length() < 30) && (term.length() > 2); +} + +bool BrazilianStemmer::isVowel(wchar_t value) { + return (value == L'a' || value == L'e' || value == L'i' || value == L'o' || value == L'u'); +} + +String BrazilianStemmer::getR1(const String& value) { + if (value.empty()) { + return L""; } - - String BrazilianStemmer::getR1(const String& value) - { - if (value.empty()) - return L""; - // find 1st vowel - int32_t i = (int32_t)(value.length() - 1); - int32_t j = 0; - for (; j < i; ++j) - { - if (isVowel(value[j])) - break; + // find 1st vowel + int32_t i = (int32_t)(value.length() - 1); + int32_t j = 0; + for (; j < i; ++j) { + if (isVowel(value[j])) { + break; } + } - if (j >= i) - return L""; + if (j >= i) { + return L""; + } - // find 1st non-vowel - for (; j < i; ++j) - { - if (!isVowel(value[j])) - break; + // find 1st non-vowel + for (; j < i; ++j) { + if (!isVowel(value[j])) { + break; } + } + + if (j >= i) { + return L""; + } - if (j >= i) - return L""; + return value.substr(j + 1); +} - return value.substr(j + 1); +String BrazilianStemmer::getRV(const String& value) { + if (value.empty()) { + return L""; } - - String BrazilianStemmer::getRV(const String& value) - { - if (value.empty()) - return L""; - - int32_t i = (int32_t)(value.length() - 1); - - // RV - IF 
the second letter is a consonant, RV is the region after the next following vowel - if (i > 0 && !isVowel(value[1])) - { - int32_t j = 2; - // find 1st vowel - for (; j < i; ++j) - { - if (isVowel(value[j])) - break; + + int32_t i = (int32_t)(value.length() - 1); + + // RV - IF the second letter is a consonant, RV is the region after the next following vowel + if (i > 0 && !isVowel(value[1])) { + int32_t j = 2; + // find 1st vowel + for (; j < i; ++j) { + if (isVowel(value[j])) { + break; } + } - if (j < i) - return value.substr(j + 1); + if (j < i) { + return value.substr(j + 1); } + } - // RV - OR if the first two letters are vowels, RV is the region after the next consonant, - if (i > 1 && isVowel(value[0]) && isVowel(value[1])) - { - int32_t j = 2; - // find 1st consonant - for (; j < i; ++j) - { - if (!isVowel(value[j])) - break; + // RV - OR if the first two letters are vowels, RV is the region after the next consonant, + if (i > 1 && isVowel(value[0]) && isVowel(value[1])) { + int32_t j = 2; + // find 1st consonant + for (; j < i; ++j) { + if (!isVowel(value[j])) { + break; } + } - if (j < i) - return value.substr(j + 1); + if (j < i) { + return value.substr(j + 1); } - - // RV - AND otherwise (consonant-vowel case) RV is the region after the third letter. - if (i > 2) - return value.substr(3); - + } + + // RV - AND otherwise (consonant-vowel case) RV is the region after the third letter. 
+ if (i > 2) { + return value.substr(3); + } + + return L""; +} + +String BrazilianStemmer::changeTerm(const String& value) { + if (value.empty()) { return L""; } - - String BrazilianStemmer::changeTerm(const String& value) - { - if (value.empty()) - return L""; - - String lowerValue(StringUtils::toLower(value)); - String r; - - for (int32_t j = 0; j < (int32_t)value.length(); ++j) - { - if (value[j] == 0x00e1 || value[j] == 0x00e2 || value[j] == 0x00e3) - { - r += L"a"; - continue; - } - if (value[j] == 0x00e9 || value[j] == 0x00ea) - { - r += L"e"; - continue; - } - if (value[j] == 0x00ed) - { - r += L"i"; - continue; - } - if (value[j] == 0x00f3 || value[j] == 0x00f4 || value[j] == 0x00f5) - { - r += L"o"; - continue; - } - if (value[j] == 0x00fa || value[j] == 0x00fc) - { - r += L"u"; - continue; - } - if (value[j] == 0x00e7) - { - r += L"c"; - continue; - } - if (value[j] == 0x00f1) - { - r += L"n"; - continue; - } - r += value[j]; + String lowerValue(StringUtils::toLower(value)); + String r; + + for (int32_t j = 0; j < (int32_t)value.length(); ++j) { + if (value[j] == 0x00e1 || value[j] == 0x00e2 || value[j] == 0x00e3) { + r += L"a"; + continue; + } + if (value[j] == 0x00e9 || value[j] == 0x00ea) { + r += L"e"; + continue; + } + if (value[j] == 0x00ed) { + r += L"i"; + continue; + } + if (value[j] == 0x00f3 || value[j] == 0x00f4 || value[j] == 0x00f5) { + r += L"o"; + continue; + } + if (value[j] == 0x00fa || value[j] == 0x00fc) { + r += L"u"; + continue; + } + if (value[j] == 0x00e7) { + r += L"c"; + continue; + } + if (value[j] == 0x00f1) { + r += L"n"; + continue; } - return r ; + r += value[j]; } - - bool BrazilianStemmer::checkSuffix(const String& value, const String& suffix) - { - if (value.empty() || suffix.empty()) - return false; - if (suffix.length() > value.length()) - return false; - return (value.substr(value.length() - suffix.length()) == suffix); + + return r ; +} + +bool BrazilianStemmer::checkSuffix(const String& value, const String& suffix) 
{ + if (value.empty() || suffix.empty()) { + return false; } - - String BrazilianStemmer::replaceSuffix(const String& value, const String& toReplace, const String& changeTo) - { - if (value.empty() || toReplace.empty() || changeTo.empty()) - return value; - - String vvalue = removeSuffix(value, toReplace); - - if (value == vvalue) - return value; - else - return vvalue + changeTo; + if (suffix.length() > value.length()) { + return false; } - - String BrazilianStemmer::removeSuffix(const String& value, const String& toRemove) - { - if (value.empty() || toRemove.empty() || !checkSuffix(value, toRemove)) - return value; - return value.substr(0, value.length() - toRemove.length()); + return (value.substr(value.length() - suffix.length()) == suffix); +} + +String BrazilianStemmer::replaceSuffix(const String& value, const String& toReplace, const String& changeTo) { + if (value.empty() || toReplace.empty() || changeTo.empty()) { + return value; } - - bool BrazilianStemmer::suffixPreceded(const String& value, const String& suffix, const String& preceded) - { - if (value.empty() || suffix.empty() || preceded.empty() || !checkSuffix(value, suffix)) - return false; - return checkSuffix(removeSuffix(value, suffix), preceded); + + String vvalue = removeSuffix(value, toReplace); + + if (value == vvalue) { + return value; + } else { + return vvalue + changeTo; } - - void BrazilianStemmer::createCT(const String& term) - { - CT = changeTerm(term); +} - if (CT.length() < 2) - return; +String BrazilianStemmer::removeSuffix(const String& value, const String& toRemove) { + if (value.empty() || toRemove.empty() || !checkSuffix(value, toRemove)) { + return value; + } + return value.substr(0, value.length() - toRemove.length()); +} + +bool BrazilianStemmer::suffixPreceded(const String& value, const String& suffix, const String& preceded) { + if (value.empty() || suffix.empty() || preceded.empty() || !checkSuffix(value, suffix)) { + return false; + } + return 
checkSuffix(removeSuffix(value, suffix), preceded); +} - // if the first character is ... , remove it - if (CT[0] == L'"' || CT[0] == L'\'' || CT[0] == L'-' || CT[0] == L',' || - CT[0] == L';' || CT[0] == L'.' || CT[0] == L'?' || CT[0] == L'!') - CT = CT.substr(1); +void BrazilianStemmer::createCT(const String& term) { + CT = changeTerm(term); - if (CT.length() < 2) - return; + if (CT.length() < 2) { + return; + } + + // if the first character is ... , remove it + if (CT[0] == L'"' || CT[0] == L'\'' || CT[0] == L'-' || CT[0] == L',' || + CT[0] == L';' || CT[0] == L'.' || CT[0] == L'?' || CT[0] == L'!') { + CT = CT.substr(1); + } + + if (CT.length() < 2) { + return; + } - // if the last character is ... , remove it - if (CT[CT.length() - 1] == L'-' || CT[CT.length() - 1] == L',' || CT[CT.length() - 1] == L';' || + // if the last character is ... , remove it + if (CT[CT.length() - 1] == L'-' || CT[CT.length() - 1] == L',' || CT[CT.length() - 1] == L';' || CT[CT.length() - 1] == L'.' || CT[CT.length() - 1] == L'?' || CT[CT.length() - 1] == L'!' 
|| - CT[CT.length() - 1] == L'\'' || CT[CT.length() - 1] == L'"') - CT = CT.substr(0, CT.length() - 1); + CT[CT.length() - 1] == L'\'' || CT[CT.length() - 1] == L'"') { + CT = CT.substr(0, CT.length() - 1); } - - bool BrazilianStemmer::step1() - { - if (CT.empty()) - return false; - - // suffix length = 7 - if (checkSuffix(CT, L"uciones") && checkSuffix(R2, L"uciones")) - { - CT = replaceSuffix(CT, L"uciones", L"u"); +} + +bool BrazilianStemmer::step1() { + if (CT.empty()) { + return false; + } + + // suffix length = 7 + if (checkSuffix(CT, L"uciones") && checkSuffix(R2, L"uciones")) { + CT = replaceSuffix(CT, L"uciones", L"u"); + return true; + } + + // suffix length = 6 + if (CT.length() >= 6) { + if (checkSuffix(CT, L"imentos") && checkSuffix(R2, L"imentos")) { + CT = removeSuffix(CT, L"imentos"); return true; } - - // suffix length = 6 - if (CT.length() >= 6) - { - if (checkSuffix(CT, L"imentos") && checkSuffix(R2, L"imentos")) - { - CT = removeSuffix(CT, L"imentos"); - return true; - } - if (checkSuffix(CT, L"amentos") && checkSuffix(R2, L"amentos")) - { - CT = removeSuffix(CT, L"amentos"); - return true; - } - if (checkSuffix(CT, L"adores") && checkSuffix(R2, L"adores")) - { - CT = removeSuffix(CT, L"adores"); - return true; - } - if (checkSuffix(CT, L"adoras") && checkSuffix(R2, L"adoras")) - { - CT = removeSuffix(CT, L"adoras"); - return true; - } - if (checkSuffix(CT, L"logias") && checkSuffix(R2, L"logias")) - { - replaceSuffix(CT, L"logias", L"log"); - return true; - } - if (checkSuffix(CT, L"encias") && checkSuffix(R2, L"encias")) - { - CT = replaceSuffix(CT, L"encias", L"ente"); - return true; - } - if (checkSuffix(CT, L"amente") && checkSuffix(R1, L"amente")) - { - CT = removeSuffix(CT, L"amente"); - return true; - } - if (checkSuffix(CT, L"idades") && checkSuffix(R2, L"idades")) - { - CT = removeSuffix(CT, L"idades"); - return true; - } + if (checkSuffix(CT, L"amentos") && checkSuffix(R2, L"amentos")) { + CT = removeSuffix(CT, L"amentos"); + return 
true; + } + if (checkSuffix(CT, L"adores") && checkSuffix(R2, L"adores")) { + CT = removeSuffix(CT, L"adores"); + return true; + } + if (checkSuffix(CT, L"adoras") && checkSuffix(R2, L"adoras")) { + CT = removeSuffix(CT, L"adoras"); + return true; + } + if (checkSuffix(CT, L"logias") && checkSuffix(R2, L"logias")) { + replaceSuffix(CT, L"logias", L"log"); + return true; + } + if (checkSuffix(CT, L"encias") && checkSuffix(R2, L"encias")) { + CT = replaceSuffix(CT, L"encias", L"ente"); + return true; + } + if (checkSuffix(CT, L"amente") && checkSuffix(R1, L"amente")) { + CT = removeSuffix(CT, L"amente"); + return true; + } + if (checkSuffix(CT, L"idades") && checkSuffix(R2, L"idades")) { + CT = removeSuffix(CT, L"idades"); + return true; } + } - // suffix length = 5 - if (CT.length() >= 5) - { - if (checkSuffix(CT, L"acoes") && checkSuffix(R2, L"acoes")) - { - CT = removeSuffix(CT, L"acoes"); - return true; - } - if (checkSuffix(CT, L"imento") && checkSuffix(R2, L"imento")) - { - CT = removeSuffix(CT, L"imento"); - return true; - } - if (checkSuffix(CT, L"amento") && checkSuffix(R2, L"amento")) - { - CT = removeSuffix(CT, L"amento"); - return true; - } - if (checkSuffix(CT, L"adora") && checkSuffix(R2, L"adora")) - { - CT = removeSuffix(CT, L"adora"); - return true; - } - if (checkSuffix(CT, L"ismos") && checkSuffix(R2, L"ismos")) - { - CT = removeSuffix(CT, L"ismos"); - return true; - } - if (checkSuffix(CT, L"istas") && checkSuffix(R2, L"istas")) - { - CT = removeSuffix(CT, L"istas"); - return true; - } - if (checkSuffix(CT, L"logia") && checkSuffix(R2, L"logia")) - { - CT = replaceSuffix(CT, L"logia", L"log"); - return true; - } - if (checkSuffix(CT, L"ucion") && checkSuffix(R2, L"ucion")) - { - CT = replaceSuffix(CT, L"ucion", L"u"); - return true; - } - if (checkSuffix(CT, L"encia") && checkSuffix(R2, L"encia")) - { - CT = replaceSuffix(CT, L"encia", L"ente"); - return true; - } - if (checkSuffix(CT, L"mente") && checkSuffix(R2, L"mente")) - { - CT = 
removeSuffix(CT, L"mente"); - return true; - } - if (checkSuffix(CT, L"idade") && checkSuffix(R2, L"idade")) - { - CT = removeSuffix(CT, L"idade"); - return true; - } + // suffix length = 5 + if (CT.length() >= 5) { + if (checkSuffix(CT, L"acoes") && checkSuffix(R2, L"acoes")) { + CT = removeSuffix(CT, L"acoes"); + return true; + } + if (checkSuffix(CT, L"imento") && checkSuffix(R2, L"imento")) { + CT = removeSuffix(CT, L"imento"); + return true; + } + if (checkSuffix(CT, L"amento") && checkSuffix(R2, L"amento")) { + CT = removeSuffix(CT, L"amento"); + return true; + } + if (checkSuffix(CT, L"adora") && checkSuffix(R2, L"adora")) { + CT = removeSuffix(CT, L"adora"); + return true; + } + if (checkSuffix(CT, L"ismos") && checkSuffix(R2, L"ismos")) { + CT = removeSuffix(CT, L"ismos"); + return true; + } + if (checkSuffix(CT, L"istas") && checkSuffix(R2, L"istas")) { + CT = removeSuffix(CT, L"istas"); + return true; + } + if (checkSuffix(CT, L"logia") && checkSuffix(R2, L"logia")) { + CT = replaceSuffix(CT, L"logia", L"log"); + return true; } + if (checkSuffix(CT, L"ucion") && checkSuffix(R2, L"ucion")) { + CT = replaceSuffix(CT, L"ucion", L"u"); + return true; + } + if (checkSuffix(CT, L"encia") && checkSuffix(R2, L"encia")) { + CT = replaceSuffix(CT, L"encia", L"ente"); + return true; + } + if (checkSuffix(CT, L"mente") && checkSuffix(R2, L"mente")) { + CT = removeSuffix(CT, L"mente"); + return true; + } + if (checkSuffix(CT, L"idade") && checkSuffix(R2, L"idade")) { + CT = removeSuffix(CT, L"idade"); + return true; + } + } - // suffix length = 4 - if (CT.length() >= 4) - { - if (checkSuffix(CT, L"acao") && checkSuffix(R2, L"acao")) - { - CT = removeSuffix(CT, L"acao"); - return true; - } - if (checkSuffix(CT, L"ezas") && checkSuffix(R2, L"ezas")) - { - CT = removeSuffix(CT, L"ezas"); - return true; - } - if (checkSuffix(CT, L"icos") && checkSuffix(R2, L"icos")) - { - CT = removeSuffix(CT, L"icos"); - return true; - } - if (checkSuffix(CT, L"icas") && checkSuffix(R2, 
L"icas")) - { - CT = removeSuffix(CT, L"icas"); - return true; - } - if (checkSuffix(CT, L"ismo") && checkSuffix(R2, L"ismo")) - { - CT = removeSuffix(CT, L"ismo"); - return true; - } - if (checkSuffix(CT, L"avel") && checkSuffix(R2, L"avel")) - { - CT = removeSuffix(CT, L"avel"); - return true; - } - if (checkSuffix(CT, L"ivel") && checkSuffix(R2, L"ivel")) - { - CT = removeSuffix(CT, L"ivel"); - return true; - } - if (checkSuffix(CT, L"ista") && checkSuffix(R2, L"ista")) - { - CT = removeSuffix(CT, L"ista"); - return true; - } - if (checkSuffix(CT, L"osos") && checkSuffix(R2, L"osos")) - { - CT = removeSuffix(CT, L"osos"); - return true; - } - if (checkSuffix(CT, L"osas") && checkSuffix(R2, L"osas")) - { - CT = removeSuffix(CT, L"osas"); - return true; - } - if (checkSuffix(CT, L"ador") && checkSuffix(R2, L"ador")) - { - CT = removeSuffix(CT, L"ador"); - return true; - } - if (checkSuffix(CT, L"ivas") && checkSuffix(R2, L"ivas")) - { - CT = removeSuffix(CT, L"ivas"); - return true; - } - if (checkSuffix(CT, L"ivos") && checkSuffix(R2, L"ivos")) - { - CT = removeSuffix(CT, L"ivos"); - return true; - } - if (checkSuffix(CT, L"iras") && checkSuffix(RV, L"iras") && suffixPreceded(CT, L"iras", L"e")) - { - CT = replaceSuffix(CT, L"iras", L"ir"); - return true; - } + // suffix length = 4 + if (CT.length() >= 4) { + if (checkSuffix(CT, L"acao") && checkSuffix(R2, L"acao")) { + CT = removeSuffix(CT, L"acao"); + return true; + } + if (checkSuffix(CT, L"ezas") && checkSuffix(R2, L"ezas")) { + CT = removeSuffix(CT, L"ezas"); + return true; + } + if (checkSuffix(CT, L"icos") && checkSuffix(R2, L"icos")) { + CT = removeSuffix(CT, L"icos"); + return true; + } + if (checkSuffix(CT, L"icas") && checkSuffix(R2, L"icas")) { + CT = removeSuffix(CT, L"icas"); + return true; + } + if (checkSuffix(CT, L"ismo") && checkSuffix(R2, L"ismo")) { + CT = removeSuffix(CT, L"ismo"); + return true; + } + if (checkSuffix(CT, L"avel") && checkSuffix(R2, L"avel")) { + CT = removeSuffix(CT, 
L"avel"); + return true; + } + if (checkSuffix(CT, L"ivel") && checkSuffix(R2, L"ivel")) { + CT = removeSuffix(CT, L"ivel"); + return true; + } + if (checkSuffix(CT, L"ista") && checkSuffix(R2, L"ista")) { + CT = removeSuffix(CT, L"ista"); + return true; + } + if (checkSuffix(CT, L"osos") && checkSuffix(R2, L"osos")) { + CT = removeSuffix(CT, L"osos"); + return true; + } + if (checkSuffix(CT, L"osas") && checkSuffix(R2, L"osas")) { + CT = removeSuffix(CT, L"osas"); + return true; + } + if (checkSuffix(CT, L"ador") && checkSuffix(R2, L"ador")) { + CT = removeSuffix(CT, L"ador"); + return true; + } + if (checkSuffix(CT, L"ivas") && checkSuffix(R2, L"ivas")) { + CT = removeSuffix(CT, L"ivas"); + return true; + } + if (checkSuffix(CT, L"ivos") && checkSuffix(R2, L"ivos")) { + CT = removeSuffix(CT, L"ivos"); + return true; } + if (checkSuffix(CT, L"iras") && checkSuffix(RV, L"iras") && suffixPreceded(CT, L"iras", L"e")) { + CT = replaceSuffix(CT, L"iras", L"ir"); + return true; + } + } - // suffix length = 3 - if (CT.length() >= 3) - { - if (checkSuffix(CT, L"eza") && checkSuffix(R2, L"eza")) - { - CT = removeSuffix(CT, L"eza"); - return true; - } - if (checkSuffix(CT, L"ico") && checkSuffix(R2, L"ico")) - { - CT = removeSuffix(CT, L"ico"); - return true; - } - if (checkSuffix(CT, L"ica") && checkSuffix(R2, L"ica")) - { - CT = removeSuffix(CT, L"ica"); - return true; - } - if (checkSuffix(CT, L"oso") && checkSuffix(R2, L"oso")) - { - CT = removeSuffix(CT, L"oso"); - return true; - } - if (checkSuffix(CT, L"osa") && checkSuffix(R2, L"osa")) - { - CT = removeSuffix(CT, L"osa"); - return true; - } - if (checkSuffix(CT, L"iva") && checkSuffix(R2, L"iva")) - { - CT = removeSuffix(CT, L"iva"); - return true; - } - if (checkSuffix(CT, L"ivo") && checkSuffix(R2, L"ivo")) - { - CT = removeSuffix(CT, L"ivo"); - return true; - } - if (checkSuffix(CT, L"ira") && checkSuffix(RV, L"ira") && suffixPreceded(CT, L"ira", L"e")) - { - CT = replaceSuffix(CT, L"ira", L"ir"); - return true; 
- } + // suffix length = 3 + if (CT.length() >= 3) { + if (checkSuffix(CT, L"eza") && checkSuffix(R2, L"eza")) { + CT = removeSuffix(CT, L"eza"); + return true; + } + if (checkSuffix(CT, L"ico") && checkSuffix(R2, L"ico")) { + CT = removeSuffix(CT, L"ico"); + return true; + } + if (checkSuffix(CT, L"ica") && checkSuffix(R2, L"ica")) { + CT = removeSuffix(CT, L"ica"); + return true; } + if (checkSuffix(CT, L"oso") && checkSuffix(R2, L"oso")) { + CT = removeSuffix(CT, L"oso"); + return true; + } + if (checkSuffix(CT, L"osa") && checkSuffix(R2, L"osa")) { + CT = removeSuffix(CT, L"osa"); + return true; + } + if (checkSuffix(CT, L"iva") && checkSuffix(R2, L"iva")) { + CT = removeSuffix(CT, L"iva"); + return true; + } + if (checkSuffix(CT, L"ivo") && checkSuffix(R2, L"ivo")) { + CT = removeSuffix(CT, L"ivo"); + return true; + } + if (checkSuffix(CT, L"ira") && checkSuffix(RV, L"ira") && suffixPreceded(CT, L"ira", L"e")) { + CT = replaceSuffix(CT, L"ira", L"ir"); + return true; + } + } + + // no ending was removed by step1 + return false; +} - // no ending was removed by step1 +bool BrazilianStemmer::step2() { + if (RV.empty()) { return false; } - - bool BrazilianStemmer::step2() - { - if (RV.empty()) - return false; - - // suffix lenght = 7 - if (RV.length() >= 7) - { - if (checkSuffix(RV, L"issemos")) - { - CT = removeSuffix(CT, L"issemos"); - return true; - } - if (checkSuffix(RV, L"essemos")) - { - CT = removeSuffix(CT, L"essemos"); - return true; - } - if (checkSuffix(RV, L"assemos")) - { - CT = removeSuffix(CT, L"assemos"); - return true; - } - if (checkSuffix(RV, L"ariamos")) - { - CT = removeSuffix(CT, L"ariamos"); - return true; - } - if (checkSuffix(RV, L"eriamos")) - { - CT = removeSuffix(CT, L"eriamos"); - return true; - } - if (checkSuffix(RV, L"iriamos")) - { - CT = removeSuffix(CT, L"iriamos"); - return true; - } + + // suffix lenght = 7 + if (RV.length() >= 7) { + if (checkSuffix(RV, L"issemos")) { + CT = removeSuffix(CT, L"issemos"); + return true; + } + 
if (checkSuffix(RV, L"essemos")) { + CT = removeSuffix(CT, L"essemos"); + return true; + } + if (checkSuffix(RV, L"assemos")) { + CT = removeSuffix(CT, L"assemos"); + return true; + } + if (checkSuffix(RV, L"ariamos")) { + CT = removeSuffix(CT, L"ariamos"); + return true; + } + if (checkSuffix(RV, L"eriamos")) { + CT = removeSuffix(CT, L"eriamos"); + return true; + } + if (checkSuffix(RV, L"iriamos")) { + CT = removeSuffix(CT, L"iriamos"); + return true; } + } - // suffix length = 6 - if (RV.length() >= 6) - { - if (checkSuffix(RV, L"iremos")) - { - CT = removeSuffix(CT, L"iremos"); - return true; - } - if (checkSuffix(RV, L"eremos")) - { - CT = removeSuffix(CT, L"eremos"); - return true; - } - if (checkSuffix(RV, L"aremos")) - { - CT = removeSuffix(CT, L"aremos"); - return true; - } - if (checkSuffix(RV, L"avamos")) - { - CT = removeSuffix(CT, L"avamos"); - return true; - } - if (checkSuffix(RV, L"iramos")) - { - CT = removeSuffix(CT, L"iramos"); - return true; - } - if (checkSuffix(RV, L"eramos")) - { - CT = removeSuffix(CT, L"eramos"); - return true; - } - if (checkSuffix(RV, L"aramos")) - { - CT = removeSuffix(CT, L"aramos"); - return true; - } - if (checkSuffix(RV, L"asseis")) - { - CT = removeSuffix(CT, L"asseis"); - return true; - } - if (checkSuffix(RV, L"esseis")) - { - CT = removeSuffix(CT, L"esseis"); - return true; - } - if (checkSuffix(RV, L"isseis")) - { - CT = removeSuffix(CT, L"isseis"); - return true; - } - if (checkSuffix(RV, L"arieis")) - { - CT = removeSuffix(CT, L"arieis"); - return true; - } - if (checkSuffix(RV, L"erieis")) - { - CT = removeSuffix(CT, L"erieis"); - return true; - } - if (checkSuffix(RV, L"irieis")) - { - CT = removeSuffix(CT, L"irieis"); - return true; - } + // suffix length = 6 + if (RV.length() >= 6) { + if (checkSuffix(RV, L"iremos")) { + CT = removeSuffix(CT, L"iremos"); + return true; } - - // suffix length = 5 - if (RV.length() >= 5) - { - if (checkSuffix(RV, L"irmos")) - { - CT = removeSuffix(CT, L"irmos"); - return 
true; - } - if (checkSuffix(RV, L"iamos")) - { - CT = removeSuffix(CT, L"iamos"); - return true; - } - if (checkSuffix(RV, L"armos")) - { - CT = removeSuffix(CT, L"armos"); - return true; - } - if (checkSuffix(RV, L"ermos")) - { - CT = removeSuffix(CT, L"ermos"); - return true; - } - if (checkSuffix(RV, L"areis")) - { - CT = removeSuffix(CT, L"areis"); - return true; - } - if (checkSuffix(RV, L"ereis")) - { - CT = removeSuffix(CT, L"ereis"); - return true; - } - if (checkSuffix(RV, L"ireis")) - { - CT = removeSuffix(CT, L"ireis"); - return true; - } - if (checkSuffix(RV, L"asses")) - { - CT = removeSuffix(CT, L"asses"); - return true; - } - if (checkSuffix(RV, L"esses")) - { - CT = removeSuffix(CT, L"esses"); - return true; - } - if (checkSuffix(RV, L"isses")) - { - CT = removeSuffix(CT, L"isses"); - return true; - } - if (checkSuffix(RV, L"astes")) - { - CT = removeSuffix(CT, L"astes"); - return true; - } - if (checkSuffix(RV, L"assem")) - { - CT = removeSuffix(CT, L"assem"); - return true; - } - if (checkSuffix(RV, L"essem")) - { - CT = removeSuffix(CT, L"essem"); - return true; - } - if (checkSuffix(RV, L"issem")) - { - CT = removeSuffix(CT, L"issem"); - return true; - } - if (checkSuffix(RV, L"ardes")) - { - CT = removeSuffix(CT, L"ardes"); - return true; - } - if (checkSuffix(RV, L"erdes")) - { - CT = removeSuffix(CT, L"erdes"); - return true; - } - if (checkSuffix(RV, L"irdes")) - { - CT = removeSuffix(CT, L"irdes"); - return true; - } - if (checkSuffix(RV, L"ariam")) - { - CT = removeSuffix(CT, L"ariam"); - return true; - } - if (checkSuffix(RV, L"eriam")) - { - CT = removeSuffix(CT, L"eriam"); - return true; - } - if (checkSuffix(RV, L"iriam")) - { - CT = removeSuffix(CT, L"iriam"); - return true; - } - if (checkSuffix(RV, L"arias")) - { - CT = removeSuffix(CT, L"arias"); - return true; - } - if (checkSuffix(RV, L"erias")) - { - CT = removeSuffix(CT, L"erias"); - return true; - } - if (checkSuffix(RV, L"irias")) - { - CT = removeSuffix(CT, L"irias"); - 
return true; - } - if (checkSuffix(RV, L"estes")) - { - CT = removeSuffix(CT, L"estes"); - return true; - } - if (checkSuffix(RV, L"istes")) - { - CT = removeSuffix(CT, L"istes"); - return true; - } - if (checkSuffix(RV, L"areis")) - { - CT = removeSuffix(CT, L"areis"); - return true; - } - if (checkSuffix(RV, L"aveis")) - { - CT = removeSuffix(CT, L"aveis"); - return true; - } + if (checkSuffix(RV, L"eremos")) { + CT = removeSuffix(CT, L"eremos"); + return true; } - - // suffix length = 4 - if (RV.length() >= 4) - { - if (checkSuffix(RV, L"aria")) - { - CT = removeSuffix(CT, L"aria"); - return true; - } - if (checkSuffix(RV, L"eria")) - { - CT = removeSuffix(CT, L"eria"); - return true; - } - if (checkSuffix(RV, L"iria")) - { - CT = removeSuffix(CT, L"iria"); - return true; - } - if (checkSuffix(RV, L"asse")) - { - CT = removeSuffix(CT, L"asse"); - return true; - } - if (checkSuffix(RV, L"esse")) - { - CT = removeSuffix(CT, L"esse"); - return true; - } - if (checkSuffix(RV, L"isse")) - { - CT = removeSuffix(CT, L"isse"); - return true; - } - if (checkSuffix(RV, L"aste")) - { - CT = removeSuffix(CT, L"aste"); - return true; - } - if (checkSuffix(RV, L"este")) - { - CT = removeSuffix(CT, L"este"); - return true; - } - if (checkSuffix(RV, L"iste")) - { - CT = removeSuffix(CT, L"iste"); - return true; - } - if (checkSuffix(RV, L"arei")) - { - CT = removeSuffix(CT, L"arei"); - return true; - } - if (checkSuffix(RV, L"erei")) - { - CT = removeSuffix(CT, L"erei"); - return true; - } - if (checkSuffix(RV, L"irei")) - { - CT = removeSuffix(CT, L"irei"); - return true; - } - if (checkSuffix(RV, L"aram")) - { - CT = removeSuffix(CT, L"aram"); - return true; - } - if (checkSuffix(RV, L"eram")) - { - CT = removeSuffix(CT, L"eram"); - return true; - } - if (checkSuffix(RV, L"iram")) - { - CT = removeSuffix(CT, L"iram"); - return true; - } - if (checkSuffix(RV, L"avam")) - { - CT = removeSuffix(CT, L"avam"); - return true; - } - if (checkSuffix(RV, L"arem")) - { - CT = 
removeSuffix(CT, L"arem"); - return true; - } - if (checkSuffix(RV, L"erem")) - { - CT = removeSuffix(CT, L"erem"); - return true; - } - if (checkSuffix(RV, L"irem")) - { - CT = removeSuffix(CT, L"irem"); - return true; - } - if (checkSuffix(RV, L"ando")) - { - CT = removeSuffix(CT, L"ando"); - return true; - } - if (checkSuffix(RV, L"endo")) - { - CT = removeSuffix(CT, L"endo"); - return true; - } - if (checkSuffix(RV, L"indo")) - { - CT = removeSuffix(CT, L"indo"); - return true; - } - if (checkSuffix(RV, L"arao")) - { - CT = removeSuffix(CT, L"arao"); - return true; - } - if (checkSuffix(RV, L"erao")) - { - CT = removeSuffix(CT, L"erao"); - return true; - } - if (checkSuffix(RV, L"irao")) - { - CT = removeSuffix(CT, L"irao"); - return true; - } - if (checkSuffix(RV, L"adas")) - { - CT = removeSuffix(CT, L"adas"); - return true; - } - if (checkSuffix(RV, L"idas")) - { - CT = removeSuffix(CT, L"idas"); - return true; - } - if (checkSuffix(RV, L"aras")) - { - CT = removeSuffix(CT, L"aras"); - return true; - } - if (checkSuffix(RV, L"eras")) - { - CT = removeSuffix(CT, L"eras"); - return true; - } - if (checkSuffix(RV, L"iras")) - { - CT = removeSuffix(CT, L"iras"); - return true; - } - if (checkSuffix(RV, L"avas")) - { - CT = removeSuffix(CT, L"avas"); - return true; - } - if (checkSuffix(RV, L"ares")) - { - CT = removeSuffix(CT, L"ares"); - return true; - } - if (checkSuffix(RV, L"eres")) - { - CT = removeSuffix(CT, L"eres"); - return true; - } - if (checkSuffix(RV, L"ires")) - { - CT = removeSuffix(CT, L"ires"); - return true; - } - if (checkSuffix(RV, L"ados")) - { - CT = removeSuffix(CT, L"ados"); - return true; - } - if (checkSuffix(RV, L"idos")) - { - CT = removeSuffix(CT, L"idos"); - return true; - } - if (checkSuffix(RV, L"amos")) - { - CT = removeSuffix(CT, L"amos"); - return true; - } - if (checkSuffix(RV, L"emos")) - { - CT = removeSuffix(CT, L"emos"); - return true; - } - if (checkSuffix(RV, L"imos")) - { - CT = removeSuffix(CT, L"imos"); - return true; 
- } - if (checkSuffix(RV, L"iras")) - { - CT = removeSuffix(CT, L"iras"); - return true; - } - if (checkSuffix(RV, L"ieis")) - { - CT = removeSuffix(CT, L"ieis"); - return true; - } + if (checkSuffix(RV, L"aremos")) { + CT = removeSuffix(CT, L"aremos"); + return true; } - - // suffix length = 3 - if (RV.length() >= 3) - { - if (checkSuffix(RV, L"ada")) - { - CT = removeSuffix(CT, L"ada"); - return true; - } - if (checkSuffix(RV, L"ida")) - { - CT = removeSuffix(CT, L"ida"); - return true; - } - if (checkSuffix(RV, L"ara")) - { - CT = removeSuffix(CT, L"ara"); - return true; - } - if (checkSuffix(RV, L"era")) - { - CT = removeSuffix(CT, L"era"); - return true; - } - if (checkSuffix(RV, L"ira")) - { - CT = removeSuffix(CT, L"ava"); - return true; - } - if (checkSuffix(RV, L"iam")) - { - CT = removeSuffix(CT, L"iam"); - return true; - } - if (checkSuffix(RV, L"ado")) - { - CT = removeSuffix(CT, L"ado"); - return true; - } - if (checkSuffix(RV, L"ido")) - { - CT = removeSuffix(CT, L"ido"); - return true; - } - if (checkSuffix(RV, L"ias")) - { - CT = removeSuffix(CT, L"ias"); - return true; - } - if (checkSuffix(RV, L"ais")) - { - CT = removeSuffix(CT, L"ais"); - return true; - } - if (checkSuffix(RV, L"eis")) - { - CT = removeSuffix(CT, L"eis"); - return true; - } - if (checkSuffix(RV, L"ira")) - { - CT = removeSuffix(CT, L"ira"); - return true; - } - if (checkSuffix(RV, L"ear")) - { - CT = removeSuffix(CT, L"ear"); - return true; - } + if (checkSuffix(RV, L"avamos")) { + CT = removeSuffix(CT, L"avamos"); + return true; + } + if (checkSuffix(RV, L"iramos")) { + CT = removeSuffix(CT, L"iramos"); + return true; } + if (checkSuffix(RV, L"eramos")) { + CT = removeSuffix(CT, L"eramos"); + return true; + } + if (checkSuffix(RV, L"aramos")) { + CT = removeSuffix(CT, L"aramos"); + return true; + } + if (checkSuffix(RV, L"asseis")) { + CT = removeSuffix(CT, L"asseis"); + return true; + } + if (checkSuffix(RV, L"esseis")) { + CT = removeSuffix(CT, L"esseis"); + return true; + } 
+ if (checkSuffix(RV, L"isseis")) { + CT = removeSuffix(CT, L"isseis"); + return true; + } + if (checkSuffix(RV, L"arieis")) { + CT = removeSuffix(CT, L"arieis"); + return true; + } + if (checkSuffix(RV, L"erieis")) { + CT = removeSuffix(CT, L"erieis"); + return true; + } + if (checkSuffix(RV, L"irieis")) { + CT = removeSuffix(CT, L"irieis"); + return true; + } + } - // suffix length = 2 - if (RV.length() >= 2) - { - if (checkSuffix(RV, L"ia")) - { - CT = removeSuffix(CT, L"ia"); - return true; - } - if (checkSuffix(RV, L"ei")) - { - CT = removeSuffix(CT, L"ei"); - return true; - } - if (checkSuffix(RV, L"am")) - { - CT = removeSuffix(CT, L"am"); - return true; - } - if (checkSuffix(RV, L"em")) - { - CT = removeSuffix(CT, L"em"); - return true; - } - if (checkSuffix(RV, L"ar")) - { - CT = removeSuffix(CT, L"ar"); - return true; - } - if (checkSuffix(RV, L"er")) - { - CT = removeSuffix(CT, L"er"); - return true; - } - if (checkSuffix(RV, L"ir")) - { - CT = removeSuffix(CT, L"ir"); - return true; - } - if (checkSuffix(RV, L"as")) - { - CT = removeSuffix(CT, L"as"); - return true; - } - if (checkSuffix(RV, L"es")) - { - CT = removeSuffix(CT, L"es"); - return true; - } - if (checkSuffix(RV, L"is")) - { - CT = removeSuffix(CT, L"is"); - return true; - } - if (checkSuffix(RV, L"eu")) - { - CT = removeSuffix(CT, L"eu"); - return true; - } - if (checkSuffix(RV, L"iu")) - { - CT = removeSuffix(CT, L"iu"); - return true; - } - if (checkSuffix(RV, L"iu")) - { - CT = removeSuffix(CT, L"iu"); - return true; - } - if (checkSuffix(RV, L"ou")) - { - CT = removeSuffix(CT, L"ou"); - return true; - } + // suffix length = 5 + if (RV.length() >= 5) { + if (checkSuffix(RV, L"irmos")) { + CT = removeSuffix(CT, L"irmos"); + return true; } + if (checkSuffix(RV, L"iamos")) { + CT = removeSuffix(CT, L"iamos"); + return true; + } + if (checkSuffix(RV, L"armos")) { + CT = removeSuffix(CT, L"armos"); + return true; + } + if (checkSuffix(RV, L"ermos")) { + CT = removeSuffix(CT, L"ermos"); + 
return true; + } + if (checkSuffix(RV, L"areis")) { + CT = removeSuffix(CT, L"areis"); + return true; + } + if (checkSuffix(RV, L"ereis")) { + CT = removeSuffix(CT, L"ereis"); + return true; + } + if (checkSuffix(RV, L"ireis")) { + CT = removeSuffix(CT, L"ireis"); + return true; + } + if (checkSuffix(RV, L"asses")) { + CT = removeSuffix(CT, L"asses"); + return true; + } + if (checkSuffix(RV, L"esses")) { + CT = removeSuffix(CT, L"esses"); + return true; + } + if (checkSuffix(RV, L"isses")) { + CT = removeSuffix(CT, L"isses"); + return true; + } + if (checkSuffix(RV, L"astes")) { + CT = removeSuffix(CT, L"astes"); + return true; + } + if (checkSuffix(RV, L"assem")) { + CT = removeSuffix(CT, L"assem"); + return true; + } + if (checkSuffix(RV, L"essem")) { + CT = removeSuffix(CT, L"essem"); + return true; + } + if (checkSuffix(RV, L"issem")) { + CT = removeSuffix(CT, L"issem"); + return true; + } + if (checkSuffix(RV, L"ardes")) { + CT = removeSuffix(CT, L"ardes"); + return true; + } + if (checkSuffix(RV, L"erdes")) { + CT = removeSuffix(CT, L"erdes"); + return true; + } + if (checkSuffix(RV, L"irdes")) { + CT = removeSuffix(CT, L"irdes"); + return true; + } + if (checkSuffix(RV, L"ariam")) { + CT = removeSuffix(CT, L"ariam"); + return true; + } + if (checkSuffix(RV, L"eriam")) { + CT = removeSuffix(CT, L"eriam"); + return true; + } + if (checkSuffix(RV, L"iriam")) { + CT = removeSuffix(CT, L"iriam"); + return true; + } + if (checkSuffix(RV, L"arias")) { + CT = removeSuffix(CT, L"arias"); + return true; + } + if (checkSuffix(RV, L"erias")) { + CT = removeSuffix(CT, L"erias"); + return true; + } + if (checkSuffix(RV, L"irias")) { + CT = removeSuffix(CT, L"irias"); + return true; + } + if (checkSuffix(RV, L"estes")) { + CT = removeSuffix(CT, L"estes"); + return true; + } + if (checkSuffix(RV, L"istes")) { + CT = removeSuffix(CT, L"istes"); + return true; + } + if (checkSuffix(RV, L"areis")) { + CT = removeSuffix(CT, L"areis"); + return true; + } + if (checkSuffix(RV, 
L"aveis")) { + CT = removeSuffix(CT, L"aveis"); + return true; + } + } - // no ending was removed by step2 - return false; + // suffix length = 4 + if (RV.length() >= 4) { + if (checkSuffix(RV, L"aria")) { + CT = removeSuffix(CT, L"aria"); + return true; + } + if (checkSuffix(RV, L"eria")) { + CT = removeSuffix(CT, L"eria"); + return true; + } + if (checkSuffix(RV, L"iria")) { + CT = removeSuffix(CT, L"iria"); + return true; + } + if (checkSuffix(RV, L"asse")) { + CT = removeSuffix(CT, L"asse"); + return true; + } + if (checkSuffix(RV, L"esse")) { + CT = removeSuffix(CT, L"esse"); + return true; + } + if (checkSuffix(RV, L"isse")) { + CT = removeSuffix(CT, L"isse"); + return true; + } + if (checkSuffix(RV, L"aste")) { + CT = removeSuffix(CT, L"aste"); + return true; + } + if (checkSuffix(RV, L"este")) { + CT = removeSuffix(CT, L"este"); + return true; + } + if (checkSuffix(RV, L"iste")) { + CT = removeSuffix(CT, L"iste"); + return true; + } + if (checkSuffix(RV, L"arei")) { + CT = removeSuffix(CT, L"arei"); + return true; + } + if (checkSuffix(RV, L"erei")) { + CT = removeSuffix(CT, L"erei"); + return true; + } + if (checkSuffix(RV, L"irei")) { + CT = removeSuffix(CT, L"irei"); + return true; + } + if (checkSuffix(RV, L"aram")) { + CT = removeSuffix(CT, L"aram"); + return true; + } + if (checkSuffix(RV, L"eram")) { + CT = removeSuffix(CT, L"eram"); + return true; + } + if (checkSuffix(RV, L"iram")) { + CT = removeSuffix(CT, L"iram"); + return true; + } + if (checkSuffix(RV, L"avam")) { + CT = removeSuffix(CT, L"avam"); + return true; + } + if (checkSuffix(RV, L"arem")) { + CT = removeSuffix(CT, L"arem"); + return true; + } + if (checkSuffix(RV, L"erem")) { + CT = removeSuffix(CT, L"erem"); + return true; + } + if (checkSuffix(RV, L"irem")) { + CT = removeSuffix(CT, L"irem"); + return true; + } + if (checkSuffix(RV, L"ando")) { + CT = removeSuffix(CT, L"ando"); + return true; + } + if (checkSuffix(RV, L"endo")) { + CT = removeSuffix(CT, L"endo"); + return true; + } 
+ if (checkSuffix(RV, L"indo")) { + CT = removeSuffix(CT, L"indo"); + return true; + } + if (checkSuffix(RV, L"arao")) { + CT = removeSuffix(CT, L"arao"); + return true; + } + if (checkSuffix(RV, L"erao")) { + CT = removeSuffix(CT, L"erao"); + return true; + } + if (checkSuffix(RV, L"irao")) { + CT = removeSuffix(CT, L"irao"); + return true; + } + if (checkSuffix(RV, L"adas")) { + CT = removeSuffix(CT, L"adas"); + return true; + } + if (checkSuffix(RV, L"idas")) { + CT = removeSuffix(CT, L"idas"); + return true; + } + if (checkSuffix(RV, L"aras")) { + CT = removeSuffix(CT, L"aras"); + return true; + } + if (checkSuffix(RV, L"eras")) { + CT = removeSuffix(CT, L"eras"); + return true; + } + if (checkSuffix(RV, L"iras")) { + CT = removeSuffix(CT, L"iras"); + return true; + } + if (checkSuffix(RV, L"avas")) { + CT = removeSuffix(CT, L"avas"); + return true; + } + if (checkSuffix(RV, L"ares")) { + CT = removeSuffix(CT, L"ares"); + return true; + } + if (checkSuffix(RV, L"eres")) { + CT = removeSuffix(CT, L"eres"); + return true; + } + if (checkSuffix(RV, L"ires")) { + CT = removeSuffix(CT, L"ires"); + return true; + } + if (checkSuffix(RV, L"ados")) { + CT = removeSuffix(CT, L"ados"); + return true; + } + if (checkSuffix(RV, L"idos")) { + CT = removeSuffix(CT, L"idos"); + return true; + } + if (checkSuffix(RV, L"amos")) { + CT = removeSuffix(CT, L"amos"); + return true; + } + if (checkSuffix(RV, L"emos")) { + CT = removeSuffix(CT, L"emos"); + return true; + } + if (checkSuffix(RV, L"imos")) { + CT = removeSuffix(CT, L"imos"); + return true; + } + if (checkSuffix(RV, L"iras")) { + CT = removeSuffix(CT, L"iras"); + return true; + } + if (checkSuffix(RV, L"ieis")) { + CT = removeSuffix(CT, L"ieis"); + return true; + } } - - void BrazilianStemmer::step3() - { - if (RV.empty()) - return; - - if (checkSuffix(RV, L"i") && suffixPreceded(RV, L"i", L"c")) - CT = removeSuffix(CT, L"i"); + + // suffix length = 3 + if (RV.length() >= 3) { + if (checkSuffix(RV, L"ada")) { + CT = 
removeSuffix(CT, L"ada"); + return true; + } + if (checkSuffix(RV, L"ida")) { + CT = removeSuffix(CT, L"ida"); + return true; + } + if (checkSuffix(RV, L"ara")) { + CT = removeSuffix(CT, L"ara"); + return true; + } + if (checkSuffix(RV, L"era")) { + CT = removeSuffix(CT, L"era"); + return true; + } + if (checkSuffix(RV, L"ira")) { + CT = removeSuffix(CT, L"ava"); + return true; + } + if (checkSuffix(RV, L"iam")) { + CT = removeSuffix(CT, L"iam"); + return true; + } + if (checkSuffix(RV, L"ado")) { + CT = removeSuffix(CT, L"ado"); + return true; + } + if (checkSuffix(RV, L"ido")) { + CT = removeSuffix(CT, L"ido"); + return true; + } + if (checkSuffix(RV, L"ias")) { + CT = removeSuffix(CT, L"ias"); + return true; + } + if (checkSuffix(RV, L"ais")) { + CT = removeSuffix(CT, L"ais"); + return true; + } + if (checkSuffix(RV, L"eis")) { + CT = removeSuffix(CT, L"eis"); + return true; + } + if (checkSuffix(RV, L"ira")) { + CT = removeSuffix(CT, L"ira"); + return true; + } + if (checkSuffix(RV, L"ear")) { + CT = removeSuffix(CT, L"ear"); + return true; + } } - - void BrazilianStemmer::step4() - { - if (RV.empty()) - return; - - if (checkSuffix(RV, L"os")) - { - CT = removeSuffix(CT, L"os"); - return; + + // suffix length = 2 + if (RV.length() >= 2) { + if (checkSuffix(RV, L"ia")) { + CT = removeSuffix(CT, L"ia"); + return true; } - if (checkSuffix(RV, L"a")) - { - CT = removeSuffix(CT, L"a"); - return; + if (checkSuffix(RV, L"ei")) { + CT = removeSuffix(CT, L"ei"); + return true; } - if (checkSuffix(RV, L"i")) - { - CT = removeSuffix(CT, L"i"); - return; + if (checkSuffix(RV, L"am")) { + CT = removeSuffix(CT, L"am"); + return true; } - if (checkSuffix(RV, L"o")) - { - CT = removeSuffix(CT, L"o"); - return; + if (checkSuffix(RV, L"em")) { + CT = removeSuffix(CT, L"em"); + return true; + } + if (checkSuffix(RV, L"ar")) { + CT = removeSuffix(CT, L"ar"); + return true; + } + if (checkSuffix(RV, L"er")) { + CT = removeSuffix(CT, L"er"); + return true; + } + if (checkSuffix(RV, 
L"ir")) { + CT = removeSuffix(CT, L"ir"); + return true; + } + if (checkSuffix(RV, L"as")) { + CT = removeSuffix(CT, L"as"); + return true; + } + if (checkSuffix(RV, L"es")) { + CT = removeSuffix(CT, L"es"); + return true; + } + if (checkSuffix(RV, L"is")) { + CT = removeSuffix(CT, L"is"); + return true; + } + if (checkSuffix(RV, L"eu")) { + CT = removeSuffix(CT, L"eu"); + return true; + } + if (checkSuffix(RV, L"iu")) { + CT = removeSuffix(CT, L"iu"); + return true; + } + if (checkSuffix(RV, L"iu")) { + CT = removeSuffix(CT, L"iu"); + return true; + } + if (checkSuffix(RV, L"ou")) { + CT = removeSuffix(CT, L"ou"); + return true; } } - - void BrazilianStemmer::step5() - { - if (RV.empty()) - return; - - if (checkSuffix(RV, L"e")) - { - if (suffixPreceded(RV, L"e", L"gu")) - { - CT = removeSuffix(CT, L"e"); - CT = removeSuffix(CT, L"u"); - return; - } - if (suffixPreceded(RV, L"e", L"ci")) - { - CT = removeSuffix(CT, L"e"); - CT = removeSuffix(CT, L"i"); - return; - } + // no ending was removed by step2 + return false; +} + +void BrazilianStemmer::step3() { + if (RV.empty()) { + return; + } + if (checkSuffix(RV, L"i") && suffixPreceded(RV, L"i", L"c")) { + CT = removeSuffix(CT, L"i"); + } +} + +void BrazilianStemmer::step4() { + if (RV.empty()) { + return; + } + + if (checkSuffix(RV, L"os")) { + CT = removeSuffix(CT, L"os"); + return; + } + if (checkSuffix(RV, L"a")) { + CT = removeSuffix(CT, L"a"); + return; + } + if (checkSuffix(RV, L"i")) { + CT = removeSuffix(CT, L"i"); + return; + } + if (checkSuffix(RV, L"o")) { + CT = removeSuffix(CT, L"o"); + return; + } +} + +void BrazilianStemmer::step5() { + if (RV.empty()) { + return; + } + + if (checkSuffix(RV, L"e")) { + if (suffixPreceded(RV, L"e", L"gu")) { + CT = removeSuffix(CT, L"e"); + CT = removeSuffix(CT, L"u"); + return; + } + + if (suffixPreceded(RV, L"e", L"ci")) { CT = removeSuffix(CT, L"e"); + CT = removeSuffix(CT, L"i"); return; } + + CT = removeSuffix(CT, L"e"); + return; } } + +} diff --git 
a/src/contrib/analyzers/common/analysis/cjk/CJKAnalyzer.cpp b/src/contrib/analyzers/common/analysis/cjk/CJKAnalyzer.cpp index 27372f19..bd340762 100644 --- a/src/contrib/analyzers/common/analysis/cjk/CJKAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/cjk/CJKAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,64 +9,57 @@ #include "CJKTokenizer.h" #include "StopFilter.h" -namespace Lucene -{ - const wchar_t* CJKAnalyzer::_STOP_WORDS[] = - { - L"a", L"and", L"are", L"as", L"at", L"be", - L"but", L"by", L"for", L"if", L"in", L"into", - L"is", L"it", L"no", L"not", L"of", L"on", - L"or", L"s", L"such", L"t", L"that", L"the", - L"their", L"then", L"there", L"these", - L"they", L"this", L"to", L"was", L"will", - L"with", L"", L"www" - }; - - CJKAnalyzer::CJKAnalyzer(LuceneVersion::Version matchVersion) - { - this->stoptable = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - CJKAnalyzer::CJKAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stoptable = stopwords; - this->matchVersion = matchVersion; - } - - CJKAnalyzer::~CJKAnalyzer() - { - } - - const HashSet CJKAnalyzer::getDefaultStopSet() - { - static HashSet stopSet; - if (!stopSet) - stopSet = HashSet::newInstance(_STOP_WORDS, _STOP_WORDS + SIZEOF_ARRAY(_STOP_WORDS)); - return stopSet; - } - - TokenStreamPtr CJKAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - return newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), newLucene(reader), stoptable); - } - - TokenStreamPtr CJKAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr 
reader) - { - CJKAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(reader); - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->source, stoptable); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - CJKAnalyzerSavedStreams::~CJKAnalyzerSavedStreams() - { +namespace Lucene { + +const wchar_t* CJKAnalyzer::_STOP_WORDS[] = { + L"a", L"and", L"are", L"as", L"at", L"be", + L"but", L"by", L"for", L"if", L"in", L"into", + L"is", L"it", L"no", L"not", L"of", L"on", + L"or", L"s", L"such", L"t", L"that", L"the", + L"their", L"then", L"there", L"these", + L"they", L"this", L"to", L"was", L"will", + L"with", L"", L"www" +}; + +CJKAnalyzer::CJKAnalyzer(LuceneVersion::Version matchVersion) { + this->stoptable = getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +CJKAnalyzer::CJKAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stoptable = stopwords; + this->matchVersion = matchVersion; +} + +CJKAnalyzer::~CJKAnalyzer() { +} + +const HashSet CJKAnalyzer::getDefaultStopSet() { + static HashSet stopSet; + LUCENE_RUN_ONCE( + stopSet = HashSet::newInstance(_STOP_WORDS, _STOP_WORDS + SIZEOF_ARRAY(_STOP_WORDS)); + ); + return stopSet; +} + +TokenStreamPtr CJKAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + return newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), newLucene(reader), stoptable); +} + +TokenStreamPtr CJKAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + CJKAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(reader); + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), 
streams->source, stoptable); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +CJKAnalyzerSavedStreams::~CJKAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/cjk/CJKTokenizer.cpp b/src/contrib/analyzers/common/analysis/cjk/CJKTokenizer.cpp index f04a4fc1..ba3426da 100644 --- a/src/contrib/analyzers/common/analysis/cjk/CJKTokenizer.cpp +++ b/src/contrib/analyzers/common/analysis/cjk/CJKTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,265 +14,221 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - /// Word token type - const int32_t CJKTokenizer::WORD_TYPE = 0; - - /// Single byte token type - const int32_t CJKTokenizer::SINGLE_TOKEN_TYPE = 1; - - /// Double byte token type - const int32_t CJKTokenizer::DOUBLE_TOKEN_TYPE = 2; - - /// Names for token types - const wchar_t* CJKTokenizer::TOKEN_TYPE_NAMES[] = {L"word", L"single", L"double"}; - - const int32_t CJKTokenizer::MAX_WORD_LEN = 255; - - const int32_t CJKTokenizer::IO_BUFFER_SIZE = 256; - - CJKTokenizer::CJKTokenizer(ReaderPtr input) : Tokenizer(input) - { - } - - CJKTokenizer::CJKTokenizer(AttributeSourcePtr source, ReaderPtr input) : Tokenizer(source, input) - { - } - - CJKTokenizer::CJKTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : Tokenizer(factory, input) - { - } - - CJKTokenizer::~CJKTokenizer() - { - } - - void CJKTokenizer::initialize() - { - offset = 0; - bufferIndex = 0; - dataLen = 0; - buffer = CharArray::newInstance(MAX_WORD_LEN); - ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); - 
tokenType = WORD_TYPE; - preIsTokened = false; - - termAtt = addAttribute(); - offsetAtt = addAttribute(); - typeAtt = addAttribute(); - } - - CJKTokenizer::UnicodeBlock CJKTokenizer::unicodeBlock(wchar_t c) - { - if (c >= 0x0000 && c <= 0x007f) - return BASIC_LATIN; - else if (c >= 0xff00 && c <= 0xffef) - return HALFWIDTH_AND_FULLWIDTH_FORMS; - return NONE; +namespace Lucene { + +/// Word token type +const int32_t CJKTokenizer::WORD_TYPE = 0; + +/// Single byte token type +const int32_t CJKTokenizer::SINGLE_TOKEN_TYPE = 1; + +/// Double byte token type +const int32_t CJKTokenizer::DOUBLE_TOKEN_TYPE = 2; + +/// Names for token types +const wchar_t* CJKTokenizer::TOKEN_TYPE_NAMES[] = {L"word", L"single", L"double"}; + +const int32_t CJKTokenizer::MAX_WORD_LEN = 255; + +const int32_t CJKTokenizer::IO_BUFFER_SIZE = 256; + +CJKTokenizer::CJKTokenizer(const ReaderPtr& input) : Tokenizer(input) { +} + +CJKTokenizer::CJKTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : Tokenizer(source, input) { +} + +CJKTokenizer::CJKTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : Tokenizer(factory, input) { +} + +CJKTokenizer::~CJKTokenizer() { +} + +void CJKTokenizer::initialize() { + offset = 0; + bufferIndex = 0; + dataLen = 0; + buffer = CharArray::newInstance(MAX_WORD_LEN); + ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); + tokenType = WORD_TYPE; + preIsTokened = false; + + termAtt = addAttribute(); + offsetAtt = addAttribute(); + typeAtt = addAttribute(); +} + +CJKTokenizer::UnicodeBlock CJKTokenizer::unicodeBlock(wchar_t c) { + if (c >= 0x0000 && c <= 0x007f) { + return BASIC_LATIN; + } else if (c >= 0xff00 && c <= 0xffef) { + return HALFWIDTH_AND_FULLWIDTH_FORMS; } - - bool CJKTokenizer::incrementToken() - { - clearAttributes(); + return NONE; +} - while (true) // loop until we find a non-empty token - { - int32_t length = 0; +bool CJKTokenizer::incrementToken() { + clearAttributes(); - // the position used to create Token - 
int32_t start = offset; + while (true) { // loop until we find a non-empty token + int32_t length = 0; - while (true) // loop until we've found a full token - { - wchar_t c = 0; - UnicodeBlock ub = NONE; + // the position used to create Token + int32_t start = offset; - ++offset; + while (true) { // loop until we've found a full token + wchar_t c = 0; + UnicodeBlock ub = NONE; - if (bufferIndex >= dataLen) - { - dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); - bufferIndex = 0; - } + ++offset; - if (dataLen == -1) - { - if (length > 0) - { - if (preIsTokened == true) - { - length = 0; - preIsTokened = false; - } - else - --offset; - break; - } - else - { + if (bufferIndex >= dataLen) { + dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); + bufferIndex = 0; + } + + if (dataLen == -1) { + if (length > 0) { + if (preIsTokened == true) { + length = 0; + preIsTokened = false; + } else { --offset; - return false; } + break; + } else { + --offset; + return false; } - else - { - // get current character - c = ioBuffer[bufferIndex++]; - - // get the UnicodeBlock of the current character - ub = unicodeBlock(c); - } + } else { + // get current character + c = ioBuffer[bufferIndex++]; - // if the current character is ASCII or Extend ASCII - if (ub == BASIC_LATIN || ub == HALFWIDTH_AND_FULLWIDTH_FORMS) - { - if (ub == HALFWIDTH_AND_FULLWIDTH_FORMS) - { - int32_t i = (int32_t)c; - if (i >= 65281 && i <= 65374) - { - // convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN - i = i - 65248; - c = (wchar_t)i; - } + // get the UnicodeBlock of the current character + ub = unicodeBlock(c); + } + + // if the current character is ASCII or Extend ASCII + if (ub == BASIC_LATIN || ub == HALFWIDTH_AND_FULLWIDTH_FORMS) { + if (ub == HALFWIDTH_AND_FULLWIDTH_FORMS) { + int32_t i = (int32_t)c; + if (i >= 65281 && i <= 65374) { + // convert certain HALFWIDTH_AND_FULLWIDTH_FORMS to BASIC_LATIN + i = i - 65248; + c = (wchar_t)i; } + } - // if the current character is a 
letter or "_" "+" "#" - if (UnicodeUtil::isAlnum(c) || c == L'_' || c == L'+' || c == L'#') - { - if (length == 0) - { - // "javaC1C2C3C4linux"
- // ^--: the current character begin to token the ASCII - // letter - start = offset - 1; - } - else if (tokenType == DOUBLE_TOKEN_TYPE) - { - // "javaC1C2C3C4linux"
- // ^--: the previous non-ASCII - // : the current character - --offset; - --bufferIndex; + // if the current character is a letter or "_" "+" "#" + if (UnicodeUtil::isAlnum(c) || c == L'_' || c == L'+' || c == L'#') { + if (length == 0) { + // "javaC1C2C3C4linux"
+ // ^--: the current character begin to token the ASCII + // letter + start = offset - 1; + } else if (tokenType == DOUBLE_TOKEN_TYPE) { + // "javaC1C2C3C4linux"
+ // ^--: the previous non-ASCII + // : the current character + --offset; + --bufferIndex; - if (preIsTokened) - { - // there is only one non-ASCII has been stored - length = 0; - preIsTokened = false; - break; - } - else - break; + if (preIsTokened) { + // there is only one non-ASCII has been stored + length = 0; + preIsTokened = false; + break; + } else { + break; } + } - // store the LowerCase(c) in the buffer - buffer[length++] = CharFolder::toLower(c); - tokenType = SINGLE_TOKEN_TYPE; + // store the LowerCase(c) in the buffer + buffer[length++] = CharFolder::toLower(c); + tokenType = SINGLE_TOKEN_TYPE; - // break the procedure if buffer overflowed! - if (length == MAX_WORD_LEN) - break; + // break the procedure if buffer overflowed! + if (length == MAX_WORD_LEN) { + break; } - else if (length > 0) - { - if (preIsTokened) - { - length = 0; - preIsTokened = false; - } - else - break; + } else if (length > 0) { + if (preIsTokened) { + length = 0; + preIsTokened = false; + } else { + break; } } - else - { - // non-ASCII letter, e.g."C1C2C3C4" - if (UnicodeUtil::isAlpha(c)) - { - if (length == 0) - { - start = offset - 1; + } else { + // non-ASCII letter, e.g."C1C2C3C4" + if (UnicodeUtil::isAlpha(c)) { + if (length == 0) { + start = offset - 1; + buffer[length++] = c; + tokenType = DOUBLE_TOKEN_TYPE; + } else { + if (tokenType == SINGLE_TOKEN_TYPE) { + --offset; + --bufferIndex; + + // return the previous ASCII characters + break; + } else { buffer[length++] = c; tokenType = DOUBLE_TOKEN_TYPE; - } - else - { - if (tokenType == SINGLE_TOKEN_TYPE) - { + + if (length == 2) { --offset; --bufferIndex; - - // return the previous ASCII characters + preIsTokened = true; break; } - else - { - buffer[length++] = c; - tokenType = DOUBLE_TOKEN_TYPE; - - if (length == 2) - { - --offset; - --bufferIndex; - preIsTokened = true; - break; - } - } } } - else if (length > 0) - { - if (preIsTokened) - { - // empty the buffer - length = 0; - preIsTokened = false; - } - else - break; + 
} else if (length > 0) { + if (preIsTokened) { + // empty the buffer + length = 0; + preIsTokened = false; + } else { + break; } } } + } - if (length > 0) - { - termAtt->setTermBuffer(buffer.get(), 0, length); - offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); - typeAtt->setType(TOKEN_TYPE_NAMES[tokenType]); - return true; - } - else if (dataLen == -1) - { - --offset; - return false; - } - - // Cycle back and try for the next token (don't return an empty string) + if (length > 0) { + termAtt->setTermBuffer(buffer.get(), 0, length); + offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); + typeAtt->setType(TOKEN_TYPE_NAMES[tokenType]); + return true; + } else if (dataLen == -1) { + --offset; + return false; } + + // Cycle back and try for the next token (don't return an empty string) } - - void CJKTokenizer::end() - { - // set final offset - int32_t finalOffset = correctOffset(offset); - offsetAtt->setOffset(finalOffset, finalOffset); - } - - void CJKTokenizer::reset() - { - Tokenizer::reset(); - offset = 0; - bufferIndex = 0; - dataLen = 0; - preIsTokened = false; - tokenType = WORD_TYPE; - } - - void CJKTokenizer::reset(ReaderPtr input) - { - Tokenizer::reset(input); - reset(); - } +} + +void CJKTokenizer::end() { + // set final offset + int32_t finalOffset = correctOffset(offset); + offsetAtt->setOffset(finalOffset, finalOffset); +} + +void CJKTokenizer::reset() { + Tokenizer::reset(); + offset = 0; + bufferIndex = 0; + dataLen = 0; + preIsTokened = false; + tokenType = WORD_TYPE; +} + +void CJKTokenizer::reset(const ReaderPtr& input) { + Tokenizer::reset(input); + reset(); +} + } diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseAnalyzer.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseAnalyzer.cpp index 810b3581..a31117a8 100644 --- a/src/contrib/analyzers/common/analysis/cn/ChineseAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/cn/ChineseAnalyzer.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,35 +9,31 @@ #include "ChineseTokenizer.h" #include "ChineseFilter.h" -namespace Lucene -{ - ChineseAnalyzer::~ChineseAnalyzer() - { - } - - TokenStreamPtr ChineseAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(reader); - result = newLucene(result); - return result; - } - - TokenStreamPtr ChineseAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - ChineseAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(reader); - streams->result = newLucene(streams->source); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - ChineseAnalyzerSavedStreams::~ChineseAnalyzerSavedStreams() - { +namespace Lucene { + +ChineseAnalyzer::~ChineseAnalyzer() { +} + +TokenStreamPtr ChineseAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(reader); + result = newLucene(result); + return result; +} + +TokenStreamPtr ChineseAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + ChineseAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(reader); + streams->result = newLucene(streams->source); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + 
+ChineseAnalyzerSavedStreams::~ChineseAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp index d068bf4d..83134454 100644 --- a/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp +++ b/src/contrib/analyzers/common/analysis/cn/ChineseFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,49 +10,42 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - const wchar_t* ChineseFilter::STOP_WORDS[] = - { - L"and", L"are", L"as", L"at", L"be", L"but", L"by", - L"for", L"if", L"in", L"into", L"is", L"it", - L"no", L"not", L"of", L"on", L"or", L"such", - L"that", L"the", L"their", L"then", L"there", L"these", - L"they", L"this", L"to", L"was", L"will", L"with" - }; - - ChineseFilter::ChineseFilter(TokenStreamPtr input) : TokenFilter(input) - { - stopTable = HashSet::newInstance(STOP_WORDS, STOP_WORDS + SIZEOF_ARRAY(STOP_WORDS)); - termAtt = addAttribute(); - } - - ChineseFilter::~ChineseFilter() - { - } - - bool ChineseFilter::incrementToken() - { - while (input->incrementToken()) - { - String text(termAtt->term()); - - if (!stopTable.contains(text)) - { - if (UnicodeUtil::isLower(text[0]) || UnicodeUtil::isUpper(text[0])) - { - // English word/token should larger than 1 character. - if (text.length() > 1) - return true; - } - else if (UnicodeUtil::isOther(text[0])) - { - // One Chinese character as one Chinese word. - // Chinese word extraction to be added later here. 
+namespace Lucene { + +const wchar_t* ChineseFilter::STOP_WORDS[] = { + L"and", L"are", L"as", L"at", L"be", L"but", L"by", + L"for", L"if", L"in", L"into", L"is", L"it", + L"no", L"not", L"of", L"on", L"or", L"such", + L"that", L"the", L"their", L"then", L"there", L"these", + L"they", L"this", L"to", L"was", L"will", L"with" +}; + +ChineseFilter::ChineseFilter(const TokenStreamPtr& input) : TokenFilter(input) { + stopTable = HashSet::newInstance(STOP_WORDS, STOP_WORDS + SIZEOF_ARRAY(STOP_WORDS)); + termAtt = addAttribute(); +} + +ChineseFilter::~ChineseFilter() { +} + +bool ChineseFilter::incrementToken() { + while (input->incrementToken()) { + String text(termAtt->term()); + + if (!stopTable.contains(text)) { + if (UnicodeUtil::isLower(text[0]) || UnicodeUtil::isUpper(text[0])) { + // English word/token should larger than 1 character. + if (text.length() > 1) { return true; } + } else if (UnicodeUtil::isOther(text[0]) || UnicodeUtil::isDigit(text[0])) { + // One Chinese character as one Chinese word. + // Chinese word extraction to be added later here. + return true; } } - return false; } + return false; +} + } diff --git a/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp b/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp index 67d856b0..3b4de742 100644 --- a/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp +++ b/src/contrib/analyzers/common/analysis/cn/ChineseTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,126 +13,130 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - const int32_t ChineseTokenizer::MAX_WORD_LEN = 255; - const int32_t ChineseTokenizer::IO_BUFFER_SIZE = 1024; - - ChineseTokenizer::ChineseTokenizer(ReaderPtr input) : Tokenizer(input) - { - } - - ChineseTokenizer::ChineseTokenizer(AttributeSourcePtr source, ReaderPtr input) : Tokenizer(source, input) - { - } - - ChineseTokenizer::ChineseTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : Tokenizer(factory, input) - { - } - - ChineseTokenizer::~ChineseTokenizer() - { - } - - void ChineseTokenizer::initialize() - { - offset = 0; - bufferIndex = 0; - dataLen = 0; - buffer = CharArray::newInstance(MAX_WORD_LEN); - ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); - length = 0; - start = 0; - - termAtt = addAttribute(); - offsetAtt = addAttribute(); +namespace Lucene { + +const int32_t ChineseTokenizer::MAX_WORD_LEN = 255; +const int32_t ChineseTokenizer::IO_BUFFER_SIZE = 1024; + +ChineseTokenizer::ChineseTokenizer(const ReaderPtr& input) : Tokenizer(input) { +} + +ChineseTokenizer::ChineseTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : Tokenizer(source, input) { +} + +ChineseTokenizer::ChineseTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : Tokenizer(factory, input) { +} + +ChineseTokenizer::~ChineseTokenizer() { +} + +void ChineseTokenizer::initialize() { + offset = 0; + bufferIndex = 0; + dataLen = 0; + buffer = CharArray::newInstance(MAX_WORD_LEN); + ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); + length = 0; + start = 0; + + termAtt = addAttribute(); + offsetAtt = addAttribute(); +} + +void ChineseTokenizer::push(wchar_t c) { + if (length == 0) { + start = offset - 1; // start of token } - - void ChineseTokenizer::push(wchar_t c) - { - if (length == 0) - start = offset - 1; // start of token - buffer[length++] = CharFolder::toLower(c); // 
buffer it + buffer[length++] = CharFolder::toLower(c); // buffer it +} + +bool ChineseTokenizer::flush() { + if (length > 0) { + termAtt->setTermBuffer(buffer.get(), 0, length); + offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); + return true; + } else { + return false; } - - bool ChineseTokenizer::flush() - { - if (length > 0) - { - termAtt->setTermBuffer(buffer.get(), 0, length); - offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); - return true; +} + +bool ChineseTokenizer::incrementToken() { + clearAttributes(); + + length = 0; + start = offset; + bool last_is_en = false, last_is_num = false; + + while (true) { + wchar_t c; + ++offset; + + if (bufferIndex >= dataLen) { + dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); + bufferIndex = 0; } - else - return false; - } - - bool ChineseTokenizer::incrementToken() - { - clearAttributes(); - - length = 0; - start = offset; - - while (true) - { - wchar_t c; - ++offset; - - if (bufferIndex >= dataLen) - { - dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); - bufferIndex = 0; - } - - if (dataLen == -1) - { + + if (dataLen == -1) { + --offset; + return flush(); + } else { + c = ioBuffer[bufferIndex++]; + } + + if (UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) { + if (last_is_num) { + --bufferIndex; --offset; return flush(); } - else - c = ioBuffer[bufferIndex++]; - - if (UnicodeUtil::isDigit(c) || UnicodeUtil::isLower(c) || UnicodeUtil::isUpper(c)) - { - push(c); - if (length == MAX_WORD_LEN) - return flush(); + + push(c); + if (length == MAX_WORD_LEN) { + return flush(); + } + last_is_en = true; + } else if (UnicodeUtil::isDigit(c)) { + if (last_is_en) { + --bufferIndex; + --offset; + return flush(); } - else if (UnicodeUtil::isOther(c)) - { - if (length > 0) - { - --bufferIndex; - --offset; - return flush(); - } - push(c); + + push(c); + if (length == MAX_WORD_LEN) { return flush(); } - else if (length > 0) + last_is_num = true; + } else if 
(UnicodeUtil::isOther(c)) { + if (length > 0) { + --bufferIndex; + --offset; return flush(); + } + push(c); + return flush(); + } else if (length > 0) { + return flush(); } } - - void ChineseTokenizer::end() - { - // set final offset - int32_t finalOffset = correctOffset(offset); - offsetAtt->setOffset(finalOffset, finalOffset); - } - - void ChineseTokenizer::reset() - { - Tokenizer::reset(); - offset = 0; - bufferIndex = 0; - dataLen = 0; - } - - void ChineseTokenizer::reset(ReaderPtr input) - { - Tokenizer::reset(input); - reset(); - } +} + +void ChineseTokenizer::end() { + // set final offset + int32_t finalOffset = correctOffset(offset); + offsetAtt->setOffset(finalOffset, finalOffset); +} + +void ChineseTokenizer::reset() { + Tokenizer::reset(); + offset = 0; + bufferIndex = 0; + dataLen = 0; +} + +void ChineseTokenizer::reset(const ReaderPtr& input) { + Tokenizer::reset(input); + reset(); +} + } diff --git a/src/contrib/analyzers/common/analysis/cz/CzechAnalyzer.cpp b/src/contrib/analyzers/common/analysis/cz/CzechAnalyzer.cpp index a78313f7..bc47293b 100644 --- a/src/contrib/analyzers/common/analysis/cz/CzechAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/cz/CzechAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,121 +12,112 @@ #include "StopFilter.h" #include "StringUtils.h" -namespace Lucene -{ - /// Default Czech stopwords in UTF-8 format. 
- const uint8_t CzechAnalyzer::_CZECH_STOP_WORDS[] = - { - 0x61, 0x0a, 0x73, 0x0a, 0x6b, 0x0a, 0x6f, 0x0a, 0x69, 0x0a, 0x75, 0x0a, 0x76, 0x0a, 0x7a, 0x0a, - 0x64, 0x6e, 0x65, 0x73, 0x0a, 0x63, 0x7a, 0x0a, 0x74, 0xc3, 0xad, 0x6d, 0x74, 0x6f, 0x0a, 0x62, - 0x75, 0x64, 0x65, 0xc5, 0xa1, 0x0a, 0x62, 0x75, 0x64, 0x65, 0x6d, 0x0a, 0x62, 0x79, 0x6c, 0x69, - 0x0a, 0x6a, 0x73, 0x65, 0xc5, 0xa1, 0x0a, 0x6d, 0x75, 0x6a, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x6d, - 0x0a, 0x74, 0x61, 0x0a, 0x74, 0x6f, 0x6d, 0x74, 0x6f, 0x0a, 0x74, 0x6f, 0x68, 0x6c, 0x65, 0x0a, - 0x74, 0x75, 0x74, 0x6f, 0x0a, 0x74, 0x79, 0x74, 0x6f, 0x0a, 0x6a, 0x65, 0x6a, 0x0a, 0x7a, 0x64, - 0x61, 0x0a, 0x70, 0x72, 0x6f, 0x63, 0x0a, 0x6d, 0xc3, 0xa1, 0x74, 0x65, 0x0a, 0x74, 0x61, 0x74, - 0x6f, 0x0a, 0x6b, 0x61, 0x6d, 0x0a, 0x74, 0x6f, 0x68, 0x6f, 0x74, 0x6f, 0x0a, 0x6b, 0x64, 0x6f, - 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xad, 0x0a, 0x6d, 0x69, 0x0a, 0x6e, 0xc3, 0xa1, 0x6d, 0x0a, - 0x74, 0x6f, 0x6d, 0x0a, 0x74, 0x6f, 0x6d, 0x75, 0x74, 0x6f, 0x0a, 0x6d, 0xc3, 0xad, 0x74, 0x0a, - 0x6e, 0x69, 0x63, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0x6f, 0x75, - 0x0a, 0x62, 0x79, 0x6c, 0x61, 0x0a, 0x74, 0x6f, 0x68, 0x6f, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, - 0xc5, 0xbe, 0x65, 0x0a, 0x61, 0x73, 0x69, 0x0a, 0x68, 0x6f, 0x0a, 0x6e, 0x61, 0xc5, 0xa1, 0x69, - 0x0a, 0x6e, 0x61, 0x70, 0x69, 0xc5, 0xa1, 0x74, 0x65, 0x0a, 0x72, 0x65, 0x0a, 0x63, 0x6f, 0xc5, - 0xbe, 0x0a, 0x74, 0xc3, 0xad, 0x6d, 0x0a, 0x74, 0x61, 0x6b, 0xc5, 0xbe, 0x65, 0x0a, 0x73, 0x76, - 0xc3, 0xbd, 0x63, 0x68, 0x0a, 0x6a, 0x65, 0x6a, 0xc3, 0xad, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x6d, - 0x69, 0x0a, 0x6a, 0x73, 0x74, 0x65, 0x0a, 0x61, 0x6a, 0x0a, 0x74, 0x75, 0x0a, 0x74, 0x65, 0x64, - 0x79, 0x0a, 0x74, 0x65, 0x74, 0x6f, 0x0a, 0x62, 0x79, 0x6c, 0x6f, 0x0a, 0x6b, 0x64, 0x65, 0x0a, - 0x6b, 0x65, 0x0a, 0x70, 0x72, 0x61, 0x76, 0xc3, 0xa9, 0x0a, 0x6a, 0x69, 0x0a, 0x6e, 0x61, 0x64, - 0x0a, 0x6e, 0x65, 0x6a, 0x73, 0x6f, 0x75, 0x0a, 0x63, 0x69, 0x0a, 0x70, 0x6f, 
0x64, 0x0a, 0x74, - 0xc3, 0xa9, 0x6d, 0x61, 0x0a, 0x6d, 0x65, 0x7a, 0x69, 0x0a, 0x70, 0x72, 0x65, 0x73, 0x0a, 0x74, - 0x79, 0x0a, 0x70, 0x61, 0x6b, 0x0a, 0x76, 0xc3, 0xa1, 0x6d, 0x0a, 0x61, 0x6e, 0x69, 0x0a, 0x6b, - 0x64, 0x79, 0xc5, 0xbe, 0x0a, 0x76, 0xc5, 0xa1, 0x61, 0x6b, 0x0a, 0x6e, 0x65, 0x67, 0x0a, 0x6a, - 0x73, 0x65, 0x6d, 0x0a, 0x74, 0x65, 0x6e, 0x74, 0x6f, 0x0a, 0x63, 0x6c, 0xc3, 0xa1, 0x6e, 0x6b, - 0x75, 0x0a, 0x63, 0x6c, 0xc3, 0xa1, 0x6e, 0x6b, 0x79, 0x0a, 0x61, 0x62, 0x79, 0x0a, 0x6a, 0x73, - 0x6d, 0x65, 0x0a, 0x70, 0x72, 0x65, 0x64, 0x0a, 0x70, 0x74, 0x61, 0x0a, 0x6a, 0x65, 0x6a, 0x69, - 0x63, 0x68, 0x0a, 0x62, 0x79, 0x6c, 0x0a, 0x6a, 0x65, 0xc5, 0xa1, 0x74, 0x65, 0x0a, 0x61, 0xc5, - 0xbe, 0x0a, 0x62, 0x65, 0x7a, 0x0a, 0x74, 0x61, 0x6b, 0xc3, 0xa9, 0x0a, 0x70, 0x6f, 0x75, 0x7a, - 0x65, 0x0a, 0x70, 0x72, 0x76, 0x6e, 0xc3, 0xad, 0x0a, 0x76, 0x61, 0xc5, 0xa1, 0x65, 0x0a, 0x6b, - 0x74, 0x65, 0x72, 0xc3, 0xa1, 0x0a, 0x6e, 0xc3, 0xa1, 0x73, 0x0a, 0x6e, 0x6f, 0x76, 0xc3, 0xbd, - 0x0a, 0x74, 0x69, 0x70, 0x79, 0x0a, 0x70, 0x6f, 0x6b, 0x75, 0x64, 0x0a, 0x6d, 0x75, 0xc5, 0xbe, - 0x65, 0x0a, 0x73, 0x74, 0x72, 0x61, 0x6e, 0x61, 0x0a, 0x6a, 0x65, 0x68, 0x6f, 0x0a, 0x73, 0x76, - 0xc3, 0xa9, 0x0a, 0x6a, 0x69, 0x6e, 0xc3, 0xa9, 0x0a, 0x7a, 0x70, 0x72, 0xc3, 0xa1, 0x76, 0x79, - 0x0a, 0x6e, 0x6f, 0x76, 0xc3, 0xa9, 0x0a, 0x6e, 0x65, 0x6e, 0xc3, 0xad, 0x0a, 0x76, 0xc3, 0xa1, - 0x73, 0x0a, 0x6a, 0x65, 0x6e, 0x0a, 0x70, 0x6f, 0x64, 0x6c, 0x65, 0x0a, 0x7a, 0x64, 0x65, 0x0a, - 0x75, 0xc5, 0xbe, 0x0a, 0x62, 0xc3, 0xbd, 0x74, 0x0a, 0x76, 0xc3, 0xad, 0x63, 0x65, 0x0a, 0x62, - 0x75, 0x64, 0x65, 0x0a, 0x6a, 0x69, 0xc5, 0xbe, 0x0a, 0x6e, 0x65, 0xc5, 0xbe, 0x0a, 0x6b, 0x74, - 0x65, 0x72, 0xc3, 0xbd, 0x0a, 0x62, 0x79, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xa9, 0x0a, 0x63, - 0x6f, 0x0a, 0x6e, 0x65, 0x62, 0x6f, 0x0a, 0x74, 0x65, 0x6e, 0x0a, 0x74, 0x61, 0x6b, 0x0a, 0x6d, - 0xc3, 0xa1, 0x0a, 0x70, 0x72, 0x69, 0x0a, 0x6f, 0x64, 0x0a, 0x70, 0x6f, 0x0a, 0x6a, 0x73, 0x6f, - 0x75, 0x0a, 0x6a, 
0x61, 0x6b, 0x0a, 0x64, 0x61, 0x6c, 0xc5, 0xa1, 0xc3, 0xad, 0x0a, 0x61, 0x6c, - 0x65, 0x0a, 0x73, 0x69, 0x0a, 0x73, 0x65, 0x0a, 0x76, 0x65, 0x0a, 0x74, 0x6f, 0x0a, 0x6a, 0x61, - 0x6b, 0x6f, 0x0a, 0x7a, 0x61, 0x0a, 0x7a, 0x70, 0x65, 0x74, 0x0a, 0x7a, 0x65, 0x0a, 0x64, 0x6f, - 0x0a, 0x70, 0x72, 0x6f, 0x0a, 0x6a, 0x65, 0x0a, 0x6e, 0x61, 0x0a, 0x61, 0x74, 0x64, 0x0a, 0x61, - 0x74, 0x70, 0x0a, 0x6a, 0x61, 0x6b, 0x6d, 0x69, 0x6c, 0x65, 0x0a, 0x70, 0x72, 0x69, 0x63, 0x65, - 0x6d, 0xc5, 0xbe, 0x0a, 0x6a, 0xc3, 0xa1, 0x0a, 0x6f, 0x6e, 0x0a, 0x6f, 0x6e, 0x61, 0x0a, 0x6f, - 0x6e, 0x6f, 0x0a, 0x6f, 0x6e, 0x69, 0x0a, 0x6f, 0x6e, 0x79, 0x0a, 0x6d, 0x79, 0x0a, 0x76, 0x79, - 0x0a, 0x6a, 0xc3, 0xad, 0x0a, 0x6a, 0x69, 0x0a, 0x6d, 0x65, 0x0a, 0x6d, 0x6e, 0x65, 0x0a, 0x6a, - 0x65, 0x6d, 0x75, 0x0a, 0x74, 0x6f, 0x6d, 0x75, 0x0a, 0x74, 0x65, 0x6d, 0x0a, 0x74, 0x65, 0x6d, - 0x75, 0x0a, 0x6e, 0x65, 0x6d, 0x75, 0x0a, 0x6e, 0x65, 0x6d, 0x75, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, - 0x68, 0x6f, 0xc5, 0xbe, 0x0a, 0x6a, 0xc3, 0xad, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0x6c, 0x69, 0x6b, - 0x6f, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0xc5, 0xbe, 0x0a, 0x6a, 0x61, 0x6b, 0x6f, 0xc5, 0xbe, 0x0a, - 0x6e, 0x61, 0x63, 0x65, 0xc5, 0xbe, 0x0a - }; - - CzechAnalyzer::CzechAnalyzer(LuceneVersion::Version matchVersion) - { - this->stoptable = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - CzechAnalyzer::CzechAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stoptable = stopwords; - this->matchVersion = matchVersion; - } - - CzechAnalyzer::~CzechAnalyzer() - { - } - - const HashSet CzechAnalyzer::getDefaultStopSet() - { - static HashSet stopSet; - if (!stopSet) - { - String stopWords(UTF8_TO_STRING(_CZECH_STOP_WORDS)); - Collection words(StringUtils::split(stopWords, L"\n")); - stopSet = HashSet::newInstance(words.begin(), words.end()); - } - return stopSet; - } - - TokenStreamPtr CzechAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = 
newLucene(matchVersion, reader); - result = newLucene(result); - result = newLucene(result); - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); - return result; - } - - TokenStreamPtr CzechAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - CzechAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(matchVersion, reader); - streams->result = newLucene(streams->source); - streams->result = newLucene(streams->result); - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - CzechAnalyzerSavedStreams::~CzechAnalyzerSavedStreams() - { +namespace Lucene { + +/// Default Czech stopwords in UTF-8 format. +const uint8_t CzechAnalyzer::_CZECH_STOP_WORDS[] = { + 0x61, 0x0a, 0x73, 0x0a, 0x6b, 0x0a, 0x6f, 0x0a, 0x69, 0x0a, 0x75, 0x0a, 0x76, 0x0a, 0x7a, 0x0a, + 0x64, 0x6e, 0x65, 0x73, 0x0a, 0x63, 0x7a, 0x0a, 0x74, 0xc3, 0xad, 0x6d, 0x74, 0x6f, 0x0a, 0x62, + 0x75, 0x64, 0x65, 0xc5, 0xa1, 0x0a, 0x62, 0x75, 0x64, 0x65, 0x6d, 0x0a, 0x62, 0x79, 0x6c, 0x69, + 0x0a, 0x6a, 0x73, 0x65, 0xc5, 0xa1, 0x0a, 0x6d, 0x75, 0x6a, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x6d, + 0x0a, 0x74, 0x61, 0x0a, 0x74, 0x6f, 0x6d, 0x74, 0x6f, 0x0a, 0x74, 0x6f, 0x68, 0x6c, 0x65, 0x0a, + 0x74, 0x75, 0x74, 0x6f, 0x0a, 0x74, 0x79, 0x74, 0x6f, 0x0a, 0x6a, 0x65, 0x6a, 0x0a, 0x7a, 0x64, + 0x61, 0x0a, 0x70, 0x72, 0x6f, 0x63, 0x0a, 0x6d, 0xc3, 0xa1, 0x74, 0x65, 0x0a, 0x74, 0x61, 0x74, + 0x6f, 0x0a, 0x6b, 0x61, 0x6d, 0x0a, 0x74, 0x6f, 0x68, 0x6f, 0x74, 0x6f, 0x0a, 0x6b, 0x64, 0x6f, + 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xad, 0x0a, 0x6d, 0x69, 0x0a, 0x6e, 0xc3, 0xa1, 0x6d, 0x0a, + 0x74, 0x6f, 0x6d, 0x0a, 0x74, 0x6f, 0x6d, 0x75, 0x74, 0x6f, 0x0a, 0x6d, 0xc3, 
0xad, 0x74, 0x0a, + 0x6e, 0x69, 0x63, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0x6f, 0x75, + 0x0a, 0x62, 0x79, 0x6c, 0x61, 0x0a, 0x74, 0x6f, 0x68, 0x6f, 0x0a, 0x70, 0x72, 0x6f, 0x74, 0x6f, + 0xc5, 0xbe, 0x65, 0x0a, 0x61, 0x73, 0x69, 0x0a, 0x68, 0x6f, 0x0a, 0x6e, 0x61, 0xc5, 0xa1, 0x69, + 0x0a, 0x6e, 0x61, 0x70, 0x69, 0xc5, 0xa1, 0x74, 0x65, 0x0a, 0x72, 0x65, 0x0a, 0x63, 0x6f, 0xc5, + 0xbe, 0x0a, 0x74, 0xc3, 0xad, 0x6d, 0x0a, 0x74, 0x61, 0x6b, 0xc5, 0xbe, 0x65, 0x0a, 0x73, 0x76, + 0xc3, 0xbd, 0x63, 0x68, 0x0a, 0x6a, 0x65, 0x6a, 0xc3, 0xad, 0x0a, 0x73, 0x76, 0xc3, 0xbd, 0x6d, + 0x69, 0x0a, 0x6a, 0x73, 0x74, 0x65, 0x0a, 0x61, 0x6a, 0x0a, 0x74, 0x75, 0x0a, 0x74, 0x65, 0x64, + 0x79, 0x0a, 0x74, 0x65, 0x74, 0x6f, 0x0a, 0x62, 0x79, 0x6c, 0x6f, 0x0a, 0x6b, 0x64, 0x65, 0x0a, + 0x6b, 0x65, 0x0a, 0x70, 0x72, 0x61, 0x76, 0xc3, 0xa9, 0x0a, 0x6a, 0x69, 0x0a, 0x6e, 0x61, 0x64, + 0x0a, 0x6e, 0x65, 0x6a, 0x73, 0x6f, 0x75, 0x0a, 0x63, 0x69, 0x0a, 0x70, 0x6f, 0x64, 0x0a, 0x74, + 0xc3, 0xa9, 0x6d, 0x61, 0x0a, 0x6d, 0x65, 0x7a, 0x69, 0x0a, 0x70, 0x72, 0x65, 0x73, 0x0a, 0x74, + 0x79, 0x0a, 0x70, 0x61, 0x6b, 0x0a, 0x76, 0xc3, 0xa1, 0x6d, 0x0a, 0x61, 0x6e, 0x69, 0x0a, 0x6b, + 0x64, 0x79, 0xc5, 0xbe, 0x0a, 0x76, 0xc5, 0xa1, 0x61, 0x6b, 0x0a, 0x6e, 0x65, 0x67, 0x0a, 0x6a, + 0x73, 0x65, 0x6d, 0x0a, 0x74, 0x65, 0x6e, 0x74, 0x6f, 0x0a, 0x63, 0x6c, 0xc3, 0xa1, 0x6e, 0x6b, + 0x75, 0x0a, 0x63, 0x6c, 0xc3, 0xa1, 0x6e, 0x6b, 0x79, 0x0a, 0x61, 0x62, 0x79, 0x0a, 0x6a, 0x73, + 0x6d, 0x65, 0x0a, 0x70, 0x72, 0x65, 0x64, 0x0a, 0x70, 0x74, 0x61, 0x0a, 0x6a, 0x65, 0x6a, 0x69, + 0x63, 0x68, 0x0a, 0x62, 0x79, 0x6c, 0x0a, 0x6a, 0x65, 0xc5, 0xa1, 0x74, 0x65, 0x0a, 0x61, 0xc5, + 0xbe, 0x0a, 0x62, 0x65, 0x7a, 0x0a, 0x74, 0x61, 0x6b, 0xc3, 0xa9, 0x0a, 0x70, 0x6f, 0x75, 0x7a, + 0x65, 0x0a, 0x70, 0x72, 0x76, 0x6e, 0xc3, 0xad, 0x0a, 0x76, 0x61, 0xc5, 0xa1, 0x65, 0x0a, 0x6b, + 0x74, 0x65, 0x72, 0xc3, 0xa1, 0x0a, 0x6e, 0xc3, 0xa1, 0x73, 0x0a, 0x6e, 0x6f, 0x76, 0xc3, 0xbd, + 0x0a, 0x74, 0x69, 
0x70, 0x79, 0x0a, 0x70, 0x6f, 0x6b, 0x75, 0x64, 0x0a, 0x6d, 0x75, 0xc5, 0xbe, + 0x65, 0x0a, 0x73, 0x74, 0x72, 0x61, 0x6e, 0x61, 0x0a, 0x6a, 0x65, 0x68, 0x6f, 0x0a, 0x73, 0x76, + 0xc3, 0xa9, 0x0a, 0x6a, 0x69, 0x6e, 0xc3, 0xa9, 0x0a, 0x7a, 0x70, 0x72, 0xc3, 0xa1, 0x76, 0x79, + 0x0a, 0x6e, 0x6f, 0x76, 0xc3, 0xa9, 0x0a, 0x6e, 0x65, 0x6e, 0xc3, 0xad, 0x0a, 0x76, 0xc3, 0xa1, + 0x73, 0x0a, 0x6a, 0x65, 0x6e, 0x0a, 0x70, 0x6f, 0x64, 0x6c, 0x65, 0x0a, 0x7a, 0x64, 0x65, 0x0a, + 0x75, 0xc5, 0xbe, 0x0a, 0x62, 0xc3, 0xbd, 0x74, 0x0a, 0x76, 0xc3, 0xad, 0x63, 0x65, 0x0a, 0x62, + 0x75, 0x64, 0x65, 0x0a, 0x6a, 0x69, 0xc5, 0xbe, 0x0a, 0x6e, 0x65, 0xc5, 0xbe, 0x0a, 0x6b, 0x74, + 0x65, 0x72, 0xc3, 0xbd, 0x0a, 0x62, 0x79, 0x0a, 0x6b, 0x74, 0x65, 0x72, 0xc3, 0xa9, 0x0a, 0x63, + 0x6f, 0x0a, 0x6e, 0x65, 0x62, 0x6f, 0x0a, 0x74, 0x65, 0x6e, 0x0a, 0x74, 0x61, 0x6b, 0x0a, 0x6d, + 0xc3, 0xa1, 0x0a, 0x70, 0x72, 0x69, 0x0a, 0x6f, 0x64, 0x0a, 0x70, 0x6f, 0x0a, 0x6a, 0x73, 0x6f, + 0x75, 0x0a, 0x6a, 0x61, 0x6b, 0x0a, 0x64, 0x61, 0x6c, 0xc5, 0xa1, 0xc3, 0xad, 0x0a, 0x61, 0x6c, + 0x65, 0x0a, 0x73, 0x69, 0x0a, 0x73, 0x65, 0x0a, 0x76, 0x65, 0x0a, 0x74, 0x6f, 0x0a, 0x6a, 0x61, + 0x6b, 0x6f, 0x0a, 0x7a, 0x61, 0x0a, 0x7a, 0x70, 0x65, 0x74, 0x0a, 0x7a, 0x65, 0x0a, 0x64, 0x6f, + 0x0a, 0x70, 0x72, 0x6f, 0x0a, 0x6a, 0x65, 0x0a, 0x6e, 0x61, 0x0a, 0x61, 0x74, 0x64, 0x0a, 0x61, + 0x74, 0x70, 0x0a, 0x6a, 0x61, 0x6b, 0x6d, 0x69, 0x6c, 0x65, 0x0a, 0x70, 0x72, 0x69, 0x63, 0x65, + 0x6d, 0xc5, 0xbe, 0x0a, 0x6a, 0xc3, 0xa1, 0x0a, 0x6f, 0x6e, 0x0a, 0x6f, 0x6e, 0x61, 0x0a, 0x6f, + 0x6e, 0x6f, 0x0a, 0x6f, 0x6e, 0x69, 0x0a, 0x6f, 0x6e, 0x79, 0x0a, 0x6d, 0x79, 0x0a, 0x76, 0x79, + 0x0a, 0x6a, 0xc3, 0xad, 0x0a, 0x6a, 0x69, 0x0a, 0x6d, 0x65, 0x0a, 0x6d, 0x6e, 0x65, 0x0a, 0x6a, + 0x65, 0x6d, 0x75, 0x0a, 0x74, 0x6f, 0x6d, 0x75, 0x0a, 0x74, 0x65, 0x6d, 0x0a, 0x74, 0x65, 0x6d, + 0x75, 0x0a, 0x6e, 0x65, 0x6d, 0x75, 0x0a, 0x6e, 0x65, 0x6d, 0x75, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, + 0x68, 0x6f, 0xc5, 0xbe, 0x0a, 0x6a, 0xc3, 0xad, 0xc5, 
0xbe, 0x0a, 0x6a, 0x65, 0x6c, 0x69, 0x6b, + 0x6f, 0xc5, 0xbe, 0x0a, 0x6a, 0x65, 0xc5, 0xbe, 0x0a, 0x6a, 0x61, 0x6b, 0x6f, 0xc5, 0xbe, 0x0a, + 0x6e, 0x61, 0x63, 0x65, 0xc5, 0xbe, 0x0a +}; + +CzechAnalyzer::CzechAnalyzer(LuceneVersion::Version matchVersion) { + this->stoptable = getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +CzechAnalyzer::CzechAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stoptable = stopwords; + this->matchVersion = matchVersion; +} + +CzechAnalyzer::~CzechAnalyzer() { +} + +const HashSet CzechAnalyzer::getDefaultStopSet() { + static HashSet stopSet; + LUCENE_RUN_ONCE( + String stopWords(UTF8_TO_STRING(_CZECH_STOP_WORDS)); + Collection words(StringUtils::split(stopWords, L"\n")); + stopSet = HashSet::newInstance(words.begin(), words.end()); + ); + return stopSet; +} + +TokenStreamPtr CzechAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(matchVersion, reader); + result = newLucene(result); + result = newLucene(result); + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); + return result; +} + +TokenStreamPtr CzechAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + CzechAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(matchVersion, reader); + streams->result = newLucene(streams->source); + streams->result = newLucene(streams->result); + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +CzechAnalyzerSavedStreams::~CzechAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/de/GermanAnalyzer.cpp 
b/src/contrib/analyzers/common/analysis/de/GermanAnalyzer.cpp index ddcbadfc..8bcc3373 100644 --- a/src/contrib/analyzers/common/analysis/de/GermanAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/de/GermanAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,85 +12,76 @@ #include "StopFilter.h" #include "GermanStemFilter.h" -namespace Lucene -{ - const wchar_t* GermanAnalyzer::_GERMAN_STOP_WORDS[] = - { - L"einer", L"eine", L"eines", L"einem", L"einen", L"der", L"die", - L"das", L"dass", L"da\x00df", L"du", L"er", L"sie", L"es", L"was", - L"wer", L"wie", L"wir", L"und", L"oder", L"ohne", L"mit", L"am", - L"im", L"in", L"aus", L"auf", L"ist", L"sein", L"war", L"wird", - L"ihr", L"ihre", L"ihres", L"als", L"f\x00fcr", L"von", L"mit", - L"dich", L"dir", L"mich", L"mir", L"mein", L"sein", L"kein", - L"durch", L"wegen", L"wird" - }; - - GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion) - { - this->stopSet = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stopSet = stopwords; - this->matchVersion = matchVersion; - } - - GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) - { - this->stopSet = stopwords; - this->exclusionSet = exclusions; - this->matchVersion = matchVersion; - } - - GermanAnalyzer::~GermanAnalyzer() - { - } - - const HashSet GermanAnalyzer::getDefaultStopSet() - { - static HashSet stopSet; - if (!stopSet) - stopSet = HashSet::newInstance(_GERMAN_STOP_WORDS, _GERMAN_STOP_WORDS + 
SIZEOF_ARRAY(_GERMAN_STOP_WORDS)); - return stopSet; - } - - void GermanAnalyzer::setStemExclusionTable(HashSet exclusions) - { - exclusionSet = exclusions; - setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created - } - - TokenStreamPtr GermanAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(matchVersion, reader); - result = newLucene(result); - result = newLucene(result); - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); - result = newLucene(result, exclusionSet); - return result; - } - - TokenStreamPtr GermanAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - GermanAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(matchVersion, reader); - streams->result = newLucene(streams->source); - streams->result = newLucene(streams->result); - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); - streams->result = newLucene(streams->result, exclusionSet); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - GermanAnalyzerSavedStreams::~GermanAnalyzerSavedStreams() - { +namespace Lucene { + +const wchar_t* GermanAnalyzer::_GERMAN_STOP_WORDS[] = { + L"einer", L"eine", L"eines", L"einem", L"einen", L"der", L"die", + L"das", L"dass", L"da\x00df", L"du", L"er", L"sie", L"es", L"was", + L"wer", L"wie", L"wir", L"und", L"oder", L"ohne", L"mit", L"am", + L"im", L"in", L"aus", L"auf", L"ist", L"sein", L"war", L"wird", + L"ihr", L"ihre", L"ihres", L"als", L"f\x00fcr", L"von", L"mit", + L"dich", L"dir", L"mich", L"mir", L"mein", L"sein", L"kein", + L"durch", L"wegen", L"wird" +}; + +GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion) { + this->stopSet = 
getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stopSet = stopwords; + this->matchVersion = matchVersion; +} + +GermanAnalyzer::GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { + this->stopSet = stopwords; + this->exclusionSet = exclusions; + this->matchVersion = matchVersion; +} + +GermanAnalyzer::~GermanAnalyzer() { +} + +const HashSet GermanAnalyzer::getDefaultStopSet() { + static HashSet stopSet; + LUCENE_RUN_ONCE( + stopSet = HashSet::newInstance(_GERMAN_STOP_WORDS, _GERMAN_STOP_WORDS + SIZEOF_ARRAY(_GERMAN_STOP_WORDS)); + ); + return stopSet; +} + +void GermanAnalyzer::setStemExclusionTable(HashSet exclusions) { + exclusionSet = exclusions; + setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created +} + +TokenStreamPtr GermanAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(matchVersion, reader); + result = newLucene(result); + result = newLucene(result); + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); + result = newLucene(result, exclusionSet); + return result; +} + +TokenStreamPtr GermanAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + GermanAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(matchVersion, reader); + streams->result = newLucene(streams->source); + streams->result = newLucene(streams->result); + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); + streams->result = newLucene(streams->result, exclusionSet); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + 
+GermanAnalyzerSavedStreams::~GermanAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/de/GermanStemFilter.cpp b/src/contrib/analyzers/common/analysis/de/GermanStemFilter.cpp index 8694e70c..9b760b7c 100644 --- a/src/contrib/analyzers/common/analysis/de/GermanStemFilter.cpp +++ b/src/contrib/analyzers/common/analysis/de/GermanStemFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,52 +9,47 @@ #include "GermanStemmer.h" #include "TermAttribute.h" -namespace Lucene -{ - GermanStemFilter::GermanStemFilter(TokenStreamPtr input) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - } - - GermanStemFilter::GermanStemFilter(TokenStreamPtr input, HashSet exclusionSet) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - this->exclusionSet = exclusionSet; - } - - GermanStemFilter::~GermanStemFilter() - { - } - - bool GermanStemFilter::incrementToken() - { - if (input->incrementToken()) - { - String term(termAtt->term()); - // Check the exclusion table. - if (!exclusionSet || !exclusionSet.contains(term)) - { - String s(stemmer->stem(term)); - // If not stemmed, don't waste the time adjusting the token. 
- if (!s.empty() && s != term) - termAtt->setTermBuffer(s); +namespace Lucene { + +GermanStemFilter::GermanStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); +} + +GermanStemFilter::GermanStemFilter(const TokenStreamPtr& input, HashSet exclusionSet) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); + this->exclusionSet = exclusionSet; +} + +GermanStemFilter::~GermanStemFilter() { +} + +bool GermanStemFilter::incrementToken() { + if (input->incrementToken()) { + String term(termAtt->term()); + // Check the exclusion table. + if (!exclusionSet || !exclusionSet.contains(term)) { + String s(stemmer->stem(term)); + // If not stemmed, don't waste the time adjusting the token. + if (!s.empty() && s != term) { + termAtt->setTermBuffer(s); } - return true; } - else - return false; - } - - void GermanStemFilter::setStemmer(GermanStemmerPtr stemmer) - { - if (stemmer) - this->stemmer = stemmer; + return true; + } else { + return false; } - - void GermanStemFilter::setExclusionSet(HashSet exclusionSet) - { - this->exclusionSet = exclusionSet; +} + +void GermanStemFilter::setStemmer(const GermanStemmerPtr& stemmer) { + if (stemmer) { + this->stemmer = stemmer; } } + +void GermanStemFilter::setExclusionSet(HashSet exclusionSet) { + this->exclusionSet = exclusionSet; +} + +} diff --git a/src/contrib/analyzers/common/analysis/de/GermanStemmer.cpp b/src/contrib/analyzers/common/analysis/de/GermanStemmer.cpp index 0c6f2679..d6631ec7 100644 --- a/src/contrib/analyzers/common/analysis/de/GermanStemmer.cpp +++ b/src/contrib/analyzers/common/analysis/de/GermanStemmer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,200 +11,167 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - GermanStemmer::GermanStemmer() - { - substCount = 0; - } - - GermanStemmer::~GermanStemmer() - { - } - - String GermanStemmer::stem(const String& term) - { - // Use lowercase for medium stemming. - buffer = StringUtils::toLower(term); - if (!isStemmable()) - return buffer; - - // Stemming starts here - substitute(); - strip(); - optimize(); - resubstitute(); - removeParticleDenotion(); - +namespace Lucene { + +GermanStemmer::GermanStemmer() { + substCount = 0; +} + +GermanStemmer::~GermanStemmer() { +} + +String GermanStemmer::stem(const String& term) { + // Use lowercase for medium stemming. + buffer = StringUtils::toLower(term); + if (!isStemmable()) { return buffer; } - - bool GermanStemmer::isStemmable() - { - for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) - { - if (!UnicodeUtil::isAlpha(buffer[c])) - return false; + + // Stemming starts here + substitute(); + strip(); + optimize(); + resubstitute(); + removeParticleDenotion(); + + return buffer; +} + +bool GermanStemmer::isStemmable() { + for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { + if (!UnicodeUtil::isAlpha(buffer[c])) { + return false; } - return true; } - - void GermanStemmer::strip() - { - bool doMore = true; - while (doMore && buffer.length() > 3) - { - if (buffer.length() + substCount > 5 && boost::ends_with(buffer, L"nd")) - buffer.resize(buffer.length() - 2); - else if (buffer.length() + substCount > 4 && boost::ends_with(buffer, L"em")) - buffer.resize(buffer.length() - 2); - else if (buffer.length() + substCount > 4 && boost::ends_with(buffer, L"er")) - buffer.resize(buffer.length() - 2); - else if (buffer[buffer.length() - 1] == L'e') - buffer.resize(buffer.length() - 1); - else if 
(buffer[buffer.length() - 1] == L's') - buffer.resize(buffer.length() - 1); - else if (buffer[buffer.length() - 1] == L'n') - buffer.resize(buffer.length() - 1); - // "t" occurs only as suffix of verbs. - else if (buffer[buffer.length() - 1] == L't') - buffer.resize(buffer.length() - 1); - else - doMore = false; + return true; +} + +void GermanStemmer::strip() { + bool doMore = true; + while (doMore && buffer.length() > 3) { + if (buffer.length() + substCount > 5 && boost::ends_with(buffer, L"nd")) { + buffer.resize(buffer.length() - 2); + } else if (buffer.length() + substCount > 4 && boost::ends_with(buffer, L"em")) { + buffer.resize(buffer.length() - 2); + } else if (buffer.length() + substCount > 4 && boost::ends_with(buffer, L"er")) { + buffer.resize(buffer.length() - 2); + } else if (buffer[buffer.length() - 1] == L'e') { + buffer.resize(buffer.length() - 1); + } else if (buffer[buffer.length() - 1] == L's') { + buffer.resize(buffer.length() - 1); + } else if (buffer[buffer.length() - 1] == L'n') { + buffer.resize(buffer.length() - 1); } - } - - void GermanStemmer::optimize() - { - // Additional step for female plurals of professions and inhabitants. - if (buffer.length() > 5 && boost::ends_with(buffer, L"erin*")) - { + // "t" occurs only as suffix of verbs. + else if (buffer[buffer.length() - 1] == L't') { buffer.resize(buffer.length() - 1); - strip(); + } else { + doMore = false; } - - // Additional step for irregular plural nouns like "Matrizen -> Matrix". - if (buffer[buffer.length() - 1] == L'z') - buffer[buffer.length() - 1] = L'x'; } - - void GermanStemmer::removeParticleDenotion() - { - if (buffer.length() > 4) - { - for (int32_t c = 0; c < (int32_t)buffer.length() - 3; ++c) - { - if (buffer.substr(c, 4) == L"gege") - { - buffer.erase(c, 2); - return; - } +} + +void GermanStemmer::optimize() { + // Additional step for female plurals of professions and inhabitants. 
+ if (buffer.length() > 5 && boost::ends_with(buffer, L"erin*")) { + buffer.resize(buffer.length() - 1); + strip(); + } + + // Additional step for irregular plural nouns like "Matrizen -> Matrix". + if (buffer[buffer.length() - 1] == L'z') { + buffer[buffer.length() - 1] = L'x'; + } +} + +void GermanStemmer::removeParticleDenotion() { + if (buffer.length() > 4) { + for (int32_t c = 0; c < (int32_t)buffer.length() - 3; ++c) { + if (buffer.substr(c, 4) == L"gege") { + buffer.erase(c, 2); + return; } } } - - void GermanStemmer::substitute() - { - substCount = 0; - for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) - { - // Replace the second char of a pair of the equal characters with an asterisk - if (c > 0 && buffer[c] == buffer[c - 1]) - buffer[c] = L'*'; - // Substitute Umlauts. - else if (buffer[c] == L'\x00e4') - buffer[c] = L'a'; - else if (buffer[c] == L'\x00f6') - buffer[c] = L'o'; - else if (buffer[c] == L'\x00fc') - buffer[c] = L'u'; - // Fix bug so that 'ß' at the end of a word is replaced. - else if (buffer[c] == L'\x00df') - { - buffer[c] = L's'; - buffer.insert(c + 1, 1, L's'); +} + +void GermanStemmer::substitute() { + substCount = 0; + for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { + // Replace the second char of a pair of the equal characters with an asterisk + if (c > 0 && buffer[c] == buffer[c - 1]) { + buffer[c] = L'*'; + } + // Substitute Umlauts. + else if (buffer[c] == L'\x00e4') { + buffer[c] = L'a'; + } else if (buffer[c] == L'\x00f6') { + buffer[c] = L'o'; + } else if (buffer[c] == L'\x00fc') { + buffer[c] = L'u'; + } + // Fix bug so that 'ß' at the end of a word is replaced. 
+ else if (buffer[c] == L'\x00df') { + buffer[c] = L's'; + buffer.insert(c + 1, 1, L's'); + ++substCount; + } + // Take care that at least one character is left left side from the current one + if (c < (int32_t)buffer.length() - 1) { + // Masking several common character combinations with an token + if (c < (int32_t)buffer.length() - 2 && buffer[c] == L's' && buffer[c + 1] == L'c' && buffer[c + 2] == L'h') { + buffer[c] = L'$'; + buffer.erase(c + 1, 2); + substCount += 2; + } else if (buffer[c] == L'c' && buffer[c + 1] == L'h') { + buffer[c] = L'\x00a7'; + buffer.erase(c + 1, 1); + ++substCount; + } else if (buffer[c] == L'e' && buffer[c + 1] == L'i') { + buffer[c] = L'%'; + buffer.erase(c + 1, 1); + ++substCount; + } else if (buffer[c] == L'i' && buffer[c + 1] == L'e') { + buffer[c] = L'&'; + buffer.erase(c + 1, 1); + ++substCount; + } else if (buffer[c] == L'i' && buffer[c + 1] == L'g') { + buffer[c] = L'#'; + buffer.erase(c + 1, 1); + ++substCount; + } else if (buffer[c] == L's' && buffer[c + 1] == L't') { + buffer[c] = L'!'; + buffer.erase(c + 1, 1); ++substCount; - } - // Take care that at least one character is left left side from the current one - if (c < (int32_t)buffer.length() - 1) - { - // Masking several common character combinations with an token - if (c < (int32_t)buffer.length() - 2 && buffer[c] == L's' && buffer[c + 1] == L'c' && buffer[c + 2] == L'h') - { - buffer[c] = L'$'; - buffer.erase(c + 1, 2); - substCount += 2; - } - else if (buffer[c] == L'c' && buffer[c + 1] == L'h') - { - buffer[c] = L'\x00a7'; - buffer.erase(c + 1, 1); - ++substCount; - } - else if (buffer[c] == L'e' && buffer[c + 1] == L'i') - { - buffer[c] = L'%'; - buffer.erase(c + 1, 1); - ++substCount; - } - else if (buffer[c] == L'i' && buffer[c + 1] == L'e') - { - buffer[c] = L'&'; - buffer.erase(c + 1, 1); - ++substCount; - } - else if (buffer[c] == L'i' && buffer[c + 1] == L'g') - { - buffer[c] = L'#'; - buffer.erase(c + 1, 1); - ++substCount; - } - else if (buffer[c] == L's' 
&& buffer[c + 1] == L't') - { - buffer[c] = L'!'; - buffer.erase(c + 1, 1); - ++substCount; - } } } } - - void GermanStemmer::resubstitute() - { - for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) - { - if (buffer[c] == L'*') - buffer[c] = buffer[c - 1]; - else if (buffer[c] == L'$') - { - buffer[c] = L's'; - buffer.insert(c + 1, L"ch"); - } - else if (buffer[c] == L'\x00a7') - { - buffer[c] = L'c'; - buffer.insert(c + 1, 1, L'h'); - } - else if (buffer[c] == L'%') - { - buffer[c] = L'e'; - buffer.insert(c + 1, 1, L'i'); - } - else if (buffer[c] == L'&') - { - buffer[c] = L'i'; - buffer.insert(c + 1, 1, L'e'); - } - else if (buffer[c] == L'#') - { - buffer[c] = L'i'; - buffer.insert(c + 1, 1, L'g'); - } - else if (buffer[c] == L'!') - { - buffer[c] = L's'; - buffer.insert(c + 1, 1, L't'); - } +} + +void GermanStemmer::resubstitute() { + for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { + if (buffer[c] == L'*') { + buffer[c] = buffer[c - 1]; + } else if (buffer[c] == L'$') { + buffer[c] = L's'; + buffer.insert(c + 1, L"ch"); + } else if (buffer[c] == L'\x00a7') { + buffer[c] = L'c'; + buffer.insert(c + 1, 1, L'h'); + } else if (buffer[c] == L'%') { + buffer[c] = L'e'; + buffer.insert(c + 1, 1, L'i'); + } else if (buffer[c] == L'&') { + buffer[c] = L'i'; + buffer.insert(c + 1, 1, L'e'); + } else if (buffer[c] == L'#') { + buffer[c] = L'i'; + buffer.insert(c + 1, 1, L'g'); + } else if (buffer[c] == L'!') { + buffer[c] = L's'; + buffer.insert(c + 1, 1, L't'); } } } + +} diff --git a/src/contrib/analyzers/common/analysis/el/GreekAnalyzer.cpp b/src/contrib/analyzers/common/analysis/el/GreekAnalyzer.cpp index 330d9978..f85a47b2 100644 --- a/src/contrib/analyzers/common/analysis/el/GreekAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/el/GreekAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,107 +11,98 @@ #include "StopFilter.h" #include "StringUtils.h" -namespace Lucene -{ - /// Default Greek stopwords in UTF-8 format. - const uint8_t GreekAnalyzer::_GREEK_STOP_WORDS[] = - { - 0xce, 0xbf, 0x0a, 0xce, 0xb7, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xce, 0xbf, 0xce, 0xb9, 0x0a, - 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x85, 0x0a, 0xcf, 0x84, 0xce, 0xb7, - 0xcf, 0x83, 0x0a, 0xcf, 0x84, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xbd, - 0x0a, 0xcf, 0x84, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, - 0xba, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbc, 0xce, 0xb1, 0xce, - 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, - 0xb9, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbc, 0xce, 0xb1, - 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83, 0xcf, 0x84, 0xce, - 0xb5, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xce, - 0xbd, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb7, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb7, 0xce, - 0xbd, 0x0a, 0xce, 0xbc, 0xce, 0xb1, 0x0a, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0x0a, - 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbf, 0x0a, 0xce, 0xb3, 0xce, 0xb9, 0xce, 0xb1, 0x0a, 0xcf, 0x80, - 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xbc, 0xce, 0xb5, 0x0a, 0xcf, 0x83, 0xce, 0xb5, - 0x0a, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0x0a, 0xce, - 0xb1, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0xce, 0xb1, 0xcf, 0x84, 0xce, 0xb1, - 0x0a, 0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xb8, 0xce, 0xb1, 0x0a, 0xce, - 0xbd, 0xce, 
0xb1, 0x0a, 0xce, 0xb4, 0xce, 0xb5, 0x0a, 0xce, 0xb4, 0xce, 0xb5, 0xce, 0xbd, 0x0a, - 0xce, 0xbc, 0xce, 0xb7, 0x0a, 0xce, 0xbc, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xb5, 0xcf, 0x80, - 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xbd, 0xcf, 0x89, 0x0a, 0xce, 0xb5, 0xce, 0xb1, 0xce, 0xbd, - 0x0a, 0xce, 0xb1, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xcf, - 0x80, 0xce, 0xbf, 0xcf, 0x85, 0x0a, 0xcf, 0x80, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, - 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xb1, - 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, - 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xb5, 0xcf, 0x83, 0x0a, - 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, - 0xb9, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, - 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb7, 0x0a, 0xce, 0xb1, 0xcf, 0x85, - 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xb9, 0x0a, - 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, - 0x84, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb5, - 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xb5, 0xce, 0xba, - 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, - 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb7, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, - 0xce, 0xbd, 0xce, 0xbf, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, - 0xbf, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb5, - 0xcf, 0x83, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb1, 0x0a, - 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 
0xce, 0xbd, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xce, - 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, - 0xce, 0xbf, 0xcf, 0x80, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xce, 0xbc, 0xcf, 0x89, 0xcf, - 0x83, 0x0a, 0xce, 0xb9, 0xcf, 0x83, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xcf, 0x83, 0xce, - 0xbf, 0x0a, 0xce, 0xbf, 0xcf, 0x84, 0xce, 0xb9, 0x0a - }; - - GreekAnalyzer::GreekAnalyzer(LuceneVersion::Version matchVersion) - { - this->stopSet = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - GreekAnalyzer::GreekAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stopSet = stopwords; - this->matchVersion = matchVersion; - } - - GreekAnalyzer::~GreekAnalyzer() - { - } - - const HashSet GreekAnalyzer::getDefaultStopSet() - { - static HashSet stopSet; - if (!stopSet) - { - String stopWords(UTF8_TO_STRING(_GREEK_STOP_WORDS)); - Collection words(StringUtils::split(stopWords, L"\n")); - stopSet = HashSet::newInstance(words.begin(), words.end()); - } - return stopSet; - } - - TokenStreamPtr GreekAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(matchVersion, reader); - result = newLucene(result); - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); - return result; - } - - TokenStreamPtr GreekAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - GreekAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(matchVersion, reader); - streams->result = newLucene(streams->source); - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - 
GreekAnalyzerSavedStreams::~GreekAnalyzerSavedStreams() - { +namespace Lucene { + +/// Default Greek stopwords in UTF-8 format. +const uint8_t GreekAnalyzer::_GREEK_STOP_WORDS[] = { + 0xce, 0xbf, 0x0a, 0xce, 0xb7, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xce, 0xbf, 0xce, 0xb9, 0x0a, + 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x85, 0x0a, 0xcf, 0x84, 0xce, 0xb7, + 0xcf, 0x83, 0x0a, 0xcf, 0x84, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xbd, + 0x0a, 0xcf, 0x84, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, + 0xba, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbc, 0xce, 0xb1, 0xce, + 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, + 0xb9, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbc, 0xce, 0xb1, + 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83, 0xcf, 0x84, 0xce, + 0xb5, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xce, + 0xbd, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb7, 0x0a, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xb7, 0xce, + 0xbd, 0x0a, 0xce, 0xbc, 0xce, 0xb1, 0x0a, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0x0a, + 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbf, 0x0a, 0xce, 0xb3, 0xce, 0xb9, 0xce, 0xb1, 0x0a, 0xcf, 0x80, + 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xbc, 0xce, 0xb5, 0x0a, 0xcf, 0x83, 0xce, 0xb5, + 0x0a, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0x0a, 0xce, + 0xb1, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb9, 0x0a, 0xce, 0xba, 0xce, 0xb1, 0xcf, 0x84, 0xce, 0xb1, + 0x0a, 0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xb8, 0xce, 0xb1, 0x0a, 0xce, + 0xbd, 0xce, 0xb1, 0x0a, 0xce, 0xb4, 0xce, 0xb5, 0x0a, 0xce, 0xb4, 0xce, 0xb5, 0xce, 0xbd, 0x0a, + 0xce, 0xbc, 0xce, 0xb7, 0x0a, 0xce, 0xbc, 0xce, 0xb7, 0xce, 0xbd, 0x0a, 0xce, 0xb5, 0xcf, 0x80, + 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xbd, 0xcf, 
0x89, 0x0a, 0xce, 0xb5, 0xce, 0xb1, 0xce, 0xbd, + 0x0a, 0xce, 0xb1, 0xce, 0xbd, 0x0a, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x84, 0xce, 0xb5, 0x0a, 0xcf, + 0x80, 0xce, 0xbf, 0xcf, 0x85, 0x0a, 0xcf, 0x80, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, + 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xb1, + 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, + 0xce, 0xbf, 0xce, 0xb9, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xb5, 0xcf, 0x83, 0x0a, + 0xcf, 0x80, 0xce, 0xbf, 0xce, 0xb9, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xcf, 0x80, 0xce, 0xbf, 0xce, + 0xb9, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, + 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb7, 0x0a, 0xce, 0xb1, 0xcf, 0x85, + 0xcf, 0x84, 0xce, 0xbf, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xbf, 0xce, 0xb9, 0x0a, + 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, + 0x84, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb5, + 0xcf, 0x83, 0x0a, 0xce, 0xb1, 0xcf, 0x85, 0xcf, 0x84, 0xce, 0xb1, 0x0a, 0xce, 0xb5, 0xce, 0xba, + 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xcf, 0x83, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, + 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb7, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, + 0xce, 0xbd, 0xce, 0xbf, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, + 0xbf, 0xce, 0xb9, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb5, + 0xcf, 0x83, 0x0a, 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xb1, 0x0a, + 0xce, 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xcf, 0x89, 0xce, 0xbd, 0x0a, 0xce, + 0xb5, 0xce, 0xba, 0xce, 0xb5, 0xce, 0xb9, 0xce, 0xbd, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0x0a, + 0xce, 0xbf, 0xcf, 0x80, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xce, 0xbc, 0xcf, 
0x89, 0xcf, + 0x83, 0x0a, 0xce, 0xb9, 0xcf, 0x83, 0xcf, 0x89, 0xcf, 0x83, 0x0a, 0xce, 0xbf, 0xcf, 0x83, 0xce, + 0xbf, 0x0a, 0xce, 0xbf, 0xcf, 0x84, 0xce, 0xb9, 0x0a +}; + +GreekAnalyzer::GreekAnalyzer(LuceneVersion::Version matchVersion) { + this->stopSet = getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +GreekAnalyzer::GreekAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stopSet = stopwords; + this->matchVersion = matchVersion; +} + +GreekAnalyzer::~GreekAnalyzer() { +} + +const HashSet GreekAnalyzer::getDefaultStopSet() { + static HashSet stopSet; + LUCENE_RUN_ONCE( + String stopWords(UTF8_TO_STRING(_GREEK_STOP_WORDS)); + Collection words(StringUtils::split(stopWords, L"\n")); + stopSet = HashSet::newInstance(words.begin(), words.end()); + ); + return stopSet; +} + +TokenStreamPtr GreekAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(matchVersion, reader); + result = newLucene(result); + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); + return result; +} + +TokenStreamPtr GreekAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + GreekAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(matchVersion, reader); + streams->result = newLucene(streams->source); + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +GreekAnalyzerSavedStreams::~GreekAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/el/GreekLowerCaseFilter.cpp b/src/contrib/analyzers/common/analysis/el/GreekLowerCaseFilter.cpp index 0255a559..af9ca45c 100644 --- 
a/src/contrib/analyzers/common/analysis/el/GreekLowerCaseFilter.cpp +++ b/src/contrib/analyzers/common/analysis/el/GreekLowerCaseFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,85 +9,81 @@ #include "TermAttribute.h" #include "CharFolder.h" -namespace Lucene -{ - GreekLowerCaseFilter::GreekLowerCaseFilter(TokenStreamPtr input) : TokenFilter(input) - { - termAtt = addAttribute(); - } - - GreekLowerCaseFilter::~GreekLowerCaseFilter() - { - } - - bool GreekLowerCaseFilter::incrementToken() - { - if (input->incrementToken()) - { - wchar_t* chArray = termAtt->termBufferArray(); - int32_t chLen = termAtt->termLength(); - - for (int32_t i = 0; i < chLen; ++i) - chArray[i] = lowerCase(chArray[i]); - - return true; +namespace Lucene { + +GreekLowerCaseFilter::GreekLowerCaseFilter(const TokenStreamPtr& input) : TokenFilter(input) { + termAtt = addAttribute(); +} + +GreekLowerCaseFilter::~GreekLowerCaseFilter() { +} + +bool GreekLowerCaseFilter::incrementToken() { + if (input->incrementToken()) { + wchar_t* chArray = termAtt->termBufferArray(); + int32_t chLen = termAtt->termLength(); + + for (int32_t i = 0; i < chLen; ++i) { + chArray[i] = lowerCase(chArray[i]); } - else - return false; + + return true; + } else { + return false; } - - wchar_t GreekLowerCaseFilter::lowerCase(wchar_t codepoint) - { - switch (codepoint) - { - case L'\x03c2': // small final sigma - return 0x03c3; // small sigma - - // Some Greek characters contain diacritics. - // This filter removes these, converting to the lowercase base form. 
- - case L'\x0386': // capital alpha with tonos - case L'\x03ac': // small alpha with tonos - return L'\x03b1'; // small alpha - - case L'\x0388': // capital epsilon with tonos - case L'\x03ad': // small epsilon with tonos - return L'\x03b5'; // small epsilon - - case L'\x0389': // capital eta with tonos - case L'\x03ae': // small eta with tonos - return L'\x03b7'; // small eta - - case L'\x038a': // capital iota with tonos - case L'\x03aa': // capital iota with dialytika - case L'\x03af': // small iota with tonos - case L'\x03ca': // small iota with dialytika - case L'\x0390': // small iota with dialytika and tonos - return L'\x03b9'; // small iota - - case L'\x038e': // capital upsilon with tonos - case L'\x03ab': // capital upsilon with dialytika - case L'\x03cd': // small upsilon with tonos - case L'\x03cb': // small upsilon with dialytika - case L'\x03b0': // small upsilon with dialytika and tonos - return L'\x03c5'; // small upsilon - - case L'\x038c': // capital omicron with tonos - case L'\x03cc': // small omicron with tonos - return L'\x03bf'; // small omicron - - case L'\x038f': // capital omega with tonos - case L'\x03ce': // small omega with tonos - return L'\x03c9'; // small omega - - // The previous implementation did the conversion below. - // Only implemented for backwards compatibility with old indexes. - - case L'\x03a2': // reserved - return L'\x03c2'; // small final sigma - - default: - return CharFolder::toLower(codepoint); - } +} + +wchar_t GreekLowerCaseFilter::lowerCase(wchar_t codepoint) { + switch (codepoint) { + case L'\x03c2': // small final sigma + return 0x03c3; // small sigma + + // Some Greek characters contain diacritics. + // This filter removes these, converting to the lowercase base form. 
+ + case L'\x0386': // capital alpha with tonos + case L'\x03ac': // small alpha with tonos + return L'\x03b1'; // small alpha + + case L'\x0388': // capital epsilon with tonos + case L'\x03ad': // small epsilon with tonos + return L'\x03b5'; // small epsilon + + case L'\x0389': // capital eta with tonos + case L'\x03ae': // small eta with tonos + return L'\x03b7'; // small eta + + case L'\x038a': // capital iota with tonos + case L'\x03aa': // capital iota with dialytika + case L'\x03af': // small iota with tonos + case L'\x03ca': // small iota with dialytika + case L'\x0390': // small iota with dialytika and tonos + return L'\x03b9'; // small iota + + case L'\x038e': // capital upsilon with tonos + case L'\x03ab': // capital upsilon with dialytika + case L'\x03cd': // small upsilon with tonos + case L'\x03cb': // small upsilon with dialytika + case L'\x03b0': // small upsilon with dialytika and tonos + return L'\x03c5'; // small upsilon + + case L'\x038c': // capital omicron with tonos + case L'\x03cc': // small omicron with tonos + return L'\x03bf'; // small omicron + + case L'\x038f': // capital omega with tonos + case L'\x03ce': // small omega with tonos + return L'\x03c9'; // small omega + + // The previous implementation did the conversion below. + // Only implemented for backwards compatibility with old indexes. + + case L'\x03a2': // reserved + return L'\x03c2'; // small final sigma + + default: + return CharFolder::toLower(codepoint); } } + +} diff --git a/src/contrib/analyzers/common/analysis/fa/PersianAnalyzer.cpp b/src/contrib/analyzers/common/analysis/fa/PersianAnalyzer.cpp index c6d50688..0bcd8558 100644 --- a/src/contrib/analyzers/common/analysis/fa/PersianAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/fa/PersianAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,245 +13,236 @@ #include "PersianNormalizationFilter.h" #include "StringUtils.h" -namespace Lucene -{ - /// Default Persian stopwords in UTF-8 format. - /// - /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html - /// The stopword list is BSD-Licensed. - const uint8_t PersianAnalyzer::DEFAULT_STOPWORD_FILE[] = - { - 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, - 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xb3, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xb1, - 0x0a, 0xd8, 0xae, 0xd9, 0x8a, 0xd8, 0xa7, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xb4, - 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd9, 0x83, - 0xd9, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, 0xaa, 0xd8, - 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x85, 0x0a, 0xd9, 0xbe, 0xd8, 0xb3, 0x0a, - 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, 0xda, 0xaf, 0xd9, 0x88, 0x0a, - 0xd9, 0x8a, 0xd8, 0xa7, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x86, 0xd8, - 0xaf, 0x0a, 0xd8, 0xb3, 0xd9, 0xbe, 0xd8, 0xb3, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xda, 0xaf, 0xd8, - 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x87, 0xd8, 0xb1, 0xda, 0xaf, 0xd8, 0xb2, 0x0a, 0xd9, 0xbe, 0xd9, - 0x86, 0xd8, 0xac, 0x0a, 0xd9, 0x86, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, - 0x85, 0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, - 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd8, 0xb7, 0xd9, 0x88, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, - 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0x0a, 0xd8, 0xaf, 0xd9, 0x88, 0x0a, 0xd9, 0x86, 0xd8, 
0xae, 0xd8, - 0xb3, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xda, - 0x86, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xda, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0xd8, 0xb3, 0xd8, - 0xb7, 0x0a, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x82, - 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0x0a, 0xd8, 0xb1, 0xd9, 0x81, - 0xd8, 0xaa, 0x0a, 0xd9, 0x87, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, 0x86, - 0xd9, 0x86, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd8, 0xb1, 0x0a, 0xd9, 0x87, 0xd8, 0xb2, - 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, - 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, - 0xd8, 0xa7, 0x0a, 0xd8, 0xb4, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, - 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, - 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, - 0x86, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x86, - 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x85, - 0xd9, 0x8a, 0xd9, 0x84, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x88, 0xd9, - 0x82, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xaf, - 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, 0xd8, 0xac, 0xd8, 0xb2, - 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xae, 0xd8, 0xaf, - 0xd9, 0x85, 0xd8, 0xa7, 0xd8, 0xaa, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, - 0xd8, 0xb1, 0xd8, 0xae, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, - 0xd8, 0xa8, 0xd8, 0xb3, 
0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xac, 0xd9, - 0x84, 0xd9, 0x88, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xad, 0xd9, 0x82, - 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd9, 0x88, - 0xd8, 0xb9, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, - 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xb8, 0xd9, 0x8a, 0xd8, - 0xb1, 0x0a, 0xd9, 0x86, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, - 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, - 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, - 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x8a, 0x0a, 0xd8, - 0xb4, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, - 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x8a, - 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x82, 0x0a, 0xd9, 0x87, 0xd9, 0x8a, - 0xda, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, - 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x85, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd9, 0x83, - 0xd8, 0xac, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, - 0xaf, 0x0a, 0xd9, 0x83, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd9, 0x85, - 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x85, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, - 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, - 0xaf, 0x0a, 0xd8, 0xb3, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xac, 0xd8, 0xaf, 0xd8, 0xa7, 0x0a, - 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xda, - 0xaf, 0xd8, 0xb1, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd8, 0xaf, 
0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, - 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0xd9, 0x86, - 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, - 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xda, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0xd9, 0x8a, - 0x0a, 0xd8, 0xb3, 0xd9, 0x85, 0xd8, 0xaa, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, - 0x86, 0xda, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd8, - 0xa7, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xaf, - 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xab, - 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, - 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, - 0xaa, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa8, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, - 0xa8, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xb3, - 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, - 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb6, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, - 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, - 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, - 0xd9, 0x88, 0x0a, 0xd8, 0xac, 0xd8, 0xb1, 0xd9, 0x8a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, - 0xd9, 0x88, 0xd9, 0x84, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, - 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xb1, 0x0a, - 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x85, 0xd8, 0xaf, 0xd8, - 0xaa, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x8a, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, - 
0xa7, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0x0a, 0xd8, - 0xaa, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xac, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xaf, - 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, - 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd9, - 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x85, 0x0a, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xaf, - 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x8a, - 0xd9, 0x85, 0x0a, 0xd9, 0x86, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, 0xb2, 0xd8, 0xaf, - 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd9, 0x82, 0xd8, 0xb5, 0xd8, 0xaf, 0x0a, 0xd9, - 0x81, 0xd9, 0x82, 0xd8, 0xb7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x8a, - 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, - 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, - 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xb7, 0x0a, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x85, 0x0a, - 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, - 0xaf, 0x0a, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, - 0x81, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb4, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, - 0x83, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, - 0xd9, 0x85, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xb7, - 0xd9, 0x88, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb1, - 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, - 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd9, 0x86, 0xd8, 0xb2, 0xd8, 0xaf, 0xd9, - 0x8a, 0xd9, 0x83, 0x0a, 0xd8, 0xb7, 
0xd9, 0x8a, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, - 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb2, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, - 0xd8, 0xaa, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, - 0xb4, 0xd8, 0xaa, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x8a, 0x0a, 0xd8, 0xb7, 0xd8, 0xb1, 0xd9, - 0x8a, 0xd9, 0x82, 0x0a, 0xd8, 0xa7, 0xd8, 0xb4, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, - 0xaa, 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0x0a, 0xd9, 0x86, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, - 0x8a, 0xd8, 0xaf, 0x0a, 0xda, 0xaf, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd8, - 0xaf, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, - 0xaa, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd8, - 0xa7, 0xd9, 0x8a, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, - 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, - 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, - 0xaf, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, - 0xa7, 0xd9, 0x8a, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd9, - 0x87, 0xd9, 0x85, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0xbe, 0xd8, 0xa7, - 0xd8, 0xb9, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb3, 0x0a, 0xd8, 0xad, 0xd8, 0xaf, - 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xd8, 0xae, 0xd8, 0xaa, 0xd9, 0x84, 0xd9, 0x81, 0x0a, - 0xd9, 0x85, 0xd9, 0x82, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, - 0xb2, 0x0a, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, - 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xb6, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, - 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xb2, 
0xd9, 0x8a, 0x0a, 0xd8, - 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, - 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd8, 0xb1, 0xd8, 0xb3, 0xd9, 0x8a, - 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xae, - 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xda, 0x86, 0xd9, 0x88, 0xd9, - 0x86, 0x0a, 0xd8, 0xae, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xac, 0x0a, 0xd8, 0xb4, 0xd8, 0xb4, 0x0a, - 0xd9, 0x87, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd8, 0xaa, 0xd8, 0xad, 0xd8, 0xaa, 0x0a, - 0xd8, 0xb6, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, - 0x85, 0x0a, 0xda, 0xaf, 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x81, 0xd9, 0x83, 0xd8, - 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xb3, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0xbe, 0xd9, - 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xb1, 0xd9, - 0x88, 0xd8, 0xb2, 0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, - 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, - 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x88, - 0xd9, 0x82, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xd9, 0x83, 0xd9, 0x8a, 0x0a, 0xda, 0x86, 0xd9, 0x86, - 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, - 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xa7, 0xd8, 0xb3, - 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd8, 0xaf, - 0x0a, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0x0a, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xaf, - 0x0a, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x8a, 0x0a, 0xd8, 0xad, 0xd8, 0xaa, 0xd9, 0x8a, - 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb9, - 0xd9, 0x82, 
0xd8, 0xa8, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, - 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, - 0xd8, 0xaa, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x87, 0x0a, - 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, - 0xd9, 0x85, 0xd8, 0xab, 0xd9, 0x84, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb1, - 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb1, - 0xd9, 0x87, 0x0a, 0xd8, 0xb7, 0xd8, 0xa8, 0xd9, 0x82, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xaf, - 0x0a, 0xd8, 0xa7, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xd8, 0xb5, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaa, - 0x0a, 0xd8, 0xba, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, - 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xb1, 0xd9, 0x8a, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, - 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xda, - 0x86, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, - 0x0a, 0xd9, 0x84, 0xd8, 0xb7, 0xd9, 0x81, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, - 0xaf, 0xd8, 0xb1, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd9, 0x86, - 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x8a, - 0xd9, 0x86, 0x0a, 0xda, 0xaf, 0xd8, 0xb0, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, - 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd8, - 0xaa, 0x0a, 0xda, 0xaf, 0xd8, 0xb0, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, - 0x87, 0xd9, 0x85, 0x0a, 0xd9, 0x81, 0xd9, 0x88, 0xd9, 0x82, 0x0a, 0xd9, 0x86, 0xd9, 0x87, 0x0a, - 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xb4, 0xd9, 0x88, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, - 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xaf, 0x0a, 0xd9, 
0x87, 0xd9, 0x85, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, - 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd9, 0x84, 0x0a, - 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd9, - 0x87, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, - 0x85, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, - 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd9, 0x82, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd9, 0x83, 0xd9, - 0x86, 0xd9, 0x85, 0x0a, 0xd8, 0xb3, 0xd8, 0xb9, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, - 0xb2, 0xd9, 0x87, 0x0a, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, - 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xac, 0xd9, 0x84, 0xd9, - 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, - 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0x0a - }; - - PersianAnalyzer::PersianAnalyzer(LuceneVersion::Version matchVersion) - { - this->stoptable = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - PersianAnalyzer::PersianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stoptable = stopwords; - this->matchVersion = matchVersion; - } - - PersianAnalyzer::~PersianAnalyzer() - { - } - - const HashSet PersianAnalyzer::getDefaultStopSet() - { - static HashSet stopSet; - if (!stopSet) - { - String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); - Collection words(StringUtils::split(stopWords, L"\n")); - stopSet = HashSet::newInstance(words.begin(), words.end()); - } - return stopSet; - } - - TokenStreamPtr PersianAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(reader); - result = newLucene(result); - result = newLucene(result); +namespace Lucene { + +/// Default Persian stopwords in UTF-8 format. 
+/// +/// Generated from http://members.unine.ch/jacques.savoy/clef/index.html +/// The stopword list is BSD-Licensed. +const uint8_t PersianAnalyzer::DEFAULT_STOPWORD_FILE[] = { + 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, + 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xb3, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xb1, + 0x0a, 0xd8, 0xae, 0xd9, 0x8a, 0xd8, 0xa7, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xb4, + 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd9, 0x83, + 0xd9, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, 0xaa, 0xd8, + 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x85, 0x0a, 0xd9, 0xbe, 0xd8, 0xb3, 0x0a, + 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x8a, 0x0a, 0xd9, 0x88, 0xda, 0xaf, 0xd9, 0x88, 0x0a, + 0xd9, 0x8a, 0xd8, 0xa7, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x86, 0xd8, + 0xaf, 0x0a, 0xd8, 0xb3, 0xd9, 0xbe, 0xd8, 0xb3, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xda, 0xaf, 0xd8, + 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x87, 0xd8, 0xb1, 0xda, 0xaf, 0xd8, 0xb2, 0x0a, 0xd9, 0xbe, 0xd9, + 0x86, 0xd8, 0xac, 0x0a, 0xd9, 0x86, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, + 0x85, 0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x84, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, + 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x87, 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd8, 0xb7, 0xd9, 0x88, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, + 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0x0a, 0xd8, 0xaf, 0xd9, 0x88, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd8, + 0xb3, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x88, 0xd9, 0x84, 0xd9, 0x8a, 0x0a, 0xda, + 0x86, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xda, 0x86, 0xd9, 0x87, 0x0a, 0xd9, 0x88, 0xd8, 0xb3, 0xd8, + 0xb7, 0x0a, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x82, + 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd9, 0x8a, 
0xd9, 0x83, 0x0a, 0xd8, 0xb1, 0xd9, 0x81, + 0xd8, 0xaa, 0x0a, 0xd9, 0x87, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, 0x86, + 0xd9, 0x86, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd8, 0xb1, 0x0a, 0xd9, 0x87, 0xd8, 0xb2, + 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, + 0xd9, 0x8a, 0x0a, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, + 0xd8, 0xa7, 0x0a, 0xd8, 0xb4, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, + 0xd8, 0xb1, 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, + 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, + 0x86, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x86, + 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x85, + 0xd9, 0x8a, 0xd9, 0x84, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x88, 0xd9, + 0x82, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xaf, + 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, 0xd8, 0xac, 0xd8, 0xb2, + 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x84, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xae, 0xd8, 0xaf, + 0xd9, 0x85, 0xd8, 0xa7, 0xd8, 0xaa, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, + 0xd8, 0xb1, 0xd8, 0xae, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, + 0xd8, 0xa8, 0xd8, 0xb3, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xac, 0xd9, + 0x84, 0xd9, 0x88, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xad, 0xd9, 0x82, + 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd9, 0x88, + 0xd8, 0xb9, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd9, 
0x86, + 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xb8, 0xd9, 0x8a, 0xd8, + 0xb1, 0x0a, 0xd9, 0x86, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, + 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, + 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, + 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x8a, 0x0a, 0xd8, + 0xb4, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, + 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x8a, + 0xd8, 0xaf, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x82, 0x0a, 0xd9, 0x87, 0xd9, 0x8a, + 0xda, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, + 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x85, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd9, 0x83, + 0xd8, 0xac, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, + 0xaf, 0x0a, 0xd9, 0x83, 0xd8, 0xb3, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xb1, 0x0a, 0xd9, 0x85, + 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x85, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, + 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, + 0xaf, 0x0a, 0xd8, 0xb3, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xac, 0xd8, 0xaf, 0xd8, 0xa7, 0x0a, + 0xd9, 0x86, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xda, + 0xaf, 0xd8, 0xb1, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, + 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd9, 0x87, 0xd9, 0x86, + 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, + 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd9, 0x86, 0xda, 0xaf, 0xd8, 0xa7, 0xd9, 0x85, 0xd9, 0x8a, + 0x0a, 0xd8, 0xb3, 0xd9, 0x85, 
0xd8, 0xaa, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, + 0x86, 0xda, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd8, 0xaf, 0xd8, + 0xa7, 0xd8, 0xaf, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xaf, + 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xab, + 0xd8, 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x87, + 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0xd8, + 0xaa, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa8, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, + 0xa8, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xb3, + 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, + 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xb6, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd8, 0xb1, + 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x8a, + 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x84, 0xd9, 0x8a, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, + 0xd9, 0x88, 0x0a, 0xd8, 0xac, 0xd8, 0xb1, 0xd9, 0x8a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, + 0xd9, 0x88, 0xd9, 0x84, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, + 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xb1, 0x0a, + 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd9, 0x85, 0xd8, 0xaf, 0xd8, + 0xaa, 0xd9, 0x8a, 0x0a, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x8a, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, + 0xa7, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0x0a, 0xd8, + 0xaa, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xac, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xaf, + 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, + 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 
0xaf, 0xd9, 0x86, 0x0a, 0xd9, + 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x85, 0x0a, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xaf, + 0x0a, 0xd9, 0x83, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x8a, + 0xd9, 0x85, 0x0a, 0xd9, 0x86, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd8, 0xb2, 0xd8, 0xaf, + 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd9, 0x82, 0xd8, 0xb5, 0xd8, 0xaf, 0x0a, 0xd9, + 0x81, 0xd9, 0x82, 0xd8, 0xb7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0xd9, 0x8a, + 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, 0xaf, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, 0xa7, + 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, + 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xb7, 0x0a, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x85, 0x0a, + 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x85, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, + 0xaf, 0x0a, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, + 0x81, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb4, 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd9, + 0x83, 0xd9, 0x86, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, + 0xd9, 0x85, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd8, 0xb7, + 0xd9, 0x88, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd8, 0xb1, + 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, + 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd9, 0x86, 0xd8, 0xb2, 0xd8, 0xaf, 0xd9, + 0x8a, 0xd9, 0x83, 0x0a, 0xd8, 0xb7, 0xd9, 0x8a, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, + 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb2, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x87, 0xd8, 0xa7, 0x0a, + 0xd8, 0xaa, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, + 0xb4, 0xd8, 0xaa, 0x0a, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x8a, 0x0a, 0xd8, 0xb7, 0xd8, 0xb1, 0xd9, + 0x8a, 
0xd9, 0x82, 0x0a, 0xd8, 0xa7, 0xd8, 0xb4, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, + 0xaa, 0x0a, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0x0a, 0xd9, 0x86, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, + 0x8a, 0xd8, 0xaf, 0x0a, 0xda, 0xaf, 0xd9, 0x81, 0xd8, 0xaa, 0x0a, 0xda, 0x86, 0xd9, 0x86, 0xd8, + 0xaf, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, + 0xaa, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd8, + 0xa7, 0xd9, 0x8a, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, + 0xd8, 0xa7, 0xd9, 0x8a, 0xd8, 0xaf, 0x0a, 0xd8, 0xaa, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, + 0xd8, 0xa7, 0xd9, 0x8a, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xda, + 0xaf, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, + 0xa7, 0xd9, 0x8a, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd9, + 0x87, 0xd9, 0x85, 0xda, 0x86, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0xbe, 0xd8, 0xa7, + 0xd8, 0xb9, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd8, 0xb3, 0x0a, 0xd8, 0xad, 0xd8, 0xaf, + 0xd9, 0x88, 0xd8, 0xaf, 0x0a, 0xd9, 0x85, 0xd8, 0xae, 0xd8, 0xaa, 0xd9, 0x84, 0xd9, 0x81, 0x0a, + 0xd9, 0x85, 0xd9, 0x82, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xda, 0x86, 0xd9, 0x8a, 0xd8, + 0xb2, 0x0a, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd9, 0x86, 0xd8, 0xaf, 0xd8, + 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, 0xb6, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xda, + 0x86, 0xd9, 0x88, 0xd9, 0x86, 0x0a, 0xd8, 0xb3, 0xd8, 0xa7, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, + 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, 0x85, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0x0a, 0xd8, + 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd8, 0xb1, 0xd8, 0xb3, 0xd9, 0x8a, + 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xae, + 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 
0xa7, 0xd8, 0xb1, 0x0a, 0xda, 0x86, 0xd9, 0x88, 0xd9, + 0x86, 0x0a, 0xd8, 0xae, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xac, 0x0a, 0xd8, 0xb4, 0xd8, 0xb4, 0x0a, + 0xd9, 0x87, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd8, 0xaa, 0xd8, 0xad, 0xd8, 0xaa, 0x0a, + 0xd8, 0xb6, 0xd9, 0x85, 0xd9, 0x86, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, 0x8a, 0xd9, + 0x85, 0x0a, 0xda, 0xaf, 0xd9, 0x81, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, 0x81, 0xd9, 0x83, 0xd8, + 0xb1, 0x0a, 0xd8, 0xa8, 0xd8, 0xb3, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0xbe, 0xd9, + 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xb1, 0xd9, + 0x88, 0xd8, 0xb2, 0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, + 0xd9, 0x87, 0x0a, 0xd9, 0x86, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x0a, + 0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x84, 0x0a, 0xd9, 0x88, + 0xd9, 0x82, 0xd8, 0xaa, 0xd9, 0x8a, 0x0a, 0xd9, 0x83, 0xd9, 0x8a, 0x0a, 0xda, 0x86, 0xd9, 0x86, + 0xd9, 0x8a, 0xd9, 0x86, 0x0a, 0xd9, 0x83, 0xd9, 0x87, 0x0a, 0xda, 0xaf, 0xd9, 0x8a, 0xd8, 0xb1, + 0xd9, 0x8a, 0x0a, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, 0xd8, 0xa7, 0xd8, 0xb3, + 0xd8, 0xaa, 0x0a, 0xd9, 0x83, 0xd8, 0xac, 0xd8, 0xa7, 0x0a, 0xd9, 0x83, 0xd9, 0x86, 0xd8, 0xaf, + 0x0a, 0xd9, 0x86, 0xd9, 0x8a, 0xd8, 0xb2, 0x0a, 0xd9, 0x8a, 0xd8, 0xa7, 0xd8, 0xa8, 0xd8, 0xaf, + 0x0a, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x8a, 0x0a, 0xd8, 0xad, 0xd8, 0xaa, 0xd9, 0x8a, + 0x0a, 0xd8, 0xaa, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb9, + 0xd9, 0x82, 0xd8, 0xa8, 0x0a, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa, 0x0a, + 0xd9, 0x83, 0xd9, 0x86, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa8, 0xd9, 0x8a, 0xd9, 0x86, 0x0a, + 0xd8, 0xaa, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x87, 0x0a, + 0xd9, 0x85, 0xd8, 0xa7, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd9, 0x86, 
0xd8, 0xaf, 0x0a, + 0xd9, 0x85, 0xd8, 0xab, 0xd9, 0x84, 0x0a, 0xd8, 0xb4, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb1, + 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, 0xd8, 0xb1, + 0xd9, 0x87, 0x0a, 0xd8, 0xb7, 0xd8, 0xa8, 0xd9, 0x82, 0x0a, 0xd8, 0xa8, 0xd8, 0xb9, 0xd8, 0xaf, + 0x0a, 0xd8, 0xa7, 0xda, 0xaf, 0xd8, 0xb1, 0x0a, 0xd8, 0xb5, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaa, + 0x0a, 0xd8, 0xba, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xac, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd8, + 0xa8, 0xd9, 0x8a, 0xd8, 0xb4, 0x0a, 0xd8, 0xb1, 0xd9, 0x8a, 0xd8, 0xb2, 0xd9, 0x8a, 0x0a, 0xd8, + 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xda, + 0x86, 0xda, 0xaf, 0xd9, 0x88, 0xd9, 0x86, 0xd9, 0x87, 0x0a, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, + 0x0a, 0xd9, 0x84, 0xd8, 0xb7, 0xd9, 0x81, 0xd8, 0xa7, 0x0a, 0xd9, 0x85, 0xd9, 0x8a, 0x0a, 0xd8, + 0xaf, 0xd8, 0xb1, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x85, 0xd9, 0x86, + 0x0a, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xaf, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x8a, + 0xd9, 0x86, 0x0a, 0xda, 0xaf, 0xd8, 0xb0, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xa8, + 0xd8, 0xb1, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x84, 0xd8, + 0xaa, 0x0a, 0xda, 0xaf, 0xd8, 0xb0, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0xd9, 0x87, 0x0a, 0xd9, + 0x87, 0xd9, 0x85, 0x0a, 0xd9, 0x81, 0xd9, 0x88, 0xd9, 0x82, 0x0a, 0xd9, 0x86, 0xd9, 0x87, 0x0a, + 0xd9, 0x87, 0xd8, 0xa7, 0x0a, 0xd8, 0xb4, 0xd9, 0x88, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xa7, + 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xaf, 0x0a, 0xd9, 0x87, 0xd9, 0x85, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, + 0xb1, 0xd9, 0x87, 0x0a, 0xd9, 0x87, 0xd8, 0xb1, 0x0a, 0xd8, 0xa7, 0xd9, 0x88, 0xd9, 0x84, 0x0a, + 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd9, 0x86, 0xd8, 0xaf, 0x0a, 0xda, 0x86, 0xd9, + 0x87, 0xd8, 0xa7, 0xd8, 0xb1, 0x0a, 0xd9, 0x86, 0xd8, 0xa7, 0xd9, 0x85, 0x0a, 0xd8, 0xa7, 0xd9, + 0x85, 0xd8, 0xb1, 
0xd9, 0x88, 0xd8, 0xb2, 0x0a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd9, + 0x87, 0xd8, 0xa7, 0xd9, 0x8a, 0x0a, 0xd9, 0x82, 0xd8, 0xa8, 0xd9, 0x84, 0x0a, 0xd9, 0x83, 0xd9, + 0x86, 0xd9, 0x85, 0x0a, 0xd8, 0xb3, 0xd8, 0xb9, 0xd9, 0x8a, 0x0a, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, + 0xb2, 0xd9, 0x87, 0x0a, 0xd8, 0xb1, 0xd8, 0xa7, 0x0a, 0xd9, 0x87, 0xd8, 0xb3, 0xd8, 0xaa, 0xd9, + 0x86, 0xd8, 0xaf, 0x0a, 0xd8, 0xb2, 0xd9, 0x8a, 0xd8, 0xb1, 0x0a, 0xd8, 0xac, 0xd9, 0x84, 0xd9, + 0x88, 0xd9, 0x8a, 0x0a, 0xd8, 0xb9, 0xd9, 0x86, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x86, 0x0a, 0xd8, + 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0x0a +}; + +PersianAnalyzer::PersianAnalyzer(LuceneVersion::Version matchVersion) { + this->stoptable = getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +PersianAnalyzer::PersianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stoptable = stopwords; + this->matchVersion = matchVersion; +} + +PersianAnalyzer::~PersianAnalyzer() { +} + +const HashSet PersianAnalyzer::getDefaultStopSet() { + static HashSet stopSet; + LUCENE_RUN_ONCE( + String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); + Collection words(StringUtils::split(stopWords, L"\n")); + stopSet = HashSet::newInstance(words.begin(), words.end()); + ); + return stopSet; +} + +TokenStreamPtr PersianAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(reader); + result = newLucene(result); + result = newLucene(result); + // additional Persian-specific normalization + result = newLucene(result); + // the order here is important: the stopword list is not normalized + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); + return result; +} + +TokenStreamPtr PersianAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + PersianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = 
newLucene(); + streams->source = newLucene(reader); + streams->result = newLucene(streams->source); + streams->result = newLucene(streams->result); // additional Persian-specific normalization - result = newLucene(result); - // the order here is important: the stopword list is not normalized - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); - return result; - } - - TokenStreamPtr PersianAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - PersianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(reader); - streams->result = newLucene(streams->source); - streams->result = newLucene(streams->result); - // additional Persian-specific normalization - streams->result = newLucene(streams->result); - // the order here is important: the stopword list is not normalized - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - PersianAnalyzerSavedStreams::~PersianAnalyzerSavedStreams() - { + streams->result = newLucene(streams->result); + // the order here is important: the stopword list is not normalized + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +PersianAnalyzerSavedStreams::~PersianAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/fa/PersianNormalizationFilter.cpp b/src/contrib/analyzers/common/analysis/fa/PersianNormalizationFilter.cpp index d855ff04..1510376d 100644 --- a/src/contrib/analyzers/common/analysis/fa/PersianNormalizationFilter.cpp +++ 
b/src/contrib/analyzers/common/analysis/fa/PersianNormalizationFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,27 +9,24 @@ #include "PersianNormalizer.h" #include "TermAttribute.h" -namespace Lucene -{ - PersianNormalizationFilter::PersianNormalizationFilter(TokenStreamPtr input) : TokenFilter(input) - { - normalizer = newLucene(); - termAtt = addAttribute(); - } - - PersianNormalizationFilter::~PersianNormalizationFilter() - { - } - - bool PersianNormalizationFilter::incrementToken() - { - if (input->incrementToken()) - { - int32_t newlen = normalizer->normalize(termAtt->termBuffer().get(), termAtt->termLength()); - termAtt->setTermLength(newlen); - return true; - } - else - return false; +namespace Lucene { + +PersianNormalizationFilter::PersianNormalizationFilter(const TokenStreamPtr& input) : TokenFilter(input) { + normalizer = newLucene(); + termAtt = addAttribute(); +} + +PersianNormalizationFilter::~PersianNormalizationFilter() { +} + +bool PersianNormalizationFilter::incrementToken() { + if (input->incrementToken()) { + int32_t newlen = normalizer->normalize(termAtt->termBuffer().get(), termAtt->termLength()); + termAtt->setTermLength(newlen); + return true; + } else { + return false; } } + +} diff --git a/src/contrib/analyzers/common/analysis/fa/PersianNormalizer.cpp b/src/contrib/analyzers/common/analysis/fa/PersianNormalizer.cpp index 7e8d1dd1..d65512f2 100644 --- a/src/contrib/analyzers/common/analysis/fa/PersianNormalizer.cpp +++ b/src/contrib/analyzers/common/analysis/fa/PersianNormalizer.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,53 +8,50 @@ #include "PersianNormalizer.h" #include "MiscUtils.h" -namespace Lucene -{ - const wchar_t PersianNormalizer::YEH = (wchar_t)0x064a; - const wchar_t PersianNormalizer::FARSI_YEH = (wchar_t)0x06cc; - const wchar_t PersianNormalizer::YEH_BARREE = (wchar_t)0x06d2; - const wchar_t PersianNormalizer::KEHEH = (wchar_t)0x06a9; - const wchar_t PersianNormalizer::KAF = (wchar_t)0x0643; - const wchar_t PersianNormalizer::HAMZA_ABOVE = (wchar_t)0x0654; - const wchar_t PersianNormalizer::HEH_YEH = (wchar_t)0x06c0; - const wchar_t PersianNormalizer::HEH_GOAL = (wchar_t)0x06c1; - const wchar_t PersianNormalizer::HEH = (wchar_t)0x0647; - - PersianNormalizer::~PersianNormalizer() - { - } - - int32_t PersianNormalizer::normalize(wchar_t* s, int32_t len) - { - for (int32_t i = 0; i < len; ++i) - { - switch (s[i]) - { - case FARSI_YEH: - case YEH_BARREE: - s[i] = YEH; - break; - case KEHEH: - s[i] = KAF; - break; - case HEH_YEH: - case HEH_GOAL: - s[i] = HEH; - break; - case HAMZA_ABOVE: // necessary for HEH + HAMZA - len = deleteChar(s, i--, len); - break; - default: - break; - } +namespace Lucene { + +const wchar_t PersianNormalizer::YEH = (wchar_t)0x064a; +const wchar_t PersianNormalizer::FARSI_YEH = (wchar_t)0x06cc; +const wchar_t PersianNormalizer::YEH_BARREE = (wchar_t)0x06d2; +const wchar_t PersianNormalizer::KEHEH = (wchar_t)0x06a9; +const wchar_t PersianNormalizer::KAF = (wchar_t)0x0643; +const wchar_t PersianNormalizer::HAMZA_ABOVE = (wchar_t)0x0654; +const wchar_t PersianNormalizer::HEH_YEH = (wchar_t)0x06c0; +const wchar_t PersianNormalizer::HEH_GOAL = 
(wchar_t)0x06c1; +const wchar_t PersianNormalizer::HEH = (wchar_t)0x0647; + +PersianNormalizer::~PersianNormalizer() { +} + +int32_t PersianNormalizer::normalize(wchar_t* s, int32_t len) { + for (int32_t i = 0; i < len; ++i) { + switch (s[i]) { + case FARSI_YEH: + case YEH_BARREE: + s[i] = YEH; + break; + case KEHEH: + s[i] = KAF; + break; + case HEH_YEH: + case HEH_GOAL: + s[i] = HEH; + break; + case HAMZA_ABOVE: // necessary for HEH + HAMZA + len = deleteChar(s, i--, len); + break; + default: + break; } - return len; } - - int32_t PersianNormalizer::deleteChar(wchar_t* s, int32_t pos, int32_t len) - { - if (pos < len) - MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); - return len - 1; + return len; +} + +int32_t PersianNormalizer::deleteChar(wchar_t* s, int32_t pos, int32_t len) { + if (pos < len) { + MiscUtils::arrayCopy(s, pos + 1, s, pos, len - pos - 1); } + return len - 1; +} + } diff --git a/src/contrib/analyzers/common/analysis/fr/ElisionFilter.cpp b/src/contrib/analyzers/common/analysis/fr/ElisionFilter.cpp index 30f70099..7d0b7566 100644 --- a/src/contrib/analyzers/common/analysis/fr/ElisionFilter.cpp +++ b/src/contrib/analyzers/common/analysis/fr/ElisionFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,59 +9,52 @@ #include "CharArraySet.h" #include "TermAttribute.h" -namespace Lucene -{ - const wchar_t ElisionFilter::apostrophes[] = {L'\'', L'\x2019'}; - - ElisionFilter::ElisionFilter(TokenStreamPtr input) : TokenFilter(input) - { - articles = newLucene(newCollection(L"l", L"m", L"t", L"qu", L"n", L"s", L"j"), true); - termAtt = addAttribute(); - } - - ElisionFilter::ElisionFilter(TokenStreamPtr input, HashSet articles) : TokenFilter(input) - { - setArticles(articles); - termAtt = addAttribute(); - } - - ElisionFilter::~ElisionFilter() - { - } - - void ElisionFilter::setArticles(HashSet articles) - { - this->articles = newLucene(articles, true); - } - - bool ElisionFilter::incrementToken() - { - if (input->incrementToken()) - { - wchar_t* termBuffer = termAtt->termBufferArray(); - int32_t termLength = termAtt->termLength(); - - int32_t minPoz = INT_MAX; - for (int32_t i = 0; i < SIZEOF_ARRAY(apostrophes); ++i) - { - wchar_t apos = apostrophes[i]; - for (int32_t poz = 0; poz < termLength; ++poz) - { - if (termBuffer[poz] == apos) - { - minPoz = std::min(poz, minPoz); - break; - } +namespace Lucene { + +const wchar_t ElisionFilter::apostrophes[] = {L'\'', L'\x2019'}; + +ElisionFilter::ElisionFilter(const TokenStreamPtr& input) : TokenFilter(input) { + articles = newLucene(newCollection(L"l", L"m", L"t", L"qu", L"n", L"s", L"j"), true); + termAtt = addAttribute(); +} + +ElisionFilter::ElisionFilter(const TokenStreamPtr& input, HashSet articles) : TokenFilter(input) { + setArticles(articles); + termAtt = addAttribute(); +} + +ElisionFilter::~ElisionFilter() { +} + +void ElisionFilter::setArticles(HashSet articles) { + this->articles = newLucene(articles, true); +} + +bool ElisionFilter::incrementToken() { + if (input->incrementToken()) { + wchar_t* termBuffer = termAtt->termBufferArray(); + int32_t termLength = termAtt->termLength(); + + int32_t minPoz = INT_MAX; + for (int32_t i = 0; 
i < SIZEOF_ARRAY(apostrophes); ++i) { + wchar_t apos = apostrophes[i]; + for (int32_t poz = 0; poz < termLength; ++poz) { + if (termBuffer[poz] == apos) { + minPoz = std::min(poz, minPoz); + break; } } - - // An apostrophe has been found. If the prefix is an article strip it off. - if (minPoz != INT_MAX && articles->contains(termBuffer, 0, minPoz)) - termAtt->setTermBuffer(termBuffer, minPoz + 1, termLength - (minPoz + 1)); - - return true; } - else - return false; + + // An apostrophe has been found. If the prefix is an article strip it off. + if (minPoz != INT_MAX && articles->contains(termBuffer, 0, minPoz)) { + termAtt->setTermBuffer(termBuffer, minPoz + 1, termLength - (minPoz + 1)); + } + + return true; + } else { + return false; } } + +} diff --git a/src/contrib/analyzers/common/analysis/fr/FrenchAnalyzer.cpp b/src/contrib/analyzers/common/analysis/fr/FrenchAnalyzer.cpp index 7f2b57a6..0ccdbb42 100644 --- a/src/contrib/analyzers/common/analysis/fr/FrenchAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/fr/FrenchAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,103 +12,94 @@ #include "StopFilter.h" #include "FrenchStemFilter.h" -namespace Lucene -{ - const wchar_t* FrenchAnalyzer::_FRENCH_STOP_WORDS[] = - { - L"a", L"afin", L"ai", L"ainsi", L"apr\x00e8s", L"attendu", L"au", L"aujourd", L"auquel", L"aussi", - L"autre", L"autres", L"aux", L"auxquelles", L"auxquels", L"avait", L"avant", L"avec", L"avoir", - L"c", L"car", L"ce", L"ceci", L"cela", L"celle", L"celles", L"celui", L"cependant", L"certain", - L"certaine", L"certaines", L"certains", L"ces", L"cet", L"cette", L"ceux", L"chez", L"ci", - L"combien", L"comme", L"comment", L"concernant", L"contre", L"d", L"dans", L"de", L"debout", - L"dedans", L"dehors", L"del\x00e0", L"depuis", L"derri\x00e8re", L"des", L"d\x00e9sormais", - L"desquelles", L"desquels", L"dessous", L"dessus", L"devant", L"devers", L"devra", L"divers", - L"diverse", L"diverses", L"doit", L"donc", L"dont", L"du", L"duquel", L"durant", L"d\x00e8s", - L"elle", L"elles", L"en", L"entre", L"environ", L"est", L"et", L"etc", L"etre", L"eu", L"eux", - L"except\x00e9", L"hormis", L"hors", L"h\x00e9las", L"hui", L"il", L"ils", L"j", L"je", L"jusqu", - L"jusque", L"l", L"la", L"laquelle", L"le", L"lequel", L"les", L"lesquelles", L"lesquels", L"leur", - L"leurs", L"lorsque", L"lui", L"l\x00e0", L"ma", L"mais", L"malgr\x00e9", L"me", L"merci", L"mes", - L"mien", L"mienne", L"miennes", L"miens", L"moi", L"moins", L"mon", L"moyennant", L"m\x00eame", - L"m\x00eames", L"n", L"ne", L"ni", L"non", L"nos", L"notre", L"nous", L"n\x00e9anmoins", - L"n\x00f4tre", L"n\x00f4tres", L"on", L"ont", L"ou", L"outre", L"o\x00f9", L"par", L"parmi", - L"partant", L"pas", L"pass\x00e9", L"pendant", L"plein", L"plus", L"plusieurs", L"pour", L"pourquoi", - L"proche", L"pr\x00e8s", L"puisque", L"qu", L"quand", L"que", L"quel", L"quelle", L"quelles", - L"quels", L"qui", L"quoi", L"quoique", L"revoici", L"revoil\x00e0", L"s", L"sa", L"sans", L"sauf", - L"se", 
L"selon", L"seront", L"ses", L"si", L"sien", L"sienne", L"siennes", L"siens", L"sinon", - L"soi", L"soit", L"son", L"sont", L"sous", L"suivant", L"sur", L"ta", L"te", L"tes", L"tien", - L"tienne", L"tiennes", L"tiens", L"toi", L"ton", L"tous", L"tout", L"toute", L"toutes", L"tu", L"un", - L"une", L"va", L"vers", L"voici", L"voil\x00e0", L"vos", L"votre", L"vous", L"vu", L"v\x00f4tre", - L"v\x00f4tres", L"y", L"\x00e0", L"\x00e7a", L"\x00e8s", L"\x00e9t\x00e9", L"\x00eatre", L"\x00f4" - }; - - FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion) - { - this->stoptable = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stoptable = stopwords; - this->matchVersion = matchVersion; - } - - FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) - { - this->stoptable = stopwords; - this->excltable = exclusions; - this->matchVersion = matchVersion; - } - - FrenchAnalyzer::~FrenchAnalyzer() - { - } - - const HashSet FrenchAnalyzer::getDefaultStopSet() - { - static HashSet stoptable; - if (!stoptable) - stoptable = HashSet::newInstance(_FRENCH_STOP_WORDS, _FRENCH_STOP_WORDS + SIZEOF_ARRAY(_FRENCH_STOP_WORDS)); - return stoptable; - } - - void FrenchAnalyzer::setStemExclusionTable(HashSet exclusions) - { - excltable = exclusions; - setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created - } - - TokenStreamPtr FrenchAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(matchVersion, reader); - result = newLucene(result); - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); - result = newLucene(result, excltable); +namespace Lucene { + +const wchar_t* FrenchAnalyzer::_FRENCH_STOP_WORDS[] = { + L"a", L"afin", L"ai", L"ainsi", L"apr\x00e8s", L"attendu", L"au", L"aujourd", 
L"auquel", L"aussi", + L"autre", L"autres", L"aux", L"auxquelles", L"auxquels", L"avait", L"avant", L"avec", L"avoir", + L"c", L"car", L"ce", L"ceci", L"cela", L"celle", L"celles", L"celui", L"cependant", L"certain", + L"certaine", L"certaines", L"certains", L"ces", L"cet", L"cette", L"ceux", L"chez", L"ci", + L"combien", L"comme", L"comment", L"concernant", L"contre", L"d", L"dans", L"de", L"debout", + L"dedans", L"dehors", L"del\x00e0", L"depuis", L"derri\x00e8re", L"des", L"d\x00e9sormais", + L"desquelles", L"desquels", L"dessous", L"dessus", L"devant", L"devers", L"devra", L"divers", + L"diverse", L"diverses", L"doit", L"donc", L"dont", L"du", L"duquel", L"durant", L"d\x00e8s", + L"elle", L"elles", L"en", L"entre", L"environ", L"est", L"et", L"etc", L"etre", L"eu", L"eux", + L"except\x00e9", L"hormis", L"hors", L"h\x00e9las", L"hui", L"il", L"ils", L"j", L"je", L"jusqu", + L"jusque", L"l", L"la", L"laquelle", L"le", L"lequel", L"les", L"lesquelles", L"lesquels", L"leur", + L"leurs", L"lorsque", L"lui", L"l\x00e0", L"ma", L"mais", L"malgr\x00e9", L"me", L"merci", L"mes", + L"mien", L"mienne", L"miennes", L"miens", L"moi", L"moins", L"mon", L"moyennant", L"m\x00eame", + L"m\x00eames", L"n", L"ne", L"ni", L"non", L"nos", L"notre", L"nous", L"n\x00e9anmoins", + L"n\x00f4tre", L"n\x00f4tres", L"on", L"ont", L"ou", L"outre", L"o\x00f9", L"par", L"parmi", + L"partant", L"pas", L"pass\x00e9", L"pendant", L"plein", L"plus", L"plusieurs", L"pour", L"pourquoi", + L"proche", L"pr\x00e8s", L"puisque", L"qu", L"quand", L"que", L"quel", L"quelle", L"quelles", + L"quels", L"qui", L"quoi", L"quoique", L"revoici", L"revoil\x00e0", L"s", L"sa", L"sans", L"sauf", + L"se", L"selon", L"seront", L"ses", L"si", L"sien", L"sienne", L"siennes", L"siens", L"sinon", + L"soi", L"soit", L"son", L"sont", L"sous", L"suivant", L"sur", L"ta", L"te", L"tes", L"tien", + L"tienne", L"tiennes", L"tiens", L"toi", L"ton", L"tous", L"tout", L"toute", L"toutes", L"tu", L"un", + L"une", L"va", L"vers", 
L"voici", L"voil\x00e0", L"vos", L"votre", L"vous", L"vu", L"v\x00f4tre", + L"v\x00f4tres", L"y", L"\x00e0", L"\x00e7a", L"\x00e8s", L"\x00e9t\x00e9", L"\x00eatre", L"\x00f4" +}; + +FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion) { + this->stoptable = getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stoptable = stopwords; + this->matchVersion = matchVersion; +} + +FrenchAnalyzer::FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { + this->stoptable = stopwords; + this->excltable = exclusions; + this->matchVersion = matchVersion; +} + +FrenchAnalyzer::~FrenchAnalyzer() { +} + +const HashSet FrenchAnalyzer::getDefaultStopSet() { + static HashSet stoptable; + LUCENE_RUN_ONCE( + stoptable = HashSet::newInstance(_FRENCH_STOP_WORDS, _FRENCH_STOP_WORDS + SIZEOF_ARRAY(_FRENCH_STOP_WORDS)); + ); + return stoptable; +} + +void FrenchAnalyzer::setStemExclusionTable(HashSet exclusions) { + excltable = exclusions; + setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created +} + +TokenStreamPtr FrenchAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(matchVersion, reader); + result = newLucene(result); + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); + result = newLucene(result, excltable); + // Convert to lowercase after stemming + result = newLucene(result); + return result; +} + +TokenStreamPtr FrenchAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + FrenchAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(matchVersion, reader); + streams->result = newLucene(streams->source); + streams->result = 
newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); + streams->result = newLucene(streams->result, excltable); // Convert to lowercase after stemming - result = newLucene(result); - return result; - } - - TokenStreamPtr FrenchAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - FrenchAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(matchVersion, reader); - streams->result = newLucene(streams->source); - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); - streams->result = newLucene(streams->result, excltable); - // Convert to lowercase after stemming - streams->result = newLucene(streams->result); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - FrenchAnalyzerSavedStreams::~FrenchAnalyzerSavedStreams() - { + streams->result = newLucene(streams->result); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +FrenchAnalyzerSavedStreams::~FrenchAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/fr/FrenchStemFilter.cpp b/src/contrib/analyzers/common/analysis/fr/FrenchStemFilter.cpp index 822b508f..ed71f480 100644 --- a/src/contrib/analyzers/common/analysis/fr/FrenchStemFilter.cpp +++ b/src/contrib/analyzers/common/analysis/fr/FrenchStemFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,52 +9,47 @@ #include "FrenchStemmer.h" #include "TermAttribute.h" -namespace Lucene -{ - FrenchStemFilter::FrenchStemFilter(TokenStreamPtr input) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - } - - FrenchStemFilter::FrenchStemFilter(TokenStreamPtr input, HashSet exclusiontable) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - this->exclusions = exclusiontable; - } - - FrenchStemFilter::~FrenchStemFilter() - { - } - - bool FrenchStemFilter::incrementToken() - { - if (input->incrementToken()) - { - String term(termAtt->term()); - // Check the exclusion table. - if (!exclusions || !exclusions.contains(term)) - { - String s(stemmer->stem(term)); - // If not stemmed, don't waste the time adjusting the token. - if (!s.empty() && s != term) - termAtt->setTermBuffer(s); +namespace Lucene { + +FrenchStemFilter::FrenchStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); +} + +FrenchStemFilter::FrenchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); + this->exclusions = exclusiontable; +} + +FrenchStemFilter::~FrenchStemFilter() { +} + +bool FrenchStemFilter::incrementToken() { + if (input->incrementToken()) { + String term(termAtt->term()); + // Check the exclusion table. + if (!exclusions || !exclusions.contains(term)) { + String s(stemmer->stem(term)); + // If not stemmed, don't waste the time adjusting the token. 
+ if (!s.empty() && s != term) { + termAtt->setTermBuffer(s); } - return true; } - else - return false; - } - - void FrenchStemFilter::setStemmer(FrenchStemmerPtr stemmer) - { - if (stemmer) - this->stemmer = stemmer; + return true; + } else { + return false; } - - void FrenchStemFilter::setExclusionSet(HashSet exclusiontable) - { - this->exclusions = exclusiontable; +} + +void FrenchStemFilter::setStemmer(const FrenchStemmerPtr& stemmer) { + if (stemmer) { + this->stemmer = stemmer; } } + +void FrenchStemFilter::setExclusionSet(HashSet exclusiontable) { + this->exclusions = exclusiontable; +} + +} diff --git a/src/contrib/analyzers/common/analysis/fr/FrenchStemmer.cpp b/src/contrib/analyzers/common/analysis/fr/FrenchStemmer.cpp index d4349eb0..44a7d404 100644 --- a/src/contrib/analyzers/common/analysis/fr/FrenchStemmer.cpp +++ b/src/contrib/analyzers/common/analysis/fr/FrenchStemmer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,269 +11,259 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - FrenchStemmer::FrenchStemmer() - { - suite = false; - modified = false; - } - - FrenchStemmer::~FrenchStemmer() - { +namespace Lucene { + +FrenchStemmer::FrenchStemmer() { + suite = false; + modified = false; +} + +FrenchStemmer::~FrenchStemmer() { +} + +String FrenchStemmer::stem(const String& term) { + if (!isStemmable(term)) { + return term; } - - String FrenchStemmer::stem(const String& term) - { - if (!isStemmable(term)) - return term; - - // Use lowercase for medium stemming. 
- stringBuffer = StringUtils::toLower(term); - - // reset the booleans - modified = false; - suite = false; - - treatVowels(stringBuffer); - - setStrings(); - - step1(); - - if (!modified || suite) - { - if (!RV.empty()) - { - suite = step2a(); - if (!suite) - step2b(); + + // Use lowercase for medium stemming. + stringBuffer = StringUtils::toLower(term); + + // reset the booleans + modified = false; + suite = false; + + treatVowels(stringBuffer); + + setStrings(); + + step1(); + + if (!modified || suite) { + if (!RV.empty()) { + suite = step2a(); + if (!suite) { + step2b(); } } + } - if (modified || suite) - step3(); - else - step4(); + if (modified || suite) { + step3(); + } else { + step4(); + } - step5(); + step5(); - step6(); + step6(); - return stringBuffer; - } - - void FrenchStemmer::setStrings() - { - // set the strings - R0 = stringBuffer; - RV = retrieveRV(stringBuffer); - R1 = retrieveR(stringBuffer); - if (!R1.empty()) - { - tempBuffer = R1; - R2 = retrieveR(tempBuffer); - } - else - R2.clear(); + return stringBuffer; +} + +void FrenchStemmer::setStrings() { + // set the strings + R0 = stringBuffer; + RV = retrieveRV(stringBuffer); + R1 = retrieveR(stringBuffer); + if (!R1.empty()) { + tempBuffer = R1; + R2 = retrieveR(tempBuffer); + } else { + R2.clear(); } - - void FrenchStemmer::step1() - { - Collection suffix = newCollection(L"ances", L"iqUes", L"ismes", L"ables", L"istes", L"ance", L"iqUe", L"isme", L"able", L"iste"); - deleteFrom(R2, suffix); +} - replaceFrom(R2, newCollection(L"logies", L"logie"), L"log"); - replaceFrom(R2, newCollection(L"usions", L"utions", L"usion", L"ution"), L"u"); - replaceFrom(R2, newCollection(L"ences", L"ence"), L"ent"); +void FrenchStemmer::step1() { + Collection suffix = newCollection(L"ances", L"iqUes", L"ismes", L"ables", L"istes", L"ance", L"iqUe", L"isme", L"able", L"iste"); + deleteFrom(R2, suffix); - Collection search = newCollection(L"atrices", L"ateurs", L"ations", L"atrice", L"ateur", L"ation"); - 
deleteButSuffixFromElseReplace(R2, search, L"ic", true, R0, L"iqU"); + replaceFrom(R2, newCollection(L"logies", L"logie"), L"log"); + replaceFrom(R2, newCollection(L"usions", L"utions", L"usion", L"ution"), L"u"); + replaceFrom(R2, newCollection(L"ences", L"ence"), L"ent"); - deleteButSuffixFromElseReplace(R2, newCollection(L"ements", L"ement"), L"eus", false, R0, L"eux"); - deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"ativ", false); - deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"iv", false); - deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"abl", false); - deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"iqU", false); + Collection search = newCollection(L"atrices", L"ateurs", L"ations", L"atrice", L"ateur", L"ation"); + deleteButSuffixFromElseReplace(R2, search, L"ic", true, R0, L"iqU"); - deleteFromIfTestVowelBeforeIn(R1, newCollection(L"issements", L"issement"), false, R0); - deleteFrom(RV, newCollection(L"ements", L"ement")); + deleteButSuffixFromElseReplace(R2, newCollection(L"ements", L"ement"), L"eus", false, R0, L"eux"); + deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"ativ", false); + deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"iv", false); + deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"abl", false); + deleteButSuffixFrom(R2, newCollection(L"ements", L"ement"), L"iqU", false); - deleteButSuffixFromElseReplace(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"abil", false, R0, L"abl"); - deleteButSuffixFromElseReplace(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"ic", false, R0, L"iqU"); - deleteButSuffixFrom(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"iv", true); + deleteFromIfTestVowelBeforeIn(R1, newCollection(L"issements", L"issement"), false, R0); + deleteFrom(RV, newCollection(L"ements", L"ement")); - Collection autre = newCollection(L"ifs", L"ives", L"if", L"ive"); - deleteButSuffixFromElseReplace(R2, autre, L"icat", false, R0, L"iqU"); 
- deleteButSuffixFromElseReplace(R2, autre, L"at", true, R2, L"iqU"); + deleteButSuffixFromElseReplace(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"abil", false, R0, L"abl"); + deleteButSuffixFromElseReplace(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"ic", false, R0, L"iqU"); + deleteButSuffixFrom(R2, newCollection(L"it\x00e9s", L"it\x00e9"), L"iv", true); - replaceFrom(R0, newCollection(L"eaux"), L"eau"); + Collection autre = newCollection(L"ifs", L"ives", L"if", L"ive"); + deleteButSuffixFromElseReplace(R2, autre, L"icat", false, R0, L"iqU"); + deleteButSuffixFromElseReplace(R2, autre, L"at", true, R2, L"iqU"); - replaceFrom(R1, newCollection(L"aux"), L"al"); + replaceFrom(R0, newCollection(L"eaux"), L"eau"); - deleteButSuffixFromElseReplace(R2, newCollection(L"euses", L"euse"), L"", true, R1, L"eux"); + replaceFrom(R1, newCollection(L"aux"), L"al"); - deleteFrom(R2, newCollection(L"eux")); + deleteButSuffixFromElseReplace(R2, newCollection(L"euses", L"euse"), L"", true, R1, L"eux"); - // if one of the next steps is performed, we will need to perform step2a - if (replaceFrom(RV, newCollection(L"amment"), L"ant")) - suite = true; - if (replaceFrom(RV, newCollection(L"emment"), L"ent")) - suite = true; - if (deleteFromIfTestVowelBeforeIn(RV, newCollection(L"ments", L"ment"), true, RV)) - suite = true; + deleteFrom(R2, newCollection(L"eux")); + + // if one of the next steps is performed, we will need to perform step2a + if (replaceFrom(RV, newCollection(L"amment"), L"ant")) { + suite = true; } - - bool FrenchStemmer::step2a() - { - static Collection search; - if (!search) - { - static const wchar_t* _search[] = - { - L"\x00eemes", L"\x00eetes", L"iraIent", L"irait", L"irais", L"irai", L"iras", L"ira", - L"irent", L"iriez", L"irez", L"irions", L"irons", L"iront", L"issaIent", - L"issais", L"issantes", L"issante", L"issants", L"issant", L"issait", - L"issais", L"issions", L"issons", L"issiez", L"issez", L"issent", L"isses", - L"isse", L"ir", L"is", L"\x00eet", 
L"it", L"ies", L"ie", L"i" - }; - search = Collection::newInstance(_search, _search + SIZEOF_ARRAY(_search)); - } - return deleteFromIfTestVowelBeforeIn(RV, search, false, RV); + if (replaceFrom(RV, newCollection(L"emment"), L"ent")) { + suite = true; } - - void FrenchStemmer::step2b() - { - static Collection suffix; - if (!suffix) - { - static const wchar_t* _suffix[] = - { - L"eraIent", L"erais", L"erait", L"erai", L"eras", L"erions", L"eriez", - L"erons", L"eront", L"erez", L"\x00e8rent", L"era", L"\x00e9es", L"iez", L"\x00e9e", L"\x00e9s", - L"er", L"ez", L"\x00e9" - }; - suffix = Collection::newInstance(_suffix, _suffix + SIZEOF_ARRAY(_suffix)); - } - deleteFrom(RV, suffix); - - static Collection search; - if (!search) - { - static const wchar_t* _search[] = - { - L"assions", L"assiez", L"assent", L"asses", L"asse", L"aIent", L"antes", - L"aIent", L"Aient", L"ante", L"\x00e2mes", L"\x00e2tes", L"ants", L"ant", L"ait", - L"a\x00eet", L"ais", L"Ait", L"A\x00eet", L"Ais", L"\x00e2t", L"as", L"ai", L"Ai", L"a" - }; - search = Collection::newInstance(_search, _search + SIZEOF_ARRAY(_search)); - } - deleteButSuffixFrom(RV, search, L"e", true); - - deleteFrom(R2, newCollection(L"ions")); + if (deleteFromIfTestVowelBeforeIn(RV, newCollection(L"ments", L"ment"), true, RV)) { + suite = true; } +} + +bool FrenchStemmer::step2a() { + static Collection search; + static const wchar_t* _search[] = { + L"\x00eemes", L"\x00eetes", L"iraIent", L"irait", L"irais", L"irai", L"iras", L"ira", + L"irent", L"iriez", L"irez", L"irions", L"irons", L"iront", L"issaIent", + L"issais", L"issantes", L"issante", L"issants", L"issant", L"issait", + L"issais", L"issions", L"issons", L"issiez", L"issez", L"issent", L"isses", + L"isse", L"ir", L"is", L"\x00eet", L"it", L"ies", L"ie", L"i" + }; + + LUCENE_RUN_ONCE( + search = Collection::newInstance(_search, _search + SIZEOF_ARRAY(_search)); + ); - void FrenchStemmer::step3() - { - if (!stringBuffer.empty()) - { - wchar_t ch = 
stringBuffer[stringBuffer.length() - 1]; - if (ch == L'Y') - { - stringBuffer[stringBuffer.length() - 1] = L'i'; - setStrings(); - } - else if (ch == L'\x00e7') - { - stringBuffer[stringBuffer.length() - 1] = L'c'; - setStrings(); - } + return deleteFromIfTestVowelBeforeIn(RV, search, false, RV); +} + +void FrenchStemmer::step2b() { + static Collection suffix; + static const wchar_t* _suffix[] = { + L"eraIent", L"erais", L"erait", L"erai", L"eras", L"erions", L"eriez", + L"erons", L"eront", L"erez", L"\x00e8rent", L"era", L"\x00e9es", L"iez", L"\x00e9e", L"\x00e9s", + L"er", L"ez", L"\x00e9" + }; + LUCENE_RUN_ONCE( + suffix = Collection::newInstance(_suffix, _suffix + SIZEOF_ARRAY(_suffix)); + ); + deleteFrom(RV, suffix); + + static Collection search; + static const wchar_t* _search[] = { + L"assions", L"assiez", L"assent", L"asses", L"asse", L"aIent", L"antes", + L"aIent", L"Aient", L"ante", L"\x00e2mes", L"\x00e2tes", L"ants", L"ant", L"ait", + L"a\x00eet", L"ais", L"Ait", L"A\x00eet", L"Ais", L"\x00e2t", L"as", L"ai", L"Ai", L"a" + }; + LUCENE_RUN_ONCE( + search = Collection::newInstance(_search, _search + SIZEOF_ARRAY(_search)); + ); + deleteButSuffixFrom(RV, search, L"e", true); + + deleteFrom(R2, newCollection(L"ions")); +} + +void FrenchStemmer::step3() { + if (!stringBuffer.empty()) { + wchar_t ch = stringBuffer[stringBuffer.length() - 1]; + if (ch == L'Y') { + stringBuffer[stringBuffer.length() - 1] = L'i'; + setStrings(); + } else if (ch == L'\x00e7') { + stringBuffer[stringBuffer.length() - 1] = L'c'; + setStrings(); } } - - void FrenchStemmer::step4() - { - if (stringBuffer.length() > 1) - { - wchar_t ch = stringBuffer[stringBuffer.length() - 1]; - if (ch == L's') - { - wchar_t b = stringBuffer[stringBuffer.length() - 2]; - if (b != L'a' && b != L'i' && b != L'o' && b != L'u' && b != L'\x00e8' && b != L's') - { - stringBuffer.resize(stringBuffer.length() - 1); - setStrings(); - } - } - } - if (!deleteFromIfPrecededIn(R2, newCollection(L"ion"), RV, 
L"s")) - deleteFromIfPrecededIn(R2, newCollection(L"ion"), RV, L"t"); +} - replaceFrom(RV, newCollection(L"I\x00e8re", L"i\x00e8re", L"Ier", L"ier"), L"i"); - deleteFrom(RV, newCollection(L"e")); - deleteFromIfPrecededIn(RV, newCollection(L"\x00eb"), R0, L"gu"); - } - - void FrenchStemmer::step5() - { - if (!R0.empty()) - { - if (boost::ends_with(R0, L"enn") || boost::ends_with(R0, L"onn") || - boost::ends_with(R0, L"ett") || boost::ends_with(R0, L"ell") || boost::ends_with(R0, L"eill")) - { +void FrenchStemmer::step4() { + if (stringBuffer.length() > 1) { + wchar_t ch = stringBuffer[stringBuffer.length() - 1]; + if (ch == L's') { + wchar_t b = stringBuffer[stringBuffer.length() - 2]; + if (b != L'a' && b != L'i' && b != L'o' && b != L'u' && b != L'\x00e8' && b != L's') { stringBuffer.resize(stringBuffer.length() - 1); setStrings(); } } } - - void FrenchStemmer::step6() - { - if (!R0.empty()) - { - bool seenVowel = false; - bool seenConson = false; - int32_t pos = -1; - for (int32_t i = (int32_t)(R0.length() - 1); i > -1; --i) - { - wchar_t ch = R0[i]; - if (isVowel(ch)) - { - if (!seenVowel) - { - if (ch == L'\x00e9' || ch == L'\x00e8') - { - pos = i; - break; - } + if (!deleteFromIfPrecededIn(R2, newCollection(L"ion"), RV, L"s")) { + deleteFromIfPrecededIn(R2, newCollection(L"ion"), RV, L"t"); + } + + replaceFrom(RV, newCollection(L"I\x00e8re", L"i\x00e8re", L"Ier", L"ier"), L"i"); + deleteFrom(RV, newCollection(L"e")); + deleteFromIfPrecededIn(RV, newCollection(L"\x00eb"), R0, L"gu"); +} + +void FrenchStemmer::step5() { + if (!R0.empty()) { + if (boost::ends_with(R0, L"enn") || boost::ends_with(R0, L"onn") || + boost::ends_with(R0, L"ett") || boost::ends_with(R0, L"ell") || boost::ends_with(R0, L"eill")) { + stringBuffer.resize(stringBuffer.length() - 1); + setStrings(); + } + } +} + +void FrenchStemmer::step6() { + if (!R0.empty()) { + bool seenVowel = false; + bool seenConson = false; + int32_t pos = -1; + for (int32_t i = (int32_t)(R0.length() - 1); i > -1; 
--i) { + wchar_t ch = R0[i]; + if (isVowel(ch)) { + if (!seenVowel) { + if (ch == L'\x00e9' || ch == L'\x00e8') { + pos = i; + break; } - seenVowel = true; } - else - { - if (seenVowel) - break; - else - seenConson = true; + seenVowel = true; + } else { + if (seenVowel) { + break; + } else { + seenConson = true; } } - if (pos > -1 && seenConson && !seenVowel) - stringBuffer[pos] = L'e'; + } + if (pos > -1 && seenConson && !seenVowel) { + stringBuffer[pos] = L'e'; } } - - bool FrenchStemmer::deleteFromIfPrecededIn(const String& source, Collection search, const String& from, const String& prefix) - { - bool found = false; - if (!source.empty()) - { - for (int32_t i = 0; i < search.size(); ++i) - { - if (boost::ends_with(source, search[i])) - { - if (!from.empty() && boost::ends_with(from, prefix + search[i])) - { +} + +bool FrenchStemmer::deleteFromIfPrecededIn(const String& source, Collection search, const String& from, const String& prefix) { + bool found = false; + if (!source.empty()) { + for (int32_t i = 0; i < search.size(); ++i) { + if (boost::ends_with(source, search[i])) { + if (!from.empty() && boost::ends_with(from, prefix + search[i])) { + stringBuffer.resize(stringBuffer.length() - search[i].length()); + found = true; + setStrings(); + break; + } + } + } + } + return found; +} + +bool FrenchStemmer::deleteFromIfTestVowelBeforeIn(const String& source, Collection search, bool vowel, const String& from) { + bool found = false; + if (!source.empty() && !from.empty()) { + for (int32_t i = 0; i < search.size(); ++i) { + if (boost::ends_with(source, search[i])) { + if ((search[i].length() + 1) <= from.length()) { + bool test = isVowel(stringBuffer[stringBuffer.length() - (search[i].length() + 1)]); + if (test == vowel) { stringBuffer.resize(stringBuffer.length() - search[i].length()); + modified = true; found = true; setStrings(); break; @@ -281,288 +271,223 @@ namespace Lucene } } } - return found; } - - bool FrenchStemmer::deleteFromIfTestVowelBeforeIn(const 
String& source, Collection search, bool vowel, const String& from) - { - bool found = false; - if (!source.empty() && !from.empty()) - { - for (int32_t i = 0; i < search.size(); ++i) - { - if (boost::ends_with(source, search[i])) - { - if ((search[i].length() + 1) <= from.length()) - { - bool test = isVowel(stringBuffer[stringBuffer.length() - (search[i].length() + 1)]); - if (test == vowel) - { - stringBuffer.resize(stringBuffer.length() - search[i].length()); - modified = true; - found = true; - setStrings(); - break; - } - } - } + return found; +} + +void FrenchStemmer::deleteButSuffixFrom(const String& source, Collection search, const String& prefix, bool without) { + if (!source.empty()) { + for (int32_t i = 0; i < search.size(); ++i) { + if (boost::ends_with(source, prefix + search[i])) { + stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); + modified = true; + setStrings(); + break; + } else if (without && boost::ends_with(source, search[i])) { + stringBuffer.resize(stringBuffer.length() - search[i].length()); + modified = true; + setStrings(); + break; } } - return found; } - - void FrenchStemmer::deleteButSuffixFrom(const String& source, Collection search, const String& prefix, bool without) - { - if (!source.empty()) - { - for (int32_t i = 0; i < search.size(); ++i) - { - if (boost::ends_with(source, prefix + search[i])) - { - stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); - modified = true; - setStrings(); - break; - } - else if (without && boost::ends_with(source, search[i])) - { - stringBuffer.resize(stringBuffer.length() - search[i].length()); - modified = true; - setStrings(); - break; - } +} + +void FrenchStemmer::deleteButSuffixFromElseReplace(const String& source, Collection search, const String& prefix, bool without, const String& from, const String& replace) { + if (!source.empty()) { + for (int32_t i = 0; i < search.size(); ++i) { + if (boost::ends_with(source, prefix + 
search[i])) { + stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); + modified = true; + setStrings(); + break; + } else if (!from.empty() && boost::ends_with(from, prefix + search[i])) { + stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); + stringBuffer += replace; + modified = true; + setStrings(); + break; + } else if (without && boost::ends_with(source, search[i])) { + stringBuffer.resize(stringBuffer.length() - search[i].length()); + modified = true; + setStrings(); + break; } } } - - void FrenchStemmer::deleteButSuffixFromElseReplace(const String& source, Collection search, const String& prefix, bool without, const String& from, const String& replace) - { - if (!source.empty()) - { - for (int32_t i = 0; i < search.size(); ++i) - { - if (boost::ends_with(source, prefix + search[i])) - { - stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); - modified = true; - setStrings(); - break; - } - else if (!from.empty() && boost::ends_with(from, prefix + search[i])) - { - stringBuffer.resize(stringBuffer.length() - (prefix.length() + search[i].length())); - stringBuffer += replace; - modified = true; - setStrings(); - break; - } - else if (without && boost::ends_with(source, search[i])) - { - stringBuffer.resize(stringBuffer.length() - search[i].length()); - modified = true; - setStrings(); - break; - } +} + +bool FrenchStemmer::replaceFrom(const String& source, Collection search, const String& replace) { + bool found = false; + if (!source.empty()) { + for (int32_t i = 0; i < search.size(); ++i) { + if (boost::ends_with(source, search[i])) { + stringBuffer.resize(stringBuffer.length() - search[i].length()); + stringBuffer += replace; + modified = true; + found = true; + setStrings(); + break; } } } - - bool FrenchStemmer::replaceFrom(const String& source, Collection search, const String& replace) - { - bool found = false; - if (!source.empty()) - { - for (int32_t i = 
0; i < search.size(); ++i) - { - if (boost::ends_with(source, search[i])) - { - stringBuffer.resize(stringBuffer.length() - search[i].length()); - stringBuffer += replace; - modified = true; - found = true; - setStrings(); - break; - } + return found; +} + +void FrenchStemmer::deleteFrom(const String& source, Collection suffix) { + if (!source.empty()) { + for (int32_t i = 0; i < suffix.size(); ++i) { + if (boost::ends_with(source, suffix[i])) { + stringBuffer.resize(stringBuffer.length() - suffix[i].length()); + modified = true; + setStrings(); + break; } } - return found; } - - void FrenchStemmer::deleteFrom(const String& source, Collection suffix) - { - if (!source.empty()) - { - for (int32_t i = 0; i < suffix.size(); ++i) - { - if (boost::ends_with(source, suffix[i])) - { - stringBuffer.resize(stringBuffer.length() - suffix[i].length()); - modified = true; - setStrings(); - break; - } - } - } +} + +bool FrenchStemmer::isVowel(wchar_t ch) { + switch (ch) { + case L'a': + case L'e': + case L'i': + case L'o': + case L'u': + case L'y': + case L'\x00e2': + case L'\x00e0': + case L'\x00eb': + case L'\x00e9': + case L'\x00ea': + case L'\x00e8': + case L'\x00ef': + case L'\x00ee': + case L'\x00f4': + case L'\x00fc': + case L'\x00f9': + case L'\x00fb': + return true; + default: + return false; } - - bool FrenchStemmer::isVowel(wchar_t ch) - { - switch (ch) - { - case L'a': - case L'e': - case L'i': - case L'o': - case L'u': - case L'y': - case L'\x00e2': - case L'\x00e0': - case L'\x00eb': - case L'\x00e9': - case L'\x00ea': - case L'\x00e8': - case L'\x00ef': - case L'\x00ee': - case L'\x00f4': - case L'\x00fc': - case L'\x00f9': - case L'\x00fb': - return true; - default: - return false; +} + +String FrenchStemmer::retrieveR(const String& buffer) { + int32_t len = (int32_t)buffer.length(); + int32_t pos = -1; + for (int32_t c = 0; c < len; ++c) { + if (isVowel(buffer[c])) { + pos = c; + break; } } - - String FrenchStemmer::retrieveR(const String& buffer) - { - int32_t 
len = (int32_t)buffer.length(); - int32_t pos = -1; - for (int32_t c = 0; c < len; ++c) - { - if (isVowel(buffer[c])) - { - pos = c; + if (pos > -1) { + int32_t consonne = -1; + for (int32_t c = pos; c < len; ++c) { + if (!isVowel(buffer[c])) { + consonne = c; break; } } - if (pos > -1) - { - int32_t consonne = -1; - for (int32_t c = pos; c < len; ++c) - { - if (!isVowel(buffer[c])) - { - consonne = c; + if (consonne > -1 && (consonne + 1) < len) { + return buffer.substr(consonne + 1); + } else { + return L""; + } + } else { + return L""; + } +} + +String FrenchStemmer::retrieveRV(const String& buffer) { + int32_t len = (int32_t)buffer.length(); + if (buffer.length() > 3) { + if (isVowel(buffer[0]) && isVowel(buffer[1])) { + return buffer.substr(3); + } else { + int32_t pos = 0; + for (int32_t c = 1; c < len; ++c) { + if (isVowel(buffer[c])) { + pos = c; break; } } - if (consonne > -1 && (consonne + 1) < len) - return buffer.substr(consonne + 1); - else + if (pos + 1 < len) { + return buffer.substr(pos + 1); + } else { return L""; - } - else - return L""; - } - - String FrenchStemmer::retrieveRV(const String& buffer) - { - int32_t len = (int32_t)buffer.length(); - if (buffer.length() > 3) - { - if (isVowel(buffer[0]) && isVowel(buffer[1])) - return buffer.substr(3); - else - { - int32_t pos = 0; - for (int32_t c = 1; c < len; ++c) - { - if (isVowel(buffer[c])) - { - pos = c; - break; - } - } - if (pos + 1 < len) - return buffer.substr(pos + 1); - else - return L""; } } - else - return L""; + } else { + return L""; } - - void FrenchStemmer::treatVowels(String& buffer) - { - - for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) - { - wchar_t ch = buffer[c]; - - if (c == 0) // first char - { - if (buffer.length() > 1) - { - if (ch == L'y' && isVowel(buffer[c + 1])) - buffer[c] = L'Y'; +} + +void FrenchStemmer::treatVowels(String& buffer) { + + for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { + wchar_t ch = buffer[c]; + + if (c == 0) { // first char + if 
(buffer.length() > 1) { + if (ch == L'y' && isVowel(buffer[c + 1])) { + buffer[c] = L'Y'; } } - else if (c == buffer.length() - 1) // last char - { - if (ch == L'u' && buffer[c - 1] == L'q') - buffer[c] = L'U'; - if (ch == L'y' && isVowel(buffer[c - 1])) - buffer[c] = L'Y'; + } else if (c == buffer.length() - 1) { // last char + if (ch == L'u' && buffer[c - 1] == L'q') { + buffer[c] = L'U'; } - else // other cases - { - if (ch == L'u') - { - if (buffer[c - 1] == L'q') - buffer[c] = L'U'; - else if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1])) + if (ch == L'y' && isVowel(buffer[c - 1])) { + buffer[c] = L'Y'; + } + } else { // other cases + if (ch == L'u') { + if (buffer[c - 1] == L'q') { + buffer[c] = L'U'; + } else if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1])) { buffer[c] = L'U'; } - if (ch == L'i') - { - if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1])) - buffer[c] = L'I'; + } + if (ch == L'i') { + if (isVowel(buffer[c - 1]) && isVowel(buffer[c + 1])) { + buffer[c] = L'I'; } - if (ch == L'y') - { - if (isVowel(buffer[c - 1]) || isVowel(buffer[c + 1])) - buffer[c] = L'Y'; + } + if (ch == L'y') { + if (isVowel(buffer[c - 1]) || isVowel(buffer[c + 1])) { + buffer[c] = L'Y'; } } } } - - bool FrenchStemmer::isStemmable(const String& term) - { - bool upper = false; - int32_t first = -1; - for (int32_t c = 0; c < (int32_t)term.length(); ++c) - { - // Discard terms that contain non-letter characters. - if (!UnicodeUtil::isAlpha(term[c])) +} + +bool FrenchStemmer::isStemmable(const String& term) { + bool upper = false; + int32_t first = -1; + for (int32_t c = 0; c < (int32_t)term.length(); ++c) { + // Discard terms that contain non-letter characters. + if (!UnicodeUtil::isAlpha(term[c])) { + return false; + } + // Discard terms that contain multiple uppercase letters. + if (UnicodeUtil::isUpper(term[c])) { + if (upper) { return false; - // Discard terms that contain multiple uppercase letters. 
- if (UnicodeUtil::isUpper(term[c])) - { - if (upper) - return false; - else // First encountered uppercase letter, set flag and save position. - { - first = c; - upper = true; - } + } else { // First encountered uppercase letter, set flag and save position. + first = c; + upper = true; } } - // Discard the term if it contains a single uppercase letter that - // is not starting the term. - if (first > 0) - return false; - return true; } + // Discard the term if it contains a single uppercase letter that + // is not starting the term. + if (first > 0) { + return false; + } + return true; +} + } diff --git a/src/contrib/analyzers/common/analysis/nl/DutchAnalyzer.cpp b/src/contrib/analyzers/common/analysis/nl/DutchAnalyzer.cpp index 46cbfdaa..a4fa595d 100644 --- a/src/contrib/analyzers/common/analysis/nl/DutchAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/nl/DutchAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,97 +11,87 @@ #include "StopFilter.h" #include "DutchStemFilter.h" -namespace Lucene -{ - const wchar_t* DutchAnalyzer::_DUTCH_STOP_WORDS[] = - { - L"de", L"en", L"van", L"ik", L"te", L"dat", L"die", L"in", L"een", L"hij", L"het", L"niet", - L"zijn", L"is", L"was", L"op", L"aan", L"met", L"als", L"voor", L"had", L"er", L"maar", - L"om", L"hem", L"dan", L"zou", L"of", L"wat", L"mijn", L"men", L"dit", L"zo", L"door", - L"over", L"ze", L"zich", L"bij", L"ook", L"tot", L"je", L"mij", L"uit", L"der", L"daar", - L"haar", L"naar", L"heb", L"hoe", L"heeft", L"hebben", L"deze", L"u", L"want", L"nog", - L"zal", L"me", L"zij", L"nu", L"ge", L"geen", L"omdat", L"iets", L"worden", L"toch", - L"al", L"waren", L"veel", L"meer", L"doen", L"toen", L"moet", L"ben", L"zonder", L"kan", - L"hun", L"dus", L"alles", L"onder", L"ja", L"eens", L"hier", L"wie", L"werd", L"altijd", - L"doch", L"wordt", L"wezen", L"kunnen", L"ons", L"zelf", L"tegen", L"na", L"reeds", L"wil", - L"kon", L"niets", L"uw", L"iemand", L"geweest", L"andere" - }; - - DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion) - { - this->stoptable = getDefaultStopSet(); - this->excltable = HashSet::newInstance(); - this->stemdict = MapStringString::newInstance(); - this->matchVersion = matchVersion; - } - - DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stoptable = stopwords; - this->excltable = HashSet::newInstance(); - this->matchVersion = matchVersion; - } - - DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) - { - this->stoptable = stopwords; - this->excltable = exclusions; - this->matchVersion = matchVersion; - } - - DutchAnalyzer::~DutchAnalyzer() - { - } - - void DutchAnalyzer::initialize() - { - stemdict.put(L"fiets", L"fiets"); // otherwise fiet - stemdict.put(L"bromfiets", L"bromfiets"); // otherwise bromfiet - 
stemdict.put(L"ei", L"eier"); - stemdict.put(L"kind", L"kinder"); - } - - const HashSet DutchAnalyzer::getDefaultStopSet() - { - static HashSet stoptable; - if (!stoptable) - stoptable = HashSet::newInstance(_DUTCH_STOP_WORDS, _DUTCH_STOP_WORDS + SIZEOF_ARRAY(_DUTCH_STOP_WORDS)); - return stoptable; - } - - void DutchAnalyzer::setStemExclusionTable(HashSet exclusions) - { - excltable = exclusions; - setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created - } - - TokenStreamPtr DutchAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(matchVersion, reader); - result = newLucene(result); - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); - result = newLucene(result, excltable); - return result; - } - - TokenStreamPtr DutchAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - DutchAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(matchVersion, reader); - streams->result = newLucene(streams->source); - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); - streams->result = newLucene(streams->result, excltable); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - DutchAnalyzerSavedStreams::~DutchAnalyzerSavedStreams() - { +namespace Lucene { + +const wchar_t* DutchAnalyzer::_DUTCH_STOP_WORDS[] = { + L"de", L"en", L"van", L"ik", L"te", L"dat", L"die", L"in", L"een", L"hij", L"het", L"niet", + L"zijn", L"is", L"was", L"op", L"aan", L"met", L"als", L"voor", L"had", L"er", L"maar", + L"om", L"hem", L"dan", L"zou", L"of", L"wat", L"mijn", L"men", L"dit", L"zo", L"door", + L"over", L"ze", L"zich", L"bij", L"ook", L"tot", L"je", L"mij", L"uit", L"der", L"daar", + L"haar", 
L"naar", L"heb", L"hoe", L"heeft", L"hebben", L"deze", L"u", L"want", L"nog", + L"zal", L"me", L"zij", L"nu", L"ge", L"geen", L"omdat", L"iets", L"worden", L"toch", + L"al", L"waren", L"veel", L"meer", L"doen", L"toen", L"moet", L"ben", L"zonder", L"kan", + L"hun", L"dus", L"alles", L"onder", L"ja", L"eens", L"hier", L"wie", L"werd", L"altijd", + L"doch", L"wordt", L"wezen", L"kunnen", L"ons", L"zelf", L"tegen", L"na", L"reeds", L"wil", + L"kon", L"niets", L"uw", L"iemand", L"geweest", L"andere" +}; + +DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion) { + this->stoptable = getDefaultStopSet(); + this->excltable = HashSet::newInstance(); + this->stemdict = MapStringString::newInstance(); + this->matchVersion = matchVersion; +} + +DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stoptable = stopwords; + this->excltable = HashSet::newInstance(); + this->matchVersion = matchVersion; +} + +DutchAnalyzer::DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions) { + this->stoptable = stopwords; + this->excltable = exclusions; + this->matchVersion = matchVersion; +} + +DutchAnalyzer::~DutchAnalyzer() { +} + +void DutchAnalyzer::initialize() { + stemdict.put(L"fiets", L"fiets"); // otherwise fiet + stemdict.put(L"bromfiets", L"bromfiets"); // otherwise bromfiet + stemdict.put(L"ei", L"eier"); + stemdict.put(L"kind", L"kinder"); +} + +const HashSet DutchAnalyzer::getDefaultStopSet() { + static HashSet stoptable; + LUCENE_RUN_ONCE( + stoptable = HashSet::newInstance(_DUTCH_STOP_WORDS, _DUTCH_STOP_WORDS + SIZEOF_ARRAY(_DUTCH_STOP_WORDS)); + ); + return stoptable; +} + +void DutchAnalyzer::setStemExclusionTable(HashSet exclusions) { + excltable = exclusions; + setPreviousTokenStream(LuceneObjectPtr()); // force a new stemmer to be created +} + +TokenStreamPtr DutchAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = 
newLucene(matchVersion, reader); + result = newLucene(result); + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stoptable); + result = newLucene(result, excltable); + return result; +} + +TokenStreamPtr DutchAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + DutchAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(matchVersion, reader); + streams->result = newLucene(streams->source); + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stoptable); + streams->result = newLucene(streams->result, excltable); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +DutchAnalyzerSavedStreams::~DutchAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/nl/DutchStemFilter.cpp b/src/contrib/analyzers/common/analysis/nl/DutchStemFilter.cpp index 37230245..cc4e1a07 100644 --- a/src/contrib/analyzers/common/analysis/nl/DutchStemFilter.cpp +++ b/src/contrib/analyzers/common/analysis/nl/DutchStemFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,67 +9,61 @@ #include "DutchStemmer.h" #include "TermAttribute.h" -namespace Lucene -{ - DutchStemFilter::DutchStemFilter(TokenStreamPtr input) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - } - - DutchStemFilter::DutchStemFilter(TokenStreamPtr input, HashSet exclusiontable) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - this->exclusions = exclusiontable; - } - - DutchStemFilter::DutchStemFilter(TokenStreamPtr input, HashSet exclusiontable, MapStringString stemdictionary) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - this->exclusions = exclusiontable; - this->stemmer->setStemDictionary(stemdictionary); - } - - DutchStemFilter::~DutchStemFilter() - { - } - - bool DutchStemFilter::incrementToken() - { - if (input->incrementToken()) - { - String term(termAtt->term()); - - // Check the exclusion table. - if (!exclusions || !exclusions.contains(term)) - { - String s(stemmer->stem(term)); - // If not stemmed, don't waste the time adjusting the token. 
- if (!s.empty() && s != term) - termAtt->setTermBuffer(s); +namespace Lucene { + +DutchStemFilter::DutchStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); +} + +DutchStemFilter::DutchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); + this->exclusions = exclusiontable; +} + +DutchStemFilter::DutchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable, MapStringString stemdictionary) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); + this->exclusions = exclusiontable; + this->stemmer->setStemDictionary(stemdictionary); +} + +DutchStemFilter::~DutchStemFilter() { +} + +bool DutchStemFilter::incrementToken() { + if (input->incrementToken()) { + String term(termAtt->term()); + + // Check the exclusion table. + if (!exclusions || !exclusions.contains(term)) { + String s(stemmer->stem(term)); + // If not stemmed, don't waste the time adjusting the token. 
+ if (!s.empty() && s != term) { + termAtt->setTermBuffer(s); } - return true; } - else - return false; - } - - void DutchStemFilter::setStemmer(DutchStemmerPtr stemmer) - { - if (stemmer) - this->stemmer = stemmer; + return true; + } else { + return false; } - - void DutchStemFilter::setExclusionSet(HashSet exclusiontable) - { - this->exclusions = exclusiontable; +} + +void DutchStemFilter::setStemmer(const DutchStemmerPtr& stemmer) { + if (stemmer) { + this->stemmer = stemmer; } - - void DutchStemFilter::setStemDictionary(MapStringString dict) - { - if (stemmer) - this->stemmer->setStemDictionary(dict); +} + +void DutchStemFilter::setExclusionSet(HashSet exclusiontable) { + this->exclusions = exclusiontable; +} + +void DutchStemFilter::setStemDictionary(MapStringString dict) { + if (stemmer) { + this->stemmer->setStemDictionary(dict); } } + +} diff --git a/src/contrib/analyzers/common/analysis/nl/DutchStemmer.cpp b/src/contrib/analyzers/common/analysis/nl/DutchStemmer.cpp index 191282e7..9d6a8fbb 100644 --- a/src/contrib/analyzers/common/analysis/nl/DutchStemmer.cpp +++ b/src/contrib/analyzers/common/analysis/nl/DutchStemmer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,313 +11,298 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - DutchStemmer::DutchStemmer() - { - removedE = false; - R1 = 0; - R2 = 0; - } - - DutchStemmer::~DutchStemmer() - { - } - - String DutchStemmer::stem(const String& term) - { - // Use lowercase for medium stemming. 
- buffer = StringUtils::toLower(term); - if (!isStemmable()) - return buffer; - - if (stemDict && stemDict.contains(term)) - return stemDict.get(term); - - // Stemming starts here... - substitute(); - storeYandI(); - R1 = getRIndex(0); - R1 = std::max((int32_t)3, R1); - step1(); - step2(); - R2 = getRIndex(R1); - step3a(); - step3b(); - step4(); - reStoreYandI(); +namespace Lucene { + +DutchStemmer::DutchStemmer() { + removedE = false; + R1 = 0; + R2 = 0; +} + +DutchStemmer::~DutchStemmer() { +} + +String DutchStemmer::stem(const String& term) { + // Use lowercase for medium stemming. + buffer = StringUtils::toLower(term); + if (!isStemmable()) { return buffer; } - bool DutchStemmer::enEnding() - { - Collection enend = newCollection(L"ene", L"en"); - for (int32_t i = 0; i < enend.size(); ++i) - { - String end = enend[i]; - int32_t index = (int32_t)(buffer.length() - end.length()); - if (boost::ends_with(buffer, end) && index >= R1 && isValidEnEnding(index - 1)) - { - buffer.erase(index, end.length()); - unDouble(index); - return true; - } - } - return false; + if (stemDict && stemDict.contains(term)) { + return stemDict.get(term); } - - void DutchStemmer::step1() - { - if (R1 >= (int32_t)buffer.length()) - return; - int32_t lengthR1 = (int32_t)(buffer.length() - R1); - int32_t index; + // Stemming starts here... 
+ substitute(); + storeYandI(); + R1 = getRIndex(0); + R1 = std::max((int32_t)3, R1); + step1(); + step2(); + R2 = getRIndex(R1); + step3a(); + step3b(); + step4(); + reStoreYandI(); + return buffer; +} - if (boost::ends_with(buffer, L"heden")) - { - buffer.replace(R1, lengthR1, boost::replace_all_copy(buffer.substr(R1, lengthR1), L"heden", L"heid")); - return; +bool DutchStemmer::enEnding() { + Collection enend = newCollection(L"ene", L"en"); + for (int32_t i = 0; i < enend.size(); ++i) { + String end = enend[i]; + int32_t index = (int32_t)(buffer.length() - end.length()); + if (boost::ends_with(buffer, end) && index >= R1 && isValidEnEnding(index - 1)) { + buffer.erase(index, end.length()); + unDouble(index); + return true; } + } + return false; +} - if (enEnding()) - return; - - index = (int32_t)buffer.length() - 2; - if (boost::ends_with(buffer, L"se") && index >= R1 && isValidSEnding(index - 1)) - { - buffer.erase(index, 2); - return; - } - - index = (int32_t)(buffer.length() - 1); - if (boost::ends_with(buffer, L"s") && index >= R1 && isValidSEnding(index - 1)) - buffer.erase(index, 1); - } - - void DutchStemmer::step2() - { - removedE = false; - if (R1 >= (int32_t)buffer.length()) - return; - int32_t index = (int32_t)(buffer.length() - 1); - if (index >= R1 && boost::ends_with(buffer, L"e") && !isVowel(buffer[index - 1])) - { - buffer.erase(index, 1); - unDouble(); - removedE = true; - } +void DutchStemmer::step1() { + if (R1 >= (int32_t)buffer.length()) { + return; } - - void DutchStemmer::step3a() - { - if (R2 >= (int32_t)buffer.length()) - return; - int32_t index = (int32_t)(buffer.length() - 4); - if (boost::ends_with(buffer, L"heid") && index >= R2 && buffer[index - 1] != L'c') - { - buffer.erase(index, 4); // remove heid - enEnding(); - } + + int32_t lengthR1 = (int32_t)(buffer.length() - R1); + int32_t index; + + if (boost::ends_with(buffer, L"heden")) { + buffer.replace(R1, lengthR1, boost::replace_all_copy(buffer.substr(R1, lengthR1), L"heden", 
L"heid")); + return; } - - void DutchStemmer::step3b() - { - if (R2 >= (int32_t)buffer.length()) - return; - - int32_t index = (int32_t)(buffer.length() - 3); - if ((boost::ends_with(buffer, L"end") || boost::ends_with(buffer, L"ing")) && index >= R2) - { - buffer.erase(index, 3); - if (buffer[index - 2] == L'i' && buffer[index - 1] == L'g') - { - if (buffer[index - 3] != L'e' && index - 2 >= R2) - { - index -= 2; - buffer.erase(index, 2); - } - } - else - unDouble(index); - return; - } - index = (int32_t)(buffer.length() - 2); - if (boost::ends_with(buffer, L"ig") && index >= R2) - { - if (buffer[index - 1] != L'e') + + if (enEnding()) { + return; + } + + index = (int32_t)buffer.length() - 2; + if (boost::ends_with(buffer, L"se") && index >= R1 && isValidSEnding(index - 1)) { + buffer.erase(index, 2); + return; + } + + index = (int32_t)(buffer.length() - 1); + if (boost::ends_with(buffer, L"s") && index >= R1 && isValidSEnding(index - 1)) { + buffer.erase(index, 1); + } +} + +void DutchStemmer::step2() { + removedE = false; + if (R1 >= (int32_t)buffer.length()) { + return; + } + int32_t index = (int32_t)(buffer.length() - 1); + if (index >= R1 && boost::ends_with(buffer, L"e") && !isVowel(buffer[index - 1])) { + buffer.erase(index, 1); + unDouble(); + removedE = true; + } +} + +void DutchStemmer::step3a() { + if (R2 >= (int32_t)buffer.length()) { + return; + } + int32_t index = (int32_t)(buffer.length() - 4); + if (boost::ends_with(buffer, L"heid") && index >= R2 && buffer[index - 1] != L'c') { + buffer.erase(index, 4); // remove heid + enEnding(); + } +} + +void DutchStemmer::step3b() { + if (R2 >= (int32_t)buffer.length()) { + return; + } + + int32_t index = (int32_t)(buffer.length() - 3); + if ((boost::ends_with(buffer, L"end") || boost::ends_with(buffer, L"ing")) && index >= R2) { + buffer.erase(index, 3); + if (buffer[index - 2] == L'i' && buffer[index - 1] == L'g') { + if (buffer[index - 3] != L'e' && index - 2 >= R2) { + index -= 2; buffer.erase(index, 2); 
- return; - } - index = (int32_t)(buffer.length() - 4); - if (boost::ends_with(buffer, L"lijk") && index >= R2) - { - buffer.erase(index, 4); - step2(); - return; + } + } else { + unDouble(index); } - index = (int32_t)(buffer.length() - 4); - if (boost::ends_with(buffer, L"baar") && index >= R2) - { - buffer.erase(index, 4); - return; + return; + } + index = (int32_t)(buffer.length() - 2); + if (boost::ends_with(buffer, L"ig") && index >= R2) { + if (buffer[index - 1] != L'e') { + buffer.erase(index, 2); } - index = (int32_t)(buffer.length() - 3); - if (boost::ends_with(buffer, L"bar") && index >= R2) - { - if (removedE) - buffer.erase(index, 3); - return; + return; + } + index = (int32_t)(buffer.length() - 4); + if (boost::ends_with(buffer, L"lijk") && index >= R2) { + buffer.erase(index, 4); + step2(); + return; + } + index = (int32_t)(buffer.length() - 4); + if (boost::ends_with(buffer, L"baar") && index >= R2) { + buffer.erase(index, 4); + return; + } + index = (int32_t)(buffer.length() - 3); + if (boost::ends_with(buffer, L"bar") && index >= R2) { + if (removedE) { + buffer.erase(index, 3); } + return; + } +} + +void DutchStemmer::step4() { + if (buffer.length() < 4) { + return; } - - void DutchStemmer::step4() - { - if (buffer.length() < 4) - return; - String end(buffer.substr(buffer.length() - 4)); - if (end[1] == end[2] && end[3] != L'I' && end[1] != L'i' && isVowel(end[1]) && !isVowel(end[3]) && !isVowel(end[0])) - buffer.erase(buffer.length() - 2, 1); - } - - bool DutchStemmer::isStemmable() - { - for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) - { - if (!UnicodeUtil::isAlnum(buffer[c])) - return false; + String end(buffer.substr(buffer.length() - 4)); + if (end[1] == end[2] && end[3] != L'I' && end[1] != L'i' && isVowel(end[1]) && !isVowel(end[3]) && !isVowel(end[0])) { + buffer.erase(buffer.length() - 2, 1); + } +} + +bool DutchStemmer::isStemmable() { + for (int32_t c = 0; c < (int32_t)buffer.length(); ++c) { + if 
(!UnicodeUtil::isAlnum(buffer[c])) { + return false; } - return true; } - - void DutchStemmer::substitute() - { - for (int32_t i = 0; i < (int32_t)buffer.length(); ++i) - { - switch (buffer[i]) - { - case L'\x00e4': - case L'\x00e1': - buffer[i] = L'a'; - break; - case L'\x00eb': - case L'\x00e9': - buffer[i] = L'e'; - break; - case L'\x00fc': - case L'\x00fa': - buffer[i] = L'u'; - break; - case L'\x00ef': - case L'i': - buffer[i] = L'i'; - break; - case L'\x00f6': - case L'\x00f3': - buffer[i] = L'o'; - break; - } + return true; +} + +void DutchStemmer::substitute() { + for (int32_t i = 0; i < (int32_t)buffer.length(); ++i) { + switch (buffer[i]) { + case L'\x00e4': + case L'\x00e1': + buffer[i] = L'a'; + break; + case L'\x00eb': + case L'\x00e9': + buffer[i] = L'e'; + break; + case L'\x00fc': + case L'\x00fa': + buffer[i] = L'u'; + break; + case L'\x00ef': + case L'i': + buffer[i] = L'i'; + break; + case L'\x00f6': + case L'\x00f3': + buffer[i] = L'o'; + break; } } - - bool DutchStemmer::isValidSEnding(int32_t index) - { - wchar_t c = buffer[index]; - if (isVowel(c) || c == L'j') - return false; - return true; +} + +bool DutchStemmer::isValidSEnding(int32_t index) { + wchar_t c = buffer[index]; + if (isVowel(c) || c == L'j') { + return false; } - - bool DutchStemmer::isValidEnEnding(int32_t index) - { - wchar_t c = buffer[index]; - if (isVowel(c)) - return false; - if (c < 3) - return false; - // ends with "gem"? 
- if (c == L'm' && buffer[index - 2] == L'g' && buffer[index - 1] == L'e') - return false; - return true; + return true; +} + +bool DutchStemmer::isValidEnEnding(int32_t index) { + wchar_t c = buffer[index]; + if (isVowel(c)) { + return false; } - - void DutchStemmer::unDouble() - { - unDouble((int32_t)buffer.length()); - } - - void DutchStemmer::unDouble(int32_t endIndex) - { - String s = buffer.substr(0, endIndex); - if (boost::ends_with(s, L"kk") || boost::ends_with(s, L"tt") || boost::ends_with(s, L"dd") || - boost::ends_with(s, L"nn") || boost::ends_with(s, L"mm") || boost::ends_with(s, L"ff")) - buffer.resize(endIndex - 1); - } - - int32_t DutchStemmer::getRIndex(int32_t start) - { - if (start == 0) - start = 1; - int32_t i = start; - for (; i < (int32_t)buffer.length(); ++i) - { - // first non-vowel preceded by a vowel - if (!isVowel(buffer[i]) && isVowel(buffer[i - 1])) - return i + 1; + if (c < 3) { + return false; + } + // ends with "gem"? + if (c == L'm' && buffer[index - 2] == L'g' && buffer[index - 1] == L'e') { + return false; + } + return true; +} + +void DutchStemmer::unDouble() { + unDouble((int32_t)buffer.length()); +} + +void DutchStemmer::unDouble(int32_t endIndex) { + String s = buffer.substr(0, endIndex); + if (boost::ends_with(s, L"kk") || boost::ends_with(s, L"tt") || boost::ends_with(s, L"dd") || + boost::ends_with(s, L"nn") || boost::ends_with(s, L"mm") || boost::ends_with(s, L"ff")) { + buffer.resize(endIndex - 1); + } +} + +int32_t DutchStemmer::getRIndex(int32_t start) { + if (start == 0) { + start = 1; + } + int32_t i = start; + for (; i < (int32_t)buffer.length(); ++i) { + // first non-vowel preceded by a vowel + if (!isVowel(buffer[i]) && isVowel(buffer[i - 1])) { + return i + 1; } - return i + 1; - } - - void DutchStemmer::storeYandI() - { - if (buffer[0] == L'y') - buffer[0] = L'Y'; - - int32_t last = (int32_t)(buffer.length() - 1); - - for (int32_t i = 1; i < last; i++) - { - switch (buffer[i]) - { - case L'i': - if 
(isVowel(buffer[i - 1]) && isVowel(buffer[i + 1])) - buffer[i] = L'I'; - break; - case L'y': - if (isVowel(buffer[i - 1])) - buffer[i] = L'Y'; - break; + } + return i + 1; +} + +void DutchStemmer::storeYandI() { + if (buffer[0] == L'y') { + buffer[0] = L'Y'; + } + + int32_t last = (int32_t)(buffer.length() - 1); + + for (int32_t i = 1; i < last; i++) { + switch (buffer[i]) { + case L'i': + if (isVowel(buffer[i - 1]) && isVowel(buffer[i + 1])) { + buffer[i] = L'I'; } + break; + case L'y': + if (isVowel(buffer[i - 1])) { + buffer[i] = L'Y'; + } + break; } - if (last > 0 && buffer[last] == L'y' && isVowel(buffer[last - 1])) - buffer[last] = L'Y'; - } - - void DutchStemmer::reStoreYandI() - { - boost::replace_all(buffer, L"I", L"i"); - boost::replace_all(buffer, L"Y", L"y"); - } - - bool DutchStemmer::isVowel(wchar_t c) - { - switch (c) - { - case L'e': - case L'a': - case L'o': - case L'i': - case L'u': - case L'y': - case L'\x00e8': - return true; - default: - return false; - } } - - void DutchStemmer::setStemDictionary(MapStringString dict) - { - stemDict = dict; + if (last > 0 && buffer[last] == L'y' && isVowel(buffer[last - 1])) { + buffer[last] = L'Y'; + } +} + +void DutchStemmer::reStoreYandI() { + boost::replace_all(buffer, L"I", L"i"); + boost::replace_all(buffer, L"Y", L"y"); +} + +bool DutchStemmer::isVowel(wchar_t c) { + switch (c) { + case L'e': + case L'a': + case L'o': + case L'i': + case L'u': + case L'y': + case L'\x00e8': + return true; + default: + return false; } } + +void DutchStemmer::setStemDictionary(MapStringString dict) { + stemDict = dict; +} + +} diff --git a/src/contrib/analyzers/common/analysis/reverse/ReverseStringFilter.cpp b/src/contrib/analyzers/common/analysis/reverse/ReverseStringFilter.cpp index e470a592..9a93e0ae 100644 --- a/src/contrib/analyzers/common/analysis/reverse/ReverseStringFilter.cpp +++ b/src/contrib/analyzers/common/analysis/reverse/ReverseStringFilter.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,55 +8,50 @@ #include "ReverseStringFilter.h" #include "TermAttribute.h" -namespace Lucene -{ - const wchar_t ReverseStringFilter::NOMARKER = (wchar_t)0xffff; - - /// Example marker character: U+0001 (START OF HEADING) - const wchar_t ReverseStringFilter::START_OF_HEADING_MARKER = (wchar_t)0x0001; - - /// Example marker character: U+001F (INFORMATION SEPARATOR ONE) - const wchar_t ReverseStringFilter::INFORMATION_SEPARATOR_MARKER = (wchar_t)0x001f; - - /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) - const wchar_t ReverseStringFilter::PUA_EC00_MARKER = (wchar_t)0xec00; - - /// Example marker character: U+200F (RIGHT-TO-LEFT MARK) - const wchar_t ReverseStringFilter::RTL_DIRECTION_MARKER = (wchar_t)0x200f; - - ReverseStringFilter::ReverseStringFilter(TokenStreamPtr input) : TokenFilter(input) - { - this->marker = NOMARKER; - termAtt = addAttribute(); - } - - ReverseStringFilter::ReverseStringFilter(TokenStreamPtr input, wchar_t marker) : TokenFilter(input) - { - this->marker = marker; - termAtt = addAttribute(); - } - - ReverseStringFilter::~ReverseStringFilter() - { - } - - bool ReverseStringFilter::incrementToken() - { - if (input->incrementToken()) - { - int32_t len = termAtt->termLength(); - if (marker != NOMARKER) - { - ++len; - termAtt->resizeTermBuffer(len); - termAtt->termBuffer()[len - 1] = marker; - } - CharArray term(termAtt->termBuffer()); - std::reverse(term.get(), term.get() + len); - termAtt->setTermLength(len); - return true; +namespace Lucene { + +const wchar_t ReverseStringFilter::NOMARKER = (wchar_t)0xffff; + +/// Example marker 
character: U+0001 (START OF HEADING) +const wchar_t ReverseStringFilter::START_OF_HEADING_MARKER = (wchar_t)0x0001; + +/// Example marker character: U+001F (INFORMATION SEPARATOR ONE) +const wchar_t ReverseStringFilter::INFORMATION_SEPARATOR_MARKER = (wchar_t)0x001f; + +/// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) +const wchar_t ReverseStringFilter::PUA_EC00_MARKER = (wchar_t)0xec00; + +/// Example marker character: U+200F (RIGHT-TO-LEFT MARK) +const wchar_t ReverseStringFilter::RTL_DIRECTION_MARKER = (wchar_t)0x200f; + +ReverseStringFilter::ReverseStringFilter(const TokenStreamPtr& input) : TokenFilter(input) { + this->marker = NOMARKER; + termAtt = addAttribute(); +} + +ReverseStringFilter::ReverseStringFilter(const TokenStreamPtr& input, wchar_t marker) : TokenFilter(input) { + this->marker = marker; + termAtt = addAttribute(); +} + +ReverseStringFilter::~ReverseStringFilter() { +} + +bool ReverseStringFilter::incrementToken() { + if (input->incrementToken()) { + int32_t len = termAtt->termLength(); + if (marker != NOMARKER) { + ++len; + termAtt->resizeTermBuffer(len); + termAtt->termBuffer()[len - 1] = marker; } - else - return false; + CharArray term(termAtt->termBuffer()); + std::reverse(term.get(), term.get() + len); + termAtt->setTermLength(len); + return true; + } else { + return false; } } + +} diff --git a/src/contrib/analyzers/common/analysis/ru/RussianAnalyzer.cpp b/src/contrib/analyzers/common/analysis/ru/RussianAnalyzer.cpp index 4bd04a86..3d6818fd 100644 --- a/src/contrib/analyzers/common/analysis/ru/RussianAnalyzer.cpp +++ b/src/contrib/analyzers/common/analysis/ru/RussianAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,112 +12,103 @@ #include "RussianStemFilter.h" #include "StringUtils.h" -namespace Lucene -{ - /// Default Russian stopwords in UTF-8 format. - const uint8_t RussianAnalyzer::DEFAULT_STOPWORD_FILE[] = - { - 0xd0, 0xb0, 0x0a, 0xd0, 0xb1, 0xd0, 0xb5, 0xd0, 0xb7, 0x0a, 0xd0, 0xb1, 0xd0, 0xbe, 0xd0, 0xbb, - 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, - 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xb0, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, - 0xd0, 0xb8, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xbe, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, - 0xd1, 0x82, 0xd1, 0x8c, 0x0a, 0xd0, 0xb2, 0x0a, 0xd0, 0xb2, 0xd0, 0xb0, 0xd0, 0xbc, 0x0a, 0xd0, - 0xb2, 0xd0, 0xb0, 0xd1, 0x81, 0x0a, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x8c, 0x0a, 0xd0, - 0xb2, 0xd0, 0xbe, 0x0a, 0xd0, 0xb2, 0xd0, 0xbe, 0xd1, 0x82, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, - 0xb5, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd0, 0xb2, 0xd1, - 0x81, 0xd0, 0xb5, 0xd1, 0x85, 0x0a, 0xd0, 0xb2, 0xd1, 0x8b, 0x0a, 0xd0, 0xb3, 0xd0, 0xb4, 0xd0, - 0xb5, 0x0a, 0xd0, 0xb4, 0xd0, 0xb0, 0x0a, 0xd0, 0xb4, 0xd0, 0xb0, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, - 0xd0, 0xb4, 0xd0, 0xbb, 0xd1, 0x8f, 0x0a, 0xd0, 0xb4, 0xd0, 0xbe, 0x0a, 0xd0, 0xb5, 0xd0, 0xb3, - 0xd0, 0xbe, 0x0a, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xb5, 0xd0, 0xb9, 0x0a, 0xd0, 0xb5, 0xd1, - 0x8e, 0x0a, 0xd0, 0xb5, 0xd1, 0x81, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, - 0x82, 0xd1, 0x8c, 0x0a, 0xd0, 0xb5, 0xd1, 0x89, 0xd0, 0xb5, 0x0a, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, - 0xd0, 0xb7, 0xd0, 0xb0, 0x0a, 0xd0, 0xb7, 0xd0, 0xb4, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x8c, 0x0a, - 0xd0, 0xb8, 0x0a, 0xd0, 0xb8, 0xd0, 0xb7, 0x0a, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, - 0xb8, 0xd0, 0xbc, 0x0a, 0xd0, 0xb8, 0xd1, 0x85, 0x0a, 0xd0, 0xba, 0x0a, 0xd0, 0xba, 0xd0, 0xb0, - 0xd0, 0xba, 0x0a, 0xd0, 0xba, 0xd0, 0xbe, 
0x0a, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb3, 0xd0, 0xb4, - 0xd0, 0xb0, 0x0a, 0xd0, 0xba, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, - 0xbb, 0xd0, 0xb8, 0xd0, 0xb1, 0xd0, 0xbe, 0x0a, 0xd0, 0xbc, 0xd0, 0xbd, 0xd0, 0xb5, 0x0a, 0xd0, - 0xbc, 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb5, 0xd1, 0x82, 0x0a, 0xd0, 0xbc, 0xd1, 0x8b, 0x0a, 0xd0, - 0xbd, 0xd0, 0xb0, 0x0a, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xb4, 0xd0, 0xbe, 0x0a, 0xd0, 0xbd, 0xd0, - 0xb0, 0xd1, 0x88, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, - 0xbe, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x82, 0x0a, - 0xd0, 0xbd, 0xd0, 0xb8, 0x0a, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x85, 0x0a, 0xd0, 0xbd, 0xd0, 0xbe, - 0x0a, 0xd0, 0xbd, 0xd1, 0x83, 0x0a, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd0, 0xb1, 0x0a, 0xd0, 0xbe, - 0xd0, 0xb4, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0x0a, - 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xb0, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xb8, 0x0a, 0xd0, 0xbe, - 0xd0, 0xbd, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd1, 0x82, 0x0a, 0xd0, 0xbe, 0xd1, 0x87, 0xd0, 0xb5, - 0xd0, 0xbd, 0xd1, 0x8c, 0x0a, 0xd0, 0xbf, 0xd0, 0xbe, 0x0a, 0xd0, 0xbf, 0xd0, 0xbe, 0xd0, 0xb4, - 0x0a, 0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb8, 0x0a, 0xd1, 0x81, 0x0a, 0xd1, 0x81, 0xd0, 0xbe, 0x0a, - 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xb6, 0xd0, - 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb9, 0x0a, 0xd1, 0x82, 0xd0, - 0xb0, 0xd0, 0xbc, 0x0a, 0xd1, 0x82, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xb5, 0xd0, 0xbc, 0x0a, - 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, - 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb9, 0x0a, 0xd1, 0x82, - 0xd0, 0xbe, 0xd0, 0xbb, 0xd1, 0x8c, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, - 0xbc, 0x0a, 0xd1, 0x82, 0xd1, 0x8b, 0x0a, 0xd1, 0x83, 0x0a, 0xd1, 0x83, 0xd0, 
0xb6, 0xd0, 0xb5, - 0x0a, 0xd1, 0x85, 0xd0, 0xbe, 0xd1, 0x82, 0xd1, 0x8f, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xb3, - 0xd0, 0xbe, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xb9, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xbc, - 0x0a, 0xd1, 0x87, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x87, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb1, - 0xd1, 0x8b, 0x0a, 0xd1, 0x87, 0xd1, 0x8c, 0xd0, 0xb5, 0x0a, 0xd1, 0x87, 0xd1, 0x8c, 0xd1, 0x8f, - 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb0, 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0x0a, 0xd1, - 0x8d, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x8f, 0x0a - }; - - RussianAnalyzer::RussianAnalyzer(LuceneVersion::Version matchVersion) - { - this->stopSet = getDefaultStopSet(); - this->matchVersion = matchVersion; - } - - RussianAnalyzer::RussianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) - { - this->stopSet = stopwords; - this->matchVersion = matchVersion; - } - - RussianAnalyzer::~RussianAnalyzer() - { - } - - const HashSet RussianAnalyzer::getDefaultStopSet() - { - static HashSet stopSet; - if (!stopSet) - { - String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); - Collection words(StringUtils::split(stopWords, L"\n")); - stopSet = HashSet::newInstance(words.begin(), words.end()); - } - return stopSet; - } - - TokenStreamPtr RussianAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(reader); - result = newLucene(result); - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); - result = newLucene(result); - return result; - } - - TokenStreamPtr RussianAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - RussianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(reader); - streams->result = newLucene(streams->source); - streams->result = 
newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); - streams->result = newLucene(streams->result); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - RussianAnalyzerSavedStreams::~RussianAnalyzerSavedStreams() - { +namespace Lucene { + +/// Default Russian stopwords in UTF-8 format. +const uint8_t RussianAnalyzer::DEFAULT_STOPWORD_FILE[] = { + 0xd0, 0xb0, 0x0a, 0xd0, 0xb1, 0xd0, 0xb5, 0xd0, 0xb7, 0x0a, 0xd0, 0xb1, 0xd0, 0xbe, 0xd0, 0xbb, + 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, + 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xb0, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, + 0xd0, 0xb8, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, 0xd0, 0xbb, 0xd0, 0xbe, 0x0a, 0xd0, 0xb1, 0xd1, 0x8b, + 0xd1, 0x82, 0xd1, 0x8c, 0x0a, 0xd0, 0xb2, 0x0a, 0xd0, 0xb2, 0xd0, 0xb0, 0xd0, 0xbc, 0x0a, 0xd0, + 0xb2, 0xd0, 0xb0, 0xd1, 0x81, 0x0a, 0xd0, 0xb2, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x8c, 0x0a, 0xd0, + 0xb2, 0xd0, 0xbe, 0x0a, 0xd0, 0xb2, 0xd0, 0xbe, 0xd1, 0x82, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, + 0xb5, 0x0a, 0xd0, 0xb2, 0xd1, 0x81, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd0, 0xb2, 0xd1, + 0x81, 0xd0, 0xb5, 0xd1, 0x85, 0x0a, 0xd0, 0xb2, 0xd1, 0x8b, 0x0a, 0xd0, 0xb3, 0xd0, 0xb4, 0xd0, + 0xb5, 0x0a, 0xd0, 0xb4, 0xd0, 0xb0, 0x0a, 0xd0, 0xb4, 0xd0, 0xb0, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, + 0xd0, 0xb4, 0xd0, 0xbb, 0xd1, 0x8f, 0x0a, 0xd0, 0xb4, 0xd0, 0xbe, 0x0a, 0xd0, 0xb5, 0xd0, 0xb3, + 0xd0, 0xbe, 0x0a, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xb5, 0xd0, 0xb9, 0x0a, 0xd0, 0xb5, 0xd1, + 0x8e, 0x0a, 0xd0, 0xb5, 0xd1, 0x81, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, + 0x82, 0xd1, 0x8c, 0x0a, 0xd0, 0xb5, 0xd1, 0x89, 0xd0, 0xb5, 0x0a, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, + 0xd0, 0xb7, 0xd0, 0xb0, 0x0a, 0xd0, 0xb7, 0xd0, 0xb4, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x8c, 0x0a, + 0xd0, 0xb8, 0x0a, 0xd0, 0xb8, 0xd0, 0xb7, 0x0a, 0xd0, 0xb8, 0xd0, 
0xbb, 0xd0, 0xb8, 0x0a, 0xd0, + 0xb8, 0xd0, 0xbc, 0x0a, 0xd0, 0xb8, 0xd1, 0x85, 0x0a, 0xd0, 0xba, 0x0a, 0xd0, 0xba, 0xd0, 0xb0, + 0xd0, 0xba, 0x0a, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb3, 0xd0, 0xb4, + 0xd0, 0xb0, 0x0a, 0xd0, 0xba, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd0, 0xbb, 0xd0, 0xb8, 0x0a, 0xd0, + 0xbb, 0xd0, 0xb8, 0xd0, 0xb1, 0xd0, 0xbe, 0x0a, 0xd0, 0xbc, 0xd0, 0xbd, 0xd0, 0xb5, 0x0a, 0xd0, + 0xbc, 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb5, 0xd1, 0x82, 0x0a, 0xd0, 0xbc, 0xd1, 0x8b, 0x0a, 0xd0, + 0xbd, 0xd0, 0xb0, 0x0a, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xb4, 0xd0, 0xbe, 0x0a, 0xd0, 0xbd, 0xd0, + 0xb0, 0xd1, 0x88, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd0, 0xb3, 0xd0, + 0xbe, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd0, 0xb5, 0x0a, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x82, 0x0a, + 0xd0, 0xbd, 0xd0, 0xb8, 0x0a, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x85, 0x0a, 0xd0, 0xbd, 0xd0, 0xbe, + 0x0a, 0xd0, 0xbd, 0xd1, 0x83, 0x0a, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd0, 0xb1, 0x0a, 0xd0, 0xbe, + 0xd0, 0xb4, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0x0a, + 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xb0, 0x0a, 0xd0, 0xbe, 0xd0, 0xbd, 0xd0, 0xb8, 0x0a, 0xd0, 0xbe, + 0xd0, 0xbd, 0xd0, 0xbe, 0x0a, 0xd0, 0xbe, 0xd1, 0x82, 0x0a, 0xd0, 0xbe, 0xd1, 0x87, 0xd0, 0xb5, + 0xd0, 0xbd, 0xd1, 0x8c, 0x0a, 0xd0, 0xbf, 0xd0, 0xbe, 0x0a, 0xd0, 0xbf, 0xd0, 0xbe, 0xd0, 0xb4, + 0x0a, 0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb8, 0x0a, 0xd1, 0x81, 0x0a, 0xd1, 0x81, 0xd0, 0xbe, 0x0a, + 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xb6, 0xd0, + 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xba, 0xd0, 0xbe, 0xd0, 0xb9, 0x0a, 0xd1, 0x82, 0xd0, + 0xb0, 0xd0, 0xbc, 0x0a, 0xd1, 0x82, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xb5, 0xd0, 0xbc, 0x0a, + 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb3, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, + 0xd0, 0xbe, 0xd0, 0xb6, 0xd0, 0xb5, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb9, 0x0a, 0xd1, 0x82, + 0xd0, 
0xbe, 0xd0, 0xbb, 0xd1, 0x8c, 0xd0, 0xba, 0xd0, 0xbe, 0x0a, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, + 0xbc, 0x0a, 0xd1, 0x82, 0xd1, 0x8b, 0x0a, 0xd1, 0x83, 0x0a, 0xd1, 0x83, 0xd0, 0xb6, 0xd0, 0xb5, + 0x0a, 0xd1, 0x85, 0xd0, 0xbe, 0xd1, 0x82, 0xd1, 0x8f, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xb3, + 0xd0, 0xbe, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xb9, 0x0a, 0xd1, 0x87, 0xd0, 0xb5, 0xd0, 0xbc, + 0x0a, 0xd1, 0x87, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x87, 0xd1, 0x82, 0xd0, 0xbe, 0xd0, 0xb1, + 0xd1, 0x8b, 0x0a, 0xd1, 0x87, 0xd1, 0x8c, 0xd0, 0xb5, 0x0a, 0xd1, 0x87, 0xd1, 0x8c, 0xd1, 0x8f, + 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb0, 0x0a, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xb8, 0x0a, 0xd1, + 0x8d, 0xd1, 0x82, 0xd0, 0xbe, 0x0a, 0xd1, 0x8f, 0x0a +}; + +RussianAnalyzer::RussianAnalyzer(LuceneVersion::Version matchVersion) { + this->stopSet = getDefaultStopSet(); + this->matchVersion = matchVersion; +} + +RussianAnalyzer::RussianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords) { + this->stopSet = stopwords; + this->matchVersion = matchVersion; +} + +RussianAnalyzer::~RussianAnalyzer() { +} + +const HashSet RussianAnalyzer::getDefaultStopSet() { + static HashSet stopSet; + LUCENE_RUN_ONCE( + String stopWords(UTF8_TO_STRING(DEFAULT_STOPWORD_FILE)); + Collection words(StringUtils::split(stopWords, L"\n")); + stopSet = HashSet::newInstance(words.begin(), words.end()); + ); + return stopSet; +} + +TokenStreamPtr RussianAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(reader); + result = newLucene(result); + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); + result = newLucene(result); + return result; +} + +TokenStreamPtr RussianAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + RussianAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + 
streams->source = newLucene(reader); + streams->result = newLucene(streams->source); + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); + streams->result = newLucene(streams->result); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +RussianAnalyzerSavedStreams::~RussianAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/analyzers/common/analysis/ru/RussianLetterTokenizer.cpp b/src/contrib/analyzers/common/analysis/ru/RussianLetterTokenizer.cpp index 9a3c7a0f..43ef0fe0 100644 --- a/src/contrib/analyzers/common/analysis/ru/RussianLetterTokenizer.cpp +++ b/src/contrib/analyzers/common/analysis/ru/RussianLetterTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,26 +9,22 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - RussianLetterTokenizer::RussianLetterTokenizer(ReaderPtr input) : CharTokenizer(input) - { - } - - RussianLetterTokenizer::RussianLetterTokenizer(AttributeSourcePtr source, ReaderPtr input) : CharTokenizer(source, input) - { - } - - RussianLetterTokenizer::RussianLetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : CharTokenizer(factory, input) - { - } - - RussianLetterTokenizer::~RussianLetterTokenizer() - { - } - - bool RussianLetterTokenizer::isTokenChar(wchar_t c) - { - return (UnicodeUtil::isAlpha(c) || UnicodeUtil::isDigit(c)); - } +namespace Lucene { + +RussianLetterTokenizer::RussianLetterTokenizer(const ReaderPtr& input) : CharTokenizer(input) { +} + +RussianLetterTokenizer::RussianLetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : CharTokenizer(source, input) { +} + +RussianLetterTokenizer::RussianLetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : CharTokenizer(factory, input) { +} + +RussianLetterTokenizer::~RussianLetterTokenizer() { +} + +bool RussianLetterTokenizer::isTokenChar(wchar_t c) { + return (UnicodeUtil::isAlpha(c) || UnicodeUtil::isDigit(c)); +} + } diff --git a/src/contrib/analyzers/common/analysis/ru/RussianLowerCaseFilter.cpp b/src/contrib/analyzers/common/analysis/ru/RussianLowerCaseFilter.cpp index 5015f481..45a12f72 100644 --- a/src/contrib/analyzers/common/analysis/ru/RussianLowerCaseFilter.cpp +++ b/src/contrib/analyzers/common/analysis/ru/RussianLowerCaseFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,28 +9,26 @@ #include "TermAttribute.h" #include "CharFolder.h" -namespace Lucene -{ - RussianLowerCaseFilter::RussianLowerCaseFilter(TokenStreamPtr input) : TokenFilter(input) - { - termAtt = addAttribute(); - } - - RussianLowerCaseFilter::~RussianLowerCaseFilter() - { - } - - bool RussianLowerCaseFilter::incrementToken() - { - if (input->incrementToken()) - { - wchar_t* buffer = termAtt->termBufferArray(); - int32_t length = termAtt->termLength(); - for (int32_t i = 0; i < length; ++i) - buffer[i] = CharFolder::toLower(buffer[i]); - return true; +namespace Lucene { + +RussianLowerCaseFilter::RussianLowerCaseFilter(const TokenStreamPtr& input) : TokenFilter(input) { + termAtt = addAttribute(); +} + +RussianLowerCaseFilter::~RussianLowerCaseFilter() { +} + +bool RussianLowerCaseFilter::incrementToken() { + if (input->incrementToken()) { + wchar_t* buffer = termAtt->termBufferArray(); + int32_t length = termAtt->termLength(); + for (int32_t i = 0; i < length; ++i) { + buffer[i] = CharFolder::toLower(buffer[i]); } - else - return false; + return true; + } else { + return false; } } + +} diff --git a/src/contrib/analyzers/common/analysis/ru/RussianStemFilter.cpp b/src/contrib/analyzers/common/analysis/ru/RussianStemFilter.cpp index 9fc8f6d3..4b0a72e3 100644 --- a/src/contrib/analyzers/common/analysis/ru/RussianStemFilter.cpp +++ b/src/contrib/analyzers/common/analysis/ru/RussianStemFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,35 +9,33 @@ #include "RussianStemmer.h" #include "TermAttribute.h" -namespace Lucene -{ - RussianStemFilter::RussianStemFilter(TokenStreamPtr input) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - } - - RussianStemFilter::~RussianStemFilter() - { - } - - bool RussianStemFilter::incrementToken() - { - if (input->incrementToken()) - { - String term(termAtt->term()); - String s(stemmer->stem(term)); - if (!s.empty() && s != term) - termAtt->setTermBuffer(s); - return true; +namespace Lucene { + +RussianStemFilter::RussianStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); +} + +RussianStemFilter::~RussianStemFilter() { +} + +bool RussianStemFilter::incrementToken() { + if (input->incrementToken()) { + String term(termAtt->term()); + String s(stemmer->stem(term)); + if (!s.empty() && s != term) { + termAtt->setTermBuffer(s); } - else - return false; + return true; + } else { + return false; } - - void RussianStemFilter::setStemmer(RussianStemmerPtr stemmer) - { - if (stemmer) - this->stemmer = stemmer; +} + +void RussianStemFilter::setStemmer(const RussianStemmerPtr& stemmer) { + if (stemmer) { + this->stemmer = stemmer; } } + +} diff --git a/src/contrib/analyzers/common/analysis/ru/RussianStemmer.cpp b/src/contrib/analyzers/common/analysis/ru/RussianStemmer.cpp index a590b803..5ae7bca7 100644 --- a/src/contrib/analyzers/common/analysis/ru/RussianStemmer.cpp +++ b/src/contrib/analyzers/common/analysis/ru/RussianStemmer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,563 +9,513 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - const wchar_t RussianStemmer::A = L'\x0430'; - const wchar_t RussianStemmer::V = L'\x0432'; - const wchar_t RussianStemmer::G = L'\x0433'; - const wchar_t RussianStemmer::E = L'\x0435'; - const wchar_t RussianStemmer::I = L'\x0438'; - const wchar_t RussianStemmer::I_ = L'\x0439'; - const wchar_t RussianStemmer::L = L'\x043b'; - const wchar_t RussianStemmer::M = L'\x043c'; - const wchar_t RussianStemmer::N = L'\x043d'; - const wchar_t RussianStemmer::O = L'\x043e'; - const wchar_t RussianStemmer::S = L'\x0441'; - const wchar_t RussianStemmer::T = L'\x0442'; - const wchar_t RussianStemmer::U = L'\x0443'; - const wchar_t RussianStemmer::X = L'\x0445'; - const wchar_t RussianStemmer::SH = L'\x0448'; - const wchar_t RussianStemmer::SHCH = L'\x0449'; - const wchar_t RussianStemmer::Y = L'\x044b'; - const wchar_t RussianStemmer::SOFT = L'\x044c'; - const wchar_t RussianStemmer::AE = L'\x044d'; - const wchar_t RussianStemmer::IU = L'\x044e'; - const wchar_t RussianStemmer::IA = L'\x044f'; - - const wchar_t RussianStemmer::vowels[] = {A, E, I, O, U, Y, AE, IU, IA}; - - RussianStemmer::RussianStemmer() - { - RV = 0; - R1 = 0; - R2 = 0; - } - - RussianStemmer::~RussianStemmer() - { - } - - Collection RussianStemmer::perfectiveGerundEndings1() - { - static Collection _perfectiveGerundEndings1; - if (!_perfectiveGerundEndings1) - { - _perfectiveGerundEndings1 = Collection::newInstance(); - _perfectiveGerundEndings1.add(String(L"") + V); - _perfectiveGerundEndings1.add(String(L"") + V + SH + I); - _perfectiveGerundEndings1.add(String(L"") + V + SH + I + S + SOFT); - } - return _perfectiveGerundEndings1; - } - - Collection RussianStemmer::perfectiveGerund1Predessors() - { - static Collection _perfectiveGerund1Predessors; - if (!_perfectiveGerund1Predessors) - { - _perfectiveGerund1Predessors = 
Collection::newInstance(); - _perfectiveGerund1Predessors.add(String(L"") + A); - _perfectiveGerund1Predessors.add(String(L"") + IA); - } - return _perfectiveGerund1Predessors; - } - - Collection RussianStemmer::perfectiveGerundEndings2() - { - static Collection _perfectiveGerundEndings2; - if (!_perfectiveGerundEndings2) - { - _perfectiveGerundEndings2 = Collection::newInstance(); - _perfectiveGerundEndings2.add(String(L"") + I + V); - _perfectiveGerundEndings2.add(String(L"") + Y + V); - _perfectiveGerundEndings2.add(String(L"") + I + V + SH + I); - _perfectiveGerundEndings2.add(String(L"") + Y + V + SH + I); - _perfectiveGerundEndings2.add(String(L"") + I + V + SH + I + S + SOFT); - _perfectiveGerundEndings2.add(String(L"") + Y + V + SH + I + S + SOFT); - } - return _perfectiveGerundEndings2; - } - - Collection RussianStemmer::adjectiveEndings() - { - static Collection _adjectiveEndings; - if (!_adjectiveEndings) - { - _adjectiveEndings = Collection::newInstance(); - _adjectiveEndings.add(String(L"") + E + E); - _adjectiveEndings.add(String(L"") + I + E); - _adjectiveEndings.add(String(L"") + Y + E); - _adjectiveEndings.add(String(L"") + O + E); - _adjectiveEndings.add(String(L"") + E + I_); - _adjectiveEndings.add(String(L"") + I + I_); - _adjectiveEndings.add(String(L"") + Y + I_); - _adjectiveEndings.add(String(L"") + O + I_); - _adjectiveEndings.add(String(L"") + E + M); - _adjectiveEndings.add(String(L"") + I + M); - _adjectiveEndings.add(String(L"") + Y + M); - _adjectiveEndings.add(String(L"") + O + M); - _adjectiveEndings.add(String(L"") + I + X); - _adjectiveEndings.add(String(L"") + Y + X); - _adjectiveEndings.add(String(L"") + U + IU); - _adjectiveEndings.add(String(L"") + IU + IU); - _adjectiveEndings.add(String(L"") + A + IA); - _adjectiveEndings.add(String(L"") + IA + IA); - _adjectiveEndings.add(String(L"") + O + IU); - _adjectiveEndings.add(String(L"") + E + IU); - _adjectiveEndings.add(String(L"") + I + M + I); - 
_adjectiveEndings.add(String(L"") + Y + M + I); - _adjectiveEndings.add(String(L"") + E + G + O); - _adjectiveEndings.add(String(L"") + O + G + O); - _adjectiveEndings.add(String(L"") + E + M + U); - _adjectiveEndings.add(String(L"") + O + M + U); - } - return _adjectiveEndings; - } +namespace Lucene { - Collection RussianStemmer::participleEndings1() - { - static Collection _participleEndings1; - if (!_participleEndings1) - { - _participleEndings1 = Collection::newInstance(); - _participleEndings1.add(String(L"") + SHCH); - _participleEndings1.add(String(L"") + E + M); - _participleEndings1.add(String(L"") + N + N); - _participleEndings1.add(String(L"") + V + SH); - _participleEndings1.add(String(L"") + IU + SHCH); - } - return _participleEndings1; - } - - Collection RussianStemmer::participleEndings2() - { - static Collection _participleEndings2; - if (!_participleEndings2) - { - _participleEndings2 = Collection::newInstance(); - _participleEndings2.add(String(L"") + I + V + SH); - _participleEndings2.add(String(L"") + Y + V + SH); - _participleEndings2.add(String(L"") + U + IU + SHCH); - } - return _participleEndings2; - } - - Collection RussianStemmer::participle1Predessors() - { - static Collection _participle1Predessors; - if (!_participle1Predessors) - { - _participle1Predessors = Collection::newInstance(); - _participle1Predessors.add(String(L"") + A); - _participle1Predessors.add(String(L"") + IA); - } - return _participle1Predessors; - } - - Collection RussianStemmer::reflexiveEndings() - { - static Collection _participle1Predessors; - if (!_participle1Predessors) - { - _participle1Predessors = Collection::newInstance(); - _participle1Predessors.add(String(L"") + S + IA); - _participle1Predessors.add(String(L"") + S + SOFT); - } - return _participle1Predessors; - } - - Collection RussianStemmer::verbEndings1() - { - static Collection _verbEndings1; - if (!_verbEndings1) - { - _verbEndings1 = Collection::newInstance(); - _verbEndings1.add(String(L"") + 
I_); - _verbEndings1.add(String(L"") + L); - _verbEndings1.add(String(L"") + N); - _verbEndings1.add(String(L"") + L + O); - _verbEndings1.add(String(L"") + N + O); - _verbEndings1.add(String(L"") + E + T); - _verbEndings1.add(String(L"") + IU + T); - _verbEndings1.add(String(L"") + L + A); - _verbEndings1.add(String(L"") + N + A); - _verbEndings1.add(String(L"") + L + I); - _verbEndings1.add(String(L"") + E + M); - _verbEndings1.add(String(L"") + N + Y); - _verbEndings1.add(String(L"") + E + T + E); - _verbEndings1.add(String(L"") + I_ + T + E); - _verbEndings1.add(String(L"") + T + SOFT); - _verbEndings1.add(String(L"") + E + SH + SOFT); - _verbEndings1.add(String(L"") + N + N + O); - } - return _verbEndings1; - } - - Collection RussianStemmer::verbEndings2() - { - static Collection _verbEndings2; - if (!_verbEndings2) - { - _verbEndings2 = Collection::newInstance(); - _verbEndings2.add(String(L"") + IU); - _verbEndings2.add(String(L"") + U + IU); - _verbEndings2.add(String(L"") + E + N); - _verbEndings2.add(String(L"") + E + I_); - _verbEndings2.add(String(L"") + IA + T); - _verbEndings2.add(String(L"") + U + I_); - _verbEndings2.add(String(L"") + I + L); - _verbEndings2.add(String(L"") + Y + L); - _verbEndings2.add(String(L"") + I + M); - _verbEndings2.add(String(L"") + Y + M); - _verbEndings2.add(String(L"") + I + T); - _verbEndings2.add(String(L"") + Y + T); - _verbEndings2.add(String(L"") + I + L + A); - _verbEndings2.add(String(L"") + Y + L + A); - _verbEndings2.add(String(L"") + E + N + A); - _verbEndings2.add(String(L"") + I + T + E); - _verbEndings2.add(String(L"") + I + L + I); - _verbEndings2.add(String(L"") + Y + L + I); - _verbEndings2.add(String(L"") + I + L + O); - _verbEndings2.add(String(L"") + Y + L + O); - _verbEndings2.add(String(L"") + E + N + O); - _verbEndings2.add(String(L"") + U + E + T); - _verbEndings2.add(String(L"") + U + IU + T); - _verbEndings2.add(String(L"") + E + N + Y); - _verbEndings2.add(String(L"") + I + T + SOFT); - 
_verbEndings2.add(String(L"") + Y + T + SOFT); - _verbEndings2.add(String(L"") + I + SH + SOFT); - _verbEndings2.add(String(L"") + E + I_ + T + E); - _verbEndings2.add(String(L"") + U + I_ + T + E); - } - return _verbEndings2; - } - - Collection RussianStemmer::verb1Predessors() - { - static Collection _verb1Predessors; - if (!_verb1Predessors) - { - _verb1Predessors = Collection::newInstance(); - _verb1Predessors.add(String(L"") + A); - _verb1Predessors.add(String(L"") + IA); - } - return _verb1Predessors; - } - - Collection RussianStemmer::nounEndings() - { - static Collection _nounEndings; - if (!_nounEndings) - { - _nounEndings = Collection::newInstance(); - _nounEndings.add(String(L"") + A); - _nounEndings.add(String(L"") + U); - _nounEndings.add(String(L"") + I_); - _nounEndings.add(String(L"") + O); - _nounEndings.add(String(L"") + U); - _nounEndings.add(String(L"") + E); - _nounEndings.add(String(L"") + Y); - _nounEndings.add(String(L"") + I); - _nounEndings.add(String(L"") + SOFT); - _nounEndings.add(String(L"") + IA); - _nounEndings.add(String(L"") + E + V); - _nounEndings.add(String(L"") + O + V); - _nounEndings.add(String(L"") + I + E); - _nounEndings.add(String(L"") + SOFT + E); - _nounEndings.add(String(L"") + IA + X); - _nounEndings.add(String(L"") + I + IU); - _nounEndings.add(String(L"") + E + I); - _nounEndings.add(String(L"") + I + I); - _nounEndings.add(String(L"") + E + I_); - _nounEndings.add(String(L"") + O + I_); - _nounEndings.add(String(L"") + E + M); - _nounEndings.add(String(L"") + A + M); - _nounEndings.add(String(L"") + O + M); - _nounEndings.add(String(L"") + A + X); - _nounEndings.add(String(L"") + SOFT + IU); - _nounEndings.add(String(L"") + I + IA); - _nounEndings.add(String(L"") + SOFT + IA); - _nounEndings.add(String(L"") + I + I_); - _nounEndings.add(String(L"") + IA + M); - _nounEndings.add(String(L"") + IA + M + I); - _nounEndings.add(String(L"") + A + M + I); - _nounEndings.add(String(L"") + I + E + I_); - 
_nounEndings.add(String(L"") + I + IA + M); - _nounEndings.add(String(L"") + I + E + M); - _nounEndings.add(String(L"") + I + IA + X); - _nounEndings.add(String(L"") + I + IA + M + I); - } - return _nounEndings; - } +const wchar_t RussianStemmer::A = L'\x0430'; +const wchar_t RussianStemmer::V = L'\x0432'; +const wchar_t RussianStemmer::G = L'\x0433'; +const wchar_t RussianStemmer::E = L'\x0435'; +const wchar_t RussianStemmer::I = L'\x0438'; +const wchar_t RussianStemmer::I_ = L'\x0439'; +const wchar_t RussianStemmer::L = L'\x043b'; +const wchar_t RussianStemmer::M = L'\x043c'; +const wchar_t RussianStemmer::N = L'\x043d'; +const wchar_t RussianStemmer::O = L'\x043e'; +const wchar_t RussianStemmer::S = L'\x0441'; +const wchar_t RussianStemmer::T = L'\x0442'; +const wchar_t RussianStemmer::U = L'\x0443'; +const wchar_t RussianStemmer::X = L'\x0445'; +const wchar_t RussianStemmer::SH = L'\x0448'; +const wchar_t RussianStemmer::SHCH = L'\x0449'; +const wchar_t RussianStemmer::Y = L'\x044b'; +const wchar_t RussianStemmer::SOFT = L'\x044c'; +const wchar_t RussianStemmer::AE = L'\x044d'; +const wchar_t RussianStemmer::IU = L'\x044e'; +const wchar_t RussianStemmer::IA = L'\x044f'; - Collection RussianStemmer::superlativeEndings() - { - static Collection _superlativeEndings; - if (!_superlativeEndings) - { - _superlativeEndings = Collection::newInstance(); - _superlativeEndings.add(String(L"") + E + I_ + SH); - _superlativeEndings.add(String(L"") + E + I_ + SH + E); - } - return _superlativeEndings; - } - - Collection RussianStemmer::derivationalEndings() - { - static Collection _derivationalEndings; - if (!_derivationalEndings) - { - _derivationalEndings = Collection::newInstance(); - _derivationalEndings.add(String(L"") + O + S + T); - _derivationalEndings.add(String(L"") + O + S + T + SOFT); - } - return _derivationalEndings; - } - - Collection RussianStemmer::doubleN() - { - static Collection _doubleN; - if (!_doubleN) - { - _doubleN = Collection::newInstance(); - 
_doubleN.add(String(L"") + N + N); - } - return _doubleN; +const wchar_t RussianStemmer::vowels[] = {A, E, I, O, U, Y, AE, IU, IA}; + +RussianStemmer::RussianStemmer() { + RV = 0; + R1 = 0; + R2 = 0; +} + +RussianStemmer::~RussianStemmer() { +} + +Collection RussianStemmer::perfectiveGerundEndings1() { + static Collection _perfectiveGerundEndings1; + LUCENE_RUN_ONCE( + _perfectiveGerundEndings1 = Collection::newInstance(); + _perfectiveGerundEndings1.add(String(L"") + V); + _perfectiveGerundEndings1.add(String(L"") + V + SH + I); + _perfectiveGerundEndings1.add(String(L"") + V + SH + I + S + SOFT); + ); + return _perfectiveGerundEndings1; +} + +Collection RussianStemmer::perfectiveGerund1Predessors() { + static Collection _perfectiveGerund1Predessors; + LUCENE_RUN_ONCE( + _perfectiveGerund1Predessors = Collection::newInstance(); + _perfectiveGerund1Predessors.add(String(L"") + A); + _perfectiveGerund1Predessors.add(String(L"") + IA); + ); + return _perfectiveGerund1Predessors; +} + +Collection RussianStemmer::perfectiveGerundEndings2() { + static Collection _perfectiveGerundEndings2; + LUCENE_RUN_ONCE( + _perfectiveGerundEndings2 = Collection::newInstance(); + _perfectiveGerundEndings2.add(String(L"") + I + V); + _perfectiveGerundEndings2.add(String(L"") + Y + V); + _perfectiveGerundEndings2.add(String(L"") + I + V + SH + I); + _perfectiveGerundEndings2.add(String(L"") + Y + V + SH + I); + _perfectiveGerundEndings2.add(String(L"") + I + V + SH + I + S + SOFT); + _perfectiveGerundEndings2.add(String(L"") + Y + V + SH + I + S + SOFT); + ); + return _perfectiveGerundEndings2; +} + +Collection RussianStemmer::adjectiveEndings() { + static Collection _adjectiveEndings; + LUCENE_RUN_ONCE( + _adjectiveEndings = Collection::newInstance(); + _adjectiveEndings.add(String(L"") + E + E); + _adjectiveEndings.add(String(L"") + I + E); + _adjectiveEndings.add(String(L"") + Y + E); + _adjectiveEndings.add(String(L"") + O + E); + _adjectiveEndings.add(String(L"") + E + I_); + 
_adjectiveEndings.add(String(L"") + I + I_); + _adjectiveEndings.add(String(L"") + Y + I_); + _adjectiveEndings.add(String(L"") + O + I_); + _adjectiveEndings.add(String(L"") + E + M); + _adjectiveEndings.add(String(L"") + I + M); + _adjectiveEndings.add(String(L"") + Y + M); + _adjectiveEndings.add(String(L"") + O + M); + _adjectiveEndings.add(String(L"") + I + X); + _adjectiveEndings.add(String(L"") + Y + X); + _adjectiveEndings.add(String(L"") + U + IU); + _adjectiveEndings.add(String(L"") + IU + IU); + _adjectiveEndings.add(String(L"") + A + IA); + _adjectiveEndings.add(String(L"") + IA + IA); + _adjectiveEndings.add(String(L"") + O + IU); + _adjectiveEndings.add(String(L"") + E + IU); + _adjectiveEndings.add(String(L"") + I + M + I); + _adjectiveEndings.add(String(L"") + Y + M + I); + _adjectiveEndings.add(String(L"") + E + G + O); + _adjectiveEndings.add(String(L"") + O + G + O); + _adjectiveEndings.add(String(L"") + E + M + U); + _adjectiveEndings.add(String(L"") + O + M + U); + ); + return _adjectiveEndings; +} + +Collection RussianStemmer::participleEndings1() { + static Collection _participleEndings1; + LUCENE_RUN_ONCE( + _participleEndings1 = Collection::newInstance(); + _participleEndings1.add(String(L"") + SHCH); + _participleEndings1.add(String(L"") + E + M); + _participleEndings1.add(String(L"") + N + N); + _participleEndings1.add(String(L"") + V + SH); + _participleEndings1.add(String(L"") + IU + SHCH); + ); + return _participleEndings1; +} + +Collection RussianStemmer::participleEndings2() { + static Collection _participleEndings2; + LUCENE_RUN_ONCE( + _participleEndings2 = Collection::newInstance(); + _participleEndings2.add(String(L"") + I + V + SH); + _participleEndings2.add(String(L"") + Y + V + SH); + _participleEndings2.add(String(L"") + U + IU + SHCH); + ); + return _participleEndings2; +} + +Collection RussianStemmer::participle1Predessors() { + static Collection _participle1Predessors; + LUCENE_RUN_ONCE( + _participle1Predessors = 
Collection::newInstance(); + _participle1Predessors.add(String(L"") + A); + _participle1Predessors.add(String(L"") + IA); + ); + return _participle1Predessors; +} + +Collection RussianStemmer::reflexiveEndings() { + static Collection _participle1Predessors; + LUCENE_RUN_ONCE( + _participle1Predessors = Collection::newInstance(); + _participle1Predessors.add(String(L"") + S + IA); + _participle1Predessors.add(String(L"") + S + SOFT); + ); + return _participle1Predessors; +} + +Collection RussianStemmer::verbEndings1() { + static Collection _verbEndings1; + LUCENE_RUN_ONCE( + _verbEndings1 = Collection::newInstance(); + _verbEndings1.add(String(L"") + I_); + _verbEndings1.add(String(L"") + L); + _verbEndings1.add(String(L"") + N); + _verbEndings1.add(String(L"") + L + O); + _verbEndings1.add(String(L"") + N + O); + _verbEndings1.add(String(L"") + E + T); + _verbEndings1.add(String(L"") + IU + T); + _verbEndings1.add(String(L"") + L + A); + _verbEndings1.add(String(L"") + N + A); + _verbEndings1.add(String(L"") + L + I); + _verbEndings1.add(String(L"") + E + M); + _verbEndings1.add(String(L"") + N + Y); + _verbEndings1.add(String(L"") + E + T + E); + _verbEndings1.add(String(L"") + I_ + T + E); + _verbEndings1.add(String(L"") + T + SOFT); + _verbEndings1.add(String(L"") + E + SH + SOFT); + _verbEndings1.add(String(L"") + N + N + O); + ); + return _verbEndings1; +} + +Collection RussianStemmer::verbEndings2() { + static Collection _verbEndings2; + LUCENE_RUN_ONCE( + _verbEndings2 = Collection::newInstance(); + _verbEndings2.add(String(L"") + IU); + _verbEndings2.add(String(L"") + U + IU); + _verbEndings2.add(String(L"") + E + N); + _verbEndings2.add(String(L"") + E + I_); + _verbEndings2.add(String(L"") + IA + T); + _verbEndings2.add(String(L"") + U + I_); + _verbEndings2.add(String(L"") + I + L); + _verbEndings2.add(String(L"") + Y + L); + _verbEndings2.add(String(L"") + I + M); + _verbEndings2.add(String(L"") + Y + M); + _verbEndings2.add(String(L"") + I + T); + 
_verbEndings2.add(String(L"") + Y + T); + _verbEndings2.add(String(L"") + I + L + A); + _verbEndings2.add(String(L"") + Y + L + A); + _verbEndings2.add(String(L"") + E + N + A); + _verbEndings2.add(String(L"") + I + T + E); + _verbEndings2.add(String(L"") + I + L + I); + _verbEndings2.add(String(L"") + Y + L + I); + _verbEndings2.add(String(L"") + I + L + O); + _verbEndings2.add(String(L"") + Y + L + O); + _verbEndings2.add(String(L"") + E + N + O); + _verbEndings2.add(String(L"") + U + E + T); + _verbEndings2.add(String(L"") + U + IU + T); + _verbEndings2.add(String(L"") + E + N + Y); + _verbEndings2.add(String(L"") + I + T + SOFT); + _verbEndings2.add(String(L"") + Y + T + SOFT); + _verbEndings2.add(String(L"") + I + SH + SOFT); + _verbEndings2.add(String(L"") + E + I_ + T + E); + _verbEndings2.add(String(L"") + U + I_ + T + E); + ); + return _verbEndings2; +} + +Collection RussianStemmer::verb1Predessors() { + static Collection _verb1Predessors; + LUCENE_RUN_ONCE( + _verb1Predessors = Collection::newInstance(); + _verb1Predessors.add(String(L"") + A); + _verb1Predessors.add(String(L"") + IA); + ); + return _verb1Predessors; +} + +Collection RussianStemmer::nounEndings() { + static Collection _nounEndings; + LUCENE_RUN_ONCE( + _nounEndings = Collection::newInstance(); + _nounEndings.add(String(L"") + A); + _nounEndings.add(String(L"") + U); + _nounEndings.add(String(L"") + I_); + _nounEndings.add(String(L"") + O); + _nounEndings.add(String(L"") + U); + _nounEndings.add(String(L"") + E); + _nounEndings.add(String(L"") + Y); + _nounEndings.add(String(L"") + I); + _nounEndings.add(String(L"") + SOFT); + _nounEndings.add(String(L"") + IA); + _nounEndings.add(String(L"") + E + V); + _nounEndings.add(String(L"") + O + V); + _nounEndings.add(String(L"") + I + E); + _nounEndings.add(String(L"") + SOFT + E); + _nounEndings.add(String(L"") + IA + X); + _nounEndings.add(String(L"") + I + IU); + _nounEndings.add(String(L"") + E + I); + _nounEndings.add(String(L"") + I + I); 
+ _nounEndings.add(String(L"") + E + I_); + _nounEndings.add(String(L"") + O + I_); + _nounEndings.add(String(L"") + E + M); + _nounEndings.add(String(L"") + A + M); + _nounEndings.add(String(L"") + O + M); + _nounEndings.add(String(L"") + A + X); + _nounEndings.add(String(L"") + SOFT + IU); + _nounEndings.add(String(L"") + I + IA); + _nounEndings.add(String(L"") + SOFT + IA); + _nounEndings.add(String(L"") + I + I_); + _nounEndings.add(String(L"") + IA + M); + _nounEndings.add(String(L"") + IA + M + I); + _nounEndings.add(String(L"") + A + M + I); + _nounEndings.add(String(L"") + I + E + I_); + _nounEndings.add(String(L"") + I + IA + M); + _nounEndings.add(String(L"") + I + E + M); + _nounEndings.add(String(L"") + I + IA + X); + _nounEndings.add(String(L"") + I + IA + M + I); + ); + return _nounEndings; +} + +Collection RussianStemmer::superlativeEndings() { + static Collection _superlativeEndings; + LUCENE_RUN_ONCE( + _superlativeEndings = Collection::newInstance(); + _superlativeEndings.add(String(L"") + E + I_ + SH); + _superlativeEndings.add(String(L"") + E + I_ + SH + E); + ); + return _superlativeEndings; +} + +Collection RussianStemmer::derivationalEndings() { + static Collection _derivationalEndings; + LUCENE_RUN_ONCE( + _derivationalEndings = Collection::newInstance(); + _derivationalEndings.add(String(L"") + O + S + T); + _derivationalEndings.add(String(L"") + O + S + T + SOFT); + ); + return _derivationalEndings; +} + +Collection RussianStemmer::doubleN() { + static Collection _doubleN; + LUCENE_RUN_ONCE( + _doubleN = Collection::newInstance(); + _doubleN.add(String(L"") + N + N); + ); + return _doubleN; +} + +String RussianStemmer::stem(const String& input) { + markPositions(input); + if (RV == 0) { + return input; // RV wasn't detected, nothing to stem } - - String RussianStemmer::stem(const String& input) - { - markPositions(input); - if (RV == 0) - return input; // RV wasn't detected, nothing to stem - - String stemmingZone(input.substr(RV)); - - // 
stemming goes on in RV - - // Step 1 - if (!perfectiveGerund(stemmingZone)) - { - reflexive(stemmingZone); - - if (!adjectival(stemmingZone)) - { - if (!verb(stemmingZone)) - noun(stemmingZone); + + String stemmingZone(input.substr(RV)); + + // stemming goes on in RV + + // Step 1 + if (!perfectiveGerund(stemmingZone)) { + reflexive(stemmingZone); + + if (!adjectival(stemmingZone)) { + if (!verb(stemmingZone)) { + noun(stemmingZone); } } + } - // Step 2 - removeI(stemmingZone); + // Step 2 + removeI(stemmingZone); - // Step 3 - derivational(stemmingZone); + // Step 3 + derivational(stemmingZone); - // Step 4 - superlative(stemmingZone); - undoubleN(stemmingZone); - removeSoft(stemmingZone); + // Step 4 + superlative(stemmingZone); + undoubleN(stemmingZone); + removeSoft(stemmingZone); + + // return result + return input.substr(0, RV) + stemmingZone; +} - // return result - return input.substr(0, RV) + stemmingZone; +String RussianStemmer::stemWord(const String& word) { + return newLucene()->stem(word); +} + +bool RussianStemmer::adjectival(String& stemmingZone) { + // look for adjective ending in a stemming zone + if (!findAndRemoveEnding(stemmingZone, adjectiveEndings())) { + return false; } - - String RussianStemmer::stemWord(const String& word) - { - return newLucene()->stem(word); + + if (!findAndRemoveEnding(stemmingZone, participleEndings1(), participle1Predessors())) { + findAndRemoveEnding(stemmingZone, participleEndings2()); } - - bool RussianStemmer::adjectival(String& stemmingZone) - { - // look for adjective ending in a stemming zone - if (!findAndRemoveEnding(stemmingZone, adjectiveEndings())) + + return true; +} + +bool RussianStemmer::derivational(String& stemmingZone) { + int32_t endingLength = findEnding(stemmingZone, derivationalEndings()); + if (endingLength == 0) { + return false; // no derivational ending found + } else { + // Ensure that the ending locates in R2 + if (R2 - RV <= (int32_t)stemmingZone.length() - endingLength) { + 
stemmingZone.resize(stemmingZone.length() - endingLength); + return true; + } else { return false; - - if (!findAndRemoveEnding(stemmingZone, participleEndings1(), participle1Predessors())) - findAndRemoveEnding(stemmingZone, participleEndings2()); - - return true; - } - - bool RussianStemmer::derivational(String& stemmingZone) - { - int32_t endingLength = findEnding(stemmingZone, derivationalEndings()); - if (endingLength == 0) - return false; // no derivational ending found - else - { - // Ensure that the ending locates in R2 - if (R2 - RV <= (int32_t)stemmingZone.length() - endingLength) - { - stemmingZone.resize(stemmingZone.length() - endingLength); - return true; - } - else - return false; } } - - int32_t RussianStemmer::findEnding(String& stemmingZone, int32_t startIndex, Collection theEndingClass) - { - bool match = false; - for (int32_t i = theEndingClass.size() - 1; i >= 0; --i) - { - String theEnding(theEndingClass[i]); - // check if the ending is bigger than stemming zone - if (startIndex < (int32_t)theEnding.length() - 1) - { +} + +int32_t RussianStemmer::findEnding(String& stemmingZone, int32_t startIndex, Collection theEndingClass) { + bool match = false; + for (int32_t i = theEndingClass.size() - 1; i >= 0; --i) { + String theEnding(theEndingClass[i]); + // check if the ending is bigger than stemming zone + if (startIndex < (int32_t)theEnding.length() - 1) { + match = false; + continue; + } + match = true; + int32_t stemmingIndex = startIndex; + for (int32_t j = (int32_t)theEnding.length() - 1; j >= 0; --j) { + if (stemmingZone[stemmingIndex--] != theEnding[j]) { match = false; - continue; + break; } - match = true; - int32_t stemmingIndex = startIndex; - for (int32_t j = (int32_t)theEnding.length() - 1; j >= 0; --j) - { - if (stemmingZone[stemmingIndex--] != theEnding[j]) - { - match = false; - break; - } - } - // check if ending was found - if (match) - return (int32_t)theEndingClass[i].size(); // cut ending } - return 0; + // check if ending was 
found + if (match) { + return (int32_t)theEndingClass[i].size(); // cut ending + } } - - int32_t RussianStemmer::findEnding(String& stemmingZone, Collection theEndingClass) - { - return findEnding(stemmingZone, (int32_t)(stemmingZone.length() - 1), theEndingClass); + return 0; +} + +int32_t RussianStemmer::findEnding(String& stemmingZone, Collection theEndingClass) { + return findEnding(stemmingZone, (int32_t)(stemmingZone.length() - 1), theEndingClass); +} + +bool RussianStemmer::findAndRemoveEnding(String& stemmingZone, Collection theEndingClass) { + int32_t endingLength = findEnding(stemmingZone, theEndingClass); + if (endingLength == 0) { + return false; // not found + } else { + stemmingZone.resize(stemmingZone.length() - endingLength); + return true; // cut the ending found } - - bool RussianStemmer::findAndRemoveEnding(String& stemmingZone, Collection theEndingClass) - { - int32_t endingLength = findEnding(stemmingZone, theEndingClass); - if (endingLength == 0) - return false; // not found - else - { +} + +bool RussianStemmer::findAndRemoveEnding(String& stemmingZone, Collection theEndingClass, Collection thePredessors) { + int32_t endingLength = findEnding(stemmingZone, theEndingClass); + if (endingLength == 0) { + return false; // not found + } else { + int32_t predessorLength = findEnding(stemmingZone, (int32_t)(stemmingZone.length() - endingLength - 1), thePredessors); + if (predessorLength == 0) { + return false; + } else { stemmingZone.resize(stemmingZone.length() - endingLength); return true; // cut the ending found } } - - bool RussianStemmer::findAndRemoveEnding(String& stemmingZone, Collection theEndingClass, Collection thePredessors) - { - int32_t endingLength = findEnding(stemmingZone, theEndingClass); - if (endingLength == 0) - return false; // not found - else - { - int32_t predessorLength = findEnding(stemmingZone, (int32_t)(stemmingZone.length() - endingLength - 1), thePredessors); - if (predessorLength == 0) - return false; - else - { - 
stemmingZone.resize(stemmingZone.length() - endingLength); - return true; // cut the ending found - } - } +} + +void RussianStemmer::markPositions(const String& word) { + RV = 0; + R1 = 0; + R2 = 0; + int32_t i = 0; + // find RV + while ((int32_t)word.length() > i && !isVowel(word[i])) { + ++i; } - - void RussianStemmer::markPositions(const String& word) - { - RV = 0; - R1 = 0; - R2 = 0; - int32_t i = 0; - // find RV - while ((int32_t)word.length() > i && !isVowel(word[i])) - ++i; - if ((int32_t)word.length() - 1 < ++i) - return; // RV zone is empty - RV = i; - // find R1 - while ((int32_t)word.length() > i && isVowel(word[i])) - ++i; - if ((int32_t)word.length() - 1 < ++i) - return; // R1 zone is empty - R1 = i; - // find R2 - while ((int32_t)word.length() > i && !isVowel(word[i])) - ++i; - if ((int32_t)word.length() - 1 < ++i) - return; // R2 zone is empty - while ((int32_t)word.length() > i && isVowel(word[i])) - ++i; - if ((int32_t)word.length() - 1 < ++i) - return; // R2 zone is empty - R2 = i; + if ((int32_t)word.length() - 1 < ++i) { + return; // RV zone is empty } - - bool RussianStemmer::isVowel(wchar_t letter) - { - for (int32_t i = 0; i < SIZEOF_ARRAY(vowels); ++i) - { - if (letter == vowels[i]) - return true; - } - return false; + RV = i; + // find R1 + while ((int32_t)word.length() > i && isVowel(word[i])) { + ++i; } - - bool RussianStemmer::noun(String& stemmingZone) - { - return findAndRemoveEnding(stemmingZone, nounEndings()); + if ((int32_t)word.length() - 1 < ++i) { + return; // R1 zone is empty } - - bool RussianStemmer::perfectiveGerund(String& stemmingZone) - { - return findAndRemoveEnding(stemmingZone, perfectiveGerundEndings1(), perfectiveGerund1Predessors()) || - findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2()); + R1 = i; + // find R2 + while ((int32_t)word.length() > i && !isVowel(word[i])) { + ++i; } - - bool RussianStemmer::reflexive(String& stemmingZone) - { - return findAndRemoveEnding(stemmingZone, reflexiveEndings()); + 
if ((int32_t)word.length() - 1 < ++i) { + return; // R2 zone is empty } - - bool RussianStemmer::removeI(String& stemmingZone) - { - if ((int32_t)stemmingZone.length() > 0 && stemmingZone[stemmingZone.length() - 1] == I) - { - stemmingZone.resize(stemmingZone.length() - 1); - return true; - } - else - return false; + while ((int32_t)word.length() > i && isVowel(word[i])) { + ++i; + } + if ((int32_t)word.length() - 1 < ++i) { + return; // R2 zone is empty } - - bool RussianStemmer::removeSoft(String& stemmingZone) - { - if ((int32_t)stemmingZone.length() > 0 && stemmingZone[stemmingZone.length() - 1] == SOFT) - { - stemmingZone.resize(stemmingZone.length() - 1); + R2 = i; +} + +bool RussianStemmer::isVowel(wchar_t letter) { + for (int32_t i = 0; i < SIZEOF_ARRAY(vowels); ++i) { + if (letter == vowels[i]) { return true; } - return false; } - - bool RussianStemmer::superlative(String& stemmingZone) - { - return findAndRemoveEnding(stemmingZone, superlativeEndings()); + return false; +} + +bool RussianStemmer::noun(String& stemmingZone) { + return findAndRemoveEnding(stemmingZone, nounEndings()); +} + +bool RussianStemmer::perfectiveGerund(String& stemmingZone) { + return findAndRemoveEnding(stemmingZone, perfectiveGerundEndings1(), perfectiveGerund1Predessors()) || + findAndRemoveEnding(stemmingZone, perfectiveGerundEndings2()); +} + +bool RussianStemmer::reflexive(String& stemmingZone) { + return findAndRemoveEnding(stemmingZone, reflexiveEndings()); +} + +bool RussianStemmer::removeI(String& stemmingZone) { + if ((int32_t)stemmingZone.length() > 0 && stemmingZone[stemmingZone.length() - 1] == I) { + stemmingZone.resize(stemmingZone.length() - 1); + return true; + } else { + return false; } - - bool RussianStemmer::undoubleN(String& stemmingZone) - { - if (findEnding(stemmingZone, doubleN()) != 0) - { - stemmingZone.resize(stemmingZone.length() - 1); - return true; - } - else - return false; +} + +bool RussianStemmer::removeSoft(String& stemmingZone) { + if 
((int32_t)stemmingZone.length() > 0 && stemmingZone[stemmingZone.length() - 1] == SOFT) { + stemmingZone.resize(stemmingZone.length() - 1); + return true; } - - bool RussianStemmer::verb(String& stemmingZone) - { - return findAndRemoveEnding(stemmingZone, verbEndings1(), verb1Predessors()) || - findAndRemoveEnding(stemmingZone, verbEndings2()); + return false; +} + +bool RussianStemmer::superlative(String& stemmingZone) { + return findAndRemoveEnding(stemmingZone, superlativeEndings()); +} + +bool RussianStemmer::undoubleN(String& stemmingZone) { + if (findEnding(stemmingZone, doubleN()) != 0) { + stemmingZone.resize(stemmingZone.length() - 1); + return true; + } else { + return false; } } + +bool RussianStemmer::verb(String& stemmingZone) { + return findAndRemoveEnding(stemmingZone, verbEndings1(), verb1Predessors()) || + findAndRemoveEnding(stemmingZone, verbEndings2()); +} + +} diff --git a/src/contrib/highlighter/DefaultEncoder.cpp b/src/contrib/highlighter/DefaultEncoder.cpp index c49aaadd..c23bbb88 100644 --- a/src/contrib/highlighter/DefaultEncoder.cpp +++ b/src/contrib/highlighter/DefaultEncoder.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,14 +7,13 @@ #include "ContribInc.h" #include "DefaultEncoder.h" -namespace Lucene -{ - DefaultEncoder::~DefaultEncoder() - { - } - - String DefaultEncoder::encodeText(const String& originalText) - { - return originalText; - } +namespace Lucene { + +DefaultEncoder::~DefaultEncoder() { +} + +String DefaultEncoder::encodeText(const String& originalText) { + return originalText; +} + } diff --git a/src/contrib/highlighter/Encoder.cpp b/src/contrib/highlighter/Encoder.cpp index 31e64398..d5882db4 100644 --- a/src/contrib/highlighter/Encoder.cpp +++ b/src/contrib/highlighter/Encoder.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,15 +7,14 @@ #include "ContribInc.h" #include "Encoder.h" -namespace Lucene -{ - Encoder::~Encoder() - { - } - - String Encoder::encodeText(const String& originalText) - { - BOOST_ASSERT(false); - return L""; // override - } +namespace Lucene { + +Encoder::~Encoder() { +} + +String Encoder::encodeText(const String& originalText) { + BOOST_ASSERT(false); + return L""; // override +} + } diff --git a/src/contrib/highlighter/Formatter.cpp b/src/contrib/highlighter/Formatter.cpp index 77aea354..24b28083 100644 --- a/src/contrib/highlighter/Formatter.cpp +++ b/src/contrib/highlighter/Formatter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,15 +7,14 @@ #include "ContribInc.h" #include "Formatter.h" -namespace Lucene -{ - Formatter::~Formatter() - { - } - - String Formatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) - { - BOOST_ASSERT(false); - return L""; // override - } +namespace Lucene { + +Formatter::~Formatter() { +} + +String Formatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { + BOOST_ASSERT(false); + return L""; // override +} + } diff --git a/src/contrib/highlighter/Fragmenter.cpp b/src/contrib/highlighter/Fragmenter.cpp index c336a638..70723cf0 100644 --- a/src/contrib/highlighter/Fragmenter.cpp +++ b/src/contrib/highlighter/Fragmenter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,21 +7,19 @@ #include "ContribInc.h" #include "Fragmenter.h" -namespace Lucene -{ - Fragmenter::~Fragmenter() - { - } - - void Fragmenter::start(const String& originalText, TokenStreamPtr tokenStream) - { - BOOST_ASSERT(false); - // override - } - - bool Fragmenter::isNewFragment() - { - BOOST_ASSERT(false); - return false; // override - } +namespace Lucene { + +Fragmenter::~Fragmenter() { +} + +void Fragmenter::start(const String& originalText, const TokenStreamPtr& tokenStream) { + BOOST_ASSERT(false); + // override +} + +bool Fragmenter::isNewFragment() { + BOOST_ASSERT(false); + return false; // override +} + } diff --git a/src/contrib/highlighter/GradientFormatter.cpp b/src/contrib/highlighter/GradientFormatter.cpp index dcd278e4..9a32ba41 100644 --- a/src/contrib/highlighter/GradientFormatter.cpp +++ b/src/contrib/highlighter/GradientFormatter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,120 +9,121 @@ #include "TokenGroup.h" #include "StringUtils.h" -namespace Lucene -{ - GradientFormatter::GradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor) - { - highlightForeground = (!minForegroundColor.empty() && !maxForegroundColor.empty()); - if (highlightForeground) - { - if (minForegroundColor.length() != 7) - boost::throw_exception(IllegalArgumentException(L"minForegroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); - if (maxForegroundColor.length() != 7) - boost::throw_exception(IllegalArgumentException(L"maxForegroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); - - fgRMin = hexToInt(minForegroundColor.substr(1, 2)); - fgGMin = hexToInt(minForegroundColor.substr(3, 2)); - fgBMin = hexToInt(minForegroundColor.substr(5, 2)); - - fgRMax = hexToInt(maxForegroundColor.substr(1, 2)); - fgGMax = hexToInt(maxForegroundColor.substr(3, 2)); - fgBMax = hexToInt(maxForegroundColor.substr(5, 2)); +namespace Lucene { + +GradientFormatter::GradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor) { + highlightForeground = (!minForegroundColor.empty() && !maxForegroundColor.empty()); + if (highlightForeground) { + if (minForegroundColor.length() != 7) { + boost::throw_exception(IllegalArgumentException(L"minForegroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); } - - highlightBackground = (!minBackgroundColor.empty() && !maxBackgroundColor.empty()); - if (highlightBackground) - { - if (minBackgroundColor.length() != 7) - boost::throw_exception(IllegalArgumentException(L"minBackgroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); - if (maxBackgroundColor.length() != 7) - 
boost::throw_exception(IllegalArgumentException(L"maxBackgroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); - - bgRMin = hexToInt(minBackgroundColor.substr(1, 2)); - bgGMin = hexToInt(minBackgroundColor.substr(3, 2)); - bgBMin = hexToInt(minBackgroundColor.substr(5, 2)); - - bgRMax = hexToInt(maxBackgroundColor.substr(1, 2)); - bgGMax = hexToInt(maxBackgroundColor.substr(3, 2)); - bgBMax = hexToInt(maxBackgroundColor.substr(5, 2)); + if (maxForegroundColor.length() != 7) { + boost::throw_exception(IllegalArgumentException(L"maxForegroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); } - - this->maxScore = maxScore; + + fgRMin = hexToInt(minForegroundColor.substr(1, 2)); + fgGMin = hexToInt(minForegroundColor.substr(3, 2)); + fgBMin = hexToInt(minForegroundColor.substr(5, 2)); + + fgRMax = hexToInt(maxForegroundColor.substr(1, 2)); + fgGMax = hexToInt(maxForegroundColor.substr(3, 2)); + fgBMax = hexToInt(maxForegroundColor.substr(5, 2)); } - - GradientFormatter::~GradientFormatter() - { + + highlightBackground = (!minBackgroundColor.empty() && !maxBackgroundColor.empty()); + if (highlightBackground) { + if (minBackgroundColor.length() != 7) { + boost::throw_exception(IllegalArgumentException(L"minBackgroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); + } + if (maxBackgroundColor.length() != 7) { + boost::throw_exception(IllegalArgumentException(L"maxBackgroundColor is not 7 bytes long eg a hex RGB value such as #FFFFFF")); + } + + bgRMin = hexToInt(minBackgroundColor.substr(1, 2)); + bgGMin = hexToInt(minBackgroundColor.substr(3, 2)); + bgBMin = hexToInt(minBackgroundColor.substr(5, 2)); + + bgRMax = hexToInt(maxBackgroundColor.substr(1, 2)); + bgGMax = hexToInt(maxBackgroundColor.substr(3, 2)); + bgBMax = hexToInt(maxBackgroundColor.substr(5, 2)); + } + + this->maxScore = maxScore; +} + +GradientFormatter::~GradientFormatter() { +} + +String GradientFormatter::highlightTerm(const String& originalText, const 
TokenGroupPtr& tokenGroup) { + if (tokenGroup->getTotalScore() == 0) { + return originalText; } - - String GradientFormatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) - { - if (tokenGroup->getTotalScore() == 0) - return originalText; - double score = tokenGroup->getTotalScore(); - if (score == 0.0) - return originalText; - StringStream buffer; - buffer << L"" << originalText << L""; - return buffer.str(); + double score = tokenGroup->getTotalScore(); + if (score == 0.0) { + return originalText; } - - String GradientFormatter::getForegroundColorString(double score) - { - int32_t rVal = getColorVal(fgRMin, fgRMax, score); - int32_t gVal = getColorVal(fgGMin, fgGMax, score); - int32_t bVal = getColorVal(fgBMin, fgBMax, score); - StringStream buffer; - buffer << L"#" << intToHex(rVal) << intToHex(gVal) << intToHex(bVal); - return buffer.str(); + StringStream buffer; + buffer << L"" << originalText << L""; + return buffer.str(); +} + +String GradientFormatter::getForegroundColorString(double score) { + int32_t rVal = getColorVal(fgRMin, fgRMax, score); + int32_t gVal = getColorVal(fgGMin, fgGMax, score); + int32_t bVal = getColorVal(fgBMin, fgBMax, score); + StringStream buffer; + buffer << L"#" << intToHex(rVal) << intToHex(gVal) << intToHex(bVal); + return buffer.str(); +} + +String GradientFormatter::getBackgroundColorString(double score) { + int32_t rVal = getColorVal(bgRMin, bgRMax, score); + int32_t gVal = getColorVal(bgGMin, bgGMax, score); + int32_t bVal = getColorVal(bgBMin, bgBMax, score); + StringStream buffer; + buffer << L"#" << intToHex(rVal) << intToHex(gVal) << intToHex(bVal); + return buffer.str(); +} + +int32_t GradientFormatter::getColorVal(int32_t colorMin, int32_t colorMax, double score) { + if (colorMin == colorMax) { + return colorMin; } - - String GradientFormatter::intToHex(int32_t i) - { - static const wchar_t* hexDigits = L"0123456789abcdef"; - StringStream buffer; - buffer << hexDigits[(i & 0xf0) >> 4] << 
hexDigits[i & 0x0f]; - return buffer.str(); + double scale = std::abs((double)(colorMin - colorMax)); + double relScorePercent = std::min(maxScore, score) / maxScore; + double colScore = scale * relScorePercent; + return std::min(colorMin, colorMax) + (int32_t)colScore; +} + +String GradientFormatter::intToHex(int32_t i) { + static const wchar_t* hexDigits = L"0123456789abcdef"; + StringStream buffer; + buffer << hexDigits[(i & 0xf0) >> 4] << hexDigits[i & 0x0f]; + return buffer.str(); +} + +int32_t GradientFormatter::hexToInt(const String& hex) { + int32_t len = (int32_t)hex.length(); + if (len > 16) { + boost::throw_exception(NumberFormatException()); } - - int32_t GradientFormatter::hexToInt(const String& hex) - { - int32_t len = (int32_t)hex.length(); - if (len > 16) + int32_t l = 0; + for (int32_t i = 0; i < len; ++i) { + l <<= 4; + int32_t c = (int32_t)StringUtils::toLong(hex.substr(i, 1), 16); + if (c < 0) { boost::throw_exception(NumberFormatException()); - int32_t l = 0; - for (int32_t i = 0; i < len; ++i) - { - l <<= 4; - int32_t c = (int32_t)StringUtils::toLong(hex.substr(i, 1), 16); - if (c < 0) - boost::throw_exception(NumberFormatException()); - l |= c; } - return l; + l |= c; } + return l; +} + } diff --git a/src/contrib/highlighter/Highlighter.cpp b/src/contrib/highlighter/Highlighter.cpp index b226e760..9f9f2ede 100644 --- a/src/contrib/highlighter/Highlighter.cpp +++ b/src/contrib/highlighter/Highlighter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -21,337 +21,302 @@ #include "SimpleFragmenter.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t Highlighter::DEFAULT_MAX_CHARS_TO_ANALYZE = 50 * 1024; - - Highlighter::Highlighter(HighlighterScorerPtr fragmentScorer) - { - this->formatter = newLucene(); - this->encoder = newLucene(); - this->fragmentScorer = fragmentScorer; - this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; - this->textFragmenter = newLucene(); - } - - Highlighter::Highlighter(FormatterPtr formatter, HighlighterScorerPtr fragmentScorer) - { - this->formatter = formatter; - this->encoder = newLucene(); - this->fragmentScorer = fragmentScorer; - this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; - this->textFragmenter = newLucene(); - } - - Highlighter::Highlighter(FormatterPtr formatter, EncoderPtr encoder, HighlighterScorerPtr fragmentScorer) - { - this->formatter = formatter; - this->encoder = encoder; - this->fragmentScorer = fragmentScorer; - this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; - this->textFragmenter = newLucene(); - } - - Highlighter::~Highlighter() - { - } - - String Highlighter::getBestFragment(AnalyzerPtr analyzer, const String& fieldName, const String& text) - { - TokenStreamPtr tokenStream(analyzer->tokenStream(fieldName, newLucene(text))); - return getBestFragment(tokenStream, text); - } - - String Highlighter::getBestFragment(TokenStreamPtr tokenStream, const String& text) - { - Collection results(getBestFragments(tokenStream,text, 1)); - return results.empty() ? 
L"" : results[0]; - } - - Collection Highlighter::getBestFragments(AnalyzerPtr analyzer, const String& fieldName, const String& text, int32_t maxNumFragments) - { - TokenStreamPtr tokenStream(analyzer->tokenStream(fieldName, newLucene(text))); - return getBestFragments(tokenStream, text, maxNumFragments); - } - - Collection Highlighter::getBestFragments(TokenStreamPtr tokenStream, const String& text, int32_t maxNumFragments) - { - maxNumFragments = std::max((int32_t)1, maxNumFragments); //sanity check - - Collection frag(getBestTextFragments(tokenStream, text, true, maxNumFragments)); - - // Get text - Collection fragTexts(Collection::newInstance()); - for (int32_t i = 0; i < frag.size(); ++i) - { - if (frag[i] && frag[i]->getScore() > 0) - fragTexts.add(frag[i]->toString()); +namespace Lucene { + +const int32_t Highlighter::DEFAULT_MAX_CHARS_TO_ANALYZE = 50 * 1024; + +Highlighter::Highlighter(const HighlighterScorerPtr& fragmentScorer) { + this->formatter = newLucene(); + this->encoder = newLucene(); + this->fragmentScorer = fragmentScorer; + this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; + this->textFragmenter = newLucene(); +} + +Highlighter::Highlighter(const FormatterPtr& formatter, const HighlighterScorerPtr& fragmentScorer) { + this->formatter = formatter; + this->encoder = newLucene(); + this->fragmentScorer = fragmentScorer; + this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; + this->textFragmenter = newLucene(); +} + +Highlighter::Highlighter(const FormatterPtr& formatter, const EncoderPtr& encoder, const HighlighterScorerPtr& fragmentScorer) { + this->formatter = formatter; + this->encoder = encoder; + this->fragmentScorer = fragmentScorer; + this->maxDocCharsToAnalyze = DEFAULT_MAX_CHARS_TO_ANALYZE; + this->textFragmenter = newLucene(); +} + +Highlighter::~Highlighter() { +} + +String Highlighter::getBestFragment(const AnalyzerPtr& analyzer, const String& fieldName, const String& text) { + TokenStreamPtr 
tokenStream(analyzer->tokenStream(fieldName, newLucene(text))); + return getBestFragment(tokenStream, text); +} + +String Highlighter::getBestFragment(const TokenStreamPtr& tokenStream, const String& text) { + Collection results(getBestFragments(tokenStream,text, 1)); + return results.empty() ? L"" : results[0]; +} + +Collection Highlighter::getBestFragments(const AnalyzerPtr& analyzer, const String& fieldName, const String& text, int32_t maxNumFragments) { + TokenStreamPtr tokenStream(analyzer->tokenStream(fieldName, newLucene(text))); + return getBestFragments(tokenStream, text, maxNumFragments); +} + +Collection Highlighter::getBestFragments(const TokenStreamPtr& tokenStream, const String& text, int32_t maxNumFragments) { + maxNumFragments = std::max((int32_t)1, maxNumFragments); //sanity check + + Collection frag(getBestTextFragments(tokenStream, text, true, maxNumFragments)); + + // Get text + Collection fragTexts(Collection::newInstance()); + for (int32_t i = 0; i < frag.size(); ++i) { + if (frag[i] && frag[i]->getScore() > 0) { + fragTexts.add(frag[i]->toString()); } - return fragTexts; } - - Collection Highlighter::getBestTextFragments(TokenStreamPtr tokenStream, const String& text, bool merge, int32_t maxNumFragments) - { - Collection docFrags(Collection::newInstance()); - StringBufferPtr newText(newLucene()); - - TermAttributePtr termAtt(tokenStream->addAttribute()); - OffsetAttributePtr offsetAtt(tokenStream->addAttribute()); - tokenStream->addAttribute(); - tokenStream->reset(); - - TextFragmentPtr currentFrag(newLucene(newText, newText->length(), docFrags.size())); - TokenStreamPtr newStream(fragmentScorer->init(tokenStream)); - if (newStream) - tokenStream = newStream; - fragmentScorer->startFragment(currentFrag); - docFrags.add(currentFrag); - - FragmentQueuePtr fragQueue(newLucene(maxNumFragments)); - Collection frag; - - LuceneException finally; - try - { - textFragmenter->start(text, tokenStream); - TokenGroupPtr 
tokenGroup(newLucene(tokenStream)); - String tokenText; - int32_t startOffset = 0; - int32_t endOffset = 0; - int32_t lastEndOffset = 0; - - for (bool next = tokenStream->incrementToken(); next && offsetAtt->startOffset() < maxDocCharsToAnalyze; next = tokenStream->incrementToken()) - { - if (offsetAtt->endOffset() > (int32_t)text.length() || offsetAtt->startOffset() > (int32_t)text.length()) - boost::throw_exception(RuntimeException(L"InvalidTokenOffsets: Token " + termAtt->term() + L" exceeds length of provided text sized " + StringUtils::toString(text.length()))); - - if (tokenGroup->numTokens > 0 && tokenGroup->isDistinct()) - { - // the current token is distinct from previous tokens - markup the cached token group info - startOffset = tokenGroup->matchStartOffset; - endOffset = tokenGroup->matchEndOffset; - tokenText = text.substr(startOffset, endOffset - startOffset); - String markedUpText(formatter->highlightTerm(encoder->encodeText(tokenText), tokenGroup)); - // store any whitespace etc from between this and last group - if (startOffset > lastEndOffset) - newText->append(encoder->encodeText(text.substr(lastEndOffset, startOffset - lastEndOffset))); - newText->append(markedUpText); - lastEndOffset = std::max(endOffset, lastEndOffset); - tokenGroup->clear(); - - // check if current token marks the start of a new fragment - if (textFragmenter->isNewFragment()) - { - currentFrag->setScore(fragmentScorer->getFragmentScore()); - // record stats for a new fragment - currentFrag->textEndPos = newText->length(); - currentFrag = newLucene(newText, newText->length(), docFrags.size()); - fragmentScorer->startFragment(currentFrag); - docFrags.add(currentFrag); - } - } - - tokenGroup->addToken(fragmentScorer->getTokenScore()); + return fragTexts; +} + +Collection Highlighter::getBestTextFragments(const TokenStreamPtr& tokenStream, const String& text, bool merge, int32_t maxNumFragments) { + Collection docFrags(Collection::newInstance()); + StringBufferPtr 
newText(newLucene()); + + TokenStreamPtr _tokenStream(tokenStream); + TermAttributePtr termAtt(_tokenStream->addAttribute()); + OffsetAttributePtr offsetAtt(_tokenStream->addAttribute()); + _tokenStream->addAttribute(); + _tokenStream->reset(); + + TextFragmentPtr currentFrag(newLucene(newText, newText->length(), docFrags.size())); + TokenStreamPtr newStream(fragmentScorer->init(_tokenStream)); + if (newStream) { + _tokenStream = newStream; + } + fragmentScorer->startFragment(currentFrag); + docFrags.add(currentFrag); + + FragmentQueuePtr fragQueue(newLucene(maxNumFragments)); + Collection frag; + + LuceneException finally; + try { + textFragmenter->start(text, _tokenStream); + TokenGroupPtr tokenGroup(newLucene(_tokenStream)); + String tokenText; + int32_t startOffset = 0; + int32_t endOffset = 0; + int32_t lastEndOffset = 0; + + for (bool next = _tokenStream->incrementToken(); next && offsetAtt->startOffset() < maxDocCharsToAnalyze; next = _tokenStream->incrementToken()) { + if (offsetAtt->endOffset() > (int32_t)text.length() || offsetAtt->startOffset() > (int32_t)text.length()) { + boost::throw_exception(RuntimeException(L"InvalidTokenOffsets: Token " + termAtt->term() + L" exceeds length of provided text sized " + StringUtils::toString(text.length()))); } - - currentFrag->setScore(fragmentScorer->getFragmentScore()); - - if (tokenGroup->numTokens > 0) - { - // flush the accumulated text (same code as in above loop) + + if (tokenGroup->numTokens > 0 && tokenGroup->isDistinct()) { + // the current token is distinct from previous tokens - markup the cached token group info startOffset = tokenGroup->matchStartOffset; endOffset = tokenGroup->matchEndOffset; tokenText = text.substr(startOffset, endOffset - startOffset); String markedUpText(formatter->highlightTerm(encoder->encodeText(tokenText), tokenGroup)); // store any whitespace etc from between this and last group - if (startOffset > lastEndOffset) + if (startOffset > lastEndOffset) { 
newText->append(encoder->encodeText(text.substr(lastEndOffset, startOffset - lastEndOffset))); + } newText->append(markedUpText); - lastEndOffset = std::max(lastEndOffset, endOffset); - } - - // Test what remains of the original text beyond the point where we stopped analyzing - if (lastEndOffset < (int32_t)text.length() && (int32_t)text.length() <= maxDocCharsToAnalyze) - { - // append it to the last fragment - newText->append(encoder->encodeText(text.substr(lastEndOffset))); - } - - currentFrag->textEndPos = newText->length(); - - // sort the most relevant sections of the text - for (Collection::iterator i = docFrags.begin(); i != docFrags.end(); ++i) - fragQueue->addOverflow(*i); - - // return the most relevant fragments - frag = Collection::newInstance(fragQueue->size()); - for (int32_t i = frag.size() - 1; i >= 0; --i) - frag[i] = fragQueue->pop(); - - // merge any contiguous fragments to improve readability - if (merge) - { - mergeContiguousFragments(frag); - Collection fragTexts(Collection::newInstance()); - for (int32_t i = 0; i < frag.size(); ++i) - { - if (frag[i] && frag[i]->getScore() > 0) - fragTexts.add(frag[i]); + lastEndOffset = std::max(endOffset, lastEndOffset); + tokenGroup->clear(); + + // check if current token marks the start of a new fragment + if (textFragmenter->isNewFragment()) { + currentFrag->setScore(fragmentScorer->getFragmentScore()); + // record stats for a new fragment + currentFrag->textEndPos = newText->length(); + currentFrag = newLucene(newText, newText->length(), docFrags.size()); + fragmentScorer->startFragment(currentFrag); + docFrags.add(currentFrag); } - frag = fragTexts; } + + tokenGroup->addToken(fragmentScorer->getTokenScore()); } - catch (LuceneException& e) - { - finally = e; - } - if (tokenStream) - { - try - { - tokenStream->close(); + + currentFrag->setScore(fragmentScorer->getFragmentScore()); + + if (tokenGroup->numTokens > 0) { + // flush the accumulated text (same code as in above loop) + startOffset = 
tokenGroup->matchStartOffset; + endOffset = tokenGroup->matchEndOffset; + tokenText = text.substr(startOffset, endOffset - startOffset); + String markedUpText(formatter->highlightTerm(encoder->encodeText(tokenText), tokenGroup)); + // store any whitespace etc from between this and last group + if (startOffset > lastEndOffset) { + newText->append(encoder->encodeText(text.substr(lastEndOffset, startOffset - lastEndOffset))); } - catch (...) - { + newText->append(markedUpText); + lastEndOffset = std::max(lastEndOffset, endOffset); + } + + // Test what remains of the original text beyond the point where we stopped analyzing + if (lastEndOffset < (int32_t)text.length() && (int32_t)text.length() <= maxDocCharsToAnalyze) { + // append it to the last fragment + newText->append(encoder->encodeText(text.substr(lastEndOffset))); + } + + currentFrag->textEndPos = newText->length(); + + // sort the most relevant sections of the text + for (Collection::iterator i = docFrags.begin(); i != docFrags.end(); ++i) { + fragQueue->addOverflow(*i); + } + + // return the most relevant fragments + frag = Collection::newInstance(fragQueue->size()); + for (int32_t i = frag.size() - 1; i >= 0; --i) { + frag[i] = fragQueue->pop(); + } + + // merge any contiguous fragments to improve readability + if (merge) { + mergeContiguousFragments(frag); + Collection fragTexts(Collection::newInstance()); + for (int32_t i = 0; i < frag.size(); ++i) { + if (frag[i] && frag[i]->getScore() > 0) { + fragTexts.add(frag[i]); + } } + frag = fragTexts; } - finally.throwException(); - return frag; + } catch (LuceneException& e) { + finally = e; } - - void Highlighter::mergeContiguousFragments(Collection frag) - { - if (frag.size() > 1) - { - bool mergingStillBeingDone = false; - do - { - mergingStillBeingDone = false; // initialise loop control flag - // for each fragment, scan other frags looking for contiguous blocks - for (int32_t i = 0; i < frag.size(); ++i) - { - if (!frag[i]) + if (_tokenStream) { + try { + 
_tokenStream->close(); + } catch (...) { + } + } + finally.throwException(); + return frag; +} + +void Highlighter::mergeContiguousFragments(Collection frag) { + if (frag.size() > 1) { + bool mergingStillBeingDone = false; + do { + mergingStillBeingDone = false; // initialise loop control flag + // for each fragment, scan other frags looking for contiguous blocks + for (int32_t i = 0; i < frag.size(); ++i) { + if (!frag[i]) { + continue; + } + // merge any contiguous blocks + for (int32_t x = 0; x < frag.size(); ++x) { + if (!frag[x]) { continue; - // merge any contiguous blocks - for (int32_t x = 0; x < frag.size(); ++x) - { - if (!frag[x]) - continue; - if (!frag[i]) - break; - TextFragmentPtr frag1; - TextFragmentPtr frag2; - int32_t frag1Num = 0; - int32_t frag2Num = 0; - int32_t bestScoringFragNum = 0; - int32_t worstScoringFragNum = 0; - // if blocks are contiguous - if (frag[i]->follows(frag[x])) - { - frag1 = frag[x]; - frag1Num = x; - frag2 = frag[i]; - frag2Num = i; - } - else if (frag[x]->follows(frag[i])) - { - frag1 = frag[i]; - frag1Num = i; - frag2 = frag[x]; - frag2Num = x; - } - - // merging required - if (frag1) - { - if (frag1->getScore() > frag2->getScore()) - { - bestScoringFragNum = frag1Num; - worstScoringFragNum = frag2Num; - } - else - { - bestScoringFragNum = frag2Num; - worstScoringFragNum = frag1Num; - } - frag1->merge(frag2); - frag[worstScoringFragNum].reset(); - mergingStillBeingDone = true; - frag[bestScoringFragNum] = frag1; + } + if (!frag[i]) { + break; + } + TextFragmentPtr frag1; + TextFragmentPtr frag2; + int32_t frag1Num = 0; + int32_t frag2Num = 0; + int32_t bestScoringFragNum = 0; + int32_t worstScoringFragNum = 0; + // if blocks are contiguous + if (frag[i]->follows(frag[x])) { + frag1 = frag[x]; + frag1Num = x; + frag2 = frag[i]; + frag2Num = i; + } else if (frag[x]->follows(frag[i])) { + frag1 = frag[i]; + frag1Num = i; + frag2 = frag[x]; + frag2Num = x; + } + + // merging required + if (frag1) { + if (frag1->getScore() > 
frag2->getScore()) { + bestScoringFragNum = frag1Num; + worstScoringFragNum = frag2Num; + } else { + bestScoringFragNum = frag2Num; + worstScoringFragNum = frag1Num; } + frag1->merge(frag2); + frag[worstScoringFragNum].reset(); + mergingStillBeingDone = true; + frag[bestScoringFragNum] = frag1; } } } - while (mergingStillBeingDone); - } + } while (mergingStillBeingDone); } - - String Highlighter::getBestFragments(TokenStreamPtr tokenStream, const String& text, int32_t maxNumFragments, const String& separator) - { - Collection sections(getBestFragments(tokenStream, text, maxNumFragments)); - StringStream result; - for (int32_t i = 0; i < sections.size(); ++i) - { - if (i > 0) - result << separator; - result << sections[i]; +} + +String Highlighter::getBestFragments(const TokenStreamPtr& tokenStream, const String& text, int32_t maxNumFragments, const String& separator) { + Collection sections(getBestFragments(tokenStream, text, maxNumFragments)); + StringStream result; + for (int32_t i = 0; i < sections.size(); ++i) { + if (i > 0) { + result << separator; } - return result.str(); - } - - int32_t Highlighter::getMaxDocCharsToAnalyze() - { - return maxDocCharsToAnalyze; - } - - void Highlighter::setMaxDocCharsToAnalyze(int32_t maxDocCharsToAnalyze) - { - this->maxDocCharsToAnalyze = maxDocCharsToAnalyze; - } - - FragmenterPtr Highlighter::getTextFragmenter() - { - return textFragmenter; - } - - void Highlighter::setTextFragmenter(FragmenterPtr fragmenter) - { - textFragmenter = fragmenter; - } - - HighlighterScorerPtr Highlighter::getFragmentScorer() - { - return fragmentScorer; - } - - void Highlighter::setFragmentScorer(HighlighterScorerPtr scorer) - { - fragmentScorer = scorer; - } - - EncoderPtr Highlighter::getEncoder() - { - return encoder; - } - - void Highlighter::setEncoder(EncoderPtr encoder) - { - this->encoder = encoder; - } - - FragmentQueue::FragmentQueue(int32_t size) : PriorityQueue(size) - { + result << sections[i]; } - - 
FragmentQueue::~FragmentQueue() - { - } - - bool FragmentQueue::lessThan(const TextFragmentPtr& first, const TextFragmentPtr& second) - { - if (first->getScore() == second->getScore()) - return first->fragNum > second->fragNum; - else - return first->getScore() < second->getScore(); + return result.str(); +} + +int32_t Highlighter::getMaxDocCharsToAnalyze() { + return maxDocCharsToAnalyze; +} + +void Highlighter::setMaxDocCharsToAnalyze(int32_t maxDocCharsToAnalyze) { + this->maxDocCharsToAnalyze = maxDocCharsToAnalyze; +} + +FragmenterPtr Highlighter::getTextFragmenter() { + return textFragmenter; +} + +void Highlighter::setTextFragmenter(const FragmenterPtr& fragmenter) { + textFragmenter = fragmenter; +} + +HighlighterScorerPtr Highlighter::getFragmentScorer() { + return fragmentScorer; +} + +void Highlighter::setFragmentScorer(const HighlighterScorerPtr& scorer) { + fragmentScorer = scorer; +} + +EncoderPtr Highlighter::getEncoder() { + return encoder; +} + +void Highlighter::setEncoder(const EncoderPtr& encoder) { + this->encoder = encoder; +} + +FragmentQueue::FragmentQueue(int32_t size) : PriorityQueue(size) { +} + +FragmentQueue::~FragmentQueue() { +} + +bool FragmentQueue::lessThan(const TextFragmentPtr& first, const TextFragmentPtr& second) { + if (first->getScore() == second->getScore()) { + return first->fragNum > second->fragNum; + } else { + return first->getScore() < second->getScore(); } } + +} diff --git a/src/contrib/highlighter/HighlighterScorer.cpp b/src/contrib/highlighter/HighlighterScorer.cpp index 489e33fc..17ca78ad 100644 --- a/src/contrib/highlighter/HighlighterScorer.cpp +++ b/src/contrib/highlighter/HighlighterScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,33 +7,29 @@ #include "ContribInc.h" #include "HighlighterScorer.h" -namespace Lucene -{ - HighlighterScorer::~HighlighterScorer() - { - } - - TokenStreamPtr HighlighterScorer::init(TokenStreamPtr tokenStream) - { - BOOST_ASSERT(false); - return TokenStreamPtr(); // override - } - - void HighlighterScorer::startFragment(TextFragmentPtr newFragment) - { - BOOST_ASSERT(false); - // override - } - - double HighlighterScorer::getTokenScore() - { - BOOST_ASSERT(false); - return 0; // override - } - - double HighlighterScorer::getFragmentScore() - { - BOOST_ASSERT(false); - return 0; // override - } +namespace Lucene { + +HighlighterScorer::~HighlighterScorer() { +} + +TokenStreamPtr HighlighterScorer::init(const TokenStreamPtr& tokenStream) { + BOOST_ASSERT(false); + return TokenStreamPtr(); // override +} + +void HighlighterScorer::startFragment(const TextFragmentPtr& newFragment) { + BOOST_ASSERT(false); + // override +} + +double HighlighterScorer::getTokenScore() { + BOOST_ASSERT(false); + return 0; // override +} + +double HighlighterScorer::getFragmentScore() { + BOOST_ASSERT(false); + return 0; // override +} + } diff --git a/src/contrib/highlighter/MapWeightedSpanTerm.cpp b/src/contrib/highlighter/MapWeightedSpanTerm.cpp index 5a5596e5..e5e8c765 100644 --- a/src/contrib/highlighter/MapWeightedSpanTerm.cpp +++ b/src/contrib/highlighter/MapWeightedSpanTerm.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,39 +7,33 @@ #include "ContribInc.h" #include "MapWeightedSpanTerm.h" -namespace Lucene -{ - MapWeightedSpanTerm::MapWeightedSpanTerm() - { - map = MapStringWeightedSpanTerm::newInstance(); - } - - MapWeightedSpanTerm::~MapWeightedSpanTerm() - { - } - - MapStringWeightedSpanTerm::iterator MapWeightedSpanTerm::begin() - { - return map.begin(); - } - - MapStringWeightedSpanTerm::iterator MapWeightedSpanTerm::end() - { - return map.end(); - } - - void MapWeightedSpanTerm::put(const String& key, WeightedSpanTermPtr val) - { - return map.put(key, val); - } - - WeightedSpanTermPtr MapWeightedSpanTerm::get(const String& key) const - { - return map.get(key); - } - - void MapWeightedSpanTerm::clear() - { - map.clear(); - } +namespace Lucene { + +MapWeightedSpanTerm::MapWeightedSpanTerm() { + map = MapStringWeightedSpanTerm::newInstance(); +} + +MapWeightedSpanTerm::~MapWeightedSpanTerm() { +} + +MapStringWeightedSpanTerm::iterator MapWeightedSpanTerm::begin() { + return map.begin(); +} + +MapStringWeightedSpanTerm::iterator MapWeightedSpanTerm::end() { + return map.end(); +} + +void MapWeightedSpanTerm::put(const String& key, const WeightedSpanTermPtr& val) { + return map.put(key, val); +} + +WeightedSpanTermPtr MapWeightedSpanTerm::get(const String& key) const { + return map.get(key); +} + +void MapWeightedSpanTerm::clear() { + map.clear(); +} + } diff --git a/src/contrib/highlighter/NullFragmenter.cpp b/src/contrib/highlighter/NullFragmenter.cpp index 60697883..3e5fa31d 100644 --- a/src/contrib/highlighter/NullFragmenter.cpp +++ b/src/contrib/highlighter/NullFragmenter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,18 +7,16 @@ #include "ContribInc.h" #include "NullFragmenter.h" -namespace Lucene -{ - NullFragmenter::~NullFragmenter() - { - } - - void NullFragmenter::start(const String& originalText, TokenStreamPtr tokenStream) - { - } - - bool NullFragmenter::isNewFragment() - { - return false; - } +namespace Lucene { + +NullFragmenter::~NullFragmenter() { +} + +void NullFragmenter::start(const String& originalText, const TokenStreamPtr& tokenStream) { +} + +bool NullFragmenter::isNewFragment() { + return false; +} + } diff --git a/src/contrib/highlighter/QueryScorer.cpp b/src/contrib/highlighter/QueryScorer.cpp index 5616e74c..cbd5177f 100644 --- a/src/contrib/highlighter/QueryScorer.cpp +++ b/src/contrib/highlighter/QueryScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,158 +13,142 @@ #include "MapWeightedSpanTerm.h" #include "WeightedSpanTermExtractor.h" -namespace Lucene -{ - QueryScorer::QueryScorer(QueryPtr query) - { - init(query, L"", IndexReaderPtr(), true); - } - - QueryScorer::QueryScorer(QueryPtr query, const String& field) - { - init(query, field, IndexReaderPtr(), true); - } - - QueryScorer::QueryScorer(QueryPtr query, IndexReaderPtr reader, const String& field) - { - init(query, field, reader, true); - } - - QueryScorer::QueryScorer(QueryPtr query, IndexReaderPtr reader, const String& field, const String& defaultField) - { - this->defaultField = defaultField; - init(query, field, reader, true); - } - - QueryScorer::QueryScorer(QueryPtr query, const String& field, const String& defaultField) - { - this->defaultField = defaultField; - init(query, field, IndexReaderPtr(), true); - } - - QueryScorer::QueryScorer(Collection weightedTerms) - { - init(QueryPtr(), L"", IndexReaderPtr(), true); - - this->fieldWeightedSpanTerms = newLucene(); - for (int32_t i = 0; i < weightedTerms.size(); ++i) - { - WeightedSpanTermPtr existingTerm(fieldWeightedSpanTerms->get(weightedTerms[i]->term)); - if (!existingTerm || existingTerm->weight < weightedTerms[i]->weight) - { - // if a term is defined more than once, always use the highest scoring weight - fieldWeightedSpanTerms->put(weightedTerms[i]->term, weightedTerms[i]); - maxTermWeight = std::max(maxTermWeight, weightedTerms[i]->getWeight()); - } - } - skipInitExtractor = true; - } - - QueryScorer::~QueryScorer() - { - } - - void QueryScorer::init(QueryPtr query, const String& field, IndexReaderPtr reader, bool expandMultiTermQuery) - { - this->totalScore = 0; - this->maxTermWeight = 0; - this->position = -1; - this->skipInitExtractor = false; - this->wrapToCaching = true; - - this->reader = reader; - this->expandMultiTermQuery = expandMultiTermQuery; - this->query = query; - this->field = field; - } - - 
double QueryScorer::getFragmentScore() - { - return totalScore; - } - - double QueryScorer::getMaxTermWeight() - { - return maxTermWeight; - } - - double QueryScorer::getTokenScore() - { - position += posIncAtt->getPositionIncrement(); - String termText(termAtt->term()); +namespace Lucene { - WeightedSpanTermPtr weightedSpanTerm(fieldWeightedSpanTerms->get(termText)); +QueryScorer::QueryScorer(const QueryPtr& query) { + init(query, L"", IndexReaderPtr(), true); +} - if (!weightedSpanTerm) - return 0.0; +QueryScorer::QueryScorer(const QueryPtr& query, const String& field) { + init(query, field, IndexReaderPtr(), true); +} - if (weightedSpanTerm->positionSensitive && !weightedSpanTerm->checkPosition(position)) - return 0.0; +QueryScorer::QueryScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& field) { + init(query, field, reader, true); +} - double score = weightedSpanTerm->getWeight(); +QueryScorer::QueryScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& field, const String& defaultField) { + this->defaultField = defaultField; + init(query, field, reader, true); +} - // found a query term - is it unique in this doc? 
- if (!foundTerms.contains(termText)) - { - totalScore += score; - foundTerms.add(termText); - } +QueryScorer::QueryScorer(const QueryPtr& query, const String& field, const String& defaultField) { + this->defaultField = defaultField; + init(query, field, IndexReaderPtr(), true); +} - return score; - } - - TokenStreamPtr QueryScorer::init(TokenStreamPtr tokenStream) - { - position = -1; - termAtt = tokenStream->addAttribute(); - posIncAtt = tokenStream->addAttribute(); - if (!skipInitExtractor) - { - if (fieldWeightedSpanTerms) - fieldWeightedSpanTerms->clear(); - return initExtractor(tokenStream); +QueryScorer::QueryScorer(Collection weightedTerms) { + init(QueryPtr(), L"", IndexReaderPtr(), true); + + this->fieldWeightedSpanTerms = newLucene(); + for (int32_t i = 0; i < weightedTerms.size(); ++i) { + WeightedSpanTermPtr existingTerm(fieldWeightedSpanTerms->get(weightedTerms[i]->term)); + if (!existingTerm || existingTerm->weight < weightedTerms[i]->weight) { + // if a term is defined more than once, always use the highest scoring weight + fieldWeightedSpanTerms->put(weightedTerms[i]->term, weightedTerms[i]); + maxTermWeight = std::max(maxTermWeight, weightedTerms[i]->getWeight()); } - return TokenStreamPtr(); } - - WeightedSpanTermPtr QueryScorer::getWeightedSpanTerm(const String& token) - { - return fieldWeightedSpanTerms->get(token); + skipInitExtractor = true; +} + +QueryScorer::~QueryScorer() { +} + +void QueryScorer::init(const QueryPtr& query, const String& field, const IndexReaderPtr& reader, bool expandMultiTermQuery) { + this->totalScore = 0; + this->maxTermWeight = 0; + this->position = -1; + this->skipInitExtractor = false; + this->wrapToCaching = true; + + this->reader = reader; + this->expandMultiTermQuery = expandMultiTermQuery; + this->query = query; + this->field = field; +} + +double QueryScorer::getFragmentScore() { + return totalScore; +} + +double QueryScorer::getMaxTermWeight() { + return maxTermWeight; +} + +double 
QueryScorer::getTokenScore() { + position += posIncAtt->getPositionIncrement(); + String termText(termAtt->term()); + + WeightedSpanTermPtr weightedSpanTerm(fieldWeightedSpanTerms->get(termText)); + + if (!weightedSpanTerm) { + return 0.0; } - - TokenStreamPtr QueryScorer::initExtractor(TokenStreamPtr tokenStream) - { - WeightedSpanTermExtractorPtr qse(newLucene(defaultField)); - - qse->setExpandMultiTermQuery(expandMultiTermQuery); - qse->setWrapIfNotCachingTokenFilter(wrapToCaching); - if (!reader) - this->fieldWeightedSpanTerms = qse->getWeightedSpanTerms(query, tokenStream, field); - else - this->fieldWeightedSpanTerms = qse->getWeightedSpanTermsWithScores(query, tokenStream, field, reader); - if (qse->isCachedTokenStream()) - return qse->getTokenStream(); - return TokenStreamPtr(); + + if (weightedSpanTerm->positionSensitive && !weightedSpanTerm->checkPosition(position)) { + return 0.0; } - - void QueryScorer::startFragment(TextFragmentPtr newFragment) - { - foundTerms = HashSet::newInstance(); - totalScore = 0; + + double score = weightedSpanTerm->getWeight(); + + // found a query term - is it unique in this doc? 
+ if (!foundTerms.contains(termText)) { + totalScore += score; + foundTerms.add(termText); } - - bool QueryScorer::isExpandMultiTermQuery() - { - return expandMultiTermQuery; + + return score; +} + +TokenStreamPtr QueryScorer::init(const TokenStreamPtr& tokenStream) { + position = -1; + termAtt = tokenStream->addAttribute(); + posIncAtt = tokenStream->addAttribute(); + if (!skipInitExtractor) { + if (fieldWeightedSpanTerms) { + fieldWeightedSpanTerms->clear(); + } + return initExtractor(tokenStream); } - - void QueryScorer::setExpandMultiTermQuery(bool expandMultiTermQuery) - { - this->expandMultiTermQuery = expandMultiTermQuery; + return TokenStreamPtr(); +} + +WeightedSpanTermPtr QueryScorer::getWeightedSpanTerm(const String& token) { + return fieldWeightedSpanTerms->get(token); +} + +TokenStreamPtr QueryScorer::initExtractor(const TokenStreamPtr& tokenStream) { + WeightedSpanTermExtractorPtr qse(newLucene(defaultField)); + + qse->setExpandMultiTermQuery(expandMultiTermQuery); + qse->setWrapIfNotCachingTokenFilter(wrapToCaching); + if (!reader) { + this->fieldWeightedSpanTerms = qse->getWeightedSpanTerms(query, tokenStream, field); + } else { + this->fieldWeightedSpanTerms = qse->getWeightedSpanTermsWithScores(query, tokenStream, field, reader); } - - void QueryScorer::setWrapIfNotCachingTokenFilter(bool wrap) - { - this->wrapToCaching = wrap; + if (qse->isCachedTokenStream()) { + return qse->getTokenStream(); } + return TokenStreamPtr(); +} + +void QueryScorer::startFragment(const TextFragmentPtr& newFragment) { + foundTerms = HashSet::newInstance(); + totalScore = 0; +} + +bool QueryScorer::isExpandMultiTermQuery() { + return expandMultiTermQuery; +} + +void QueryScorer::setExpandMultiTermQuery(bool expandMultiTermQuery) { + this->expandMultiTermQuery = expandMultiTermQuery; +} + +void QueryScorer::setWrapIfNotCachingTokenFilter(bool wrap) { + this->wrapToCaching = wrap; +} + } diff --git a/src/contrib/highlighter/QueryTermExtractor.cpp 
b/src/contrib/highlighter/QueryTermExtractor.cpp index 9b697270..b1666886 100644 --- a/src/contrib/highlighter/QueryTermExtractor.cpp +++ b/src/contrib/highlighter/QueryTermExtractor.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,92 +14,78 @@ #include "IndexReader.h" #include "MiscUtils.h" -namespace Lucene -{ - QueryTermExtractor::~QueryTermExtractor() - { - } - - Collection QueryTermExtractor::getTerms(QueryPtr query) - { - return getTerms(query, false); - } - - Collection QueryTermExtractor::getIdfWeightedTerms(QueryPtr query, IndexReaderPtr reader, const String& fieldName) - { - Collection terms(getTerms(query, false, fieldName)); - int32_t totalNumDocs = reader->numDocs(); - for (int32_t i = 0; i < terms.size(); ++i) - { - try - { - int32_t docFreq = reader->docFreq(newLucene(fieldName, terms[i]->term)); - // docFreq counts deletes - if (totalNumDocs < docFreq) - docFreq = totalNumDocs; - // IDF algorithm taken from DefaultSimilarity class - double idf = (double)(std::log((double)totalNumDocs / (double)(docFreq + 1)) + 1.0); - terms[i]->weight *= idf; - } - catch (...) 
- { - // ignore +namespace Lucene { + +QueryTermExtractor::~QueryTermExtractor() { +} + +Collection QueryTermExtractor::getTerms(const QueryPtr& query) { + return getTerms(query, false); +} + +Collection QueryTermExtractor::getIdfWeightedTerms(const QueryPtr& query, const IndexReaderPtr& reader, const String& fieldName) { + Collection terms(getTerms(query, false, fieldName)); + int32_t totalNumDocs = reader->numDocs(); + for (int32_t i = 0; i < terms.size(); ++i) { + try { + int32_t docFreq = reader->docFreq(newLucene(fieldName, terms[i]->term)); + // docFreq counts deletes + if (totalNumDocs < docFreq) { + docFreq = totalNumDocs; } + // IDF algorithm taken from DefaultSimilarity class + double idf = (double)(std::log((double)totalNumDocs / (double)(docFreq + 1)) + 1.0); + terms[i]->weight *= idf; + } catch (...) { + // ignore } - return terms; - } - - Collection QueryTermExtractor::getTerms(QueryPtr query, bool prohibited, const String& fieldName) - { - SetWeightedTerm terms(SetWeightedTerm::newInstance()); - getTerms(query, terms, prohibited, fieldName); - return Collection::newInstance(terms.begin(), terms.end()); } - - Collection QueryTermExtractor::getTerms(QueryPtr query, bool prohibited) - { - SetWeightedTerm terms(SetWeightedTerm::newInstance()); - getTerms(query, terms, prohibited, L""); - return Collection::newInstance(terms.begin(), terms.end()); - } - - void QueryTermExtractor::getTerms(QueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName) - { - try - { - if (MiscUtils::typeOf(query)) - getTermsFromBooleanQuery(boost::dynamic_pointer_cast(query), terms, prohibited, fieldName); - else if (MiscUtils::typeOf(query)) - getTermsFromFilteredQuery(boost::dynamic_pointer_cast(query), terms, prohibited, fieldName); - else - { - SetTerm nonWeightedTerms(SetTerm::newInstance()); - query->extractTerms(nonWeightedTerms); - for (SetTerm::iterator term = nonWeightedTerms.begin(); term != nonWeightedTerms.end(); ++term) - { - if 
(fieldName.empty() || (*term)->field() == fieldName) - terms.add(newLucene(query->getBoost(), (*term)->text())); + return terms; +} + +Collection QueryTermExtractor::getTerms(const QueryPtr& query, bool prohibited, const String& fieldName) { + SetWeightedTerm terms(SetWeightedTerm::newInstance()); + getTerms(query, terms, prohibited, fieldName); + return Collection::newInstance(terms.begin(), terms.end()); +} + +Collection QueryTermExtractor::getTerms(const QueryPtr& query, bool prohibited) { + SetWeightedTerm terms(SetWeightedTerm::newInstance()); + getTerms(query, terms, prohibited, L""); + return Collection::newInstance(terms.begin(), terms.end()); +} + +void QueryTermExtractor::getTerms(const QueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { + try { + if (MiscUtils::typeOf(query)) { + getTermsFromBooleanQuery(boost::dynamic_pointer_cast(query), terms, prohibited, fieldName); + } else if (MiscUtils::typeOf(query)) { + getTermsFromFilteredQuery(boost::dynamic_pointer_cast(query), terms, prohibited, fieldName); + } else { + SetTerm nonWeightedTerms(SetTerm::newInstance()); + query->extractTerms(nonWeightedTerms); + for (SetTerm::iterator term = nonWeightedTerms.begin(); term != nonWeightedTerms.end(); ++term) { + if (fieldName.empty() || (*term)->field() == fieldName) { + terms.add(newLucene(query->getBoost(), (*term)->text())); } } } - catch (UnsupportedOperationException&) - { - // this is non-fatal for our purposes - } + } catch (UnsupportedOperationException&) { + // this is non-fatal for our purposes } - - void QueryTermExtractor::getTermsFromBooleanQuery(BooleanQueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName) - { - Collection queryClauses(query->getClauses()); - for (int32_t i = 0; i < queryClauses.size(); ++i) - { - if (prohibited || queryClauses[i]->getOccur() != BooleanClause::MUST_NOT) - getTerms(queryClauses[i]->getQuery(), terms, prohibited, fieldName); +} + +void 
QueryTermExtractor::getTermsFromBooleanQuery(const BooleanQueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { + Collection queryClauses(query->getClauses()); + for (int32_t i = 0; i < queryClauses.size(); ++i) { + if (prohibited || queryClauses[i]->getOccur() != BooleanClause::MUST_NOT) { + getTerms(queryClauses[i]->getQuery(), terms, prohibited, fieldName); } } - - void QueryTermExtractor::getTermsFromFilteredQuery(FilteredQueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName) - { - getTerms(query->getQuery(), terms, prohibited, fieldName); - } +} + +void QueryTermExtractor::getTermsFromFilteredQuery(const FilteredQueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName) { + getTerms(query->getQuery(), terms, prohibited, fieldName); +} + } diff --git a/src/contrib/highlighter/QueryTermScorer.cpp b/src/contrib/highlighter/QueryTermScorer.cpp index 264c47ba..da31587a 100644 --- a/src/contrib/highlighter/QueryTermScorer.cpp +++ b/src/contrib/highlighter/QueryTermScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,93 +11,80 @@ #include "WeightedTerm.h" #include "TokenStream.h" -namespace Lucene -{ - QueryTermScorer::QueryTermScorer(QueryPtr query) - { - ConstructQueryTermScorer(QueryTermExtractor::getTerms(query)); - } - - QueryTermScorer::QueryTermScorer(QueryPtr query, const String& fieldName) - { - ConstructQueryTermScorer(QueryTermExtractor::getTerms(query, false, fieldName)); - } - - QueryTermScorer::QueryTermScorer(QueryPtr query, IndexReaderPtr reader, const String& fieldName) - { - ConstructQueryTermScorer(QueryTermExtractor::getIdfWeightedTerms(query, reader, fieldName)); - } - - QueryTermScorer::QueryTermScorer(Collection weightedTerms) - { - ConstructQueryTermScorer(weightedTerms); - } - - QueryTermScorer::~QueryTermScorer() - { - } - - void QueryTermScorer::ConstructQueryTermScorer(Collection weightedTerms) - { - totalScore = 0; - maxTermWeight = 0; - - termsToFind = MapStringWeightedTerm::newInstance(); - for (int32_t i = 0; i < weightedTerms.size(); ++i) - { - WeightedTermPtr existingTerm(termsToFind.get(weightedTerms[i]->term)); - if (!existingTerm || existingTerm->weight < weightedTerms[i]->weight) - { - // if a term is defined more than once, always use the highest scoring weight - termsToFind.put(weightedTerms[i]->term, weightedTerms[i]); - maxTermWeight = std::max(maxTermWeight, weightedTerms[i]->getWeight()); - } +namespace Lucene { + +QueryTermScorer::QueryTermScorer(const QueryPtr& query) { + ConstructQueryTermScorer(QueryTermExtractor::getTerms(query)); +} + +QueryTermScorer::QueryTermScorer(const QueryPtr& query, const String& fieldName) { + ConstructQueryTermScorer(QueryTermExtractor::getTerms(query, false, fieldName)); +} + +QueryTermScorer::QueryTermScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& fieldName) { + ConstructQueryTermScorer(QueryTermExtractor::getIdfWeightedTerms(query, reader, fieldName)); +} + 
+QueryTermScorer::QueryTermScorer(Collection weightedTerms) { + ConstructQueryTermScorer(weightedTerms); +} + +QueryTermScorer::~QueryTermScorer() { +} + +void QueryTermScorer::ConstructQueryTermScorer(Collection weightedTerms) { + totalScore = 0; + maxTermWeight = 0; + + termsToFind = MapStringWeightedTerm::newInstance(); + for (int32_t i = 0; i < weightedTerms.size(); ++i) { + WeightedTermPtr existingTerm(termsToFind.get(weightedTerms[i]->term)); + if (!existingTerm || existingTerm->weight < weightedTerms[i]->weight) { + // if a term is defined more than once, always use the highest scoring weight + termsToFind.put(weightedTerms[i]->term, weightedTerms[i]); + maxTermWeight = std::max(maxTermWeight, weightedTerms[i]->getWeight()); } } - - TokenStreamPtr QueryTermScorer::init(TokenStreamPtr tokenStream) - { - termAtt = tokenStream->addAttribute(); - return TokenStreamPtr(); - } - - void QueryTermScorer::startFragment(TextFragmentPtr newFragment) - { - uniqueTermsInFragment = HashSet::newInstance(); - currentTextFragment = newFragment; - totalScore = 0; - } - - double QueryTermScorer::getTokenScore() - { - String termText(termAtt->term()); - - WeightedTermPtr queryTerm(termsToFind.get(termText)); - if (!queryTerm) - return 0.0; // not a query term - return - - // found a query term - is it unique in this doc? 
- if (!uniqueTermsInFragment.contains(termText)) - { - totalScore += queryTerm->getWeight();; - uniqueTermsInFragment.add(termText); - } +} - return queryTerm->getWeight(); - } - - double QueryTermScorer::getFragmentScore() - { - return totalScore; +TokenStreamPtr QueryTermScorer::init(const TokenStreamPtr& tokenStream) { + termAtt = tokenStream->addAttribute(); + return TokenStreamPtr(); +} + +void QueryTermScorer::startFragment(const TextFragmentPtr& newFragment) { + uniqueTermsInFragment = HashSet::newInstance(); + currentTextFragment = newFragment; + totalScore = 0; +} + +double QueryTermScorer::getTokenScore() { + String termText(termAtt->term()); + + WeightedTermPtr queryTerm(termsToFind.get(termText)); + if (!queryTerm) { + return 0.0; // not a query term - return } - - void QueryTermScorer::allFragmentsProcessed() - { - // this class has no special operations to perform at end of processing - } - - double QueryTermScorer::getMaxTermWeight() - { - return maxTermWeight; + + // found a query term - is it unique in this doc? + if (!uniqueTermsInFragment.contains(termText)) { + totalScore += queryTerm->getWeight();; + uniqueTermsInFragment.add(termText); } + + return queryTerm->getWeight(); +} + +double QueryTermScorer::getFragmentScore() { + return totalScore; +} + +void QueryTermScorer::allFragmentsProcessed() { + // this class has no special operations to perform at end of processing +} + +double QueryTermScorer::getMaxTermWeight() { + return maxTermWeight; +} + } diff --git a/src/contrib/highlighter/SimpleFragmenter.cpp b/src/contrib/highlighter/SimpleFragmenter.cpp index 42f332df..4a3df9d1 100644 --- a/src/contrib/highlighter/SimpleFragmenter.cpp +++ b/src/contrib/highlighter/SimpleFragmenter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,47 +10,42 @@ #include "OffsetAttribute.h" #include "TokenStream.h" -namespace Lucene -{ - const int32_t SimpleFragmenter::DEFAULT_FRAGMENT_SIZE = 100; - - SimpleFragmenter::SimpleFragmenter() - { - this->currentNumFrags = 0; - this->fragmentSize = DEFAULT_FRAGMENT_SIZE; - } - - SimpleFragmenter::SimpleFragmenter(int32_t fragmentSize) - { - this->currentNumFrags = 0; - this->fragmentSize = fragmentSize; - } - - SimpleFragmenter::~SimpleFragmenter() - { - } - - void SimpleFragmenter::start(const String& originalText, TokenStreamPtr tokenStream) - { - offsetAtt = tokenStream->addAttribute(); - currentNumFrags = 1; - } - - bool SimpleFragmenter::isNewFragment() - { - bool isNewFrag = (offsetAtt->endOffset() >= (fragmentSize * currentNumFrags)); - if (isNewFrag) - ++currentNumFrags; - return isNewFrag; - } - - int32_t SimpleFragmenter::getFragmentSize() - { - return fragmentSize; - } - - void SimpleFragmenter::setFragmentSize(int32_t size) - { - fragmentSize = size; +namespace Lucene { + +const int32_t SimpleFragmenter::DEFAULT_FRAGMENT_SIZE = 100; + +SimpleFragmenter::SimpleFragmenter() { + this->currentNumFrags = 0; + this->fragmentSize = DEFAULT_FRAGMENT_SIZE; +} + +SimpleFragmenter::SimpleFragmenter(int32_t fragmentSize) { + this->currentNumFrags = 0; + this->fragmentSize = fragmentSize; +} + +SimpleFragmenter::~SimpleFragmenter() { +} + +void SimpleFragmenter::start(const String& originalText, const TokenStreamPtr& tokenStream) { + offsetAtt = tokenStream->addAttribute(); + currentNumFrags = 1; +} + +bool SimpleFragmenter::isNewFragment() { + bool isNewFrag = (offsetAtt->endOffset() >= (fragmentSize * currentNumFrags)); + if (isNewFrag) { + ++currentNumFrags; } + return isNewFrag; +} + +int32_t SimpleFragmenter::getFragmentSize() { + return fragmentSize; +} + 
+void SimpleFragmenter::setFragmentSize(int32_t size) { + fragmentSize = size; +} + } diff --git a/src/contrib/highlighter/SimpleHTMLEncoder.cpp b/src/contrib/highlighter/SimpleHTMLEncoder.cpp index e3ee16f7..bfe9e035 100644 --- a/src/contrib/highlighter/SimpleHTMLEncoder.cpp +++ b/src/contrib/highlighter/SimpleHTMLEncoder.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,51 +7,49 @@ #include "ContribInc.h" #include "SimpleHTMLEncoder.h" -namespace Lucene -{ - SimpleHTMLEncoder::~SimpleHTMLEncoder() - { - } - - String SimpleHTMLEncoder::encodeText(const String& originalText) - { - return htmlEncode(originalText); +namespace Lucene { + +SimpleHTMLEncoder::~SimpleHTMLEncoder() { +} + +String SimpleHTMLEncoder::encodeText(const String& originalText) { + return htmlEncode(originalText); +} + +String SimpleHTMLEncoder::htmlEncode(const String& plainText) { + if (plainText.empty()) { + return L""; } - - String SimpleHTMLEncoder::htmlEncode(const String& plainText) - { - if (plainText.empty()) - return L""; - - StringStream result; - - for (int32_t index = 0; index < (int32_t)plainText.length(); ++index) - { - wchar_t ch = plainText[index]; - - switch (ch) - { - case L'\"': - result << L"""; - break; - case L'&': - result << L"&"; - break; - case L'<': - result << L"<"; - break; - case L'>': - result << L">"; - break; - default: - if (ch < 128) - result << ch; - else - result << L"&#" << (int32_t)ch << L";"; - break; + + StringStream result; + + for (int32_t index = 0; index < (int32_t)plainText.length(); ++index) { + wchar_t ch = plainText[index]; + + switch (ch) { + case L'\"': + result << L"""; + 
break; + case L'&': + result << L"&"; + break; + case L'<': + result << L"<"; + break; + case L'>': + result << L">"; + break; + default: + if (ch < 128) { + result << ch; + } else { + result << L"&#" << (int32_t)ch << L";"; } + break; } - - return result.str(); } + + return result.str(); +} + } diff --git a/src/contrib/highlighter/SimpleHTMLFormatter.cpp b/src/contrib/highlighter/SimpleHTMLFormatter.cpp index 811a322b..80fc9be5 100644 --- a/src/contrib/highlighter/SimpleHTMLFormatter.cpp +++ b/src/contrib/highlighter/SimpleHTMLFormatter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,33 +8,31 @@ #include "SimpleHTMLFormatter.h" #include "TokenGroup.h" -namespace Lucene -{ - const String SimpleHTMLFormatter::DEFAULT_PRE_TAG = L""; - const String SimpleHTMLFormatter::DEFAULT_POST_TAG = L""; - - SimpleHTMLFormatter::SimpleHTMLFormatter() - { - this->preTag = DEFAULT_PRE_TAG; - this->postTag = DEFAULT_POST_TAG; - } - - SimpleHTMLFormatter::SimpleHTMLFormatter(const String& preTag, const String& postTag) - { - this->preTag = preTag; - this->postTag = postTag; - } - - SimpleHTMLFormatter::~SimpleHTMLFormatter() - { - } - - String SimpleHTMLFormatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) - { - if (tokenGroup->getTotalScore() == 0) - return originalText; - StringStream buffer; - buffer << preTag << originalText << postTag; - return buffer.str(); +namespace Lucene { + +const String SimpleHTMLFormatter::DEFAULT_PRE_TAG = L""; +const String SimpleHTMLFormatter::DEFAULT_POST_TAG = L""; + +SimpleHTMLFormatter::SimpleHTMLFormatter() { + this->preTag = DEFAULT_PRE_TAG; + 
this->postTag = DEFAULT_POST_TAG; +} + +SimpleHTMLFormatter::SimpleHTMLFormatter(const String& preTag, const String& postTag) { + this->preTag = preTag; + this->postTag = postTag; +} + +SimpleHTMLFormatter::~SimpleHTMLFormatter() { +} + +String SimpleHTMLFormatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { + if (tokenGroup->getTotalScore() == 0) { + return originalText; } + StringStream buffer; + buffer << preTag << originalText << postTag; + return buffer.str(); +} + } diff --git a/src/contrib/highlighter/SimpleSpanFragmenter.cpp b/src/contrib/highlighter/SimpleSpanFragmenter.cpp index 6403f66e..1e34b5ad 100644 --- a/src/contrib/highlighter/SimpleSpanFragmenter.cpp +++ b/src/contrib/highlighter/SimpleSpanFragmenter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -14,76 +14,71 @@ #include "TokenStream.h" #include "MiscUtils.h" -namespace Lucene -{ - const int32_t SimpleSpanFragmenter::DEFAULT_FRAGMENT_SIZE = 100; - - SimpleSpanFragmenter::SimpleSpanFragmenter(QueryScorerPtr queryScorer) - { - this->currentNumFrags = 0; - this->position = -1; - this->waitForPos = -1; - this->textSize = 0; - - this->queryScorer = queryScorer; - this->fragmentSize = DEFAULT_FRAGMENT_SIZE; - } - - SimpleSpanFragmenter::SimpleSpanFragmenter(QueryScorerPtr queryScorer, int32_t fragmentSize) - { - this->currentNumFrags = 0; - this->position = -1; - this->waitForPos = -1; - this->textSize = 0; - - this->queryScorer = queryScorer; - this->fragmentSize = fragmentSize; - } - - SimpleSpanFragmenter::~SimpleSpanFragmenter() - { +namespace Lucene { + +const int32_t SimpleSpanFragmenter::DEFAULT_FRAGMENT_SIZE = 100; + +SimpleSpanFragmenter::SimpleSpanFragmenter(const QueryScorerPtr& queryScorer) { + this->currentNumFrags = 0; + this->position = -1; + this->waitForPos = -1; + this->textSize = 0; + + this->queryScorer = queryScorer; + this->fragmentSize = DEFAULT_FRAGMENT_SIZE; +} + +SimpleSpanFragmenter::SimpleSpanFragmenter(const QueryScorerPtr& queryScorer, int32_t fragmentSize) { + this->currentNumFrags = 0; + this->position = -1; + this->waitForPos = -1; + this->textSize = 0; + + this->queryScorer = queryScorer; + this->fragmentSize = fragmentSize; +} + +SimpleSpanFragmenter::~SimpleSpanFragmenter() { +} + +bool SimpleSpanFragmenter::isNewFragment() { + position += posIncAtt->getPositionIncrement(); + + if (waitForPos == position) { + waitForPos = -1; + } else if (waitForPos != -1) { + return false; } - - bool SimpleSpanFragmenter::isNewFragment() - { - position += posIncAtt->getPositionIncrement(); - - if (waitForPos == position) - waitForPos = -1; - else if (waitForPos != -1) - return false; - - WeightedSpanTermPtr 
wSpanTerm(queryScorer->getWeightedSpanTerm(termAtt->term())); - - if (wSpanTerm) - { - Collection positionSpans(wSpanTerm->getPositionSpans()); - - for (int32_t i = 0; i < positionSpans.size(); ++i) - { - if (positionSpans[i]->start == position) - { - waitForPos = positionSpans[i]->end + 1; - break; - } + + WeightedSpanTermPtr wSpanTerm(queryScorer->getWeightedSpanTerm(termAtt->term())); + + if (wSpanTerm) { + Collection positionSpans(wSpanTerm->getPositionSpans()); + + for (int32_t i = 0; i < positionSpans.size(); ++i) { + if (positionSpans[i]->start == position) { + waitForPos = positionSpans[i]->end + 1; + break; } } - - bool isNewFrag = (offsetAtt->endOffset() >= (fragmentSize * currentNumFrags) && (textSize - offsetAtt->endOffset()) >= MiscUtils::unsignedShift(fragmentSize, 1)); - - if (isNewFrag) - ++currentNumFrags; - - return isNewFrag; } - - void SimpleSpanFragmenter::start(const String& originalText, TokenStreamPtr tokenStream) - { - position = -1; - currentNumFrags = 1; - textSize = originalText.length(); - termAtt = tokenStream->addAttribute(); - posIncAtt = tokenStream->addAttribute(); - offsetAtt = tokenStream->addAttribute(); + + bool isNewFrag = (offsetAtt->endOffset() >= (fragmentSize * currentNumFrags) && (textSize - offsetAtt->endOffset()) >= MiscUtils::unsignedShift(fragmentSize, 1)); + + if (isNewFrag) { + ++currentNumFrags; } + + return isNewFrag; +} + +void SimpleSpanFragmenter::start(const String& originalText, const TokenStreamPtr& tokenStream) { + position = -1; + currentNumFrags = 1; + textSize = originalText.length(); + termAtt = tokenStream->addAttribute(); + posIncAtt = tokenStream->addAttribute(); + offsetAtt = tokenStream->addAttribute(); +} + } diff --git a/src/contrib/highlighter/SpanGradientFormatter.cpp b/src/contrib/highlighter/SpanGradientFormatter.cpp index d30186b2..d218cdcf 100644 --- a/src/contrib/highlighter/SpanGradientFormatter.cpp +++ b/src/contrib/highlighter/SpanGradientFormatter.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,31 +8,33 @@ #include "SpanGradientFormatter.h" #include "TokenGroup.h" -namespace Lucene -{ - SpanGradientFormatter::SpanGradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor) : - GradientFormatter(maxScore, minForegroundColor, maxForegroundColor, minBackgroundColor, maxBackgroundColor) - { +namespace Lucene { + +SpanGradientFormatter::SpanGradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor) : + GradientFormatter(maxScore, minForegroundColor, maxForegroundColor, minBackgroundColor, maxBackgroundColor) { +} + +SpanGradientFormatter::~SpanGradientFormatter() { +} + +String SpanGradientFormatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { + if (tokenGroup->getTotalScore() == 0) { + return originalText; + } + double score = tokenGroup->getTotalScore(); + if (score == 0.0) { + return originalText; } - - SpanGradientFormatter::~SpanGradientFormatter() - { + StringStream buffer; + buffer << L"getTotalScore() == 0) - return originalText; - double score = tokenGroup->getTotalScore(); - if (score == 0.0) - return originalText; - StringStream buffer; - buffer << L"" << originalText << L""; - return buffer.str(); + if (highlightBackground) { + buffer << L"background: " << getBackgroundColorString(score) << L"; "; } + buffer << L"\">" << originalText << L""; + return buffer.str(); +} + } diff --git 
a/src/contrib/highlighter/TextFragment.cpp b/src/contrib/highlighter/TextFragment.cpp index 7002dd49..6bcafbfe 100644 --- a/src/contrib/highlighter/TextFragment.cpp +++ b/src/contrib/highlighter/TextFragment.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,73 +7,61 @@ #include "ContribInc.h" #include "TextFragment.h" -namespace Lucene -{ - TextFragment::TextFragment(StringBufferPtr markedUpText, int32_t textStartPos, int32_t fragNum) - { - this->markedUpText = markedUpText; - this->textStartPos = textStartPos; - this->textEndPos = 0; - this->fragNum = fragNum; - this->score = 0; - } - - TextFragment::~TextFragment() - { - } - - void TextFragment::setScore(double score) - { - this->score = score; - } - - double TextFragment::getScore() - { - return score; - } - - void TextFragment::merge(TextFragmentPtr frag2) - { - textEndPos = frag2->textEndPos; - score = std::max(score, frag2->score); - } - - bool TextFragment::follows(TextFragmentPtr fragment) - { - return (textStartPos == fragment->textEndPos); - } - - int32_t TextFragment::getFragNum() - { - return fragNum; - } - - String TextFragment::toString() - { - return markedUpText->toString().substr(textStartPos, textEndPos - textStartPos); - } - - StringBuffer::~StringBuffer() - { - } - - int32_t StringBuffer::length() - { - return buffer.str().length(); - } - - String StringBuffer::toString() - { - return buffer.str(); - } - - void StringBuffer::append(const String& str) - { - buffer << str; - } - - void StringBuffer::clear() - { - buffer.str(L""); - } +namespace Lucene { + +TextFragment::TextFragment(const StringBufferPtr& markedUpText, int32_t 
textStartPos, int32_t fragNum) { + this->markedUpText = markedUpText; + this->textStartPos = textStartPos; + this->textEndPos = 0; + this->fragNum = fragNum; + this->score = 0; +} + +TextFragment::~TextFragment() { +} + +void TextFragment::setScore(double score) { + this->score = score; +} + +double TextFragment::getScore() { + return score; +} + +void TextFragment::merge(const TextFragmentPtr& frag2) { + textEndPos = frag2->textEndPos; + score = std::max(score, frag2->score); +} + +bool TextFragment::follows(const TextFragmentPtr& fragment) { + return (textStartPos == fragment->textEndPos); +} + +int32_t TextFragment::getFragNum() { + return fragNum; +} + +String TextFragment::toString() { + return markedUpText->toString().substr(textStartPos, textEndPos - textStartPos); +} + +StringBuffer::~StringBuffer() { +} + +int32_t StringBuffer::length() { + return buffer.str().length(); +} + +String StringBuffer::toString() { + return buffer.str(); +} + +void StringBuffer::append(const String& str) { + buffer << str; +} + +void StringBuffer::clear() { + buffer.str(L""); +} + } diff --git a/src/contrib/highlighter/TokenGroup.cpp b/src/contrib/highlighter/TokenGroup.cpp index 2d9e1aa9..5e448bad 100644 --- a/src/contrib/highlighter/TokenGroup.cpp +++ b/src/contrib/highlighter/TokenGroup.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,108 +11,90 @@ #include "TokenStream.h" #include "Token.h" -namespace Lucene -{ - const int32_t TokenGroup::MAX_NUM_TOKENS_PER_GROUP = 50; - - TokenGroup::TokenGroup(TokenStreamPtr tokenStream) - { - offsetAtt = tokenStream->addAttribute(); - termAtt = tokenStream->addAttribute(); - - tokens = Collection::newInstance(MAX_NUM_TOKENS_PER_GROUP); - scores = Collection::newInstance(MAX_NUM_TOKENS_PER_GROUP); - numTokens = 0; - startOffset = 0; - endOffset = 0; - tot = 0.0; - matchStartOffset = 0; - matchEndOffset = 0; - } - - TokenGroup::~TokenGroup() - { - } - - void TokenGroup::addToken(double score) - { - if (numTokens < MAX_NUM_TOKENS_PER_GROUP) - { - int32_t termStartOffset = offsetAtt->startOffset(); - int32_t termEndOffset = offsetAtt->endOffset(); - if (numTokens == 0) - { - matchStartOffset = termStartOffset; - startOffset = termStartOffset; - matchEndOffset = termEndOffset; - endOffset = termEndOffset; - tot += score; - } - else - { - startOffset = std::min(startOffset, termStartOffset); - endOffset = std::max(endOffset, termEndOffset); - if (score > 0) - { - if (tot == 0) - { - matchStartOffset = offsetAtt->startOffset(); - matchEndOffset = offsetAtt->endOffset(); - } - else - { - matchStartOffset = std::min(matchStartOffset, termStartOffset); - matchEndOffset = std::max(matchEndOffset, termEndOffset); - } - tot += score; +namespace Lucene { + +const int32_t TokenGroup::MAX_NUM_TOKENS_PER_GROUP = 50; + +TokenGroup::TokenGroup(const TokenStreamPtr& tokenStream) { + offsetAtt = tokenStream->addAttribute(); + termAtt = tokenStream->addAttribute(); + + tokens = Collection::newInstance(MAX_NUM_TOKENS_PER_GROUP); + scores = Collection::newInstance(MAX_NUM_TOKENS_PER_GROUP); + numTokens = 0; + startOffset = 0; + endOffset = 0; + tot = 0.0; + matchStartOffset = 0; + matchEndOffset = 0; +} + +TokenGroup::~TokenGroup() { +} + +void TokenGroup::addToken(double score) { + if (numTokens < 
MAX_NUM_TOKENS_PER_GROUP) { + int32_t termStartOffset = offsetAtt->startOffset(); + int32_t termEndOffset = offsetAtt->endOffset(); + if (numTokens == 0) { + matchStartOffset = termStartOffset; + startOffset = termStartOffset; + matchEndOffset = termEndOffset; + endOffset = termEndOffset; + tot += score; + } else { + startOffset = std::min(startOffset, termStartOffset); + endOffset = std::max(endOffset, termEndOffset); + if (score > 0) { + if (tot == 0) { + matchStartOffset = offsetAtt->startOffset(); + matchEndOffset = offsetAtt->endOffset(); + } else { + matchStartOffset = std::min(matchStartOffset, termStartOffset); + matchEndOffset = std::max(matchEndOffset, termEndOffset); } + tot += score; } - TokenPtr token(newLucene(termStartOffset, termEndOffset)); - token->setTermBuffer(termAtt->term()); - tokens[numTokens] = token; - scores[numTokens] = score; - ++numTokens; } + TokenPtr token(newLucene(termStartOffset, termEndOffset)); + token->setTermBuffer(termAtt->term()); + tokens[numTokens] = token; + scores[numTokens] = score; + ++numTokens; } - - bool TokenGroup::isDistinct() - { - return (offsetAtt->startOffset() >= endOffset); - } - - void TokenGroup::clear() - { - numTokens = 0; - tot = 0; - } - - TokenPtr TokenGroup::getToken(int32_t index) - { - return tokens[index]; - } - - double TokenGroup::getScore(int32_t index) - { - return scores[index]; - } - - int32_t TokenGroup::getEndOffset() - { - return endOffset; - } - - int32_t TokenGroup::getNumTokens() - { - return numTokens; - } - - int32_t TokenGroup::getStartOffset() - { - return startOffset; - } - - double TokenGroup::getTotalScore() - { - return tot; - } +} + +bool TokenGroup::isDistinct() { + return (offsetAtt->startOffset() >= endOffset); +} + +void TokenGroup::clear() { + numTokens = 0; + tot = 0; +} + +TokenPtr TokenGroup::getToken(int32_t index) { + return tokens[index]; +} + +double TokenGroup::getScore(int32_t index) { + return scores[index]; +} + +int32_t TokenGroup::getEndOffset() { + return 
endOffset; +} + +int32_t TokenGroup::getNumTokens() { + return numTokens; +} + +int32_t TokenGroup::getStartOffset() { + return startOffset; +} + +double TokenGroup::getTotalScore() { + return tot; +} + } diff --git a/src/contrib/highlighter/TokenSources.cpp b/src/contrib/highlighter/TokenSources.cpp index 270984d5..9810f331 100644 --- a/src/contrib/highlighter/TokenSources.cpp +++ b/src/contrib/highlighter/TokenSources.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -19,170 +19,157 @@ #include "StringReader.h" #include "StringUtils.h" -namespace Lucene -{ - TokenSources::~TokenSources() - { +namespace Lucene { + +TokenSources::~TokenSources() { +} + +TokenStreamPtr TokenSources::getAnyTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const DocumentPtr& doc, const AnalyzerPtr& analyzer) { + TokenStreamPtr ts; + TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); + if (tfv) { + if (boost::dynamic_pointer_cast(tfv)) { + ts = getTokenStream(boost::dynamic_pointer_cast(tfv)); + } + } + // No token info stored so fall back to analyzing raw content + if (!ts) { + ts = getTokenStream(doc, field, analyzer); } - - TokenStreamPtr TokenSources::getAnyTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, DocumentPtr doc, AnalyzerPtr analyzer) - { - TokenStreamPtr ts; - TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); - if (tfv) - { - if (boost::dynamic_pointer_cast(tfv)) - ts = getTokenStream(boost::dynamic_pointer_cast(tfv)); + return ts; +} + +TokenStreamPtr TokenSources::getAnyTokenStream(const IndexReaderPtr& reader, int32_t docId, 
const String& field, const AnalyzerPtr& analyzer) { + TokenStreamPtr ts; + TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); + if (tfv) { + if (boost::dynamic_pointer_cast(tfv)) { + ts = getTokenStream(boost::dynamic_pointer_cast(tfv)); } - // No token info stored so fall back to analyzing raw content - if (!ts) - ts = getTokenStream(doc, field, analyzer); - return ts; } - - TokenStreamPtr TokenSources::getAnyTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, AnalyzerPtr analyzer) - { - TokenStreamPtr ts; - TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); - if (tfv) - { - if (boost::dynamic_pointer_cast(tfv)) - ts = getTokenStream(boost::dynamic_pointer_cast(tfv)); + // No token info stored so fall back to analyzing raw content + if (!ts) { + ts = getTokenStream(reader, docId, field, analyzer); + } + return ts; +} + +TokenStreamPtr TokenSources::getTokenStream(const TermPositionVectorPtr& tpv) { + // assumes the worst and makes no assumptions about token position sequences. + return getTokenStream(tpv, false); +} + +struct lessTokenOffset { + inline bool operator()(const TokenPtr& first, const TokenPtr& second) const { + if (first->startOffset() < second->startOffset()) { + return true; } - // No token info stored so fall back to analyzing raw content - if (!ts) - ts = getTokenStream(reader, docId, field, analyzer); - return ts; + return (first->startOffset() > second->endOffset()); } - - TokenStreamPtr TokenSources::getTokenStream(TermPositionVectorPtr tpv) - { - // assumes the worst and makes no assumptions about token position sequences. 
- return getTokenStream(tpv, false); +}; + +TokenStreamPtr TokenSources::getTokenStream(const TermPositionVectorPtr& tpv, bool tokenPositionsGuaranteedContiguous) { + // code to reconstruct the original sequence of Tokens + Collection terms(tpv->getTerms()); + Collection freq(tpv->getTermFrequencies()); + int32_t totalTokens = 0; + + for (int32_t t = 0; t < freq.size(); ++t) { + totalTokens += freq[t]; } - - struct lessTokenOffset - { - inline bool operator()(const TokenPtr& first, const TokenPtr& second) const - { - if (first->startOffset() < second->startOffset()) - return true; - return (first->startOffset() > second->endOffset()); + + Collection tokensInOriginalOrder(Collection::newInstance(totalTokens)); + Collection unsortedTokens; + for (int32_t t = 0; t < freq.size(); ++t) { + Collection offsets(tpv->getOffsets(t)); + if (!offsets) { + return TokenStreamPtr(); + } + Collection pos; + if (tokenPositionsGuaranteedContiguous) { + // try get the token position info to speed up assembly of tokens into sorted sequence + pos = tpv->getTermPositions(t); } - }; - - TokenStreamPtr TokenSources::getTokenStream(TermPositionVectorPtr tpv, bool tokenPositionsGuaranteedContiguous) - { - // code to reconstruct the original sequence of Tokens - Collection terms(tpv->getTerms()); - Collection freq(tpv->getTermFrequencies()); - int32_t totalTokens = 0; - - for (int32_t t = 0; t < freq.size(); ++t) - totalTokens += freq[t]; - - Collection tokensInOriginalOrder(Collection::newInstance(totalTokens)); - Collection unsortedTokens; - for (int32_t t = 0; t < freq.size(); ++t) - { - Collection offsets(tpv->getOffsets(t)); - if (!offsets) - return TokenStreamPtr(); - Collection pos; - if (tokenPositionsGuaranteedContiguous) - { - // try get the token position info to speed up assembly of tokens into sorted sequence - pos = tpv->getTermPositions(t); + if (!pos) { + // tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later + if (!unsortedTokens) 
{ + unsortedTokens = Collection::newInstance(); } - if (!pos) - { - // tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later - if (!unsortedTokens) - unsortedTokens = Collection::newInstance(); - for (int32_t tp = 0; tp < offsets.size(); ++tp) - { - TokenPtr token(newLucene(offsets[tp]->getStartOffset(), offsets[tp]->getEndOffset())); - token->setTermBuffer(terms[t]); - unsortedTokens.add(token); - } + for (int32_t tp = 0; tp < offsets.size(); ++tp) { + TokenPtr token(newLucene(offsets[tp]->getStartOffset(), offsets[tp]->getEndOffset())); + token->setTermBuffer(terms[t]); + unsortedTokens.add(token); } - else - { - // We have positions stored and a guarantee that the token position information is contiguous - - // This may be fast BUT wont work if Tokenizers used which create >1 token in same position or - // creates jumps in position numbers - this code would fail under those circumstances - - // Tokens stored with positions - can use this to index straight into sorted array - for (int32_t tp = 0; tp < pos.size(); ++tp) - { - TokenPtr token(newLucene(terms[t], offsets[tp]->getStartOffset(), offsets[tp]->getEndOffset())); - tokensInOriginalOrder[pos[tp]] = token; - } + } else { + // We have positions stored and a guarantee that the token position information is contiguous + + // This may be fast BUT wont work if Tokenizers used which create >1 token in same position or + // creates jumps in position numbers - this code would fail under those circumstances + + // Tokens stored with positions - can use this to index straight into sorted array + for (int32_t tp = 0; tp < pos.size(); ++tp) { + TokenPtr token(newLucene(terms[t], offsets[tp]->getStartOffset(), offsets[tp]->getEndOffset())); + tokensInOriginalOrder[pos[tp]] = token; } } - // If the field has been stored without position data we must perform a sort - if (unsortedTokens) - { - tokensInOriginalOrder = unsortedTokens; - std::sort(tokensInOriginalOrder.begin(), 
tokensInOriginalOrder.end(), lessTokenOffset()); - } - return newLucene(tokensInOriginalOrder); } - - TokenStreamPtr TokenSources::getTokenStream(IndexReaderPtr reader, int32_t docId, const String& field) - { - TermFreqVectorPtr tfv(reader->getTermFreqVector(docId, field)); - if (!tfv) - boost::throw_exception(IllegalArgumentException(field + L" in doc #" + StringUtils::toString(docId) + L"does not have any term position data stored")); - - if (boost::dynamic_pointer_cast(tfv)) - { - TermPositionVectorPtr tpv(boost::dynamic_pointer_cast(reader->getTermFreqVector(docId, field))); - return getTokenStream(tpv); - } - boost::throw_exception(IllegalArgumentException(field + L" in doc #" + StringUtils::toString(docId) + L"does not have any term position data stored")); - return TokenStreamPtr(); + // If the field has been stored without position data we must perform a sort + if (unsortedTokens) { + tokensInOriginalOrder = unsortedTokens; + std::sort(tokensInOriginalOrder.begin(), tokensInOriginalOrder.end(), lessTokenOffset()); } - - TokenStreamPtr TokenSources::getTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, AnalyzerPtr analyzer) - { - DocumentPtr doc(reader->document(docId)); - return getTokenStream(doc, field, analyzer); - } - - TokenStreamPtr TokenSources::getTokenStream(DocumentPtr doc, const String& field, AnalyzerPtr analyzer) - { - String contents(doc->get(field)); - if (contents.empty()) - boost::throw_exception(IllegalArgumentException(L"Field " + field + L" in document is not stored and cannot be analyzed")); - return getTokenStream(field, contents, analyzer); - } - - TokenStreamPtr TokenSources::getTokenStream(const String& field, const String& contents, AnalyzerPtr analyzer) - { - return analyzer->tokenStream(field, newLucene(contents)); + return newLucene(tokensInOriginalOrder); +} + +TokenStreamPtr TokenSources::getTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field) { + TermFreqVectorPtr 
tfv(reader->getTermFreqVector(docId, field)); + if (!tfv) { + boost::throw_exception(IllegalArgumentException(field + L" in doc #" + StringUtils::toString(docId) + L"does not have any term position data stored")); } - - StoredTokenStream::StoredTokenStream(Collection tokens) - { - this->tokens = tokens; - this->termAtt = addAttribute(); - this->offsetAtt = addAttribute(); + + if (boost::dynamic_pointer_cast(tfv)) { + TermPositionVectorPtr tpv(boost::dynamic_pointer_cast(reader->getTermFreqVector(docId, field))); + return getTokenStream(tpv); } - - StoredTokenStream::~StoredTokenStream() - { + boost::throw_exception(IllegalArgumentException(field + L" in doc #" + StringUtils::toString(docId) + L"does not have any term position data stored")); + return TokenStreamPtr(); +} + +TokenStreamPtr TokenSources::getTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const AnalyzerPtr& analyzer) { + DocumentPtr doc(reader->document(docId)); + return getTokenStream(doc, field, analyzer); +} + +TokenStreamPtr TokenSources::getTokenStream(const DocumentPtr& doc, const String& field, const AnalyzerPtr& analyzer) { + String contents(doc->get(field)); + if (contents.empty()) { + boost::throw_exception(IllegalArgumentException(L"Field " + field + L" in document is not stored and cannot be analyzed")); } - - bool StoredTokenStream::incrementToken() - { - if (currentToken >= tokens.size()) - return false; - clearAttributes(); - TokenPtr token(tokens[currentToken++]); - termAtt->setTermBuffer(token->term()); - offsetAtt->setOffset(token->startOffset(), token->endOffset()); - return true; + return getTokenStream(field, contents, analyzer); +} + +TokenStreamPtr TokenSources::getTokenStream(const String& field, const String& contents, const AnalyzerPtr& analyzer) { + return analyzer->tokenStream(field, newLucene(contents)); +} + +StoredTokenStream::StoredTokenStream(Collection tokens) { + this->tokens = tokens; + this->termAtt = addAttribute(); + this->offsetAtt 
= addAttribute(); +} + +StoredTokenStream::~StoredTokenStream() { +} + +bool StoredTokenStream::incrementToken() { + if (currentToken >= tokens.size()) { + return false; } + clearAttributes(); + TokenPtr token(tokens[currentToken++]); + termAtt->setTermBuffer(token->term()); + offsetAtt->setOffset(token->startOffset(), token->endOffset()); + return true; +} + } diff --git a/src/contrib/highlighter/WeightedSpanTerm.cpp b/src/contrib/highlighter/WeightedSpanTerm.cpp index cc3995ea..99fe4dd2 100644 --- a/src/contrib/highlighter/WeightedSpanTerm.cpp +++ b/src/contrib/highlighter/WeightedSpanTerm.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,57 +7,49 @@ #include "ContribInc.h" #include "WeightedSpanTerm.h" -namespace Lucene -{ - WeightedSpanTerm::WeightedSpanTerm(double weight, const String& term, bool positionSensitive) : WeightedTerm(weight, term) - { - this->positionSensitive = positionSensitive; - this->positionSpans = Collection::newInstance(); - } - - WeightedSpanTerm::~WeightedSpanTerm() - { - } - - bool WeightedSpanTerm::checkPosition(int32_t position) - { - // There would probably be a slight speed improvement if PositionSpans where kept in some sort of priority queue - - // that way this method could bail early without checking each PositionSpan. 
- for (Collection::iterator posSpan = positionSpans.begin(); posSpan != positionSpans.end(); ++posSpan) - { - if (position >= (*posSpan)->start && position <= (*posSpan)->end) - return true; +namespace Lucene { + +WeightedSpanTerm::WeightedSpanTerm(double weight, const String& term, bool positionSensitive) : WeightedTerm(weight, term) { + this->positionSensitive = positionSensitive; + this->positionSpans = Collection::newInstance(); +} + +WeightedSpanTerm::~WeightedSpanTerm() { +} + +bool WeightedSpanTerm::checkPosition(int32_t position) { + // There would probably be a slight speed improvement if PositionSpans where kept in some sort of priority queue - + // that way this method could bail early without checking each PositionSpan. + for (Collection::iterator posSpan = positionSpans.begin(); posSpan != positionSpans.end(); ++posSpan) { + if (position >= (*posSpan)->start && position <= (*posSpan)->end) { + return true; } - return false; - } - - void WeightedSpanTerm::addPositionSpans(Collection positionSpans) - { - this->positionSpans.addAll(positionSpans.begin(), positionSpans.end()); - } - - bool WeightedSpanTerm::isPositionSensitive() - { - return positionSensitive; - } - - void WeightedSpanTerm::setPositionSensitive(bool positionSensitive) - { - this->positionSensitive = positionSensitive; - } - - Collection WeightedSpanTerm::getPositionSpans() - { - return positionSpans; - } - - PositionSpan::PositionSpan(int32_t start, int32_t end) - { - this->start = start; - this->end = end; - } - - PositionSpan::~PositionSpan() - { } + return false; +} + +void WeightedSpanTerm::addPositionSpans(Collection positionSpans) { + this->positionSpans.addAll(positionSpans.begin(), positionSpans.end()); +} + +bool WeightedSpanTerm::isPositionSensitive() { + return positionSensitive; +} + +void WeightedSpanTerm::setPositionSensitive(bool positionSensitive) { + this->positionSensitive = positionSensitive; +} + +Collection WeightedSpanTerm::getPositionSpans() { + return positionSpans; 
+} + +PositionSpan::PositionSpan(int32_t start, int32_t end) { + this->start = start; + this->end = end; +} + +PositionSpan::~PositionSpan() { +} + } diff --git a/src/contrib/highlighter/WeightedSpanTermExtractor.cpp b/src/contrib/highlighter/WeightedSpanTermExtractor.cpp index 025c646c..5abe39c0 100644 --- a/src/contrib/highlighter/WeightedSpanTermExtractor.cpp +++ b/src/contrib/highlighter/WeightedSpanTermExtractor.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -30,464 +30,399 @@ #include "MemoryIndex.h" #include "MiscUtils.h" -namespace Lucene -{ - WeightedSpanTermExtractor::WeightedSpanTermExtractor(const String& defaultField) - { - this->defaultField = defaultField; - this->expandMultiTermQuery = false; - this->cachedTokenStream = false; - this->wrapToCaching = true; - this->readers = MapStringIndexReader::newInstance(); - } - - WeightedSpanTermExtractor::~WeightedSpanTermExtractor() - { - } - - void WeightedSpanTermExtractor::closeReaders() - { - for (MapStringIndexReader::iterator reader = readers.begin(); reader != readers.end(); ++reader) - { - try - { - reader->second->close(); - } - catch (...) 
- { - } +namespace Lucene { + +WeightedSpanTermExtractor::WeightedSpanTermExtractor(const String& defaultField) { + this->defaultField = defaultField; + this->expandMultiTermQuery = false; + this->cachedTokenStream = false; + this->wrapToCaching = true; + this->readers = MapStringIndexReader::newInstance(); +} + +WeightedSpanTermExtractor::~WeightedSpanTermExtractor() { +} + +void WeightedSpanTermExtractor::closeReaders() { + for (MapStringIndexReader::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + try { + reader->second->close(); + } catch (...) { } } - - void WeightedSpanTermExtractor::extract(QueryPtr query, MapWeightedSpanTermPtr terms) - { - if (MiscUtils::typeOf(query)) - { - Collection queryClauses(boost::dynamic_pointer_cast(query)->getClauses()); - for (int32_t i = 0; i < queryClauses.size(); ++i) - { - if (!queryClauses[i]->isProhibited()) - extract(queryClauses[i]->getQuery(), terms); +} + +void WeightedSpanTermExtractor::extract(const QueryPtr& query, const MapWeightedSpanTermPtr& terms) { + QueryPtr _query(query); + if (MiscUtils::typeOf(_query)) { + Collection queryClauses(boost::dynamic_pointer_cast(_query)->getClauses()); + for (int32_t i = 0; i < queryClauses.size(); ++i) { + if (!queryClauses[i]->isProhibited()) { + extract(queryClauses[i]->getQuery(), terms); } } - else if (MiscUtils::typeOf(query)) - { - PhraseQueryPtr phraseQuery(boost::dynamic_pointer_cast(query)); - Collection phraseQueryTerms(phraseQuery->getTerms()); - Collection clauses(Collection::newInstance(phraseQueryTerms.size())); - for (int32_t i = 0; i < phraseQueryTerms.size(); ++i) - clauses[i] = newLucene(phraseQueryTerms[i]); - int32_t slop = phraseQuery->getSlop(); - Collection positions(phraseQuery->getPositions()); - // add largest position increment to slop - if (!positions.empty()) - { - int32_t lastPos = positions[0]; - int32_t largestInc = 0; - int32_t sz = positions.size(); - for (int32_t i = 1; i < sz; ++i) - { - int32_t pos = positions[i]; - 
int32_t inc = pos - lastPos; - if (inc > largestInc) - largestInc = inc; - lastPos = pos; - } - if (largestInc > 1) - slop += largestInc; - } - - bool inorder = (slop == 0); - - SpanNearQueryPtr sp(newLucene(clauses, slop, inorder)); - sp->setBoost(query->getBoost()); - extractWeightedSpanTerms(terms, sp); + } else if (MiscUtils::typeOf(_query)) { + PhraseQueryPtr phraseQuery(boost::dynamic_pointer_cast(_query)); + Collection phraseQueryTerms(phraseQuery->getTerms()); + Collection clauses(Collection::newInstance(phraseQueryTerms.size())); + for (int32_t i = 0; i < phraseQueryTerms.size(); ++i) { + clauses[i] = newLucene(phraseQueryTerms[i]); } - else if (MiscUtils::typeOf(query)) - extractWeightedTerms(terms, query); - else if (MiscUtils::typeOf(query)) - extractWeightedSpanTerms(terms, boost::dynamic_pointer_cast(query)); - else if (MiscUtils::typeOf(query)) - extract(boost::dynamic_pointer_cast(query)->getQuery(), terms); - else if (MiscUtils::typeOf(query)) - { - DisjunctionMaxQueryPtr dmq(boost::dynamic_pointer_cast(query)); - for (Collection::iterator q = dmq->begin(); q != dmq->end(); ++q) - extract(*q, terms); + + // sum position increments beyond 1 + int32_t positionGaps = 0; + Collection positions(phraseQuery->getPositions()); + if (!positions.empty() && positions.size() > 1) { + // positions are in increasing order. max(0,...) is just a safeguard. 
+ positionGaps = (std::max)(0, positions[positions.size() - 1] - positions[0] - positions.size() + 1 ); } - else if (MiscUtils::typeOf(query) && expandMultiTermQuery) - { - MultiTermQueryPtr mtq(boost::dynamic_pointer_cast(query)); - if (mtq->getRewriteMethod() != MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()) - { - mtq = boost::dynamic_pointer_cast(mtq->clone()); - mtq->setRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); - query = mtq; - } - FakeReaderPtr fReader(newLucene()); - MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()->rewrite(fReader, mtq); - if (!fReader->field.empty()) - { - IndexReaderPtr ir(getReaderForField(fReader->field)); - extract(query->rewrite(ir), terms); - } + + //if original slop is 0 then require inOrder + bool inorder = (phraseQuery->getSlop() == 0); + + SpanNearQueryPtr sp(newLucene(clauses, phraseQuery->getSlop() + positionGaps, inorder)); + sp->setBoost(_query->getBoost()); + extractWeightedSpanTerms(terms, sp); + } else if (MiscUtils::typeOf(_query)) { + extractWeightedTerms(terms, _query); + } else if (MiscUtils::typeOf(_query)) { + extractWeightedSpanTerms(terms, boost::dynamic_pointer_cast(_query)); + } else if (MiscUtils::typeOf(_query)) { + extract(boost::dynamic_pointer_cast(_query)->getQuery(), terms); + } else if (MiscUtils::typeOf(_query)) { + DisjunctionMaxQueryPtr dmq(boost::dynamic_pointer_cast(_query)); + for (Collection::iterator q = dmq->begin(); q != dmq->end(); ++q) { + extract(*q, terms); + } + } else if (MiscUtils::typeOf(_query) && expandMultiTermQuery) { + MultiTermQueryPtr mtq(boost::dynamic_pointer_cast(_query)); + if (mtq->getRewriteMethod() != MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()) { + mtq = boost::dynamic_pointer_cast(mtq->clone()); + mtq->setRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); + _query = mtq; + } + FakeReaderPtr fReader(newLucene()); + MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()->rewrite(fReader, mtq); + if (!fReader->field.empty()) { + IndexReaderPtr 
ir(getReaderForField(fReader->field)); + extract(_query->rewrite(ir), terms); } - else if (MiscUtils::typeOf(query)) - { - MultiPhraseQueryPtr mpq(boost::dynamic_pointer_cast(query)); - Collection< Collection > termArrays(mpq->getTermArrays()); - Collection positions(mpq->getPositions()); - if (!positions.empty()) - { - int32_t maxPosition = positions[positions.size() - 1]; - for (int32_t i = 0; i < positions.size() - 1; ++i) - { - if (positions[i] > maxPosition) - maxPosition = positions[i]; + } else if (MiscUtils::typeOf(_query)) { + MultiPhraseQueryPtr mpq(boost::dynamic_pointer_cast(_query)); + Collection< Collection > termArrays(mpq->getTermArrays()); + Collection positions(mpq->getPositions()); + if (!positions.empty()) { + int32_t maxPosition = positions[positions.size() - 1]; + for (int32_t i = 0; i < positions.size() - 1; ++i) { + if (positions[i] > maxPosition) { + maxPosition = positions[i]; } - - Collection< Collection > disjunctLists(Collection< Collection >::newInstance(maxPosition + 1)); - int32_t distinctPositions = 0; - for (int32_t i = 0; i < termArrays.size(); ++i) - { - Collection termArray(termArrays[i]); - Collection disjuncts(disjunctLists[positions[i]]); - if (!disjuncts) - { - disjunctLists[positions[i]] = Collection::newInstance(); - disjuncts = disjunctLists[positions[i]]; - ++distinctPositions; - } - for (int32_t j = 0; j < termArray.size(); ++j) - disjuncts.add(newLucene(termArray[j])); + } + + Collection< Collection > disjunctLists(Collection< Collection >::newInstance(maxPosition + 1)); + int32_t distinctPositions = 0; + for (int32_t i = 0; i < termArrays.size(); ++i) { + Collection termArray(termArrays[i]); + Collection disjuncts(disjunctLists[positions[i]]); + if (!disjuncts) { + disjunctLists[positions[i]] = Collection::newInstance(); + disjuncts = disjunctLists[positions[i]]; + ++distinctPositions; } - - int32_t positionGaps = 0; - int32_t position = 0; - Collection clauses(Collection::newInstance(distinctPositions)); - for 
(int32_t i = 0; i < disjunctLists.size(); ++i) - { - Collection disjuncts(disjunctLists[i]); - if (disjuncts) - clauses[position++] = newLucene(disjuncts); - else - ++positionGaps; + for (int32_t j = 0; j < termArray.size(); ++j) { + disjuncts.add(newLucene(termArray[j])); } - - int32_t slop = mpq->getSlop(); - bool inorder = (slop == 0); + } - SpanNearQueryPtr sp(newLucene(clauses, slop + positionGaps, inorder)); - sp->setBoost(query->getBoost()); - extractWeightedSpanTerms(terms, sp); + int32_t positionGaps = 0; + int32_t position = 0; + Collection clauses(Collection::newInstance(distinctPositions)); + for (int32_t i = 0; i < disjunctLists.size(); ++i) { + Collection disjuncts(disjunctLists[i]); + if (disjuncts) { + clauses[position++] = newLucene(disjuncts); + } else { + ++positionGaps; + } } + + int32_t slop = mpq->getSlop(); + bool inorder = (slop == 0); + + SpanNearQueryPtr sp(newLucene(clauses, slop + positionGaps, inorder)); + sp->setBoost(_query->getBoost()); + extractWeightedSpanTerms(terms, sp); } } - - void WeightedSpanTermExtractor::extractWeightedSpanTerms(MapWeightedSpanTermPtr terms, SpanQueryPtr spanQuery) - { - HashSet fieldNames(HashSet::newInstance()); - if (fieldName.empty()) - collectSpanQueryFields(spanQuery, fieldNames); - else - fieldNames.add(fieldName); - // To support the use of the default field name - if (!defaultField.empty()) - fieldNames.add(defaultField); - - MapStringSpanQuery queries(MapStringSpanQuery::newInstance()); - SetTerm nonWeightedTerms(SetTerm::newInstance()); - - bool rewriteQuery = mustRewriteQuery(spanQuery); - if (rewriteQuery) - { - for (HashSet::iterator field = fieldNames.begin(); field != fieldNames.end(); ++field) - { - SpanQueryPtr rewrittenQuery(boost::dynamic_pointer_cast(spanQuery->rewrite(getReaderForField(*field)))); - queries.put(*field, rewrittenQuery); - rewrittenQuery->extractTerms(nonWeightedTerms); - } +} + +void WeightedSpanTermExtractor::extractWeightedSpanTerms(const MapWeightedSpanTermPtr& 
terms, const SpanQueryPtr& spanQuery) { + HashSet fieldNames(HashSet::newInstance()); + if (fieldName.empty()) { + collectSpanQueryFields(spanQuery, fieldNames); + } else { + fieldNames.add(fieldName); + } + // To support the use of the default field name + if (!defaultField.empty()) { + fieldNames.add(defaultField); + } + + MapStringSpanQuery queries(MapStringSpanQuery::newInstance()); + SetTerm nonWeightedTerms(SetTerm::newInstance()); + + bool rewriteQuery = mustRewriteQuery(spanQuery); + if (rewriteQuery) { + for (HashSet::iterator field = fieldNames.begin(); field != fieldNames.end(); ++field) { + SpanQueryPtr rewrittenQuery(boost::dynamic_pointer_cast(spanQuery->rewrite(getReaderForField(*field)))); + queries.put(*field, rewrittenQuery); + rewrittenQuery->extractTerms(nonWeightedTerms); } - else - spanQuery->extractTerms(nonWeightedTerms); - - Collection spanPositions(Collection::newInstance()); - - for (HashSet::iterator field = fieldNames.begin(); field != fieldNames.end(); ++field) - { - IndexReaderPtr reader(getReaderForField(*field)); - SpansPtr spans; - if (rewriteQuery) - spans = queries.get(*field)->getSpans(reader); - else - spans = spanQuery->getSpans(reader); - - // collect span positions - while (spans->next()) - spanPositions.add(newLucene(spans->start(), spans->end() - 1)); + } else { + spanQuery->extractTerms(nonWeightedTerms); + } + + Collection spanPositions(Collection::newInstance()); + + for (HashSet::iterator field = fieldNames.begin(); field != fieldNames.end(); ++field) { + IndexReaderPtr reader(getReaderForField(*field)); + SpansPtr spans; + if (rewriteQuery) { + spans = queries.get(*field)->getSpans(reader); + } else { + spans = spanQuery->getSpans(reader); } - - if (spanPositions.empty()) - { - // no spans found - return; + + // collect span positions + while (spans->next()) { + spanPositions.add(newLucene(spans->start(), spans->end() - 1)); } - - for (SetTerm::iterator queryTerm = nonWeightedTerms.begin(); queryTerm != 
nonWeightedTerms.end(); ++queryTerm) - { - if (fieldNameComparator((*queryTerm)->field())) - { - WeightedSpanTermPtr weightedSpanTerm(terms->get((*queryTerm)->text())); - if (!weightedSpanTerm) - { - weightedSpanTerm = newLucene(spanQuery->getBoost(), (*queryTerm)->text()); + } + + if (spanPositions.empty()) { + // no spans found + return; + } + + for (SetTerm::iterator queryTerm = nonWeightedTerms.begin(); queryTerm != nonWeightedTerms.end(); ++queryTerm) { + if (fieldNameComparator((*queryTerm)->field())) { + WeightedSpanTermPtr weightedSpanTerm(terms->get((*queryTerm)->text())); + if (!weightedSpanTerm) { + weightedSpanTerm = newLucene(spanQuery->getBoost(), (*queryTerm)->text()); + weightedSpanTerm->addPositionSpans(spanPositions); + weightedSpanTerm->positionSensitive = true; + terms->put((*queryTerm)->text(), weightedSpanTerm); + } else { + if (!spanPositions.empty()) { weightedSpanTerm->addPositionSpans(spanPositions); - weightedSpanTerm->positionSensitive = true; - terms->put((*queryTerm)->text(), weightedSpanTerm); - } - else - { - if (!spanPositions.empty()) - weightedSpanTerm->addPositionSpans(spanPositions); } } } } - - void WeightedSpanTermExtractor::extractWeightedTerms(MapWeightedSpanTermPtr terms, QueryPtr query) - { - SetTerm nonWeightedTerms(SetTerm::newInstance()); - query->extractTerms(nonWeightedTerms); - - for (SetTerm::iterator queryTerm = nonWeightedTerms.begin(); queryTerm != nonWeightedTerms.end(); ++queryTerm) - { - if (fieldNameComparator((*queryTerm)->field())) - { - WeightedSpanTermPtr weightedSpanTerm(newLucene(query->getBoost(), (*queryTerm)->text())); - terms->put((*queryTerm)->text(), weightedSpanTerm); - } +} + +void WeightedSpanTermExtractor::extractWeightedTerms(const MapWeightedSpanTermPtr& terms, const QueryPtr& query) { + SetTerm nonWeightedTerms(SetTerm::newInstance()); + query->extractTerms(nonWeightedTerms); + + for (SetTerm::iterator queryTerm = nonWeightedTerms.begin(); queryTerm != nonWeightedTerms.end(); ++queryTerm) { 
+ if (fieldNameComparator((*queryTerm)->field())) { + WeightedSpanTermPtr weightedSpanTerm(newLucene(query->getBoost(), (*queryTerm)->text())); + terms->put((*queryTerm)->text(), weightedSpanTerm); } } - - bool WeightedSpanTermExtractor::fieldNameComparator(const String& fieldNameToCheck) - { - return (fieldName.empty() || fieldNameToCheck == fieldName || fieldNameToCheck == defaultField); - } - - IndexReaderPtr WeightedSpanTermExtractor::getReaderForField(const String& field) - { - if (wrapToCaching && !cachedTokenStream && !MiscUtils::typeOf(tokenStream)) - { - tokenStream = newLucene(tokenStream); - cachedTokenStream = true; - } - IndexReaderPtr reader(readers.get(field)); - if (!reader) - { - MemoryIndexPtr indexer(newLucene()); - indexer->addField(field, tokenStream); - tokenStream->reset(); - IndexSearcherPtr searcher(indexer->createSearcher()); - reader = searcher->getIndexReader(); - readers.put(field, reader); - } - return reader; +} + +bool WeightedSpanTermExtractor::fieldNameComparator(const String& fieldNameToCheck) { + return (fieldName.empty() || fieldNameToCheck == fieldName || fieldNameToCheck == defaultField); +} + +IndexReaderPtr WeightedSpanTermExtractor::getReaderForField(const String& field) { + if (wrapToCaching && !cachedTokenStream && !MiscUtils::typeOf(tokenStream)) { + tokenStream = newLucene(tokenStream); + cachedTokenStream = true; } - - MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTerms(QueryPtr query, TokenStreamPtr tokenStream) - { - return getWeightedSpanTerms(query, tokenStream, L""); + IndexReaderPtr reader(readers.get(field)); + if (!reader) { + MemoryIndexPtr indexer(newLucene()); + indexer->addField(field, tokenStream); + tokenStream->reset(); + IndexSearcherPtr searcher(indexer->createSearcher()); + reader = searcher->getIndexReader(); + readers.put(field, reader); } - - MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTerms(QueryPtr query, TokenStreamPtr tokenStream, const String& fieldName) 
- { - if (!fieldName.empty()) - this->fieldName = fieldName; - else - this->fieldName.clear(); - - MapWeightedSpanTermPtr terms(newLucene()); - this->tokenStream = tokenStream; - - LuceneException finally; - try - { - extract(query, terms); - } - catch (LuceneException& e) - { - finally = e; - } - closeReaders(); - finally.throwException(); - return terms; + return reader; +} + +MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTerms(const QueryPtr& query, const TokenStreamPtr& tokenStream) { + return getWeightedSpanTerms(query, tokenStream, L""); +} + +MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTerms(const QueryPtr& query, const TokenStreamPtr& tokenStream, const String& fieldName) { + if (!fieldName.empty()) { + this->fieldName = fieldName; + } else { + this->fieldName.clear(); } - - MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTermsWithScores(QueryPtr query, TokenStreamPtr tokenStream, const String& fieldName, IndexReaderPtr reader) - { - if (!fieldName.empty()) - this->fieldName = fieldName; - else - this->fieldName.clear(); - - MapWeightedSpanTermPtr terms(newLucene()); + + MapWeightedSpanTermPtr terms(newLucene()); + this->tokenStream = tokenStream; + + LuceneException finally; + try { extract(query, terms); - - int32_t totalNumDocs = reader->numDocs(); - - LuceneException finally; - try - { - for (MapStringWeightedSpanTerm::iterator weightedSpanTerm = terms->begin(); weightedSpanTerm != terms->end(); ++weightedSpanTerm) - { - int32_t docFreq = reader->docFreq(newLucene(fieldName, weightedSpanTerm->second->term)); - // docFreq counts deletes - if (totalNumDocs < docFreq) - docFreq = totalNumDocs; - // IDF algorithm taken from DefaultSimilarity class - double idf = (double)(std::log((double)totalNumDocs / (double)(docFreq + 1)) + 1.0); - weightedSpanTerm->second->weight *= idf; + } catch (LuceneException& e) { + finally = e; + } + closeReaders(); + finally.throwException(); + return terms; +} + 
+MapWeightedSpanTermPtr WeightedSpanTermExtractor::getWeightedSpanTermsWithScores(const QueryPtr& query, const TokenStreamPtr& tokenStream, const String& fieldName, const IndexReaderPtr& reader) { + if (!fieldName.empty()) { + this->fieldName = fieldName; + } else { + this->fieldName.clear(); + } + + MapWeightedSpanTermPtr terms(newLucene()); + extract(query, terms); + + int32_t totalNumDocs = reader->numDocs(); + + LuceneException finally; + try { + for (MapStringWeightedSpanTerm::iterator weightedSpanTerm = terms->begin(); weightedSpanTerm != terms->end(); ++weightedSpanTerm) { + int32_t docFreq = reader->docFreq(newLucene(fieldName, weightedSpanTerm->second->term)); + // docFreq counts deletes + if (totalNumDocs < docFreq) { + docFreq = totalNumDocs; } + // IDF algorithm taken from DefaultSimilarity class + double idf = (double)(std::log((double)totalNumDocs / (double)(docFreq + 1)) + 1.0); + weightedSpanTerm->second->weight *= idf; } - catch (LuceneException& e) - { - finally = e; - } - closeReaders(); - finally.throwException(); - return terms; + } catch (LuceneException& e) { + finally = e; } - - void WeightedSpanTermExtractor::collectSpanQueryFields(SpanQueryPtr spanQuery, HashSet fieldNames) - { - if (MiscUtils::typeOf(spanQuery)) - collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getMaskedQuery(), fieldNames); - else if (MiscUtils::typeOf(spanQuery)) - collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getMatch(), fieldNames); - else if (MiscUtils::typeOf(spanQuery)) - { - Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - collectSpanQueryFields(*clause, fieldNames); + closeReaders(); + finally.throwException(); + return terms; +} + +void WeightedSpanTermExtractor::collectSpanQueryFields(const SpanQueryPtr& spanQuery, HashSet fieldNames) { + if (MiscUtils::typeOf(spanQuery)) { + 
collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getMaskedQuery(), fieldNames); + } else if (MiscUtils::typeOf(spanQuery)) { + collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getMatch(), fieldNames); + } else if (MiscUtils::typeOf(spanQuery)) { + Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + collectSpanQueryFields(*clause, fieldNames); } - else if (MiscUtils::typeOf(spanQuery)) - collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getInclude(), fieldNames); - else if (MiscUtils::typeOf(spanQuery)) - { - Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - collectSpanQueryFields(*clause, fieldNames); + } else if (MiscUtils::typeOf(spanQuery)) { + collectSpanQueryFields(boost::dynamic_pointer_cast(spanQuery)->getInclude(), fieldNames); + } else if (MiscUtils::typeOf(spanQuery)) { + Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + collectSpanQueryFields(*clause, fieldNames); } - else - fieldNames.add(spanQuery->getField()); + } else { + fieldNames.add(spanQuery->getField()); } - - bool WeightedSpanTermExtractor::mustRewriteQuery(SpanQueryPtr spanQuery) - { - if (!expandMultiTermQuery) - return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery. 
- else if (MiscUtils::typeOf(spanQuery)) - return mustRewriteQuery(boost::dynamic_pointer_cast(spanQuery)->getMaskedQuery()); - else if (MiscUtils::typeOf(spanQuery)) - return mustRewriteQuery(boost::dynamic_pointer_cast(spanQuery)->getMatch()); - else if (MiscUtils::typeOf(spanQuery)) - { - Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - { - if (mustRewriteQuery(*clause)) - return true; +} + +bool WeightedSpanTermExtractor::mustRewriteQuery(const SpanQueryPtr& spanQuery) { + if (!expandMultiTermQuery) { + return false; // Will throw UnsupportedOperationException in case of a SpanRegexQuery. + } else if (MiscUtils::typeOf(spanQuery)) { + return mustRewriteQuery(boost::dynamic_pointer_cast(spanQuery)->getMaskedQuery()); + } else if (MiscUtils::typeOf(spanQuery)) { + return mustRewriteQuery(boost::dynamic_pointer_cast(spanQuery)->getMatch()); + } else if (MiscUtils::typeOf(spanQuery)) { + Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + if (mustRewriteQuery(*clause)) { + return true; } - return false; } - else if (MiscUtils::typeOf(spanQuery)) - { - SpanNotQueryPtr spanNotQuery(boost::dynamic_pointer_cast(spanQuery)); - return mustRewriteQuery(spanNotQuery->getInclude()) || mustRewriteQuery(spanNotQuery->getExclude()); - } - else if (MiscUtils::typeOf(spanQuery)) - { - Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - { - if (mustRewriteQuery(*clause)) - return true; + return false; + } else if (MiscUtils::typeOf(spanQuery)) { + SpanNotQueryPtr spanNotQuery(boost::dynamic_pointer_cast(spanQuery)); + return mustRewriteQuery(spanNotQuery->getInclude()) || mustRewriteQuery(spanNotQuery->getExclude()); + } else if 
(MiscUtils::typeOf(spanQuery)) { + Collection clauses(boost::dynamic_pointer_cast(spanQuery)->getClauses()); + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + if (mustRewriteQuery(*clause)) { + return true; } - return false; } - else if (MiscUtils::typeOf(spanQuery)) - return false; - else - return true; - } - - bool WeightedSpanTermExtractor::getExpandMultiTermQuery() - { - return expandMultiTermQuery; - } - - void WeightedSpanTermExtractor::setExpandMultiTermQuery(bool expandMultiTermQuery) - { - this->expandMultiTermQuery = expandMultiTermQuery; - } - - bool WeightedSpanTermExtractor::isCachedTokenStream() - { - return cachedTokenStream; - } - - TokenStreamPtr WeightedSpanTermExtractor::getTokenStream() - { - return tokenStream; - } - - void WeightedSpanTermExtractor::setWrapIfNotCachingTokenFilter(bool wrap) - { - this->wrapToCaching = wrap; - } - - PositionCheckingMap::~PositionCheckingMap() - { + return false; + } else if (MiscUtils::typeOf(spanQuery)) { + return false; + } else { + return true; } - - void PositionCheckingMap::put(const String& key, WeightedSpanTermPtr val) - { - MapStringWeightedSpanTerm::iterator prev = map.find(key); - if (prev == map.end()) - { - map.put(key, val); - return; - } - bool positionSensitive = prev->second->positionSensitive; - prev->second = val; - if (!positionSensitive) - prev->second->positionSensitive = false; - } - - FakeReader::FakeReader() : FilterIndexReader(EMPTY_MEMORY_INDEX_READER()) - { - } - - FakeReader::~FakeReader() - { +} + +bool WeightedSpanTermExtractor::getExpandMultiTermQuery() { + return expandMultiTermQuery; +} + +void WeightedSpanTermExtractor::setExpandMultiTermQuery(bool expandMultiTermQuery) { + this->expandMultiTermQuery = expandMultiTermQuery; +} + +bool WeightedSpanTermExtractor::isCachedTokenStream() { + return cachedTokenStream; +} + +TokenStreamPtr WeightedSpanTermExtractor::getTokenStream() { + return tokenStream; +} + +void 
WeightedSpanTermExtractor::setWrapIfNotCachingTokenFilter(bool wrap) { + this->wrapToCaching = wrap; +} + +PositionCheckingMap::~PositionCheckingMap() { +} + +void PositionCheckingMap::put(const String& key, const WeightedSpanTermPtr& val) { + MapStringWeightedSpanTerm::iterator prev = map.find(key); + if (prev == map.end()) { + map.put(key, val); + return; } - - IndexReaderPtr FakeReader::EMPTY_MEMORY_INDEX_READER() - { - static IndexReaderPtr _EMPTY_MEMORY_INDEX_READER; - if (!_EMPTY_MEMORY_INDEX_READER) - { - _EMPTY_MEMORY_INDEX_READER = newLucene()->createSearcher()->getIndexReader(); - CycleCheck::addStatic(_EMPTY_MEMORY_INDEX_READER); - } - return _EMPTY_MEMORY_INDEX_READER; + bool positionSensitive = prev->second->positionSensitive; + prev->second = val; + if (!positionSensitive) { + prev->second->positionSensitive = false; } - - TermEnumPtr FakeReader::terms(TermPtr t) - { - // only set first fieldname - if (t && field.empty()) - field = t->field(); - return FilterIndexReader::terms(t); +} + +FakeReader::FakeReader() : FilterIndexReader(EMPTY_MEMORY_INDEX_READER()) { +} + +FakeReader::~FakeReader() { +} + +IndexReaderPtr FakeReader::EMPTY_MEMORY_INDEX_READER() { + static IndexReaderPtr _EMPTY_MEMORY_INDEX_READER; + LUCENE_RUN_ONCE( + _EMPTY_MEMORY_INDEX_READER = newLucene()->createSearcher()->getIndexReader(); + CycleCheck::addStatic(_EMPTY_MEMORY_INDEX_READER); + ); + return _EMPTY_MEMORY_INDEX_READER; +} + +TermEnumPtr FakeReader::terms(const TermPtr& t) { + // only set first fieldname + if (t && field.empty()) { + field = t->field(); } + return FilterIndexReader::terms(t); +} + } diff --git a/src/contrib/highlighter/WeightedTerm.cpp b/src/contrib/highlighter/WeightedTerm.cpp index 342a10d6..2635ce71 100644 --- a/src/contrib/highlighter/WeightedTerm.cpp +++ b/src/contrib/highlighter/WeightedTerm.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,35 +7,30 @@ #include "ContribInc.h" #include "WeightedTerm.h" -namespace Lucene -{ - WeightedTerm::WeightedTerm(double weight, const String& term) - { - this->weight = weight; - this->term = term; - } - - WeightedTerm::~WeightedTerm() - { - } - - String WeightedTerm::getTerm() - { - return term; - } - - double WeightedTerm::getWeight() - { - return weight; - } - - void WeightedTerm::setTerm(const String& term) - { - this->term = term; - } - - void WeightedTerm::setWeight(double weight) - { - this->weight = weight; - } +namespace Lucene { + +WeightedTerm::WeightedTerm(double weight, const String& term) { + this->weight = weight; + this->term = term; +} + +WeightedTerm::~WeightedTerm() { +} + +String WeightedTerm::getTerm() { + return term; +} + +double WeightedTerm::getWeight() { + return weight; +} + +void WeightedTerm::setTerm(const String& term) { + this->term = term; +} + +void WeightedTerm::setWeight(double weight) { + this->weight = weight; +} + } diff --git a/src/contrib/include/ArabicAnalyzer.h b/src/contrib/include/ArabicAnalyzer.h index 86b17a50..f38c2ad2 100644 --- a/src/contrib/include/ArabicAnalyzer.h +++ b/src/contrib/include/ArabicAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,78 +10,77 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for Arabic. 
+namespace Lucene { + +/// {@link Analyzer} for Arabic. +/// +/// This analyzer implements light-stemming as specified by: +/// Light Stemming for Arabic Information Retrieval +/// +/// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf +/// +/// The analysis package contains three primary components: +///
    +///
  • {@link ArabicNormalizationFilter}: Arabic orthographic normalization. +///
  • {@link ArabicStemFilter}: Arabic light stemming. +///
  • Arabic stop words file: a set of default Arabic stop words. +///
+class LPPCONTRIBAPI ArabicAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + ArabicAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + ArabicAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + virtual ~ArabicAnalyzer(); + + LUCENE_CLASS(ArabicAnalyzer); + +public: + /// Default Arabic stopwords in UTF-8 format. /// - /// This analyzer implements light-stemming as specified by: - /// Light Stemming for Arabic Information Retrieval + /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html + /// The stopword list is BSD-Licensed. + static const uint8_t DEFAULT_STOPWORD_FILE[]; + +protected: + /// Contains the stopwords used with the StopFilter. + HashSet stoptable; + + LuceneVersion::Version matchVersion; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// - /// http://www.mtholyoke.edu/~lballest/Pubs/arab_stem05.pdf + /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with + /// {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and + /// {@link ArabicStemFilter}. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. /// - /// The analysis package contains three primary components: - ///
    - ///
  • {@link ArabicNormalizationFilter}: Arabic orthographic normalization. - ///
  • {@link ArabicStemFilter}: Arabic light stemming. - ///
  • Arabic stop words file: a set of default Arabic stop words. - ///
- class LPPCONTRIBAPI ArabicAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - ArabicAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - ArabicAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - virtual ~ArabicAnalyzer(); - - LUCENE_CLASS(ArabicAnalyzer); - - public: - /// Default Arabic stopwords in UTF-8 format. - /// - /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html - /// The stopword list is BSD-Licensed. - static const uint8_t DEFAULT_STOPWORD_FILE[]; - - protected: - /// Contains the stopwords used with the StopFilter. - HashSet stoptable; - - LuceneVersion::Version matchVersion; - - public: - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with - /// {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and - /// {@link ArabicStemFilter}. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with - /// {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and - /// {@link ArabicStemFilter}. 
- virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI ArabicAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~ArabicAnalyzerSavedStreams(); - - LUCENE_CLASS(ArabicAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with + /// {@link LowerCaseFilter}, {@link StopFilter}, {@link ArabicNormalizationFilter} and + /// {@link ArabicStemFilter}. + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI ArabicAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~ArabicAnalyzerSavedStreams(); + + LUCENE_CLASS(ArabicAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/ArabicLetterTokenizer.h b/src/contrib/include/ArabicLetterTokenizer.h index 7e78b10b..79622877 100644 --- a/src/contrib/include/ArabicLetterTokenizer.h +++ b/src/contrib/include/ArabicLetterTokenizer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,33 +10,33 @@ #include "LuceneContrib.h" #include "LetterTokenizer.h" -namespace Lucene -{ - /// Tokenizer that breaks text into runs of letters and diacritics. - /// - /// The problem with the standard Letter tokenizer is that it fails on diacritics. - /// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc. 
- /// - class LPPCONTRIBAPI ArabicLetterTokenizer : public LetterTokenizer - { - public: - /// Construct a new ArabicLetterTokenizer. - ArabicLetterTokenizer(ReaderPtr input); - - /// Construct a new ArabicLetterTokenizer using a given {@link AttributeSource}. - ArabicLetterTokenizer(AttributeSourcePtr source, ReaderPtr input); - - /// Construct a new ArabicLetterTokenizer using a given {@link AttributeFactory}. - ArabicLetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input); - - virtual ~ArabicLetterTokenizer(); - - LUCENE_CLASS(ArabicLetterTokenizer); - - public: - /// Allows for Letter category or NonspacingMark category - virtual bool isTokenChar(wchar_t c); - }; +namespace Lucene { + +/// Tokenizer that breaks text into runs of letters and diacritics. +/// +/// The problem with the standard Letter tokenizer is that it fails on diacritics. +/// Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc. +/// +class LPPCONTRIBAPI ArabicLetterTokenizer : public LetterTokenizer { +public: + /// Construct a new ArabicLetterTokenizer. + ArabicLetterTokenizer(const ReaderPtr& input); + + /// Construct a new ArabicLetterTokenizer using a given {@link AttributeSource}. + ArabicLetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + + /// Construct a new ArabicLetterTokenizer using a given {@link AttributeFactory}. 
+ ArabicLetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + + virtual ~ArabicLetterTokenizer(); + + LUCENE_CLASS(ArabicLetterTokenizer); + +public: + /// Allows for Letter category or NonspacingMark category + virtual bool isTokenChar(wchar_t c); +}; + } #endif diff --git a/src/contrib/include/ArabicNormalizationFilter.h b/src/contrib/include/ArabicNormalizationFilter.h index 6417475b..47b4ed31 100644 --- a/src/contrib/include/ArabicNormalizationFilter.h +++ b/src/contrib/include/ArabicNormalizationFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,24 +10,24 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography. - class LPPCONTRIBAPI ArabicNormalizationFilter : public TokenFilter - { - public: - ArabicNormalizationFilter(TokenStreamPtr input); - virtual ~ArabicNormalizationFilter(); - - LUCENE_CLASS(ArabicNormalizationFilter); - - protected: - ArabicNormalizerPtr normalizer; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; +namespace Lucene { + +/// A {@link TokenFilter} that applies {@link ArabicNormalizer} to normalize the orthography. 
+class LPPCONTRIBAPI ArabicNormalizationFilter : public TokenFilter { +public: + ArabicNormalizationFilter(const TokenStreamPtr& input); + virtual ~ArabicNormalizationFilter(); + + LUCENE_CLASS(ArabicNormalizationFilter); + +protected: + ArabicNormalizerPtr normalizer; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/ArabicNormalizer.h b/src/contrib/include/ArabicNormalizer.h index 906d11c2..318414a0 100644 --- a/src/contrib/include/ArabicNormalizer.h +++ b/src/contrib/include/ArabicNormalizer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,64 +10,64 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Normalizer for Arabic. - /// - /// Normalization is done in-place for efficiency, operating on a termbuffer. - /// - /// Normalization is defined as: - ///
    - ///
  • Normalization of hamza with alef seat to a bare alef. - ///
  • Normalization of teh marbuta to heh - ///
  • Normalization of dotless yeh (alef maksura) to yeh. - ///
  • Removal of Arabic diacritics (the harakat) - ///
  • Removal of tatweel (stretching character). - ///
- class LPPCONTRIBAPI ArabicNormalizer : public LuceneObject - { - public: - virtual ~ArabicNormalizer(); - - LUCENE_CLASS(ArabicNormalizer); - - public: - static const wchar_t ALEF; - static const wchar_t ALEF_MADDA; - static const wchar_t ALEF_HAMZA_ABOVE; - static const wchar_t ALEF_HAMZA_BELOW; - - static const wchar_t YEH; - static const wchar_t DOTLESS_YEH; - - static const wchar_t TEH_MARBUTA; - static const wchar_t HEH; - - static const wchar_t TATWEEL; - - static const wchar_t FATHATAN; - static const wchar_t DAMMATAN; - static const wchar_t KASRATAN; - static const wchar_t FATHA; - static const wchar_t DAMMA; - static const wchar_t KASRA; - static const wchar_t SHADDA; - static const wchar_t SUKUN; - - public: - /// Normalize an input buffer of Arabic text - /// @param s input buffer - /// @param len length of input buffer - /// @return length of input buffer after normalization - int32_t normalize(wchar_t* s, int32_t len); - - /// Delete a character in-place - /// @param s Input Buffer - /// @param pos Position of character to delete - /// @param len length of input buffer - /// @return length of input buffer after deletion - int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); - }; +namespace Lucene { + +/// Normalizer for Arabic. +/// +/// Normalization is done in-place for efficiency, operating on a termbuffer. +/// +/// Normalization is defined as: +///
    +///
  • Normalization of hamza with alef seat to a bare alef. +///
  • Normalization of teh marbuta to heh +///
  • Normalization of dotless yeh (alef maksura) to yeh. +///
  • Removal of Arabic diacritics (the harakat) +///
  • Removal of tatweel (stretching character). +///
+class LPPCONTRIBAPI ArabicNormalizer : public LuceneObject { +public: + virtual ~ArabicNormalizer(); + + LUCENE_CLASS(ArabicNormalizer); + +public: + static const wchar_t ALEF; + static const wchar_t ALEF_MADDA; + static const wchar_t ALEF_HAMZA_ABOVE; + static const wchar_t ALEF_HAMZA_BELOW; + + static const wchar_t YEH; + static const wchar_t DOTLESS_YEH; + + static const wchar_t TEH_MARBUTA; + static const wchar_t HEH; + + static const wchar_t TATWEEL; + + static const wchar_t FATHATAN; + static const wchar_t DAMMATAN; + static const wchar_t KASRATAN; + static const wchar_t FATHA; + static const wchar_t DAMMA; + static const wchar_t KASRA; + static const wchar_t SHADDA; + static const wchar_t SUKUN; + +public: + /// Normalize an input buffer of Arabic text + /// @param s input buffer + /// @param len length of input buffer + /// @return length of input buffer after normalization + int32_t normalize(wchar_t* s, int32_t len); + + /// Delete a character in-place + /// @param s Input Buffer + /// @param pos Position of character to delete + /// @param len length of input buffer + /// @return length of input buffer after deletion + int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); +}; + } #endif diff --git a/src/contrib/include/ArabicStemFilter.h b/src/contrib/include/ArabicStemFilter.h index 2342d581..3cbd93cb 100644 --- a/src/contrib/include/ArabicStemFilter.h +++ b/src/contrib/include/ArabicStemFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,24 +10,24 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words. - class LPPCONTRIBAPI ArabicStemFilter : public TokenFilter - { - public: - ArabicStemFilter(TokenStreamPtr input); - virtual ~ArabicStemFilter(); - - LUCENE_CLASS(ArabicStemFilter); - - protected: - ArabicStemmerPtr stemmer; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; +namespace Lucene { + +/// A {@link TokenFilter} that applies {@link ArabicStemmer} to stem Arabic words. +class LPPCONTRIBAPI ArabicStemFilter : public TokenFilter { +public: + ArabicStemFilter(const TokenStreamPtr& input); + virtual ~ArabicStemFilter(); + + LUCENE_CLASS(ArabicStemFilter); + +protected: + ArabicStemmerPtr stemmer; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/ArabicStemmer.h b/src/contrib/include/ArabicStemmer.h index b7b38d6b..51787f5f 100644 --- a/src/contrib/include/ArabicStemmer.h +++ b/src/contrib/include/ArabicStemmer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,89 +10,89 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Stemmer for Arabic. - /// - /// Stemming is done in-place for efficiency, operating on a termbuffer. - /// - /// Stemming is defined as: - ///
    - ///
  • Removal of attached definite article, conjunction, and prepositions. - ///
  • Stemming of common suffixes. - ///
- class LPPCONTRIBAPI ArabicStemmer : public LuceneObject - { - public: - virtual ~ArabicStemmer(); - - LUCENE_CLASS(ArabicStemmer); - - public: - static const wchar_t ALEF; - static const wchar_t BEH; - static const wchar_t TEH_MARBUTA; - static const wchar_t TEH; - static const wchar_t FEH; - static const wchar_t KAF; - static const wchar_t LAM; - static const wchar_t NOON; - static const wchar_t HEH; - static const wchar_t WAW; - static const wchar_t YEH; - - public: - static const Collection prefixes(); - static const Collection suffixes(); - - /// Stem an input buffer of Arabic text. - /// @param s input buffer - /// @param len length of input buffer - /// @return length of input buffer after normalization - int32_t stem(wchar_t* s, int32_t len); - - /// Stem a prefix off an Arabic word. - /// @param s input buffer - /// @param len length of input buffer - /// @return new length of input buffer after stemming. - int32_t stemPrefix(wchar_t* s, int32_t len); - - /// Stem suffix(es) off an Arabic word. 
- /// @param s input buffer - /// @param len length of input buffer - /// @return new length of input buffer after stemming - int32_t stemSuffix(wchar_t* s, int32_t len); - - /// Returns true if the prefix matches and can be stemmed - /// @param s input buffer - /// @param len length of input buffer - /// @param prefix prefix to check - /// @return true if the prefix matches and can be stemmed - bool startsWith(wchar_t* s, int32_t len, const String& prefix); - - /// Returns true if the suffix matches and can be stemmed - /// @param s input buffer - /// @param len length of input buffer - /// @param suffix suffix to check - /// @return true if the suffix matches and can be stemmed - bool endsWith(wchar_t* s, int32_t len, const String& suffix); - - protected: - /// Delete n characters in-place - /// @param s Input Buffer - /// @param pos Position of character to delete - /// @param len Length of input buffer - /// @param chars number of characters to delete - /// @return length of input buffer after deletion - int32_t deleteChars(wchar_t* s, int32_t pos, int32_t len, int32_t chars); - - /// Delete a character in-place - /// @param s Input Buffer - /// @param pos Position of character to delete - /// @param len length of input buffer - /// @return length of input buffer after deletion - int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); - }; +namespace Lucene { + +/// Stemmer for Arabic. +/// +/// Stemming is done in-place for efficiency, operating on a termbuffer. +/// +/// Stemming is defined as: +///
    +///
  • Removal of attached definite article, conjunction, and prepositions. +///
  • Stemming of common suffixes. +///
+class LPPCONTRIBAPI ArabicStemmer : public LuceneObject { +public: + virtual ~ArabicStemmer(); + + LUCENE_CLASS(ArabicStemmer); + +public: + static const wchar_t ALEF; + static const wchar_t BEH; + static const wchar_t TEH_MARBUTA; + static const wchar_t TEH; + static const wchar_t FEH; + static const wchar_t KAF; + static const wchar_t LAM; + static const wchar_t NOON; + static const wchar_t HEH; + static const wchar_t WAW; + static const wchar_t YEH; + +public: + static const Collection prefixes(); + static const Collection suffixes(); + + /// Stem an input buffer of Arabic text. + /// @param s input buffer + /// @param len length of input buffer + /// @return length of input buffer after normalization + int32_t stem(wchar_t* s, int32_t len); + + /// Stem a prefix off an Arabic word. + /// @param s input buffer + /// @param len length of input buffer + /// @return new length of input buffer after stemming. + int32_t stemPrefix(wchar_t* s, int32_t len); + + /// Stem suffix(es) off an Arabic word. 
+ /// @param s input buffer + /// @param len length of input buffer + /// @return new length of input buffer after stemming + int32_t stemSuffix(wchar_t* s, int32_t len); + + /// Returns true if the prefix matches and can be stemmed + /// @param s input buffer + /// @param len length of input buffer + /// @param prefix prefix to check + /// @return true if the prefix matches and can be stemmed + bool startsWith(wchar_t* s, int32_t len, const String& prefix); + + /// Returns true if the suffix matches and can be stemmed + /// @param s input buffer + /// @param len length of input buffer + /// @param suffix suffix to check + /// @return true if the suffix matches and can be stemmed + bool endsWith(wchar_t* s, int32_t len, const String& suffix); + +protected: + /// Delete n characters in-place + /// @param s Input Buffer + /// @param pos Position of character to delete + /// @param len Length of input buffer + /// @param chars number of characters to delete + /// @return length of input buffer after deletion + int32_t deleteChars(wchar_t* s, int32_t pos, int32_t len, int32_t chars); + + /// Delete a character in-place + /// @param s Input Buffer + /// @param pos Position of character to delete + /// @param len length of input buffer + /// @return length of input buffer after deletion + int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); +}; + } #endif diff --git a/src/contrib/include/BrazilianAnalyzer.h b/src/contrib/include/BrazilianAnalyzer.h index c9001387..7fe72c47 100644 --- a/src/contrib/include/BrazilianAnalyzer.h +++ b/src/contrib/include/BrazilianAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,74 +10,73 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for Brazilian Portuguese language. +namespace Lucene { + +/// {@link Analyzer} for Brazilian Portuguese language. +/// +/// Supports an external list of stopwords (words that will not be indexed at all) and an external list of +/// exclusions (words that will not be stemmed, but indexed). +/// +/// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. +class LPPCONTRIBAPI BrazilianAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + BrazilianAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + /// Builds an analyzer with the given stop words and stemming exclusion words. + BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); + + virtual ~BrazilianAnalyzer(); + + LUCENE_CLASS(BrazilianAnalyzer); + +protected: + /// Contains the stopwords used with the {@link StopFilter}. + HashSet stoptable; + + /// Contains words that should be indexed but not stemmed. + HashSet excltable; + + LuceneVersion::Version matchVersion; + + /// List of typical Brazilian Portuguese stopwords. + static const wchar_t* _BRAZILIAN_STOP_WORDS[]; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + void setStemExclusionTable(HashSet exclusions); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// - /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of - /// exclusions (words that will not be stemmed, but indexed). 
+ /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with + /// {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and {@link BrazilianStemFilter}. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. /// - /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. - class LPPCONTRIBAPI BrazilianAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - BrazilianAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - /// Builds an analyzer with the given stop words and stemming exclusion words. - BrazilianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); - - virtual ~BrazilianAnalyzer(); - - LUCENE_CLASS(BrazilianAnalyzer); - - protected: - /// Contains the stopwords used with the {@link StopFilter}. - HashSet stoptable; - - /// Contains words that should be indexed but not stemmed. - HashSet excltable; - - LuceneVersion::Version matchVersion; - - /// List of typical Brazilian Portuguese stopwords. - static const wchar_t* _BRAZILIAN_STOP_WORDS[]; - - public: - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - void setStemExclusionTable(HashSet exclusions); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with - /// {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and {@link BrazilianStemFilter}. 
- virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from an {@link BrazilianLetterTokenizer} filtered with - /// {@link LowerCaseFilter}, {@link StopFilter}, {@link BrazilianNormalizationFilter} and - /// {@link BrazilianStemFilter}. - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI BrazilianAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~BrazilianAnalyzerSavedStreams(); - - LUCENE_CLASS(BrazilianAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from an {@link BrazilianLetterTokenizer} filtered with + /// {@link LowerCaseFilter}, {@link StopFilter}, {@link BrazilianNormalizationFilter} and + /// {@link BrazilianStemFilter}. + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI BrazilianAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~BrazilianAnalyzerSavedStreams(); + + LUCENE_CLASS(BrazilianAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/BrazilianStemFilter.h b/src/contrib/include/BrazilianStemFilter.h index 7080788d..05bf6297 100644 --- a/src/contrib/include/BrazilianStemFilter.h +++ b/src/contrib/include/BrazilianStemFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,29 +10,29 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} that applies {@link BrazilianStemmer}. - class LPPCONTRIBAPI BrazilianStemFilter : public TokenFilter - { - public: - BrazilianStemFilter(TokenStreamPtr input); - BrazilianStemFilter(TokenStreamPtr input, HashSet exclusiontable); - - virtual ~BrazilianStemFilter(); - - LUCENE_CLASS(BrazilianStemFilter); - - protected: - /// {@link BrazilianStemmer} in use by this filter. - BrazilianStemmerPtr stemmer; - - HashSet exclusions; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; +namespace Lucene { + +/// A {@link TokenFilter} that applies {@link BrazilianStemmer}. +class LPPCONTRIBAPI BrazilianStemFilter : public TokenFilter { +public: + BrazilianStemFilter(const TokenStreamPtr& input); + BrazilianStemFilter(const TokenStreamPtr& input, HashSet exclusiontable); + + virtual ~BrazilianStemFilter(); + + LUCENE_CLASS(BrazilianStemFilter); + +protected: + /// {@link BrazilianStemmer} in use by this filter. + BrazilianStemmerPtr stemmer; + + HashSet exclusions; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/BrazilianStemmer.h b/src/contrib/include/BrazilianStemmer.h index ae37877b..11f59c9b 100644 --- a/src/contrib/include/BrazilianStemmer.h +++ b/src/contrib/include/BrazilianStemmer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,108 +10,108 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// A stemmer for Brazilian Portuguese words. - class LPPCONTRIBAPI BrazilianStemmer : public LuceneObject - { - public: - virtual ~BrazilianStemmer(); - - LUCENE_CLASS(BrazilianStemmer); - - protected: - String TERM; - String CT; - String R1; - String R2; - String RV; - - public: - /// Stems the given term to a unique discriminator. - /// - /// @param term The term that should be stemmed. - /// @return Discriminator for term. - String stem(const String& term); - - protected: - /// Checks a term if it can be processed correctly. - /// @return true if, and only if, the given term consists in letters. - bool isStemmable(const String& term); - - /// Checks a term if it can be processed indexed. - /// @return true if it can be indexed - bool isIndexable(const String& term); - - /// See if string is 'a','e','i','o','u' - /// @return true if is vowel - bool isVowel(wchar_t value); - - /// Gets R1. - /// R1 - is the region after the first non-vowel following a vowel, or is the null region at the end of the - /// word if there is no such non-vowel. - /// @return null or a string representing R1 - String getR1(const String& value); - - /// Gets RV. - /// RV - if the second letter is a consonant, RV is the region after the next following vowel, - /// - /// OR if the first two letters are vowels, RV is the region after the next consonant, - /// - /// AND otherwise (consonant-vowel case) RV is the region after the third letter. - /// - /// BUT RV is the end of the word if this positions cannot be found. 
- /// @return null or a string representing RV - String getRV(const String& value); - - /// 1) Turn to lowercase - /// 2) Remove accents - /// 3) ã -> a ; õ -> o - /// 4) ç -> c - /// @return null or a string transformed - String changeTerm(const String& value); - - /// Check if a string ends with a suffix. - /// @return true if the string ends with the specified suffix. - bool checkSuffix(const String& value, const String& suffix); - - /// Replace a string suffix by another - /// @return the replaced String - String replaceSuffix(const String& value, const String& toReplace, const String& changeTo); - - /// Remove a string suffix. - /// @return the String without the suffix; - String removeSuffix(const String& value, const String& toRemove); - - /// See if a suffix is preceded by a String. - /// @return true if the suffix is preceded. - bool suffixPreceded(const String& value, const String& suffix, const String& preceded); - - /// Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'. - void createCT(const String& term); - - /// Standard suffix removal. - /// @return false if no ending was removed - bool step1(); - - /// Verb suffixes. - /// Search for the longest among the following suffixes in RV, and if found, delete. - /// @return false if no ending was removed - bool step2(); - - /// Delete suffix 'i' if in RV and preceded by 'c' - void step3(); - - /// Residual suffix - /// If the word ends with one of the suffixes (os a i o á í ó) in RV, delete it. - void step4(); - - /// If the word ends with one of (e é ê) in RV,delete it, and if preceded by 'gu' (or 'ci') with - /// the 'u' (or 'i') in RV, delete the 'u' (or 'i') - /// - /// Or if the word ends ç remove the cedilha. - void step5(); - }; +namespace Lucene { + +/// A stemmer for Brazilian Portuguese words. 
+class LPPCONTRIBAPI BrazilianStemmer : public LuceneObject { +public: + virtual ~BrazilianStemmer(); + + LUCENE_CLASS(BrazilianStemmer); + +protected: + String TERM; + String CT; + String R1; + String R2; + String RV; + +public: + /// Stems the given term to a unique discriminator. + /// + /// @param term The term that should be stemmed. + /// @return Discriminator for term. + String stem(const String& term); + +protected: + /// Checks a term if it can be processed correctly. + /// @return true if, and only if, the given term consists in letters. + bool isStemmable(const String& term); + + /// Checks a term if it can be processed indexed. + /// @return true if it can be indexed + bool isIndexable(const String& term); + + /// See if string is 'a','e','i','o','u' + /// @return true if is vowel + bool isVowel(wchar_t value); + + /// Gets R1. + /// R1 - is the region after the first non-vowel following a vowel, or is the null region at the end of the + /// word if there is no such non-vowel. + /// @return null or a string representing R1 + String getR1(const String& value); + + /// Gets RV. + /// RV - if the second letter is a consonant, RV is the region after the next following vowel, + /// + /// OR if the first two letters are vowels, RV is the region after the next consonant, + /// + /// AND otherwise (consonant-vowel case) RV is the region after the third letter. + /// + /// BUT RV is the end of the word if this positions cannot be found. + /// @return null or a string representing RV + String getRV(const String& value); + + /// 1) Turn to lowercase + /// 2) Remove accents + /// 3) ã -> a ; õ -> o + /// 4) ç -> c + /// @return null or a string transformed + String changeTerm(const String& value); + + /// Check if a string ends with a suffix. + /// @return true if the string ends with the specified suffix. 
+ bool checkSuffix(const String& value, const String& suffix); + + /// Replace a string suffix by another + /// @return the replaced String + String replaceSuffix(const String& value, const String& toReplace, const String& changeTo); + + /// Remove a string suffix. + /// @return the String without the suffix; + String removeSuffix(const String& value, const String& toRemove); + + /// See if a suffix is preceded by a String. + /// @return true if the suffix is preceded. + bool suffixPreceded(const String& value, const String& suffix, const String& preceded); + + /// Creates CT (changed term) , substituting * 'ã' and 'õ' for 'a~' and 'o~'. + void createCT(const String& term); + + /// Standard suffix removal. + /// @return false if no ending was removed + bool step1(); + + /// Verb suffixes. + /// Search for the longest among the following suffixes in RV, and if found, delete. + /// @return false if no ending was removed + bool step2(); + + /// Delete suffix 'i' if in RV and preceded by 'c' + void step3(); + + /// Residual suffix + /// If the word ends with one of the suffixes (os a i o á í ó) in RV, delete it. + void step4(); + + /// If the word ends with one of (e é ê) in RV,delete it, and if preceded by 'gu' (or 'ci') with + /// the 'u' (or 'i') in RV, delete the 'u' (or 'i') + /// + /// Or if the word ends ç remove the cedilha. + void step5(); +}; + } #endif diff --git a/src/contrib/include/CJKAnalyzer.h b/src/contrib/include/CJKAnalyzer.h index e1ae37a3..d420f567 100644 --- a/src/contrib/include/CJKAnalyzer.h +++ b/src/contrib/include/CJKAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,58 +10,57 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// An {@link Analyzer} that tokenizes text with {@link CJKTokenizer} and filters with {@link StopFilter} - class LPPCONTRIBAPI CJKAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - CJKAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - CJKAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - virtual ~CJKAnalyzer(); - - LUCENE_CLASS(CJKAnalyzer); - - protected: - /// Contains the stopwords used with the {@link StopFilter}. - HashSet stoptable; - - LuceneVersion::Version matchVersion; - - /// List of typical English stopwords. - static const wchar_t* _STOP_WORDS[]; - - public: - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with {@link StopFilter} - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. 
- /// - /// @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with {@link StopFilter} - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI CJKAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~CJKAnalyzerSavedStreams(); - - LUCENE_CLASS(CJKAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; +namespace Lucene { + +/// An {@link Analyzer} that tokenizes text with {@link CJKTokenizer} and filters with {@link StopFilter} +class LPPCONTRIBAPI CJKAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + CJKAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + CJKAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + virtual ~CJKAnalyzer(); + + LUCENE_CLASS(CJKAnalyzer); + +protected: + /// Contains the stopwords used with the {@link StopFilter}. + HashSet stoptable; + + LuceneVersion::Version matchVersion; + + /// List of typical English stopwords. + static const wchar_t* _STOP_WORDS[]; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. + /// + /// @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with {@link StopFilter} + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. 
+ /// + /// @return A {@link TokenStream} built from {@link CJKTokenizer}, filtered with {@link StopFilter} + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI CJKAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~CJKAnalyzerSavedStreams(); + + LUCENE_CLASS(CJKAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/CJKTokenizer.h b/src/contrib/include/CJKTokenizer.h index 9e4dcc65..0209d441 100644 --- a/src/contrib/include/CJKTokenizer.h +++ b/src/contrib/include/CJKTokenizer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,93 +9,93 @@ #include "Tokenizer.h" -namespace Lucene -{ - /// CJKTokenizer is designed for Chinese, Japanese, and Korean languages. - /// - /// The tokens returned are every two adjacent characters with overlap match. - /// - /// Example: "lucene C1C2C3C4" will be segmented to: "lucene" "C1C2" "C2C3" "C3C4". - /// - /// Additionally, the following is applied to Latin text (such as English): - ///
    - ///
  • Text is converted to lowercase. - ///
  • Numeric digits, '+', '#', and '_' are tokenized as letters. - ///
  • Full-width forms are converted to half-width forms. - ///
- /// For more info on Asian language (Chinese, Japanese, and Korean) text segmentation: - /// please search google - class LPPCONTRIBAPI CJKTokenizer : public Tokenizer - { - public: - CJKTokenizer(ReaderPtr input); - CJKTokenizer(AttributeSourcePtr source, ReaderPtr input); - CJKTokenizer(AttributeFactoryPtr factory, ReaderPtr input); - - virtual ~CJKTokenizer(); - - LUCENE_CLASS(CJKTokenizer); - - public: - /// Word token type - static const int32_t WORD_TYPE; - - /// Single byte token type - static const int32_t SINGLE_TOKEN_TYPE; - - /// Double byte token type - static const int32_t DOUBLE_TOKEN_TYPE; - - /// Names for token types - static const wchar_t* TOKEN_TYPE_NAMES[]; - - protected: - /// Max word length - static const int32_t MAX_WORD_LEN; - - static const int32_t IO_BUFFER_SIZE; - - enum UnicodeBlock { NONE, BASIC_LATIN, HALFWIDTH_AND_FULLWIDTH_FORMS }; - - protected: - /// word offset, used to imply which character(in) is parsed - int32_t offset; - - /// the index used only for ioBuffer - int32_t bufferIndex; - - /// data length - int32_t dataLen; - - /// character buffer, store the characters which are used to compose the returned Token - CharArray buffer; - - /// I/O buffer, used to store the content of the input (one of the members of Tokenizer) - CharArray ioBuffer; - - /// word type: single=>ASCII double=>non-ASCII word=>default - int32_t tokenType; - - /// tag: previous character is a cached double-byte character "C1C2C3C4" - /// ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened) - /// C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4" - bool preIsTokened; - - TermAttributePtr termAtt; - OffsetAttributePtr offsetAtt; - TypeAttributePtr typeAtt; - - protected: - /// return unicode block for given character (see http://unicode.org/Public/UNIDATA/Blocks.txt) - UnicodeBlock unicodeBlock(wchar_t c); - - public: - virtual void initialize(); - virtual bool incrementToken(); - virtual void end(); - virtual void reset(); - 
virtual void reset(ReaderPtr input); - }; +namespace Lucene { + +/// CJKTokenizer is designed for Chinese, Japanese, and Korean languages. +/// +/// The tokens returned are every two adjacent characters with overlap match. +/// +/// Example: "lucene C1C2C3C4" will be segmented to: "lucene" "C1C2" "C2C3" "C3C4". +/// +/// Additionally, the following is applied to Latin text (such as English): +///
    +///
  • Text is converted to lowercase. +///
  • Numeric digits, '+', '#', and '_' are tokenized as letters. +///
  • Full-width forms are converted to half-width forms. +///
+/// For more info on Asian language (Chinese, Japanese, and Korean) text segmentation: +/// please search google +class LPPCONTRIBAPI CJKTokenizer : public Tokenizer { +public: + CJKTokenizer(const ReaderPtr& input); + CJKTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + CJKTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + + virtual ~CJKTokenizer(); + + LUCENE_CLASS(CJKTokenizer); + +public: + /// Word token type + static const int32_t WORD_TYPE; + + /// Single byte token type + static const int32_t SINGLE_TOKEN_TYPE; + + /// Double byte token type + static const int32_t DOUBLE_TOKEN_TYPE; + + /// Names for token types + static const wchar_t* TOKEN_TYPE_NAMES[]; + +protected: + /// Max word length + static const int32_t MAX_WORD_LEN; + + static const int32_t IO_BUFFER_SIZE; + + enum UnicodeBlock { NONE, BASIC_LATIN, HALFWIDTH_AND_FULLWIDTH_FORMS }; + +protected: + /// word offset, used to imply which character(in) is parsed + int32_t offset; + + /// the index used only for ioBuffer + int32_t bufferIndex; + + /// data length + int32_t dataLen; + + /// character buffer, store the characters which are used to compose the returned Token + CharArray buffer; + + /// I/O buffer, used to store the content of the input (one of the members of Tokenizer) + CharArray ioBuffer; + + /// word type: single=>ASCII double=>non-ASCII word=>default + int32_t tokenType; + + /// tag: previous character is a cached double-byte character "C1C2C3C4" + /// ----(set the C1 isTokened) C1C2 "C2C3C4" ----(set the C2 isTokened) + /// C1C2 C2C3 "C3C4" ----(set the C3 isTokened) "C1C2 C2C3 C3C4" + bool preIsTokened; + + TermAttributePtr termAtt; + OffsetAttributePtr offsetAtt; + TypeAttributePtr typeAtt; + +protected: + /// return unicode block for given character (see http://unicode.org/Public/UNIDATA/Blocks.txt) + UnicodeBlock unicodeBlock(wchar_t c); + +public: + virtual void initialize(); + virtual bool incrementToken(); + virtual void end(); + 
virtual void reset(); + virtual void reset(const ReaderPtr& input); +}; + } #endif diff --git a/src/contrib/include/CMakeLists.txt b/src/contrib/include/CMakeLists.txt new file mode 100644 index 00000000..b497dc4f --- /dev/null +++ b/src/contrib/include/CMakeLists.txt @@ -0,0 +1,11 @@ +#################################### +# install headers +#################################### + +file(GLOB_RECURSE lucene_headers + "${CMAKE_CURRENT_SOURCE_DIR}/*.h" +) + +install( + FILES ${lucene_headers} + DESTINATION include/lucene++ ) diff --git a/src/contrib/include/ChineseAnalyzer.h b/src/contrib/include/ChineseAnalyzer.h index 4026164f..118ffa61 100644 --- a/src/contrib/include/ChineseAnalyzer.h +++ b/src/contrib/include/ChineseAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,40 +10,39 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// An {@link Analyzer} that tokenizes text with {@link ChineseTokenizer} and filters with {@link ChineseFilter} - class LPPCONTRIBAPI ChineseAnalyzer : public Analyzer - { - public: - virtual ~ChineseAnalyzer(); - - LUCENE_CLASS(ChineseAnalyzer); - - public: - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from {@link ChineseTokenizer}, filtered with {@link ChineseFilter} - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. 
- /// - /// @return A {@link TokenStream} built from {@link ChineseTokenizer}, filtered with {@link ChineseFilter} - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI ChineseAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~ChineseAnalyzerSavedStreams(); - - LUCENE_CLASS(ChineseAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; +namespace Lucene { + +/// An {@link Analyzer} that tokenizes text with {@link ChineseTokenizer} and filters with {@link ChineseFilter} +class LPPCONTRIBAPI ChineseAnalyzer : public Analyzer { +public: + virtual ~ChineseAnalyzer(); + + LUCENE_CLASS(ChineseAnalyzer); + +public: + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. + /// + /// @return A {@link TokenStream} built from {@link ChineseTokenizer}, filtered with {@link ChineseFilter} + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. + /// + /// @return A {@link TokenStream} built from {@link ChineseTokenizer}, filtered with {@link ChineseFilter} + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI ChineseAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~ChineseAnalyzerSavedStreams(); + + LUCENE_CLASS(ChineseAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/ChineseFilter.h b/src/contrib/include/ChineseFilter.h index efcb5fa9..1c15788d 100644 --- a/src/contrib/include/ChineseFilter.h +++ b/src/contrib/include/ChineseFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,33 +10,33 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} with a stop word table. - ///
    - ///
  • Numeric tokens are removed. - ///
  • English tokens must be larger than 1 character. - ///
  • One Chinese character as one Chinese word. - ///
- class LPPCONTRIBAPI ChineseFilter : public TokenFilter - { - public: - ChineseFilter(TokenStreamPtr input); - virtual ~ChineseFilter(); - - LUCENE_CLASS(ChineseFilter); - - public: - /// Only English now, Chinese to be added later. - static const wchar_t* STOP_WORDS[]; - - protected: - HashSet stopTable; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; +namespace Lucene { + +/// A {@link TokenFilter} with a stop word table. +///
    +///
  • Numeric tokens are removed. +///
  • English tokens must be larger than 1 character. +///
  • One Chinese character as one Chinese word. +///
+class LPPCONTRIBAPI ChineseFilter : public TokenFilter { +public: + ChineseFilter(const TokenStreamPtr& input); + virtual ~ChineseFilter(); + + LUCENE_CLASS(ChineseFilter); + +public: + /// Only English now, Chinese to be added later. + static const wchar_t* STOP_WORDS[]; + +protected: + HashSet stopTable; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/ChineseTokenizer.h b/src/contrib/include/ChineseTokenizer.h index 5af00cc8..968ee105 100644 --- a/src/contrib/include/ChineseTokenizer.h +++ b/src/contrib/include/ChineseTokenizer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,73 +9,73 @@ #include "Tokenizer.h" -namespace Lucene -{ - /// Tokenize Chinese text as individual Chinese characters. - /// - /// The difference between ChineseTokenizer and ChineseTokenizer is that they have different - /// token parsing logic. - /// - /// For example, if the Chinese text "C1C2C3C4" is to be indexed: - ///
    - ///
  • The tokens returned from ChineseTokenizer are C1, C2, C3, C4. - ///
  • The tokens returned from the ChineseTokenizer are C1C2, C2C3, C3C4. - ///
- /// - /// Therefore the index created by ChineseTokenizer is much larger. - /// - /// The problem is that when searching for C1, C1C2, C1C3, C4C2, C1C2C3 ... the - /// ChineseTokenizer works, but the ChineseTokenizer will not work. - class LPPCONTRIBAPI ChineseTokenizer : public Tokenizer - { - public: - ChineseTokenizer(ReaderPtr input); - ChineseTokenizer(AttributeSourcePtr source, ReaderPtr input); - ChineseTokenizer(AttributeFactoryPtr factory, ReaderPtr input); - - virtual ~ChineseTokenizer(); - - LUCENE_CLASS(ChineseTokenizer); - - protected: - /// Max word length - static const int32_t MAX_WORD_LEN; - - static const int32_t IO_BUFFER_SIZE; - - protected: - /// word offset, used to imply which character(in) is parsed - int32_t offset; - - /// the index used only for ioBuffer - int32_t bufferIndex; - - /// data length - int32_t dataLen; - - /// character buffer, store the characters which are used to compose the returned Token - CharArray buffer; - - /// I/O buffer, used to store the content of the input (one of the members of Tokenizer) - CharArray ioBuffer; - - TermAttributePtr termAtt; - OffsetAttributePtr offsetAtt; - - int32_t length; - int32_t start; - - public: - virtual void initialize(); - virtual bool incrementToken(); - virtual void end(); - virtual void reset(); - virtual void reset(ReaderPtr input); - - protected: - void push(wchar_t c); - bool flush(); - }; +namespace Lucene { + +/// Tokenize Chinese text as individual Chinese characters. +/// +/// The difference between ChineseTokenizer and ChineseTokenizer is that they have different +/// token parsing logic. +/// +/// For example, if the Chinese text "C1C2C3C4" is to be indexed: +///
    +///
  • The tokens returned from ChineseTokenizer are C1, C2, C3, C4. +///
  • The tokens returned from the ChineseTokenizer are C1C2, C2C3, C3C4. +///
+/// +/// Therefore the index created by ChineseTokenizer is much larger. +/// +/// The problem is that when searching for C1, C1C2, C1C3, C4C2, C1C2C3 ... the +/// ChineseTokenizer works, but the ChineseTokenizer will not work. +class LPPCONTRIBAPI ChineseTokenizer : public Tokenizer { +public: + ChineseTokenizer(const ReaderPtr& input); + ChineseTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + ChineseTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + + virtual ~ChineseTokenizer(); + + LUCENE_CLASS(ChineseTokenizer); + +protected: + /// Max word length + static const int32_t MAX_WORD_LEN; + + static const int32_t IO_BUFFER_SIZE; + +protected: + /// word offset, used to imply which character(in) is parsed + int32_t offset; + + /// the index used only for ioBuffer + int32_t bufferIndex; + + /// data length + int32_t dataLen; + + /// character buffer, store the characters which are used to compose the returned Token + CharArray buffer; + + /// I/O buffer, used to store the content of the input (one of the members of Tokenizer) + CharArray ioBuffer; + + TermAttributePtr termAtt; + OffsetAttributePtr offsetAtt; + + int32_t length; + int32_t start; + +public: + virtual void initialize(); + virtual bool incrementToken(); + virtual void end(); + virtual void reset(); + virtual void reset(const ReaderPtr& input); + +protected: + void push(wchar_t c); + bool flush(); +}; + } #endif diff --git a/src/contrib/include/ContribInc.h b/src/contrib/include/ContribInc.h index e11e3716..87d50b5c 100644 --- a/src/contrib/include/ContribInc.h +++ b/src/contrib/include/ContribInc.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,8 +8,13 @@ #include "targetver.h" +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN +#endif + +#ifndef NOMINMAX #define NOMINMAX +#endif #include diff --git a/src/contrib/include/CzechAnalyzer.h b/src/contrib/include/CzechAnalyzer.h index e4c202c3..93c7d229 100644 --- a/src/contrib/include/CzechAnalyzer.h +++ b/src/contrib/include/CzechAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,65 +10,64 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for Czech language. +namespace Lucene { + +/// {@link Analyzer} for Czech language. +/// +/// Supports an external list of stopwords (words that will not be indexed at all). +/// A default set of stopwords is used unless an alternative list is specified. +/// +/// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. +class LPPCONTRIBAPI CzechAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + CzechAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + CzechAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + virtual ~CzechAnalyzer(); + + LUCENE_CLASS(CzechAnalyzer); + +protected: + /// Contains the stopwords used with the {@link StopFilter}. + HashSet stoptable; + + LuceneVersion::Version matchVersion; + + /// Default Czech stopwords in UTF-8 format. 
+ static const uint8_t _CZECH_STOP_WORDS[]; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// - /// Supports an external list of stopwords (words that will not be indexed at all). - /// A default set of stopwords is used unless an alternative list is specified. + /// @return A {@link TokenStream} built from {@link StandardTokenizer}, filtered with {@link StandardFilter}, + /// {@link LowerCaseFilter}, and {@link StopFilter} + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. /// - /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. - class LPPCONTRIBAPI CzechAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - CzechAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - CzechAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - virtual ~CzechAnalyzer(); - - LUCENE_CLASS(CzechAnalyzer); - - protected: - /// Contains the stopwords used with the {@link StopFilter}. - HashSet stoptable; - - LuceneVersion::Version matchVersion; - - /// Default Czech stopwords in UTF-8 format. - static const uint8_t _CZECH_STOP_WORDS[]; - - public: - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. 
- /// - /// @return A {@link TokenStream} built from {@link StandardTokenizer}, filtered with {@link StandardFilter}, - /// {@link LowerCaseFilter}, and {@link StopFilter} - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from {@link StandardTokenizer}, filtered with {@link StandardFilter}, - /// {@link LowerCaseFilter}, and {@link StopFilter} - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI CzechAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~CzechAnalyzerSavedStreams(); - - LUCENE_CLASS(CzechAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from {@link StandardTokenizer}, filtered with {@link StandardFilter}, + /// {@link LowerCaseFilter}, and {@link StopFilter} + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI CzechAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~CzechAnalyzerSavedStreams(); + + LUCENE_CLASS(CzechAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/DefaultEncoder.h b/src/contrib/include/DefaultEncoder.h index c23fdf5a..25b27328 100644 --- a/src/contrib/include/DefaultEncoder.h +++ b/src/contrib/include/DefaultEncoder.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,18 +9,18 @@ #include "Encoder.h" -namespace Lucene -{ - /// Simple {@link Encoder} implementation that does not modify the output. - class LPPCONTRIBAPI DefaultEncoder : public Encoder, public LuceneObject - { - public: - virtual ~DefaultEncoder(); - LUCENE_CLASS(DefaultEncoder); - - public: - virtual String encodeText(const String& originalText); - }; +namespace Lucene { + +/// Simple {@link Encoder} implementation that does not modify the output. +class LPPCONTRIBAPI DefaultEncoder : public Encoder, public LuceneObject { +public: + virtual ~DefaultEncoder(); + LUCENE_CLASS(DefaultEncoder); + +public: + virtual String encodeText(const String& originalText); +}; + } #endif diff --git a/src/contrib/include/DutchAnalyzer.h b/src/contrib/include/DutchAnalyzer.h index e2d1a8cf..83cfc585 100644 --- a/src/contrib/include/DutchAnalyzer.h +++ b/src/contrib/include/DutchAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,78 +10,77 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for Dutch language. +namespace Lucene { + +/// {@link Analyzer} for Dutch language. +/// +/// Supports an external list of stopwords (words that will not be indexed at all) and an external list of +/// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an +/// alternative list is specified, but the exclusion list is empty by default. 
+/// +/// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. +class LPPCONTRIBAPI DutchAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + DutchAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + /// Builds an analyzer with the given stop words and stemming exclusion words. + DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); + + virtual ~DutchAnalyzer(); + + LUCENE_CLASS(DutchAnalyzer); + +protected: + /// Contains the stopwords used with the {@link StopFilter}. + HashSet stoptable; + + /// Contains words that should be indexed but not stemmed. + HashSet excltable; + + MapStringString stemdict; + + LuceneVersion::Version matchVersion; + + /// List of typical Dutch stopwords. + static const wchar_t* _DUTCH_STOP_WORDS[]; + +public: + virtual void initialize(); + + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + void setStemExclusionTable(HashSet exclusions); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// - /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of - /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an - /// alternative list is specified, but the exclusion list is empty by default. + /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with + /// {@link StandardFilter}, {@link StopFilter} and {@link DutchStemFilter}. 
+ virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. /// - /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. - class LPPCONTRIBAPI DutchAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - DutchAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - /// Builds an analyzer with the given stop words and stemming exclusion words. - DutchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); - - virtual ~DutchAnalyzer(); - - LUCENE_CLASS(DutchAnalyzer); - - protected: - /// Contains the stopwords used with the {@link StopFilter}. - HashSet stoptable; - - /// Contains words that should be indexed but not stemmed. - HashSet excltable; - - MapStringString stemdict; - - LuceneVersion::Version matchVersion; - - /// List of typical Dutch stopwords. - static const wchar_t* _DUTCH_STOP_WORDS[]; - - public: - virtual void initialize(); - - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - void setStemExclusionTable(HashSet exclusions); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with - /// {@link StandardFilter}, {@link StopFilter} and {@link DutchStemFilter}. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. 
- /// - /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with - /// {@link StandardFilter}, {@link StopFilter} and {@link DutchStemFilter}. - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI DutchAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~DutchAnalyzerSavedStreams(); - - LUCENE_CLASS(DutchAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with + /// {@link StandardFilter}, {@link StopFilter} and {@link DutchStemFilter}. + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI DutchAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~DutchAnalyzerSavedStreams(); + + LUCENE_CLASS(DutchAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/DutchStemFilter.h b/src/contrib/include/DutchStemFilter.h index 2bce9a68..86ba03e4 100644 --- a/src/contrib/include/DutchStemFilter.h +++ b/src/contrib/include/DutchStemFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,53 +10,53 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} that stems Dutch words. - /// - /// It supports a table of words that should not be stemmed at all. The stemmer used can - /// be changed at runtime after the filter object is created (as long as it is a - /// {@link DutchStemmer}). 
- /// - /// NOTE: This stemmer does not implement the Snowball algorithm correctly, specifically - /// doubled consonants. It is recommended that you consider using the "Dutch" stemmer in - /// the snowball package instead. This stemmer will likely be deprecated in a future release. - class LPPCONTRIBAPI DutchStemFilter : public TokenFilter - { - public: - DutchStemFilter(TokenStreamPtr input); - - /// Builds a DutchStemFilter that uses an exclusion table. - DutchStemFilter(TokenStreamPtr input, HashSet exclusiontable); - - /// Builds a DutchStemFilter that uses an exclusion table and dictionary of word stem - /// pairs, that overrule the algorithm. - DutchStemFilter(TokenStreamPtr input, HashSet exclusiontable, MapStringString stemdictionary); - - virtual ~DutchStemFilter(); - - LUCENE_CLASS(DutchStemFilter); - - protected: - /// {@link DutchStemmer} in use by this filter. - DutchStemmerPtr stemmer; - - HashSet exclusions; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - - /// Set a alternative/custom {@link DutchStemmer} for this filter. - void setStemmer(DutchStemmerPtr stemmer); - - /// Set an alternative exclusion list for this filter. - void setExclusionSet(HashSet exclusiontable); - - /// Set dictionary for stemming, this dictionary overrules the algorithm, so you can - /// correct for a particular unwanted word-stem pair. - void setStemDictionary(MapStringString dict); - }; +namespace Lucene { + +/// A {@link TokenFilter} that stems Dutch words. +/// +/// It supports a table of words that should not be stemmed at all. The stemmer used can +/// be changed at runtime after the filter object is created (as long as it is a +/// {@link DutchStemmer}). +/// +/// NOTE: This stemmer does not implement the Snowball algorithm correctly, specifically +/// doubled consonants. It is recommended that you consider using the "Dutch" stemmer in +/// the snowball package instead. This stemmer will likely be deprecated in a future release. 
+class LPPCONTRIBAPI DutchStemFilter : public TokenFilter { +public: + DutchStemFilter(const TokenStreamPtr& input); + + /// Builds a DutchStemFilter that uses an exclusion table. + DutchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable); + + /// Builds a DutchStemFilter that uses an exclusion table and dictionary of word stem + /// pairs, that overrule the algorithm. + DutchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable, MapStringString stemdictionary); + + virtual ~DutchStemFilter(); + + LUCENE_CLASS(DutchStemFilter); + +protected: + /// {@link DutchStemmer} in use by this filter. + DutchStemmerPtr stemmer; + + HashSet exclusions; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); + + /// Set a alternative/custom {@link DutchStemmer} for this filter. + void setStemmer(const DutchStemmerPtr& stemmer); + + /// Set an alternative exclusion list for this filter. + void setExclusionSet(HashSet exclusiontable); + + /// Set dictionary for stemming, this dictionary overrules the algorithm, so you can + /// correct for a particular unwanted word-stem pair. + void setStemDictionary(MapStringString dict); +}; + } #endif diff --git a/src/contrib/include/DutchStemmer.h b/src/contrib/include/DutchStemmer.h index 1ace4ef7..1b400069 100644 --- a/src/contrib/include/DutchStemmer.h +++ b/src/contrib/include/DutchStemmer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,84 +10,84 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// A stemmer for Dutch words. +namespace Lucene { + +/// A stemmer for Dutch words. 
+/// +/// The algorithm is an implementation of the +/// dutch stemming +/// algorithm in Martin Porter's snowball project. +class LPPCONTRIBAPI DutchStemmer : public LuceneObject { +public: + DutchStemmer(); + virtual ~DutchStemmer(); + + LUCENE_CLASS(DutchStemmer); + +protected: + /// Buffer for the terms while stemming them. + String buffer; + + bool removedE; + MapStringString stemDict; + + int32_t R1; + int32_t R2; + +public: + /// Stems the given term to a unique discriminator. /// - /// The algorithm is an implementation of the - /// dutch stemming - /// algorithm in Martin Porter's snowball project. - class LPPCONTRIBAPI DutchStemmer : public LuceneObject - { - public: - DutchStemmer(); - virtual ~DutchStemmer(); - - LUCENE_CLASS(DutchStemmer); - - protected: - /// Buffer for the terms while stemming them. - String buffer; - - bool removedE; - MapStringString stemDict; - - int32_t R1; - int32_t R2; - - public: - /// Stems the given term to a unique discriminator. - /// - /// @param term The term that should be stemmed. - /// @return Discriminator for term. - String stem(const String& term); - - void setStemDictionary(MapStringString dict); - - protected: - bool enEnding(); - - void step1(); - - /// Delete suffix e if in R1 and preceded by a non-vowel, and then undouble the ending. - void step2(); - - /// Delete "heid" - void step3a(); - - /// A d-suffix, or derivational suffix, enables a new word, often with a different grammatical - /// category, or with a different sense, to be built from another word. Whether a d-suffix can - /// be attached is discovered not from the rules of grammar, but by referring to a dictionary. - /// So in English, ness can be added to certain adjectives to form corresponding nouns - /// (littleness, kindness, foolishness ...) but not to all adjectives (not for example, to big, - /// cruel, wise ...) d-suffixes can be used to change meaning, often in rather exotic ways. 
- /// Remove "ing", "end", "ig", "lijk", "baar" and "bar" - void step3b(); - - /// Undouble vowel. If the words ends CVD, where C is a non-vowel, D is a non-vowel other than - /// I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, - /// brood -> brod). - void step4(); - - /// Checks if a term could be stemmed. - bool isStemmable(); - - /// Substitute ä, ë, ï, ö, ü, á , é, í, ó, ú - void substitute(); - - bool isValidSEnding(int32_t index); - bool isValidEnEnding(int32_t index); - - void unDouble(); - void unDouble(int32_t endIndex); - - int32_t getRIndex(int32_t start); - - void storeYandI(); - void reStoreYandI(); - - bool isVowel(wchar_t c); - }; + /// @param term The term that should be stemmed. + /// @return Discriminator for term. + String stem(const String& term); + + void setStemDictionary(MapStringString dict); + +protected: + bool enEnding(); + + void step1(); + + /// Delete suffix e if in R1 and preceded by a non-vowel, and then undouble the ending. + void step2(); + + /// Delete "heid" + void step3a(); + + /// A d-suffix, or derivational suffix, enables a new word, often with a different grammatical + /// category, or with a different sense, to be built from another word. Whether a d-suffix can + /// be attached is discovered not from the rules of grammar, but by referring to a dictionary. + /// So in English, ness can be added to certain adjectives to form corresponding nouns + /// (littleness, kindness, foolishness ...) but not to all adjectives (not for example, to big, + /// cruel, wise ...) d-suffixes can be used to change meaning, often in rather exotic ways. + /// Remove "ing", "end", "ig", "lijk", "baar" and "bar" + void step3b(); + + /// Undouble vowel. If the words ends CVD, where C is a non-vowel, D is a non-vowel other than + /// I, and V is double a, e, o or u, remove one of the vowels from V (for example, maan -> man, + /// brood -> brod). + void step4(); + + /// Checks if a term could be stemmed. 
+ bool isStemmable(); + + /// Substitute ä, ë, ï, ö, ü, á , é, í, ó, ú + void substitute(); + + bool isValidSEnding(int32_t index); + bool isValidEnEnding(int32_t index); + + void unDouble(); + void unDouble(int32_t endIndex); + + int32_t getRIndex(int32_t start); + + void storeYandI(); + void reStoreYandI(); + + bool isVowel(wchar_t c); +}; + } #endif diff --git a/src/contrib/include/ElisionFilter.h b/src/contrib/include/ElisionFilter.h index a1185741..32ff4c07 100644 --- a/src/contrib/include/ElisionFilter.h +++ b/src/contrib/include/ElisionFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,37 +10,37 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// Removes elisions from a {@link TokenStream}. For example, "l'avion" (the plane) will be - /// tokenized as "avion" (plane). - /// - /// Note that {@link StandardTokenizer} sees " ' " as a space, and cuts it out. - /// @see Elision in Wikipedia - class LPPCONTRIBAPI ElisionFilter : public TokenFilter - { - public: - /// Constructs an elision filter with standard stop words. - ElisionFilter(TokenStreamPtr input); - - /// Constructs an elision filter with a Set of stop words - ElisionFilter(TokenStreamPtr input, HashSet articles); - - virtual ~ElisionFilter(); - - LUCENE_CLASS(ElisionFilter); - - protected: - static const wchar_t apostrophes[]; - - CharArraySetPtr articles; - TermAttributePtr termAtt; - - public: - void setArticles(HashSet articles); - - virtual bool incrementToken(); - }; +namespace Lucene { + +/// Removes elisions from a {@link TokenStream}. 
For example, "l'avion" (the plane) will be +/// tokenized as "avion" (plane). +/// +/// Note that {@link StandardTokenizer} sees " ' " as a space, and cuts it out. +/// @see Elision in Wikipedia +class LPPCONTRIBAPI ElisionFilter : public TokenFilter { +public: + /// Constructs an elision filter with standard stop words. + ElisionFilter(const TokenStreamPtr& input); + + /// Constructs an elision filter with a Set of stop words + ElisionFilter(const TokenStreamPtr& input, HashSet articles); + + virtual ~ElisionFilter(); + + LUCENE_CLASS(ElisionFilter); + +protected: + static const wchar_t apostrophes[]; + + CharArraySetPtr articles; + TermAttributePtr termAtt; + +public: + void setArticles(HashSet articles); + + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/Encoder.h b/src/contrib/include/Encoder.h index a6d95747..d2ecdea8 100644 --- a/src/contrib/include/Encoder.h +++ b/src/contrib/include/Encoder.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,18 +10,18 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Encodes original text. The Encoder works with the {@link Formatter} to generate output. - class LPPCONTRIBAPI Encoder - { - public: - virtual ~Encoder(); - LUCENE_INTERFACE(Encoder); - - public: - virtual String encodeText(const String& originalText); - }; +namespace Lucene { + +/// Encodes original text. The Encoder works with the {@link Formatter} to generate output. 
+class LPPCONTRIBAPI Encoder { +public: + virtual ~Encoder(); + LUCENE_INTERFACE(Encoder); + +public: + virtual String encodeText(const String& originalText); +}; + } #endif diff --git a/src/contrib/include/Formatter.h b/src/contrib/include/Formatter.h index b93541a4..b090ff9a 100644 --- a/src/contrib/include/Formatter.h +++ b/src/contrib/include/Formatter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,21 +10,21 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Processes terms found in the original text, typically by applying some form of mark-up to highlight - /// terms in HTML search results pages. - class LPPCONTRIBAPI Formatter - { - public: - virtual ~Formatter(); - LUCENE_INTERFACE(Formatter); - - public: - /// @param originalText The section of text being considered for markup - /// @param tokenGroup contains one or several overlapping Tokens along with their scores and positions. - virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); - }; +namespace Lucene { + +/// Processes terms found in the original text, typically by applying some form of mark-up to highlight +/// terms in HTML search results pages. +class LPPCONTRIBAPI Formatter { +public: + virtual ~Formatter(); + LUCENE_INTERFACE(Formatter); + +public: + /// @param originalText The section of text being considered for markup + /// @param tokenGroup contains one or several overlapping Tokens along with their scores and positions. 
+ virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); +}; + } #endif diff --git a/src/contrib/include/Fragmenter.h b/src/contrib/include/Fragmenter.h index e197e971..897c4bbe 100644 --- a/src/contrib/include/Fragmenter.h +++ b/src/contrib/include/Fragmenter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,29 +10,29 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Implements the policy for breaking text into multiple fragments for consideration by the - /// {@link Highlighter} class. A sophisticated implementation may do this on the basis of - /// detecting end of sentences in the text. - class LPPCONTRIBAPI Fragmenter - { - public: - virtual ~Fragmenter(); - LUCENE_INTERFACE(Fragmenter); - - public: - /// Initializes the Fragmenter. You can grab references to the Attributes you are - /// interested in from tokenStream and then access the values in {@link #isNewFragment()}. - /// @param originalText the original source text. - /// @param tokenStream the {@link TokenStream} to be fragmented. - virtual void start(const String& originalText, TokenStreamPtr tokenStream); - - /// Test to see if this token from the stream should be held in a new TextFragment. - /// Every time this is called, the TokenStream passed to start(String, TokenStream) - /// will have been incremented. - virtual bool isNewFragment(); - }; +namespace Lucene { + +/// Implements the policy for breaking text into multiple fragments for consideration by the +/// {@link Highlighter} class. 
A sophisticated implementation may do this on the basis of +/// detecting end of sentences in the text. +class LPPCONTRIBAPI Fragmenter { +public: + virtual ~Fragmenter(); + LUCENE_INTERFACE(Fragmenter); + +public: + /// Initializes the Fragmenter. You can grab references to the Attributes you are + /// interested in from tokenStream and then access the values in {@link #isNewFragment()}. + /// @param originalText the original source text. + /// @param tokenStream the {@link TokenStream} to be fragmented. + virtual void start(const String& originalText, const TokenStreamPtr& tokenStream); + + /// Test to see if this token from the stream should be held in a new TextFragment. + /// Every time this is called, the TokenStream passed to start(String, TokenStream) + /// will have been incremented. + virtual bool isNewFragment(); +}; + } #endif diff --git a/src/contrib/include/FrenchAnalyzer.h b/src/contrib/include/FrenchAnalyzer.h index df72a790..efabd495 100644 --- a/src/contrib/include/FrenchAnalyzer.h +++ b/src/contrib/include/FrenchAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,74 +10,73 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for French language. +namespace Lucene { + +/// {@link Analyzer} for French language. +/// +/// Supports an external list of stopwords (words that will not be indexed at all) and an external list of +/// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an +/// alternative list is specified, but the exclusion list is empty by default. 
+/// +/// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. +class LPPCONTRIBAPI FrenchAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + FrenchAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + /// Builds an analyzer with the given stop words and stemming exclusion words. + FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); + + virtual ~FrenchAnalyzer(); + + LUCENE_CLASS(FrenchAnalyzer); + +protected: + /// Contains the stopwords used with the {@link StopFilter}. + HashSet stoptable; + + /// Contains words that should be indexed but not stemmed. + HashSet excltable; + + LuceneVersion::Version matchVersion; + + /// List of typical French stopwords. + static const wchar_t* _FRENCH_STOP_WORDS[]; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + void setStemExclusionTable(HashSet exclusions); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// - /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of - /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an - /// alternative list is specified, but the exclusion list is empty by default. + /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with + /// {@link StandardFilter}, {@link StopFilter}, {@link FrenchStemFilter}, and {@link LowerCaseFilter}. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. 
/// - /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. - class LPPCONTRIBAPI FrenchAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - FrenchAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - /// Builds an analyzer with the given stop words and stemming exclusion words. - FrenchAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); - - virtual ~FrenchAnalyzer(); - - LUCENE_CLASS(FrenchAnalyzer); - - protected: - /// Contains the stopwords used with the {@link StopFilter}. - HashSet stoptable; - - /// Contains words that should be indexed but not stemmed. - HashSet excltable; - - LuceneVersion::Version matchVersion; - - /// List of typical French stopwords. - static const wchar_t* _FRENCH_STOP_WORDS[]; - - public: - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - void setStemExclusionTable(HashSet exclusions); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with - /// {@link StandardFilter}, {@link StopFilter}, {@link FrenchStemFilter}, and {@link LowerCaseFilter}. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from an {@link StandardTokenizer} filtered with - /// {@link StandardFilter}, {@link StopFilter}, {@link FrenchStemFilter} and {@link LowerCaseFilter}. 
- virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI FrenchAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~FrenchAnalyzerSavedStreams(); - - LUCENE_CLASS(FrenchAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from an {@link StandardTokenizer} filtered with + /// {@link StandardFilter}, {@link StopFilter}, {@link FrenchStemFilter} and {@link LowerCaseFilter}. + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI FrenchAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~FrenchAnalyzerSavedStreams(); + + LUCENE_CLASS(FrenchAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/FrenchStemFilter.h b/src/contrib/include/FrenchStemFilter.h index e017e28d..40413d54 100644 --- a/src/contrib/include/FrenchStemFilter.h +++ b/src/contrib/include/FrenchStemFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,45 +10,45 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} that stems French words. - /// - /// It supports a table of words that should not be stemmed at all. The stemmer used can - /// be changed at runtime after the filter object is created (as long as it is a - /// {@link FrenchStemmer}). - /// - /// NOTE: This stemmer does not implement the Snowball algorithm correctly, especially - /// involving case problems. 
It is recommended that you consider using the "French" stemmer - /// in the snowball package instead. This stemmer will likely be deprecated in a future release. - class LPPCONTRIBAPI FrenchStemFilter : public TokenFilter - { - public: - FrenchStemFilter(TokenStreamPtr input); - - /// Builds a FrenchStemFilter that uses an exclusion table. - FrenchStemFilter(TokenStreamPtr input, HashSet exclusiontable); - - virtual ~FrenchStemFilter(); - - LUCENE_CLASS(FrenchStemFilter); - - protected: - /// {@link FrenchStemmer} in use by this filter. - FrenchStemmerPtr stemmer; - - HashSet exclusions; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - - /// Set a alternative/custom {@link FrenchStemmer} for this filter. - void setStemmer(FrenchStemmerPtr stemmer); - - /// Set an alternative exclusion list for this filter. - void setExclusionSet(HashSet exclusiontable); - }; +namespace Lucene { + +/// A {@link TokenFilter} that stems French words. +/// +/// It supports a table of words that should not be stemmed at all. The stemmer used can +/// be changed at runtime after the filter object is created (as long as it is a +/// {@link FrenchStemmer}). +/// +/// NOTE: This stemmer does not implement the Snowball algorithm correctly, especially +/// involving case problems. It is recommended that you consider using the "French" stemmer +/// in the snowball package instead. This stemmer will likely be deprecated in a future release. +class LPPCONTRIBAPI FrenchStemFilter : public TokenFilter { +public: + FrenchStemFilter(const TokenStreamPtr& input); + + /// Builds a FrenchStemFilter that uses an exclusion table. + FrenchStemFilter(const TokenStreamPtr& input, HashSet exclusiontable); + + virtual ~FrenchStemFilter(); + + LUCENE_CLASS(FrenchStemFilter); + +protected: + /// {@link FrenchStemmer} in use by this filter. 
+ FrenchStemmerPtr stemmer; + + HashSet exclusions; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); + + /// Set a alternative/custom {@link FrenchStemmer} for this filter. + void setStemmer(const FrenchStemmerPtr& stemmer); + + /// Set an alternative exclusion list for this filter. + void setExclusionSet(HashSet exclusiontable); +}; + } #endif diff --git a/src/contrib/include/FrenchStemmer.h b/src/contrib/include/FrenchStemmer.h index 2e1b1417..b17b2dcd 100644 --- a/src/contrib/include/FrenchStemmer.h +++ b/src/contrib/include/FrenchStemmer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,170 +10,170 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// A stemmer for French words. +namespace Lucene { + +/// A stemmer for French words. +/// +/// The algorithm is based on the work of Dr Martin Porter on his snowball project refer to +/// http://snowball.sourceforge.net/french/stemmer.html (French stemming algorithm) for details. +class LPPCONTRIBAPI FrenchStemmer : public LuceneObject { +public: + FrenchStemmer(); + virtual ~FrenchStemmer(); + + LUCENE_CLASS(FrenchStemmer); + +protected: + /// Buffer for the terms while stemming them. + String stringBuffer; + + /// A temporary buffer, used to reconstruct R2. + String tempBuffer; + + /// Region R0 is equal to the whole buffer. + String R0; + + /// Region RV + /// + /// "If the word begins with two vowels, RV is the region after the third letter, otherwise + /// the region after the first vowel not at the beginning of the word, or the end of the + /// word if these positions cannot be found." 
+ String RV; + + /// Region R1 /// - /// The algorithm is based on the work of Dr Martin Porter on his snowball project refer to - /// http://snowball.sourceforge.net/french/stemmer.html (French stemming algorithm) for details. - class LPPCONTRIBAPI FrenchStemmer : public LuceneObject - { - public: - FrenchStemmer(); - virtual ~FrenchStemmer(); - - LUCENE_CLASS(FrenchStemmer); - - protected: - /// Buffer for the terms while stemming them. - String stringBuffer; - - /// A temporary buffer, used to reconstruct R2. - String tempBuffer; - - /// Region R0 is equal to the whole buffer. - String R0; - - /// Region RV - /// - /// "If the word begins with two vowels, RV is the region after the third letter, otherwise - /// the region after the first vowel not at the beginning of the word, or the end of the - /// word if these positions cannot be found." - String RV; - - /// Region R1 - /// - /// "R1 is the region after the first non-vowel following a vowel or is the null region at - /// the end of the word if there is no such non-vowel" - String R1; - - /// Region R2 - /// - /// "R2 is the region after the first non-vowel in R1 following a vowel or is the null region - /// at the end of the word if there is no such non-vowel" - String R2; - - /// Set to true if we need to perform step 2 - bool suite; - - /// Set to true if the buffer was modified - bool modified; - - public: - /// Stems the given term to a unique discriminator. - /// - /// @param term The term that should be stemmed. - /// @return Discriminator for term. - String stem(const String& term); - - protected: - /// Sets the search region Strings it needs to be done each time the buffer was modified. - void setStrings(); - - /// First step of the Porter Algorithm. - /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. - void step1(); - - /// Second step (A) of the Porter Algorithm. 
- /// Will be performed if nothing changed from the first step or changed were done in the amment, - /// emment, ments or ment suffixes. - /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. - /// @return true if something changed in the buffer - bool step2a(); - - /// Second step (B) of the Porter Algorithm. - /// Will be performed if step 2 A was performed unsuccessfully. - /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. - void step2b(); - - /// Third step of the Porter Algorithm. - /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. - void step3(); - - /// Fourth step of the Porter Algorithm. - /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. - void step4(); - - /// Fifth step of the Porter Algorithm. - /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. - void step5(); - - /// Sixth step of the Porter Algorithm. - /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. - void step6(); - - /// Delete a suffix searched in zone "source" if zone "from" contains prefix + search string. - /// @param source String - the primary source zone for search. - /// @param search String[] - the strings to search for suppression. - /// @param from String - the secondary source zone for search. - /// @param prefix String - the prefix to add to the search string to test. - /// @return true if modified - bool deleteFromIfPrecededIn(const String& source, Collection search, const String& from, const String& prefix); - - /// Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel. - /// @param source String - the primary source zone for search. - /// @param search String[] - the strings to search for suppression. - /// @param vowel boolean - true if we need a vowel before the search string. 
- /// @param from String - the secondary source zone for search (where vowel could be). - /// @return true if modified - bool deleteFromIfTestVowelBeforeIn(const String& source, Collection search, bool vowel, const String& from); - - /// Delete a suffix searched in zone "source" if preceded by the prefix. - /// @param source String - the primary source zone for search. - /// @param search String[] - the strings to search for suppression. - /// @param prefix String - the prefix to add to the search string to test. - /// @param without boolean - true if it will be deleted even without prefix found. - void deleteButSuffixFrom(const String& source, Collection search, const String& prefix, bool without); - - /// Delete a suffix searched in zone "source" if preceded by prefix or replace it with the - /// replace string if preceded by the prefix in the zone "from" or delete the suffix if specified. - /// @param source String - the primary source zone for search. - /// @param search String[] - the strings to search for suppression. - /// @param prefix String - the prefix to add to the search string to test. - /// @param without boolean - true if it will be deleted even without prefix found. - void deleteButSuffixFromElseReplace(const String& source, Collection search, const String& prefix, bool without, const String& from, const String& replace); - - /// Replace a search string with another within the source zone. - /// @param source String - the source zone for search. - /// @param search String[] - the strings to search for replacement. - /// @param replace String - the replacement string. - bool replaceFrom(const String& source, Collection search, const String& replace); - - /// Delete a search string within the source zone. - /// @param source the source zone for search. - /// @param suffix the strings to search for suppression. - void deleteFrom(const String& source, Collection suffix); - - /// Test if a char is a French vowel, including accentuated ones. 
- /// @param ch the char to test. - /// @return true if the char is a vowel - bool isVowel(wchar_t ch); - - /// Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string. - /// "R is the region after the first non-vowel following a vowel or is the null region at the - /// end of the word if there is no such non-vowel". - /// @param buffer the in buffer. - /// @return the resulting string. - String retrieveR(const String& buffer); - - /// Retrieve the "RV zone" from a buffer an return the corresponding string. - /// "If the word begins with two vowels, RV is the region after the third letter, otherwise the - /// region after the first vowel not at the beginning of the word, or the end of the word if - /// these positions cannot be found." - /// @param buffer the in buffer - /// @return the resulting string - String retrieveRV(const String& buffer); - - /// Turns u and i preceded AND followed by a vowel to UpperCase<. - /// Turns y preceded OR followed by a vowel to UpperCase. - /// Turns u preceded by q to UpperCase. - /// @param buffer the buffer to treat - void treatVowels(String& buffer); - - /// Checks a term if it can be processed correctly. - /// @return boolean - true if, and only if, the given term consists in letters. - bool isStemmable(const String& term); - }; + /// "R1 is the region after the first non-vowel following a vowel or is the null region at + /// the end of the word if there is no such non-vowel" + String R1; + + /// Region R2 + /// + /// "R2 is the region after the first non-vowel in R1 following a vowel or is the null region + /// at the end of the word if there is no such non-vowel" + String R2; + + /// Set to true if we need to perform step 2 + bool suite; + + /// Set to true if the buffer was modified + bool modified; + +public: + /// Stems the given term to a unique discriminator. + /// + /// @param term The term that should be stemmed. + /// @return Discriminator for term. 
+ String stem(const String& term); + +protected: + /// Sets the search region Strings it needs to be done each time the buffer was modified. + void setStrings(); + + /// First step of the Porter Algorithm. + /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. + void step1(); + + /// Second step (A) of the Porter Algorithm. + /// Will be performed if nothing changed from the first step or changed were done in the amment, + /// emment, ments or ment suffixes. + /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. + /// @return true if something changed in the buffer + bool step2a(); + + /// Second step (B) of the Porter Algorithm. + /// Will be performed if step 2 A was performed unsuccessfully. + /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. + void step2b(); + + /// Third step of the Porter Algorithm. + /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. + void step3(); + + /// Fourth step of the Porter Algorithm. + /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. + void step4(); + + /// Fifth step of the Porter Algorithm. + /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. + void step5(); + + /// Sixth step of the Porter Algorithm. + /// Refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation. + void step6(); + + /// Delete a suffix searched in zone "source" if zone "from" contains prefix + search string. + /// @param source String - the primary source zone for search. + /// @param search String[] - the strings to search for suppression. + /// @param from String - the secondary source zone for search. + /// @param prefix String - the prefix to add to the search string to test. 
+ /// @return true if modified + bool deleteFromIfPrecededIn(const String& source, Collection search, const String& from, const String& prefix); + + /// Delete a suffix searched in zone "source" if the preceding letter is (or isn't) a vowel. + /// @param source String - the primary source zone for search. + /// @param search String[] - the strings to search for suppression. + /// @param vowel boolean - true if we need a vowel before the search string. + /// @param from String - the secondary source zone for search (where vowel could be). + /// @return true if modified + bool deleteFromIfTestVowelBeforeIn(const String& source, Collection search, bool vowel, const String& from); + + /// Delete a suffix searched in zone "source" if preceded by the prefix. + /// @param source String - the primary source zone for search. + /// @param search String[] - the strings to search for suppression. + /// @param prefix String - the prefix to add to the search string to test. + /// @param without boolean - true if it will be deleted even without prefix found. + void deleteButSuffixFrom(const String& source, Collection search, const String& prefix, bool without); + + /// Delete a suffix searched in zone "source" if preceded by prefix or replace it with the + /// replace string if preceded by the prefix in the zone "from" or delete the suffix if specified. + /// @param source String - the primary source zone for search. + /// @param search String[] - the strings to search for suppression. + /// @param prefix String - the prefix to add to the search string to test. + /// @param without boolean - true if it will be deleted even without prefix found. + void deleteButSuffixFromElseReplace(const String& source, Collection search, const String& prefix, bool without, const String& from, const String& replace); + + /// Replace a search string with another within the source zone. + /// @param source String - the source zone for search. 
+ /// @param search String[] - the strings to search for replacement. + /// @param replace String - the replacement string. + bool replaceFrom(const String& source, Collection search, const String& replace); + + /// Delete a search string within the source zone. + /// @param source the source zone for search. + /// @param suffix the strings to search for suppression. + void deleteFrom(const String& source, Collection suffix); + + /// Test if a char is a French vowel, including accentuated ones. + /// @param ch the char to test. + /// @return true if the char is a vowel + bool isVowel(wchar_t ch); + + /// Retrieve the "R zone" (1 or 2 depending on the buffer) and return the corresponding string. + /// "R is the region after the first non-vowel following a vowel or is the null region at the + /// end of the word if there is no such non-vowel". + /// @param buffer the in buffer. + /// @return the resulting string. + String retrieveR(const String& buffer); + + /// Retrieve the "RV zone" from a buffer an return the corresponding string. + /// "If the word begins with two vowels, RV is the region after the third letter, otherwise the + /// region after the first vowel not at the beginning of the word, or the end of the word if + /// these positions cannot be found." + /// @param buffer the in buffer + /// @return the resulting string + String retrieveRV(const String& buffer); + + /// Turns u and i preceded AND followed by a vowel to UpperCase<. + /// Turns y preceded OR followed by a vowel to UpperCase. + /// Turns u preceded by q to UpperCase. + /// @param buffer the buffer to treat + void treatVowels(String& buffer); + + /// Checks a term if it can be processed correctly. + /// @return boolean - true if, and only if, the given term consists in letters. 
+ bool isStemmable(const String& term); +}; + } #endif diff --git a/src/contrib/include/GermanAnalyzer.h b/src/contrib/include/GermanAnalyzer.h index 448ccb2d..16adc7ec 100644 --- a/src/contrib/include/GermanAnalyzer.h +++ b/src/contrib/include/GermanAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,75 +10,74 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for German language. +namespace Lucene { + +/// {@link Analyzer} for German language. +/// +/// Supports an external list of stopwords (words that will not be indexed at all) and an external list of +/// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an +/// alternative list is specified, but the exclusion list is empty by default. +/// +/// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. +class LPPCONTRIBAPI GermanAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + GermanAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + /// Builds an analyzer with the given stop words and stemming exclusion words. + GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); + + virtual ~GermanAnalyzer(); + + LUCENE_CLASS(GermanAnalyzer); + +protected: + /// Contains the stopwords used with the {@link StopFilter}. 
+ HashSet stopSet; + + /// Contains words that should be indexed but not stemmed. + HashSet exclusionSet; + + LuceneVersion::Version matchVersion; + + /// List of typical German stopwords. + static const wchar_t* _GERMAN_STOP_WORDS[]; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + void setStemExclusionTable(HashSet exclusions); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// - /// Supports an external list of stopwords (words that will not be indexed at all) and an external list of - /// exclusions (words that will not be stemmed, but indexed). A default set of stopwords is used unless an - /// alternative list is specified, but the exclusion list is empty by default. + /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with + /// {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and {@link GermanStemFilter}. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. /// - /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. - class LPPCONTRIBAPI GermanAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - GermanAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - /// Builds an analyzer with the given stop words and stemming exclusion words. - GermanAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords, HashSet exclusions); - - virtual ~GermanAnalyzer(); - - LUCENE_CLASS(GermanAnalyzer); - - protected: - /// Contains the stopwords used with the {@link StopFilter}. 
- HashSet stopSet; - - /// Contains words that should be indexed but not stemmed. - HashSet exclusionSet; - - LuceneVersion::Version matchVersion; - - /// List of typical German stopwords. - static const wchar_t* _GERMAN_STOP_WORDS[]; - - public: - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - void setStemExclusionTable(HashSet exclusions); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with - /// {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}, and {@link GermanStemFilter}. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from an {@link GermanLetterTokenizer} filtered with - /// {@link LowerCaseFilter}, {@link StopFilter}, {@link GermanNormalizationFilter} and - /// {@link GermanStemFilter}. - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI GermanAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~GermanAnalyzerSavedStreams(); - - LUCENE_CLASS(GermanAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from an {@link GermanLetterTokenizer} filtered with + /// {@link LowerCaseFilter}, {@link StopFilter}, {@link GermanNormalizationFilter} and + /// {@link GermanStemFilter}. 
+ virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI GermanAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~GermanAnalyzerSavedStreams(); + + LUCENE_CLASS(GermanAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/GermanStemFilter.h b/src/contrib/include/GermanStemFilter.h index be34a9d7..35b45279 100644 --- a/src/contrib/include/GermanStemFilter.h +++ b/src/contrib/include/GermanStemFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,41 +10,41 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} that stems German words. - /// - /// It supports a table of words that should not be stemmed at all. The stemmer used can - /// be changed at runtime after the filter object is created (as long as it is a - /// {@link GermanStemmer}). - class LPPCONTRIBAPI GermanStemFilter : public TokenFilter - { - public: - GermanStemFilter(TokenStreamPtr input); - - /// Builds a GermanStemFilter that uses an exclusion table. - GermanStemFilter(TokenStreamPtr input, HashSet exclusionSet); - - virtual ~GermanStemFilter(); - - LUCENE_CLASS(GermanStemFilter); - - protected: - /// {@link GermanStemmer} in use by this filter. - GermanStemmerPtr stemmer; - - HashSet exclusionSet; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - - /// Set a alternative/custom {@link GermanStemmer} for this filter. 
- void setStemmer(GermanStemmerPtr stemmer); - - /// Set an alternative exclusion list for this filter. - void setExclusionSet(HashSet exclusionSet); - }; +namespace Lucene { + +/// A {@link TokenFilter} that stems German words. +/// +/// It supports a table of words that should not be stemmed at all. The stemmer used can +/// be changed at runtime after the filter object is created (as long as it is a +/// {@link GermanStemmer}). +class LPPCONTRIBAPI GermanStemFilter : public TokenFilter { +public: + GermanStemFilter(const TokenStreamPtr& input); + + /// Builds a GermanStemFilter that uses an exclusion table. + GermanStemFilter(const TokenStreamPtr& input, HashSet exclusionSet); + + virtual ~GermanStemFilter(); + + LUCENE_CLASS(GermanStemFilter); + +protected: + /// {@link GermanStemmer} in use by this filter. + GermanStemmerPtr stemmer; + + HashSet exclusionSet; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); + + /// Set a alternative/custom {@link GermanStemmer} for this filter. + void setStemmer(const GermanStemmerPtr& stemmer); + + /// Set an alternative exclusion list for this filter. + void setExclusionSet(HashSet exclusionSet); +}; + } #endif diff --git a/src/contrib/include/GermanStemmer.h b/src/contrib/include/GermanStemmer.h index 252a68ce..66534293 100644 --- a/src/contrib/include/GermanStemmer.h +++ b/src/contrib/include/GermanStemmer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,64 +10,64 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// A stemmer for German words. +namespace Lucene { + +/// A stemmer for German words. 
+/// +/// The algorithm is based on the report "A Fast and Simple Stemming Algorithm for German Words" by Jörg +/// Caumanns (joerg.caumanns at isst.fhg.de). +class LPPCONTRIBAPI GermanStemmer : public LuceneObject { +public: + GermanStemmer(); + virtual ~GermanStemmer(); + + LUCENE_CLASS(GermanStemmer); + +protected: + /// Buffer for the terms while stemming them. + String buffer; + + /// Amount of characters that are removed with substitute() while stemming. + int32_t substCount; + +public: + /// Stems the given term to a unique discriminator. /// - /// The algorithm is based on the report "A Fast and Simple Stemming Algorithm for German Words" by Jörg - /// Caumanns (joerg.caumanns at isst.fhg.de). - class LPPCONTRIBAPI GermanStemmer : public LuceneObject - { - public: - GermanStemmer(); - virtual ~GermanStemmer(); - - LUCENE_CLASS(GermanStemmer); - - protected: - /// Buffer for the terms while stemming them. - String buffer; - - /// Amount of characters that are removed with substitute() while stemming. - int32_t substCount; - - public: - /// Stems the given term to a unique discriminator. - /// - /// @param term The term that should be stemmed. - /// @return Discriminator for term. - String stem(const String& term); - - protected: - /// Checks if a term could be stemmed. - /// @return true if, and only if, the given term consists in letters. - bool isStemmable(); - - /// Suffix stripping (stemming) on the current term. The stripping is reduced to the seven "base" - /// suffixes "e", "s", "n", "t", "em", "er" and * "nd", from which all regular suffixes are build - /// of. The simplification causes some overstemming, and way more irregular stems, but still - /// provides unique. - /// Discriminators in the most of those cases. - /// The algorithm is context free, except of the length restrictions. - void strip(); - - /// Does some optimizations on the term. This optimisations are contextual. 
- void optimize(); - - /// Removes a particle denotion ("ge") from a term. - void removeParticleDenotion(); - - /// Do some substitutions for the term to reduce overstemming: - /// - /// - Substitute Umlauts with their corresponding vowel: äöü -> aou, "ß" is substituted by "ss" - /// - Substitute a second char of a pair of equal characters with an asterisk: ?? -> ?* - /// - Substitute some common character combinations with a token: sch/ch/ei/ie/ig/st -> $/§/%/&/#/! - void substitute(); - - /// Undoes the changes made by substitute(). That are character pairs and character combinations. - /// Umlauts will remain as their corresponding vowel, as "ß" remains as "ss". - void resubstitute(); - }; + /// @param term The term that should be stemmed. + /// @return Discriminator for term. + String stem(const String& term); + +protected: + /// Checks if a term could be stemmed. + /// @return true if, and only if, the given term consists in letters. + bool isStemmable(); + + /// Suffix stripping (stemming) on the current term. The stripping is reduced to the seven "base" + /// suffixes "e", "s", "n", "t", "em", "er" and * "nd", from which all regular suffixes are build + /// of. The simplification causes some overstemming, and way more irregular stems, but still + /// provides unique. + /// Discriminators in the most of those cases. + /// The algorithm is context free, except of the length restrictions. + void strip(); + + /// Does some optimizations on the term. This optimisations are contextual. + void optimize(); + + /// Removes a particle denotion ("ge") from a term. + void removeParticleDenotion(); + + /// Do some substitutions for the term to reduce overstemming: + /// + /// - Substitute Umlauts with their corresponding vowel: äöü -> aou, "ß" is substituted by "ss" + /// - Substitute a second char of a pair of equal characters with an asterisk: ?? -> ?* + /// - Substitute some common character combinations with a token: sch/ch/ei/ie/ig/st -> $/§/%/&/#/! 
+ void substitute(); + + /// Undoes the changes made by substitute(). That are character pairs and character combinations. + /// Umlauts will remain as their corresponding vowel, as "ß" remains as "ss". + void resubstitute(); +}; + } #endif diff --git a/src/contrib/include/GradientFormatter.h b/src/contrib/include/GradientFormatter.h index be6a23af..853b243e 100644 --- a/src/contrib/include/GradientFormatter.h +++ b/src/contrib/include/GradientFormatter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,52 +9,52 @@ #include "Formatter.h" -namespace Lucene -{ - /// Formats text with different color intensity depending on the score of the term. 
- class LPPCONTRIBAPI GradientFormatter : public Formatter, public LuceneObject - { - public: - GradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor); - virtual ~GradientFormatter(); - - LUCENE_CLASS(GradientFormatter); - - protected: - double maxScore; - bool highlightForeground; - bool highlightBackground; - - public: - int32_t fgRMin; - int32_t fgGMin; - int32_t fgBMin; - - int32_t fgRMax; - int32_t fgGMax; - int32_t fgBMax; - - int32_t bgRMin; - int32_t bgGMin; - int32_t bgBMin; - - int32_t bgRMax; - int32_t bgGMax; - int32_t bgBMax; - - public: - virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); - - protected: - String getForegroundColorString(double score); - String getBackgroundColorString(double score); - int32_t getColorVal(int32_t colorMin, int32_t colorMax, double score); - - static String intToHex(int32_t i); - - /// Converts a hex string into an int. - static int32_t hexToInt(const String& hex); - }; +namespace Lucene { + +/// Formats text with different color intensity depending on the score of the term. 
+class LPPCONTRIBAPI GradientFormatter : public Formatter, public LuceneObject { +public: + GradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor); + virtual ~GradientFormatter(); + + LUCENE_CLASS(GradientFormatter); + +protected: + double maxScore; + bool highlightForeground; + bool highlightBackground; + +public: + int32_t fgRMin; + int32_t fgGMin; + int32_t fgBMin; + + int32_t fgRMax; + int32_t fgGMax; + int32_t fgBMax; + + int32_t bgRMin; + int32_t bgGMin; + int32_t bgBMin; + + int32_t bgRMax; + int32_t bgGMax; + int32_t bgBMax; + +public: + virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); + +protected: + String getForegroundColorString(double score); + String getBackgroundColorString(double score); + int32_t getColorVal(int32_t colorMin, int32_t colorMax, double score); + + static String intToHex(int32_t i); + + /// Converts a hex string into an int. + static int32_t hexToInt(const String& hex); +}; + } #endif diff --git a/src/contrib/include/GreekAnalyzer.h b/src/contrib/include/GreekAnalyzer.h index dbc48e61..966d9f70 100644 --- a/src/contrib/include/GreekAnalyzer.h +++ b/src/contrib/include/GreekAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,65 +10,64 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for Greek language. +namespace Lucene { + +/// {@link Analyzer} for Greek language. +/// +/// Supports an external list of stopwords (words that will not be indexed at all). 
A default set of stopwords +/// is used unless an alternative list is specified. +/// +/// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. +class LPPCONTRIBAPI GreekAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + GreekAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + GreekAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + virtual ~GreekAnalyzer(); + + LUCENE_CLASS(GreekAnalyzer); + +protected: + /// Contains the stopwords used with the {@link StopFilter}. + HashSet stopSet; + + LuceneVersion::Version matchVersion; + + /// Default Greek stopwords in UTF-8 format. + static const uint8_t _GREEK_STOP_WORDS[]; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. /// - /// Supports an external list of stopwords (words that will not be indexed at all). A default set of stopwords - /// is used unless an alternative list is specified. + /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with + /// {@link GreekLowerCaseFilter} and {@link StopFilter}. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. /// - /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. - class LPPCONTRIBAPI GreekAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - GreekAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. 
- GreekAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - virtual ~GreekAnalyzer(); - - LUCENE_CLASS(GreekAnalyzer); - - protected: - /// Contains the stopwords used with the {@link StopFilter}. - HashSet stopSet; - - LuceneVersion::Version matchVersion; - - /// Default Greek stopwords in UTF-8 format. - static const uint8_t _GREEK_STOP_WORDS[]; - - public: - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from a {@link StandardTokenizer} filtered with - /// {@link GreekLowerCaseFilter} and {@link StopFilter}. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from an {@link GreekLetterTokenizer} filtered with - /// {@link GreekLowerCaseFilter} and {@link StopFilter}. - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI GreekAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~GreekAnalyzerSavedStreams(); - - LUCENE_CLASS(GreekAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from an {@link GreekLetterTokenizer} filtered with + /// {@link GreekLowerCaseFilter} and {@link StopFilter}. 
+ virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI GreekAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~GreekAnalyzerSavedStreams(); + + LUCENE_CLASS(GreekAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/GreekLowerCaseFilter.h b/src/contrib/include/GreekLowerCaseFilter.h index b59b3284..d2bdc416 100644 --- a/src/contrib/include/GreekLowerCaseFilter.h +++ b/src/contrib/include/GreekLowerCaseFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,27 +10,27 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// Normalizes token text to lower case, removes some Greek diacritics, and standardizes - /// final sigma to sigma. - class LPPCONTRIBAPI GreekLowerCaseFilter : public TokenFilter - { - public: - GreekLowerCaseFilter(TokenStreamPtr input); - virtual ~GreekLowerCaseFilter(); - - LUCENE_CLASS(GreekLowerCaseFilter); - - protected: - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - - protected: - wchar_t lowerCase(wchar_t codepoint); - }; +namespace Lucene { + +/// Normalizes token text to lower case, removes some Greek diacritics, and standardizes +/// final sigma to sigma. 
+class LPPCONTRIBAPI GreekLowerCaseFilter : public TokenFilter { +public: + GreekLowerCaseFilter(const TokenStreamPtr& input); + virtual ~GreekLowerCaseFilter(); + + LUCENE_CLASS(GreekLowerCaseFilter); + +protected: + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); + +protected: + wchar_t lowerCase(wchar_t codepoint); +}; + } #endif diff --git a/src/contrib/include/Highlighter.h b/src/contrib/include/Highlighter.h index 1481c837..b71c9c47 100644 --- a/src/contrib/include/Highlighter.h +++ b/src/contrib/include/Highlighter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,120 +10,119 @@ #include "LuceneContrib.h" #include "PriorityQueue.h" -namespace Lucene -{ - /// Class used to markup highlighted terms found in the best sections of a text, using configurable - /// {@link Fragmenter}, {@link Scorer}, {@link Formatter}, {@link Encoder} and tokenizers. - class LPPCONTRIBAPI Highlighter : public LuceneObject - { - public: - Highlighter(HighlighterScorerPtr fragmentScorer); - Highlighter(FormatterPtr formatter, HighlighterScorerPtr fragmentScorer); - Highlighter(FormatterPtr formatter, EncoderPtr encoder, HighlighterScorerPtr fragmentScorer); - - virtual ~Highlighter(); - - LUCENE_CLASS(Highlighter); - - public: - static const int32_t DEFAULT_MAX_CHARS_TO_ANALYZE; - - protected: - int32_t maxDocCharsToAnalyze; - FormatterPtr formatter; - EncoderPtr encoder; - FragmenterPtr textFragmenter; - HighlighterScorerPtr fragmentScorer; - - public: - /// Highlights chosen terms in a text, extracting the most relevant section. 
This is a convenience - /// method that calls {@link #getBestFragment(TokenStreamPtr, const String&)} - /// - /// @param analyzer The analyzer that will be used to split text into chunks - /// @param text Text to highlight terms in - /// @param fieldName Name of field used to influence analyzer's tokenization policy - /// @return highlighted text fragment or null if no terms found - String getBestFragment(AnalyzerPtr analyzer, const String& fieldName, const String& text); - - /// Highlights chosen terms in a text, extracting the most relevant section. The document text is - /// analyzed in chunks to record hit statistics across the document. After accumulating stats, the - /// fragment with the highest score is returned. - /// - /// @param tokenStream A stream of tokens identified in the text parameter, including offset - /// information. This is typically produced by an analyzer re-parsing a document's text. Some - /// work may be done on retrieving TokenStreams more efficiently by adding support for storing - /// original text position data in the Lucene index but this support is not currently available. - /// @param text Text to highlight terms in - /// @return highlighted text fragment or null if no terms found - String getBestFragment(TokenStreamPtr tokenStream, const String& text); - - /// Highlights chosen terms in a text, extracting the most relevant sections. This is a convenience - /// method that calls {@link #getBestFragments(TokenStreamPtr, const String&, int32_t)} - /// - /// @param analyzer The analyzer that will be used to split text into chunks - /// @param fieldName The name of the field being highlighted (used by analyzer) - /// @param text Text to highlight terms in - /// @param maxNumFragments The maximum number of fragments. 
- /// @return highlighted text fragments (between 0 and maxNumFragments number of fragments) - Collection getBestFragments(AnalyzerPtr analyzer, const String& fieldName, const String& text, int32_t maxNumFragments); - - /// Highlights chosen terms in a text, extracting the most relevant sections. The document text is - /// analyzed in chunks to record hit statistics across the document. After accumulating stats, the - /// fragments with the highest scores are returned as an array of strings in order of score (contiguous - /// fragments are merged into one in their original order to improve readability) - /// - /// @param text Text to highlight terms in - /// @param maxNumFragments The maximum number of fragments. - /// @return highlighted Text fragments (between 0 and maxNumFragments number of fragments) - Collection getBestFragments(TokenStreamPtr tokenStream, const String& text, int32_t maxNumFragments); - - /// Low level api to get the most relevant (formatted) sections of the document. - /// This method has been made public to allow visibility of score information held in TextFragment objects. - Collection getBestTextFragments(TokenStreamPtr tokenStream, const String& text, bool merge, int32_t maxNumFragments); - - /// Improves readability of a score-sorted list of TextFragments by merging any fragments that were - /// contiguous in the original text into one larger fragment with the correct order. This will leave - /// a "null" in the array entry for the lesser scored fragment. - /// - /// @param frag An array of document fragments in descending score - void mergeContiguousFragments(Collection frag); - - /// Highlights terms in the text , extracting the most relevant sections and concatenating the chosen - /// fragments with a separator (typically "..."). The document text is analyzed in chunks to record - /// hit statistics across the document. 
After accumulating stats, the fragments with the highest scores - /// are returned in order as "separator" delimited strings. - /// - /// @param text Text to highlight terms in - /// @param maxNumFragments The maximum number of fragments. - /// @param separator The separator used to intersperse the document fragments (typically "...") - /// @return highlighted text - String getBestFragments(TokenStreamPtr tokenStream, const String& text, int32_t maxNumFragments, const String& separator); - - int32_t getMaxDocCharsToAnalyze(); - void setMaxDocCharsToAnalyze(int32_t maxDocCharsToAnalyze); - FragmenterPtr getTextFragmenter(); - void setTextFragmenter(FragmenterPtr fragmenter); - - /// @return Object used to score each text fragment - HighlighterScorerPtr getFragmentScorer(); - - void setFragmentScorer(HighlighterScorerPtr scorer); - - EncoderPtr getEncoder(); - void setEncoder(EncoderPtr encoder); - }; - - class LPPCONTRIBAPI FragmentQueue : public PriorityQueue - { - public: - FragmentQueue(int32_t size); - virtual ~FragmentQueue(); - - LUCENE_CLASS(FragmentQueue); - - protected: - virtual bool lessThan(const TextFragmentPtr& first, const TextFragmentPtr& second); - }; +namespace Lucene { + +/// Class used to markup highlighted terms found in the best sections of a text, using configurable +/// {@link Fragmenter}, {@link Scorer}, {@link Formatter}, {@link Encoder} and tokenizers. 
+class LPPCONTRIBAPI Highlighter : public LuceneObject { +public: + Highlighter(const HighlighterScorerPtr& fragmentScorer); + Highlighter(const FormatterPtr& formatter, const HighlighterScorerPtr& fragmentScorer); + Highlighter(const FormatterPtr& formatter, const EncoderPtr& encoder, const HighlighterScorerPtr& fragmentScorer); + + virtual ~Highlighter(); + + LUCENE_CLASS(Highlighter); + +public: + static const int32_t DEFAULT_MAX_CHARS_TO_ANALYZE; + +protected: + int32_t maxDocCharsToAnalyze; + FormatterPtr formatter; + EncoderPtr encoder; + FragmenterPtr textFragmenter; + HighlighterScorerPtr fragmentScorer; + +public: + /// Highlights chosen terms in a text, extracting the most relevant section. This is a convenience + /// method that calls {@link #getBestFragment(TokenStreamPtr, const String&)} + /// + /// @param analyzer The analyzer that will be used to split text into chunks + /// @param text Text to highlight terms in + /// @param fieldName Name of field used to influence analyzer's tokenization policy + /// @return highlighted text fragment or null if no terms found + String getBestFragment(const AnalyzerPtr& analyzer, const String& fieldName, const String& text); + + /// Highlights chosen terms in a text, extracting the most relevant section. The document text is + /// analyzed in chunks to record hit statistics across the document. After accumulating stats, the + /// fragment with the highest score is returned. + /// + /// @param tokenStream A stream of tokens identified in the text parameter, including offset + /// information. This is typically produced by an analyzer re-parsing a document's text. Some + /// work may be done on retrieving TokenStreams more efficiently by adding support for storing + /// original text position data in the Lucene index but this support is not currently available. 
+ /// @param text Text to highlight terms in + /// @return highlighted text fragment or null if no terms found + String getBestFragment(const TokenStreamPtr& tokenStream, const String& text); + + /// Highlights chosen terms in a text, extracting the most relevant sections. This is a convenience + /// method that calls {@link #getBestFragments(TokenStreamPtr, const String&, int32_t)} + /// + /// @param analyzer The analyzer that will be used to split text into chunks + /// @param fieldName The name of the field being highlighted (used by analyzer) + /// @param text Text to highlight terms in + /// @param maxNumFragments The maximum number of fragments. + /// @return highlighted text fragments (between 0 and maxNumFragments number of fragments) + Collection getBestFragments(const AnalyzerPtr& analyzer, const String& fieldName, const String& text, int32_t maxNumFragments); + + /// Highlights chosen terms in a text, extracting the most relevant sections. The document text is + /// analyzed in chunks to record hit statistics across the document. After accumulating stats, the + /// fragments with the highest scores are returned as an array of strings in order of score (contiguous + /// fragments are merged into one in their original order to improve readability) + /// + /// @param text Text to highlight terms in + /// @param maxNumFragments The maximum number of fragments. + /// @return highlighted Text fragments (between 0 and maxNumFragments number of fragments) + Collection getBestFragments(const TokenStreamPtr& tokenStream, const String& text, int32_t maxNumFragments); + + /// Low level api to get the most relevant (formatted) sections of the document. + /// This method has been made public to allow visibility of score information held in TextFragment objects. 
+ Collection getBestTextFragments(const TokenStreamPtr& tokenStream, const String& text, bool merge, int32_t maxNumFragments); + + /// Improves readability of a score-sorted list of TextFragments by merging any fragments that were + /// contiguous in the original text into one larger fragment with the correct order. This will leave + /// a "null" in the array entry for the lesser scored fragment. + /// + /// @param frag An array of document fragments in descending score + void mergeContiguousFragments(Collection frag); + + /// Highlights terms in the text , extracting the most relevant sections and concatenating the chosen + /// fragments with a separator (typically "..."). The document text is analyzed in chunks to record + /// hit statistics across the document. After accumulating stats, the fragments with the highest scores + /// are returned in order as "separator" delimited strings. + /// + /// @param text Text to highlight terms in + /// @param maxNumFragments The maximum number of fragments. 
+ /// @param separator The separator used to intersperse the document fragments (typically "...") + /// @return highlighted text + String getBestFragments(const TokenStreamPtr& tokenStream, const String& text, int32_t maxNumFragments, const String& separator); + + int32_t getMaxDocCharsToAnalyze(); + void setMaxDocCharsToAnalyze(int32_t maxDocCharsToAnalyze); + FragmenterPtr getTextFragmenter(); + void setTextFragmenter(const FragmenterPtr& fragmenter); + + /// @return Object used to score each text fragment + HighlighterScorerPtr getFragmentScorer(); + + void setFragmentScorer(const HighlighterScorerPtr& scorer); + + EncoderPtr getEncoder(); + void setEncoder(const EncoderPtr& encoder); +}; + +class LPPCONTRIBAPI FragmentQueue : public PriorityQueue { +public: + FragmentQueue(int32_t size); + virtual ~FragmentQueue(); + + LUCENE_CLASS(FragmentQueue); + +protected: + virtual bool lessThan(const TextFragmentPtr& first, const TextFragmentPtr& second); +}; + } #endif diff --git a/src/contrib/include/HighlighterScorer.h b/src/contrib/include/HighlighterScorer.h index 574f838c..1fdd054d 100644 --- a/src/contrib/include/HighlighterScorer.h +++ b/src/contrib/include/HighlighterScorer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,43 +10,43 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// A HighlighterScorer is responsible for scoring a stream of tokens. These token scores - /// can then be used to compute {@link TextFragment} scores. 
- class LPPCONTRIBAPI HighlighterScorer - { - public: - virtual ~HighlighterScorer(); - LUCENE_INTERFACE(HighlighterScorer); - - public: - /// Called to init the Scorer with a {@link TokenStream}. You can grab references to the - /// attributes you are interested in here and access them from {@link #getTokenScore()}. - /// - /// @param tokenStream the {@link TokenStream} that will be scored. - /// @return either a {@link TokenStream} that the Highlighter should continue using (eg - /// if you read the tokenSream in this method) or null to continue using the same {@link - /// TokenStream} that was passed in. - virtual TokenStreamPtr init(TokenStreamPtr tokenStream); - - /// Called when a new fragment is started for consideration. - /// - /// @param newFragment the fragment that will be scored next - virtual void startFragment(TextFragmentPtr newFragment); - - /// Called for each token in the current fragment. The {@link Highlighter} will increment - /// the {@link TokenStream} passed to init on every call. - /// - /// @return a score which is passed to the {@link Highlighter} class to influence the - /// mark-up of the text (this return value is NOT used to score the fragment) - virtual double getTokenScore(); - - /// Called when the {@link Highlighter} has no more tokens for the current fragment - the - /// Scorer returns the weighting it has derived for the most recent fragment, typically - /// based on the results of {@link #getTokenScore()}. - virtual double getFragmentScore(); - }; +namespace Lucene { + +/// A HighlighterScorer is responsible for scoring a stream of tokens. These token scores +/// can then be used to compute {@link TextFragment} scores. +class LPPCONTRIBAPI HighlighterScorer { +public: + virtual ~HighlighterScorer(); + LUCENE_INTERFACE(HighlighterScorer); + +public: + /// Called to init the Scorer with a {@link TokenStream}. You can grab references to the + /// attributes you are interested in here and access them from {@link #getTokenScore()}. 
+ /// + /// @param tokenStream the {@link TokenStream} that will be scored. + /// @return either a {@link TokenStream} that the Highlighter should continue using (eg + /// if you read the tokenSream in this method) or null to continue using the same {@link + /// TokenStream} that was passed in. + virtual TokenStreamPtr init(const TokenStreamPtr& tokenStream); + + /// Called when a new fragment is started for consideration. + /// + /// @param newFragment the fragment that will be scored next + virtual void startFragment(const TextFragmentPtr& newFragment); + + /// Called for each token in the current fragment. The {@link Highlighter} will increment + /// the {@link TokenStream} passed to init on every call. + /// + /// @return a score which is passed to the {@link Highlighter} class to influence the + /// mark-up of the text (this return value is NOT used to score the fragment) + virtual double getTokenScore(); + + /// Called when the {@link Highlighter} has no more tokens for the current fragment - the + /// Scorer returns the weighting it has derived for the most recent fragment, typically + /// based on the results of {@link #getTokenScore()}. + virtual double getFragmentScore(); +}; + } #endif diff --git a/src/contrib/include/LuceneContrib.h b/src/contrib/include/LuceneContrib.h index dee5d126..d1dd2c33 100644 --- a/src/contrib/include/LuceneContrib.h +++ b/src/contrib/include/LuceneContrib.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,108 +9,108 @@ #include "Lucene.h" -namespace Lucene -{ - // analyzers - DECLARE_SHARED_PTR(ArabicAnalyzer) - DECLARE_SHARED_PTR(ArabicAnalyzerSavedStreams) - DECLARE_SHARED_PTR(ArabicLetterTokenizer) - DECLARE_SHARED_PTR(ArabicNormalizationFilter) - DECLARE_SHARED_PTR(ArabicNormalizer) - DECLARE_SHARED_PTR(ArabicStemFilter) - DECLARE_SHARED_PTR(ArabicStemmer) - DECLARE_SHARED_PTR(BrazilianAnalyzer) - DECLARE_SHARED_PTR(BrazilianAnalyzerSavedStreams) - DECLARE_SHARED_PTR(BrazilianStemFilter) - DECLARE_SHARED_PTR(BrazilianStemmer) - DECLARE_SHARED_PTR(CJKAnalyzer) - DECLARE_SHARED_PTR(CJKAnalyzerSavedStreams) - DECLARE_SHARED_PTR(CJKTokenizer) - DECLARE_SHARED_PTR(ChineseAnalyzer) - DECLARE_SHARED_PTR(ChineseAnalyzerSavedStreams) - DECLARE_SHARED_PTR(ChineseFilter) - DECLARE_SHARED_PTR(ChineseTokenizer) - DECLARE_SHARED_PTR(CzechAnalyzer) - DECLARE_SHARED_PTR(CzechAnalyzerSavedStreams) - DECLARE_SHARED_PTR(DutchAnalyzer) - DECLARE_SHARED_PTR(DutchAnalyzerSavedStreams) - DECLARE_SHARED_PTR(DutchStemFilter) - DECLARE_SHARED_PTR(DutchStemmer) - DECLARE_SHARED_PTR(ElisionFilter) - DECLARE_SHARED_PTR(FrenchAnalyzer) - DECLARE_SHARED_PTR(FrenchAnalyzerSavedStreams) - DECLARE_SHARED_PTR(FrenchStemFilter) - DECLARE_SHARED_PTR(FrenchStemmer) - DECLARE_SHARED_PTR(GermanAnalyzer) - DECLARE_SHARED_PTR(GermanAnalyzerSavedStreams) - DECLARE_SHARED_PTR(GermanStemFilter) - DECLARE_SHARED_PTR(GermanStemmer) - DECLARE_SHARED_PTR(GreekLowerCaseFilter) - DECLARE_SHARED_PTR(GreekAnalyzer) - DECLARE_SHARED_PTR(GreekAnalyzerSavedStreams) - DECLARE_SHARED_PTR(PersianAnalyzer) - DECLARE_SHARED_PTR(PersianAnalyzerSavedStreams) - DECLARE_SHARED_PTR(PersianNormalizationFilter) - DECLARE_SHARED_PTR(PersianNormalizer) - DECLARE_SHARED_PTR(ReverseStringFilter) - DECLARE_SHARED_PTR(RussianAnalyzer) - DECLARE_SHARED_PTR(RussianAnalyzerSavedStreams) - DECLARE_SHARED_PTR(RussianLetterTokenizer) - 
DECLARE_SHARED_PTR(RussianLowerCaseFilter) - DECLARE_SHARED_PTR(RussianStemFilter) - DECLARE_SHARED_PTR(RussianStemmer) - DECLARE_SHARED_PTR(SnowballFilter) - DECLARE_SHARED_PTR(SnowballAnalyzer) - DECLARE_SHARED_PTR(SnowballAnalyzerSavedStreams) - - // highlighter - DECLARE_SHARED_PTR(DefaultEncoder) - DECLARE_SHARED_PTR(Encoder) - DECLARE_SHARED_PTR(FakeReader) - DECLARE_SHARED_PTR(Formatter) - DECLARE_SHARED_PTR(Fragmenter) - DECLARE_SHARED_PTR(FragmentQueue) - DECLARE_SHARED_PTR(GradientFormatter) - DECLARE_SHARED_PTR(Highlighter) - DECLARE_SHARED_PTR(HighlighterScorer) - DECLARE_SHARED_PTR(MapWeightedSpanTerm) - DECLARE_SHARED_PTR(NullFragmenter) - DECLARE_SHARED_PTR(PositionCheckingMap) - DECLARE_SHARED_PTR(PositionSpan) - DECLARE_SHARED_PTR(QueryScorer) - DECLARE_SHARED_PTR(QueryTermExtractor) - DECLARE_SHARED_PTR(QueryTermScorer) - DECLARE_SHARED_PTR(SimpleFragmenter) - DECLARE_SHARED_PTR(SimpleHTMLEncoder) - DECLARE_SHARED_PTR(SimpleHTMLFormatter) - DECLARE_SHARED_PTR(SimpleSpanFragmenter) - DECLARE_SHARED_PTR(SpanGradientFormatter) - DECLARE_SHARED_PTR(StringBuffer) - DECLARE_SHARED_PTR(TextFragment) - DECLARE_SHARED_PTR(TokenGroup) - DECLARE_SHARED_PTR(TokenSources) - DECLARE_SHARED_PTR(WeightedSpanTerm) - DECLARE_SHARED_PTR(WeightedSpanTermExtractor) - DECLARE_SHARED_PTR(WeightedTerm) +namespace Lucene { - // memory - DECLARE_SHARED_PTR(MemoryIndex) - DECLARE_SHARED_PTR(MemoryIndexInfo) - DECLARE_SHARED_PTR(MemoryIndexReader) +// analyzers +DECLARE_SHARED_PTR(ArabicAnalyzer) +DECLARE_SHARED_PTR(ArabicAnalyzerSavedStreams) +DECLARE_SHARED_PTR(ArabicLetterTokenizer) +DECLARE_SHARED_PTR(ArabicNormalizationFilter) +DECLARE_SHARED_PTR(ArabicNormalizer) +DECLARE_SHARED_PTR(ArabicStemFilter) +DECLARE_SHARED_PTR(ArabicStemmer) +DECLARE_SHARED_PTR(BrazilianAnalyzer) +DECLARE_SHARED_PTR(BrazilianAnalyzerSavedStreams) +DECLARE_SHARED_PTR(BrazilianStemFilter) +DECLARE_SHARED_PTR(BrazilianStemmer) +DECLARE_SHARED_PTR(CJKAnalyzer) 
+DECLARE_SHARED_PTR(CJKAnalyzerSavedStreams) +DECLARE_SHARED_PTR(CJKTokenizer) +DECLARE_SHARED_PTR(ChineseAnalyzer) +DECLARE_SHARED_PTR(ChineseAnalyzerSavedStreams) +DECLARE_SHARED_PTR(ChineseFilter) +DECLARE_SHARED_PTR(ChineseTokenizer) +DECLARE_SHARED_PTR(CzechAnalyzer) +DECLARE_SHARED_PTR(CzechAnalyzerSavedStreams) +DECLARE_SHARED_PTR(DutchAnalyzer) +DECLARE_SHARED_PTR(DutchAnalyzerSavedStreams) +DECLARE_SHARED_PTR(DutchStemFilter) +DECLARE_SHARED_PTR(DutchStemmer) +DECLARE_SHARED_PTR(ElisionFilter) +DECLARE_SHARED_PTR(FrenchAnalyzer) +DECLARE_SHARED_PTR(FrenchAnalyzerSavedStreams) +DECLARE_SHARED_PTR(FrenchStemFilter) +DECLARE_SHARED_PTR(FrenchStemmer) +DECLARE_SHARED_PTR(GermanAnalyzer) +DECLARE_SHARED_PTR(GermanAnalyzerSavedStreams) +DECLARE_SHARED_PTR(GermanStemFilter) +DECLARE_SHARED_PTR(GermanStemmer) +DECLARE_SHARED_PTR(GreekLowerCaseFilter) +DECLARE_SHARED_PTR(GreekAnalyzer) +DECLARE_SHARED_PTR(GreekAnalyzerSavedStreams) +DECLARE_SHARED_PTR(PersianAnalyzer) +DECLARE_SHARED_PTR(PersianAnalyzerSavedStreams) +DECLARE_SHARED_PTR(PersianNormalizationFilter) +DECLARE_SHARED_PTR(PersianNormalizer) +DECLARE_SHARED_PTR(ReverseStringFilter) +DECLARE_SHARED_PTR(RussianAnalyzer) +DECLARE_SHARED_PTR(RussianAnalyzerSavedStreams) +DECLARE_SHARED_PTR(RussianLetterTokenizer) +DECLARE_SHARED_PTR(RussianLowerCaseFilter) +DECLARE_SHARED_PTR(RussianStemFilter) +DECLARE_SHARED_PTR(RussianStemmer) +DECLARE_SHARED_PTR(SnowballFilter) +DECLARE_SHARED_PTR(SnowballAnalyzer) +DECLARE_SHARED_PTR(SnowballAnalyzerSavedStreams) - typedef HashMap< String, WeightedSpanTermPtr > MapStringWeightedSpanTerm; - typedef HashMap< String, WeightedTermPtr > MapStringWeightedTerm; - typedef HashMap< String, SpanQueryPtr > MapStringSpanQuery; - typedef HashMap< String, Collection > MapStringIntCollection; - typedef HashMap< String, MemoryIndexInfoPtr > MapStringMemoryIndexInfo; - - typedef std::pair< String, Collection > PairStringIntCollection; - typedef Collection< PairStringIntCollection > 
CollectionStringIntCollection; - - typedef std::pair< String, MemoryIndexInfoPtr > PairStringMemoryIndexInfo; - typedef Collection< PairStringMemoryIndexInfo > CollectionStringMemoryIndexInfo; - - typedef HashSet< WeightedTermPtr, luceneHash, luceneEquals > SetWeightedTerm; +// highlighter +DECLARE_SHARED_PTR(DefaultEncoder) +DECLARE_SHARED_PTR(Encoder) +DECLARE_SHARED_PTR(FakeReader) +DECLARE_SHARED_PTR(Formatter) +DECLARE_SHARED_PTR(Fragmenter) +DECLARE_SHARED_PTR(FragmentQueue) +DECLARE_SHARED_PTR(GradientFormatter) +DECLARE_SHARED_PTR(Highlighter) +DECLARE_SHARED_PTR(HighlighterScorer) +DECLARE_SHARED_PTR(MapWeightedSpanTerm) +DECLARE_SHARED_PTR(NullFragmenter) +DECLARE_SHARED_PTR(PositionCheckingMap) +DECLARE_SHARED_PTR(PositionSpan) +DECLARE_SHARED_PTR(QueryScorer) +DECLARE_SHARED_PTR(QueryTermExtractor) +DECLARE_SHARED_PTR(QueryTermScorer) +DECLARE_SHARED_PTR(SimpleFragmenter) +DECLARE_SHARED_PTR(SimpleHTMLEncoder) +DECLARE_SHARED_PTR(SimpleHTMLFormatter) +DECLARE_SHARED_PTR(SimpleSpanFragmenter) +DECLARE_SHARED_PTR(SpanGradientFormatter) +DECLARE_SHARED_PTR(StringBuffer) +DECLARE_SHARED_PTR(TextFragment) +DECLARE_SHARED_PTR(TokenGroup) +DECLARE_SHARED_PTR(TokenSources) +DECLARE_SHARED_PTR(WeightedSpanTerm) +DECLARE_SHARED_PTR(WeightedSpanTermExtractor) +DECLARE_SHARED_PTR(WeightedTerm) + +// memory +DECLARE_SHARED_PTR(MemoryIndex) +DECLARE_SHARED_PTR(MemoryIndexInfo) +DECLARE_SHARED_PTR(MemoryIndexReader) + +typedef HashMap< String, WeightedSpanTermPtr > MapStringWeightedSpanTerm; +typedef HashMap< String, WeightedTermPtr > MapStringWeightedTerm; +typedef HashMap< String, SpanQueryPtr > MapStringSpanQuery; +typedef HashMap< String, Collection > MapStringIntCollection; +typedef HashMap< String, MemoryIndexInfoPtr > MapStringMemoryIndexInfo; + +typedef std::pair< String, Collection > PairStringIntCollection; +typedef Collection< PairStringIntCollection > CollectionStringIntCollection; + +typedef std::pair< String, MemoryIndexInfoPtr > 
PairStringMemoryIndexInfo; +typedef Collection< PairStringMemoryIndexInfo > CollectionStringMemoryIndexInfo; + +typedef HashSet< WeightedTermPtr, luceneHash, luceneEquals > SetWeightedTerm; } #endif diff --git a/src/contrib/include/MapWeightedSpanTerm.h b/src/contrib/include/MapWeightedSpanTerm.h index c6aebef0..abab0872 100644 --- a/src/contrib/include/MapWeightedSpanTerm.h +++ b/src/contrib/include/MapWeightedSpanTerm.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,27 +10,27 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Utility class that encapsulates a StringWeightedSpanTerm map that can be overridden. - class LPPCONTRIBAPI MapWeightedSpanTerm : public LuceneObject - { - public: - MapWeightedSpanTerm(); - virtual ~MapWeightedSpanTerm(); - - LUCENE_CLASS(MapWeightedSpanTerm); - - protected: - MapStringWeightedSpanTerm map; - - public: - virtual MapStringWeightedSpanTerm::iterator begin(); - virtual MapStringWeightedSpanTerm::iterator end(); - virtual void put(const String& key, WeightedSpanTermPtr val); - virtual WeightedSpanTermPtr get(const String& key) const; - virtual void clear(); - }; +namespace Lucene { + +/// Utility class that encapsulates a StringWeightedSpanTerm map that can be overridden. 
+class LPPCONTRIBAPI MapWeightedSpanTerm : public LuceneObject { +public: + MapWeightedSpanTerm(); + virtual ~MapWeightedSpanTerm(); + + LUCENE_CLASS(MapWeightedSpanTerm); + +protected: + MapStringWeightedSpanTerm map; + +public: + virtual MapStringWeightedSpanTerm::iterator begin(); + virtual MapStringWeightedSpanTerm::iterator end(); + virtual void put(const String& key, const WeightedSpanTermPtr& val); + virtual WeightedSpanTermPtr get(const String& key) const; + virtual void clear(); +}; + } #endif diff --git a/src/contrib/include/MemoryIndex.h b/src/contrib/include/MemoryIndex.h index e5ee3633..8efc5282 100644 --- a/src/contrib/include/MemoryIndex.h +++ b/src/contrib/include/MemoryIndex.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,355 +14,349 @@ #include "TermPositions.h" #include "TermPositionVector.h" -namespace Lucene -{ - /// High-performance single-document main memory Lucene fulltext search index. - /// - /// Overview - /// - /// This class is a replacement/substitute for a large subset of {@link RAMDirectory} functionality. - /// It is designed to enable maximum efficiency for on-the-fly matchmaking combining structured and - /// fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML message - /// queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and - /// distribution systems, application level routers, firewalls, classifiers, etc. 
Rather than - /// targeting fulltext search of infrequent queries over huge persistent data archives (historic - /// search), this class targets fulltext search of huge numbers of queries over comparatively small - /// transient realtime data (prospective search). - /// - /// For example as in - ///
-    /// double score = search(const String& text, QueryPtr query)
-    /// 
- /// - /// Each instance can hold at most one Lucene "document", with a document containing zero or more - /// "fields", each field having a name and a fulltext value. The fulltext value is tokenized - /// (split and transformed) into zero or more index terms (aka words) on addField(), according to - /// the policy implemented by an Analyzer. For example, Lucene analyzers can split on whitespace, - /// normalize to lower case for case insensitivity, ignore common terms with little discriminatory - /// value such as "he", "in", "and" (stop words), reduce the terms to their natural linguistic root - /// form such as "fishing" being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri - /// (upon indexing and/or querying), etc. - /// - /// Note that a Lucene query selects on the field names and associated (indexed) tokenized terms, - /// not on the original fulltext(s) - the latter are not stored but rather thrown away immediately - /// after tokenization. - /// - /// For some interesting background information on search technology, see Bob Wyman's Prospective Search, - /// Jim Gray's - /// A Call to Arms - Custom subscriptions, and Tim Bray's On Search, the Series. - /// - /// - /// Example Usage - ///
-    /// AnalyzerPtr analyzer = newLucene();
-    /// MemoryIndexPtr index = newLucene();
-    /// index->addField(L"content", L"Readings about Salmons and other select Alaska fishing Manuals", analyzer);
-    /// index->addField(L"author", L"Tales of James", analyzer);
-    /// QueryParserPtr parser = newLucene(L"content", analyzer);
-    /// double score = index->search(parser->parse(L"+author:james +salmon~ +fish* manual~"));
-    /// if (score > 0.0)
-    /// {
-    ///     // it's a match
-    /// }
-    /// else
-    /// {
-    ///     // no match found
-    /// }
-    /// 
- /// - /// - /// Performance Notes - /// - /// Internally there's a new data structure geared towards efficient indexing and searching, plus - /// the necessary support code to seamlessly plug into the Lucene framework. - /// - /// This class performs very well for very small texts (eg. 10 chars) as well as for large texts - /// (eg. 10 MB) and everything in between. Typically, it is about 10-100 times faster than - /// RAMDirectory. Note that RAMDirectory has particularly large efficiency overheads for small to - /// medium sized texts, both in time and space. Indexing a field with N tokens takes O(N) in the - /// best case, and O(N logN) in the worst case. Memory consumption is probably larger than for - /// RAMDirectory. - /// - class LPPCONTRIBAPI MemoryIndex : public LuceneObject - { - public: - /// Constructs an empty instance that can optionally store the start and end character offset - /// of each token term in the text. This can be useful for highlighting of hit locations with - /// the Lucene highlighter package. Private until the highlighter package matures, so that - /// this can actually be meaningfully integrated. - /// @param storeOffsets Whether or not to store the start and end character offset of each - /// token term in the text. 
- MemoryIndex(bool storeOffsets = false); - - virtual ~MemoryIndex(); - - LUCENE_CLASS(MemoryIndex); - - protected: - /// info for each field - MapStringMemoryIndexInfo fields; - - /// fields sorted ascending by fieldName; lazily computed on demand - CollectionStringMemoryIndexInfo sortedFields; - - /// pos: positions[3 * i], startOffset: positions[3 * i + 1], endOffset: positions[3 * i + 2] - int32_t stride; - - static const double docBoost; - - public: - /// Convenience method; Tokenizes the given field text and adds the resulting terms to the - /// index; Equivalent to adding an indexed non-keyword Lucene {@link Field} that is {@link - /// Field::INDEX_ANALYZED tokenized}, {@link Field::STORE_NO not stored}, {@link - /// Field::TERM_VECTOR_WITH_POSITIONS termVectorStored with positions} (or {@link - /// Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS termVectorStored with positions and offsets}) - /// @param fieldName A name to be associated with the text - /// @param text The text to tokenize and index. - /// @param analyzer The analyzer to use for tokenization - void addField(const String& fieldName, const String& text, AnalyzerPtr analyzer); - - /// Iterates over the given token stream and adds the resulting terms to the index; - /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored, Lucene {@link - /// Field}. Finally closes the token stream. Note that untokenized keywords can be added - /// with this method via {@link #keywordTokenStream(Collection)}, the Lucene contrib - /// KeywordTokenizer or similar utilities. - /// @param fieldName A name to be associated with the text. - /// @param stream The token stream to retrieve tokens from. - /// @param boost The boost factor for hits for this field. 
- /// @see Field#setBoost(double) - void addField(const String& fieldName, TokenStreamPtr stream, double boost = 1.0); - - /// Creates and returns a searcher that can be used to execute arbitrary Lucene queries - /// and to collect the resulting query results as hits. - /// @return a searcher - IndexSearcherPtr createSearcher(); - - /// Convenience method that efficiently returns the relevance score by matching this index - /// against the given Lucene query expression. - /// @param query An arbitrary Lucene query to run against this index - /// @return the relevance score of the matchmaking; A number in the range [0.0 .. 1.0], - /// with 0.0 indicating no match. The higher the number the better the match. - double search(QueryPtr query); - - protected: - int32_t numPositions(Collection positions); - - /// sorts into ascending order (on demand), reusing memory along the way - void sortFields(); - - friend class MemoryIndexReader; - friend class MemoryIndexInfo; - friend class MemoryIndexTermEnum; - friend class MemoryIndexTermPositions; - friend class MemoryIndexTermPositionVector; - }; - - /// Index data structure for a field; Contains the tokenized term texts and their positions. 
- class LPPCONTRIBAPI MemoryIndexInfo : public LuceneObject - { - public: - MemoryIndexInfo(MapStringIntCollection terms, int32_t numTokens, int32_t numOverlapTokens, double boost); - virtual ~MemoryIndexInfo(); - - LUCENE_CLASS(MemoryIndexInfo); - - protected: - /// Term strings and their positions for this field - MapStringIntCollection terms; - - /// Terms sorted ascending by term text; computed on demand - CollectionStringIntCollection sortedTerms; - - /// Number of added tokens for this field - int32_t numTokens; - - /// Number of overlapping tokens for this field - int32_t numOverlapTokens; - - /// Boost factor for hits for this field - double boost; - - /// Term for this field's fieldName, lazily computed on demand - TermPtr _template; - - public: - /// Sorts hashed terms into ascending order, reusing memory along the way. Note that - /// sorting is lazily delayed until required (often it's not required at all). - void sortTerms(); - - /// Note that the frequency can be calculated as numPosition(getPositions(x)) - Collection getPositions(const String& term); - - /// Note that the frequency can be calculated as numPosition(getPositions(x)) - Collection getPositions(int32_t pos); - - double getBoost(); - - friend class MemoryIndexReader; - friend class MemoryIndexTermEnum; - friend class MemoryIndexTermPositions; - friend class MemoryIndexTermPositionVector; - }; - - /// Search support for Lucene framework integration; implements all methods required by the - /// Lucene IndexReader contracts. 
- class LPPCONTRIBAPI MemoryIndexReader : public IndexReader - { - public: - MemoryIndexReader(MemoryIndexPtr memoryIndex); - virtual ~MemoryIndexReader(); - - LUCENE_CLASS(MemoryIndexReader); - - public: - static TermPtr MATCH_ALL_TERM(); - - protected: - MemoryIndexPtr memoryIndex; - SearcherWeakPtr _searcher; // needed to find searcher.getSimilarity() - - /// cache norms to avoid repeated expensive calculations - ByteArray cachedNorms; - String cachedFieldName; - SimilarityPtr cachedSimilarity; - - protected: - MemoryIndexInfoPtr getInfo(const String& fieldName); - MemoryIndexInfoPtr getInfo(int32_t pos); - - SimilarityPtr getSimilarity(); - void setSearcher(SearcherPtr searcher); - - public: - virtual int32_t docFreq(TermPtr t); - virtual TermEnumPtr terms(); - virtual TermEnumPtr terms(TermPtr t); - virtual TermPositionsPtr termPositions(); - virtual TermDocsPtr termDocs(); - virtual Collection getTermFreqVectors(int32_t docNumber); - virtual void getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper); - virtual void getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper); - virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); - virtual ByteArray norms(const String& field); - virtual void norms(const String& field, ByteArray norms, int32_t offset); - virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); - virtual int32_t numDocs(); - virtual int32_t maxDoc(); - virtual DocumentPtr document(int32_t n); - virtual DocumentPtr document(int32_t n, FieldSelectorPtr fieldSelector); - virtual bool isDeleted(int32_t n); - virtual bool hasDeletions(); - virtual void doDelete(int32_t docNum); - virtual void doUndeleteAll(); - virtual void doCommit(MapStringString commitUserData); - virtual void doClose(); - virtual HashSet getFieldNames(FieldOption fieldOption); - - friend class MemoryIndex; - friend class MemoryIndexTermEnum; - friend class MemoryIndexTermPositions; - friend class 
MemoryIndexTermPositionVector; - }; - - class LPPCONTRIBAPI MemoryIndexTermEnum : public TermEnum - { - public: - MemoryIndexTermEnum(MemoryIndexReaderPtr reader, int32_t ix, int32_t jx); - virtual ~MemoryIndexTermEnum(); - - LUCENE_CLASS(MemoryIndexTermEnum); - - protected: - MemoryIndexReaderWeakPtr _reader; - int32_t i; - int32_t j; - - public: - virtual bool next(); - virtual TermPtr term(); - virtual int32_t docFreq(); - virtual void close(); - - protected: - TermPtr createTerm(MemoryIndexInfoPtr info, int32_t pos, const String& text); - }; - - class LPPCONTRIBAPI MemoryIndexCollector : public Collector - { - public: - MemoryIndexCollector(Collection scores); - virtual ~MemoryIndexCollector(); - - LUCENE_CLASS(MemoryIndexCollector); - - protected: - Collection scores; - ScorerPtr scorer; - - public: - virtual void collect(int32_t doc); - virtual void setScorer(ScorerPtr scorer); - virtual bool acceptsDocsOutOfOrder(); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - }; - - class LPPCONTRIBAPI MemoryIndexTermPositions : public TermPositions, public LuceneObject - { - public: - MemoryIndexTermPositions(MemoryIndexReaderPtr reader); - virtual ~MemoryIndexTermPositions(); - - LUCENE_CLASS(MemoryIndexTermPositions); - - protected: - MemoryIndexReaderWeakPtr _reader; - bool hasNext; - int32_t cursor; - Collection current; - TermPtr term; - - public: - virtual void seek(TermPtr term); - virtual void seek(TermEnumPtr termEnum); - virtual int32_t doc(); - virtual int32_t freq(); - virtual bool next(); - virtual int32_t read(Collection docs, Collection freqs); - virtual bool skipTo(int32_t target); - virtual void close(); - - virtual int32_t nextPosition(); - virtual int32_t getPayloadLength(); - virtual ByteArray getPayload(ByteArray data, int32_t offset); - virtual bool isPayloadAvailable(); - }; - - class MemoryIndexTermPositionVector : public TermPositionVector, public LuceneObject - { - public: - 
MemoryIndexTermPositionVector(MemoryIndexReaderPtr reader, MemoryIndexInfoPtr info, const String& fieldName); - virtual ~MemoryIndexTermPositionVector(); - - LUCENE_CLASS(MemoryIndexTermPositionVector); - - protected: - MemoryIndexReaderWeakPtr _reader; - CollectionStringIntCollection sortedTerms; - String fieldName; - - public: - virtual String getField(); - virtual int32_t size(); - virtual Collection getTerms(); - virtual Collection getTermFrequencies(); - virtual int32_t indexOf(const String& term); - virtual Collection indexesOf(Collection terms, int32_t start, int32_t length); - - virtual Collection getTermPositions(int32_t index); - virtual Collection getOffsets(int32_t index); - }; +namespace Lucene { + +/// High-performance single-document main memory Lucene fulltext search index. +/// +/// Overview +/// +/// This class is a replacement/substitute for a large subset of {@link RAMDirectory} functionality. +/// It is designed to enable maximum efficiency for on-the-fly matchmaking combining structured and +/// fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML message +/// queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and +/// distribution systems, application level routers, firewalls, classifiers, etc. Rather than +/// targeting fulltext search of infrequent queries over huge persistent data archives (historic +/// search), this class targets fulltext search of huge numbers of queries over comparatively small +/// transient realtime data (prospective search). +/// +/// For example as in +///
+/// double score = search(const String& text, const QueryPtr& query)
+/// 
+/// +/// Each instance can hold at most one Lucene "document", with a document containing zero or more +/// "fields", each field having a name and a fulltext value. The fulltext value is tokenized +/// (split and transformed) into zero or more index terms (aka words) on addField(), according to +/// the policy implemented by an Analyzer. For example, Lucene analyzers can split on whitespace, +/// normalize to lower case for case insensitivity, ignore common terms with little discriminatory +/// value such as "he", "in", "and" (stop words), reduce the terms to their natural linguistic root +/// form such as "fishing" being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri +/// (upon indexing and/or querying), etc. +/// +/// Note that a Lucene query selects on the field names and associated (indexed) tokenized terms, +/// not on the original fulltext(s) - the latter are not stored but rather thrown away immediately +/// after tokenization. +/// +/// For some interesting background information on search technology, see Bob Wyman's Prospective Search, +/// Jim Gray's +/// A Call to Arms - Custom subscriptions, and Tim Bray's On Search, the Series. +/// +/// +/// Example Usage +///
+/// AnalyzerPtr analyzer = newLucene();
+/// MemoryIndexPtr index = newLucene();
+/// index->addField(L"content", L"Readings about Salmons and other select Alaska fishing Manuals", analyzer);
+/// index->addField(L"author", L"Tales of James", analyzer);
+/// QueryParserPtr parser = newLucene(L"content", analyzer);
+/// double score = index->search(parser->parse(L"+author:james +salmon~ +fish* manual~"));
+/// if (score > 0.0)
+/// {
+///     // it's a match
+/// }
+/// else
+/// {
+///     // no match found
+/// }
+/// 
+/// +/// +/// Performance Notes +/// +/// Internally there's a new data structure geared towards efficient indexing and searching, plus +/// the necessary support code to seamlessly plug into the Lucene framework. +/// +/// This class performs very well for very small texts (eg. 10 chars) as well as for large texts +/// (eg. 10 MB) and everything in between. Typically, it is about 10-100 times faster than +/// RAMDirectory. Note that RAMDirectory has particularly large efficiency overheads for small to +/// medium sized texts, both in time and space. Indexing a field with N tokens takes O(N) in the +/// best case, and O(N logN) in the worst case. Memory consumption is probably larger than for +/// RAMDirectory. +/// +class LPPCONTRIBAPI MemoryIndex : public LuceneObject { +public: + /// Constructs an empty instance that can optionally store the start and end character offset + /// of each token term in the text. This can be useful for highlighting of hit locations with + /// the Lucene highlighter package. Private until the highlighter package matures, so that + /// this can actually be meaningfully integrated. + /// @param storeOffsets Whether or not to store the start and end character offset of each + /// token term in the text. 
+ MemoryIndex(bool storeOffsets = false); + + virtual ~MemoryIndex(); + + LUCENE_CLASS(MemoryIndex); + +protected: + /// info for each field + MapStringMemoryIndexInfo fields; + + /// fields sorted ascending by fieldName; lazily computed on demand + CollectionStringMemoryIndexInfo sortedFields; + + /// pos: positions[3 * i], startOffset: positions[3 * i + 1], endOffset: positions[3 * i + 2] + int32_t stride; + + static const double docBoost; + +public: + /// Convenience method; Tokenizes the given field text and adds the resulting terms to the + /// index; Equivalent to adding an indexed non-keyword Lucene {@link Field} that is {@link + /// Field::INDEX_ANALYZED tokenized}, {@link Field::STORE_NO not stored}, {@link + /// Field::TERM_VECTOR_WITH_POSITIONS termVectorStored with positions} (or {@link + /// Field::TERM_VECTOR_WITH_POSITIONS_OFFSETS termVectorStored with positions and offsets}) + /// @param fieldName A name to be associated with the text + /// @param text The text to tokenize and index. + /// @param analyzer The analyzer to use for tokenization + void addField(const String& fieldName, const String& text, const AnalyzerPtr& analyzer); + + /// Iterates over the given token stream and adds the resulting terms to the index; + /// Equivalent to adding a tokenized, indexed, termVectorStored, unstored, Lucene {@link + /// Field}. Finally closes the token stream. Note that untokenized keywords can be added + /// with this method via {@link #keywordTokenStream(Collection)}, the Lucene contrib + /// KeywordTokenizer or similar utilities. + /// @param fieldName A name to be associated with the text. + /// @param stream The token stream to retrieve tokens from. + /// @param boost The boost factor for hits for this field. 
+ /// @see Field#setBoost(double) + void addField(const String& fieldName, const TokenStreamPtr& stream, double boost = 1.0); + + /// Creates and returns a searcher that can be used to execute arbitrary Lucene queries + /// and to collect the resulting query results as hits. + /// @return a searcher + IndexSearcherPtr createSearcher(); + + /// Convenience method that efficiently returns the relevance score by matching this index + /// against the given Lucene query expression. + /// @param query An arbitrary Lucene query to run against this index + /// @return the relevance score of the matchmaking; A number in the range [0.0 .. 1.0], + /// with 0.0 indicating no match. The higher the number the better the match. + double search(const QueryPtr& query); + +protected: + int32_t numPositions(Collection positions); + + /// sorts into ascending order (on demand), reusing memory along the way + void sortFields(); + + friend class MemoryIndexReader; + friend class MemoryIndexInfo; + friend class MemoryIndexTermEnum; + friend class MemoryIndexTermPositions; + friend class MemoryIndexTermPositionVector; +}; + +/// Index data structure for a field; Contains the tokenized term texts and their positions. 
+class LPPCONTRIBAPI MemoryIndexInfo : public LuceneObject { +public: + MemoryIndexInfo(MapStringIntCollection terms, int32_t numTokens, int32_t numOverlapTokens, double boost); + virtual ~MemoryIndexInfo(); + + LUCENE_CLASS(MemoryIndexInfo); + +protected: + /// Term strings and their positions for this field + MapStringIntCollection terms; + + /// Terms sorted ascending by term text; computed on demand + CollectionStringIntCollection sortedTerms; + + /// Number of added tokens for this field + int32_t numTokens; + + /// Number of overlapping tokens for this field + int32_t numOverlapTokens; + + /// Boost factor for hits for this field + double boost; + + /// Term for this field's fieldName, lazily computed on demand + TermPtr _template; + +public: + /// Sorts hashed terms into ascending order, reusing memory along the way. Note that + /// sorting is lazily delayed until required (often it's not required at all). + void sortTerms(); + + /// Note that the frequency can be calculated as numPosition(getPositions(x)) + Collection getPositions(const String& term); + + /// Note that the frequency can be calculated as numPosition(getPositions(x)) + Collection getPositions(int32_t pos); + + double getBoost(); + + friend class MemoryIndexReader; + friend class MemoryIndexTermEnum; + friend class MemoryIndexTermPositions; + friend class MemoryIndexTermPositionVector; +}; + +/// Search support for Lucene framework integration; implements all methods required by the +/// Lucene IndexReader contracts. 
+class LPPCONTRIBAPI MemoryIndexReader : public IndexReader { +public: + MemoryIndexReader(const MemoryIndexPtr& memoryIndex); + virtual ~MemoryIndexReader(); + + LUCENE_CLASS(MemoryIndexReader); + +public: + static TermPtr MATCH_ALL_TERM(); + +protected: + MemoryIndexPtr memoryIndex; + SearcherWeakPtr _searcher; // needed to find searcher.getSimilarity() + + /// cache norms to avoid repeated expensive calculations + ByteArray cachedNorms; + String cachedFieldName; + SimilarityPtr cachedSimilarity; + +protected: + MemoryIndexInfoPtr getInfo(const String& fieldName); + MemoryIndexInfoPtr getInfo(int32_t pos); + + SimilarityPtr getSimilarity(); + void setSearcher(const SearcherPtr& searcher); + +public: + virtual int32_t docFreq(const TermPtr& t); + virtual TermEnumPtr terms(); + virtual TermEnumPtr terms(const TermPtr& t); + virtual TermPositionsPtr termPositions(); + virtual TermDocsPtr termDocs(); + virtual Collection getTermFreqVectors(int32_t docNumber); + virtual void getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper); + virtual void getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper); + virtual TermFreqVectorPtr getTermFreqVector(int32_t docNumber, const String& field); + virtual ByteArray norms(const String& field); + virtual void norms(const String& field, ByteArray norms, int32_t offset); + virtual void doSetNorm(int32_t doc, const String& field, uint8_t value); + virtual int32_t numDocs(); + virtual int32_t maxDoc(); + virtual DocumentPtr document(int32_t n); + virtual DocumentPtr document(int32_t n, const FieldSelectorPtr& fieldSelector); + virtual bool isDeleted(int32_t n); + virtual bool hasDeletions(); + virtual void doDelete(int32_t docNum); + virtual void doUndeleteAll(); + virtual void doCommit(MapStringString commitUserData); + virtual void doClose(); + virtual HashSet getFieldNames(FieldOption fieldOption); + + friend class MemoryIndex; + friend class MemoryIndexTermEnum; + friend class 
MemoryIndexTermPositions; + friend class MemoryIndexTermPositionVector; +}; + +class LPPCONTRIBAPI MemoryIndexTermEnum : public TermEnum { +public: + MemoryIndexTermEnum(const MemoryIndexReaderPtr& reader, int32_t ix, int32_t jx); + virtual ~MemoryIndexTermEnum(); + + LUCENE_CLASS(MemoryIndexTermEnum); + +protected: + MemoryIndexReaderWeakPtr _reader; + int32_t i; + int32_t j; + +public: + virtual bool next(); + virtual TermPtr term(); + virtual int32_t docFreq(); + virtual void close(); + +protected: + TermPtr createTerm(const MemoryIndexInfoPtr& info, int32_t pos, const String& text); +}; + +class LPPCONTRIBAPI MemoryIndexCollector : public Collector { +public: + MemoryIndexCollector(Collection scores); + virtual ~MemoryIndexCollector(); + + LUCENE_CLASS(MemoryIndexCollector); + +protected: + Collection scores; + ScorerPtr scorer; + +public: + virtual void collect(int32_t doc); + virtual void setScorer(const ScorerPtr& scorer); + virtual bool acceptsDocsOutOfOrder(); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); +}; + +class LPPCONTRIBAPI MemoryIndexTermPositions : public TermPositions, public LuceneObject { +public: + MemoryIndexTermPositions(const MemoryIndexReaderPtr& reader); + virtual ~MemoryIndexTermPositions(); + + LUCENE_CLASS(MemoryIndexTermPositions); + +protected: + MemoryIndexReaderWeakPtr _reader; + bool hasNext; + int32_t cursor; + Collection current; + TermPtr term; + +public: + virtual void seek(const TermPtr& term); + virtual void seek(const TermEnumPtr& termEnum); + virtual int32_t doc(); + virtual int32_t freq(); + virtual bool next(); + virtual int32_t read(Collection& docs, Collection& freqs); + virtual bool skipTo(int32_t target); + virtual void close(); + + virtual int32_t nextPosition(); + virtual int32_t getPayloadLength(); + virtual ByteArray getPayload(ByteArray data, int32_t offset); + virtual bool isPayloadAvailable(); +}; + +class MemoryIndexTermPositionVector : public TermPositionVector, public 
LuceneObject { +public: + MemoryIndexTermPositionVector(const MemoryIndexReaderPtr& reader, const MemoryIndexInfoPtr& info, const String& fieldName); + virtual ~MemoryIndexTermPositionVector(); + + LUCENE_CLASS(MemoryIndexTermPositionVector); + +protected: + MemoryIndexReaderWeakPtr _reader; + CollectionStringIntCollection sortedTerms; + String fieldName; + +public: + virtual String getField(); + virtual int32_t size(); + virtual Collection getTerms(); + virtual Collection getTermFrequencies(); + virtual int32_t indexOf(const String& term); + virtual Collection indexesOf(Collection terms, int32_t start, int32_t length); + + virtual Collection getTermPositions(int32_t index); + virtual Collection getOffsets(int32_t index); +}; + } #endif diff --git a/src/contrib/include/NullFragmenter.h b/src/contrib/include/NullFragmenter.h index 97e3e48f..7ab7386f 100644 --- a/src/contrib/include/NullFragmenter.h +++ b/src/contrib/include/NullFragmenter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,20 +9,20 @@ #include "Fragmenter.h" -namespace Lucene -{ - /// {@link Fragmenter} implementation which does not fragment the text. This is useful for - /// highlighting the entire content of a document or field. - class LPPCONTRIBAPI NullFragmenter : public Fragmenter, public LuceneObject - { - public: - virtual ~NullFragmenter(); - LUCENE_CLASS(NullFragmenter); - - public: - virtual void start(const String& originalText, TokenStreamPtr tokenStream); - virtual bool isNewFragment(); - }; +namespace Lucene { + +/// {@link Fragmenter} implementation which does not fragment the text. 
This is useful for +/// highlighting the entire content of a document or field. +class LPPCONTRIBAPI NullFragmenter : public Fragmenter, public LuceneObject { +public: + virtual ~NullFragmenter(); + LUCENE_CLASS(NullFragmenter); + +public: + virtual void start(const String& originalText, const TokenStreamPtr& tokenStream); + virtual bool isNewFragment(); +}; + } #endif diff --git a/src/contrib/include/PersianAnalyzer.h b/src/contrib/include/PersianAnalyzer.h index ea32e174..7a0ae59b 100644 --- a/src/contrib/include/PersianAnalyzer.h +++ b/src/contrib/include/PersianAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,71 +10,70 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for Persian. +namespace Lucene { + +/// {@link Analyzer} for Persian. +/// +/// This Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around +/// zero-width non-joiner in addition to whitespace. Some persian-specific variant +/// forms (such as farsi yeh and keheh) are standardized. "Stemming" is accomplished +/// via stopwords. +class LPPCONTRIBAPI PersianAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. + PersianAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + PersianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + virtual ~PersianAnalyzer(); + + LUCENE_CLASS(PersianAnalyzer); + +public: + /// Default Persian stopwords in UTF-8 format. 
+ /// + /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html + /// The stopword list is BSD-Licensed. + static const uint8_t DEFAULT_STOPWORD_FILE[]; + +protected: + /// Contains the stopwords used with the StopFilter. + HashSet stoptable; + + LuceneVersion::Version matchVersion; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. + /// + /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with + /// {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} + /// and Persian Stop words. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. /// - /// This Analyzer uses {@link ArabicLetterTokenizer} which implies tokenizing around - /// zero-width non-joiner in addition to whitespace. Some persian-specific variant - /// forms (such as farsi yeh and keheh) are standardized. "Stemming" is accomplished - /// via stopwords. - class LPPCONTRIBAPI PersianAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - PersianAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - PersianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - virtual ~PersianAnalyzer(); - - LUCENE_CLASS(PersianAnalyzer); - - public: - /// Default Persian stopwords in UTF-8 format. - /// - /// Generated from http://members.unine.ch/jacques.savoy/clef/index.html - /// The stopword list is BSD-Licensed. - static const uint8_t DEFAULT_STOPWORD_FILE[]; - - protected: - /// Contains the stopwords used with the StopFilter. 
- HashSet stoptable; - - LuceneVersion::Version matchVersion; - - public: - /// Returns an unmodifiable instance of the default stop-words set. - static const HashSet getDefaultStopSet(); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with - /// {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} - /// and Persian Stop words. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with - /// {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} - /// and Persian Stop words. - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI PersianAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~PersianAnalyzerSavedStreams(); - - LUCENE_CLASS(PersianAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from an {@link ArabicLetterTokenizer} filtered with + /// {@link LowerCaseFilter}, {@link ArabicNormalizationFilter}, {@link PersianNormalizationFilter} + /// and Persian Stop words. 
+ virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI PersianAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~PersianAnalyzerSavedStreams(); + + LUCENE_CLASS(PersianAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/PersianNormalizationFilter.h b/src/contrib/include/PersianNormalizationFilter.h index 2c4bd604..3ad23728 100644 --- a/src/contrib/include/PersianNormalizationFilter.h +++ b/src/contrib/include/PersianNormalizationFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,24 +10,24 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the orthography. - class LPPCONTRIBAPI PersianNormalizationFilter : public TokenFilter - { - public: - PersianNormalizationFilter(TokenStreamPtr input); - virtual ~PersianNormalizationFilter(); - - LUCENE_CLASS(PersianNormalizationFilter); - - protected: - PersianNormalizerPtr normalizer; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; +namespace Lucene { + +/// A {@link TokenFilter} that applies {@link PersianNormalizer} to normalize the orthography. 
+class LPPCONTRIBAPI PersianNormalizationFilter : public TokenFilter { +public: + PersianNormalizationFilter(const TokenStreamPtr& input); + virtual ~PersianNormalizationFilter(); + + LUCENE_CLASS(PersianNormalizationFilter); + +protected: + PersianNormalizerPtr normalizer; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/PersianNormalizer.h b/src/contrib/include/PersianNormalizer.h index 523af01e..f12f9b5b 100644 --- a/src/contrib/include/PersianNormalizer.h +++ b/src/contrib/include/PersianNormalizer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,50 +10,50 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Normalizer for Persian. - /// - /// Normalization is done in-place for efficiency, operating on a termbuffer. - /// - /// Normalization is defined as: - ///
    - ///
  • Normalization of various heh + hamza forms and heh goal to heh. - ///
  • Normalization of farsi yeh and yeh barree to arabic yeh. - ///
  • Normalization of persian keheh to arabic kaf. - ///
- class LPPCONTRIBAPI PersianNormalizer : public LuceneObject - { - public: - virtual ~PersianNormalizer(); - - LUCENE_CLASS(PersianNormalizer); - - public: - static const wchar_t YEH; - static const wchar_t FARSI_YEH; - static const wchar_t YEH_BARREE; - static const wchar_t KEHEH; - static const wchar_t KAF; - static const wchar_t HAMZA_ABOVE; - static const wchar_t HEH_YEH; - static const wchar_t HEH_GOAL; - static const wchar_t HEH; - - public: - /// Normalize an input buffer of Persian text - /// @param s input buffer - /// @param len length of input buffer - /// @return length of input buffer after normalization - int32_t normalize(wchar_t* s, int32_t len); - - /// Delete a character in-place - /// @param s Input Buffer - /// @param pos Position of character to delete - /// @param len length of input buffer - /// @return length of input buffer after deletion - int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); - }; +namespace Lucene { + +/// Normalizer for Persian. +/// +/// Normalization is done in-place for efficiency, operating on a termbuffer. +/// +/// Normalization is defined as: +///
    +///
  • Normalization of various heh + hamza forms and heh goal to heh. +///
  • Normalization of farsi yeh and yeh barree to arabic yeh. +///
  • Normalization of persian keheh to arabic kaf. +///
+class LPPCONTRIBAPI PersianNormalizer : public LuceneObject { +public: + virtual ~PersianNormalizer(); + + LUCENE_CLASS(PersianNormalizer); + +public: + static const wchar_t YEH; + static const wchar_t FARSI_YEH; + static const wchar_t YEH_BARREE; + static const wchar_t KEHEH; + static const wchar_t KAF; + static const wchar_t HAMZA_ABOVE; + static const wchar_t HEH_YEH; + static const wchar_t HEH_GOAL; + static const wchar_t HEH; + +public: + /// Normalize an input buffer of Persian text + /// @param s input buffer + /// @param len length of input buffer + /// @return length of input buffer after normalization + int32_t normalize(wchar_t* s, int32_t len); + + /// Delete a character in-place + /// @param s Input Buffer + /// @param pos Position of character to delete + /// @param len length of input buffer + /// @return length of input buffer after deletion + int32_t deleteChar(wchar_t* s, int32_t pos, int32_t len); +}; + } #endif diff --git a/src/contrib/include/QueryScorer.h b/src/contrib/include/QueryScorer.h index a5064391..9389d7bd 100644 --- a/src/contrib/include/QueryScorer.h +++ b/src/contrib/include/QueryScorer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,87 +10,87 @@ #include "LuceneContrib.h" #include "HighlighterScorer.h" -namespace Lucene -{ - /// {@link HighlighterScorer} implementation which scores text fragments by the number of unique query terms found. - /// This class converts appropriate {@link Query}s to {@link SpanQuery}s and attempts to score only - /// those terms that participated in generating the 'hit' on the document. 
- class LPPCONTRIBAPI QueryScorer : public HighlighterScorer, public LuceneObject - { - public: - /// @param query Query to use for highlighting - QueryScorer(QueryPtr query); - - /// @param query Query to use for highlighting - /// @param field Field to highlight - pass empty string to ignore fields - QueryScorer(QueryPtr query, const String& field); - - /// @param query Query to use for highlighting - /// @param reader {@link IndexReader} to use for quasi tf/idf scoring - /// @param field Field to highlight - pass empty string to ignore fields - QueryScorer(QueryPtr query, IndexReaderPtr reader, const String& field); - - /// @param query Query to use for highlighting - /// @param reader {@link IndexReader} to use for quasi tf/idf scoring - /// @param field Field to highlight - pass empty string to ignore fields - /// @param defaultField - QueryScorer(QueryPtr query, IndexReaderPtr reader, const String& field, const String& defaultField); - - /// @param query Query to use for highlighting - /// @param field Field to highlight - pass empty string to ignore fields - /// @param defaultField - QueryScorer(QueryPtr query, const String& field, const String& defaultField); - - /// @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s - QueryScorer(Collection weightedTerms); - - virtual ~QueryScorer(); - - LUCENE_CLASS(QueryScorer); - - protected: - double totalScore; - HashSet foundTerms; - MapWeightedSpanTermPtr fieldWeightedSpanTerms; - double maxTermWeight; - int32_t position; - String defaultField; - TermAttributePtr termAtt; - PositionIncrementAttributePtr posIncAtt; - bool expandMultiTermQuery; - QueryPtr query; - String field; - IndexReaderPtr reader; - bool skipInitExtractor; - bool wrapToCaching; - - protected: - void init(QueryPtr query, const String& field, IndexReaderPtr reader, bool expandMultiTermQuery); - TokenStreamPtr initExtractor(TokenStreamPtr tokenStream); - - public: - virtual double getFragmentScore(); - - /// @return The highest 
weighted term (useful for passing to GradientFormatter to set top end of coloring scale). - virtual double getMaxTermWeight(); - - virtual double getTokenScore(); - virtual TokenStreamPtr init(TokenStreamPtr tokenStream); - virtual WeightedSpanTermPtr getWeightedSpanTerm(const String& token); - virtual void startFragment(TextFragmentPtr newFragment); - - /// @return true if multi-term queries should be expanded - virtual bool isExpandMultiTermQuery(); - - /// Controls whether or not multi-term queries are expanded against a {@link MemoryIndex} {@link IndexReader}. - /// @param expandMultiTermQuery true if multi-term queries should be expanded - virtual void setExpandMultiTermQuery(bool expandMultiTermQuery); - - /// By default, {@link TokenStream}s that are not of the type {@link CachingTokenFilter} are wrapped in a {@link - /// CachingTokenFilter} to ensure an efficient reset - if you are already using a different caching {@link - /// TokenStream} impl and you don't want it to be wrapped, set this to false. - virtual void setWrapIfNotCachingTokenFilter(bool wrap); - }; +namespace Lucene { + +/// {@link HighlighterScorer} implementation which scores text fragments by the number of unique query terms found. +/// This class converts appropriate {@link Query}s to {@link SpanQuery}s and attempts to score only +/// those terms that participated in generating the 'hit' on the document. 
+class LPPCONTRIBAPI QueryScorer : public HighlighterScorer, public LuceneObject { +public: + /// @param query Query to use for highlighting + QueryScorer(const QueryPtr& query); + + /// @param query Query to use for highlighting + /// @param field Field to highlight - pass empty string to ignore fields + QueryScorer(const QueryPtr& query, const String& field); + + /// @param query Query to use for highlighting + /// @param reader {@link IndexReader} to use for quasi tf/idf scoring + /// @param field Field to highlight - pass empty string to ignore fields + QueryScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& field); + + /// @param query Query to use for highlighting + /// @param reader {@link IndexReader} to use for quasi tf/idf scoring + /// @param field Field to highlight - pass empty string to ignore fields + /// @param defaultField + QueryScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& field, const String& defaultField); + + /// @param query Query to use for highlighting + /// @param field Field to highlight - pass empty string to ignore fields + /// @param defaultField + QueryScorer(const QueryPtr& query, const String& field, const String& defaultField); + + /// @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s + QueryScorer(Collection weightedTerms); + + virtual ~QueryScorer(); + + LUCENE_CLASS(QueryScorer); + +protected: + double totalScore; + HashSet foundTerms; + MapWeightedSpanTermPtr fieldWeightedSpanTerms; + double maxTermWeight; + int32_t position; + String defaultField; + TermAttributePtr termAtt; + PositionIncrementAttributePtr posIncAtt; + bool expandMultiTermQuery; + QueryPtr query; + String field; + IndexReaderPtr reader; + bool skipInitExtractor; + bool wrapToCaching; + +protected: + void init(const QueryPtr& query, const String& field, const IndexReaderPtr& reader, bool expandMultiTermQuery); + TokenStreamPtr initExtractor(const TokenStreamPtr& tokenStream); + +public: + 
virtual double getFragmentScore(); + + /// @return The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale). + virtual double getMaxTermWeight(); + + virtual double getTokenScore(); + virtual TokenStreamPtr init(const TokenStreamPtr& tokenStream); + virtual WeightedSpanTermPtr getWeightedSpanTerm(const String& token); + virtual void startFragment(const TextFragmentPtr& newFragment); + + /// @return true if multi-term queries should be expanded + virtual bool isExpandMultiTermQuery(); + + /// Controls whether or not multi-term queries are expanded against a {@link MemoryIndex} {@link IndexReader}. + /// @param expandMultiTermQuery true if multi-term queries should be expanded + virtual void setExpandMultiTermQuery(bool expandMultiTermQuery); + + /// By default, {@link TokenStream}s that are not of the type {@link CachingTokenFilter} are wrapped in a {@link + /// CachingTokenFilter} to ensure an efficient reset - if you are already using a different caching {@link + /// TokenStream} impl and you don't want it to be wrapped, set this to false. + virtual void setWrapIfNotCachingTokenFilter(bool wrap); +}; + } #endif diff --git a/src/contrib/include/QueryTermExtractor.h b/src/contrib/include/QueryTermExtractor.h index 509ded3e..b6bbabf6 100644 --- a/src/contrib/include/QueryTermExtractor.h +++ b/src/contrib/include/QueryTermExtractor.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,59 +10,59 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Utility class used to extract the terms used in a query, plus any weights. 
This class will not - /// find terms for MultiTermQuery, TermRangeQuery and PrefixQuery classes so the caller must pass a - /// rewritten query (see Query.rewrite) to obtain a list of expanded terms. - class LPPCONTRIBAPI QueryTermExtractor : public LuceneObject - { - public: - virtual ~QueryTermExtractor(); - LUCENE_CLASS(QueryTermExtractor); - - public: - /// Extracts all terms texts of a given Query into an array of WeightedTerms - /// - /// @param query Query to extract term texts from. - /// @return an array of the terms used in a query, plus their weights. - static Collection getTerms(QueryPtr query); - - /// Extracts all terms texts of a given Query into an array of WeightedTerms - /// - /// @param query Query to extract term texts from. - /// @param reader used to compute IDF which can be used to - /// a) score selected fragments better - /// b) use graded highlights eg changing intensity of font color - /// @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based. - /// @return an array of the terms used in a query, plus their weights. - static Collection getIdfWeightedTerms(QueryPtr query, IndexReaderPtr reader, const String& fieldName); - - /// Extracts all terms texts of a given Query into an array of WeightedTerms - /// - /// @param query Query to extract term texts from. - /// @param prohibited true to extract "prohibited" terms, too. - /// @param fieldName The fieldName used to filter query terms. - /// @return an array of the terms used in a query, plus their weights. - static Collection getTerms(QueryPtr query, bool prohibited, const String& fieldName); - - /// Extracts all terms texts of a given Query into an array of WeightedTerms - /// - /// @param query Query to extract term texts from. - /// @param prohibited true to extract "prohibited" terms, too. - /// @return an array of the terms used in a query, plus their weights. 
- static Collection getTerms(QueryPtr query, bool prohibited); - - static void getTerms(QueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName); - - protected: - /// extractTerms is currently the only query-independent means of introspecting queries but it only reveals - /// a list of terms for that query - not the boosts each individual term in that query may or may not have. - /// "Container" queries such as BooleanQuery should be unwrapped to get at the boost info held in each child - /// element. - static void getTermsFromBooleanQuery(BooleanQueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName); - static void getTermsFromFilteredQuery(FilteredQueryPtr query, SetWeightedTerm terms, bool prohibited, const String& fieldName); - }; +namespace Lucene { + +/// Utility class used to extract the terms used in a query, plus any weights. This class will not +/// find terms for MultiTermQuery, TermRangeQuery and PrefixQuery classes so the caller must pass a +/// rewritten query (see Query.rewrite) to obtain a list of expanded terms. +class LPPCONTRIBAPI QueryTermExtractor : public LuceneObject { +public: + virtual ~QueryTermExtractor(); + LUCENE_CLASS(QueryTermExtractor); + +public: + /// Extracts all terms texts of a given Query into an array of WeightedTerms + /// + /// @param query Query to extract term texts from. + /// @return an array of the terms used in a query, plus their weights. + static Collection getTerms(const QueryPtr& query); + + /// Extracts all terms texts of a given Query into an array of WeightedTerms + /// + /// @param query Query to extract term texts from. + /// @param reader used to compute IDF which can be used to + /// a) score selected fragments better + /// b) use graded highlights eg changing intensity of font color + /// @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based. + /// @return an array of the terms used in a query, plus their weights. 
+ static Collection getIdfWeightedTerms(const QueryPtr& query, const IndexReaderPtr& reader, const String& fieldName); + + /// Extracts all terms texts of a given Query into an array of WeightedTerms + /// + /// @param query Query to extract term texts from. + /// @param prohibited true to extract "prohibited" terms, too. + /// @param fieldName The fieldName used to filter query terms. + /// @return an array of the terms used in a query, plus their weights. + static Collection getTerms(const QueryPtr& query, bool prohibited, const String& fieldName); + + /// Extracts all terms texts of a given Query into an array of WeightedTerms + /// + /// @param query Query to extract term texts from. + /// @param prohibited true to extract "prohibited" terms, too. + /// @return an array of the terms used in a query, plus their weights. + static Collection getTerms(const QueryPtr& query, bool prohibited); + + static void getTerms(const QueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName); + +protected: + /// extractTerms is currently the only query-independent means of introspecting queries but it only reveals + /// a list of terms for that query - not the boosts each individual term in that query may or may not have. + /// "Container" queries such as BooleanQuery should be unwrapped to get at the boost info held in each child + /// element. 
+ static void getTermsFromBooleanQuery(const BooleanQueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName); + static void getTermsFromFilteredQuery(const FilteredQueryPtr& query, SetWeightedTerm terms, bool prohibited, const String& fieldName); +}; + } #endif diff --git a/src/contrib/include/QueryTermScorer.h b/src/contrib/include/QueryTermScorer.h index 99ee41f9..9a1c1fa6 100644 --- a/src/contrib/include/QueryTermScorer.h +++ b/src/contrib/include/QueryTermScorer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,62 +10,62 @@ #include "LuceneContrib.h" #include "HighlighterScorer.h" -namespace Lucene -{ - /// {@link HighlighterScorer} implementation which scores text fragments by the number of unique query terms found. - /// This class uses the {@link QueryTermExtractor} class to process determine the query terms and their - /// boosts to be used. 
- class LPPCONTRIBAPI QueryTermScorer : public HighlighterScorer, public LuceneObject - { - public: - /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class - /// and the searcher) - QueryTermScorer(QueryPtr query); - - /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class - /// and the searcher) - /// @param fieldName the Field name which is used to match Query terms - QueryTermScorer(QueryPtr query, const String& fieldName); - - /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class - /// and the searcher) - /// @param reader used to compute IDF which can be used to - /// a) score selected fragments better - /// b) use graded highlights eg set font color intensity - /// @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based - QueryTermScorer(QueryPtr query, IndexReaderPtr reader, const String& fieldName); - - /// @param weightedTerms an array of pre-created {@link WeightedTerm}s - QueryTermScorer(Collection weightedTerms); - - virtual ~QueryTermScorer(); - - LUCENE_CLASS(QueryTermScorer); - - public: - TextFragmentPtr currentTextFragment; - HashSet uniqueTermsInFragment; - - double totalScore; - double maxTermWeight; - - protected: - MapStringWeightedTerm termsToFind; - TermAttributePtr termAtt; - - protected: - void ConstructQueryTermScorer(Collection weightedTerms); - - public: - virtual TokenStreamPtr init(TokenStreamPtr tokenStream); - virtual void startFragment(TextFragmentPtr newFragment); - virtual double getTokenScore(); - virtual double getFragmentScore(); - virtual void allFragmentsProcessed(); - - /// @return The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale. 
- virtual double getMaxTermWeight(); - }; +namespace Lucene { + +/// {@link HighlighterScorer} implementation which scores text fragments by the number of unique query terms found. +/// This class uses the {@link QueryTermExtractor} class to process determine the query terms and their +/// boosts to be used. +class LPPCONTRIBAPI QueryTermScorer : public HighlighterScorer, public LuceneObject { +public: + /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class + /// and the searcher) + QueryTermScorer(const QueryPtr& query); + + /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class + /// and the searcher) + /// @param fieldName the Field name which is used to match Query terms + QueryTermScorer(const QueryPtr& query, const String& fieldName); + + /// @param query a Lucene query (ideally rewritten using query.rewrite before being passed to this class + /// and the searcher) + /// @param reader used to compute IDF which can be used to + /// a) score selected fragments better + /// b) use graded highlights eg set font color intensity + /// @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based + QueryTermScorer(const QueryPtr& query, const IndexReaderPtr& reader, const String& fieldName); + + /// @param weightedTerms an array of pre-created {@link WeightedTerm}s + QueryTermScorer(Collection weightedTerms); + + virtual ~QueryTermScorer(); + + LUCENE_CLASS(QueryTermScorer); + +public: + TextFragmentPtr currentTextFragment; + HashSet uniqueTermsInFragment; + + double totalScore; + double maxTermWeight; + +protected: + MapStringWeightedTerm termsToFind; + TermAttributePtr termAtt; + +protected: + void ConstructQueryTermScorer(Collection weightedTerms); + +public: + virtual TokenStreamPtr init(const TokenStreamPtr& tokenStream); + virtual void startFragment(const TextFragmentPtr& newFragment); + virtual double getTokenScore(); + virtual 
double getFragmentScore(); + virtual void allFragmentsProcessed(); + + /// @return The highest weighted term (useful for passing to GradientFormatter to set top end of coloring scale. + virtual double getMaxTermWeight(); +}; + } #endif diff --git a/src/contrib/include/ReverseStringFilter.h b/src/contrib/include/ReverseStringFilter.h index 64adaa24..20124e5e 100644 --- a/src/contrib/include/ReverseStringFilter.h +++ b/src/contrib/include/ReverseStringFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,53 +9,53 @@ #include "TokenFilter.h" -namespace Lucene -{ - /// Reverse token string, for example "country" => "yrtnuoc". +namespace Lucene { + +/// Reverse token string, for example "country" => "yrtnuoc". +/// +/// If marker is supplied, then tokens will be also prepended by that character. For example, with a +/// marker of \u0001, "country" => "\u0001yrtnuoc". This is useful when implementing efficient +/// leading wildcards search. +class LPPCONTRIBAPI ReverseStringFilter : public TokenFilter { +public: + /// Create a new ReverseStringFilter that reverses all tokens in the supplied {@link TokenStream}. + /// + /// The reversed tokens will not be marked. + ReverseStringFilter(const TokenStreamPtr& input); + + /// Create a new ReverseStringFilter that reverses and marks all tokens in the supplied {@link + /// TokenStream}. /// - /// If marker is supplied, then tokens will be also prepended by that character. For example, with a - /// marker of \u0001, "country" => "\u0001yrtnuoc". This is useful when implementing efficient - /// leading wildcards search. 
- class LPPCONTRIBAPI ReverseStringFilter : public TokenFilter - { - public: - /// Create a new ReverseStringFilter that reverses all tokens in the supplied {@link TokenStream}. - /// - /// The reversed tokens will not be marked. - ReverseStringFilter(TokenStreamPtr input); - - /// Create a new ReverseStringFilter that reverses and marks all tokens in the supplied {@link - /// TokenStream}. - /// - /// The reversed tokens will be prepended (marked) by the marker character. - ReverseStringFilter(TokenStreamPtr input, wchar_t marker); - - virtual ~ReverseStringFilter(); - - LUCENE_CLASS(ReverseStringFilter); - - protected: - TermAttributePtr termAtt; - wchar_t marker; - - static const wchar_t NOMARKER; - - public: - /// Example marker character: U+0001 (START OF HEADING) - static const wchar_t START_OF_HEADING_MARKER; - - /// Example marker character: U+001F (INFORMATION SEPARATOR ONE) - static const wchar_t INFORMATION_SEPARATOR_MARKER; - - /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) - static const wchar_t PUA_EC00_MARKER; - - /// Example marker character: U+200F (RIGHT-TO-LEFT MARK) - static const wchar_t RTL_DIRECTION_MARKER; - - public: - virtual bool incrementToken(); - }; + /// The reversed tokens will be prepended (marked) by the marker character. 
+ ReverseStringFilter(const TokenStreamPtr& input, wchar_t marker); + + virtual ~ReverseStringFilter(); + + LUCENE_CLASS(ReverseStringFilter); + +protected: + TermAttributePtr termAtt; + wchar_t marker; + + static const wchar_t NOMARKER; + +public: + /// Example marker character: U+0001 (START OF HEADING) + static const wchar_t START_OF_HEADING_MARKER; + + /// Example marker character: U+001F (INFORMATION SEPARATOR ONE) + static const wchar_t INFORMATION_SEPARATOR_MARKER; + + /// Example marker character: U+EC00 (PRIVATE USE AREA: EC00) + static const wchar_t PUA_EC00_MARKER; + + /// Example marker character: U+200F (RIGHT-TO-LEFT MARK) + static const wchar_t RTL_DIRECTION_MARKER; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/RussianAnalyzer.h b/src/contrib/include/RussianAnalyzer.h index f1b9c24e..86f8aebd 100644 --- a/src/contrib/include/RussianAnalyzer.h +++ b/src/contrib/include/RussianAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,63 +10,62 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// {@link Analyzer} for Russian language. +namespace Lucene { + +/// {@link Analyzer} for Russian language. +/// +/// Supports an external list of stopwords (words that will not be indexed at all). +/// A default set of stopwords is used unless an alternative list is specified. +class LPPCONTRIBAPI RussianAnalyzer : public Analyzer { +public: + /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. 
+ RussianAnalyzer(LuceneVersion::Version matchVersion); + + /// Builds an analyzer with the given stop words. + RussianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); + + virtual ~RussianAnalyzer(); + + LUCENE_CLASS(RussianAnalyzer); + +protected: + /// Contains the stopwords used with the {@link StopFilter}. + HashSet stopSet; + + LuceneVersion::Version matchVersion; + + /// List of typical Russian stopwords. + static const uint8_t DEFAULT_STOPWORD_FILE[]; + +public: + /// Returns an unmodifiable instance of the default stop-words set. + static const HashSet getDefaultStopSet(); + + /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. + /// + /// @return A {@link TokenStream} built from a {@link RussianLetterTokenizer} filtered with + /// {@link RussianLowerCaseFilter}, {@link StopFilter} and {@link RussianStemFilter}. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the + /// provided {@link Reader}. /// - /// Supports an external list of stopwords (words that will not be indexed at all). - /// A default set of stopwords is used unless an alternative list is specified. - class LPPCONTRIBAPI RussianAnalyzer : public Analyzer - { - public: - /// Builds an analyzer with the default stop words: {@link #getDefaultStopSet}. - RussianAnalyzer(LuceneVersion::Version matchVersion); - - /// Builds an analyzer with the given stop words. - RussianAnalyzer(LuceneVersion::Version matchVersion, HashSet stopwords); - - virtual ~RussianAnalyzer(); - - LUCENE_CLASS(RussianAnalyzer); - - protected: - /// Contains the stopwords used with the {@link StopFilter}. - HashSet stopSet; - - LuceneVersion::Version matchVersion; - - /// List of typical Russian stopwords. - static const uint8_t DEFAULT_STOPWORD_FILE[]; - - public: - /// Returns an unmodifiable instance of the default stop-words set. 
- static const HashSet getDefaultStopSet(); - - /// Creates a {@link TokenStream} which tokenizes all the text in the provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from a {@link RussianLetterTokenizer} filtered with - /// {@link RussianLowerCaseFilter}, {@link StopFilter} and {@link RussianStemFilter}. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link TokenStream} which tokenizes all the text in the - /// provided {@link Reader}. - /// - /// @return A {@link TokenStream} built from a {@link RussianLetterTokenizer} filtered with - /// {@link RussianLowerCaseFilter}, {@link StopFilter} and {@link RussianStemFilter}. - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI RussianAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~RussianAnalyzerSavedStreams(); - - LUCENE_CLASS(RussianAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; + /// @return A {@link TokenStream} built from a {@link RussianLetterTokenizer} filtered with + /// {@link RussianLowerCaseFilter}, {@link StopFilter} and {@link RussianStemFilter}. + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI RussianAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~RussianAnalyzerSavedStreams(); + + LUCENE_CLASS(RussianAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/RussianLetterTokenizer.h b/src/contrib/include/RussianLetterTokenizer.h index 6241a0b3..5bbe1544 100644 --- a/src/contrib/include/RussianLetterTokenizer.h +++ b/src/contrib/include/RussianLetterTokenizer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,30 +9,30 @@ #include "CharTokenizer.h" -namespace Lucene -{ - /// A RussianLetterTokenizer is a {@link Tokenizer} that extends {@link LetterTokenizer} by also - /// allowing the basic Latin digits 0-9. - class LPPCONTRIBAPI RussianLetterTokenizer : public CharTokenizer - { - public: - /// Construct a new RussianLetterTokenizer. - RussianLetterTokenizer(ReaderPtr input); - - /// Construct a new RussianLetterTokenizer using a given {@link AttributeSource}. - RussianLetterTokenizer(AttributeSourcePtr source, ReaderPtr input); - - /// Construct a new RussianLetterTokenizer using a given {@link AttributeFactory}. - RussianLetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input); - - virtual ~RussianLetterTokenizer(); - - LUCENE_CLASS(RussianLetterTokenizer); - - public: - /// Collects only characters which satisfy UnicodeUtil::isAlpha(c). - virtual bool isTokenChar(wchar_t c); - }; +namespace Lucene { + +/// A RussianLetterTokenizer is a {@link Tokenizer} that extends {@link LetterTokenizer} by also +/// allowing the basic Latin digits 0-9. +class LPPCONTRIBAPI RussianLetterTokenizer : public CharTokenizer { +public: + /// Construct a new RussianLetterTokenizer. + RussianLetterTokenizer(const ReaderPtr& input); + + /// Construct a new RussianLetterTokenizer using a given {@link AttributeSource}. + RussianLetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input); + + /// Construct a new RussianLetterTokenizer using a given {@link AttributeFactory}. 
+ RussianLetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input); + + virtual ~RussianLetterTokenizer(); + + LUCENE_CLASS(RussianLetterTokenizer); + +public: + /// Collects only characters which satisfy UnicodeUtil::isAlpha(c). + virtual bool isTokenChar(wchar_t c); +}; + } #endif diff --git a/src/contrib/include/RussianLowerCaseFilter.h b/src/contrib/include/RussianLowerCaseFilter.h index aef8bdfe..6fffec0f 100644 --- a/src/contrib/include/RussianLowerCaseFilter.h +++ b/src/contrib/include/RussianLowerCaseFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,24 +10,24 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// Normalizes token text to lower case. - class LPPCONTRIBAPI RussianLowerCaseFilter : public TokenFilter - { - public: - RussianLowerCaseFilter(TokenStreamPtr input); - - virtual ~RussianLowerCaseFilter(); - - LUCENE_CLASS(RussianLowerCaseFilter); - - protected: - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; +namespace Lucene { + +/// Normalizes token text to lower case. 
+class LPPCONTRIBAPI RussianLowerCaseFilter : public TokenFilter { +public: + RussianLowerCaseFilter(const TokenStreamPtr& input); + + virtual ~RussianLowerCaseFilter(); + + LUCENE_CLASS(RussianLowerCaseFilter); + +protected: + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/RussianStemFilter.h b/src/contrib/include/RussianStemFilter.h index f29b909b..72e05c30 100644 --- a/src/contrib/include/RussianStemFilter.h +++ b/src/contrib/include/RussianStemFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,35 +10,35 @@ #include "LuceneContrib.h" #include "TokenFilter.h" -namespace Lucene -{ - /// A {@link TokenFilter} that stems Russian words. - /// - /// The implementation was inspired by GermanStemFilter. - /// - /// The input should be filtered by {@link LowerCaseFilter} before passing it to RussianStemFilter, - /// because RussianStemFilter only works with lowercase characters. - class LPPCONTRIBAPI RussianStemFilter : public TokenFilter - { - public: - RussianStemFilter(TokenStreamPtr input); - - virtual ~RussianStemFilter(); - - LUCENE_CLASS(RussianStemFilter); - - protected: - /// {@link RussianStemmer} in use by this filter. - RussianStemmerPtr stemmer; - - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - - /// Set a alternative/custom {@link RussianStemmer} for this filter. - void setStemmer(RussianStemmerPtr stemmer); - }; +namespace Lucene { + +/// A {@link TokenFilter} that stems Russian words. +/// +/// The implementation was inspired by GermanStemFilter. 
+/// +/// The input should be filtered by {@link LowerCaseFilter} before passing it to RussianStemFilter, +/// because RussianStemFilter only works with lowercase characters. +class LPPCONTRIBAPI RussianStemFilter : public TokenFilter { +public: + RussianStemFilter(const TokenStreamPtr& input); + + virtual ~RussianStemFilter(); + + LUCENE_CLASS(RussianStemFilter); + +protected: + /// {@link RussianStemmer} in use by this filter. + RussianStemmerPtr stemmer; + + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); + + /// Set a alternative/custom {@link RussianStemmer} for this filter. + void setStemmer(const RussianStemmerPtr& stemmer); +}; + } #endif diff --git a/src/contrib/include/RussianStemmer.h b/src/contrib/include/RussianStemmer.h index a92f5504..5b5e12d4 100644 --- a/src/contrib/include/RussianStemmer.h +++ b/src/contrib/include/RussianStemmer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,117 +10,117 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Russian stemming algorithm implementation (see http://snowball.sourceforge.net for - /// detailed description). 
- class LPPCONTRIBAPI RussianStemmer : public LuceneObject - { - public: - RussianStemmer(); - virtual ~RussianStemmer(); - - LUCENE_CLASS(RussianStemmer); - - protected: - /// positions of RV, R1 and R2 respectively - int32_t RV; - int32_t R1; - int32_t R2; - - static const wchar_t A; - static const wchar_t V; - static const wchar_t G; - static const wchar_t E; - static const wchar_t I; - static const wchar_t I_; - static const wchar_t L; - static const wchar_t M; - static const wchar_t N; - static const wchar_t O; - static const wchar_t S; - static const wchar_t T; - static const wchar_t U; - static const wchar_t X; - static const wchar_t SH; - static const wchar_t SHCH; - static const wchar_t Y; - static const wchar_t SOFT; - static const wchar_t AE; - static const wchar_t IU; - static const wchar_t IA; - - /// stem definitions - static const wchar_t vowels[]; - - Collection perfectiveGerundEndings1(); - Collection perfectiveGerund1Predessors(); - Collection perfectiveGerundEndings2(); - Collection adjectiveEndings(); - Collection participleEndings1(); - Collection participleEndings2(); - Collection participle1Predessors(); - Collection reflexiveEndings(); - Collection verbEndings1(); - Collection verbEndings2(); - Collection verb1Predessors(); - Collection nounEndings(); - Collection superlativeEndings(); - Collection derivationalEndings(); - Collection doubleN(); - - public: - /// Finds the stem for given Russian word. - String stem(const String& input); - - /// Static method for stemming. - static String stemWord(const String& word); - - protected: - /// Adjectival ending is an adjective ending, optionally preceded by participle ending. - bool adjectival(String& stemmingZone); - - /// Derivational endings - bool derivational(String& stemmingZone); - - /// Finds ending among given ending class and returns the length of ending found(0, if not found). 
- int32_t findEnding(String& stemmingZone, int32_t startIndex, Collection theEndingClass); - int32_t findEnding(String& stemmingZone, Collection theEndingClass); - - /// Finds the ending among the given class of endings and removes it from stemming zone. - bool findAndRemoveEnding(String& stemmingZone, Collection theEndingClass); - - /// Finds the ending among the given class of endings, then checks if this ending was - /// preceded by any of given predecessors, and if so, removes it from stemming zone. - bool findAndRemoveEnding(String& stemmingZone, Collection theEndingClass, Collection thePredessors); - - /// Marks positions of RV, R1 and R2 in a given word. - void markPositions(const String& word); - - /// Checks if character is a vowel. - bool isVowel(wchar_t letter); - - /// Noun endings. - bool noun(String& stemmingZone); - - /// Perfective gerund endings. - bool perfectiveGerund(String& stemmingZone); - - /// Reflexive endings. - bool reflexive(String& stemmingZone); - - bool removeI(String& stemmingZone); - bool removeSoft(String& stemmingZone); - - /// Superlative endings. - bool superlative(String& stemmingZone); - - /// Undoubles N. - bool undoubleN(String& stemmingZone); - - /// Verb endings. - bool verb(String& stemmingZone); - }; +namespace Lucene { + +/// Russian stemming algorithm implementation (see http://snowball.sourceforge.net for +/// detailed description). 
+class LPPCONTRIBAPI RussianStemmer : public LuceneObject { +public: + RussianStemmer(); + virtual ~RussianStemmer(); + + LUCENE_CLASS(RussianStemmer); + +protected: + /// positions of RV, R1 and R2 respectively + int32_t RV; + int32_t R1; + int32_t R2; + + static const wchar_t A; + static const wchar_t V; + static const wchar_t G; + static const wchar_t E; + static const wchar_t I; + static const wchar_t I_; + static const wchar_t L; + static const wchar_t M; + static const wchar_t N; + static const wchar_t O; + static const wchar_t S; + static const wchar_t T; + static const wchar_t U; + static const wchar_t X; + static const wchar_t SH; + static const wchar_t SHCH; + static const wchar_t Y; + static const wchar_t SOFT; + static const wchar_t AE; + static const wchar_t IU; + static const wchar_t IA; + + /// stem definitions + static const wchar_t vowels[]; + + Collection perfectiveGerundEndings1(); + Collection perfectiveGerund1Predessors(); + Collection perfectiveGerundEndings2(); + Collection adjectiveEndings(); + Collection participleEndings1(); + Collection participleEndings2(); + Collection participle1Predessors(); + Collection reflexiveEndings(); + Collection verbEndings1(); + Collection verbEndings2(); + Collection verb1Predessors(); + Collection nounEndings(); + Collection superlativeEndings(); + Collection derivationalEndings(); + Collection doubleN(); + +public: + /// Finds the stem for given Russian word. + String stem(const String& input); + + /// Static method for stemming. + static String stemWord(const String& word); + +protected: + /// Adjectival ending is an adjective ending, optionally preceded by participle ending. + bool adjectival(String& stemmingZone); + + /// Derivational endings + bool derivational(String& stemmingZone); + + /// Finds ending among given ending class and returns the length of ending found(0, if not found). 
+ int32_t findEnding(String& stemmingZone, int32_t startIndex, Collection theEndingClass); + int32_t findEnding(String& stemmingZone, Collection theEndingClass); + + /// Finds the ending among the given class of endings and removes it from stemming zone. + bool findAndRemoveEnding(String& stemmingZone, Collection theEndingClass); + + /// Finds the ending among the given class of endings, then checks if this ending was + /// preceded by any of given predecessors, and if so, removes it from stemming zone. + bool findAndRemoveEnding(String& stemmingZone, Collection theEndingClass, Collection thePredessors); + + /// Marks positions of RV, R1 and R2 in a given word. + void markPositions(const String& word); + + /// Checks if character is a vowel. + bool isVowel(wchar_t letter); + + /// Noun endings. + bool noun(String& stemmingZone); + + /// Perfective gerund endings. + bool perfectiveGerund(String& stemmingZone); + + /// Reflexive endings. + bool reflexive(String& stemmingZone); + + bool removeI(String& stemmingZone); + bool removeSoft(String& stemmingZone); + + /// Superlative endings. + bool superlative(String& stemmingZone); + + /// Undoubles N. + bool undoubleN(String& stemmingZone); + + /// Verb endings. + bool verb(String& stemmingZone); +}; + } #endif diff --git a/src/contrib/include/SimpleFragmenter.h b/src/contrib/include/SimpleFragmenter.h index 6b7e4e1d..1d61077c 100644 --- a/src/contrib/include/SimpleFragmenter.h +++ b/src/contrib/include/SimpleFragmenter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,36 +9,36 @@ #include "Fragmenter.h" -namespace Lucene -{ - /// {@link Fragmenter} implementation which breaks text up into same-size fragments with - /// no concerns over spotting sentence boundaries. - class LPPCONTRIBAPI SimpleFragmenter : public Fragmenter, public LuceneObject - { - public: - SimpleFragmenter(); - SimpleFragmenter(int32_t fragmentSize); - - virtual ~SimpleFragmenter(); - - LUCENE_CLASS(SimpleFragmenter); - - protected: - static const int32_t DEFAULT_FRAGMENT_SIZE; - int32_t currentNumFrags; - int32_t fragmentSize; - OffsetAttributePtr offsetAtt; - - public: - virtual void start(const String& originalText, TokenStreamPtr tokenStream); - virtual bool isNewFragment(); - - /// @return size in number of characters of each fragment - int32_t getFragmentSize(); - - /// @param size size in characters of each fragment - void setFragmentSize(int32_t size); - }; +namespace Lucene { + +/// {@link Fragmenter} implementation which breaks text up into same-size fragments with +/// no concerns over spotting sentence boundaries. 
+class LPPCONTRIBAPI SimpleFragmenter : public Fragmenter, public LuceneObject { +public: + SimpleFragmenter(); + SimpleFragmenter(int32_t fragmentSize); + + virtual ~SimpleFragmenter(); + + LUCENE_CLASS(SimpleFragmenter); + +protected: + static const int32_t DEFAULT_FRAGMENT_SIZE; + int32_t currentNumFrags; + int32_t fragmentSize; + OffsetAttributePtr offsetAtt; + +public: + virtual void start(const String& originalText, const TokenStreamPtr& tokenStream); + virtual bool isNewFragment(); + + /// @return size in number of characters of each fragment + int32_t getFragmentSize(); + + /// @param size size in characters of each fragment + void setFragmentSize(int32_t size); +}; + } #endif diff --git a/src/contrib/include/SimpleHTMLEncoder.h b/src/contrib/include/SimpleHTMLEncoder.h index 4b0fdc6b..8051209a 100644 --- a/src/contrib/include/SimpleHTMLEncoder.h +++ b/src/contrib/include/SimpleHTMLEncoder.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,21 +9,21 @@ #include "Encoder.h" -namespace Lucene -{ - /// Simple {@link Encoder} implementation to escape text for HTML output. - class LPPCONTRIBAPI SimpleHTMLEncoder : public Encoder, public LuceneObject - { - public: - virtual ~SimpleHTMLEncoder(); - LUCENE_CLASS(SimpleHTMLEncoder); - - public: - virtual String encodeText(const String& originalText); - - /// Encode string into HTML - static String htmlEncode(const String& plainText); - }; +namespace Lucene { + +/// Simple {@link Encoder} implementation to escape text for HTML output. 
+class LPPCONTRIBAPI SimpleHTMLEncoder : public Encoder, public LuceneObject { +public: + virtual ~SimpleHTMLEncoder(); + LUCENE_CLASS(SimpleHTMLEncoder); + +public: + virtual String encodeText(const String& originalText); + + /// Encode string into HTML + static String htmlEncode(const String& plainText); +}; + } #endif diff --git a/src/contrib/include/SimpleHTMLFormatter.h b/src/contrib/include/SimpleHTMLFormatter.h index cec23c5a..efdac7f6 100644 --- a/src/contrib/include/SimpleHTMLFormatter.h +++ b/src/contrib/include/SimpleHTMLFormatter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,31 +9,31 @@ #include "Formatter.h" -namespace Lucene -{ - /// Simple {@link Formatter} implementation to highlight terms with a pre and post tag. - class LPPCONTRIBAPI SimpleHTMLFormatter : public Formatter, public LuceneObject - { - public: - /// Default constructor uses HTML: <B> tags to markup terms. - SimpleHTMLFormatter(); - - SimpleHTMLFormatter(const String& preTag, const String& postTag); - - virtual ~SimpleHTMLFormatter(); - - LUCENE_CLASS(SimpleHTMLFormatter); - - protected: - static const String DEFAULT_PRE_TAG; - static const String DEFAULT_POST_TAG; - - String preTag; - String postTag; - - public: - virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); - }; +namespace Lucene { + +/// Simple {@link Formatter} implementation to highlight terms with a pre and post tag. +class LPPCONTRIBAPI SimpleHTMLFormatter : public Formatter, public LuceneObject { +public: + /// Default constructor uses HTML: <B> tags to markup terms. 
+ SimpleHTMLFormatter(); + + SimpleHTMLFormatter(const String& preTag, const String& postTag); + + virtual ~SimpleHTMLFormatter(); + + LUCENE_CLASS(SimpleHTMLFormatter); + +protected: + static const String DEFAULT_PRE_TAG; + static const String DEFAULT_POST_TAG; + + String preTag; + String postTag; + +public: + virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); +}; + } #endif diff --git a/src/contrib/include/SimpleSpanFragmenter.h b/src/contrib/include/SimpleSpanFragmenter.h index 018db4e4..f058d8a8 100644 --- a/src/contrib/include/SimpleSpanFragmenter.h +++ b/src/contrib/include/SimpleSpanFragmenter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,41 +9,41 @@ #include "Fragmenter.h" -namespace Lucene -{ - /// {@link Fragmenter} implementation which breaks text up into same-size fragments but - /// does not split up {@link Spans}. This is a simple sample class. 
- class LPPCONTRIBAPI SimpleSpanFragmenter : public Fragmenter, public LuceneObject - { - public: - /// @param queryScorer QueryScorer that was used to score hits - SimpleSpanFragmenter(QueryScorerPtr queryScorer); - - /// @param queryScorer QueryScorer that was used to score hits - /// @param fragmentSize size in bytes of each fragment - SimpleSpanFragmenter(QueryScorerPtr queryScorer, int32_t fragmentSize); - - virtual ~SimpleSpanFragmenter(); - - LUCENE_CLASS(SimpleSpanFragmenter); - - protected: - static const int32_t DEFAULT_FRAGMENT_SIZE; - - int32_t fragmentSize; - int32_t currentNumFrags; - int32_t position; - QueryScorerPtr queryScorer; - int32_t waitForPos; - int32_t textSize; - TermAttributePtr termAtt; - PositionIncrementAttributePtr posIncAtt; - OffsetAttributePtr offsetAtt; - - public: - virtual bool isNewFragment(); - virtual void start(const String& originalText, TokenStreamPtr tokenStream); - }; +namespace Lucene { + +/// {@link Fragmenter} implementation which breaks text up into same-size fragments but +/// does not split up {@link Spans}. This is a simple sample class. 
+class LPPCONTRIBAPI SimpleSpanFragmenter : public Fragmenter, public LuceneObject { +public: + /// @param queryScorer QueryScorer that was used to score hits + SimpleSpanFragmenter(const QueryScorerPtr& queryScorer); + + /// @param queryScorer QueryScorer that was used to score hits + /// @param fragmentSize size in bytes of each fragment + SimpleSpanFragmenter(const QueryScorerPtr& queryScorer, int32_t fragmentSize); + + virtual ~SimpleSpanFragmenter(); + + LUCENE_CLASS(SimpleSpanFragmenter); + +protected: + static const int32_t DEFAULT_FRAGMENT_SIZE; + + int32_t fragmentSize; + int32_t currentNumFrags; + int32_t position; + QueryScorerPtr queryScorer; + int32_t waitForPos; + int32_t textSize; + TermAttributePtr termAtt; + PositionIncrementAttributePtr posIncAtt; + OffsetAttributePtr offsetAtt; + +public: + virtual bool isNewFragment(); + virtual void start(const String& originalText, const TokenStreamPtr& tokenStream); +}; + } #endif diff --git a/src/contrib/include/SnowballAnalyzer.h b/src/contrib/include/SnowballAnalyzer.h index 2439cc6b..f342e89a 100644 --- a/src/contrib/include/SnowballAnalyzer.h +++ b/src/contrib/include/SnowballAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,53 +10,52 @@ #include "LuceneContrib.h" #include "Analyzer.h" -namespace Lucene -{ - /// Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} - /// and {@link SnowballFilter}. - /// - /// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. 
- class LPPCONTRIBAPI SnowballAnalyzer : public Analyzer - { - public: - /// Builds the named analyzer with no stop words. - SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name); - - /// Builds an analyzer with the given stop words. - SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name, HashSet stopwords); - - virtual ~SnowballAnalyzer(); - - LUCENE_CLASS(SnowballAnalyzer); - - protected: - /// Contains the stopwords used with the StopFilter. - HashSet stopSet; - - String name; - LuceneVersion::Version matchVersion; - - public: - /// Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, - /// a {@link StopFilter} and a {@link SnowballFilter}. - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader); - - /// Returns a (possibly reused) {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link - /// LowerCaseFilter}, a {@link StopFilter} and a {@link SnowballFilter}. - virtual TokenStreamPtr reusableTokenStream(const String& fieldName, ReaderPtr reader); - }; - - class LPPCONTRIBAPI SnowballAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~SnowballAnalyzerSavedStreams(); - - LUCENE_CLASS(SnowballAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; +namespace Lucene { + +/// Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter} +/// and {@link SnowballFilter}. +/// +/// NOTE: This class uses the same {@link LuceneVersion#Version} dependent settings as {@link StandardAnalyzer}. +class LPPCONTRIBAPI SnowballAnalyzer : public Analyzer { +public: + /// Builds the named analyzer with no stop words. + SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name); + + /// Builds an analyzer with the given stop words. 
+ SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name, HashSet stopwords); + + virtual ~SnowballAnalyzer(); + + LUCENE_CLASS(SnowballAnalyzer); + +protected: + /// Contains the stopwords used with the StopFilter. + HashSet stopSet; + + String name; + LuceneVersion::Version matchVersion; + +public: + /// Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, + /// a {@link StopFilter} and a {@link SnowballFilter}. + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader); + + /// Returns a (possibly reused) {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link + /// LowerCaseFilter}, a {@link StopFilter} and a {@link SnowballFilter}. + virtual TokenStreamPtr reusableTokenStream(const String& fieldName, const ReaderPtr& reader); +}; + +class LPPCONTRIBAPI SnowballAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~SnowballAnalyzerSavedStreams(); + + LUCENE_CLASS(SnowballAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/contrib/include/SnowballFilter.h b/src/contrib/include/SnowballFilter.h index bfaf74a6..8364d7b0 100644 --- a/src/contrib/include/SnowballFilter.h +++ b/src/contrib/include/SnowballFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,25 +12,25 @@ struct sb_stemmer; -namespace Lucene -{ - /// A filter that stems words using a Snowball-generated stemmer. 
- class LPPCONTRIBAPI SnowballFilter : public TokenFilter - { - public: - SnowballFilter(TokenStreamPtr input, const String& name); - virtual ~SnowballFilter(); - - LUCENE_CLASS(SnowballFilter); - - protected: - struct sb_stemmer* stemmer; - UTF8ResultPtr utf8Result; - TermAttributePtr termAtt; - - public: - virtual bool incrementToken(); - }; +namespace Lucene { + +/// A filter that stems words using a Snowball-generated stemmer. +class LPPCONTRIBAPI SnowballFilter : public TokenFilter { +public: + SnowballFilter(const TokenStreamPtr& input, const String& name); + virtual ~SnowballFilter(); + + LUCENE_CLASS(SnowballFilter); + +protected: + struct sb_stemmer* stemmer; + UTF8ResultPtr utf8Result; + TermAttributePtr termAtt; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/SpanGradientFormatter.h b/src/contrib/include/SpanGradientFormatter.h index 3d93f680..d89ee204 100644 --- a/src/contrib/include/SpanGradientFormatter.h +++ b/src/contrib/include/SpanGradientFormatter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,23 +9,23 @@ #include "GradientFormatter.h" -namespace Lucene -{ - /// Formats text with different color intensity depending on the score of the term using the - /// span tag. GradientFormatter uses a bgcolor argument to the font tag which doesn't work - /// in Mozilla, thus this class. 
- /// @see GradientFormatter - class LPPCONTRIBAPI SpanGradientFormatter : public GradientFormatter - { - public: - SpanGradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor); - virtual ~SpanGradientFormatter(); - - LUCENE_CLASS(SpanGradientFormatter); - - public: - virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); - }; +namespace Lucene { + +/// Formats text with different color intensity depending on the score of the term using the +/// span tag. GradientFormatter uses a bgcolor argument to the font tag which doesn't work +/// in Mozilla, thus this class. +/// @see GradientFormatter +class LPPCONTRIBAPI SpanGradientFormatter : public GradientFormatter { +public: + SpanGradientFormatter(double maxScore, const String& minForegroundColor, const String& maxForegroundColor, const String& minBackgroundColor, const String& maxBackgroundColor); + virtual ~SpanGradientFormatter(); + + LUCENE_CLASS(SpanGradientFormatter); + +public: + virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); +}; + } #endif diff --git a/src/contrib/include/TextFragment.h b/src/contrib/include/TextFragment.h index 9e94990e..f077e78e 100644 --- a/src/contrib/include/TextFragment.h +++ b/src/contrib/include/TextFragment.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,57 +10,56 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Low-level class used to record information about a section of a document with a score. 
- class LPPCONTRIBAPI TextFragment : public LuceneObject - { - public: - TextFragment(StringBufferPtr markedUpText, int32_t textStartPos, int32_t fragNum); - virtual ~TextFragment(); - - LUCENE_CLASS(TextFragment); - - public: - StringBufferPtr markedUpText; - int32_t fragNum; - int32_t textStartPos; - int32_t textEndPos; - double score; - - public: - void setScore(double score); - double getScore(); - - /// @param frag2 Fragment to be merged into this one - void merge(TextFragmentPtr frag2); - - /// @return true if this fragment follows the one passed - bool follows(TextFragmentPtr fragment); - - /// @return the fragment sequence number - int32_t getFragNum(); - - /// Returns the marked-up text for this text fragment - virtual String toString(); - }; - - /// Utility class to store a string buffer that contains text fragment - class LPPCONTRIBAPI StringBuffer : public LuceneObject - { - public: - virtual ~StringBuffer(); - LUCENE_CLASS(StringBuffer); - - protected: - StringStream buffer; - - public: - virtual String toString(); - virtual int32_t length(); - virtual void append(const String& str); - virtual void clear(); - }; +namespace Lucene { + +/// Low-level class used to record information about a section of a document with a score. 
+class LPPCONTRIBAPI TextFragment : public LuceneObject { +public: + TextFragment(const StringBufferPtr& markedUpText, int32_t textStartPos, int32_t fragNum); + virtual ~TextFragment(); + + LUCENE_CLASS(TextFragment); + +public: + StringBufferPtr markedUpText; + int32_t fragNum; + int32_t textStartPos; + int32_t textEndPos; + double score; + +public: + void setScore(double score); + double getScore(); + + /// @param frag2 Fragment to be merged into this one + void merge(const TextFragmentPtr& frag2); + + /// @return true if this fragment follows the one passed + bool follows(const TextFragmentPtr& fragment); + + /// @return the fragment sequence number + int32_t getFragNum(); + + /// Returns the marked-up text for this text fragment + virtual String toString(); +}; + +/// Utility class to store a string buffer that contains text fragment +class LPPCONTRIBAPI StringBuffer : public LuceneObject { +public: + virtual ~StringBuffer(); + LUCENE_CLASS(StringBuffer); + +protected: + StringStream buffer; + +public: + virtual String toString(); + virtual int32_t length(); + virtual void append(const String& str); + virtual void clear(); +}; + } #endif diff --git a/src/contrib/include/TokenGroup.h b/src/contrib/include/TokenGroup.h index 2302808d..ba563196 100644 --- a/src/contrib/include/TokenGroup.h +++ b/src/contrib/include/TokenGroup.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,59 +10,59 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// One, or several overlapping tokens, along with the score(s) and the scope of the original text - class LPPCONTRIBAPI TokenGroup : public LuceneObject - { - public: - TokenGroup(TokenStreamPtr tokenStream); - virtual ~TokenGroup(); - - LUCENE_CLASS(TokenGroup); - - protected: - static const int32_t MAX_NUM_TOKENS_PER_GROUP; - - OffsetAttributePtr offsetAtt; - TermAttributePtr termAtt; - - public: - Collection tokens; - Collection scores; - - int32_t numTokens; - int32_t startOffset; - int32_t endOffset; - double tot; - int32_t matchStartOffset; - int32_t matchEndOffset; - - public: - void addToken(double score); - bool isDistinct(); - void clear(); - - /// @param index a value between 0 and numTokens -1 - /// @return the "n"th token - TokenPtr getToken(int32_t index); - - /// @param index a value between 0 and numTokens -1 - /// @return the "n"th score - double getScore(int32_t index); - - /// @return the end position in the original text - int32_t getEndOffset(); - - /// @return the number of tokens in this group - int32_t getNumTokens(); - - /// @return the start position in the original text - int32_t getStartOffset(); - - /// @return all tokens' scores summed up - double getTotalScore(); - }; +namespace Lucene { + +/// One, or several overlapping tokens, along with the score(s) and the scope of the original text +class LPPCONTRIBAPI TokenGroup : public LuceneObject { +public: + TokenGroup(const TokenStreamPtr& tokenStream); + virtual ~TokenGroup(); + + LUCENE_CLASS(TokenGroup); + +protected: + static const int32_t MAX_NUM_TOKENS_PER_GROUP; + + OffsetAttributePtr offsetAtt; + TermAttributePtr termAtt; + +public: + Collection tokens; + Collection scores; + + int32_t numTokens; + int32_t startOffset; + int32_t endOffset; + double tot; + int32_t matchStartOffset; + int32_t matchEndOffset; + 
+public: + void addToken(double score); + bool isDistinct(); + void clear(); + + /// @param index a value between 0 and numTokens -1 + /// @return the "n"th token + TokenPtr getToken(int32_t index); + + /// @param index a value between 0 and numTokens -1 + /// @return the "n"th score + double getScore(int32_t index); + + /// @return the end position in the original text + int32_t getEndOffset(); + + /// @return the number of tokens in this group + int32_t getNumTokens(); + + /// @return the start position in the original text + int32_t getStartOffset(); + + /// @return all tokens' scores summed up + double getTotalScore(); +}; + } #endif diff --git a/src/contrib/include/TokenSources.h b/src/contrib/include/TokenSources.h index ca5e019c..671a81fc 100644 --- a/src/contrib/include/TokenSources.h +++ b/src/contrib/include/TokenSources.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,83 +10,82 @@ #include "LuceneContrib.h" #include "TokenStream.h" -namespace Lucene -{ - /// Hides implementation issues associated with obtaining a TokenStream for use with the highlighter - can obtain - /// from TermFreqVectors with offsets and (optionally) positions or from Analyzer class re-parsing the stored content. - class LPPCONTRIBAPI TokenSources : public LuceneObject - { - public: - virtual ~TokenSources(); - LUCENE_CLASS(TokenSources); - - public: - /// A convenience method that tries to first get a TermPositionVector for the specified docId, then, falls back to - /// using the passed in {@link Document} to retrieve the TokenStream. 
This is useful when you already have the - /// document, but would prefer to use the vector first. - /// @param reader The {@link IndexReader} to use to try and get the vector from. - /// @param docId The docId to retrieve. - /// @param field The field to retrieve on the document. - /// @param doc The document to fall back on. - /// @param analyzer The analyzer to use for creating the TokenStream if the vector doesn't exist. - /// @return The {@link TokenStream} for the {@link Fieldable} on the {@link Document} - static TokenStreamPtr getAnyTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, DocumentPtr doc, AnalyzerPtr analyzer); - - /// A convenience method that tries a number of approaches to getting a token stream. The cost of finding there - /// are no termVectors in the index is minimal (1000 invocations still registers 0 ms). So this "lazy" (flexible?) - /// approach to coding is probably acceptable - static TokenStreamPtr getAnyTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, AnalyzerPtr analyzer); - - static TokenStreamPtr getTokenStream(TermPositionVectorPtr tpv); - - /// Low level api. - /// Returns a token stream or null if no offset info available in index. This can be used to feed the highlighter - /// with a pre-parsed token stream. 
- /// - /// In my tests the speeds to recreate 1000 token streams using this method are: - /// - with TermVector offset only data stored - 420 milliseconds - /// - with TermVector offset AND position data stored - 271 milliseconds - /// (nb timings for TermVector with position data are based on a tokenizer with contiguous positions - no overlaps - /// or gaps) The cost of not using TermPositionVector to store pre-parsed content and using an analyzer to re-parse - /// the original content: - /// - reanalyzing the original content - 980 milliseconds - /// - /// The re-analyze timings will typically vary depending on - - /// 1) The complexity of the analyzer code (timings above were using a stemmer/lowercaser/stopword combo) - /// 2) The number of other fields (Lucene reads ALL fields off the disk when accessing just one document field - - /// can cost dear!) - /// 3) Use of compression on field storage - could be faster due to compression (less disk IO) or slower (more CPU - /// burn) depending on the content. - /// - /// @param tpv - /// @param tokenPositionsGuaranteedContiguous true if the token position numbers have no overlaps or gaps. If looking - /// to eek out the last drops of performance, set to true. If in doubt, set to false. 
- static TokenStreamPtr getTokenStream(TermPositionVectorPtr tpv, bool tokenPositionsGuaranteedContiguous); - - static TokenStreamPtr getTokenStream(IndexReaderPtr reader, int32_t docId, const String& field); - static TokenStreamPtr getTokenStream(IndexReaderPtr reader, int32_t docId, const String& field, AnalyzerPtr analyzer); - static TokenStreamPtr getTokenStream(DocumentPtr doc, const String& field, AnalyzerPtr analyzer); - static TokenStreamPtr getTokenStream(const String& field, const String& contents, AnalyzerPtr analyzer); - }; - - /// an object used to iterate across an array of tokens - class LPPCONTRIBAPI StoredTokenStream : public TokenStream - { - public: - StoredTokenStream(Collection tokens); - virtual ~StoredTokenStream(); - - LUCENE_CLASS(StoredTokenStream); - - public: - Collection tokens; - int32_t currentToken; - TermAttributePtr termAtt; - OffsetAttributePtr offsetAtt; - - public: - virtual bool incrementToken(); - }; +namespace Lucene { + +/// Hides implementation issues associated with obtaining a TokenStream for use with the highlighter - can obtain +/// from TermFreqVectors with offsets and (optionally) positions or from Analyzer class re-parsing the stored content. +class LPPCONTRIBAPI TokenSources : public LuceneObject { +public: + virtual ~TokenSources(); + LUCENE_CLASS(TokenSources); + +public: + /// A convenience method that tries to first get a TermPositionVector for the specified docId, then, falls back to + /// using the passed in {@link Document} to retrieve the TokenStream. This is useful when you already have the + /// document, but would prefer to use the vector first. + /// @param reader The {@link IndexReader} to use to try and get the vector from. + /// @param docId The docId to retrieve. + /// @param field The field to retrieve on the document. + /// @param doc The document to fall back on. + /// @param analyzer The analyzer to use for creating the TokenStream if the vector doesn't exist. 
+ /// @return The {@link TokenStream} for the {@link Fieldable} on the {@link Document} + static TokenStreamPtr getAnyTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const DocumentPtr& doc, const AnalyzerPtr& analyzer); + + /// A convenience method that tries a number of approaches to getting a token stream. The cost of finding there + /// are no termVectors in the index is minimal (1000 invocations still registers 0 ms). So this "lazy" (flexible?) + /// approach to coding is probably acceptable + static TokenStreamPtr getAnyTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const AnalyzerPtr& analyzer); + + static TokenStreamPtr getTokenStream(const TermPositionVectorPtr& tpv); + + /// Low level api. + /// Returns a token stream or null if no offset info available in index. This can be used to feed the highlighter + /// with a pre-parsed token stream. + /// + /// In my tests the speeds to recreate 1000 token streams using this method are: + /// - with TermVector offset only data stored - 420 milliseconds + /// - with TermVector offset AND position data stored - 271 milliseconds + /// (nb timings for TermVector with position data are based on a tokenizer with contiguous positions - no overlaps + /// or gaps) The cost of not using TermPositionVector to store pre-parsed content and using an analyzer to re-parse + /// the original content: + /// - reanalyzing the original content - 980 milliseconds + /// + /// The re-analyze timings will typically vary depending on - + /// 1) The complexity of the analyzer code (timings above were using a stemmer/lowercaser/stopword combo) + /// 2) The number of other fields (Lucene reads ALL fields off the disk when accessing just one document field - + /// can cost dear!) + /// 3) Use of compression on field storage - could be faster due to compression (less disk IO) or slower (more CPU + /// burn) depending on the content. 
+ /// + /// @param tpv + /// @param tokenPositionsGuaranteedContiguous true if the token position numbers have no overlaps or gaps. If looking + /// to eek out the last drops of performance, set to true. If in doubt, set to false. + static TokenStreamPtr getTokenStream(const TermPositionVectorPtr& tpv, bool tokenPositionsGuaranteedContiguous); + + static TokenStreamPtr getTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field); + static TokenStreamPtr getTokenStream(const IndexReaderPtr& reader, int32_t docId, const String& field, const AnalyzerPtr& analyzer); + static TokenStreamPtr getTokenStream(const DocumentPtr& doc, const String& field, const AnalyzerPtr& analyzer); + static TokenStreamPtr getTokenStream(const String& field, const String& contents, const AnalyzerPtr& analyzer); +}; + +/// an object used to iterate across an array of tokens +class LPPCONTRIBAPI StoredTokenStream : public TokenStream { +public: + StoredTokenStream(Collection tokens); + virtual ~StoredTokenStream(); + + LUCENE_CLASS(StoredTokenStream); + +public: + Collection tokens; + int32_t currentToken; + TermAttributePtr termAtt; + OffsetAttributePtr offsetAtt; + +public: + virtual bool incrementToken(); +}; + } #endif diff --git a/src/contrib/include/WeightedSpanTerm.h b/src/contrib/include/WeightedSpanTerm.h index db0f4598..9b8eccf1 100644 --- a/src/contrib/include/WeightedSpanTerm.h +++ b/src/contrib/include/WeightedSpanTerm.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,48 +9,47 @@ #include "WeightedTerm.h" -namespace Lucene -{ - /// Lightweight class to hold term, weight, and positions used for scoring this term. - class LPPCONTRIBAPI WeightedSpanTerm : public WeightedTerm - { - public: - WeightedSpanTerm(double weight, const String& term, bool positionSensitive = false); - virtual ~WeightedSpanTerm(); - - LUCENE_CLASS(WeightedSpanTerm); - - public: - bool positionSensitive; - - protected: - Collection positionSpans; - - public: - /// Checks to see if this term is valid at position. - /// @param position To check against valid term positions. - /// @return true if this term is a hit at this position. - bool checkPosition(int32_t position); - - void addPositionSpans(Collection positionSpans); - bool isPositionSensitive(); - void setPositionSensitive(bool positionSensitive); - Collection getPositionSpans(); - }; - - /// Utility class to store a Span - class LPPCONTRIBAPI PositionSpan : public LuceneObject - { - public: - PositionSpan(int32_t start, int32_t end); - virtual ~PositionSpan(); - - LUCENE_CLASS(PositionSpan); - - public: - int32_t start; - int32_t end; - }; +namespace Lucene { + +/// Lightweight class to hold term, weight, and positions used for scoring this term. +class LPPCONTRIBAPI WeightedSpanTerm : public WeightedTerm { +public: + WeightedSpanTerm(double weight, const String& term, bool positionSensitive = false); + virtual ~WeightedSpanTerm(); + + LUCENE_CLASS(WeightedSpanTerm); + +public: + bool positionSensitive; + +protected: + Collection positionSpans; + +public: + /// Checks to see if this term is valid at position. + /// @param position To check against valid term positions. + /// @return true if this term is a hit at this position. 
+ bool checkPosition(int32_t position); + + void addPositionSpans(Collection positionSpans); + bool isPositionSensitive(); + void setPositionSensitive(bool positionSensitive); + Collection getPositionSpans(); +}; + +/// Utility class to store a Span +class LPPCONTRIBAPI PositionSpan : public LuceneObject { +public: + PositionSpan(int32_t start, int32_t end); + virtual ~PositionSpan(); + + LUCENE_CLASS(PositionSpan); + +public: + int32_t start; + int32_t end; +}; + } #endif diff --git a/src/contrib/include/WeightedSpanTermExtractor.h b/src/contrib/include/WeightedSpanTermExtractor.h index 26b3e934..7a178ddd 100644 --- a/src/contrib/include/WeightedSpanTermExtractor.h +++ b/src/contrib/include/WeightedSpanTermExtractor.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,124 +11,122 @@ #include "FilterIndexReader.h" #include "MapWeightedSpanTerm.h" -namespace Lucene -{ - /// Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether {@link Term}s - /// from the {@link Query} are contained in a supplied {@link TokenStream}. - class LPPCONTRIBAPI WeightedSpanTermExtractor : public LuceneObject - { - public: - WeightedSpanTermExtractor(const String& defaultField = L""); - virtual ~WeightedSpanTermExtractor(); - - LUCENE_CLASS(WeightedSpanTermExtractor); - - protected: - String fieldName; - TokenStreamPtr tokenStream; - MapStringIndexReader readers; - String defaultField; - bool expandMultiTermQuery; - bool cachedTokenStream; - bool wrapToCaching; - - protected: - void closeReaders(); - - /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied Query. 
- /// - /// @param query Query to extract Terms from - /// @param terms Map to place created WeightedSpanTerms in - void extract(QueryPtr query, MapWeightedSpanTermPtr terms); - - /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied SpanQuery. - /// - /// @param terms Map to place created WeightedSpanTerms in. - /// @param spanQuery SpanQuery to extract Terms from - void extractWeightedSpanTerms(MapWeightedSpanTermPtr terms, SpanQueryPtr spanQuery); - - /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied Query. - /// @param terms Map to place created WeightedSpanTerms in - /// @param query Query to extract Terms from - void extractWeightedTerms(MapWeightedSpanTermPtr terms, QueryPtr query); - - /// Necessary to implement matches for queries against defaultField - bool fieldNameComparator(const String& fieldNameToCheck); - - IndexReaderPtr getReaderForField(const String& field); - - void collectSpanQueryFields(SpanQueryPtr spanQuery, HashSet fieldNames); - bool mustRewriteQuery(SpanQueryPtr spanQuery); - - public: - /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. - /// - /// @param query That caused hit - /// @param tokenStream Of text to be highlighted - /// @return Map containing WeightedSpanTerms - MapWeightedSpanTermPtr getWeightedSpanTerms(QueryPtr query, TokenStreamPtr tokenStream); - - /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. - /// - /// @param query That caused hit - /// @param tokenStream Of text to be highlighted - /// @param fieldName Restricts Term's used based on field name - /// @return Map containing WeightedSpanTerms - MapWeightedSpanTermPtr getWeightedSpanTerms(QueryPtr query, TokenStreamPtr tokenStream, const String& fieldName); - - /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. Uses a supplied - /// IndexReader to properly weight terms (for gradient highlighting). 
- /// - /// @param query That caused hit - /// @param tokenStream Of text to be highlighted - /// @param fieldName Restricts Term's used based on field name - /// @param reader To use for scoring - /// @return Map containing WeightedSpanTerms - MapWeightedSpanTermPtr getWeightedSpanTermsWithScores(QueryPtr query, TokenStreamPtr tokenStream, const String& fieldName, IndexReaderPtr reader); - - bool getExpandMultiTermQuery(); - void setExpandMultiTermQuery(bool expandMultiTermQuery); - - bool isCachedTokenStream(); - TokenStreamPtr getTokenStream(); - - /// By default, {@link TokenStream}s that are not of the type {@link CachingTokenFilter} - /// are wrapped in a {@link CachingTokenFilter} to ensure an efficient reset - if you - /// are already using a different caching {@link TokenStream} impl and you don't want - /// it to be wrapped, set this to false. - void setWrapIfNotCachingTokenFilter(bool wrap); - }; - - /// This class makes sure that if both position sensitive and insensitive versions of the same - /// term are added, the position insensitive one wins. - class LPPCONTRIBAPI PositionCheckingMap : public MapWeightedSpanTerm - { - public: - virtual ~PositionCheckingMap(); - LUCENE_CLASS(PositionCheckingMap); - - public: - virtual void put(const String& key, WeightedSpanTermPtr val); - }; - - /// A fake IndexReader class to extract the field from a MultiTermQuery - class LPPCONTRIBAPI FakeReader : public FilterIndexReader - { - public: - FakeReader(); - virtual ~FakeReader(); - - LUCENE_CLASS(FakeReader); - - public: - String field; - - protected: - static IndexReaderPtr EMPTY_MEMORY_INDEX_READER(); - - public: - virtual TermEnumPtr terms(TermPtr t); - }; +namespace Lucene { + +/// Class used to extract {@link WeightedSpanTerm}s from a {@link Query} based on whether {@link Term}s +/// from the {@link Query} are contained in a supplied {@link TokenStream}. 
+class LPPCONTRIBAPI WeightedSpanTermExtractor : public LuceneObject { +public: + WeightedSpanTermExtractor(const String& defaultField = L""); + virtual ~WeightedSpanTermExtractor(); + + LUCENE_CLASS(WeightedSpanTermExtractor); + +protected: + String fieldName; + TokenStreamPtr tokenStream; + MapStringIndexReader readers; + String defaultField; + bool expandMultiTermQuery; + bool cachedTokenStream; + bool wrapToCaching; + +protected: + void closeReaders(); + + /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied Query. + /// + /// @param query Query to extract Terms from + /// @param terms Map to place created WeightedSpanTerms in + void extract(const QueryPtr& query, const MapWeightedSpanTermPtr& terms); + + /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied SpanQuery. + /// + /// @param terms Map to place created WeightedSpanTerms in. + /// @param spanQuery SpanQuery to extract Terms from + void extractWeightedSpanTerms(const MapWeightedSpanTermPtr& terms, const SpanQueryPtr& spanQuery); + + /// Fills a Map with {@link WeightedSpanTerm}s using the terms from the supplied Query. + /// @param terms Map to place created WeightedSpanTerms in + /// @param query Query to extract Terms from + void extractWeightedTerms(const MapWeightedSpanTermPtr& terms, const QueryPtr& query); + + /// Necessary to implement matches for queries against defaultField + bool fieldNameComparator(const String& fieldNameToCheck); + + IndexReaderPtr getReaderForField(const String& field); + + void collectSpanQueryFields(const SpanQueryPtr& spanQuery, HashSet fieldNames); + bool mustRewriteQuery(const SpanQueryPtr& spanQuery); + +public: + /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. 
+ /// + /// @param query That caused hit + /// @param tokenStream Of text to be highlighted + /// @return Map containing WeightedSpanTerms + MapWeightedSpanTermPtr getWeightedSpanTerms(const QueryPtr& query, const TokenStreamPtr& tokenStream); + + /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. + /// + /// @param query That caused hit + /// @param tokenStream Of text to be highlighted + /// @param fieldName Restricts Term's used based on field name + /// @return Map containing WeightedSpanTerms + MapWeightedSpanTermPtr getWeightedSpanTerms(const QueryPtr& query, const TokenStreamPtr& tokenStream, const String& fieldName); + + /// Creates a Map of WeightedSpanTerms from the given Query and TokenStream. Uses a supplied + /// IndexReader to properly weight terms (for gradient highlighting). + /// + /// @param query That caused hit + /// @param tokenStream Of text to be highlighted + /// @param fieldName Restricts Term's used based on field name + /// @param reader To use for scoring + /// @return Map containing WeightedSpanTerms + MapWeightedSpanTermPtr getWeightedSpanTermsWithScores(const QueryPtr& query, const TokenStreamPtr& tokenStream, const String& fieldName, const IndexReaderPtr& reader); + + bool getExpandMultiTermQuery(); + void setExpandMultiTermQuery(bool expandMultiTermQuery); + + bool isCachedTokenStream(); + TokenStreamPtr getTokenStream(); + + /// By default, {@link TokenStream}s that are not of the type {@link CachingTokenFilter} + /// are wrapped in a {@link CachingTokenFilter} to ensure an efficient reset - if you + /// are already using a different caching {@link TokenStream} impl and you don't want + /// it to be wrapped, set this to false. + void setWrapIfNotCachingTokenFilter(bool wrap); +}; + +/// This class makes sure that if both position sensitive and insensitive versions of the same +/// term are added, the position insensitive one wins. 
+class LPPCONTRIBAPI PositionCheckingMap : public MapWeightedSpanTerm { +public: + virtual ~PositionCheckingMap(); + LUCENE_CLASS(PositionCheckingMap); + +public: + virtual void put(const String& key, const WeightedSpanTermPtr& val); +}; + +/// A fake IndexReader class to extract the field from a MultiTermQuery +class LPPCONTRIBAPI FakeReader : public FilterIndexReader { +public: + FakeReader(); + virtual ~FakeReader(); + + LUCENE_CLASS(FakeReader); + +public: + String field; + +protected: + static IndexReaderPtr EMPTY_MEMORY_INDEX_READER(); + +public: + virtual TermEnumPtr terms(const TermPtr& t); +}; + } #endif diff --git a/src/contrib/include/WeightedTerm.h b/src/contrib/include/WeightedTerm.h index 16d0818a..15985424 100644 --- a/src/contrib/include/WeightedTerm.h +++ b/src/contrib/include/WeightedTerm.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,34 +10,34 @@ #include "LuceneContrib.h" #include "LuceneObject.h" -namespace Lucene -{ - /// Lightweight class to hold term and a weight value used for scoring this term - class LPPCONTRIBAPI WeightedTerm : public LuceneObject - { - public: - WeightedTerm(double weight, const String& term); - virtual ~WeightedTerm(); - - LUCENE_CLASS(WeightedTerm); - - public: - double weight; // multiplier - String term; // stemmed form - - public: - /// @return the term value (stemmed) - String getTerm(); - - /// @return the weight associated with this term - double getWeight(); - - /// @param term the term value (stemmed) - void setTerm(const String& term); - - /// @param weight the weight associated with this term - void setWeight(double weight); - }; +namespace Lucene { + +/// Lightweight class to hold term and a weight value used for scoring this term +class LPPCONTRIBAPI WeightedTerm : public LuceneObject { +public: + WeightedTerm(double weight, const String& term); + virtual ~WeightedTerm(); + + LUCENE_CLASS(WeightedTerm); + +public: + double weight; // multiplier + String term; // stemmed form + +public: + /// @return the term value (stemmed) + String getTerm(); + + /// @return the weight associated with this term + double getWeight(); + + /// @param term the term value (stemmed) + void setTerm(const String& term); + + /// @param weight the weight associated with this term + void setWeight(double weight); +}; + } #endif diff --git a/src/contrib/memory/MemoryIndex.cpp b/src/contrib/memory/MemoryIndex.cpp index 01c2072d..e1e50d9b 100644 --- a/src/contrib/memory/MemoryIndex.cpp +++ b/src/contrib/memory/MemoryIndex.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -23,735 +23,656 @@ #include "Document.h" #include "MiscUtils.h" -namespace Lucene -{ - const double MemoryIndex::docBoost = 1.0; - - MemoryIndex::MemoryIndex(bool storeOffsets) - { - stride = storeOffsets ? 3 : 1; - fields = MapStringMemoryIndexInfo::newInstance(); - } - - MemoryIndex::~MemoryIndex() - { - } - - void MemoryIndex::addField(const String& fieldName, const String& text, AnalyzerPtr analyzer) - { - if (fieldName.empty()) +namespace Lucene { + +const double MemoryIndex::docBoost = 1.0; + +MemoryIndex::MemoryIndex(bool storeOffsets) { + stride = storeOffsets ? 3 : 1; + fields = MapStringMemoryIndexInfo::newInstance(); +} + +MemoryIndex::~MemoryIndex() { +} + +void MemoryIndex::addField(const String& fieldName, const String& text, const AnalyzerPtr& analyzer) { + if (fieldName.empty()) { + boost::throw_exception(IllegalArgumentException(L"fieldName must not be empty")); + } + if (text.empty()) { + boost::throw_exception(IllegalArgumentException(L"text must not be empty")); + } + if (!analyzer) { + boost::throw_exception(IllegalArgumentException(L"analyzer must not be null")); + } + + TokenStreamPtr stream(analyzer->tokenStream(fieldName, newLucene(text))); + addField(fieldName, stream); +} + +void MemoryIndex::addField(const String& fieldName, const TokenStreamPtr& stream, double boost) { + LuceneException finally; + try { + if (fieldName.empty()) { boost::throw_exception(IllegalArgumentException(L"fieldName must not be empty")); - if (text.empty()) - boost::throw_exception(IllegalArgumentException(L"text must not be empty")); - if (!analyzer) - boost::throw_exception(IllegalArgumentException(L"analyzer must not be null")); - - TokenStreamPtr stream(analyzer->tokenStream(fieldName, newLucene(text))); - addField(fieldName, stream); - } - - void 
MemoryIndex::addField(const String& fieldName, TokenStreamPtr stream, double boost) - { - LuceneException finally; - try - { - if (fieldName.empty()) - boost::throw_exception(IllegalArgumentException(L"fieldName must not be empty")); - if (!stream) - boost::throw_exception(IllegalArgumentException(L"token stream must not be null")); - if (boost <= 0.0) - boost::throw_exception(IllegalArgumentException(L"boost factor must be greater than 0.0")); - if (fields.contains(fieldName)) - boost::throw_exception(IllegalArgumentException(L"field must not be added more than once")); - - MapStringIntCollection terms(MapStringIntCollection::newInstance()); - int32_t numTokens = 0; - int32_t numOverlapTokens = 0; - int32_t pos = -1; - - TermAttributePtr termAtt(stream->addAttribute()); - PositionIncrementAttributePtr posIncrAttribute(stream->addAttribute()); - OffsetAttributePtr offsetAtt(stream->addAttribute()); - - stream->reset(); - while (stream->incrementToken()) - { - String term(termAtt->term()); - if (term.empty()) - continue; // nothing to do - ++numTokens; - int32_t posIncr = posIncrAttribute->getPositionIncrement(); - if (posIncr == 0) - ++numOverlapTokens; - pos += posIncr; - - Collection positions(terms.get(term)); - if (!positions) - { - // term not seen before - positions = Collection::newInstance(); - terms.put(term, positions); - } - positions.add(pos); - if (stride != 1) - { - positions.add(offsetAtt->startOffset()); - positions.add(offsetAtt->endOffset()); - } - } - stream->end(); - - // ensure infos.numTokens > 0 invariant; needed for correct operation of terms() - if (numTokens > 0) - { - boost = boost * docBoost; // see DocumentWriter.addDocument(...) 
- fields.put(fieldName, newLucene(terms, numTokens, numOverlapTokens, boost)); - sortedFields.reset(); // invalidate sorted view, if any - } } - catch (IOException& e) - { - // can never happen - boost::throw_exception(RuntimeException(e.getError())); + if (!stream) { + boost::throw_exception(IllegalArgumentException(L"token stream must not be null")); } - catch (LuceneException& e) - { - finally = e; + if (boost <= 0.0) { + boost::throw_exception(IllegalArgumentException(L"boost factor must be greater than 0.0")); } - try - { - if (stream) - stream->close(); + if (fields.contains(fieldName)) { + boost::throw_exception(IllegalArgumentException(L"field must not be added more than once")); } - catch (IOException& e) - { - boost::throw_exception(RuntimeException(e.getError())); - } - finally.throwException(); - } - - IndexSearcherPtr MemoryIndex::createSearcher() - { - MemoryIndexReaderPtr reader(newLucene(shared_from_this())); - IndexSearcherPtr searcher(newLucene(reader)); // ensures no auto-close - reader->setSearcher(searcher); // to later get hold of searcher.getSimilarity() - return searcher; - } - - double MemoryIndex::search(QueryPtr query) - { - if (!query) - boost::throw_exception(IllegalArgumentException(L"query must not be null")); - - SearcherPtr searcher(createSearcher()); - LuceneException finally; - try - { - Collection scores = Collection::newInstance(1); - scores[0] = 0.0; // inits to 0.0 (no match) - searcher->search(query, newLucene(scores)); - return scores[0]; - } - catch (IOException& e) - { - // can never happen - boost::throw_exception(RuntimeException(e.getError())); - } - catch (LuceneException& e) - { - finally = e; - } - finally.throwException(); - return 0; // silence static analyzers - } - - int32_t MemoryIndex::numPositions(Collection positions) - { - return (positions.size() / stride); - } - - struct lessField - { - inline bool operator()(const PairStringMemoryIndexInfo& first, const PairStringMemoryIndexInfo& second) const - { - 
return (first.first < second.first); - } - }; - - void MemoryIndex::sortFields() - { - if (!sortedFields) - { - sortedFields = CollectionStringMemoryIndexInfo::newInstance(fields.begin(), fields.end()); - std::sort(sortedFields.begin(), sortedFields.end(), lessField()); + + MapStringIntCollection terms(MapStringIntCollection::newInstance()); + int32_t numTokens = 0; + int32_t numOverlapTokens = 0; + int32_t pos = -1; + + TermAttributePtr termAtt(stream->addAttribute()); + PositionIncrementAttributePtr posIncrAttribute(stream->addAttribute()); + OffsetAttributePtr offsetAtt(stream->addAttribute()); + + stream->reset(); + while (stream->incrementToken()) { + String term(termAtt->term()); + if (term.empty()) { + continue; // nothing to do + } + ++numTokens; + int32_t posIncr = posIncrAttribute->getPositionIncrement(); + if (posIncr == 0) { + ++numOverlapTokens; + } + pos += posIncr; + + Collection positions(terms.get(term)); + if (!positions) { + // term not seen before + positions = Collection::newInstance(); + terms.put(term, positions); + } + positions.add(pos); + if (stride != 1) { + positions.add(offsetAtt->startOffset()); + positions.add(offsetAtt->endOffset()); + } } - } - - MemoryIndexInfo::MemoryIndexInfo(MapStringIntCollection terms, int32_t numTokens, int32_t numOverlapTokens, double boost) - { - this->terms = terms; - this->numTokens = numTokens; - this->numOverlapTokens = numOverlapTokens; - this->boost = boost; - } - - MemoryIndexInfo::~MemoryIndexInfo() - { - } - - struct lessTerm - { - inline bool operator()(const PairStringIntCollection& first, const PairStringIntCollection& second) const - { - return (first.first < second.first); + stream->end(); + + // ensure infos.numTokens > 0 invariant; needed for correct operation of terms() + if (numTokens > 0) { + boost = boost * docBoost; // see DocumentWriter.addDocument(...) 
+ fields.put(fieldName, newLucene(terms, numTokens, numOverlapTokens, boost)); + sortedFields.reset(); // invalidate sorted view, if any } - }; - - void MemoryIndexInfo::sortTerms() - { - if (!sortedTerms) - { - sortedTerms = CollectionStringIntCollection::newInstance(terms.begin(), terms.end()); - std::sort(sortedTerms.begin(), sortedTerms.end(), lessTerm()); + } catch (IOException& e) { + // can never happen + boost::throw_exception(RuntimeException(e.getError())); + } catch (LuceneException& e) { + finally = e; + } + try { + if (stream) { + stream->close(); } + } catch (IOException& e) { + boost::throw_exception(RuntimeException(e.getError())); } - - Collection MemoryIndexInfo::getPositions(const String& term) - { - return terms.get(term); - } - - Collection MemoryIndexInfo::getPositions(int32_t pos) - { - return sortedTerms[pos].second; - } - - double MemoryIndexInfo::getBoost() - { - return boost; - } - - MemoryIndexReader::MemoryIndexReader(MemoryIndexPtr memoryIndex) - { - this->memoryIndex = memoryIndex; - } - - MemoryIndexReader::~MemoryIndexReader() - { - } - - TermPtr MemoryIndexReader::MATCH_ALL_TERM() - { - static TermPtr _MATCH_ALL_TERM; - if (!_MATCH_ALL_TERM) - { - _MATCH_ALL_TERM = newLucene(L""); - CycleCheck::addStatic(_MATCH_ALL_TERM); - } - return _MATCH_ALL_TERM; - } - - MemoryIndexInfoPtr MemoryIndexReader::getInfo(const String& fieldName) - { - return memoryIndex->fields.get(fieldName); - } - - MemoryIndexInfoPtr MemoryIndexReader::getInfo(int32_t pos) - { - return memoryIndex->sortedFields[pos].second; - } - - int32_t MemoryIndexReader::docFreq(TermPtr t) - { - MemoryIndexInfoPtr info(getInfo(t->field())); - int32_t freq = 0; - if (info) - freq = info->getPositions(t->text()) ? 
1 : 0; - return freq; - } - - TermEnumPtr MemoryIndexReader::terms() - { - return terms(MATCH_ALL_TERM()); - } - - TermEnumPtr MemoryIndexReader::terms(TermPtr t) - { - int32_t i = 0; // index into info.sortedTerms - int32_t j = 0; // index into sortedFields - - memoryIndex->sortFields(); - if (memoryIndex->sortedFields.size() == 1 && memoryIndex->sortedFields[0].first == t->field()) - j = 0; // fast path - else - { - CollectionStringMemoryIndexInfo::iterator search = std::lower_bound(memoryIndex->sortedFields.begin(), memoryIndex->sortedFields.end(), std::make_pair(t->field(), MemoryIndexInfoPtr()), lessField()); - int32_t keyPos = std::distance(memoryIndex->sortedFields.begin(), search); - j = (search == memoryIndex->sortedFields.end() || t->field() < search->first) ? -(keyPos + 1) : keyPos; - } - - if (j < 0) // not found; choose successor - { - j = -j - 1; - i = 0; - if (j < memoryIndex->sortedFields.size()) - getInfo(j)->sortTerms(); + finally.throwException(); +} + +IndexSearcherPtr MemoryIndex::createSearcher() { + MemoryIndexReaderPtr reader(newLucene(shared_from_this())); + IndexSearcherPtr searcher(newLucene(reader)); // ensures no auto-close + reader->setSearcher(searcher); // to later get hold of searcher.getSimilarity() + return searcher; +} + +double MemoryIndex::search(const QueryPtr& query) { + if (!query) { + boost::throw_exception(IllegalArgumentException(L"query must not be null")); + } + + SearcherPtr searcher(createSearcher()); + LuceneException finally; + try { + Collection scores = Collection::newInstance(1); + scores[0] = 0.0; // inits to 0.0 (no match) + searcher->search(query, newLucene(scores)); + return scores[0]; + } catch (IOException& e) { + // can never happen + boost::throw_exception(RuntimeException(e.getError())); + } catch (LuceneException& e) { + finally = e; + } + finally.throwException(); + return 0; // silence static analyzers +} + +int32_t MemoryIndex::numPositions(Collection positions) { + return (positions.size() / 
stride); +} + +struct lessField { + inline bool operator()(const PairStringMemoryIndexInfo& first, const PairStringMemoryIndexInfo& second) const { + return (first.first < second.first); + } +}; + +void MemoryIndex::sortFields() { + if (!sortedFields) { + sortedFields = CollectionStringMemoryIndexInfo::newInstance(fields.begin(), fields.end()); + std::sort(sortedFields.begin(), sortedFields.end(), lessField()); + } +} + +MemoryIndexInfo::MemoryIndexInfo(MapStringIntCollection terms, int32_t numTokens, int32_t numOverlapTokens, double boost) { + this->terms = terms; + this->numTokens = numTokens; + this->numOverlapTokens = numOverlapTokens; + this->boost = boost; +} + +MemoryIndexInfo::~MemoryIndexInfo() { +} + +struct lessTerm { + inline bool operator()(const PairStringIntCollection& first, const PairStringIntCollection& second) const { + return (first.first < second.first); + } +}; + +void MemoryIndexInfo::sortTerms() { + if (!sortedTerms) { + sortedTerms = CollectionStringIntCollection::newInstance(terms.begin(), terms.end()); + std::sort(sortedTerms.begin(), sortedTerms.end(), lessTerm()); + } +} + +Collection MemoryIndexInfo::getPositions(const String& term) { + return terms.get(term); +} + +Collection MemoryIndexInfo::getPositions(int32_t pos) { + return sortedTerms[pos].second; +} + +double MemoryIndexInfo::getBoost() { + return boost; +} + +MemoryIndexReader::MemoryIndexReader(const MemoryIndexPtr& memoryIndex) { + this->memoryIndex = memoryIndex; +} + +MemoryIndexReader::~MemoryIndexReader() { +} + +TermPtr MemoryIndexReader::MATCH_ALL_TERM() { + static TermPtr _MATCH_ALL_TERM; + LUCENE_RUN_ONCE( + _MATCH_ALL_TERM = newLucene(L""); + CycleCheck::addStatic(_MATCH_ALL_TERM); + ); + return _MATCH_ALL_TERM; +} + +MemoryIndexInfoPtr MemoryIndexReader::getInfo(const String& fieldName) { + return memoryIndex->fields.get(fieldName); +} + +MemoryIndexInfoPtr MemoryIndexReader::getInfo(int32_t pos) { + return memoryIndex->sortedFields[pos].second; +} + +int32_t 
MemoryIndexReader::docFreq(const TermPtr& t) { + MemoryIndexInfoPtr info(getInfo(t->field())); + int32_t freq = 0; + if (info) { + freq = info->getPositions(t->text()) ? 1 : 0; + } + return freq; +} + +TermEnumPtr MemoryIndexReader::terms() { + return terms(MATCH_ALL_TERM()); +} + +TermEnumPtr MemoryIndexReader::terms(const TermPtr& t) { + int32_t i = 0; // index into info.sortedTerms + int32_t j = 0; // index into sortedFields + + memoryIndex->sortFields(); + if (memoryIndex->sortedFields.size() == 1 && memoryIndex->sortedFields[0].first == t->field()) { + j = 0; // fast path + } else { + CollectionStringMemoryIndexInfo::iterator search = std::lower_bound(memoryIndex->sortedFields.begin(), memoryIndex->sortedFields.end(), std::make_pair(t->field(), MemoryIndexInfoPtr()), lessField()); + int32_t keyPos = std::distance(memoryIndex->sortedFields.begin(), search); + j = (search == memoryIndex->sortedFields.end() || t->field() < search->first) ? -(keyPos + 1) : keyPos; + } + + if (j < 0) { // not found; choose successor + j = -j - 1; + i = 0; + if (j < memoryIndex->sortedFields.size()) { + getInfo(j)->sortTerms(); } - else // found - { - MemoryIndexInfoPtr info(getInfo(j)); - info->sortTerms(); - CollectionStringIntCollection::iterator search = std::lower_bound(info->sortedTerms.begin(), info->sortedTerms.end(), std::make_pair(t->text(), Collection()), lessTerm()); - int32_t keyPos = std::distance(info->sortedTerms.begin(), search); - i = (search == info->sortedTerms.end() || t->text() < search->first) ? 
-(keyPos + 1) : keyPos; - if (i < 0) // not found; choose successor - { - i = -i - 1; - if (i >= info->sortedTerms.size()) // move to next successor - { - ++j; - i = 0; - if (j < memoryIndex->sortedFields.size()) - getInfo(j)->sortTerms(); + } else { // found + MemoryIndexInfoPtr info(getInfo(j)); + info->sortTerms(); + CollectionStringIntCollection::iterator search = std::lower_bound(info->sortedTerms.begin(), info->sortedTerms.end(), std::make_pair(t->text(), Collection()), lessTerm()); + int32_t keyPos = std::distance(info->sortedTerms.begin(), search); + i = (search == info->sortedTerms.end() || t->text() < search->first) ? -(keyPos + 1) : keyPos; + if (i < 0) { // not found; choose successor + i = -i - 1; + if (i >= info->sortedTerms.size()) { // move to next successor + ++j; + i = 0; + if (j < memoryIndex->sortedFields.size()) { + getInfo(j)->sortTerms(); } } } - - return newLucene(shared_from_this(), i, j); - } - - TermPositionsPtr MemoryIndexReader::termPositions() - { - return newLucene(shared_from_this()); - } - - TermDocsPtr MemoryIndexReader::termDocs() - { - return termPositions(); - } - - Collection MemoryIndexReader::getTermFreqVectors(int32_t docNumber) - { - Collection vectors(Collection::newInstance()); - for (MapStringMemoryIndexInfo::iterator fieldName = memoryIndex->fields.begin(); fieldName != memoryIndex->fields.end(); ++fieldName) - vectors.add(getTermFreqVector(docNumber, fieldName->first)); - return vectors; - } - - void MemoryIndexReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) - { - for (MapStringMemoryIndexInfo::iterator fieldName = memoryIndex->fields.begin(); fieldName != memoryIndex->fields.end(); ++fieldName) - getTermFreqVector(docNumber, fieldName->first, mapper); - } - - void MemoryIndexReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) - { - MemoryIndexInfoPtr info(getInfo(field)); - if (!info) - return; - info->sortTerms(); - mapper->setExpectations(field, 
info->sortedTerms.size(), memoryIndex->stride != 1, true); - for (int32_t i = info->sortedTerms.size(); --i >=0;) - { - Collection positions(info->sortedTerms[i].second); - int32_t size = positions.size(); - Collection offsets(Collection::newInstance(size / memoryIndex->stride)); - for (int32_t k = 0, j = 1; j < size; ++k, j += memoryIndex->stride) - { - int32_t start = positions[j]; - int32_t end = positions[j + 1]; - offsets[k] = newLucene(start, end); - } - mapper->map(info->sortedTerms[i].first, memoryIndex->numPositions(info->sortedTerms[i].second), offsets, info->sortedTerms[i].second); + } + + return newLucene(shared_from_this(), i, j); +} + +TermPositionsPtr MemoryIndexReader::termPositions() { + return newLucene(shared_from_this()); +} + +TermDocsPtr MemoryIndexReader::termDocs() { + return termPositions(); +} + +Collection MemoryIndexReader::getTermFreqVectors(int32_t docNumber) { + Collection vectors(Collection::newInstance()); + for (MapStringMemoryIndexInfo::iterator fieldName = memoryIndex->fields.begin(); fieldName != memoryIndex->fields.end(); ++fieldName) { + vectors.add(getTermFreqVector(docNumber, fieldName->first)); + } + return vectors; +} + +void MemoryIndexReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { + for (MapStringMemoryIndexInfo::iterator fieldName = memoryIndex->fields.begin(); fieldName != memoryIndex->fields.end(); ++fieldName) { + getTermFreqVector(docNumber, fieldName->first, mapper); + } +} + +void MemoryIndexReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { + MemoryIndexInfoPtr info(getInfo(field)); + if (!info) { + return; + } + info->sortTerms(); + mapper->setExpectations(field, info->sortedTerms.size(), memoryIndex->stride != 1, true); + for (int32_t i = info->sortedTerms.size(); --i >=0;) { + Collection positions(info->sortedTerms[i].second); + int32_t size = positions.size(); + Collection offsets(Collection::newInstance(size / 
memoryIndex->stride)); + for (int32_t k = 0, j = 1; j < size; ++k, j += memoryIndex->stride) { + int32_t start = positions[j]; + int32_t end = positions[j + 1]; + offsets[k] = newLucene(start, end); } + mapper->map(info->sortedTerms[i].first, memoryIndex->numPositions(info->sortedTerms[i].second), offsets, info->sortedTerms[i].second); } - - TermFreqVectorPtr MemoryIndexReader::getTermFreqVector(int32_t docNumber, const String& field) - { +} + +TermFreqVectorPtr MemoryIndexReader::getTermFreqVector(int32_t docNumber, const String& field) { + MemoryIndexInfoPtr info(getInfo(field)); + if (!info) { + return TermFreqVectorPtr(); + } + info->sortTerms(); + return newLucene(shared_from_this(), info, field); +} + +SimilarityPtr MemoryIndexReader::getSimilarity() { + SearcherPtr searcher(_searcher.lock()); + if (searcher) { + return searcher->getSimilarity(); + } + return Similarity::getDefault(); +} + +void MemoryIndexReader::setSearcher(const SearcherPtr& searcher) { + _searcher = searcher; +} + +ByteArray MemoryIndexReader::norms(const String& field) { + ByteArray norms(cachedNorms); + SimilarityPtr sim(getSimilarity()); + if (field != cachedFieldName || sim != cachedSimilarity) { // not cached? MemoryIndexInfoPtr info(getInfo(field)); - if (!info) - return TermFreqVectorPtr(); - info->sortTerms(); - return newLucene(shared_from_this(), info, field); - } - - SimilarityPtr MemoryIndexReader::getSimilarity() - { - SearcherPtr searcher(_searcher.lock()); - if (searcher) - return searcher->getSimilarity(); - return Similarity::getDefault(); - } - - void MemoryIndexReader::setSearcher(SearcherPtr searcher) - { - _searcher = searcher; - } - - ByteArray MemoryIndexReader::norms(const String& field) - { - ByteArray norms(cachedNorms); - SimilarityPtr sim(getSimilarity()); - if (field != cachedFieldName || sim != cachedSimilarity) // not cached? - { - MemoryIndexInfoPtr info(getInfo(field)); - int32_t numTokens = info ? info->numTokens : 0; - int32_t numOverlapTokens = info ? 
info->numOverlapTokens : 0; - double boost = info ? info->getBoost() : 1.0; - FieldInvertStatePtr invertState(newLucene(0, numTokens, numOverlapTokens, 0, boost)); - double n = sim->computeNorm(field, invertState); - uint8_t norm = Similarity::encodeNorm(n); - norms = ByteArray::newInstance(1); - norms[0] = norm; - - // cache it for future reuse - cachedNorms = norms; - cachedFieldName = field; - cachedSimilarity = sim; - } - return norms; - } - - void MemoryIndexReader::norms(const String& field, ByteArray norms, int32_t offset) - { - ByteArray _norms(this->norms(field)); - MiscUtils::arrayCopy(_norms.get(), 0, norms.get(), offset, _norms.size()); - } - - void MemoryIndexReader::doSetNorm(int32_t doc, const String& field, uint8_t value) - { - boost::throw_exception(UnsupportedOperationException()); - } - - int32_t MemoryIndexReader::numDocs() - { - return memoryIndex->fields.empty() ? 0 : 1; - } - - int32_t MemoryIndexReader::maxDoc() - { - return 1; - } - - DocumentPtr MemoryIndexReader::document(int32_t n) - { - return newLucene(); // there are no stored fields - } - - DocumentPtr MemoryIndexReader::document(int32_t n, FieldSelectorPtr fieldSelector) - { - return newLucene(); // there are no stored fields - } - - bool MemoryIndexReader::isDeleted(int32_t n) - { - return false; + int32_t numTokens = info ? info->numTokens : 0; + int32_t numOverlapTokens = info ? info->numOverlapTokens : 0; + double boost = info ? 
info->getBoost() : 1.0; + FieldInvertStatePtr invertState(newLucene(0, numTokens, numOverlapTokens, 0, boost)); + double n = sim->computeNorm(field, invertState); + uint8_t norm = Similarity::encodeNorm(n); + norms = ByteArray::newInstance(1); + norms[0] = norm; + + // cache it for future reuse + cachedNorms = norms; + cachedFieldName = field; + cachedSimilarity = sim; } - - bool MemoryIndexReader::hasDeletions() - { - return false; + return norms; +} + +void MemoryIndexReader::norms(const String& field, ByteArray norms, int32_t offset) { + ByteArray _norms(this->norms(field)); + MiscUtils::arrayCopy(_norms.get(), 0, norms.get(), offset, _norms.size()); +} + +void MemoryIndexReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { + boost::throw_exception(UnsupportedOperationException()); +} + +int32_t MemoryIndexReader::numDocs() { + return memoryIndex->fields.empty() ? 0 : 1; +} + +int32_t MemoryIndexReader::maxDoc() { + return 1; +} + +DocumentPtr MemoryIndexReader::document(int32_t n) { + return newLucene(); // there are no stored fields +} + +DocumentPtr MemoryIndexReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { + return newLucene(); // there are no stored fields +} + +bool MemoryIndexReader::isDeleted(int32_t n) { + return false; +} + +bool MemoryIndexReader::hasDeletions() { + return false; +} + +void MemoryIndexReader::doDelete(int32_t docNum) { + boost::throw_exception(UnsupportedOperationException()); +} + +void MemoryIndexReader::doUndeleteAll() { + boost::throw_exception(UnsupportedOperationException()); +} + +void MemoryIndexReader::doCommit(MapStringString commitUserData) { +} + +void MemoryIndexReader::doClose() { +} + +HashSet MemoryIndexReader::getFieldNames(FieldOption fieldOption) { + static HashSet emptySet; + LUCENE_RUN_ONCE( + emptySet = HashSet::newInstance(); + ); + if (fieldOption == FIELD_OPTION_UNINDEXED) { + return emptySet; } - - void MemoryIndexReader::doDelete(int32_t docNum) - { - 
boost::throw_exception(UnsupportedOperationException()); - } - - void MemoryIndexReader::doUndeleteAll() - { - boost::throw_exception(UnsupportedOperationException()); - } - - void MemoryIndexReader::doCommit(MapStringString commitUserData) - { - } - - void MemoryIndexReader::doClose() - { - } - - HashSet MemoryIndexReader::getFieldNames(FieldOption fieldOption) - { - static HashSet emptySet; - if (!emptySet) - emptySet = HashSet::newInstance(); - if (fieldOption == FIELD_OPTION_UNINDEXED) - return emptySet; - if (fieldOption == FIELD_OPTION_INDEXED_NO_TERMVECTOR) - return emptySet; - if (fieldOption == FIELD_OPTION_TERMVECTOR_WITH_OFFSET && memoryIndex->stride == 1) - return emptySet; - if (fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET && memoryIndex->stride == 1) - return emptySet; - HashSet fieldSet(HashSet::newInstance()); - for (MapStringMemoryIndexInfo::iterator field = memoryIndex->fields.begin(); field != memoryIndex->fields.end(); ++field) - fieldSet.add(field->first); - return fieldSet; - } - - MemoryIndexTermEnum::MemoryIndexTermEnum(MemoryIndexReaderPtr reader, int32_t ix, int32_t jx) - { - _reader = reader; - i = ix; - j = jx; - } - - MemoryIndexTermEnum::~MemoryIndexTermEnum() - { - } - - bool MemoryIndexTermEnum::next() - { - MemoryIndexReaderPtr reader(_reader); - if (j >= reader->memoryIndex->sortedFields.size()) - return false; - MemoryIndexInfoPtr info(reader->getInfo(j)); - if (++i < info->sortedTerms.size()) - return true; - - // move to successor - ++j; - i = 0; - if (j >= reader->memoryIndex->sortedFields.size()) - return false; - reader->getInfo(j)->sortTerms(); - return true; + if (fieldOption == FIELD_OPTION_INDEXED_NO_TERMVECTOR) { + return emptySet; } - - TermPtr MemoryIndexTermEnum::term() - { - MemoryIndexReaderPtr reader(_reader); - if (j >= reader->memoryIndex->sortedFields.size()) - return TermPtr(); - MemoryIndexInfoPtr info(reader->getInfo(j)); - if (i >= info->sortedTerms.size()) - return TermPtr(); - return 
createTerm(info, j, info->sortedTerms[i].first); - } - - int32_t MemoryIndexTermEnum::docFreq() - { - MemoryIndexReaderPtr reader(_reader); - if (j >= reader->memoryIndex->sortedFields.size()) - return 0; - MemoryIndexInfoPtr info(reader->getInfo(j)); - if (i >= info->sortedTerms.size()) - return 0; - return reader->memoryIndex->numPositions(info->getPositions(i)); - } - - void MemoryIndexTermEnum::close() - { - } - - TermPtr MemoryIndexTermEnum::createTerm(MemoryIndexInfoPtr info, int32_t pos, const String& text) - { - TermPtr _template(info->_template); - if (!_template) // not yet cached? - { - MemoryIndexReaderPtr reader(_reader); - String fieldName(reader->memoryIndex->sortedFields[pos].first); - _template = newLucene(fieldName); - info->_template = _template; - } - return _template->createTerm(text); - } - - MemoryIndexCollector::MemoryIndexCollector(Collection scores) - { - this->scores = scores; - } - - MemoryIndexCollector::~MemoryIndexCollector() - { - } - - void MemoryIndexCollector::collect(int32_t doc) - { - scores[0] = scorer->score(); - } - - void MemoryIndexCollector::setScorer(ScorerPtr scorer) - { - this->scorer = scorer; - } - - bool MemoryIndexCollector::acceptsDocsOutOfOrder() - { + if (fieldOption == FIELD_OPTION_TERMVECTOR_WITH_OFFSET && memoryIndex->stride == 1) { + return emptySet; + } + if (fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET && memoryIndex->stride == 1) { + return emptySet; + } + HashSet fieldSet(HashSet::newInstance()); + for (MapStringMemoryIndexInfo::iterator field = memoryIndex->fields.begin(); field != memoryIndex->fields.end(); ++field) { + fieldSet.add(field->first); + } + return fieldSet; +} + +MemoryIndexTermEnum::MemoryIndexTermEnum(const MemoryIndexReaderPtr& reader, int32_t ix, int32_t jx) { + _reader = reader; + i = ix; + j = jx; +} + +MemoryIndexTermEnum::~MemoryIndexTermEnum() { +} + +bool MemoryIndexTermEnum::next() { + MemoryIndexReaderPtr reader(_reader); + if (j >= 
reader->memoryIndex->sortedFields.size()) { + return false; + } + MemoryIndexInfoPtr info(reader->getInfo(j)); + if (++i < info->sortedTerms.size()) { return true; } - - void MemoryIndexCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) - { + + // move to successor + ++j; + i = 0; + if (j >= reader->memoryIndex->sortedFields.size()) { + return false; } - - MemoryIndexTermPositions::MemoryIndexTermPositions(MemoryIndexReaderPtr reader) - { - _reader = reader; - hasNext = false; - cursor = 0; + reader->getInfo(j)->sortTerms(); + return true; +} + +TermPtr MemoryIndexTermEnum::term() { + MemoryIndexReaderPtr reader(_reader); + if (j >= reader->memoryIndex->sortedFields.size()) { + return TermPtr(); } - - MemoryIndexTermPositions::~MemoryIndexTermPositions() - { - } - - void MemoryIndexTermPositions::seek(TermPtr term) - { - this->term = term; - if (!term) - hasNext = true; // term == null means match all docs - else - { - MemoryIndexReaderPtr reader(_reader); - MemoryIndexInfoPtr info(reader->getInfo(term->field())); - current = info ? info->getPositions(term->text()) : Collection(); - hasNext = current; - cursor = 0; - } + MemoryIndexInfoPtr info(reader->getInfo(j)); + if (i >= info->sortedTerms.size()) { + return TermPtr(); } - - void MemoryIndexTermPositions::seek(TermEnumPtr termEnum) - { - seek(termEnum->term()); + return createTerm(info, j, info->sortedTerms[i].first); +} + +int32_t MemoryIndexTermEnum::docFreq() { + MemoryIndexReaderPtr reader(_reader); + if (j >= reader->memoryIndex->sortedFields.size()) { + return 0; } - - int32_t MemoryIndexTermPositions::doc() - { + MemoryIndexInfoPtr info(reader->getInfo(j)); + if (i >= info->sortedTerms.size()) { return 0; } - - int32_t MemoryIndexTermPositions::freq() - { - MemoryIndexReaderPtr reader(_reader); - int32_t freq = current ? reader->memoryIndex->numPositions(current) : (term ? 
0 : 1); - return freq; - } - - bool MemoryIndexTermPositions::next() - { - bool _next = hasNext; - hasNext = false; - return _next; - } - - int32_t MemoryIndexTermPositions::read(Collection docs, Collection freqs) - { - if (!hasNext) - return 0; - hasNext = false; - docs[0] = 0; - freqs[0] = freq(); - return 1; - } - - bool MemoryIndexTermPositions::skipTo(int32_t target) - { - return next(); - } - - void MemoryIndexTermPositions::close() - { - } - - int32_t MemoryIndexTermPositions::nextPosition() - { - // implements TermPositions - MemoryIndexReaderPtr reader(_reader); - int32_t pos = current[cursor]; - cursor += reader->memoryIndex->stride; - return pos; - } - - int32_t MemoryIndexTermPositions::getPayloadLength() - { - boost::throw_exception(UnsupportedOperationException()); - } - - ByteArray MemoryIndexTermPositions::getPayload(ByteArray data, int32_t offset) - { - boost::throw_exception(UnsupportedOperationException()); - return ByteArray(); - } - - bool MemoryIndexTermPositions::isPayloadAvailable() - { - return false; // unsupported - } - - MemoryIndexTermPositionVector::MemoryIndexTermPositionVector(MemoryIndexReaderPtr reader, MemoryIndexInfoPtr info, const String& fieldName) - { - this->_reader = reader; - this->sortedTerms = info->sortedTerms; - this->fieldName = fieldName; - } - - MemoryIndexTermPositionVector::~MemoryIndexTermPositionVector() - { - } - - String MemoryIndexTermPositionVector::getField() - { - return fieldName; - } - - int32_t MemoryIndexTermPositionVector::size() - { - return sortedTerms.size(); - } - - Collection MemoryIndexTermPositionVector::getTerms() - { - Collection terms(Collection::newInstance(sortedTerms.size())); - for (int32_t i = sortedTerms.size(); --i >= 0;) - terms[i] = sortedTerms[i].first; - return terms; - } - - Collection MemoryIndexTermPositionVector::getTermFrequencies() - { + return reader->memoryIndex->numPositions(info->getPositions(i)); +} + +void MemoryIndexTermEnum::close() { +} + +TermPtr 
MemoryIndexTermEnum::createTerm(const MemoryIndexInfoPtr& info, int32_t pos, const String& text) { + TermPtr _template(info->_template); + if (!_template) { // not yet cached? MemoryIndexReaderPtr reader(_reader); - Collection freqs(Collection::newInstance(sortedTerms.size())); - for (int32_t i = sortedTerms.size(); --i >= 0;) - freqs[i] = reader->memoryIndex->numPositions(sortedTerms[i].second); - return freqs; - } - - int32_t MemoryIndexTermPositionVector::indexOf(const String& term) - { - CollectionStringIntCollection::iterator search = std::lower_bound(sortedTerms.begin(), sortedTerms.end(), std::make_pair(term, Collection()), lessTerm()); - return (search == sortedTerms.end() || term < search->first) ? -1 : std::distance(sortedTerms.begin(), search); - } - - Collection MemoryIndexTermPositionVector::indexesOf(Collection terms, int32_t start, int32_t length) - { - Collection indexes(Collection::newInstance(length)); - for (int32_t i = 0; i < length; ++i) - indexes[i] = indexOf(terms[start++]); - return indexes; - } - - Collection MemoryIndexTermPositionVector::getTermPositions(int32_t index) - { - return sortedTerms[index].second; - } - - Collection MemoryIndexTermPositionVector::getOffsets(int32_t index) - { + String fieldName(reader->memoryIndex->sortedFields[pos].first); + _template = newLucene(fieldName); + info->_template = _template; + } + return _template->createTerm(text); +} + +MemoryIndexCollector::MemoryIndexCollector(Collection scores) { + this->scores = scores; +} + +MemoryIndexCollector::~MemoryIndexCollector() { +} + +void MemoryIndexCollector::collect(int32_t doc) { + scores[0] = scorer->score(); +} + +void MemoryIndexCollector::setScorer(const ScorerPtr& scorer) { + this->scorer = scorer; +} + +bool MemoryIndexCollector::acceptsDocsOutOfOrder() { + return true; +} + +void MemoryIndexCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { +} + +MemoryIndexTermPositions::MemoryIndexTermPositions(const MemoryIndexReaderPtr& 
reader) { + _reader = reader; + hasNext = false; + cursor = 0; +} + +MemoryIndexTermPositions::~MemoryIndexTermPositions() { +} + +void MemoryIndexTermPositions::seek(const TermPtr& term) { + this->term = term; + if (!term) { + hasNext = true; // term == null means match all docs + } else { MemoryIndexReaderPtr reader(_reader); - if (reader->memoryIndex->stride == 1) - return Collection(); // no offsets stored - - Collection positions(sortedTerms[index].second); - int32_t size = positions.size(); - Collection offsets(Collection::newInstance(size / reader->memoryIndex->stride)); - for (int32_t i = 0, j = 1; j < size; ++i, j += reader->memoryIndex->stride) - { - int32_t start = positions[j]; - int32_t end = positions[j + 1]; - offsets[i] = newLucene(start, end); - } - return offsets; + MemoryIndexInfoPtr info(reader->getInfo(term->field())); + current = info ? info->getPositions(term->text()) : Collection(); + hasNext = current; + cursor = 0; + } +} + +void MemoryIndexTermPositions::seek(const TermEnumPtr& termEnum) { + seek(termEnum->term()); +} + +int32_t MemoryIndexTermPositions::doc() { + return 0; +} + +int32_t MemoryIndexTermPositions::freq() { + MemoryIndexReaderPtr reader(_reader); + int32_t freq = current ? reader->memoryIndex->numPositions(current) : (term ? 
0 : 1); + return freq; +} + +bool MemoryIndexTermPositions::next() { + bool _next = hasNext; + hasNext = false; + return _next; +} + +int32_t MemoryIndexTermPositions::read(Collection& docs, Collection& freqs) { + if (!hasNext) { + return 0; + } + hasNext = false; + docs[0] = 0; + freqs[0] = freq(); + return 1; +} + +bool MemoryIndexTermPositions::skipTo(int32_t target) { + return next(); +} + +void MemoryIndexTermPositions::close() { +} + +int32_t MemoryIndexTermPositions::nextPosition() { + // implements TermPositions + MemoryIndexReaderPtr reader(_reader); + int32_t pos = current[cursor]; + cursor += reader->memoryIndex->stride; + return pos; +} + +int32_t MemoryIndexTermPositions::getPayloadLength() { + boost::throw_exception(UnsupportedOperationException()); +} + +ByteArray MemoryIndexTermPositions::getPayload(ByteArray data, int32_t offset) { + boost::throw_exception(UnsupportedOperationException()); + return ByteArray(); +} + +bool MemoryIndexTermPositions::isPayloadAvailable() { + return false; // unsupported +} + +MemoryIndexTermPositionVector::MemoryIndexTermPositionVector(const MemoryIndexReaderPtr& reader, const MemoryIndexInfoPtr& info, const String& fieldName) { + this->_reader = reader; + this->sortedTerms = info->sortedTerms; + this->fieldName = fieldName; +} + +MemoryIndexTermPositionVector::~MemoryIndexTermPositionVector() { +} + +String MemoryIndexTermPositionVector::getField() { + return fieldName; +} + +int32_t MemoryIndexTermPositionVector::size() { + return sortedTerms.size(); +} + +Collection MemoryIndexTermPositionVector::getTerms() { + Collection terms(Collection::newInstance(sortedTerms.size())); + for (int32_t i = sortedTerms.size(); --i >= 0;) { + terms[i] = sortedTerms[i].first; } + return terms; +} + +Collection MemoryIndexTermPositionVector::getTermFrequencies() { + MemoryIndexReaderPtr reader(_reader); + Collection freqs(Collection::newInstance(sortedTerms.size())); + for (int32_t i = sortedTerms.size(); --i >= 0;) { + freqs[i] = 
reader->memoryIndex->numPositions(sortedTerms[i].second); + } + return freqs; +} + +int32_t MemoryIndexTermPositionVector::indexOf(const String& term) { + CollectionStringIntCollection::iterator search = std::lower_bound(sortedTerms.begin(), sortedTerms.end(), std::make_pair(term, Collection()), lessTerm()); + return (search == sortedTerms.end() || term < search->first) ? -1 : std::distance(sortedTerms.begin(), search); +} + +Collection MemoryIndexTermPositionVector::indexesOf(Collection terms, int32_t start, int32_t length) { + Collection indexes(Collection::newInstance(length)); + for (int32_t i = 0; i < length; ++i) { + indexes[i] = indexOf(terms[start++]); + } + return indexes; +} + +Collection MemoryIndexTermPositionVector::getTermPositions(int32_t index) { + return sortedTerms[index].second; +} + +Collection MemoryIndexTermPositionVector::getOffsets(int32_t index) { + MemoryIndexReaderPtr reader(_reader); + if (reader->memoryIndex->stride == 1) { + return Collection(); // no offsets stored + } + + Collection positions(sortedTerms[index].second); + int32_t size = positions.size(); + Collection offsets(Collection::newInstance(size / reader->memoryIndex->stride)); + for (int32_t i = 0, j = 1; j < size; ++i, j += reader->memoryIndex->stride) { + int32_t start = positions[j]; + int32_t end = positions[j + 1]; + offsets[i] = newLucene(start, end); + } + return offsets; +} + } diff --git a/src/contrib/msvc/ContribInc.cpp b/src/contrib/msvc/ContribInc.cpp index 4d3fe4b2..df166262 100644 --- a/src/contrib/msvc/ContribInc.cpp +++ b/src/contrib/msvc/ContribInc.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// diff --git a/src/contrib/msvc/dllmain.cpp b/src/contrib/msvc/dllmain.cpp index 480902b8..a7be1fca 100644 --- a/src/contrib/msvc/dllmain.cpp +++ b/src/contrib/msvc/dllmain.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,15 +8,13 @@ #if defined(_WIN32) && defined(LPP_HAVE_DLL) -BOOL APIENTRY DllMain(HMODULE module, DWORD ul_reason_for_call, LPVOID lpReserved) -{ - switch (ul_reason_for_call) - { - case DLL_PROCESS_ATTACH: - case DLL_THREAD_ATTACH: - case DLL_THREAD_DETACH: - case DLL_PROCESS_DETACH: - break; +BOOL APIENTRY DllMain(HMODULE module, DWORD ul_reason_for_call, LPVOID lpReserved) { + switch (ul_reason_for_call) { + case DLL_PROCESS_ATTACH: + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + case DLL_PROCESS_DETACH: + break; } return TRUE; } diff --git a/src/contrib/msvc/lucene_contrib.vcxproj b/src/contrib/msvc/lucene_contrib.vcxproj new file mode 100644 index 00000000..984af9b0 --- /dev/null +++ b/src/contrib/msvc/lucene_contrib.vcxproj @@ -0,0 +1,641 @@ + + + + + Debug DLL + Win32 + + + Debug Static + Win32 + + + Release DLL + Win32 + + + Release Static + Win32 + + + + {46A95AFD-95FD-4280-B22E-1B56F273144B} + lucene_contrib + Win32Proj + + + + StaticLibrary + Unicode + true + + + StaticLibrary + Unicode + + + DynamicLibrary + Unicode + true + + + DynamicLibrary + Unicode + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.40219.1 + $(ProjectDir)$(Configuration)\ + $(Configuration)\ + true + $(ProjectDir)$(Configuration)\ + $(Configuration)\ + false + $(ProjectDir)$(Configuration)\ + 
$(Configuration)\ + $(ProjectDir)$(Configuration)\ + $(Configuration)\ + + + + /Zm120 %(AdditionalOptions) + Disabled + ..\include;..\..\..\include;..\snowball\libstemmer_c\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_WINDOWS;_USRDLL;LPP_HAVE_DLL;LPP_BUILDING_LIB;%(PreprocessorDefinitions) + true + Async + EnableFastChecks + MultiThreadedDebugDLL + Use + ContribInc.h + Level3 + EditAndContinue + 4996;%(DisableSpecificWarnings) + false + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\lib32-msvc-10.0;..\..\..\lib;%(AdditionalLibraryDirectories) + true + Windows + MachineX86 + + + copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." +copy "$(OutDir)$(ProjectName).dll" "..\..\..\bin\." + + + + + + /Zm120 %(AdditionalOptions) + MaxSpeed + AnySuitable + true + Speed + true + ..\include;..\..\..\include;..\snowball\libstemmer_c\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_WINDOWS;_USRDLL;LPP_HAVE_DLL;LPP_BUILDING_LIB;%(PreprocessorDefinitions) + Async + MultiThreadedDLL + true + Use + ContribInc.h + Level3 + ProgramDatabase + 4996;%(DisableSpecificWarnings) + false + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\lib;%(AdditionalLibraryDirectories) + true + Windows + true + true + MachineX86 + + + copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." +copy "$(OutDir)$(ProjectName).dll" "..\..\..\bin\." + + + + + + /Zm120 %(AdditionalOptions) + Disabled + ..\include;..\..\..\include;..\snowball\libstemmer_c\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_LIB;LPP_BUILDING_LIB;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + Use + ContribInc.h + Level3 + EditAndContinue + 4996;%(DisableSpecificWarnings) + false + + + /IGNORE:4221 %(AdditionalOptions) + $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) + + + copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." 
+ + + + + /Zm120 %(AdditionalOptions) + MaxSpeed + AnySuitable + true + Speed + true + ..\include;..\..\..\include;..\snowball\libstemmer_c\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_LIB;LPP_BUILDING_LIB;%(PreprocessorDefinitions) + MultiThreadedDLL + true + Use + ContribInc.h + Level3 + ProgramDatabase + 4996;%(DisableSpecificWarnings) + false + + + /IGNORE:4221 %(AdditionalOptions) + $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) + + + copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." + + + + + + Create + Create + Create + Create + + + + + false + + + false + + + false + + + false + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + 
+ + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + Config.h;%(ForcedIncludeFiles) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {46a95afd-95fd-4280-b22e-1b56f273144a} + false + + + + + + \ No newline at end of file diff --git a/src/contrib/msvc/lucene_contrib.vcxproj.filters 
b/src/contrib/msvc/lucene_contrib.vcxproj.filters new file mode 100644 index 00000000..cd070873 --- /dev/null +++ b/src/contrib/msvc/lucene_contrib.vcxproj.filters @@ -0,0 +1,599 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {ed281916-1385-43dd-ba49-d40e8504292f} + + + {cbc62969-9e1a-4ea1-8a5b-70dc05c54a74} + + + {af08d7d6-346f-4315-8a08-e5670d8ea661} + + + {5f664ed0-5376-4c1d-be74-9f437a8f28a0} + + + {25265c9f-fb03-4de0-af5b-1dc8a22b6a39} + + + {cbd88c58-498d-4cf6-a8b8-cab8ae9c1a33} + + + {f22aa0ca-0c74-4b24-8c13-ac4bf2eaeb62} + + + {9d6a5d6b-5270-4d71-bf47-e1fd628f0de5} + + + {de0f0dac-e3c9-4c68-9645-bd7facc23cad} + + + {b3fa72fe-e465-4c37-879c-ca78a4cdfa04} + + + {ff7c09cf-c9c4-48f1-b627-c4577cf04005} + + + {9a006a1c-6c67-4ad5-9a3a-1e73278296de} + + + {15af4be9-0258-4006-8d4a-e14efa2d23d2} + + + {897982c5-a448-4a64-a394-5b6621b73523} + + + {892af3ea-b061-4b0e-83ba-d221682d4d98} + + + {1f5a687a-5a25-4d1f-aff1-224dd446d1b4} + + + {65078152-1671-49dc-8eff-10daa009b07b} + + + {99f2b070-1b33-44cc-b645-c887144eb557} + + + {a5432a73-3546-49ee-9b10-974d0dfbd3a7} + + + {0df31b77-1d5d-42dc-839d-c77784ea81c4} + + + {72f1545b-b051-45f8-b0b3-bf0facf48e9b} + + + {2d2a742a-d676-42ee-be16-54d6ed3e42b0} + + + + + source files + + + source files + + + analyzers\common\analysis\reverse + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\br + + + analyzers\common\analysis\br + + + analyzers\common\analysis\br + + + analyzers\common\analysis\cjk + + + analyzers\common\analysis\cjk + + + analyzers\common\analysis\cn + + + analyzers\common\analysis\cn + + + analyzers\common\analysis\cn + + + analyzers\common\analysis\cz + + + analyzers\common\analysis\de + + + 
analyzers\common\analysis\de + + + analyzers\common\analysis\de + + + analyzers\common\analysis\el + + + analyzers\common\analysis\el + + + analyzers\common\analysis\fa + + + analyzers\common\analysis\fa + + + analyzers\common\analysis\fa + + + analyzers\common\analysis\fr + + + analyzers\common\analysis\fr + + + analyzers\common\analysis\fr + + + analyzers\common\analysis\fr + + + analyzers\common\analysis\nl + + + analyzers\common\analysis\nl + + + analyzers\common\analysis\nl + + + analyzers\common\analysis\ru + + + analyzers\common\analysis\ru + + + analyzers\common\analysis\ru + + + analyzers\common\analysis\ru + + + analyzers\common\analysis\ru + + + snowball + + + snowball + + + snowball\libstemmer\runtime + + + snowball\libstemmer\runtime + + + snowball\libstemmer\libstemmer + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + memory + + + + + header files + + + header files + + + header files + + + header files + + + header files + + + header files + + + header files + + + analyzers\common\analysis\reverse + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + 
analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\ar + + + analyzers\common\analysis\br + + + analyzers\common\analysis\br + + + analyzers\common\analysis\br + + + analyzers\common\analysis\cjk + + + analyzers\common\analysis\cjk + + + analyzers\common\analysis\cn + + + analyzers\common\analysis\cn + + + analyzers\common\analysis\cn + + + analyzers\common\analysis\cz + + + analyzers\common\analysis\de + + + analyzers\common\analysis\de + + + analyzers\common\analysis\de + + + analyzers\common\analysis\el + + + analyzers\common\analysis\el + + + analyzers\common\analysis\fa + + + analyzers\common\analysis\fa + + + analyzers\common\analysis\fa + + + analyzers\common\analysis\fr + + + analyzers\common\analysis\fr + + + analyzers\common\analysis\fr + + + analyzers\common\analysis\fr + + + analyzers\common\analysis\nl + + + analyzers\common\analysis\nl + + + analyzers\common\analysis\nl + + + analyzers\common\analysis\ru + + + analyzers\common\analysis\ru + + + analyzers\common\analysis\ru + + + analyzers\common\analysis\ru + + + analyzers\common\analysis\ru + + + snowball + + + snowball + + + snowball\libstemmer\runtime + + + snowball\libstemmer\runtime + + + snowball\libstemmer\libstemmer + + + snowball\libstemmer\libstemmer + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + snowball\libstemmer\src + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + 
highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + highlighter + + + memory + + + \ No newline at end of file diff --git a/src/contrib/snowball/SnowballAnalyzer.cpp b/src/contrib/snowball/SnowballAnalyzer.cpp index 56fdb1ad..2fa4444e 100644 --- a/src/contrib/snowball/SnowballAnalyzer.cpp +++ b/src/contrib/snowball/SnowballAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,56 +12,52 @@ #include "StopFilter.h" #include "SnowballFilter.h" -namespace Lucene -{ - SnowballAnalyzer::SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name) - { - this->matchVersion = matchVersion; - this->name = name; - } - - SnowballAnalyzer::SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name, HashSet stopwords) - { - this->stopSet = stopwords; - this->matchVersion = matchVersion; - this->name = name; - } - - SnowballAnalyzer::~SnowballAnalyzer() - { - } - - TokenStreamPtr SnowballAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - TokenStreamPtr result = newLucene(matchVersion, reader); - result = newLucene(result); - result = newLucene(result); - if (stopSet) - result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); - result = newLucene(result, name); - return result; +namespace Lucene { + +SnowballAnalyzer::SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name) { + this->matchVersion = matchVersion; + this->name = name; +} + 
+SnowballAnalyzer::SnowballAnalyzer(LuceneVersion::Version matchVersion, const String& name, HashSet stopwords) { + this->stopSet = stopwords; + this->matchVersion = matchVersion; + this->name = name; +} + +SnowballAnalyzer::~SnowballAnalyzer() { +} + +TokenStreamPtr SnowballAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenStreamPtr result = newLucene(matchVersion, reader); + result = newLucene(result); + result = newLucene(result); + if (stopSet) { + result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), result, stopSet); } - - TokenStreamPtr SnowballAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - SnowballAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(matchVersion, reader); - streams->result = newLucene(streams->source); - streams->result = newLucene(streams->result); - if (stopSet) - streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); - streams->result = newLucene(streams->result, name); - setPreviousTokenStream(streams); + result = newLucene(result, name); + return result; +} + +TokenStreamPtr SnowballAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + SnowballAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(matchVersion, reader); + streams->result = newLucene(streams->source); + streams->result = newLucene(streams->result); + if (stopSet) { + streams->result = newLucene(StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion), streams->result, stopSet); } - else - streams->source->reset(reader); - return streams->result; - } - - SnowballAnalyzerSavedStreams::~SnowballAnalyzerSavedStreams() - { + streams->result = 
newLucene(streams->result, name); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +SnowballAnalyzerSavedStreams::~SnowballAnalyzerSavedStreams() { +} + } diff --git a/src/contrib/snowball/SnowballFilter.cpp b/src/contrib/snowball/SnowballFilter.cpp index 037b53e5..adb86b53 100644 --- a/src/contrib/snowball/SnowballFilter.cpp +++ b/src/contrib/snowball/SnowballFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,34 +12,33 @@ #include "StringUtils.h" #include "libstemmer_c/include/libstemmer.h" -namespace Lucene -{ - SnowballFilter::SnowballFilter(TokenStreamPtr input, const String& name) : TokenFilter(input) - { - stemmer = sb_stemmer_new(StringUtils::toUTF8(name).c_str(), "UTF_8"); - if (stemmer == NULL) - boost::throw_exception(IllegalArgumentException(L"language not available for stemming:" + name)); - termAtt = addAttribute(); - utf8Result = newLucene(); - } - - SnowballFilter::~SnowballFilter() - { +namespace Lucene { + +SnowballFilter::SnowballFilter(const TokenStreamPtr& input, const String& name) : TokenFilter(input) { + stemmer = sb_stemmer_new(StringUtils::toUTF8(name).c_str(), "UTF_8"); + if (stemmer == NULL) { + boost::throw_exception(IllegalArgumentException(L"language not available for stemming:" + name)); } - - bool SnowballFilter::incrementToken() - { - if (input->incrementToken()) - { - StringUtils::toUTF8(termAtt->termBuffer().get(), termAtt->termLength(), utf8Result); - const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8Result->result.get(), utf8Result->length); - if (stemmed == NULL) - 
boost::throw_exception(RuntimeException(L"exception stemming word:" + termAtt->term())); - int32_t newlen = StringUtils::toUnicode(stemmed, sb_stemmer_length(stemmer), termAtt->termBuffer()); - termAtt->setTermLength(newlen); - return true; + termAtt = addAttribute(); + utf8Result = newLucene(); +} + +SnowballFilter::~SnowballFilter() { +} + +bool SnowballFilter::incrementToken() { + if (input->incrementToken()) { + StringUtils::toUTF8(termAtt->termBuffer().get(), termAtt->termLength(), utf8Result); + const sb_symbol* stemmed = sb_stemmer_stem(stemmer, utf8Result->result.get(), utf8Result->length); + if (stemmed == NULL) { + boost::throw_exception(RuntimeException(L"exception stemming word:" + termAtt->term())); } - else - return false; + int32_t newlen = StringUtils::toUnicode(stemmed, sb_stemmer_length(stemmer), termAtt->termBuffer()); + termAtt->setTermLength(newlen); + return true; + } else { + return false; } } + +} diff --git a/src/contrib/snowball/libstemmer_c/include/libstemmer.h b/src/contrib/snowball/libstemmer_c/include/libstemmer.h index 9d86b858..8892a2b0 100644 --- a/src/contrib/snowball/libstemmer_c/include/libstemmer.h +++ b/src/contrib/snowball/libstemmer_c/include/libstemmer.h @@ -17,7 +17,7 @@ typedef unsigned char sb_symbol; * * The list must not be modified in any way. */ -const char ** sb_stemmer_list(void); +const char** sb_stemmer_list(void); /** Create a new stemmer object, using the specified algorithm, for the * specified character encoding. @@ -43,7 +43,7 @@ const char ** sb_stemmer_list(void); * * @note NULL will also be returned if an out of memory error occurs. */ -struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc); +struct sb_stemmer* sb_stemmer_new(const char* algorithm, const char* charenc); /** Delete a stemmer object. * @@ -53,7 +53,7 @@ struct sb_stemmer * sb_stemmer_new(const char * algorithm, const char * charenc) * It is safe to pass a null pointer to this function - this will have * no effect. 
*/ -void sb_stemmer_delete(struct sb_stemmer * stemmer); +void sb_stemmer_delete(struct sb_stemmer* stemmer); /** Stem a word. * @@ -65,13 +65,13 @@ void sb_stemmer_delete(struct sb_stemmer * stemmer); * * If an out-of-memory error occurs, this will return NULL. */ -const sb_symbol * sb_stemmer_stem(struct sb_stemmer * stemmer, - const sb_symbol * word, int size); +const sb_symbol* sb_stemmer_stem(struct sb_stemmer* stemmer, + const sb_symbol* word, int size); /** Get the length of the result of the last stemmed word. * This should not be called before sb_stemmer_stem() has been called. */ -int sb_stemmer_length(struct sb_stemmer * stemmer); +int sb_stemmer_length(struct sb_stemmer* stemmer); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/libstemmer/modules.h b/src/contrib/snowball/libstemmer_c/libstemmer/modules.h index 7a1f6856..cbe0431c 100644 --- a/src/contrib/snowball/libstemmer_c/libstemmer/modules.h +++ b/src/contrib/snowball/libstemmer_c/libstemmer/modules.h @@ -41,150 +41,150 @@ #include "../src_c/stem_UTF_8_turkish.h" typedef enum { - ENC_UNKNOWN=0, - ENC_ISO_8859_1, - ENC_ISO_8859_2, - ENC_KOI8_R, - ENC_UTF_8 + ENC_UNKNOWN=0, + ENC_ISO_8859_1, + ENC_ISO_8859_2, + ENC_KOI8_R, + ENC_UTF_8 } stemmer_encoding_t; struct stemmer_encoding { - const char * name; - stemmer_encoding_t enc; + const char* name; + stemmer_encoding_t enc; }; static struct stemmer_encoding encodings[] = { - {"ISO_8859_1", ENC_ISO_8859_1}, - {"ISO_8859_2", ENC_ISO_8859_2}, - {"KOI8_R", ENC_KOI8_R}, - {"UTF_8", ENC_UTF_8}, - {0,ENC_UNKNOWN} + {"ISO_8859_1", ENC_ISO_8859_1}, + {"ISO_8859_2", ENC_ISO_8859_2}, + {"KOI8_R", ENC_KOI8_R}, + {"UTF_8", ENC_UTF_8}, + {0,ENC_UNKNOWN} }; struct stemmer_modules { - const char * name; - stemmer_encoding_t enc; - struct SN_env * (*create)(void); - void (*close)(struct SN_env *); - int (*stem)(struct SN_env *); + const char* name; + stemmer_encoding_t enc; + struct SN_env* (*create)(void); + void (*close)(struct SN_env*); + int 
(*stem)(struct SN_env*); }; static struct stemmer_modules modules[] = { - {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, - {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, - {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, - {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, - {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, - {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, - {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, - {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, - {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, - {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, - {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, - {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, - {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, - {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, - {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, - {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, - {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, - {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, - {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, - 
{"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, - {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, - {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, - {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, - {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, - {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, - {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, - {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, - {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, - {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, - {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, - {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, - {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, - {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, - {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, - {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, - {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, - {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, - {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, - {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, 
german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, - {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, - {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, - {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, - {"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, - {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, - {"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, - {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, - {"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, - {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, - {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, - {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, - {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, - {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, - {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, - {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, - {"nl", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, - {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, - {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, - {"nld", ENC_UTF_8, dutch_UTF_8_create_env, 
dutch_UTF_8_close_env, dutch_UTF_8_stem}, - {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, - {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, - {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, - {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, - {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, - {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, - {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, - {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, - {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem}, - {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, - {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, - {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, - {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, - {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, - {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, - {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, - {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, - {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, 
romanian_UTF_8_close_env, romanian_UTF_8_stem}, - {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, - {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, - {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, - {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, - {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, - {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, - {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, - {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, - {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, - {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, - {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, - {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, - {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, - {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, - {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, - {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, - {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, - {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, - {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, - 
{"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, - {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, - {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, - {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, - {0,ENC_UNKNOWN,0,0,0} + {"da", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, + {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, + {"dan", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, + {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, + {"danish", ENC_ISO_8859_1, danish_ISO_8859_1_create_env, danish_ISO_8859_1_close_env, danish_ISO_8859_1_stem}, + {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, + {"de", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, + {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"deu", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, + {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"dut", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, + {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"dutch", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, + {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"en", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, + {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, 
+ {"eng", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, + {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, + {"english", ENC_ISO_8859_1, english_ISO_8859_1_create_env, english_ISO_8859_1_close_env, english_ISO_8859_1_stem}, + {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, + {"es", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, + {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"esl", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, + {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"fi", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, + {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, + {"fin", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, + {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, + {"finnish", ENC_ISO_8859_1, finnish_ISO_8859_1_create_env, finnish_ISO_8859_1_close_env, finnish_ISO_8859_1_stem}, + {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, + {"fr", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, + {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"fra", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, + {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"fre", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, + {"fre", ENC_UTF_8, 
french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"french", ENC_ISO_8859_1, french_ISO_8859_1_create_env, french_ISO_8859_1_close_env, french_ISO_8859_1_stem}, + {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"ger", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, + {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"german", ENC_ISO_8859_1, german_ISO_8859_1_create_env, german_ISO_8859_1_close_env, german_ISO_8859_1_stem}, + {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"hu", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, + {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, + {"hun", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, + {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, + {"hungarian", ENC_ISO_8859_1, hungarian_ISO_8859_1_create_env, hungarian_ISO_8859_1_close_env, hungarian_ISO_8859_1_stem}, + {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, + {"it", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, + {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, + {"ita", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, + {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, + {"italian", ENC_ISO_8859_1, italian_ISO_8859_1_create_env, italian_ISO_8859_1_close_env, italian_ISO_8859_1_stem}, + {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, + {"nl", ENC_ISO_8859_1, 
dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, + {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"nld", ENC_ISO_8859_1, dutch_ISO_8859_1_create_env, dutch_ISO_8859_1_close_env, dutch_ISO_8859_1_stem}, + {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"no", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, + {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, + {"nor", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, + {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, + {"norwegian", ENC_ISO_8859_1, norwegian_ISO_8859_1_create_env, norwegian_ISO_8859_1_close_env, norwegian_ISO_8859_1_stem}, + {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, + {"por", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, + {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, + {"porter", ENC_ISO_8859_1, porter_ISO_8859_1_create_env, porter_ISO_8859_1_close_env, porter_ISO_8859_1_stem}, + {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, + {"portuguese", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, + {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, + {"pt", ENC_ISO_8859_1, portuguese_ISO_8859_1_create_env, portuguese_ISO_8859_1_close_env, portuguese_ISO_8859_1_stem}, + {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, + {"ro", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, 
romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, + {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"romanian", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, + {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"ron", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, + {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"ru", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, + {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, + {"rum", ENC_ISO_8859_2, romanian_ISO_8859_2_create_env, romanian_ISO_8859_2_close_env, romanian_ISO_8859_2_stem}, + {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"rus", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, + {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, + {"russian", ENC_KOI8_R, russian_KOI8_R_create_env, russian_KOI8_R_close_env, russian_KOI8_R_stem}, + {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, + {"spa", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, + {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"spanish", ENC_ISO_8859_1, spanish_ISO_8859_1_create_env, spanish_ISO_8859_1_close_env, spanish_ISO_8859_1_stem}, + {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"sv", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, + {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, 
+ {"swe", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, + {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, + {"swedish", ENC_ISO_8859_1, swedish_ISO_8859_1_create_env, swedish_ISO_8859_1_close_env, swedish_ISO_8859_1_stem}, + {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, + {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, + {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, + {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, + {0,ENC_UNKNOWN,0,0,0} }; -static const char * algorithm_names[] = { - "danish", - "dutch", - "english", - "finnish", - "french", - "german", - "hungarian", - "italian", - "norwegian", - "porter", - "portuguese", - "romanian", - "russian", - "spanish", - "swedish", - "turkish", - 0 +static const char* algorithm_names[] = { + "danish", + "dutch", + "english", + "finnish", + "french", + "german", + "hungarian", + "italian", + "norwegian", + "porter", + "portuguese", + "romanian", + "russian", + "spanish", + "swedish", + "turkish", + 0 }; diff --git a/src/contrib/snowball/libstemmer_c/libstemmer/modules_utf8.h b/src/contrib/snowball/libstemmer_c/libstemmer/modules_utf8.h index 6a7cc924..47c845fb 100644 --- a/src/contrib/snowball/libstemmer_c/libstemmer/modules_utf8.h +++ b/src/contrib/snowball/libstemmer_c/libstemmer/modules_utf8.h @@ -26,96 +26,96 @@ #include "../src_c/stem_UTF_8_turkish.h" typedef enum { - ENC_UNKNOWN=0, - ENC_UTF_8 + ENC_UNKNOWN=0, + ENC_UTF_8 } stemmer_encoding_t; struct stemmer_encoding { - const char * name; - stemmer_encoding_t enc; + const char* name; + stemmer_encoding_t enc; }; static struct stemmer_encoding encodings[] = { - {"UTF_8", ENC_UTF_8}, - {0,ENC_UNKNOWN} + {"UTF_8", ENC_UTF_8}, + {0,ENC_UNKNOWN} }; struct stemmer_modules { - const char * 
name; - stemmer_encoding_t enc; - struct SN_env * (*create)(void); - void (*close)(struct SN_env *); - int (*stem)(struct SN_env *); + const char* name; + stemmer_encoding_t enc; + struct SN_env* (*create)(void); + void (*close)(struct SN_env*); + int (*stem)(struct SN_env*); }; static struct stemmer_modules modules[] = { - {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, - {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, - {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, - {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, - {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, - {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, - {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, - {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, - {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, - {"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, - {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, - {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, - {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, - {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, - {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, - {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, - {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, - {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, - {"french", ENC_UTF_8, 
french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, - {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, - {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, - {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, - {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, - {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, - {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, - {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, - {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, - {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, - {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, - {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, - {"nor", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, - {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, - {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, - {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, - {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, - {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, - {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, - {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, - {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, 
romanian_UTF_8_stem}, - {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, - {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, - {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, - {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, - {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, - {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, - {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, - {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, - {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, - {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, - {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, - {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, - {0,ENC_UNKNOWN,0,0,0} + {"da", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, + {"dan", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, + {"danish", ENC_UTF_8, danish_UTF_8_create_env, danish_UTF_8_close_env, danish_UTF_8_stem}, + {"de", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"deu", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"dut", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"dutch", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"en", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, + {"eng", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, + 
{"english", ENC_UTF_8, english_UTF_8_create_env, english_UTF_8_close_env, english_UTF_8_stem}, + {"es", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"esl", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"fi", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, + {"fin", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, + {"finnish", ENC_UTF_8, finnish_UTF_8_create_env, finnish_UTF_8_close_env, finnish_UTF_8_stem}, + {"fr", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"fra", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"fre", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"french", ENC_UTF_8, french_UTF_8_create_env, french_UTF_8_close_env, french_UTF_8_stem}, + {"ger", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"german", ENC_UTF_8, german_UTF_8_create_env, german_UTF_8_close_env, german_UTF_8_stem}, + {"hu", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, + {"hun", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, + {"hungarian", ENC_UTF_8, hungarian_UTF_8_create_env, hungarian_UTF_8_close_env, hungarian_UTF_8_stem}, + {"it", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, + {"ita", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, + {"italian", ENC_UTF_8, italian_UTF_8_create_env, italian_UTF_8_close_env, italian_UTF_8_stem}, + {"nl", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"nld", ENC_UTF_8, dutch_UTF_8_create_env, dutch_UTF_8_close_env, dutch_UTF_8_stem}, + {"no", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, + {"nor", ENC_UTF_8, 
norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, + {"norwegian", ENC_UTF_8, norwegian_UTF_8_create_env, norwegian_UTF_8_close_env, norwegian_UTF_8_stem}, + {"por", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, + {"porter", ENC_UTF_8, porter_UTF_8_create_env, porter_UTF_8_close_env, porter_UTF_8_stem}, + {"portuguese", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, + {"pt", ENC_UTF_8, portuguese_UTF_8_create_env, portuguese_UTF_8_close_env, portuguese_UTF_8_stem}, + {"ro", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"romanian", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"ron", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"ru", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, + {"rum", ENC_UTF_8, romanian_UTF_8_create_env, romanian_UTF_8_close_env, romanian_UTF_8_stem}, + {"rus", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, + {"russian", ENC_UTF_8, russian_UTF_8_create_env, russian_UTF_8_close_env, russian_UTF_8_stem}, + {"spa", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"spanish", ENC_UTF_8, spanish_UTF_8_create_env, spanish_UTF_8_close_env, spanish_UTF_8_stem}, + {"sv", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, + {"swe", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, + {"swedish", ENC_UTF_8, swedish_UTF_8_create_env, swedish_UTF_8_close_env, swedish_UTF_8_stem}, + {"tr", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, + {"tur", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, turkish_UTF_8_stem}, + {"turkish", ENC_UTF_8, turkish_UTF_8_create_env, turkish_UTF_8_close_env, 
turkish_UTF_8_stem}, + {0,ENC_UNKNOWN,0,0,0} }; -static const char * algorithm_names[] = { - "danish", - "dutch", - "english", - "finnish", - "french", - "german", - "hungarian", - "italian", - "norwegian", - "porter", - "portuguese", - "romanian", - "russian", - "spanish", - "swedish", - "turkish", - 0 +static const char* algorithm_names[] = { + "danish", + "dutch", + "english", + "finnish", + "french", + "german", + "hungarian", + "italian", + "norwegian", + "porter", + "portuguese", + "romanian", + "russian", + "spanish", + "swedish", + "turkish", + 0 }; diff --git a/src/contrib/snowball/libstemmer_c/runtime/api.h b/src/contrib/snowball/libstemmer_c/runtime/api.h index 8b997f0c..5498aa0f 100644 --- a/src/contrib/snowball/libstemmer_c/runtime/api.h +++ b/src/contrib/snowball/libstemmer_c/runtime/api.h @@ -12,15 +12,19 @@ typedef unsigned char symbol; */ struct SN_env { - symbol * p; - int c; int l; int lb; int bra; int ket; - symbol * * S; - int * I; - unsigned char * B; + symbol* p; + int c; + int l; + int lb; + int bra; + int ket; + symbol** S; + int* I; + unsigned char* B; }; -extern struct SN_env * SN_create_env(int S_size, int I_size, int B_size); -extern void SN_close_env(struct SN_env * z, int S_size); +extern struct SN_env* SN_create_env(int S_size, int I_size, int B_size); +extern void SN_close_env(struct SN_env* z, int S_size); -extern int SN_set_current(struct SN_env * z, int size, const symbol * s); +extern int SN_set_current(struct SN_env* z, int size, const symbol* s); diff --git a/src/contrib/snowball/libstemmer_c/runtime/header.h b/src/contrib/snowball/libstemmer_c/runtime/header.h index 4d3078f5..7609f726 100644 --- a/src/contrib/snowball/libstemmer_c/runtime/header.h +++ b/src/contrib/snowball/libstemmer_c/runtime/header.h @@ -12,47 +12,47 @@ #define SET_SIZE(p, n) ((int *)(p))[-1] = n #define CAPACITY(p) ((int *)(p))[-2] -struct among -{ int s_size; /* number of chars in string */ - const symbol * s; /* search string */ +struct among { + int 
s_size; /* number of chars in string */ + const symbol* s; /* search string */ int substring_i;/* index to longest matching substring */ int result; /* result of the lookup */ - int (* function)(struct SN_env *); + int (* function)(struct SN_env*); }; -extern symbol * create_s(void); -extern void lose_s(symbol * p); +extern symbol* create_s(void); +extern void lose_s(symbol* p); -extern int skip_utf8(const symbol * p, int c, int lb, int l, int n); +extern int skip_utf8(const symbol* p, int c, int lb, int l, int n); -extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); -extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); -extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); -extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); +extern int in_grouping_U(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); +extern int in_grouping_b_U(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); +extern int out_grouping_U(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); +extern int out_grouping_b_U(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); -extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); -extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); -extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); -extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat); +extern int in_grouping(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); +extern int in_grouping_b(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); +extern int out_grouping(struct SN_env* z, const unsigned char* s, 
int min, int max, int repeat); +extern int out_grouping_b(struct SN_env* z, const unsigned char* s, int min, int max, int repeat); -extern int eq_s(struct SN_env * z, int s_size, const symbol * s); -extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s); -extern int eq_v(struct SN_env * z, const symbol * p); -extern int eq_v_b(struct SN_env * z, const symbol * p); +extern int eq_s(struct SN_env* z, int s_size, const symbol* s); +extern int eq_s_b(struct SN_env* z, int s_size, const symbol* s); +extern int eq_v(struct SN_env* z, const symbol* p); +extern int eq_v_b(struct SN_env* z, const symbol* p); -extern int find_among(struct SN_env * z, const struct among * v, int v_size); -extern int find_among_b(struct SN_env * z, const struct among * v, int v_size); +extern int find_among(struct SN_env* z, const struct among* v, int v_size); +extern int find_among_b(struct SN_env* z, const struct among* v, int v_size); -extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment); -extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s); -extern int slice_from_v(struct SN_env * z, const symbol * p); -extern int slice_del(struct SN_env * z); +extern int replace_s(struct SN_env* z, int c_bra, int c_ket, int s_size, const symbol* s, int* adjustment); +extern int slice_from_s(struct SN_env* z, int s_size, const symbol* s); +extern int slice_from_v(struct SN_env* z, const symbol* p); +extern int slice_del(struct SN_env* z); -extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s); -extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p); +extern int insert_s(struct SN_env* z, int bra, int ket, int s_size, const symbol* s); +extern int insert_v(struct SN_env* z, int bra, int ket, const symbol* p); -extern symbol * slice_to(struct SN_env * z, symbol * p); -extern symbol * assign_to(struct SN_env * z, symbol * p); +extern symbol* slice_to(struct SN_env* z, 
symbol* p); +extern symbol* assign_to(struct SN_env* z, symbol* p); -extern void debug(struct SN_env * z, int number, int line_count); +extern void debug(struct SN_env* z, int number, int line_count); diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_danish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_danish.h index 49c5559c..36aba971 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_danish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_danish.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * danish_ISO_8859_1_create_env(void); -extern void danish_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* danish_ISO_8859_1_create_env(void); +extern void danish_ISO_8859_1_close_env(struct SN_env* z); -extern int danish_ISO_8859_1_stem(struct SN_env * z); +extern int danish_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h index e67d1115..76d06b77 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_dutch.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * dutch_ISO_8859_1_create_env(void); -extern void dutch_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* dutch_ISO_8859_1_create_env(void); +extern void dutch_ISO_8859_1_close_env(struct SN_env* z); -extern int dutch_ISO_8859_1_stem(struct SN_env * z); +extern int dutch_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_english.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_english.h index e685dcf7..2af9dd3d 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_english.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_english.h @@ -5,10 +5,10 @@ extern "C" { 
#endif -extern struct SN_env * english_ISO_8859_1_create_env(void); -extern void english_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* english_ISO_8859_1_create_env(void); +extern void english_ISO_8859_1_close_env(struct SN_env* z); -extern int english_ISO_8859_1_stem(struct SN_env * z); +extern int english_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h index c67b67b9..3f76446c 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_finnish.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * finnish_ISO_8859_1_create_env(void); -extern void finnish_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* finnish_ISO_8859_1_create_env(void); +extern void finnish_ISO_8859_1_close_env(struct SN_env* z); -extern int finnish_ISO_8859_1_stem(struct SN_env * z); +extern int finnish_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_french.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_french.h index 21244d61..b966be81 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_french.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_french.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * french_ISO_8859_1_create_env(void); -extern void french_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* french_ISO_8859_1_create_env(void); +extern void french_ISO_8859_1_close_env(struct SN_env* z); -extern int french_ISO_8859_1_stem(struct SN_env * z); +extern int french_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_german.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_german.h index 
85253892..0a039f94 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_german.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_german.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * german_ISO_8859_1_create_env(void); -extern void german_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* german_ISO_8859_1_create_env(void); +extern void german_ISO_8859_1_close_env(struct SN_env* z); -extern int german_ISO_8859_1_stem(struct SN_env * z); +extern int german_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h index c3177e50..fb8c6356 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_hungarian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * hungarian_ISO_8859_1_create_env(void); -extern void hungarian_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* hungarian_ISO_8859_1_create_env(void); +extern void hungarian_ISO_8859_1_close_env(struct SN_env* z); -extern int hungarian_ISO_8859_1_stem(struct SN_env * z); +extern int hungarian_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_italian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_italian.h index dccbfd5e..36638442 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_italian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_italian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * italian_ISO_8859_1_create_env(void); -extern void italian_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* italian_ISO_8859_1_create_env(void); +extern void italian_ISO_8859_1_close_env(struct SN_env* z); -extern int italian_ISO_8859_1_stem(struct SN_env * z); 
+extern int italian_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h index e09e34e5..7764998a 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_norwegian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * norwegian_ISO_8859_1_create_env(void); -extern void norwegian_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* norwegian_ISO_8859_1_create_env(void); +extern void norwegian_ISO_8859_1_close_env(struct SN_env* z); -extern int norwegian_ISO_8859_1_stem(struct SN_env * z); +extern int norwegian_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_porter.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_porter.h index 5c8fd01d..35304144 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_porter.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_porter.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * porter_ISO_8859_1_create_env(void); -extern void porter_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* porter_ISO_8859_1_create_env(void); +extern void porter_ISO_8859_1_close_env(struct SN_env* z); -extern int porter_ISO_8859_1_stem(struct SN_env * z); +extern int porter_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h index 0279bc94..aa4133bd 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_portuguese.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * portuguese_ISO_8859_1_create_env(void); 
-extern void portuguese_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* portuguese_ISO_8859_1_create_env(void); +extern void portuguese_ISO_8859_1_close_env(struct SN_env* z); -extern int portuguese_ISO_8859_1_stem(struct SN_env * z); +extern int portuguese_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h index 83f14984..2235ac84 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_spanish.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * spanish_ISO_8859_1_create_env(void); -extern void spanish_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* spanish_ISO_8859_1_create_env(void); +extern void spanish_ISO_8859_1_close_env(struct SN_env* z); -extern int spanish_ISO_8859_1_stem(struct SN_env * z); +extern int spanish_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h index 4184e5ca..81ae66b5 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_1_swedish.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * swedish_ISO_8859_1_create_env(void); -extern void swedish_ISO_8859_1_close_env(struct SN_env * z); +extern struct SN_env* swedish_ISO_8859_1_create_env(void); +extern void swedish_ISO_8859_1_close_env(struct SN_env* z); -extern int swedish_ISO_8859_1_stem(struct SN_env * z); +extern int swedish_ISO_8859_1_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h index 931f269c..ebb15c5d 100644 --- 
a/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_ISO_8859_2_romanian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * romanian_ISO_8859_2_create_env(void); -extern void romanian_ISO_8859_2_close_env(struct SN_env * z); +extern struct SN_env* romanian_ISO_8859_2_create_env(void); +extern void romanian_ISO_8859_2_close_env(struct SN_env* z); -extern int romanian_ISO_8859_2_stem(struct SN_env * z); +extern int romanian_ISO_8859_2_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_KOI8_R_russian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_KOI8_R_russian.h index de2179d2..e3b90be0 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_KOI8_R_russian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_KOI8_R_russian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * russian_KOI8_R_create_env(void); -extern void russian_KOI8_R_close_env(struct SN_env * z); +extern struct SN_env* russian_KOI8_R_create_env(void); +extern void russian_KOI8_R_close_env(struct SN_env* z); -extern int russian_KOI8_R_stem(struct SN_env * z); +extern int russian_KOI8_R_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_danish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_danish.h index ed744d45..bd54c88a 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_danish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_danish.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * danish_UTF_8_create_env(void); -extern void danish_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* danish_UTF_8_create_env(void); +extern void danish_UTF_8_close_env(struct SN_env* z); -extern int danish_UTF_8_stem(struct SN_env * z); +extern int danish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git 
a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_dutch.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_dutch.h index a9964645..7990b174 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_dutch.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_dutch.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * dutch_UTF_8_create_env(void); -extern void dutch_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* dutch_UTF_8_create_env(void); +extern void dutch_UTF_8_close_env(struct SN_env* z); -extern int dutch_UTF_8_stem(struct SN_env * z); +extern int dutch_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_english.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_english.h index 619a8bc7..e6c5e507 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_english.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_english.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * english_UTF_8_create_env(void); -extern void english_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* english_UTF_8_create_env(void); +extern void english_UTF_8_close_env(struct SN_env* z); -extern int english_UTF_8_stem(struct SN_env * z); +extern int english_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_finnish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_finnish.h index d2f2fd96..9fb23a22 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_finnish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_finnish.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * finnish_UTF_8_create_env(void); -extern void finnish_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* finnish_UTF_8_create_env(void); +extern void finnish_UTF_8_close_env(struct SN_env* z); -extern int finnish_UTF_8_stem(struct SN_env * z); +extern int finnish_UTF_8_stem(struct 
SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_french.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_french.h index 08e34184..2964fe29 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_french.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_french.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * french_UTF_8_create_env(void); -extern void french_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* french_UTF_8_create_env(void); +extern void french_UTF_8_close_env(struct SN_env* z); -extern int french_UTF_8_stem(struct SN_env * z); +extern int french_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_german.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_german.h index 5bd84d43..2dc04c2d 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_german.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_german.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * german_UTF_8_create_env(void); -extern void german_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* german_UTF_8_create_env(void); +extern void german_UTF_8_close_env(struct SN_env* z); -extern int german_UTF_8_stem(struct SN_env * z); +extern int german_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_hungarian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_hungarian.h index d81bd234..f66eb2e2 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_hungarian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_hungarian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * hungarian_UTF_8_create_env(void); -extern void hungarian_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* hungarian_UTF_8_create_env(void); +extern void hungarian_UTF_8_close_env(struct SN_env* z); -extern int 
hungarian_UTF_8_stem(struct SN_env * z); +extern int hungarian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_italian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_italian.h index 3bee080d..62a1a425 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_italian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_italian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * italian_UTF_8_create_env(void); -extern void italian_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* italian_UTF_8_create_env(void); +extern void italian_UTF_8_close_env(struct SN_env* z); -extern int italian_UTF_8_stem(struct SN_env * z); +extern int italian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_norwegian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_norwegian.h index c75444bc..475de029 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_norwegian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_norwegian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * norwegian_UTF_8_create_env(void); -extern void norwegian_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* norwegian_UTF_8_create_env(void); +extern void norwegian_UTF_8_close_env(struct SN_env* z); -extern int norwegian_UTF_8_stem(struct SN_env * z); +extern int norwegian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_porter.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_porter.h index 82d469ac..af3acefb 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_porter.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_porter.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * porter_UTF_8_create_env(void); -extern void porter_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* 
porter_UTF_8_create_env(void); +extern void porter_UTF_8_close_env(struct SN_env* z); -extern int porter_UTF_8_stem(struct SN_env * z); +extern int porter_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_portuguese.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_portuguese.h index 9fe7f9aa..27f99936 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_portuguese.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_portuguese.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * portuguese_UTF_8_create_env(void); -extern void portuguese_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* portuguese_UTF_8_create_env(void); +extern void portuguese_UTF_8_close_env(struct SN_env* z); -extern int portuguese_UTF_8_stem(struct SN_env * z); +extern int portuguese_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_romanian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_romanian.h index d01e8132..e7a14166 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_romanian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_romanian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * romanian_UTF_8_create_env(void); -extern void romanian_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* romanian_UTF_8_create_env(void); +extern void romanian_UTF_8_close_env(struct SN_env* z); -extern int romanian_UTF_8_stem(struct SN_env * z); +extern int romanian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_russian.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_russian.h index 4ef774dd..c7b02965 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_russian.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_russian.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * 
russian_UTF_8_create_env(void); -extern void russian_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* russian_UTF_8_create_env(void); +extern void russian_UTF_8_close_env(struct SN_env* z); -extern int russian_UTF_8_stem(struct SN_env * z); +extern int russian_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_spanish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_spanish.h index 10572ecc..c76297b6 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_spanish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_spanish.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * spanish_UTF_8_create_env(void); -extern void spanish_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* spanish_UTF_8_create_env(void); +extern void spanish_UTF_8_close_env(struct SN_env* z); -extern int spanish_UTF_8_stem(struct SN_env * z); +extern int spanish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_swedish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_swedish.h index 1444ebb4..6f2625e1 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_swedish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_swedish.h @@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * swedish_UTF_8_create_env(void); -extern void swedish_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* swedish_UTF_8_create_env(void); +extern void swedish_UTF_8_close_env(struct SN_env* z); -extern int swedish_UTF_8_stem(struct SN_env * z); +extern int swedish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_turkish.h b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_turkish.h index 8173a174..05974aba 100644 --- a/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_turkish.h +++ b/src/contrib/snowball/libstemmer_c/src_c/stem_UTF_8_turkish.h 
@@ -5,10 +5,10 @@ extern "C" { #endif -extern struct SN_env * turkish_UTF_8_create_env(void); -extern void turkish_UTF_8_close_env(struct SN_env * z); +extern struct SN_env* turkish_UTF_8_create_env(void); +extern void turkish_UTF_8_close_env(struct SN_env* z); -extern int turkish_UTF_8_stem(struct SN_env * z); +extern int turkish_UTF_8_stem(struct SN_env* z); #ifdef __cplusplus } diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt index 5639a2c7..690f3826 100644 --- a/src/core/CMakeLists.txt +++ b/src/core/CMakeLists.txt @@ -1,73 +1,87 @@ -PROJECT (lucene++-lib) +project(core) #################################### -# THE lucene++ library +# create library target +#################################### +if (LUCENE_BUILD_SHARED) + add_library(lucene++ SHARED) +else() + add_library(lucene++ STATIC) +endif() + +add_library(lucene++::lucene++ ALIAS lucene++) + + +#################################### +# src #################################### file(GLOB_RECURSE lucene_sources - ${lucene++-lib_SOURCE_DIR}/search/*.cpp - ${lucene++-lib_SOURCE_DIR}/analysis/*.cpp - ${lucene++-lib_SOURCE_DIR}/document/*.cpp - ${lucene++-lib_SOURCE_DIR}/index/*.cpp - ${lucene++-lib_SOURCE_DIR}/queryparser/*.cpp - ${lucene++-lib_SOURCE_DIR}/store/*.cpp - ${lucene++-lib_SOURCE_DIR}/util/*.cpp) - -file(GLOB_RECURSE INTERN_HEADERS ${lucene++-libs_SOURCE_DIR}/include/*.h) -file(GLOB_RECURSE HEADERS ${lucene++-base_SOURCE_DIR}/include/*.h) - -#C sources... 
-file(GLOB_RECURSE lucene_c_sources - ${lucene++-lib_SOURCE_DIR}/util/*.c) - - -IF ( ENABLE_NEDMALLOC ) - ADD_DEFINITIONS(-DLPP_USE_NEDMALLOC) -ENDIF() - -LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) -INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) -INCLUDE_DIRECTORIES(${lucene++-base_SOURCE_DIR}/include) -INCLUDE_DIRECTORIES(${lucene++-lib_SOURCE_DIR}/include) -ADD_DEFINITIONS(-DLPP_BUILDING_LIB) -ADD_LIBRARY(lucene++-c STATIC - ${lucene_c_sources} -) -install(FILES ${HEADERS} - DESTINATION include/lucene++ - COMPONENT development) - -################################# -# lucene++ shared library -################################# -ADD_LIBRARY(lucene++ SHARED - ${lucene_sources} ${HEADERS} ${INTERN_HEADERS} -) -SET(PCH_ADDITIONAL_COMPILER_FLAGS_lucene++ -DLPP_HAVE_DLL) -ADD_PRECOMPILED_HEADER(lucene++ ${lucene++-lib_SOURCE_DIR}/include/LuceneInc.h) -#set properties on the libraries -SET_TARGET_PROPERTIES(lucene++ PROPERTIES - VERSION ${LUCENE++_VERSION} - SOVERSION ${LUCENE++_SOVERSION} + "search/*.cpp" + "analysis/*.cpp" + "document/*.cpp" + "index/*.cpp" + "queryparser/*.cpp" + "store/*.cpp" + "util/*.c*" ) -TARGET_LINK_LIBRARIES(lucene++ - lucene++-c - ${CMAKE_THREAD_LIBS_INIT} - ${LUCENE_BOOST_LIBS} ) -install(TARGETS lucene++ - DESTINATION ${LIB_DESTINATION} - COMPONENT runtime) - -################################# -# lucene++ static library -################################# -ADD_LIBRARY(lucene++-static STATIC EXCLUDE_FROM_ALL - ${lucene_sources} ${HEADERS} ${INTERN_HEADERS} + +file(GLOB_RECURSE lucene_internal_headers + "${lucene++-lib_SOURCE_DIR}/include/*.h" ) -SET(PCH_ADDITIONAL_COMPILER_FLAGS_lucene++-static -DLPP_HAVE_DLL) -ADD_PRECOMPILED_HEADER(lucene++-static ${lucene++-lib_SOURCE_DIR}/include/LuceneInc.h) -#set properties on the libraries -SET_TARGET_PROPERTIES(lucene++-static PROPERTIES - VERSION ${LUCENE++_VERSION} - SOVERSION ${LUCENE++_SOVERSION} + + +target_sources(lucene++ + PRIVATE + ${lucene_sources} + ${lucene_internal_headers}) + + 
+#################################### +# include directories +#################################### +target_include_directories(lucene++ + PUBLIC + $ + $ + $ + ${Boost_INCLUDE_DIRS}) + + + +#################################### +# dependencies +#################################### +target_link_libraries(lucene++ + Boost::boost + Boost::date_time + Boost::filesystem + Boost::iostreams + Boost::regex + Boost::system + Boost::thread + ZLIB::ZLIB ) +if(WIN32) + target_link_libraries(lucene++ ws2_32) +endif() + +#################################### +# link args +#################################### +target_compile_options(lucene++ PRIVATE -DLPP_BUILDING_LIB) + +set_target_properties(lucene++ + PROPERTIES + COTIRE_CXX_PREFIX_HEADER_INIT "include/LuceneInc.h" + CXX_VISIBILITY_PRESET hidden + VISIBILITY_INLINES_HIDDEN 1 + VERSION ${lucene++_VERSION} + SOVERSION ${lucene++_SOVERSION}) + +cotire(lucene++) + + +install(TARGETS lucene++ + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + COMPONENT runtime) diff --git a/src/core/analysis/ASCIIFoldingFilter.cpp b/src/core/analysis/ASCIIFoldingFilter.cpp index c4c735c8..0d0ca4fe 100644 --- a/src/core/analysis/ASCIIFoldingFilter.cpp +++ b/src/core/analysis/ASCIIFoldingFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,1976 +9,1968 @@ #include "TermAttribute.h" #include "MiscUtils.h" -namespace Lucene -{ - ASCIIFoldingFilter::ASCIIFoldingFilter(TokenStreamPtr input) : TokenFilter(input) - { - output = CharArray::newInstance(512); - outputPos = 0; - termAtt = addAttribute(); - } - - ASCIIFoldingFilter::~ASCIIFoldingFilter() - { - } - - bool ASCIIFoldingFilter::incrementToken() - { - if (input->incrementToken()) - { - wchar_t* buffer = termAtt->termBufferArray(); - int32_t length = termAtt->termLength(); - - // If no characters actually require rewriting then we just return token as-is - for (int32_t i = 0; i < length; ++i) - { - wchar_t c = buffer[i]; - if (c >= 0x0080) - { - foldToASCII(buffer, length); - termAtt->setTermBuffer(output.get(), 0, outputPos); - break; - } +namespace Lucene { + +ASCIIFoldingFilter::ASCIIFoldingFilter(const TokenStreamPtr& input) : TokenFilter(input) { + output = CharArray::newInstance(512); + outputPos = 0; + termAtt = addAttribute(); +} + +ASCIIFoldingFilter::~ASCIIFoldingFilter() { +} + +bool ASCIIFoldingFilter::incrementToken() { + if (input->incrementToken()) { + wchar_t* buffer = termAtt->termBufferArray(); + int32_t length = termAtt->termLength(); + + // If no characters actually require rewriting then we just return token as-is + for (int32_t i = 0; i < length; ++i) { + wchar_t c = buffer[i]; + if (c >= 0x0080) { + foldToASCII(buffer, length); + termAtt->setTermBuffer(output.get(), 0, outputPos); + break; } - return true; } - else - return false; + return true; + } else { + return false; } - - void ASCIIFoldingFilter::foldToASCII(const wchar_t* input, int32_t length) - { - // Worst-case length required - int32_t maxSizeNeeded = 4 * length; - if (output.size() < maxSizeNeeded) - output.resize(MiscUtils::getNextSize(maxSizeNeeded)); - - outputPos = 0; - wchar_t* output = this->output.get(); - - for (int32_t pos = 0; pos < length; ++pos) - { - wchar_t c = 
input[pos]; - - // Quick test: if it's not in range then just keep current character - if (c < 0x0080) +} + +void ASCIIFoldingFilter::foldToASCII(const wchar_t* input, int32_t length) { + // Worst-case length required + int32_t maxSizeNeeded = 4 * length; + if (output.size() < maxSizeNeeded) { + output.resize(MiscUtils::getNextSize(maxSizeNeeded)); + } + + outputPos = 0; + wchar_t* output = this->output.get(); + + for (int32_t pos = 0; pos < length; ++pos) { + wchar_t c = input[pos]; + + // Quick test: if it's not in range then just keep current character + if (c < 0x0080) { + output[outputPos++] = c; + } else { + switch (c) { + case 0x00C0: // [LATIN CAPITAL LETTER A WITH GRAVE] + case 0x00C1: // [LATIN CAPITAL LETTER A WITH ACUTE] + case 0x00C2: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] + case 0x00C3: // [LATIN CAPITAL LETTER A WITH TILDE] + case 0x00C4: // [LATIN CAPITAL LETTER A WITH DIAERESIS] + case 0x00C5: // [LATIN CAPITAL LETTER A WITH RING ABOVE] + case 0x0100: // [LATIN CAPITAL LETTER A WITH MACRON] + case 0x0102: // [LATIN CAPITAL LETTER A WITH BREVE] + case 0x0104: // [LATIN CAPITAL LETTER A WITH OGONEK] + case 0x018F: // [LATIN CAPITAL LETTER SCHWA] + case 0x01CD: // [LATIN CAPITAL LETTER A WITH CARON] + case 0x01DE: // [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] + case 0x01E0: // [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] + case 0x01FA: // [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE] + case 0x0200: // [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] + case 0x0202: // [LATIN CAPITAL LETTER A WITH INVERTED BREVE] + case 0x0226: // [LATIN CAPITAL LETTER A WITH DOT ABOVE] + case 0x023A: // [LATIN CAPITAL LETTER A WITH STROKE] + case 0x1D00: // [LATIN LETTER SMALL CAPITAL A] + case 0x1E00: // [LATIN CAPITAL LETTER A WITH RING BELOW] + case 0x1EA0: // [LATIN CAPITAL LETTER A WITH DOT BELOW] + case 0x1EA2: // [LATIN CAPITAL LETTER A WITH HOOK ABOVE] + case 0x1EA4: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] + case 0x1EA6: // 
[LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] + case 0x1EA8: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case 0x1EAA: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] + case 0x1EAC: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case 0x1EAE: // [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] + case 0x1EB0: // [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] + case 0x1EB2: // [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] + case 0x1EB4: // [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] + case 0x1EB6: // [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] + case 0x24B6: // [CIRCLED LATIN CAPITAL LETTER A] + case 0xFF21: // [FULLWIDTH LATIN CAPITAL LETTER A] + output[outputPos++] = L'A'; + break; + case 0x00E0: // [LATIN SMALL LETTER A WITH GRAVE] + case 0x00E1: // [LATIN SMALL LETTER A WITH ACUTE] + case 0x00E2: // [LATIN SMALL LETTER A WITH CIRCUMFLEX] + case 0x00E3: // [LATIN SMALL LETTER A WITH TILDE] + case 0x00E4: // [LATIN SMALL LETTER A WITH DIAERESIS] + case 0x00E5: // [LATIN SMALL LETTER A WITH RING ABOVE] + case 0x0101: // [LATIN SMALL LETTER A WITH MACRON] + case 0x0103: // [LATIN SMALL LETTER A WITH BREVE] + case 0x0105: // [LATIN SMALL LETTER A WITH OGONEK] + case 0x01CE: // [LATIN SMALL LETTER A WITH CARON] + case 0x01DF: // [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] + case 0x01E1: // [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] + case 0x01FB: // [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] + case 0x0201: // [LATIN SMALL LETTER A WITH DOUBLE GRAVE] + case 0x0203: // [LATIN SMALL LETTER A WITH INVERTED BREVE] + case 0x0227: // [LATIN SMALL LETTER A WITH DOT ABOVE] + case 0x0250: // [LATIN SMALL LETTER TURNED A] + case 0x0259: // [LATIN SMALL LETTER SCHWA] + case 0x025A: // [LATIN SMALL LETTER SCHWA WITH HOOK] + case 0x1D8F: // [LATIN SMALL LETTER A WITH RETROFLEX HOOK] + case 0x1D95: // [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] + case 0x1E01: // [LATIN SMALL LETTER A WITH RING BELOW] + case 0x1E9A: 
// [LATIN SMALL LETTER A WITH RIGHT HALF RING] + case 0x1EA1: // [LATIN SMALL LETTER A WITH DOT BELOW] + case 0x1EA3: // [LATIN SMALL LETTER A WITH HOOK ABOVE] + case 0x1EA5: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] + case 0x1EA7: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] + case 0x1EA9: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] + case 0x1EAB: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] + case 0x1EAD: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] + case 0x1EAF: // [LATIN SMALL LETTER A WITH BREVE AND ACUTE] + case 0x1EB1: // [LATIN SMALL LETTER A WITH BREVE AND GRAVE] + case 0x1EB3: // [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] + case 0x1EB5: // [LATIN SMALL LETTER A WITH BREVE AND TILDE] + case 0x1EB7: // [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] + case 0x2090: // [LATIN SUBSCRIPT SMALL LETTER A] + case 0x2094: // [LATIN SUBSCRIPT SMALL LETTER SCHWA] + case 0x24D0: // [CIRCLED LATIN SMALL LETTER A] + case 0x2C65: // [LATIN SMALL LETTER A WITH STROKE] + case 0x2C6F: // [LATIN CAPITAL LETTER TURNED A] + case 0xFF41: // [FULLWIDTH LATIN SMALL LETTER A] + output[outputPos++] = L'a'; + break; + case 0xA732: // [LATIN CAPITAL LETTER AA] + output[outputPos++] = L'A'; + output[outputPos++] = L'A'; + break; + case 0x00C6: // [LATIN CAPITAL LETTER AE] + case 0x01E2: // [LATIN CAPITAL LETTER AE WITH MACRON] + case 0x01FC: // [LATIN CAPITAL LETTER AE WITH ACUTE] + case 0x1D01: // [LATIN LETTER SMALL CAPITAL AE] + output[outputPos++] = L'A'; + output[outputPos++] = L'E'; + break; + case 0xA734: // [LATIN CAPITAL LETTER AO] + output[outputPos++] = L'A'; + output[outputPos++] = L'O'; + break; + case 0xA736: // [LATIN CAPITAL LETTER AU] + output[outputPos++] = L'A'; + output[outputPos++] = L'U'; + break; + case 0xA738: // [LATIN CAPITAL LETTER AV] + case 0xA73A: // [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] + output[outputPos++] = L'A'; + output[outputPos++] = L'V'; + break; + case 0xA73C: // [LATIN CAPITAL 
LETTER AY] + output[outputPos++] = L'A'; + output[outputPos++] = L'Y'; + break; + case 0x249C: // [PARENTHESIZED LATIN SMALL LETTER A] + output[outputPos++] = L'('; + output[outputPos++] = L'a'; + output[outputPos++] = L')'; + break; + case 0xA733: // [LATIN SMALL LETTER AA] + output[outputPos++] = L'a'; + output[outputPos++] = L'a'; + break; + case 0x00E6: // [LATIN SMALL LETTER AE] + case 0x01E3: // [LATIN SMALL LETTER AE WITH MACRON] + case 0x01FD: // [LATIN SMALL LETTER AE WITH ACUTE] + case 0x1D02: // [LATIN SMALL LETTER TURNED AE] + output[outputPos++] = L'a'; + output[outputPos++] = L'e'; + break; + case 0xA735: // [LATIN SMALL LETTER AO] + output[outputPos++] = L'a'; + output[outputPos++] = L'o'; + break; + case 0xA737: // [LATIN SMALL LETTER AU] + output[outputPos++] = L'a'; + output[outputPos++] = L'u'; + break; + case 0xA739: // [LATIN SMALL LETTER AV] + case 0xA73B: // [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] + output[outputPos++] = L'a'; + output[outputPos++] = L'v'; + break; + case 0xA73D: // [LATIN SMALL LETTER AY] + output[outputPos++] = L'a'; + output[outputPos++] = L'y'; + break; + case 0x0181: // [LATIN CAPITAL LETTER B WITH HOOK] + case 0x0182: // [LATIN CAPITAL LETTER B WITH TOPBAR] + case 0x0243: // [LATIN CAPITAL LETTER B WITH STROKE] + case 0x0299: // [LATIN LETTER SMALL CAPITAL B] + case 0x1D03: // [LATIN LETTER SMALL CAPITAL BARRED B] + case 0x1E02: // [LATIN CAPITAL LETTER B WITH DOT ABOVE] + case 0x1E04: // [LATIN CAPITAL LETTER B WITH DOT BELOW] + case 0x1E06: // [LATIN CAPITAL LETTER B WITH LINE BELOW] + case 0x24B7: // [CIRCLED LATIN CAPITAL LETTER B] + case 0xFF22: // [FULLWIDTH LATIN CAPITAL LETTER B] + output[outputPos++] = L'B'; + break; + case 0x0180: // [LATIN SMALL LETTER B WITH STROKE] + case 0x0183: // [LATIN SMALL LETTER B WITH TOPBAR] + case 0x0253: // [LATIN SMALL LETTER B WITH HOOK] + case 0x1D6C: // [LATIN SMALL LETTER B WITH MIDDLE TILDE] + case 0x1D80: // [LATIN SMALL LETTER B WITH PALATAL HOOK] + case 0x1E03: // 
[LATIN SMALL LETTER B WITH DOT ABOVE] + case 0x1E05: // [LATIN SMALL LETTER B WITH DOT BELOW] + case 0x1E07: // [LATIN SMALL LETTER B WITH LINE BELOW] + case 0x24D1: // [CIRCLED LATIN SMALL LETTER B] + case 0xFF42: // [FULLWIDTH LATIN SMALL LETTER B] + output[outputPos++] = L'b'; + break; + case 0x249D: // [PARENTHESIZED LATIN SMALL LETTER B] + output[outputPos++] = L'('; + output[outputPos++] = L'b'; + output[outputPos++] = L')'; + break; + case 0x00C7: // [LATIN CAPITAL LETTER C WITH CEDILLA] + case 0x0106: // [LATIN CAPITAL LETTER C WITH ACUTE] + case 0x0108: // [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] + case 0x010A: // [LATIN CAPITAL LETTER C WITH DOT ABOVE] + case 0x010C: // [LATIN CAPITAL LETTER C WITH CARON] + case 0x0187: // [LATIN CAPITAL LETTER C WITH HOOK] + case 0x023B: // [LATIN CAPITAL LETTER C WITH STROKE] + case 0x0297: // [LATIN LETTER STRETCHED C] + case 0x1D04: // [LATIN LETTER SMALL CAPITAL C] + case 0x1E08: // [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] + case 0x24B8: // [CIRCLED LATIN CAPITAL LETTER C] + case 0xFF23: // [FULLWIDTH LATIN CAPITAL LETTER C] + output[outputPos++] = L'C'; + break; + case 0x00E7: // [LATIN SMALL LETTER C WITH CEDILLA] + case 0x0107: // [LATIN SMALL LETTER C WITH ACUTE] + case 0x0109: // [LATIN SMALL LETTER C WITH CIRCUMFLEX] + case 0x010B: // [LATIN SMALL LETTER C WITH DOT ABOVE] + case 0x010D: // [LATIN SMALL LETTER C WITH CARON] + case 0x0188: // [LATIN SMALL LETTER C WITH HOOK] + case 0x023C: // [LATIN SMALL LETTER C WITH STROKE] + case 0x0255: // [LATIN SMALL LETTER C WITH CURL] + case 0x1E09: // [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] + case 0x2184: // [LATIN SMALL LETTER REVERSED C] + case 0x24D2: // [CIRCLED LATIN SMALL LETTER C] + case 0xA73E: // [LATIN CAPITAL LETTER REVERSED C WITH DOT] + case 0xA73F: // [LATIN SMALL LETTER REVERSED C WITH DOT] + case 0xFF43: // [FULLWIDTH LATIN SMALL LETTER C] + output[outputPos++] = L'c'; + break; + case 0x249E: // [PARENTHESIZED LATIN SMALL LETTER C] + 
output[outputPos++] = L'('; + output[outputPos++] = L'c'; + output[outputPos++] = L')'; + break; + case 0x00D0: // [LATIN CAPITAL LETTER ETH] + case 0x010E: // [LATIN CAPITAL LETTER D WITH CARON] + case 0x0110: // [LATIN CAPITAL LETTER D WITH STROKE] + case 0x0189: // [LATIN CAPITAL LETTER AFRICAN D] + case 0x018A: // [LATIN CAPITAL LETTER D WITH HOOK] + case 0x018B: // [LATIN CAPITAL LETTER D WITH TOPBAR] + case 0x1D05: // [LATIN LETTER SMALL CAPITAL D] + case 0x1D06: // [LATIN LETTER SMALL CAPITAL ETH] + case 0x1E0A: // [LATIN CAPITAL LETTER D WITH DOT ABOVE] + case 0x1E0C: // [LATIN CAPITAL LETTER D WITH DOT BELOW] + case 0x1E0E: // [LATIN CAPITAL LETTER D WITH LINE BELOW] + case 0x1E10: // [LATIN CAPITAL LETTER D WITH CEDILLA] + case 0x1E12: // [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] + case 0x24B9: // [CIRCLED LATIN CAPITAL LETTER D] + case 0xA779: // [LATIN CAPITAL LETTER INSULAR D] + case 0xFF24: // [FULLWIDTH LATIN CAPITAL LETTER D] + output[outputPos++] = L'D'; + break; + case 0x00F0: // [LATIN SMALL LETTER ETH] + case 0x010F: // [LATIN SMALL LETTER D WITH CARON] + case 0x0111: // [LATIN SMALL LETTER D WITH STROKE] + case 0x018C: // [LATIN SMALL LETTER D WITH TOPBAR] + case 0x0221: // [LATIN SMALL LETTER D WITH CURL] + case 0x0256: // [LATIN SMALL LETTER D WITH TAIL] + case 0x0257: // [LATIN SMALL LETTER D WITH HOOK] + case 0x1D6D: // [LATIN SMALL LETTER D WITH MIDDLE TILDE] + case 0x1D81: // [LATIN SMALL LETTER D WITH PALATAL HOOK] + case 0x1D91: // [LATIN SMALL LETTER D WITH HOOK AND TAIL] + case 0x1E0B: // [LATIN SMALL LETTER D WITH DOT ABOVE] + case 0x1E0D: // [LATIN SMALL LETTER D WITH DOT BELOW] + case 0x1E0F: // [LATIN SMALL LETTER D WITH LINE BELOW] + case 0x1E11: // [LATIN SMALL LETTER D WITH CEDILLA] + case 0x1E13: // [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] + case 0x24D3: // [CIRCLED LATIN SMALL LETTER D] + case 0xA77A: // [LATIN SMALL LETTER INSULAR D] + case 0xFF44: // [FULLWIDTH LATIN SMALL LETTER D] + output[outputPos++] = L'd'; + 
break; + case 0x01C4: // [LATIN CAPITAL LETTER DZ WITH CARON] + case 0x01F1: // [LATIN CAPITAL LETTER DZ] + output[outputPos++] = L'D'; + output[outputPos++] = L'Z'; + break; + case 0x01C5: // [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] + case 0x01F2: // [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] + output[outputPos++] = L'D'; + output[outputPos++] = L'z'; + break; + case 0x249F: // [PARENTHESIZED LATIN SMALL LETTER D] + output[outputPos++] = L'('; + output[outputPos++] = L'd'; + output[outputPos++] = L')'; + break; + case 0x0238: // [LATIN SMALL LETTER DB DIGRAPH] + output[outputPos++] = L'd'; + output[outputPos++] = L'b'; + break; + case 0x01C6: // [LATIN SMALL LETTER DZ WITH CARON] + case 0x01F3: // [LATIN SMALL LETTER DZ] + case 0x02A3: // [LATIN SMALL LETTER DZ DIGRAPH] + case 0x02A5: // [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] + output[outputPos++] = L'd'; + output[outputPos++] = L'z'; + break; + case 0x00C8: // [LATIN CAPITAL LETTER E WITH GRAVE] + case 0x00C9: // [LATIN CAPITAL LETTER E WITH ACUTE] + case 0x00CA: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] + case 0x00CB: // [LATIN CAPITAL LETTER E WITH DIAERESIS] + case 0x0112: // [LATIN CAPITAL LETTER E WITH MACRON] + case 0x0114: // [LATIN CAPITAL LETTER E WITH BREVE] + case 0x0116: // [LATIN CAPITAL LETTER E WITH DOT ABOVE] + case 0x0118: // [LATIN CAPITAL LETTER E WITH OGONEK] + case 0x011A: // [LATIN CAPITAL LETTER E WITH CARON] + case 0x018E: // [LATIN CAPITAL LETTER REVERSED E] + case 0x0190: // [LATIN CAPITAL LETTER OPEN E] + case 0x0204: // [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] + case 0x0206: // [LATIN CAPITAL LETTER E WITH INVERTED BREVE] + case 0x0228: // [LATIN CAPITAL LETTER E WITH CEDILLA] + case 0x0246: // [LATIN CAPITAL LETTER E WITH STROKE] + case 0x1D07: // [LATIN LETTER SMALL CAPITAL E] + case 0x1E14: // [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] + case 0x1E16: // [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] + case 0x1E18: // [LATIN CAPITAL LETTER E WITH 
CIRCUMFLEX BELOW] + case 0x1E1A: // [LATIN CAPITAL LETTER E WITH TILDE BELOW] + case 0x1E1C: // [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] + case 0x1EB8: // [LATIN CAPITAL LETTER E WITH DOT BELOW] + case 0x1EBA: // [LATIN CAPITAL LETTER E WITH HOOK ABOVE] + case 0x1EBC: // [LATIN CAPITAL LETTER E WITH TILDE] + case 0x1EBE: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] + case 0x1EC0: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] + case 0x1EC2: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case 0x1EC4: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] + case 0x1EC6: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case 0x24BA: // [CIRCLED LATIN CAPITAL LETTER E] + case 0x2C7B: // [LATIN LETTER SMALL CAPITAL TURNED E] + case 0xFF25: // [FULLWIDTH LATIN CAPITAL LETTER E] + output[outputPos++] = L'E'; + break; + case 0x00E8: // [LATIN SMALL LETTER E WITH GRAVE] + case 0x00E9: // [LATIN SMALL LETTER E WITH ACUTE] + case 0x00EA: // [LATIN SMALL LETTER E WITH CIRCUMFLEX] + case 0x00EB: // [LATIN SMALL LETTER E WITH DIAERESIS] + case 0x0113: // [LATIN SMALL LETTER E WITH MACRON] + case 0x0115: // [LATIN SMALL LETTER E WITH BREVE] + case 0x0117: // [LATIN SMALL LETTER E WITH DOT ABOVE] + case 0x0119: // [LATIN SMALL LETTER E WITH OGONEK] + case 0x011B: // [LATIN SMALL LETTER E WITH CARON] + case 0x01DD: // [LATIN SMALL LETTER TURNED E] + case 0x0205: // [LATIN SMALL LETTER E WITH DOUBLE GRAVE] + case 0x0207: // [LATIN SMALL LETTER E WITH INVERTED BREVE] + case 0x0229: // [LATIN SMALL LETTER E WITH CEDILLA] + case 0x0247: // [LATIN SMALL LETTER E WITH STROKE] + case 0x0258: // [LATIN SMALL LETTER REVERSED E] + case 0x025B: // [LATIN SMALL LETTER OPEN E] + case 0x025C: // [LATIN SMALL LETTER REVERSED OPEN E] + case 0x025D: // [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] + case 0x025E: // [LATIN SMALL LETTER CLOSED REVERSED OPEN E] + case 0x029A: // [LATIN SMALL LETTER CLOSED OPEN E] + case 0x1D08: // [LATIN SMALL LETTER 
TURNED OPEN E] + case 0x1D92: // [LATIN SMALL LETTER E WITH RETROFLEX HOOK] + case 0x1D93: // [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] + case 0x1D94: // [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] + case 0x1E15: // [LATIN SMALL LETTER E WITH MACRON AND GRAVE] + case 0x1E17: // [LATIN SMALL LETTER E WITH MACRON AND ACUTE] + case 0x1E19: // [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] + case 0x1E1B: // [LATIN SMALL LETTER E WITH TILDE BELOW] + case 0x1E1D: // [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] + case 0x1EB9: // [LATIN SMALL LETTER E WITH DOT BELOW] + case 0x1EBB: // [LATIN SMALL LETTER E WITH HOOK ABOVE] + case 0x1EBD: // [LATIN SMALL LETTER E WITH TILDE] + case 0x1EBF: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] + case 0x1EC1: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] + case 0x1EC3: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] + case 0x1EC5: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] + case 0x1EC7: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] + case 0x2091: // [LATIN SUBSCRIPT SMALL LETTER E] + case 0x24D4: // [CIRCLED LATIN SMALL LETTER E] + case 0x2C78: // [LATIN SMALL LETTER E WITH NOTCH] + case 0xFF45: // [FULLWIDTH LATIN SMALL LETTER E] + output[outputPos++] = L'e'; + break; + case 0x24A0: // [PARENTHESIZED LATIN SMALL LETTER E] + output[outputPos++] = L'('; + output[outputPos++] = L'e'; + output[outputPos++] = L')'; + break; + case 0x0191: // [LATIN CAPITAL LETTER F WITH HOOK] + case 0x1E1E: // [LATIN CAPITAL LETTER F WITH DOT ABOVE] + case 0x24BB: // [CIRCLED LATIN CAPITAL LETTER F] + case 0xA730: // [LATIN LETTER SMALL CAPITAL F] + case 0xA77B: // [LATIN CAPITAL LETTER INSULAR F] + case 0xA7FB: // [LATIN EPIGRAPHIC LETTER REVERSED F] + case 0xFF26: // [FULLWIDTH LATIN CAPITAL LETTER F] + output[outputPos++] = L'F'; + break; + case 0x0192: // [LATIN SMALL LETTER F WITH HOOK] + case 0x1D6E: // [LATIN SMALL LETTER F WITH MIDDLE TILDE] + case 0x1D82: // [LATIN SMALL LETTER F WITH 
PALATAL HOOK] + case 0x1E1F: // [LATIN SMALL LETTER F WITH DOT ABOVE] + case 0x1E9B: // [LATIN SMALL LETTER LONG S WITH DOT ABOVE] + case 0x24D5: // [CIRCLED LATIN SMALL LETTER F] + case 0xA77C: // [LATIN SMALL LETTER INSULAR F] + case 0xFF46: // [FULLWIDTH LATIN SMALL LETTER F] + output[outputPos++] = L'f'; + break; + case 0x24A1: // [PARENTHESIZED LATIN SMALL LETTER F] + output[outputPos++] = L'('; + output[outputPos++] = L'f'; + output[outputPos++] = L')'; + break; + case 0xFB00: // [LATIN SMALL LIGATURE FF] + output[outputPos++] = L'f'; + output[outputPos++] = L'f'; + break; + case 0xFB03: // [LATIN SMALL LIGATURE FFI] + output[outputPos++] = L'f'; + output[outputPos++] = L'f'; + output[outputPos++] = L'i'; + break; + case 0xFB04: // [LATIN SMALL LIGATURE FFL] + output[outputPos++] = L'f'; + output[outputPos++] = L'f'; + output[outputPos++] = L'l'; + break; + case 0xFB01: // [LATIN SMALL LIGATURE FI] + output[outputPos++] = L'f'; + output[outputPos++] = L'i'; + break; + case 0xFB02: // [LATIN SMALL LIGATURE FL] + output[outputPos++] = L'f'; + output[outputPos++] = L'l'; + break; + case 0x011C: // [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] + case 0x011E: // [LATIN CAPITAL LETTER G WITH BREVE] + case 0x0120: // [LATIN CAPITAL LETTER G WITH DOT ABOVE] + case 0x0122: // [LATIN CAPITAL LETTER G WITH CEDILLA] + case 0x0193: // [LATIN CAPITAL LETTER G WITH HOOK] + case 0x01E4: // [LATIN CAPITAL LETTER G WITH STROKE] + case 0x01E5: // [LATIN SMALL LETTER G WITH STROKE] + case 0x01E6: // [LATIN CAPITAL LETTER G WITH CARON] + case 0x01E7: // [LATIN SMALL LETTER G WITH CARON] + case 0x01F4: // [LATIN CAPITAL LETTER G WITH ACUTE] + case 0x0262: // [LATIN LETTER SMALL CAPITAL G] + case 0x029B: // [LATIN LETTER SMALL CAPITAL G WITH HOOK] + case 0x1E20: // [LATIN CAPITAL LETTER G WITH MACRON] + case 0x24BC: // [CIRCLED LATIN CAPITAL LETTER G] + case 0xA77D: // [LATIN CAPITAL LETTER INSULAR G] + case 0xA77E: // [LATIN CAPITAL LETTER TURNED INSULAR G] + case 0xFF27: // [FULLWIDTH 
LATIN CAPITAL LETTER G] + output[outputPos++] = L'G'; + break; + case 0x011D: // [LATIN SMALL LETTER G WITH CIRCUMFLEX] + case 0x011F: // [LATIN SMALL LETTER G WITH BREVE] + case 0x0121: // [LATIN SMALL LETTER G WITH DOT ABOVE] + case 0x0123: // [LATIN SMALL LETTER G WITH CEDILLA] + case 0x01F5: // [LATIN SMALL LETTER G WITH ACUTE] + case 0x0260: // [LATIN SMALL LETTER G WITH HOOK] + case 0x0261: // [LATIN SMALL LETTER SCRIPT G] + case 0x1D77: // [LATIN SMALL LETTER TURNED G] + case 0x1D79: // [LATIN SMALL LETTER INSULAR G] + case 0x1D83: // [LATIN SMALL LETTER G WITH PALATAL HOOK] + case 0x1E21: // [LATIN SMALL LETTER G WITH MACRON] + case 0x24D6: // [CIRCLED LATIN SMALL LETTER G] + case 0xA77F: // [LATIN SMALL LETTER TURNED INSULAR G] + case 0xFF47: // [FULLWIDTH LATIN SMALL LETTER G] + output[outputPos++] = L'g'; + break; + case 0x24A2: // [PARENTHESIZED LATIN SMALL LETTER G] + output[outputPos++] = L'('; + output[outputPos++] = L'g'; + output[outputPos++] = L')'; + break; + case 0x0124: // [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] + case 0x0126: // [LATIN CAPITAL LETTER H WITH STROKE] + case 0x021E: // [LATIN CAPITAL LETTER H WITH CARON] + case 0x029C: // [LATIN LETTER SMALL CAPITAL H] + case 0x1E22: // [LATIN CAPITAL LETTER H WITH DOT ABOVE] + case 0x1E24: // [LATIN CAPITAL LETTER H WITH DOT BELOW] + case 0x1E26: // [LATIN CAPITAL LETTER H WITH DIAERESIS] + case 0x1E28: // [LATIN CAPITAL LETTER H WITH CEDILLA] + case 0x1E2A: // [LATIN CAPITAL LETTER H WITH BREVE BELOW] + case 0x24BD: // [CIRCLED LATIN CAPITAL LETTER H] + case 0x2C67: // [LATIN CAPITAL LETTER H WITH DESCENDER] + case 0x2C75: // [LATIN CAPITAL LETTER HALF H] + case 0xFF28: // [FULLWIDTH LATIN CAPITAL LETTER H] + output[outputPos++] = L'H'; + break; + case 0x0125: // [LATIN SMALL LETTER H WITH CIRCUMFLEX] + case 0x0127: // [LATIN SMALL LETTER H WITH STROKE] + case 0x021F: // [LATIN SMALL LETTER H WITH CARON] + case 0x0265: // [LATIN SMALL LETTER TURNED H] + case 0x0266: // [LATIN SMALL LETTER H 
WITH HOOK] + case 0x02AE: // [LATIN SMALL LETTER TURNED H WITH FISHHOOK] + case 0x02AF: // [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] + case 0x1E23: // [LATIN SMALL LETTER H WITH DOT ABOVE] + case 0x1E25: // [LATIN SMALL LETTER H WITH DOT BELOW] + case 0x1E27: // [LATIN SMALL LETTER H WITH DIAERESIS] + case 0x1E29: // [LATIN SMALL LETTER H WITH CEDILLA] + case 0x1E2B: // [LATIN SMALL LETTER H WITH BREVE BELOW] + case 0x1E96: // [LATIN SMALL LETTER H WITH LINE BELOW] + case 0x24D7: // [CIRCLED LATIN SMALL LETTER H] + case 0x2C68: // [LATIN SMALL LETTER H WITH DESCENDER] + case 0x2C76: // [LATIN SMALL LETTER HALF H] + case 0xFF48: // [FULLWIDTH LATIN SMALL LETTER H] + output[outputPos++] = L'h'; + break; + case 0x01F6: // [LATIN CAPITAL LETTER HWAIR] + output[outputPos++] = L'H'; + output[outputPos++] = L'V'; + break; + case 0x24A3: // [PARENTHESIZED LATIN SMALL LETTER H] + output[outputPos++] = L'('; + output[outputPos++] = L'h'; + output[outputPos++] = L')'; + break; + case 0x0195: // [LATIN SMALL LETTER HV] + output[outputPos++] = L'h'; + output[outputPos++] = L'v'; + break; + case 0x00CC: // [LATIN CAPITAL LETTER I WITH GRAVE] + case 0x00CD: // [LATIN CAPITAL LETTER I WITH ACUTE] + case 0x00CE: // [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] + case 0x00CF: // [LATIN CAPITAL LETTER I WITH DIAERESIS] + case 0x0128: // [LATIN CAPITAL LETTER I WITH TILDE] + case 0x012A: // [LATIN CAPITAL LETTER I WITH MACRON] + case 0x012C: // [LATIN CAPITAL LETTER I WITH BREVE] + case 0x012E: // [LATIN CAPITAL LETTER I WITH OGONEK] + case 0x0130: // [LATIN CAPITAL LETTER I WITH DOT ABOVE] + case 0x0196: // [LATIN CAPITAL LETTER IOTA] + case 0x0197: // [LATIN CAPITAL LETTER I WITH STROKE] + case 0x01CF: // [LATIN CAPITAL LETTER I WITH CARON] + case 0x0208: // [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] + case 0x020A: // [LATIN CAPITAL LETTER I WITH INVERTED BREVE] + case 0x026A: // [LATIN LETTER SMALL CAPITAL I] + case 0x1D7B: // [LATIN SMALL CAPITAL LETTER I WITH STROKE] + case 
0x1E2C: // [LATIN CAPITAL LETTER I WITH TILDE BELOW] + case 0x1E2E: // [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] + case 0x1EC8: // [LATIN CAPITAL LETTER I WITH HOOK ABOVE] + case 0x1ECA: // [LATIN CAPITAL LETTER I WITH DOT BELOW] + case 0x24BE: // [CIRCLED LATIN CAPITAL LETTER I] + case 0xA7FE: // [LATIN EPIGRAPHIC LETTER I LONGA] + case 0xFF29: // [FULLWIDTH LATIN CAPITAL LETTER I] + output[outputPos++] = L'I'; + break; + case 0x00EC: // [LATIN SMALL LETTER I WITH GRAVE] + case 0x00ED: // [LATIN SMALL LETTER I WITH ACUTE] + case 0x00EE: // [LATIN SMALL LETTER I WITH CIRCUMFLEX] + case 0x00EF: // [LATIN SMALL LETTER I WITH DIAERESIS] + case 0x0129: // [LATIN SMALL LETTER I WITH TILDE] + case 0x012B: // [LATIN SMALL LETTER I WITH MACRON] + case 0x012D: // [LATIN SMALL LETTER I WITH BREVE] + case 0x012F: // [LATIN SMALL LETTER I WITH OGONEK] + case 0x0131: // [LATIN SMALL LETTER DOTLESS I] + case 0x01D0: // [LATIN SMALL LETTER I WITH CARON] + case 0x0209: // [LATIN SMALL LETTER I WITH DOUBLE GRAVE] + case 0x020B: // [LATIN SMALL LETTER I WITH INVERTED BREVE] + case 0x0268: // [LATIN SMALL LETTER I WITH STROKE] + case 0x1D09: // [LATIN SMALL LETTER TURNED I] + case 0x1D62: // [LATIN SUBSCRIPT SMALL LETTER I] + case 0x1D7C: // [LATIN SMALL LETTER IOTA WITH STROKE] + case 0x1D96: // [LATIN SMALL LETTER I WITH RETROFLEX HOOK] + case 0x1E2D: // [LATIN SMALL LETTER I WITH TILDE BELOW] + case 0x1E2F: // [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE] + case 0x1EC9: // [LATIN SMALL LETTER I WITH HOOK ABOVE] + case 0x1ECB: // [LATIN SMALL LETTER I WITH DOT BELOW] + case 0x2071: // [SUPERSCRIPT LATIN SMALL LETTER I] + case 0x24D8: // [CIRCLED LATIN SMALL LETTER I] + case 0xFF49: // [FULLWIDTH LATIN SMALL LETTER I] + output[outputPos++] = L'i'; + break; + case 0x0132: // [LATIN CAPITAL LIGATURE IJ] + output[outputPos++] = L'I'; + output[outputPos++] = L'J'; + break; + case 0x24A4: // [PARENTHESIZED LATIN SMALL LETTER I] + output[outputPos++] = L'('; + 
output[outputPos++] = L'i'; + output[outputPos++] = L')'; + break; + case 0x0133: // [LATIN SMALL LIGATURE IJ] + output[outputPos++] = L'i'; + output[outputPos++] = L'j'; + break; + case 0x0134: // [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] + case 0x0248: // [LATIN CAPITAL LETTER J WITH STROKE] + case 0x1D0A: // [LATIN LETTER SMALL CAPITAL J] + case 0x24BF: // [CIRCLED LATIN CAPITAL LETTER J] + case 0xFF2A: // [FULLWIDTH LATIN CAPITAL LETTER J] + output[outputPos++] = L'J'; + break; + case 0x0135: // [LATIN SMALL LETTER J WITH CIRCUMFLEX] + case 0x01F0: // [LATIN SMALL LETTER J WITH CARON] + case 0x0237: // [LATIN SMALL LETTER DOTLESS J] + case 0x0249: // [LATIN SMALL LETTER J WITH STROKE] + case 0x025F: // [LATIN SMALL LETTER DOTLESS J WITH STROKE] + case 0x0284: // [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] + case 0x029D: // [LATIN SMALL LETTER J WITH CROSSED-TAIL] + case 0x24D9: // [CIRCLED LATIN SMALL LETTER J] + case 0x2C7C: // [LATIN SUBSCRIPT SMALL LETTER J] + case 0xFF4A: // [FULLWIDTH LATIN SMALL LETTER J] + output[outputPos++] = L'j'; + break; + case 0x24A5: // [PARENTHESIZED LATIN SMALL LETTER J] + output[outputPos++] = L'('; + output[outputPos++] = L'j'; + output[outputPos++] = L')'; + break; + case 0x0136: // [LATIN CAPITAL LETTER K WITH CEDILLA] + case 0x0198: // [LATIN CAPITAL LETTER K WITH HOOK] + case 0x01E8: // [LATIN CAPITAL LETTER K WITH CARON] + case 0x1D0B: // [LATIN LETTER SMALL CAPITAL K] + case 0x1E30: // [LATIN CAPITAL LETTER K WITH ACUTE] + case 0x1E32: // [LATIN CAPITAL LETTER K WITH DOT BELOW] + case 0x1E34: // [LATIN CAPITAL LETTER K WITH LINE BELOW] + case 0x24C0: // [CIRCLED LATIN CAPITAL LETTER K] + case 0x2C69: // [LATIN CAPITAL LETTER K WITH DESCENDER] + case 0xA740: // [LATIN CAPITAL LETTER K WITH STROKE] + case 0xA742: // [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] + case 0xA744: // [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] + case 0xFF2B: // [FULLWIDTH LATIN CAPITAL LETTER K] + output[outputPos++] = L'K'; 
+ break; + case 0x0137: // [LATIN SMALL LETTER K WITH CEDILLA] + case 0x0199: // [LATIN SMALL LETTER K WITH HOOK] + case 0x01E9: // [LATIN SMALL LETTER K WITH CARON] + case 0x029E: // [LATIN SMALL LETTER TURNED K] + case 0x1D84: // [LATIN SMALL LETTER K WITH PALATAL HOOK] + case 0x1E31: // [LATIN SMALL LETTER K WITH ACUTE] + case 0x1E33: // [LATIN SMALL LETTER K WITH DOT BELOW] + case 0x1E35: // [LATIN SMALL LETTER K WITH LINE BELOW] + case 0x24DA: // [CIRCLED LATIN SMALL LETTER K] + case 0x2C6A: // [LATIN SMALL LETTER K WITH DESCENDER] + case 0xA741: // [LATIN SMALL LETTER K WITH STROKE] + case 0xA743: // [LATIN SMALL LETTER K WITH DIAGONAL STROKE] + case 0xA745: // [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] + case 0xFF4B: // [FULLWIDTH LATIN SMALL LETTER K] + output[outputPos++] = L'k'; + break; + case 0x24A6: // [PARENTHESIZED LATIN SMALL LETTER K] + output[outputPos++] = L'('; + output[outputPos++] = L'k'; + output[outputPos++] = L')'; + break; + case 0x0139: // [LATIN CAPITAL LETTER L WITH ACUTE] + case 0x013B: // [LATIN CAPITAL LETTER L WITH CEDILLA] + case 0x013D: // [LATIN CAPITAL LETTER L WITH CARON] + case 0x013F: // [LATIN CAPITAL LETTER L WITH MIDDLE DOT] + case 0x0141: // [LATIN CAPITAL LETTER L WITH STROKE] + case 0x023D: // [LATIN CAPITAL LETTER L WITH BAR] + case 0x029F: // [LATIN LETTER SMALL CAPITAL L] + case 0x1D0C: // [LATIN LETTER SMALL CAPITAL L WITH STROKE] + case 0x1E36: // [LATIN CAPITAL LETTER L WITH DOT BELOW] + case 0x1E38: // [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] + case 0x1E3A: // [LATIN CAPITAL LETTER L WITH LINE BELOW] + case 0x1E3C: // [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] + case 0x24C1: // [CIRCLED LATIN CAPITAL LETTER L] + case 0x2C60: // [LATIN CAPITAL LETTER L WITH DOUBLE BAR] + case 0x2C62: // [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] + case 0xA746: // [LATIN CAPITAL LETTER BROKEN L] + case 0xA748: // [LATIN CAPITAL LETTER L WITH HIGH STROKE] + case 0xA780: // [LATIN CAPITAL LETTER TURNED L] + 
case 0xFF2C: // [FULLWIDTH LATIN CAPITAL LETTER L] + output[outputPos++] = L'L'; + break; + case 0x013A: // [LATIN SMALL LETTER L WITH ACUTE] + case 0x013C: // [LATIN SMALL LETTER L WITH CEDILLA] + case 0x013E: // [LATIN SMALL LETTER L WITH CARON] + case 0x0140: // [LATIN SMALL LETTER L WITH MIDDLE DOT] + case 0x0142: // [LATIN SMALL LETTER L WITH STROKE] + case 0x019A: // [LATIN SMALL LETTER L WITH BAR] + case 0x0234: // [LATIN SMALL LETTER L WITH CURL] + case 0x026B: // [LATIN SMALL LETTER L WITH MIDDLE TILDE] + case 0x026C: // [LATIN SMALL LETTER L WITH BELT] + case 0x026D: // [LATIN SMALL LETTER L WITH RETROFLEX HOOK] + case 0x1D85: // [LATIN SMALL LETTER L WITH PALATAL HOOK] + case 0x1E37: // [LATIN SMALL LETTER L WITH DOT BELOW] + case 0x1E39: // [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] + case 0x1E3B: // [LATIN SMALL LETTER L WITH LINE BELOW] + case 0x1E3D: // [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] + case 0x24DB: // [CIRCLED LATIN SMALL LETTER L] + case 0x2C61: // [LATIN SMALL LETTER L WITH DOUBLE BAR] + case 0xA747: // [LATIN SMALL LETTER BROKEN L] + case 0xA749: // [LATIN SMALL LETTER L WITH HIGH STROKE] + case 0xA781: // [LATIN SMALL LETTER TURNED L] + case 0xFF4C: // [FULLWIDTH LATIN SMALL LETTER L] + output[outputPos++] = L'l'; + break; + case 0x01C7: // [LATIN CAPITAL LETTER LJ] + output[outputPos++] = L'L'; + output[outputPos++] = L'J'; + break; + case 0x1EFA: // [LATIN CAPITAL LETTER MIDDLE-WELSH LL] + output[outputPos++] = L'L'; + output[outputPos++] = L'L'; + break; + case 0x01C8: // [LATIN CAPITAL LETTER L WITH SMALL LETTER J] + output[outputPos++] = L'L'; + output[outputPos++] = L'j'; + break; + case 0x24A7: // [PARENTHESIZED LATIN SMALL LETTER L] + output[outputPos++] = L'('; + output[outputPos++] = L'l'; + output[outputPos++] = L')'; + break; + case 0x01C9: // [LATIN SMALL LETTER LJ] + output[outputPos++] = L'l'; + output[outputPos++] = L'j'; + break; + case 0x1EFB: // [LATIN SMALL LETTER MIDDLE-WELSH LL] + output[outputPos++] = 
L'l'; + output[outputPos++] = L'l'; + break; + case 0x02AA: // [LATIN SMALL LETTER LS DIGRAPH] + output[outputPos++] = L'l'; + output[outputPos++] = L's'; + break; + case 0x02AB: // [LATIN SMALL LETTER LZ DIGRAPH] + output[outputPos++] = L'l'; + output[outputPos++] = L'z'; + break; + case 0x019C: // [LATIN CAPITAL LETTER TURNED M] + case 0x1D0D: // [LATIN LETTER SMALL CAPITAL M] + case 0x1E3E: // [LATIN CAPITAL LETTER M WITH ACUTE] + case 0x1E40: // [LATIN CAPITAL LETTER M WITH DOT ABOVE] + case 0x1E42: // [LATIN CAPITAL LETTER M WITH DOT BELOW] + case 0x24C2: // [CIRCLED LATIN CAPITAL LETTER M] + case 0x2C6E: // [LATIN CAPITAL LETTER M WITH HOOK] + case 0xA7FD: // [LATIN EPIGRAPHIC LETTER INVERTED M] + case 0xA7FF: // [LATIN EPIGRAPHIC LETTER ARCHAIC M] + case 0xFF2D: // [FULLWIDTH LATIN CAPITAL LETTER M] + output[outputPos++] = L'M'; + break; + case 0x026F: // [LATIN SMALL LETTER TURNED M] + case 0x0270: // [LATIN SMALL LETTER TURNED M WITH LONG LEG] + case 0x0271: // [LATIN SMALL LETTER M WITH HOOK] + case 0x1D6F: // [LATIN SMALL LETTER M WITH MIDDLE TILDE] + case 0x1D86: // [LATIN SMALL LETTER M WITH PALATAL HOOK] + case 0x1E3F: // [LATIN SMALL LETTER M WITH ACUTE] + case 0x1E41: // [LATIN SMALL LETTER M WITH DOT ABOVE] + case 0x1E43: // [LATIN SMALL LETTER M WITH DOT BELOW] + case 0x24DC: // [CIRCLED LATIN SMALL LETTER M] + case 0xFF4D: // [FULLWIDTH LATIN SMALL LETTER M] + output[outputPos++] = L'm'; + break; + case 0x24A8: // [PARENTHESIZED LATIN SMALL LETTER M] + output[outputPos++] = L'('; + output[outputPos++] = L'm'; + output[outputPos++] = L')'; + break; + case 0x00D1: // [LATIN CAPITAL LETTER N WITH TILDE] + case 0x0143: // [LATIN CAPITAL LETTER N WITH ACUTE] + case 0x0145: // [LATIN CAPITAL LETTER N WITH CEDILLA] + case 0x0147: // [LATIN CAPITAL LETTER N WITH CARON] + case 0x014A: // [LATIN CAPITAL LETTER ENG] + case 0x019D: // [LATIN CAPITAL LETTER N WITH LEFT HOOK] + case 0x01F8: // [LATIN CAPITAL LETTER N WITH GRAVE] + case 0x0220: // [LATIN 
CAPITAL LETTER N WITH LONG RIGHT LEG] + case 0x0274: // [LATIN LETTER SMALL CAPITAL N] + case 0x1D0E: // [LATIN LETTER SMALL CAPITAL REVERSED N] + case 0x1E44: // [LATIN CAPITAL LETTER N WITH DOT ABOVE] + case 0x1E46: // [LATIN CAPITAL LETTER N WITH DOT BELOW] + case 0x1E48: // [LATIN CAPITAL LETTER N WITH LINE BELOW] + case 0x1E4A: // [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] + case 0x24C3: // [CIRCLED LATIN CAPITAL LETTER N] + case 0xFF2E: // [FULLWIDTH LATIN CAPITAL LETTER N] + output[outputPos++] = L'N'; + break; + case 0x00F1: // [LATIN SMALL LETTER N WITH TILDE] + case 0x0144: // [LATIN SMALL LETTER N WITH ACUTE] + case 0x0146: // [LATIN SMALL LETTER N WITH CEDILLA] + case 0x0148: // [LATIN SMALL LETTER N WITH CARON] + case 0x0149: // [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] + case 0x014B: // [LATIN SMALL LETTER ENG] + case 0x019E: // [LATIN SMALL LETTER N WITH LONG RIGHT LEG] + case 0x01F9: // [LATIN SMALL LETTER N WITH GRAVE] + case 0x0235: // [LATIN SMALL LETTER N WITH CURL] + case 0x0272: // [LATIN SMALL LETTER N WITH LEFT HOOK] + case 0x0273: // [LATIN SMALL LETTER N WITH RETROFLEX HOOK] + case 0x1D70: // [LATIN SMALL LETTER N WITH MIDDLE TILDE] + case 0x1D87: // [LATIN SMALL LETTER N WITH PALATAL HOOK] + case 0x1E45: // [LATIN SMALL LETTER N WITH DOT ABOVE] + case 0x1E47: // [LATIN SMALL LETTER N WITH DOT BELOW] + case 0x1E49: // [LATIN SMALL LETTER N WITH LINE BELOW] + case 0x1E4B: // [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW] + case 0x207F: // [SUPERSCRIPT LATIN SMALL LETTER N] + case 0x24DD: // [CIRCLED LATIN SMALL LETTER N] + case 0xFF4E: // [FULLWIDTH LATIN SMALL LETTER N] + output[outputPos++] = L'n'; + break; + case 0x01CA: // [LATIN CAPITAL LETTER NJ] + output[outputPos++] = L'N'; + output[outputPos++] = L'J'; + break; + case 0x01CB: // [LATIN CAPITAL LETTER N WITH SMALL LETTER J] + output[outputPos++] = L'N'; + output[outputPos++] = L'j'; + break; + case 0x24A9: // [PARENTHESIZED LATIN SMALL LETTER N] + output[outputPos++] = L'('; + 
output[outputPos++] = L'n'; + output[outputPos++] = L')'; + break; + case 0x01CC: // [LATIN SMALL LETTER NJ] + output[outputPos++] = L'n'; + output[outputPos++] = L'j'; + break; + case 0x00D2: // [LATIN CAPITAL LETTER O WITH GRAVE] + case 0x00D3: // [LATIN CAPITAL LETTER O WITH ACUTE] + case 0x00D4: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] + case 0x00D5: // [LATIN CAPITAL LETTER O WITH TILDE] + case 0x00D6: // [LATIN CAPITAL LETTER O WITH DIAERESIS] + case 0x00D8: // [LATIN CAPITAL LETTER O WITH STROKE] + case 0x014C: // [LATIN CAPITAL LETTER O WITH MACRON] + case 0x014E: // [LATIN CAPITAL LETTER O WITH BREVE] + case 0x0150: // [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] + case 0x0186: // [LATIN CAPITAL LETTER OPEN O] + case 0x019F: // [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] + case 0x01A0: // [LATIN CAPITAL LETTER O WITH HORN] + case 0x01D1: // [LATIN CAPITAL LETTER O WITH CARON] + case 0x01EA: // [LATIN CAPITAL LETTER O WITH OGONEK] + case 0x01EC: // [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] + case 0x01FE: // [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] + case 0x020C: // [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] + case 0x020E: // [LATIN CAPITAL LETTER O WITH INVERTED BREVE] + case 0x022A: // [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] + case 0x022C: // [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] + case 0x022E: // [LATIN CAPITAL LETTER O WITH DOT ABOVE] + case 0x0230: // [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON] + case 0x1D0F: // [LATIN LETTER SMALL CAPITAL O] + case 0x1D10: // [LATIN LETTER SMALL CAPITAL OPEN O] + case 0x1E4C: // [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] + case 0x1E4E: // [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] + case 0x1E50: // [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] + case 0x1E52: // [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] + case 0x1ECC: // [LATIN CAPITAL LETTER O WITH DOT BELOW] + case 0x1ECE: // [LATIN CAPITAL LETTER O WITH HOOK ABOVE] + case 0x1ED0: // [LATIN CAPITAL LETTER O WITH 
CIRCUMFLEX AND ACUTE] + case 0x1ED2: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] + case 0x1ED4: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + case 0x1ED6: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] + case 0x1ED8: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + case 0x1EDA: // [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] + case 0x1EDC: // [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] + case 0x1EDE: // [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] + case 0x1EE0: // [LATIN CAPITAL LETTER O WITH HORN AND TILDE] + case 0x1EE2: // [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] + case 0x24C4: // [CIRCLED LATIN CAPITAL LETTER O] + case 0xA74A: // [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] + case 0xA74C: // [LATIN CAPITAL LETTER O WITH LOOP] + case 0xFF2F: // [FULLWIDTH LATIN CAPITAL LETTER O] + output[outputPos++] = L'O'; + break; + case 0x00F2: // [LATIN SMALL LETTER O WITH GRAVE] + case 0x00F3: // [LATIN SMALL LETTER O WITH ACUTE] + case 0x00F4: // [LATIN SMALL LETTER O WITH CIRCUMFLEX] + case 0x00F5: // [LATIN SMALL LETTER O WITH TILDE] + case 0x00F6: // [LATIN SMALL LETTER O WITH DIAERESIS] + case 0x00F8: // [LATIN SMALL LETTER O WITH STROKE] + case 0x014D: // [LATIN SMALL LETTER O WITH MACRON] + case 0x014F: // [LATIN SMALL LETTER O WITH BREVE] + case 0x0151: // [LATIN SMALL LETTER O WITH DOUBLE ACUTE] + case 0x01A1: // [LATIN SMALL LETTER O WITH HORN] + case 0x01D2: // [LATIN SMALL LETTER O WITH CARON] + case 0x01EB: // [LATIN SMALL LETTER O WITH OGONEK] + case 0x01ED: // [LATIN SMALL LETTER O WITH OGONEK AND MACRON] + case 0x01FF: // [LATIN SMALL LETTER O WITH STROKE AND ACUTE] + case 0x020D: // [LATIN SMALL LETTER O WITH DOUBLE GRAVE] + case 0x020F: // [LATIN SMALL LETTER O WITH INVERTED BREVE] + case 0x022B: // [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] + case 0x022D: // [LATIN SMALL LETTER O WITH TILDE AND MACRON] + case 0x022F: // [LATIN SMALL LETTER O WITH DOT ABOVE] + case 0x0231: // [LATIN 
SMALL LETTER O WITH DOT ABOVE AND MACRON] + case 0x0254: // [LATIN SMALL LETTER OPEN O] + case 0x0275: // [LATIN SMALL LETTER BARRED O] + case 0x1D16: // [LATIN SMALL LETTER TOP HALF O] + case 0x1D17: // [LATIN SMALL LETTER BOTTOM HALF O] + case 0x1D97: // [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] + case 0x1E4D: // [LATIN SMALL LETTER O WITH TILDE AND ACUTE] + case 0x1E4F: // [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] + case 0x1E51: // [LATIN SMALL LETTER O WITH MACRON AND GRAVE] + case 0x1E53: // [LATIN SMALL LETTER O WITH MACRON AND ACUTE] + case 0x1ECD: // [LATIN SMALL LETTER O WITH DOT BELOW] + case 0x1ECF: // [LATIN SMALL LETTER O WITH HOOK ABOVE] + case 0x1ED1: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] + case 0x1ED3: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] + case 0x1ED5: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] + case 0x1ED7: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] + case 0x1ED9: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] + case 0x1EDB: // [LATIN SMALL LETTER O WITH HORN AND ACUTE] + case 0x1EDD: // [LATIN SMALL LETTER O WITH HORN AND GRAVE] + case 0x1EDF: // [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] + case 0x1EE1: // [LATIN SMALL LETTER O WITH HORN AND TILDE] + case 0x1EE3: // [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] + case 0x2092: // [LATIN SUBSCRIPT SMALL LETTER O] + case 0x24DE: // [CIRCLED LATIN SMALL LETTER O] + case 0x2C7A: // [LATIN SMALL LETTER O WITH LOW RING INSIDE] + case 0xA74B: // [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] + case 0xA74D: // [LATIN SMALL LETTER O WITH LOOP] + case 0xFF4F: // [FULLWIDTH LATIN SMALL LETTER O] + output[outputPos++] = L'o'; + break; + case 0x0152: // [LATIN CAPITAL LIGATURE OE] + case 0x0276: // [LATIN LETTER SMALL CAPITAL OE] + output[outputPos++] = L'O'; + output[outputPos++] = L'E'; + break; + case 0xA74E: // [LATIN CAPITAL LETTER OO] + output[outputPos++] = L'O'; + output[outputPos++] = L'O'; + break; + case 0x0222: // 
[LATIN CAPITAL LETTER OU] + case 0x1D15: // [LATIN LETTER SMALL CAPITAL OU] + output[outputPos++] = L'O'; + output[outputPos++] = L'U'; + break; + case 0x24AA: // [PARENTHESIZED LATIN SMALL LETTER O] + output[outputPos++] = L'('; + output[outputPos++] = L'o'; + output[outputPos++] = L')'; + break; + case 0x0153: // [LATIN SMALL LIGATURE OE] + case 0x1D14: // [LATIN SMALL LETTER TURNED OE] + output[outputPos++] = L'o'; + output[outputPos++] = L'e'; + break; + case 0xA74F: // [LATIN SMALL LETTER OO] + output[outputPos++] = L'o'; + output[outputPos++] = L'o'; + break; + case 0x0223: // [LATIN SMALL LETTER OU] + output[outputPos++] = L'o'; + output[outputPos++] = L'u'; + break; + case 0x01A4: // [LATIN CAPITAL LETTER P WITH HOOK] + case 0x1D18: // [LATIN LETTER SMALL CAPITAL P] + case 0x1E54: // [LATIN CAPITAL LETTER P WITH ACUTE] + case 0x1E56: // [LATIN CAPITAL LETTER P WITH DOT ABOVE] + case 0x24C5: // [CIRCLED LATIN CAPITAL LETTER P] + case 0x2C63: // [LATIN CAPITAL LETTER P WITH STROKE] + case 0xA750: // [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] + case 0xA752: // [LATIN CAPITAL LETTER P WITH FLOURISH] + case 0xA754: // [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] + case 0xFF30: // [FULLWIDTH LATIN CAPITAL LETTER P] + output[outputPos++] = L'P'; + break; + case 0x01A5: // [LATIN SMALL LETTER P WITH HOOK] + case 0x1D71: // [LATIN SMALL LETTER P WITH MIDDLE TILDE] + case 0x1D7D: // [LATIN SMALL LETTER P WITH STROKE] + case 0x1D88: // [LATIN SMALL LETTER P WITH PALATAL HOOK] + case 0x1E55: // [LATIN SMALL LETTER P WITH ACUTE] + case 0x1E57: // [LATIN SMALL LETTER P WITH DOT ABOVE] + case 0x24DF: // [CIRCLED LATIN SMALL LETTER P] + case 0xA751: // [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] + case 0xA753: // [LATIN SMALL LETTER P WITH FLOURISH] + case 0xA755: // [LATIN SMALL LETTER P WITH SQUIRREL TAIL] + case 0xA7FC: // [LATIN EPIGRAPHIC LETTER REVERSED P] + case 0xFF50: // [FULLWIDTH LATIN SMALL LETTER P] + output[outputPos++] = L'p'; + break; + 
case 0x24AB: // [PARENTHESIZED LATIN SMALL LETTER P] + output[outputPos++] = L'('; + output[outputPos++] = L'p'; + output[outputPos++] = L')'; + break; + case 0x024A: // [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] + case 0x24C6: // [CIRCLED LATIN CAPITAL LETTER Q] + case 0xA756: // [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] + case 0xA758: // [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] + case 0xFF31: // [FULLWIDTH LATIN CAPITAL LETTER Q] + output[outputPos++] = L'Q'; + break; + case 0x0138: // [LATIN SMALL LETTER KRA] + case 0x024B: // [LATIN SMALL LETTER Q WITH HOOK TAIL] + case 0x02A0: // [LATIN SMALL LETTER Q WITH HOOK] + case 0x24E0: // [CIRCLED LATIN SMALL LETTER Q] + case 0xA757: // [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] + case 0xA759: // [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] + case 0xFF51: // [FULLWIDTH LATIN SMALL LETTER Q] + output[outputPos++] = L'q'; + break; + case 0x24AC: // [PARENTHESIZED LATIN SMALL LETTER Q] + output[outputPos++] = L'('; + output[outputPos++] = L'q'; + output[outputPos++] = L')'; + break; + case 0x0239: // [LATIN SMALL LETTER QP DIGRAPH] + output[outputPos++] = L'q'; + output[outputPos++] = L'p'; + break; + case 0x0154: // [LATIN CAPITAL LETTER R WITH ACUTE] + case 0x0156: // [LATIN CAPITAL LETTER R WITH CEDILLA] + case 0x0158: // [LATIN CAPITAL LETTER R WITH CARON] + case 0x0210: // [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] + case 0x0212: // [LATIN CAPITAL LETTER R WITH INVERTED BREVE] + case 0x024C: // [LATIN CAPITAL LETTER R WITH STROKE] + case 0x0280: // [LATIN LETTER SMALL CAPITAL R] + case 0x0281: // [LATIN LETTER SMALL CAPITAL INVERTED R] + case 0x1D19: // [LATIN LETTER SMALL CAPITAL REVERSED R] + case 0x1D1A: // [LATIN LETTER SMALL CAPITAL TURNED R] + case 0x1E58: // [LATIN CAPITAL LETTER R WITH DOT ABOVE] + case 0x1E5A: // [LATIN CAPITAL LETTER R WITH DOT BELOW] + case 0x1E5C: // [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] + case 0x1E5E: // [LATIN CAPITAL LETTER R WITH LINE BELOW] 
+ case 0x24C7: // [CIRCLED LATIN CAPITAL LETTER R] + case 0x2C64: // [LATIN CAPITAL LETTER R WITH TAIL] + case 0xA75A: // [LATIN CAPITAL LETTER R ROTUNDA] + case 0xA782: // [LATIN CAPITAL LETTER INSULAR R] + case 0xFF32: // [FULLWIDTH LATIN CAPITAL LETTER R] + output[outputPos++] = L'R'; + break; + case 0x0155: // [LATIN SMALL LETTER R WITH ACUTE] + case 0x0157: // [LATIN SMALL LETTER R WITH CEDILLA] + case 0x0159: // [LATIN SMALL LETTER R WITH CARON] + case 0x0211: // [LATIN SMALL LETTER R WITH DOUBLE GRAVE] + case 0x0213: // [LATIN SMALL LETTER R WITH INVERTED BREVE] + case 0x024D: // [LATIN SMALL LETTER R WITH STROKE] + case 0x027C: // [LATIN SMALL LETTER R WITH LONG LEG] + case 0x027D: // [LATIN SMALL LETTER R WITH TAIL] + case 0x027E: // [LATIN SMALL LETTER R WITH FISHHOOK] + case 0x027F: // [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] + case 0x1D63: // [LATIN SUBSCRIPT SMALL LETTER R] + case 0x1D72: // [LATIN SMALL LETTER R WITH MIDDLE TILDE] + case 0x1D73: // [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] + case 0x1D89: // [LATIN SMALL LETTER R WITH PALATAL HOOK] + case 0x1E59: // [LATIN SMALL LETTER R WITH DOT ABOVE] + case 0x1E5B: // [LATIN SMALL LETTER R WITH DOT BELOW] + case 0x1E5D: // [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] + case 0x1E5F: // [LATIN SMALL LETTER R WITH LINE BELOW] + case 0x24E1: // [CIRCLED LATIN SMALL LETTER R] + case 0xA75B: // [LATIN SMALL LETTER R ROTUNDA] + case 0xA783: // [LATIN SMALL LETTER INSULAR R] + case 0xFF52: // [FULLWIDTH LATIN SMALL LETTER R] + output[outputPos++] = L'r'; + break; + case 0x24AD: // [PARENTHESIZED LATIN SMALL LETTER R] + output[outputPos++] = L'('; + output[outputPos++] = L'r'; + output[outputPos++] = L')'; + break; + case 0x015A: // [LATIN CAPITAL LETTER S WITH ACUTE] + case 0x015C: // [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] + case 0x015E: // [LATIN CAPITAL LETTER S WITH CEDILLA] + case 0x0160: // [LATIN CAPITAL LETTER S WITH CARON] + case 0x0218: // [LATIN CAPITAL LETTER S WITH COMMA 
BELOW] + case 0x1E60: // [LATIN CAPITAL LETTER S WITH DOT ABOVE] + case 0x1E62: // [LATIN CAPITAL LETTER S WITH DOT BELOW] + case 0x1E64: // [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] + case 0x1E66: // [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] + case 0x1E68: // [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] + case 0x24C8: // [CIRCLED LATIN CAPITAL LETTER S] + case 0xA731: // [LATIN LETTER SMALL CAPITAL S] + case 0xA785: // [LATIN SMALL LETTER INSULAR S] + case 0xFF33: // [FULLWIDTH LATIN CAPITAL LETTER S] + output[outputPos++] = L'S'; + break; + case 0x015B: // [LATIN SMALL LETTER S WITH ACUTE] + case 0x015D: // [LATIN SMALL LETTER S WITH CIRCUMFLEX] + case 0x015F: // [LATIN SMALL LETTER S WITH CEDILLA] + case 0x0161: // [LATIN SMALL LETTER S WITH CARON] + case 0x017F: // [LATIN SMALL LETTER LONG S] + case 0x0219: // [LATIN SMALL LETTER S WITH COMMA BELOW] + case 0x023F: // [LATIN SMALL LETTER S WITH SWASH TAIL] + case 0x0282: // [LATIN SMALL LETTER S WITH HOOK] + case 0x1D74: // [LATIN SMALL LETTER S WITH MIDDLE TILDE] + case 0x1D8A: // [LATIN SMALL LETTER S WITH PALATAL HOOK] + case 0x1E61: // [LATIN SMALL LETTER S WITH DOT ABOVE] + case 0x1E63: // [LATIN SMALL LETTER S WITH DOT BELOW] + case 0x1E65: // [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] + case 0x1E67: // [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] + case 0x1E69: // [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] + case 0x1E9C: // [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE] + case 0x1E9D: // [LATIN SMALL LETTER LONG S WITH HIGH STROKE] + case 0x24E2: // [CIRCLED LATIN SMALL LETTER S] + case 0xA784: // [LATIN CAPITAL LETTER INSULAR S] + case 0xFF53: // [FULLWIDTH LATIN SMALL LETTER S] + output[outputPos++] = L's'; + break; + case 0x1E9E: // [LATIN CAPITAL LETTER SHARP S] + output[outputPos++] = L'S'; + output[outputPos++] = L'S'; + break; + case 0x24AE: // [PARENTHESIZED LATIN SMALL LETTER S] + output[outputPos++] = L'('; + output[outputPos++] = L's'; + 
output[outputPos++] = L')'; + break; + case 0x00DF: // [LATIN SMALL LETTER SHARP S] + output[outputPos++] = L's'; + output[outputPos++] = L's'; + break; + case 0xFB06: // [LATIN SMALL LIGATURE ST] + output[outputPos++] = L's'; + output[outputPos++] = L't'; + break; + case 0x0162: // [LATIN CAPITAL LETTER T WITH CEDILLA] + case 0x0164: // [LATIN CAPITAL LETTER T WITH CARON] + case 0x0166: // [LATIN CAPITAL LETTER T WITH STROKE] + case 0x01AC: // [LATIN CAPITAL LETTER T WITH HOOK] + case 0x01AE: // [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] + case 0x021A: // [LATIN CAPITAL LETTER T WITH COMMA BELOW] + case 0x023E: // [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] + case 0x1D1B: // [LATIN LETTER SMALL CAPITAL T] + case 0x1E6A: // [LATIN CAPITAL LETTER T WITH DOT ABOVE] + case 0x1E6C: // [LATIN CAPITAL LETTER T WITH DOT BELOW] + case 0x1E6E: // [LATIN CAPITAL LETTER T WITH LINE BELOW] + case 0x1E70: // [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] + case 0x24C9: // [CIRCLED LATIN CAPITAL LETTER T] + case 0xA786: // [LATIN CAPITAL LETTER INSULAR T] + case 0xFF34: // [FULLWIDTH LATIN CAPITAL LETTER T] + output[outputPos++] = L'T'; + break; + case 0x0163: // [LATIN SMALL LETTER T WITH CEDILLA] + case 0x0165: // [LATIN SMALL LETTER T WITH CARON] + case 0x0167: // [LATIN SMALL LETTER T WITH STROKE] + case 0x01AB: // [LATIN SMALL LETTER T WITH PALATAL HOOK] + case 0x01AD: // [LATIN SMALL LETTER T WITH HOOK] + case 0x021B: // [LATIN SMALL LETTER T WITH COMMA BELOW] + case 0x0236: // [LATIN SMALL LETTER T WITH CURL] + case 0x0287: // [LATIN SMALL LETTER TURNED T] + case 0x0288: // [LATIN SMALL LETTER T WITH RETROFLEX HOOK] + case 0x1D75: // [LATIN SMALL LETTER T WITH MIDDLE TILDE] + case 0x1E6B: // [LATIN SMALL LETTER T WITH DOT ABOVE] + case 0x1E6D: // [LATIN SMALL LETTER T WITH DOT BELOW] + case 0x1E6F: // [LATIN SMALL LETTER T WITH LINE BELOW] + case 0x1E71: // [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] + case 0x1E97: // [LATIN SMALL LETTER T WITH DIAERESIS] + case 
0x24E3: // [CIRCLED LATIN SMALL LETTER T] + case 0x2C66: // [LATIN SMALL LETTER T WITH DIAGONAL STROKE] + case 0xFF54: // [FULLWIDTH LATIN SMALL LETTER T] + output[outputPos++] = L't'; + break; + case 0x00DE: // [LATIN CAPITAL LETTER THORN] + case 0xA766: // [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] + output[outputPos++] = L'T'; + output[outputPos++] = L'H'; + break; + case 0xA728: // [LATIN CAPITAL LETTER TZ] + output[outputPos++] = L'T'; + output[outputPos++] = L'Z'; + break; + case 0x24AF: // [PARENTHESIZED LATIN SMALL LETTER T] + output[outputPos++] = L'('; + output[outputPos++] = L't'; + output[outputPos++] = L')'; + break; + case 0x02A8: // [LATIN SMALL LETTER TC DIGRAPH WITH CURL] + output[outputPos++] = L't'; + output[outputPos++] = L'c'; + break; + case 0x00FE: // [LATIN SMALL LETTER THORN] + case 0x1D7A: // [LATIN SMALL LETTER TH WITH STRIKETHROUGH] + case 0xA767: // [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] + output[outputPos++] = L't'; + output[outputPos++] = L'h'; + break; + case 0x02A6: // [LATIN SMALL LETTER TS DIGRAPH] + output[outputPos++] = L't'; + output[outputPos++] = L's'; + break; + case 0xA729: // [LATIN SMALL LETTER TZ] + output[outputPos++] = L't'; + output[outputPos++] = L'z'; + break; + case 0x00D9: // [LATIN CAPITAL LETTER U WITH GRAVE] + case 0x00DA: // [LATIN CAPITAL LETTER U WITH ACUTE] + case 0x00DB: // [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] + case 0x00DC: // [LATIN CAPITAL LETTER U WITH DIAERESIS] + case 0x0168: // [LATIN CAPITAL LETTER U WITH TILDE] + case 0x016A: // [LATIN CAPITAL LETTER U WITH MACRON] + case 0x016C: // [LATIN CAPITAL LETTER U WITH BREVE] + case 0x016E: // [LATIN CAPITAL LETTER U WITH RING ABOVE] + case 0x0170: // [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] + case 0x0172: // [LATIN CAPITAL LETTER U WITH OGONEK] + case 0x01AF: // [LATIN CAPITAL LETTER U WITH HORN] + case 0x01D3: // [LATIN CAPITAL LETTER U WITH CARON] + case 0x01D5: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND 
MACRON] + case 0x01D7: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] + case 0x01D9: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] + case 0x01DB: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] + case 0x0214: // [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] + case 0x0216: // [LATIN CAPITAL LETTER U WITH INVERTED BREVE] + case 0x0244: // [LATIN CAPITAL LETTER U BAR] + case 0x1D1C: // [LATIN LETTER SMALL CAPITAL U] + case 0x1D7E: // [LATIN SMALL CAPITAL LETTER U WITH STROKE] + case 0x1E72: // [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] + case 0x1E74: // [LATIN CAPITAL LETTER U WITH TILDE BELOW] + case 0x1E76: // [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] + case 0x1E78: // [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] + case 0x1E7A: // [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] + case 0x1EE4: // [LATIN CAPITAL LETTER U WITH DOT BELOW] + case 0x1EE6: // [LATIN CAPITAL LETTER U WITH HOOK ABOVE] + case 0x1EE8: // [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] + case 0x1EEA: // [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] + case 0x1EEC: // [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] + case 0x1EEE: // [LATIN CAPITAL LETTER U WITH HORN AND TILDE] + case 0x1EF0: // [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] + case 0x24CA: // [CIRCLED LATIN CAPITAL LETTER U] + case 0xFF35: // [FULLWIDTH LATIN CAPITAL LETTER U] + output[outputPos++] = L'U'; + break; + case 0x00F9: // [LATIN SMALL LETTER U WITH GRAVE] + case 0x00FA: // [LATIN SMALL LETTER U WITH ACUTE] + case 0x00FB: // [LATIN SMALL LETTER U WITH CIRCUMFLEX] + case 0x00FC: // [LATIN SMALL LETTER U WITH DIAERESIS] + case 0x0169: // [LATIN SMALL LETTER U WITH TILDE] + case 0x016B: // [LATIN SMALL LETTER U WITH MACRON] + case 0x016D: // [LATIN SMALL LETTER U WITH BREVE] + case 0x016F: // [LATIN SMALL LETTER U WITH RING ABOVE] + case 0x0171: // [LATIN SMALL LETTER U WITH DOUBLE ACUTE] + case 0x0173: // [LATIN SMALL LETTER U WITH OGONEK] + case 0x01B0: // [LATIN SMALL LETTER U WITH HORN] + case 
0x01D4: // [LATIN SMALL LETTER U WITH CARON] + case 0x01D6: // [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] + case 0x01D8: // [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] + case 0x01DA: // [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] + case 0x01DC: // [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] + case 0x0215: // [LATIN SMALL LETTER U WITH DOUBLE GRAVE] + case 0x0217: // [LATIN SMALL LETTER U WITH INVERTED BREVE] + case 0x0289: // [LATIN SMALL LETTER U BAR] + case 0x1D64: // [LATIN SUBSCRIPT SMALL LETTER U] + case 0x1D99: // [LATIN SMALL LETTER U WITH RETROFLEX HOOK] + case 0x1E73: // [LATIN SMALL LETTER U WITH DIAERESIS BELOW] + case 0x1E75: // [LATIN SMALL LETTER U WITH TILDE BELOW] + case 0x1E77: // [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] + case 0x1E79: // [LATIN SMALL LETTER U WITH TILDE AND ACUTE] + case 0x1E7B: // [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] + case 0x1EE5: // [LATIN SMALL LETTER U WITH DOT BELOW] + case 0x1EE7: // [LATIN SMALL LETTER U WITH HOOK ABOVE] + case 0x1EE9: // [LATIN SMALL LETTER U WITH HORN AND ACUTE] + case 0x1EEB: // [LATIN SMALL LETTER U WITH HORN AND GRAVE] + case 0x1EED: // [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] + case 0x1EEF: // [LATIN SMALL LETTER U WITH HORN AND TILDE] + case 0x1EF1: // [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] + case 0x24E4: // [CIRCLED LATIN SMALL LETTER U] + case 0xFF55: // [FULLWIDTH LATIN SMALL LETTER U] + output[outputPos++] = L'u'; + break; + case 0x24B0: // [PARENTHESIZED LATIN SMALL LETTER U] + output[outputPos++] = L'('; + output[outputPos++] = L'u'; + output[outputPos++] = L')'; + break; + case 0x1D6B: // [LATIN SMALL LETTER UE] + output[outputPos++] = L'u'; + output[outputPos++] = L'e'; + break; + case 0x01B2: // [LATIN CAPITAL LETTER V WITH HOOK] + case 0x0245: // [LATIN CAPITAL LETTER TURNED V] + case 0x1D20: // [LATIN LETTER SMALL CAPITAL V] + case 0x1E7C: // [LATIN CAPITAL LETTER V WITH TILDE] + case 0x1E7E: // [LATIN CAPITAL LETTER V WITH DOT BELOW] + 
case 0x1EFC: // [LATIN CAPITAL LETTER MIDDLE-WELSH V] + case 0x24CB: // [CIRCLED LATIN CAPITAL LETTER V] + case 0xA75E: // [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] + case 0xA768: // [LATIN CAPITAL LETTER VEND] + case 0xFF36: // [FULLWIDTH LATIN CAPITAL LETTER V] + output[outputPos++] = L'V'; + break; + case 0x028B: // [LATIN SMALL LETTER V WITH HOOK] + case 0x028C: // [LATIN SMALL LETTER TURNED V] + case 0x1D65: // [LATIN SUBSCRIPT SMALL LETTER V] + case 0x1D8C: // [LATIN SMALL LETTER V WITH PALATAL HOOK] + case 0x1E7D: // [LATIN SMALL LETTER V WITH TILDE] + case 0x1E7F: // [LATIN SMALL LETTER V WITH DOT BELOW] + case 0x24E5: // [CIRCLED LATIN SMALL LETTER V] + case 0x2C71: // [LATIN SMALL LETTER V WITH RIGHT HOOK] + case 0x2C74: // [LATIN SMALL LETTER V WITH CURL] + case 0xA75F: // [LATIN SMALL LETTER V WITH DIAGONAL STROKE] + case 0xFF56: // [FULLWIDTH LATIN SMALL LETTER V] + output[outputPos++] = L'v'; + break; + case 0xA760: // [LATIN CAPITAL LETTER VY] + output[outputPos++] = L'V'; + output[outputPos++] = L'Y'; + break; + case 0x24B1: // [PARENTHESIZED LATIN SMALL LETTER V] + output[outputPos++] = L'('; + output[outputPos++] = L'v'; + output[outputPos++] = L')'; + break; + case 0xA761: // [LATIN SMALL LETTER VY] + output[outputPos++] = L'v'; + output[outputPos++] = L'y'; + break; + case 0x0174: // [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] + case 0x01F7: // [LATIN CAPITAL LETTER WYNN] + case 0x1D21: // [LATIN LETTER SMALL CAPITAL W] + case 0x1E80: // [LATIN CAPITAL LETTER W WITH GRAVE] + case 0x1E82: // [LATIN CAPITAL LETTER W WITH ACUTE] + case 0x1E84: // [LATIN CAPITAL LETTER W WITH DIAERESIS] + case 0x1E86: // [LATIN CAPITAL LETTER W WITH DOT ABOVE] + case 0x1E88: // [LATIN CAPITAL LETTER W WITH DOT BELOW] + case 0x24CC: // [CIRCLED LATIN CAPITAL LETTER W] + case 0x2C72: // [LATIN CAPITAL LETTER W WITH HOOK] + case 0xFF37: // [FULLWIDTH LATIN CAPITAL LETTER W] + output[outputPos++] = L'W'; + break; + case 0x0175: // [LATIN SMALL LETTER W WITH CIRCUMFLEX] 
+ case 0x01BF: // [LATIN LETTER WYNN] + case 0x028D: // [LATIN SMALL LETTER TURNED W] + case 0x1E81: // [LATIN SMALL LETTER W WITH GRAVE] + case 0x1E83: // [LATIN SMALL LETTER W WITH ACUTE] + case 0x1E85: // [LATIN SMALL LETTER W WITH DIAERESIS] + case 0x1E87: // [LATIN SMALL LETTER W WITH DOT ABOVE] + case 0x1E89: // [LATIN SMALL LETTER W WITH DOT BELOW] + case 0x1E98: // [LATIN SMALL LETTER W WITH RING ABOVE] + case 0x24E6: // [CIRCLED LATIN SMALL LETTER W] + case 0x2C73: // [LATIN SMALL LETTER W WITH HOOK] + case 0xFF57: // [FULLWIDTH LATIN SMALL LETTER W] + output[outputPos++] = L'w'; + break; + case 0x24B2: // [PARENTHESIZED LATIN SMALL LETTER W] + output[outputPos++] = L'('; + output[outputPos++] = L'w'; + output[outputPos++] = L')'; + break; + case 0x1E8A: // [LATIN CAPITAL LETTER X WITH DOT ABOVE] + case 0x1E8C: // [LATIN CAPITAL LETTER X WITH DIAERESIS] + case 0x24CD: // [CIRCLED LATIN CAPITAL LETTER X] + case 0xFF38: // [FULLWIDTH LATIN CAPITAL LETTER X] + output[outputPos++] = L'X'; + break; + case 0x1D8D: // [LATIN SMALL LETTER X WITH PALATAL HOOK] + case 0x1E8B: // [LATIN SMALL LETTER X WITH DOT ABOVE] + case 0x1E8D: // [LATIN SMALL LETTER X WITH DIAERESIS] + case 0x2093: // [LATIN SUBSCRIPT SMALL LETTER X] + case 0x24E7: // [CIRCLED LATIN SMALL LETTER X] + case 0xFF58: // [FULLWIDTH LATIN SMALL LETTER X] + output[outputPos++] = L'x'; + break; + case 0x24B3: // [PARENTHESIZED LATIN SMALL LETTER X] + output[outputPos++] = L'('; + output[outputPos++] = L'x'; + output[outputPos++] = L')'; + break; + case 0x00DD: // [LATIN CAPITAL LETTER Y WITH ACUTE] + case 0x0176: // [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] + case 0x0178: // [LATIN CAPITAL LETTER Y WITH DIAERESIS] + case 0x01B3: // [LATIN CAPITAL LETTER Y WITH HOOK] + case 0x0232: // [LATIN CAPITAL LETTER Y WITH MACRON] + case 0x024E: // [LATIN CAPITAL LETTER Y WITH STROKE] + case 0x028F: // [LATIN LETTER SMALL CAPITAL Y] + case 0x1E8E: // [LATIN CAPITAL LETTER Y WITH DOT ABOVE] + case 0x1EF2: // [LATIN 
CAPITAL LETTER Y WITH GRAVE] + case 0x1EF4: // [LATIN CAPITAL LETTER Y WITH DOT BELOW] + case 0x1EF6: // [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] + case 0x1EF8: // [LATIN CAPITAL LETTER Y WITH TILDE] + case 0x1EFE: // [LATIN CAPITAL LETTER Y WITH LOOP] + case 0x24CE: // [CIRCLED LATIN CAPITAL LETTER Y] + case 0xFF39: // [FULLWIDTH LATIN CAPITAL LETTER Y] + output[outputPos++] = L'Y'; + break; + case 0x00FD: // [LATIN SMALL LETTER Y WITH ACUTE] + case 0x00FF: // [LATIN SMALL LETTER Y WITH DIAERESIS] + case 0x0177: // [LATIN SMALL LETTER Y WITH CIRCUMFLEX] + case 0x01B4: // [LATIN SMALL LETTER Y WITH HOOK] + case 0x0233: // [LATIN SMALL LETTER Y WITH MACRON] + case 0x024F: // [LATIN SMALL LETTER Y WITH STROKE] + case 0x028E: // [LATIN SMALL LETTER TURNED Y] + case 0x1E8F: // [LATIN SMALL LETTER Y WITH DOT ABOVE] + case 0x1E99: // [LATIN SMALL LETTER Y WITH RING ABOVE] + case 0x1EF3: // [LATIN SMALL LETTER Y WITH GRAVE] + case 0x1EF5: // [LATIN SMALL LETTER Y WITH DOT BELOW] + case 0x1EF7: // [LATIN SMALL LETTER Y WITH HOOK ABOVE] + case 0x1EF9: // [LATIN SMALL LETTER Y WITH TILDE] + case 0x1EFF: // [LATIN SMALL LETTER Y WITH LOOP] + case 0x24E8: // [CIRCLED LATIN SMALL LETTER Y] + case 0xFF59: // [FULLWIDTH LATIN SMALL LETTER Y] + output[outputPos++] = L'y'; + break; + case 0x24B4: // [PARENTHESIZED LATIN SMALL LETTER Y] + output[outputPos++] = L'('; + output[outputPos++] = L'y'; + output[outputPos++] = L')'; + break; + case 0x0179: // [LATIN CAPITAL LETTER Z WITH ACUTE] + case 0x017B: // [LATIN CAPITAL LETTER Z WITH DOT ABOVE] + case 0x017D: // [LATIN CAPITAL LETTER Z WITH CARON] + case 0x01B5: // [LATIN CAPITAL LETTER Z WITH STROKE] + case 0x021C: // [LATIN CAPITAL LETTER YOGH] + case 0x0224: // [LATIN CAPITAL LETTER Z WITH HOOK] + case 0x1D22: // [LATIN LETTER SMALL CAPITAL Z] + case 0x1E90: // [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] + case 0x1E92: // [LATIN CAPITAL LETTER Z WITH DOT BELOW] + case 0x1E94: // [LATIN CAPITAL LETTER Z WITH LINE BELOW] + case 0x24CF: 
// [CIRCLED LATIN CAPITAL LETTER Z] + case 0x2C6B: // [LATIN CAPITAL LETTER Z WITH DESCENDER] + case 0xA762: // [LATIN CAPITAL LETTER VISIGOTHIC Z] + case 0xFF3A: // [FULLWIDTH LATIN CAPITAL LETTER Z] + output[outputPos++] = L'Z'; + break; + case 0x017A: // [LATIN SMALL LETTER Z WITH ACUTE] + case 0x017C: // [LATIN SMALL LETTER Z WITH DOT ABOVE] + case 0x017E: // [LATIN SMALL LETTER Z WITH CARON] + case 0x01B6: // [LATIN SMALL LETTER Z WITH STROKE] + case 0x021D: // [LATIN SMALL LETTER YOGH] + case 0x0225: // [LATIN SMALL LETTER Z WITH HOOK] + case 0x0240: // [LATIN SMALL LETTER Z WITH SWASH TAIL] + case 0x0290: // [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] + case 0x0291: // [LATIN SMALL LETTER Z WITH CURL] + case 0x1D76: // [LATIN SMALL LETTER Z WITH MIDDLE TILDE] + case 0x1D8E: // [LATIN SMALL LETTER Z WITH PALATAL HOOK] + case 0x1E91: // [LATIN SMALL LETTER Z WITH CIRCUMFLEX] + case 0x1E93: // [LATIN SMALL LETTER Z WITH DOT BELOW] + case 0x1E95: // [LATIN SMALL LETTER Z WITH LINE BELOW] + case 0x24E9: // [CIRCLED LATIN SMALL LETTER Z] + case 0x2C6C: // [LATIN SMALL LETTER Z WITH DESCENDER] + case 0xA763: // [LATIN SMALL LETTER VISIGOTHIC Z] + case 0xFF5A: // [FULLWIDTH LATIN SMALL LETTER Z] + output[outputPos++] = L'z'; + break; + case 0x24B5: // [PARENTHESIZED LATIN SMALL LETTER Z] + output[outputPos++] = L'('; + output[outputPos++] = L'z'; + output[outputPos++] = L')'; + break; + case 0x2070: // [SUPERSCRIPT ZERO] + case 0x2080: // [SUBSCRIPT ZERO] + case 0x24EA: // [CIRCLED DIGIT ZERO] + case 0x24FF: // [NEGATIVE CIRCLED DIGIT ZERO] + case 0xFF10: // [FULLWIDTH DIGIT ZERO] + output[outputPos++] = L'0'; + break; + case 0x00B9: // [SUPERSCRIPT ONE] + case 0x2081: // [SUBSCRIPT ONE] + case 0x2460: // [CIRCLED DIGIT ONE] + case 0x24F5: // [DOUBLE CIRCLED DIGIT ONE] + case 0x2776: // [DINGBAT NEGATIVE CIRCLED DIGIT ONE] + case 0x2780: // [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] + case 0x278A: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] + case 0xFF11: // 
[FULLWIDTH DIGIT ONE] + output[outputPos++] = L'1'; + break; + case 0x2488: // [DIGIT ONE FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'.'; + break; + case 0x2474: // [PARENTHESIZED DIGIT ONE] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L')'; + break; + case 0x00B2: // [SUPERSCRIPT TWO] + case 0x2082: // [SUBSCRIPT TWO] + case 0x2461: // [CIRCLED DIGIT TWO] + case 0x24F6: // [DOUBLE CIRCLED DIGIT TWO] + case 0x2777: // [DINGBAT NEGATIVE CIRCLED DIGIT TWO] + case 0x2781: // [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] + case 0x278B: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] + case 0xFF12: // [FULLWIDTH DIGIT TWO] + output[outputPos++] = L'2'; + break; + case 0x2489: // [DIGIT TWO FULL STOP] + output[outputPos++] = L'2'; + output[outputPos++] = L'.'; + break; + case 0x2475: // [PARENTHESIZED DIGIT TWO] + output[outputPos++] = L'('; + output[outputPos++] = L'2'; + output[outputPos++] = L')'; + break; + case 0x00B3: // [SUPERSCRIPT THREE] + case 0x2083: // [SUBSCRIPT THREE] + case 0x2462: // [CIRCLED DIGIT THREE] + case 0x24F7: // [DOUBLE CIRCLED DIGIT THREE] + case 0x2778: // [DINGBAT NEGATIVE CIRCLED DIGIT THREE] + case 0x2782: // [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] + case 0x278C: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] + case 0xFF13: // [FULLWIDTH DIGIT THREE] + output[outputPos++] = L'3'; + break; + case 0x248A: // [DIGIT THREE FULL STOP] + output[outputPos++] = L'3'; + output[outputPos++] = L'.'; + break; + case 0x2476: // [PARENTHESIZED DIGIT THREE] + output[outputPos++] = L'('; + output[outputPos++] = L'3'; + output[outputPos++] = L')'; + break; + case 0x2074: // [SUPERSCRIPT FOUR] + case 0x2084: // [SUBSCRIPT FOUR] + case 0x2463: // [CIRCLED DIGIT FOUR] + case 0x24F8: // [DOUBLE CIRCLED DIGIT FOUR] + case 0x2779: // [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] + case 0x2783: // [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] + case 0x278D: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] + 
case 0xFF14: // [FULLWIDTH DIGIT FOUR] + output[outputPos++] = L'4'; + break; + case 0x248B: // [DIGIT FOUR FULL STOP] + output[outputPos++] = L'4'; + output[outputPos++] = L'.'; + break; + case 0x2477: // [PARENTHESIZED DIGIT FOUR] + output[outputPos++] = L'('; + output[outputPos++] = L'4'; + output[outputPos++] = L')'; + break; + case 0x2075: // [SUPERSCRIPT FIVE] + case 0x2085: // [SUBSCRIPT FIVE] + case 0x2464: // [CIRCLED DIGIT FIVE] + case 0x24F9: // [DOUBLE CIRCLED DIGIT FIVE] + case 0x277A: // [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] + case 0x2784: // [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] + case 0x278E: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] + case 0xFF15: // [FULLWIDTH DIGIT FIVE] + output[outputPos++] = L'5'; + break; + case 0x248C: // [DIGIT FIVE FULL STOP] + output[outputPos++] = L'5'; + output[outputPos++] = L'.'; + break; + case 0x2478: // [PARENTHESIZED DIGIT FIVE] + output[outputPos++] = L'('; + output[outputPos++] = L'5'; + output[outputPos++] = L')'; + break; + case 0x2076: // [SUPERSCRIPT SIX] + case 0x2086: // [SUBSCRIPT SIX] + case 0x2465: // [CIRCLED DIGIT SIX] + case 0x24FA: // [DOUBLE CIRCLED DIGIT SIX] + case 0x277B: // [DINGBAT NEGATIVE CIRCLED DIGIT SIX] + case 0x2785: // [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] + case 0x278F: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] + case 0xFF16: // [FULLWIDTH DIGIT SIX] + output[outputPos++] = L'6'; + break; + case 0x248D: // [DIGIT SIX FULL STOP] + output[outputPos++] = L'6'; + output[outputPos++] = L'.'; + break; + case 0x2479: // [PARENTHESIZED DIGIT SIX] + output[outputPos++] = L'('; + output[outputPos++] = L'6'; + output[outputPos++] = L')'; + break; + case 0x2077: // [SUPERSCRIPT SEVEN] + case 0x2087: // [SUBSCRIPT SEVEN] + case 0x2466: // [CIRCLED DIGIT SEVEN] + case 0x24FB: // [DOUBLE CIRCLED DIGIT SEVEN] + case 0x277C: // [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] + case 0x2786: // [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] + case 0x2790: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF 
DIGIT SEVEN] + case 0xFF17: // [FULLWIDTH DIGIT SEVEN] + output[outputPos++] = L'7'; + break; + case 0x248E: // [DIGIT SEVEN FULL STOP] + output[outputPos++] = L'7'; + output[outputPos++] = L'.'; + break; + case 0x247A: // [PARENTHESIZED DIGIT SEVEN] + output[outputPos++] = L'('; + output[outputPos++] = L'7'; + output[outputPos++] = L')'; + break; + case 0x2078: // [SUPERSCRIPT EIGHT] + case 0x2088: // [SUBSCRIPT EIGHT] + case 0x2467: // [CIRCLED DIGIT EIGHT] + case 0x24FC: // [DOUBLE CIRCLED DIGIT EIGHT] + case 0x277D: // [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] + case 0x2787: // [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] + case 0x2791: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] + case 0xFF18: // [FULLWIDTH DIGIT EIGHT] + output[outputPos++] = L'8'; + break; + case 0x248F: // [DIGIT EIGHT FULL STOP] + output[outputPos++] = L'8'; + output[outputPos++] = L'.'; + break; + case 0x247B: // [PARENTHESIZED DIGIT EIGHT] + output[outputPos++] = L'('; + output[outputPos++] = L'8'; + output[outputPos++] = L')'; + break; + case 0x2079: // [SUPERSCRIPT NINE] + case 0x2089: // [SUBSCRIPT NINE] + case 0x2468: // [CIRCLED DIGIT NINE] + case 0x24FD: // [DOUBLE CIRCLED DIGIT NINE] + case 0x277E: // [DINGBAT NEGATIVE CIRCLED DIGIT NINE] + case 0x2788: // [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] + case 0x2792: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] + case 0xFF19: // [FULLWIDTH DIGIT NINE] + output[outputPos++] = L'9'; + break; + case 0x2490: // [DIGIT NINE FULL STOP] + output[outputPos++] = L'9'; + output[outputPos++] = L'.'; + break; + case 0x247C: // [PARENTHESIZED DIGIT NINE] + output[outputPos++] = L'('; + output[outputPos++] = L'9'; + output[outputPos++] = L')'; + break; + case 0x2469: // [CIRCLED NUMBER TEN] + case 0x24FE: // [DOUBLE CIRCLED NUMBER TEN] + case 0x277F: // [DINGBAT NEGATIVE CIRCLED NUMBER TEN] + case 0x2789: // [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] + case 0x2793: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] + output[outputPos++] = L'1'; 
+ output[outputPos++] = L'0'; + break; + case 0x2491: // [NUMBER TEN FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'0'; + output[outputPos++] = L'.'; + break; + case 0x247D: // [PARENTHESIZED NUMBER TEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'0'; + output[outputPos++] = L')'; + break; + case 0x246A: // [CIRCLED NUMBER ELEVEN] + case 0x24EB: // [NEGATIVE CIRCLED NUMBER ELEVEN] + output[outputPos++] = L'1'; + output[outputPos++] = L'1'; + break; + case 0x2492: // [NUMBER ELEVEN FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'1'; + output[outputPos++] = L'.'; + break; + case 0x247E: // [PARENTHESIZED NUMBER ELEVEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'1'; + output[outputPos++] = L')'; + break; + case 0x246B: // [CIRCLED NUMBER TWELVE] + case 0x24EC: // [NEGATIVE CIRCLED NUMBER TWELVE] + output[outputPos++] = L'1'; + output[outputPos++] = L'2'; + break; + case 0x2493: // [NUMBER TWELVE FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'2'; + output[outputPos++] = L'.'; + break; + case 0x247F: // [PARENTHESIZED NUMBER TWELVE] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'2'; + output[outputPos++] = L')'; + break; + case 0x246C: // [CIRCLED NUMBER THIRTEEN] + case 0x24ED: // [NEGATIVE CIRCLED NUMBER THIRTEEN] + output[outputPos++] = L'1'; + output[outputPos++] = L'3'; + break; + case 0x2494: // [NUMBER THIRTEEN FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'3'; + output[outputPos++] = L'.'; + break; + case 0x2480: // [PARENTHESIZED NUMBER THIRTEEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'3'; + output[outputPos++] = L')'; + break; + case 0x246D: // [CIRCLED NUMBER FOURTEEN] + case 0x24EE: // [NEGATIVE CIRCLED NUMBER FOURTEEN] + output[outputPos++] = L'1'; + output[outputPos++] = L'4'; + break; + case 0x2495: // [NUMBER 
FOURTEEN FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'4'; + output[outputPos++] = L'.'; + break; + case 0x2481: // [PARENTHESIZED NUMBER FOURTEEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'4'; + output[outputPos++] = L')'; + break; + case 0x246E: // [CIRCLED NUMBER FIFTEEN] + case 0x24EF: // [NEGATIVE CIRCLED NUMBER FIFTEEN] + output[outputPos++] = L'1'; + output[outputPos++] = L'5'; + break; + case 0x2496: // [NUMBER FIFTEEN FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'5'; + output[outputPos++] = L'.'; + break; + case 0x2482: // [PARENTHESIZED NUMBER FIFTEEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'5'; + output[outputPos++] = L')'; + break; + case 0x246F: // [CIRCLED NUMBER SIXTEEN] + case 0x24F0: // [NEGATIVE CIRCLED NUMBER SIXTEEN] + output[outputPos++] = L'1'; + output[outputPos++] = L'6'; + break; + case 0x2497: // [NUMBER SIXTEEN FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'6'; + output[outputPos++] = L'.'; + break; + case 0x2483: // [PARENTHESIZED NUMBER SIXTEEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'6'; + output[outputPos++] = L')'; + break; + case 0x2470: // [CIRCLED NUMBER SEVENTEEN] + case 0x24F1: // [NEGATIVE CIRCLED NUMBER SEVENTEEN] + output[outputPos++] = L'1'; + output[outputPos++] = L'7'; + break; + case 0x2498: // [NUMBER SEVENTEEN FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'7'; + output[outputPos++] = L'.'; + break; + case 0x2484: // [PARENTHESIZED NUMBER SEVENTEEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'7'; + output[outputPos++] = L')'; + break; + case 0x2471: // [CIRCLED NUMBER EIGHTEEN] + case 0x24F2: // [NEGATIVE CIRCLED NUMBER EIGHTEEN] + output[outputPos++] = L'1'; + output[outputPos++] = L'8'; + break; + case 0x2499: // [NUMBER EIGHTEEN FULL STOP] + output[outputPos++] = 
L'1'; + output[outputPos++] = L'8'; + output[outputPos++] = L'.'; + break; + case 0x2485: // [PARENTHESIZED NUMBER EIGHTEEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'8'; + output[outputPos++] = L')'; + break; + case 0x2472: // [CIRCLED NUMBER NINETEEN] + case 0x24F3: // [NEGATIVE CIRCLED NUMBER NINETEEN] + output[outputPos++] = L'1'; + output[outputPos++] = L'9'; + break; + case 0x249A: // [NUMBER NINETEEN FULL STOP] + output[outputPos++] = L'1'; + output[outputPos++] = L'9'; + output[outputPos++] = L'.'; + break; + case 0x2486: // [PARENTHESIZED NUMBER NINETEEN] + output[outputPos++] = L'('; + output[outputPos++] = L'1'; + output[outputPos++] = L'9'; + output[outputPos++] = L')'; + break; + case 0x2473: // [CIRCLED NUMBER TWENTY] + case 0x24F4: // [NEGATIVE CIRCLED NUMBER TWENTY] + output[outputPos++] = L'2'; + output[outputPos++] = L'0'; + break; + case 0x249B: // [NUMBER TWENTY FULL STOP] + output[outputPos++] = L'2'; + output[outputPos++] = L'0'; + output[outputPos++] = L'.'; + break; + case 0x2487: // [PARENTHESIZED NUMBER TWENTY] + output[outputPos++] = L'('; + output[outputPos++] = L'2'; + output[outputPos++] = L'0'; + output[outputPos++] = L')'; + break; + case 0x00AB: // [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] + case 0x00BB: // [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] + case 0x201C: // [LEFT DOUBLE QUOTATION MARK] + case 0x201D: // [RIGHT DOUBLE QUOTATION MARK] + case 0x201E: // [DOUBLE LOW-9 QUOTATION MARK] + case 0x2033: // [DOUBLE PRIME] + case 0x2036: // [REVERSED DOUBLE PRIME] + case 0x275D: // [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] + case 0x275E: // [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] + case 0x276E: // [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] + case 0x276F: // [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] + case 0xFF02: // [FULLWIDTH QUOTATION MARK] + output[outputPos++] = L'"'; + break; + case 0x2018: // [LEFT SINGLE QUOTATION MARK] + case 0x2019: // [RIGHT 
SINGLE QUOTATION MARK] + case 0x201A: // [SINGLE LOW-9 QUOTATION MARK] + case 0x201B: // [SINGLE HIGH-REVERSED-9 QUOTATION MARK] + case 0x2032: // [PRIME] + case 0x2035: // [REVERSED PRIME] + case 0x2039: // [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] + case 0x203A: // [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] + case 0x275B: // [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] + case 0x275C: // [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] + case 0xFF07: // [FULLWIDTH APOSTROPHE] + output[outputPos++] = L'\''; + break; + case 0x2010: // [HYPHEN] + case 0x2011: // [NON-BREAKING HYPHEN] + case 0x2012: // [FIGURE DASH] + case 0x2013: // [EN DASH] + case 0x2014: // [EM DASH] + case 0x207B: // [SUPERSCRIPT MINUS] + case 0x208B: // [SUBSCRIPT MINUS] + case 0xFF0D: // [FULLWIDTH HYPHEN-MINUS] + output[outputPos++] = L'-'; + break; + case 0x2045: // [LEFT SQUARE BRACKET WITH QUILL] + case 0x2772: // [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] + case 0xFF3B: // [FULLWIDTH LEFT SQUARE BRACKET] + output[outputPos++] = L'['; + break; + case 0x2046: // [RIGHT SQUARE BRACKET WITH QUILL] + case 0x2773: // [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] + case 0xFF3D: // [FULLWIDTH RIGHT SQUARE BRACKET] + output[outputPos++] = L']'; + break; + case 0x207D: // [SUPERSCRIPT LEFT PARENTHESIS] + case 0x208D: // [SUBSCRIPT LEFT PARENTHESIS] + case 0x2768: // [MEDIUM LEFT PARENTHESIS ORNAMENT] + case 0x276A: // [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT] + case 0xFF08: // [FULLWIDTH LEFT PARENTHESIS] + output[outputPos++] = L'('; + break; + case 0x2E28: // [LEFT DOUBLE PARENTHESIS] + output[outputPos++] = L'('; + output[outputPos++] = L'('; + break; + case 0x207E: // [SUPERSCRIPT RIGHT PARENTHESIS] + case 0x208E: // [SUBSCRIPT RIGHT PARENTHESIS] + case 0x2769: // [MEDIUM RIGHT PARENTHESIS ORNAMENT] + case 0x276B: // [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] + case 0xFF09: // [FULLWIDTH RIGHT PARENTHESIS] + output[outputPos++] = L')'; + break; + case 0x2E29: // [RIGHT DOUBLE 
PARENTHESIS] + output[outputPos++] = L')'; + output[outputPos++] = L')'; + break; + case 0x276C: // [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] + case 0x2770: // [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] + case 0xFF1C: // [FULLWIDTH LESS-THAN SIGN] + output[outputPos++] = L'<'; + break; + case 0x276D: // [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] + case 0x2771: // [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] + case 0xFF1E: // [FULLWIDTH GREATER-THAN SIGN] + output[outputPos++] = L'>'; + break; + case 0x2774: // [MEDIUM LEFT CURLY BRACKET ORNAMENT] + case 0xFF5B: // [FULLWIDTH LEFT CURLY BRACKET] + output[outputPos++] = L'{'; + break; + case 0x2775: // [MEDIUM RIGHT CURLY BRACKET ORNAMENT] + case 0xFF5D: // [FULLWIDTH RIGHT CURLY BRACKET] + output[outputPos++] = L'}'; + break; + case 0x207A: // [SUPERSCRIPT PLUS SIGN] + case 0x208A: // [SUBSCRIPT PLUS SIGN] + case 0xFF0B: // [FULLWIDTH PLUS SIGN] + output[outputPos++] = L'+'; + break; + case 0x207C: // [SUPERSCRIPT EQUALS SIGN] + case 0x208C: // [SUBSCRIPT EQUALS SIGN] + case 0xFF1D: // [FULLWIDTH EQUALS SIGN] + output[outputPos++] = L'='; + break; + case 0xFF01: // [FULLWIDTH EXCLAMATION MARK] + output[outputPos++] = L'!'; + break; + case 0x203C: // [DOUBLE EXCLAMATION MARK] + output[outputPos++] = L'!'; + output[outputPos++] = L'!'; + break; + case 0x2049: // [EXCLAMATION QUESTION MARK] + output[outputPos++] = L'!'; + output[outputPos++] = L'?'; + break; + case 0xFF03: // [FULLWIDTH NUMBER SIGN] + output[outputPos++] = L'#'; + break; + case 0xFF04: // [FULLWIDTH DOLLAR SIGN] + output[outputPos++] = L'$'; + break; + case 0x2052: // [COMMERCIAL MINUS SIGN] + case 0xFF05: // [FULLWIDTH PERCENT SIGN] + output[outputPos++] = L'%'; + break; + case 0xFF06: // [FULLWIDTH AMPERSAND] + output[outputPos++] = L'&'; + break; + case 0x204E: // [LOW ASTERISK] + case 0xFF0A: // [FULLWIDTH ASTERISK] + output[outputPos++] = L'*'; + break; + case 0xFF0C: // [FULLWIDTH COMMA] + output[outputPos++] = L','; + break; + case 
0xFF0E: // [FULLWIDTH FULL STOP] + output[outputPos++] = L'.'; + break; + case 0x2044: // [FRACTION SLASH] + case 0xFF0F: // [FULLWIDTH SOLIDUS] + output[outputPos++] = L'/'; + break; + case 0xFF1A: // [FULLWIDTH COLON] + output[outputPos++] = L':'; + break; + case 0x204F: // [REVERSED SEMICOLON] + case 0xFF1B: // [FULLWIDTH SEMICOLON] + output[outputPos++] = L';'; + break; + case 0xFF1F: // [FULLWIDTH QUESTION MARK] + output[outputPos++] = L'?'; + break; + case 0x2047: // [DOUBLE QUESTION MARK] + output[outputPos++] = L'?'; + output[outputPos++] = L'?'; + break; + case 0x2048: // [QUESTION EXCLAMATION MARK] + output[outputPos++] = L'?'; + output[outputPos++] = L'!'; + break; + case 0xFF20: // [FULLWIDTH COMMERCIAL AT] + output[outputPos++] = L'@'; + break; + case 0xFF3C: // [FULLWIDTH REVERSE SOLIDUS] + output[outputPos++] = L'\\'; + break; + case 0x2038: // [CARET] + case 0xFF3E: // [FULLWIDTH CIRCUMFLEX ACCENT] + output[outputPos++] = L'^'; + break; + case 0xFF3F: // [FULLWIDTH LOW LINE] + output[outputPos++] = L'_'; + break; + case 0x2053: // [SWUNG DASH] + case 0xFF5E: // [FULLWIDTH TILDE] + output[outputPos++] = L'~'; + break; + default: output[outputPos++] = c; - else - { - switch (c) - { - case 0x00C0: // [LATIN CAPITAL LETTER A WITH GRAVE] - case 0x00C1: // [LATIN CAPITAL LETTER A WITH ACUTE] - case 0x00C2: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX] - case 0x00C3: // [LATIN CAPITAL LETTER A WITH TILDE] - case 0x00C4: // [LATIN CAPITAL LETTER A WITH DIAERESIS] - case 0x00C5: // [LATIN CAPITAL LETTER A WITH RING ABOVE] - case 0x0100: // [LATIN CAPITAL LETTER A WITH MACRON] - case 0x0102: // [LATIN CAPITAL LETTER A WITH BREVE] - case 0x0104: // [LATIN CAPITAL LETTER A WITH OGONEK] - case 0x018F: // [LATIN CAPITAL LETTER SCHWA] - case 0x01CD: // [LATIN CAPITAL LETTER A WITH CARON] - case 0x01DE: // [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON] - case 0x01E0: // [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON] - case 0x01FA: // [LATIN CAPITAL LETTER A 
WITH RING ABOVE AND ACUTE] - case 0x0200: // [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE] - case 0x0202: // [LATIN CAPITAL LETTER A WITH INVERTED BREVE] - case 0x0226: // [LATIN CAPITAL LETTER A WITH DOT ABOVE] - case 0x023A: // [LATIN CAPITAL LETTER A WITH STROKE] - case 0x1D00: // [LATIN LETTER SMALL CAPITAL A] - case 0x1E00: // [LATIN CAPITAL LETTER A WITH RING BELOW] - case 0x1EA0: // [LATIN CAPITAL LETTER A WITH DOT BELOW] - case 0x1EA2: // [LATIN CAPITAL LETTER A WITH HOOK ABOVE] - case 0x1EA4: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE] - case 0x1EA6: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE] - case 0x1EA8: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] - case 0x1EAA: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE] - case 0x1EAC: // [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW] - case 0x1EAE: // [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE] - case 0x1EB0: // [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE] - case 0x1EB2: // [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE] - case 0x1EB4: // [LATIN CAPITAL LETTER A WITH BREVE AND TILDE] - case 0x1EB6: // [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW] - case 0x24B6: // [CIRCLED LATIN CAPITAL LETTER A] - case 0xFF21: // [FULLWIDTH LATIN CAPITAL LETTER A] - output[outputPos++] = L'A'; - break; - case 0x00E0: // [LATIN SMALL LETTER A WITH GRAVE] - case 0x00E1: // [LATIN SMALL LETTER A WITH ACUTE] - case 0x00E2: // [LATIN SMALL LETTER A WITH CIRCUMFLEX] - case 0x00E3: // [LATIN SMALL LETTER A WITH TILDE] - case 0x00E4: // [LATIN SMALL LETTER A WITH DIAERESIS] - case 0x00E5: // [LATIN SMALL LETTER A WITH RING ABOVE] - case 0x0101: // [LATIN SMALL LETTER A WITH MACRON] - case 0x0103: // [LATIN SMALL LETTER A WITH BREVE] - case 0x0105: // [LATIN SMALL LETTER A WITH OGONEK] - case 0x01CE: // [LATIN SMALL LETTER A WITH CARON] - case 0x01DF: // [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON] - case 0x01E1: // [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON] - case 0x01FB: 
// [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE] - case 0x0201: // [LATIN SMALL LETTER A WITH DOUBLE GRAVE] - case 0x0203: // [LATIN SMALL LETTER A WITH INVERTED BREVE] - case 0x0227: // [LATIN SMALL LETTER A WITH DOT ABOVE] - case 0x0250: // [LATIN SMALL LETTER TURNED A] - case 0x0259: // [LATIN SMALL LETTER SCHWA] - case 0x025A: // [LATIN SMALL LETTER SCHWA WITH HOOK] - case 0x1D8F: // [LATIN SMALL LETTER A WITH RETROFLEX HOOK] - case 0x1D95: // [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK] - case 0x1E01: // [LATIN SMALL LETTER A WITH RING BELOW] - case 0x1E9A: // [LATIN SMALL LETTER A WITH RIGHT HALF RING] - case 0x1EA1: // [LATIN SMALL LETTER A WITH DOT BELOW] - case 0x1EA3: // [LATIN SMALL LETTER A WITH HOOK ABOVE] - case 0x1EA5: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE] - case 0x1EA7: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE] - case 0x1EA9: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE] - case 0x1EAB: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE] - case 0x1EAD: // [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW] - case 0x1EAF: // [LATIN SMALL LETTER A WITH BREVE AND ACUTE] - case 0x1EB1: // [LATIN SMALL LETTER A WITH BREVE AND GRAVE] - case 0x1EB3: // [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE] - case 0x1EB5: // [LATIN SMALL LETTER A WITH BREVE AND TILDE] - case 0x1EB7: // [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW] - case 0x2090: // [LATIN SUBSCRIPT SMALL LETTER A] - case 0x2094: // [LATIN SUBSCRIPT SMALL LETTER SCHWA] - case 0x24D0: // [CIRCLED LATIN SMALL LETTER A] - case 0x2C65: // [LATIN SMALL LETTER A WITH STROKE] - case 0x2C6F: // [LATIN CAPITAL LETTER TURNED A] - case 0xFF41: // [FULLWIDTH LATIN SMALL LETTER A] - output[outputPos++] = L'a'; - break; - case 0xA732: // [LATIN CAPITAL LETTER AA] - output[outputPos++] = L'A'; - output[outputPos++] = L'A'; - break; - case 0x00C6: // [LATIN CAPITAL LETTER AE] - case 0x01E2: // [LATIN CAPITAL LETTER AE WITH MACRON] - case 0x01FC: // [LATIN CAPITAL LETTER 
AE WITH ACUTE] - case 0x1D01: // [LATIN LETTER SMALL CAPITAL AE] - output[outputPos++] = L'A'; - output[outputPos++] = L'E'; - break; - case 0xA734: // [LATIN CAPITAL LETTER AO] - output[outputPos++] = L'A'; - output[outputPos++] = L'O'; - break; - case 0xA736: // [LATIN CAPITAL LETTER AU] - output[outputPos++] = L'A'; - output[outputPos++] = L'U'; - break; - case 0xA738: // [LATIN CAPITAL LETTER AV] - case 0xA73A: // [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR] - output[outputPos++] = L'A'; - output[outputPos++] = L'V'; - break; - case 0xA73C: // [LATIN CAPITAL LETTER AY] - output[outputPos++] = L'A'; - output[outputPos++] = L'Y'; - break; - case 0x249C: // [PARENTHESIZED LATIN SMALL LETTER A] - output[outputPos++] = L'('; - output[outputPos++] = L'a'; - output[outputPos++] = L')'; - break; - case 0xA733: // [LATIN SMALL LETTER AA] - output[outputPos++] = L'a'; - output[outputPos++] = L'a'; - break; - case 0x00E6: // [LATIN SMALL LETTER AE] - case 0x01E3: // [LATIN SMALL LETTER AE WITH MACRON] - case 0x01FD: // [LATIN SMALL LETTER AE WITH ACUTE] - case 0x1D02: // [LATIN SMALL LETTER TURNED AE] - output[outputPos++] = L'a'; - output[outputPos++] = L'e'; - break; - case 0xA735: // [LATIN SMALL LETTER AO] - output[outputPos++] = L'a'; - output[outputPos++] = L'o'; - break; - case 0xA737: // [LATIN SMALL LETTER AU] - output[outputPos++] = L'a'; - output[outputPos++] = L'u'; - break; - case 0xA739: // [LATIN SMALL LETTER AV] - case 0xA73B: // [LATIN SMALL LETTER AV WITH HORIZONTAL BAR] - output[outputPos++] = L'a'; - output[outputPos++] = L'v'; - break; - case 0xA73D: // [LATIN SMALL LETTER AY] - output[outputPos++] = L'a'; - output[outputPos++] = L'y'; - break; - case 0x0181: // [LATIN CAPITAL LETTER B WITH HOOK] - case 0x0182: // [LATIN CAPITAL LETTER B WITH TOPBAR] - case 0x0243: // [LATIN CAPITAL LETTER B WITH STROKE] - case 0x0299: // [LATIN LETTER SMALL CAPITAL B] - case 0x1D03: // [LATIN LETTER SMALL CAPITAL BARRED B] - case 0x1E02: // [LATIN CAPITAL LETTER B 
WITH DOT ABOVE] - case 0x1E04: // [LATIN CAPITAL LETTER B WITH DOT BELOW] - case 0x1E06: // [LATIN CAPITAL LETTER B WITH LINE BELOW] - case 0x24B7: // [CIRCLED LATIN CAPITAL LETTER B] - case 0xFF22: // [FULLWIDTH LATIN CAPITAL LETTER B] - output[outputPos++] = L'B'; - break; - case 0x0180: // [LATIN SMALL LETTER B WITH STROKE] - case 0x0183: // [LATIN SMALL LETTER B WITH TOPBAR] - case 0x0253: // [LATIN SMALL LETTER B WITH HOOK] - case 0x1D6C: // [LATIN SMALL LETTER B WITH MIDDLE TILDE] - case 0x1D80: // [LATIN SMALL LETTER B WITH PALATAL HOOK] - case 0x1E03: // [LATIN SMALL LETTER B WITH DOT ABOVE] - case 0x1E05: // [LATIN SMALL LETTER B WITH DOT BELOW] - case 0x1E07: // [LATIN SMALL LETTER B WITH LINE BELOW] - case 0x24D1: // [CIRCLED LATIN SMALL LETTER B] - case 0xFF42: // [FULLWIDTH LATIN SMALL LETTER B] - output[outputPos++] = L'b'; - break; - case 0x249D: // [PARENTHESIZED LATIN SMALL LETTER B] - output[outputPos++] = L'('; - output[outputPos++] = L'b'; - output[outputPos++] = L')'; - break; - case 0x00C7: // [LATIN CAPITAL LETTER C WITH CEDILLA] - case 0x0106: // [LATIN CAPITAL LETTER C WITH ACUTE] - case 0x0108: // [LATIN CAPITAL LETTER C WITH CIRCUMFLEX] - case 0x010A: // [LATIN CAPITAL LETTER C WITH DOT ABOVE] - case 0x010C: // [LATIN CAPITAL LETTER C WITH CARON] - case 0x0187: // [LATIN CAPITAL LETTER C WITH HOOK] - case 0x023B: // [LATIN CAPITAL LETTER C WITH STROKE] - case 0x0297: // [LATIN LETTER STRETCHED C] - case 0x1D04: // [LATIN LETTER SMALL CAPITAL C] - case 0x1E08: // [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE] - case 0x24B8: // [CIRCLED LATIN CAPITAL LETTER C] - case 0xFF23: // [FULLWIDTH LATIN CAPITAL LETTER C] - output[outputPos++] = L'C'; - break; - case 0x00E7: // [LATIN SMALL LETTER C WITH CEDILLA] - case 0x0107: // [LATIN SMALL LETTER C WITH ACUTE] - case 0x0109: // [LATIN SMALL LETTER C WITH CIRCUMFLEX] - case 0x010B: // [LATIN SMALL LETTER C WITH DOT ABOVE] - case 0x010D: // [LATIN SMALL LETTER C WITH CARON] - case 0x0188: // 
[LATIN SMALL LETTER C WITH HOOK] - case 0x023C: // [LATIN SMALL LETTER C WITH STROKE] - case 0x0255: // [LATIN SMALL LETTER C WITH CURL] - case 0x1E09: // [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE] - case 0x2184: // [LATIN SMALL LETTER REVERSED C] - case 0x24D2: // [CIRCLED LATIN SMALL LETTER C] - case 0xA73E: // [LATIN CAPITAL LETTER REVERSED C WITH DOT] - case 0xA73F: // [LATIN SMALL LETTER REVERSED C WITH DOT] - case 0xFF43: // [FULLWIDTH LATIN SMALL LETTER C] - output[outputPos++] = L'c'; - break; - case 0x249E: // [PARENTHESIZED LATIN SMALL LETTER C] - output[outputPos++] = L'('; - output[outputPos++] = L'c'; - output[outputPos++] = L')'; - break; - case 0x00D0: // [LATIN CAPITAL LETTER ETH] - case 0x010E: // [LATIN CAPITAL LETTER D WITH CARON] - case 0x0110: // [LATIN CAPITAL LETTER D WITH STROKE] - case 0x0189: // [LATIN CAPITAL LETTER AFRICAN D] - case 0x018A: // [LATIN CAPITAL LETTER D WITH HOOK] - case 0x018B: // [LATIN CAPITAL LETTER D WITH TOPBAR] - case 0x1D05: // [LATIN LETTER SMALL CAPITAL D] - case 0x1D06: // [LATIN LETTER SMALL CAPITAL ETH] - case 0x1E0A: // [LATIN CAPITAL LETTER D WITH DOT ABOVE] - case 0x1E0C: // [LATIN CAPITAL LETTER D WITH DOT BELOW] - case 0x1E0E: // [LATIN CAPITAL LETTER D WITH LINE BELOW] - case 0x1E10: // [LATIN CAPITAL LETTER D WITH CEDILLA] - case 0x1E12: // [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW] - case 0x24B9: // [CIRCLED LATIN CAPITAL LETTER D] - case 0xA779: // [LATIN CAPITAL LETTER INSULAR D] - case 0xFF24: // [FULLWIDTH LATIN CAPITAL LETTER D] - output[outputPos++] = L'D'; - break; - case 0x00F0: // [LATIN SMALL LETTER ETH] - case 0x010F: // [LATIN SMALL LETTER D WITH CARON] - case 0x0111: // [LATIN SMALL LETTER D WITH STROKE] - case 0x018C: // [LATIN SMALL LETTER D WITH TOPBAR] - case 0x0221: // [LATIN SMALL LETTER D WITH CURL] - case 0x0256: // [LATIN SMALL LETTER D WITH TAIL] - case 0x0257: // [LATIN SMALL LETTER D WITH HOOK] - case 0x1D6D: // [LATIN SMALL LETTER D WITH MIDDLE TILDE] - case 0x1D81: // 
[LATIN SMALL LETTER D WITH PALATAL HOOK] - case 0x1D91: // [LATIN SMALL LETTER D WITH HOOK AND TAIL] - case 0x1E0B: // [LATIN SMALL LETTER D WITH DOT ABOVE] - case 0x1E0D: // [LATIN SMALL LETTER D WITH DOT BELOW] - case 0x1E0F: // [LATIN SMALL LETTER D WITH LINE BELOW] - case 0x1E11: // [LATIN SMALL LETTER D WITH CEDILLA] - case 0x1E13: // [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW] - case 0x24D3: // [CIRCLED LATIN SMALL LETTER D] - case 0xA77A: // [LATIN SMALL LETTER INSULAR D] - case 0xFF44: // [FULLWIDTH LATIN SMALL LETTER D] - output[outputPos++] = L'd'; - break; - case 0x01C4: // [LATIN CAPITAL LETTER DZ WITH CARON] - case 0x01F1: // [LATIN CAPITAL LETTER DZ] - output[outputPos++] = L'D'; - output[outputPos++] = L'Z'; - break; - case 0x01C5: // [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON] - case 0x01F2: // [LATIN CAPITAL LETTER D WITH SMALL LETTER Z] - output[outputPos++] = L'D'; - output[outputPos++] = L'z'; - break; - case 0x249F: // [PARENTHESIZED LATIN SMALL LETTER D] - output[outputPos++] = L'('; - output[outputPos++] = L'd'; - output[outputPos++] = L')'; - break; - case 0x0238: // [LATIN SMALL LETTER DB DIGRAPH] - output[outputPos++] = L'd'; - output[outputPos++] = L'b'; - break; - case 0x01C6: // [LATIN SMALL LETTER DZ WITH CARON] - case 0x01F3: // [LATIN SMALL LETTER DZ] - case 0x02A3: // [LATIN SMALL LETTER DZ DIGRAPH] - case 0x02A5: // [LATIN SMALL LETTER DZ DIGRAPH WITH CURL] - output[outputPos++] = L'd'; - output[outputPos++] = L'z'; - break; - case 0x00C8: // [LATIN CAPITAL LETTER E WITH GRAVE] - case 0x00C9: // [LATIN CAPITAL LETTER E WITH ACUTE] - case 0x00CA: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX] - case 0x00CB: // [LATIN CAPITAL LETTER E WITH DIAERESIS] - case 0x0112: // [LATIN CAPITAL LETTER E WITH MACRON] - case 0x0114: // [LATIN CAPITAL LETTER E WITH BREVE] - case 0x0116: // [LATIN CAPITAL LETTER E WITH DOT ABOVE] - case 0x0118: // [LATIN CAPITAL LETTER E WITH OGONEK] - case 0x011A: // [LATIN CAPITAL LETTER E WITH CARON] - 
case 0x018E: // [LATIN CAPITAL LETTER REVERSED E] - case 0x0190: // [LATIN CAPITAL LETTER OPEN E] - case 0x0204: // [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE] - case 0x0206: // [LATIN CAPITAL LETTER E WITH INVERTED BREVE] - case 0x0228: // [LATIN CAPITAL LETTER E WITH CEDILLA] - case 0x0246: // [LATIN CAPITAL LETTER E WITH STROKE] - case 0x1D07: // [LATIN LETTER SMALL CAPITAL E] - case 0x1E14: // [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE] - case 0x1E16: // [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE] - case 0x1E18: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW] - case 0x1E1A: // [LATIN CAPITAL LETTER E WITH TILDE BELOW] - case 0x1E1C: // [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE] - case 0x1EB8: // [LATIN CAPITAL LETTER E WITH DOT BELOW] - case 0x1EBA: // [LATIN CAPITAL LETTER E WITH HOOK ABOVE] - case 0x1EBC: // [LATIN CAPITAL LETTER E WITH TILDE] - case 0x1EBE: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE] - case 0x1EC0: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE] - case 0x1EC2: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] - case 0x1EC4: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE] - case 0x1EC6: // [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW] - case 0x24BA: // [CIRCLED LATIN CAPITAL LETTER E] - case 0x2C7B: // [LATIN LETTER SMALL CAPITAL TURNED E] - case 0xFF25: // [FULLWIDTH LATIN CAPITAL LETTER E] - output[outputPos++] = L'E'; - break; - case 0x00E8: // [LATIN SMALL LETTER E WITH GRAVE] - case 0x00E9: // [LATIN SMALL LETTER E WITH ACUTE] - case 0x00EA: // [LATIN SMALL LETTER E WITH CIRCUMFLEX] - case 0x00EB: // [LATIN SMALL LETTER E WITH DIAERESIS] - case 0x0113: // [LATIN SMALL LETTER E WITH MACRON] - case 0x0115: // [LATIN SMALL LETTER E WITH BREVE] - case 0x0117: // [LATIN SMALL LETTER E WITH DOT ABOVE] - case 0x0119: // [LATIN SMALL LETTER E WITH OGONEK] - case 0x011B: // [LATIN SMALL LETTER E WITH CARON] - case 0x01DD: // [LATIN SMALL LETTER TURNED E] - case 0x0205: // [LATIN SMALL LETTER E 
WITH DOUBLE GRAVE] - case 0x0207: // [LATIN SMALL LETTER E WITH INVERTED BREVE] - case 0x0229: // [LATIN SMALL LETTER E WITH CEDILLA] - case 0x0247: // [LATIN SMALL LETTER E WITH STROKE] - case 0x0258: // [LATIN SMALL LETTER REVERSED E] - case 0x025B: // [LATIN SMALL LETTER OPEN E] - case 0x025C: // [LATIN SMALL LETTER REVERSED OPEN E] - case 0x025D: // [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK] - case 0x025E: // [LATIN SMALL LETTER CLOSED REVERSED OPEN E] - case 0x029A: // [LATIN SMALL LETTER CLOSED OPEN E] - case 0x1D08: // [LATIN SMALL LETTER TURNED OPEN E] - case 0x1D92: // [LATIN SMALL LETTER E WITH RETROFLEX HOOK] - case 0x1D93: // [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK] - case 0x1D94: // [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK] - case 0x1E15: // [LATIN SMALL LETTER E WITH MACRON AND GRAVE] - case 0x1E17: // [LATIN SMALL LETTER E WITH MACRON AND ACUTE] - case 0x1E19: // [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW] - case 0x1E1B: // [LATIN SMALL LETTER E WITH TILDE BELOW] - case 0x1E1D: // [LATIN SMALL LETTER E WITH CEDILLA AND BREVE] - case 0x1EB9: // [LATIN SMALL LETTER E WITH DOT BELOW] - case 0x1EBB: // [LATIN SMALL LETTER E WITH HOOK ABOVE] - case 0x1EBD: // [LATIN SMALL LETTER E WITH TILDE] - case 0x1EBF: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE] - case 0x1EC1: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE] - case 0x1EC3: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE] - case 0x1EC5: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE] - case 0x1EC7: // [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW] - case 0x2091: // [LATIN SUBSCRIPT SMALL LETTER E] - case 0x24D4: // [CIRCLED LATIN SMALL LETTER E] - case 0x2C78: // [LATIN SMALL LETTER E WITH NOTCH] - case 0xFF45: // [FULLWIDTH LATIN SMALL LETTER E] - output[outputPos++] = L'e'; - break; - case 0x24A0: // [PARENTHESIZED LATIN SMALL LETTER E] - output[outputPos++] = L'('; - output[outputPos++] = L'e'; - output[outputPos++] = L')'; - break; - case 
0x0191: // [LATIN CAPITAL LETTER F WITH HOOK] - case 0x1E1E: // [LATIN CAPITAL LETTER F WITH DOT ABOVE] - case 0x24BB: // [CIRCLED LATIN CAPITAL LETTER F] - case 0xA730: // [LATIN LETTER SMALL CAPITAL F] - case 0xA77B: // [LATIN CAPITAL LETTER INSULAR F] - case 0xA7FB: // [LATIN EPIGRAPHIC LETTER REVERSED F] - case 0xFF26: // [FULLWIDTH LATIN CAPITAL LETTER F] - output[outputPos++] = L'F'; - break; - case 0x0192: // [LATIN SMALL LETTER F WITH HOOK] - case 0x1D6E: // [LATIN SMALL LETTER F WITH MIDDLE TILDE] - case 0x1D82: // [LATIN SMALL LETTER F WITH PALATAL HOOK] - case 0x1E1F: // [LATIN SMALL LETTER F WITH DOT ABOVE] - case 0x1E9B: // [LATIN SMALL LETTER LONG S WITH DOT ABOVE] - case 0x24D5: // [CIRCLED LATIN SMALL LETTER F] - case 0xA77C: // [LATIN SMALL LETTER INSULAR F] - case 0xFF46: // [FULLWIDTH LATIN SMALL LETTER F] - output[outputPos++] = L'f'; - break; - case 0x24A1: // [PARENTHESIZED LATIN SMALL LETTER F] - output[outputPos++] = L'('; - output[outputPos++] = L'f'; - output[outputPos++] = L')'; - break; - case 0xFB00: // [LATIN SMALL LIGATURE FF] - output[outputPos++] = L'f'; - output[outputPos++] = L'f'; - break; - case 0xFB03: // [LATIN SMALL LIGATURE FFI] - output[outputPos++] = L'f'; - output[outputPos++] = L'f'; - output[outputPos++] = L'i'; - break; - case 0xFB04: // [LATIN SMALL LIGATURE FFL] - output[outputPos++] = L'f'; - output[outputPos++] = L'f'; - output[outputPos++] = L'l'; - break; - case 0xFB01: // [LATIN SMALL LIGATURE FI] - output[outputPos++] = L'f'; - output[outputPos++] = L'i'; - break; - case 0xFB02: // [LATIN SMALL LIGATURE FL] - output[outputPos++] = L'f'; - output[outputPos++] = L'l'; - break; - case 0x011C: // [LATIN CAPITAL LETTER G WITH CIRCUMFLEX] - case 0x011E: // [LATIN CAPITAL LETTER G WITH BREVE] - case 0x0120: // [LATIN CAPITAL LETTER G WITH DOT ABOVE] - case 0x0122: // [LATIN CAPITAL LETTER G WITH CEDILLA] - case 0x0193: // [LATIN CAPITAL LETTER G WITH HOOK] - case 0x01E4: // [LATIN CAPITAL LETTER G WITH STROKE] - case 
0x01E5: // [LATIN SMALL LETTER G WITH STROKE] - case 0x01E6: // [LATIN CAPITAL LETTER G WITH CARON] - case 0x01E7: // [LATIN SMALL LETTER G WITH CARON] - case 0x01F4: // [LATIN CAPITAL LETTER G WITH ACUTE] - case 0x0262: // [LATIN LETTER SMALL CAPITAL G] - case 0x029B: // [LATIN LETTER SMALL CAPITAL G WITH HOOK] - case 0x1E20: // [LATIN CAPITAL LETTER G WITH MACRON] - case 0x24BC: // [CIRCLED LATIN CAPITAL LETTER G] - case 0xA77D: // [LATIN CAPITAL LETTER INSULAR G] - case 0xA77E: // [LATIN CAPITAL LETTER TURNED INSULAR G] - case 0xFF27: // [FULLWIDTH LATIN CAPITAL LETTER G] - output[outputPos++] = L'G'; - break; - case 0x011D: // [LATIN SMALL LETTER G WITH CIRCUMFLEX] - case 0x011F: // [LATIN SMALL LETTER G WITH BREVE] - case 0x0121: // [LATIN SMALL LETTER G WITH DOT ABOVE] - case 0x0123: // [LATIN SMALL LETTER G WITH CEDILLA] - case 0x01F5: // [LATIN SMALL LETTER G WITH ACUTE] - case 0x0260: // [LATIN SMALL LETTER G WITH HOOK] - case 0x0261: // [LATIN SMALL LETTER SCRIPT G] - case 0x1D77: // [LATIN SMALL LETTER TURNED G] - case 0x1D79: // [LATIN SMALL LETTER INSULAR G] - case 0x1D83: // [LATIN SMALL LETTER G WITH PALATAL HOOK] - case 0x1E21: // [LATIN SMALL LETTER G WITH MACRON] - case 0x24D6: // [CIRCLED LATIN SMALL LETTER G] - case 0xA77F: // [LATIN SMALL LETTER TURNED INSULAR G] - case 0xFF47: // [FULLWIDTH LATIN SMALL LETTER G] - output[outputPos++] = L'g'; - break; - case 0x24A2: // [PARENTHESIZED LATIN SMALL LETTER G] - output[outputPos++] = L'('; - output[outputPos++] = L'g'; - output[outputPos++] = L')'; - break; - case 0x0124: // [LATIN CAPITAL LETTER H WITH CIRCUMFLEX] - case 0x0126: // [LATIN CAPITAL LETTER H WITH STROKE] - case 0x021E: // [LATIN CAPITAL LETTER H WITH CARON] - case 0x029C: // [LATIN LETTER SMALL CAPITAL H] - case 0x1E22: // [LATIN CAPITAL LETTER H WITH DOT ABOVE] - case 0x1E24: // [LATIN CAPITAL LETTER H WITH DOT BELOW] - case 0x1E26: // [LATIN CAPITAL LETTER H WITH DIAERESIS] - case 0x1E28: // [LATIN CAPITAL LETTER H WITH CEDILLA] - 
case 0x1E2A: // [LATIN CAPITAL LETTER H WITH BREVE BELOW] - case 0x24BD: // [CIRCLED LATIN CAPITAL LETTER H] - case 0x2C67: // [LATIN CAPITAL LETTER H WITH DESCENDER] - case 0x2C75: // [LATIN CAPITAL LETTER HALF H] - case 0xFF28: // [FULLWIDTH LATIN CAPITAL LETTER H] - output[outputPos++] = L'H'; - break; - case 0x0125: // [LATIN SMALL LETTER H WITH CIRCUMFLEX] - case 0x0127: // [LATIN SMALL LETTER H WITH STROKE] - case 0x021F: // [LATIN SMALL LETTER H WITH CARON] - case 0x0265: // [LATIN SMALL LETTER TURNED H] - case 0x0266: // [LATIN SMALL LETTER H WITH HOOK] - case 0x02AE: // [LATIN SMALL LETTER TURNED H WITH FISHHOOK] - case 0x02AF: // [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL] - case 0x1E23: // [LATIN SMALL LETTER H WITH DOT ABOVE] - case 0x1E25: // [LATIN SMALL LETTER H WITH DOT BELOW] - case 0x1E27: // [LATIN SMALL LETTER H WITH DIAERESIS] - case 0x1E29: // [LATIN SMALL LETTER H WITH CEDILLA] - case 0x1E2B: // [LATIN SMALL LETTER H WITH BREVE BELOW] - case 0x1E96: // [LATIN SMALL LETTER H WITH LINE BELOW] - case 0x24D7: // [CIRCLED LATIN SMALL LETTER H] - case 0x2C68: // [LATIN SMALL LETTER H WITH DESCENDER] - case 0x2C76: // [LATIN SMALL LETTER HALF H] - case 0xFF48: // [FULLWIDTH LATIN SMALL LETTER H] - output[outputPos++] = L'h'; - break; - case 0x01F6: // [LATIN CAPITAL LETTER HWAIR] - output[outputPos++] = L'H'; - output[outputPos++] = L'V'; - break; - case 0x24A3: // [PARENTHESIZED LATIN SMALL LETTER H] - output[outputPos++] = L'('; - output[outputPos++] = L'h'; - output[outputPos++] = L')'; - break; - case 0x0195: // [LATIN SMALL LETTER HV] - output[outputPos++] = L'h'; - output[outputPos++] = L'v'; - break; - case 0x00CC: // [LATIN CAPITAL LETTER I WITH GRAVE] - case 0x00CD: // [LATIN CAPITAL LETTER I WITH ACUTE] - case 0x00CE: // [LATIN CAPITAL LETTER I WITH CIRCUMFLEX] - case 0x00CF: // [LATIN CAPITAL LETTER I WITH DIAERESIS] - case 0x0128: // [LATIN CAPITAL LETTER I WITH TILDE] - case 0x012A: // [LATIN CAPITAL LETTER I WITH MACRON] - 
case 0x012C: // [LATIN CAPITAL LETTER I WITH BREVE] - case 0x012E: // [LATIN CAPITAL LETTER I WITH OGONEK] - case 0x0130: // [LATIN CAPITAL LETTER I WITH DOT ABOVE] - case 0x0196: // [LATIN CAPITAL LETTER IOTA] - case 0x0197: // [LATIN CAPITAL LETTER I WITH STROKE] - case 0x01CF: // [LATIN CAPITAL LETTER I WITH CARON] - case 0x0208: // [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE] - case 0x020A: // [LATIN CAPITAL LETTER I WITH INVERTED BREVE] - case 0x026A: // [LATIN LETTER SMALL CAPITAL I] - case 0x1D7B: // [LATIN SMALL CAPITAL LETTER I WITH STROKE] - case 0x1E2C: // [LATIN CAPITAL LETTER I WITH TILDE BELOW] - case 0x1E2E: // [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE] - case 0x1EC8: // [LATIN CAPITAL LETTER I WITH HOOK ABOVE] - case 0x1ECA: // [LATIN CAPITAL LETTER I WITH DOT BELOW] - case 0x24BE: // [CIRCLED LATIN CAPITAL LETTER I] - case 0xA7FE: // [LATIN EPIGRAPHIC LETTER I LONGA] - case 0xFF29: // [FULLWIDTH LATIN CAPITAL LETTER I] - output[outputPos++] = L'I'; - break; - case 0x00EC: // [LATIN SMALL LETTER I WITH GRAVE] - case 0x00ED: // [LATIN SMALL LETTER I WITH ACUTE] - case 0x00EE: // [LATIN SMALL LETTER I WITH CIRCUMFLEX] - case 0x00EF: // [LATIN SMALL LETTER I WITH DIAERESIS] - case 0x0129: // [LATIN SMALL LETTER I WITH TILDE] - case 0x012B: // [LATIN SMALL LETTER I WITH MACRON] - case 0x012D: // [LATIN SMALL LETTER I WITH BREVE] - case 0x012F: // [LATIN SMALL LETTER I WITH OGONEK] - case 0x0131: // [LATIN SMALL LETTER DOTLESS I] - case 0x01D0: // [LATIN SMALL LETTER I WITH CARON] - case 0x0209: // [LATIN SMALL LETTER I WITH DOUBLE GRAVE] - case 0x020B: // [LATIN SMALL LETTER I WITH INVERTED BREVE] - case 0x0268: // [LATIN SMALL LETTER I WITH STROKE] - case 0x1D09: // [LATIN SMALL LETTER TURNED I] - case 0x1D62: // [LATIN SUBSCRIPT SMALL LETTER I] - case 0x1D7C: // [LATIN SMALL LETTER IOTA WITH STROKE] - case 0x1D96: // [LATIN SMALL LETTER I WITH RETROFLEX HOOK] - case 0x1E2D: // [LATIN SMALL LETTER I WITH TILDE BELOW] - case 0x1E2F: // [LATIN SMALL 
LETTER I WITH DIAERESIS AND ACUTE] - case 0x1EC9: // [LATIN SMALL LETTER I WITH HOOK ABOVE] - case 0x1ECB: // [LATIN SMALL LETTER I WITH DOT BELOW] - case 0x2071: // [SUPERSCRIPT LATIN SMALL LETTER I] - case 0x24D8: // [CIRCLED LATIN SMALL LETTER I] - case 0xFF49: // [FULLWIDTH LATIN SMALL LETTER I] - output[outputPos++] = L'i'; - break; - case 0x0132: // [LATIN CAPITAL LIGATURE IJ] - output[outputPos++] = L'I'; - output[outputPos++] = L'J'; - break; - case 0x24A4: // [PARENTHESIZED LATIN SMALL LETTER I] - output[outputPos++] = L'('; - output[outputPos++] = L'i'; - output[outputPos++] = L')'; - break; - case 0x0133: // [LATIN SMALL LIGATURE IJ] - output[outputPos++] = L'i'; - output[outputPos++] = L'j'; - break; - case 0x0134: // [LATIN CAPITAL LETTER J WITH CIRCUMFLEX] - case 0x0248: // [LATIN CAPITAL LETTER J WITH STROKE] - case 0x1D0A: // [LATIN LETTER SMALL CAPITAL J] - case 0x24BF: // [CIRCLED LATIN CAPITAL LETTER J] - case 0xFF2A: // [FULLWIDTH LATIN CAPITAL LETTER J] - output[outputPos++] = L'J'; - break; - case 0x0135: // [LATIN SMALL LETTER J WITH CIRCUMFLEX] - case 0x01F0: // [LATIN SMALL LETTER J WITH CARON] - case 0x0237: // [LATIN SMALL LETTER DOTLESS J] - case 0x0249: // [LATIN SMALL LETTER J WITH STROKE] - case 0x025F: // [LATIN SMALL LETTER DOTLESS J WITH STROKE] - case 0x0284: // [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK] - case 0x029D: // [LATIN SMALL LETTER J WITH CROSSED-TAIL] - case 0x24D9: // [CIRCLED LATIN SMALL LETTER J] - case 0x2C7C: // [LATIN SUBSCRIPT SMALL LETTER J] - case 0xFF4A: // [FULLWIDTH LATIN SMALL LETTER J] - output[outputPos++] = L'j'; - break; - case 0x24A5: // [PARENTHESIZED LATIN SMALL LETTER J] - output[outputPos++] = L'('; - output[outputPos++] = L'j'; - output[outputPos++] = L')'; - break; - case 0x0136: // [LATIN CAPITAL LETTER K WITH CEDILLA] - case 0x0198: // [LATIN CAPITAL LETTER K WITH HOOK] - case 0x01E8: // [LATIN CAPITAL LETTER K WITH CARON] - case 0x1D0B: // [LATIN LETTER SMALL CAPITAL K] - case 0x1E30: 
// [LATIN CAPITAL LETTER K WITH ACUTE] - case 0x1E32: // [LATIN CAPITAL LETTER K WITH DOT BELOW] - case 0x1E34: // [LATIN CAPITAL LETTER K WITH LINE BELOW] - case 0x24C0: // [CIRCLED LATIN CAPITAL LETTER K] - case 0x2C69: // [LATIN CAPITAL LETTER K WITH DESCENDER] - case 0xA740: // [LATIN CAPITAL LETTER K WITH STROKE] - case 0xA742: // [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE] - case 0xA744: // [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE] - case 0xFF2B: // [FULLWIDTH LATIN CAPITAL LETTER K] - output[outputPos++] = L'K'; - break; - case 0x0137: // [LATIN SMALL LETTER K WITH CEDILLA] - case 0x0199: // [LATIN SMALL LETTER K WITH HOOK] - case 0x01E9: // [LATIN SMALL LETTER K WITH CARON] - case 0x029E: // [LATIN SMALL LETTER TURNED K] - case 0x1D84: // [LATIN SMALL LETTER K WITH PALATAL HOOK] - case 0x1E31: // [LATIN SMALL LETTER K WITH ACUTE] - case 0x1E33: // [LATIN SMALL LETTER K WITH DOT BELOW] - case 0x1E35: // [LATIN SMALL LETTER K WITH LINE BELOW] - case 0x24DA: // [CIRCLED LATIN SMALL LETTER K] - case 0x2C6A: // [LATIN SMALL LETTER K WITH DESCENDER] - case 0xA741: // [LATIN SMALL LETTER K WITH STROKE] - case 0xA743: // [LATIN SMALL LETTER K WITH DIAGONAL STROKE] - case 0xA745: // [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE] - case 0xFF4B: // [FULLWIDTH LATIN SMALL LETTER K] - output[outputPos++] = L'k'; - break; - case 0x24A6: // [PARENTHESIZED LATIN SMALL LETTER K] - output[outputPos++] = L'('; - output[outputPos++] = L'k'; - output[outputPos++] = L')'; - break; - case 0x0139: // [LATIN CAPITAL LETTER L WITH ACUTE] - case 0x013B: // [LATIN CAPITAL LETTER L WITH CEDILLA] - case 0x013D: // [LATIN CAPITAL LETTER L WITH CARON] - case 0x013F: // [LATIN CAPITAL LETTER L WITH MIDDLE DOT] - case 0x0141: // [LATIN CAPITAL LETTER L WITH STROKE] - case 0x023D: // [LATIN CAPITAL LETTER L WITH BAR] - case 0x029F: // [LATIN LETTER SMALL CAPITAL L] - case 0x1D0C: // [LATIN LETTER SMALL CAPITAL L WITH STROKE] - case 0x1E36: // [LATIN CAPITAL LETTER L 
WITH DOT BELOW] - case 0x1E38: // [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON] - case 0x1E3A: // [LATIN CAPITAL LETTER L WITH LINE BELOW] - case 0x1E3C: // [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW] - case 0x24C1: // [CIRCLED LATIN CAPITAL LETTER L] - case 0x2C60: // [LATIN CAPITAL LETTER L WITH DOUBLE BAR] - case 0x2C62: // [LATIN CAPITAL LETTER L WITH MIDDLE TILDE] - case 0xA746: // [LATIN CAPITAL LETTER BROKEN L] - case 0xA748: // [LATIN CAPITAL LETTER L WITH HIGH STROKE] - case 0xA780: // [LATIN CAPITAL LETTER TURNED L] - case 0xFF2C: // [FULLWIDTH LATIN CAPITAL LETTER L] - output[outputPos++] = L'L'; - break; - case 0x013A: // [LATIN SMALL LETTER L WITH ACUTE] - case 0x013C: // [LATIN SMALL LETTER L WITH CEDILLA] - case 0x013E: // [LATIN SMALL LETTER L WITH CARON] - case 0x0140: // [LATIN SMALL LETTER L WITH MIDDLE DOT] - case 0x0142: // [LATIN SMALL LETTER L WITH STROKE] - case 0x019A: // [LATIN SMALL LETTER L WITH BAR] - case 0x0234: // [LATIN SMALL LETTER L WITH CURL] - case 0x026B: // [LATIN SMALL LETTER L WITH MIDDLE TILDE] - case 0x026C: // [LATIN SMALL LETTER L WITH BELT] - case 0x026D: // [LATIN SMALL LETTER L WITH RETROFLEX HOOK] - case 0x1D85: // [LATIN SMALL LETTER L WITH PALATAL HOOK] - case 0x1E37: // [LATIN SMALL LETTER L WITH DOT BELOW] - case 0x1E39: // [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON] - case 0x1E3B: // [LATIN SMALL LETTER L WITH LINE BELOW] - case 0x1E3D: // [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW] - case 0x24DB: // [CIRCLED LATIN SMALL LETTER L] - case 0x2C61: // [LATIN SMALL LETTER L WITH DOUBLE BAR] - case 0xA747: // [LATIN SMALL LETTER BROKEN L] - case 0xA749: // [LATIN SMALL LETTER L WITH HIGH STROKE] - case 0xA781: // [LATIN SMALL LETTER TURNED L] - case 0xFF4C: // [FULLWIDTH LATIN SMALL LETTER L] - output[outputPos++] = L'l'; - break; - case 0x01C7: // [LATIN CAPITAL LETTER LJ] - output[outputPos++] = L'L'; - output[outputPos++] = L'J'; - break; - case 0x1EFA: // [LATIN CAPITAL LETTER MIDDLE-WELSH LL] - 
output[outputPos++] = L'L'; - output[outputPos++] = L'L'; - break; - case 0x01C8: // [LATIN CAPITAL LETTER L WITH SMALL LETTER J] - output[outputPos++] = L'L'; - output[outputPos++] = L'j'; - break; - case 0x24A7: // [PARENTHESIZED LATIN SMALL LETTER L] - output[outputPos++] = L'('; - output[outputPos++] = L'l'; - output[outputPos++] = L')'; - break; - case 0x01C9: // [LATIN SMALL LETTER LJ] - output[outputPos++] = L'l'; - output[outputPos++] = L'j'; - break; - case 0x1EFB: // [LATIN SMALL LETTER MIDDLE-WELSH LL] - output[outputPos++] = L'l'; - output[outputPos++] = L'l'; - break; - case 0x02AA: // [LATIN SMALL LETTER LS DIGRAPH] - output[outputPos++] = L'l'; - output[outputPos++] = L's'; - break; - case 0x02AB: // [LATIN SMALL LETTER LZ DIGRAPH] - output[outputPos++] = L'l'; - output[outputPos++] = L'z'; - break; - case 0x019C: // [LATIN CAPITAL LETTER TURNED M] - case 0x1D0D: // [LATIN LETTER SMALL CAPITAL M] - case 0x1E3E: // [LATIN CAPITAL LETTER M WITH ACUTE] - case 0x1E40: // [LATIN CAPITAL LETTER M WITH DOT ABOVE] - case 0x1E42: // [LATIN CAPITAL LETTER M WITH DOT BELOW] - case 0x24C2: // [CIRCLED LATIN CAPITAL LETTER M] - case 0x2C6E: // [LATIN CAPITAL LETTER M WITH HOOK] - case 0xA7FD: // [LATIN EPIGRAPHIC LETTER INVERTED M] - case 0xA7FF: // [LATIN EPIGRAPHIC LETTER ARCHAIC M] - case 0xFF2D: // [FULLWIDTH LATIN CAPITAL LETTER M] - output[outputPos++] = L'M'; - break; - case 0x026F: // [LATIN SMALL LETTER TURNED M] - case 0x0270: // [LATIN SMALL LETTER TURNED M WITH LONG LEG] - case 0x0271: // [LATIN SMALL LETTER M WITH HOOK] - case 0x1D6F: // [LATIN SMALL LETTER M WITH MIDDLE TILDE] - case 0x1D86: // [LATIN SMALL LETTER M WITH PALATAL HOOK] - case 0x1E3F: // [LATIN SMALL LETTER M WITH ACUTE] - case 0x1E41: // [LATIN SMALL LETTER M WITH DOT ABOVE] - case 0x1E43: // [LATIN SMALL LETTER M WITH DOT BELOW] - case 0x24DC: // [CIRCLED LATIN SMALL LETTER M] - case 0xFF4D: // [FULLWIDTH LATIN SMALL LETTER M] - output[outputPos++] = L'm'; - break; - case 0x24A8: // 
[PARENTHESIZED LATIN SMALL LETTER M] - output[outputPos++] = L'('; - output[outputPos++] = L'm'; - output[outputPos++] = L')'; - break; - case 0x00D1: // [LATIN CAPITAL LETTER N WITH TILDE] - case 0x0143: // [LATIN CAPITAL LETTER N WITH ACUTE] - case 0x0145: // [LATIN CAPITAL LETTER N WITH CEDILLA] - case 0x0147: // [LATIN CAPITAL LETTER N WITH CARON] - case 0x014A: // [LATIN CAPITAL LETTER ENG] - case 0x019D: // [LATIN CAPITAL LETTER N WITH LEFT HOOK] - case 0x01F8: // [LATIN CAPITAL LETTER N WITH GRAVE] - case 0x0220: // [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG] - case 0x0274: // [LATIN LETTER SMALL CAPITAL N] - case 0x1D0E: // [LATIN LETTER SMALL CAPITAL REVERSED N] - case 0x1E44: // [LATIN CAPITAL LETTER N WITH DOT ABOVE] - case 0x1E46: // [LATIN CAPITAL LETTER N WITH DOT BELOW] - case 0x1E48: // [LATIN CAPITAL LETTER N WITH LINE BELOW] - case 0x1E4A: // [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW] - case 0x24C3: // [CIRCLED LATIN CAPITAL LETTER N] - case 0xFF2E: // [FULLWIDTH LATIN CAPITAL LETTER N] - output[outputPos++] = L'N'; - break; - case 0x00F1: // [LATIN SMALL LETTER N WITH TILDE] - case 0x0144: // [LATIN SMALL LETTER N WITH ACUTE] - case 0x0146: // [LATIN SMALL LETTER N WITH CEDILLA] - case 0x0148: // [LATIN SMALL LETTER N WITH CARON] - case 0x0149: // [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE] - case 0x014B: // [LATIN SMALL LETTER ENG] - case 0x019E: // [LATIN SMALL LETTER N WITH LONG RIGHT LEG] - case 0x01F9: // [LATIN SMALL LETTER N WITH GRAVE] - case 0x0235: // [LATIN SMALL LETTER N WITH CURL] - case 0x0272: // [LATIN SMALL LETTER N WITH LEFT HOOK] - case 0x0273: // [LATIN SMALL LETTER N WITH RETROFLEX HOOK] - case 0x1D70: // [LATIN SMALL LETTER N WITH MIDDLE TILDE] - case 0x1D87: // [LATIN SMALL LETTER N WITH PALATAL HOOK] - case 0x1E45: // [LATIN SMALL LETTER N WITH DOT ABOVE] - case 0x1E47: // [LATIN SMALL LETTER N WITH DOT BELOW] - case 0x1E49: // [LATIN SMALL LETTER N WITH LINE BELOW] - case 0x1E4B: // [LATIN SMALL LETTER N WITH 
CIRCUMFLEX BELOW] - case 0x207F: // [SUPERSCRIPT LATIN SMALL LETTER N] - case 0x24DD: // [CIRCLED LATIN SMALL LETTER N] - case 0xFF4E: // [FULLWIDTH LATIN SMALL LETTER N] - output[outputPos++] = L'n'; - break; - case 0x01CA: // [LATIN CAPITAL LETTER NJ] - output[outputPos++] = L'N'; - output[outputPos++] = L'J'; - break; - case 0x01CB: // [LATIN CAPITAL LETTER N WITH SMALL LETTER J] - output[outputPos++] = L'N'; - output[outputPos++] = L'j'; - break; - case 0x24A9: // [PARENTHESIZED LATIN SMALL LETTER N] - output[outputPos++] = L'('; - output[outputPos++] = L'n'; - output[outputPos++] = L')'; - break; - case 0x01CC: // [LATIN SMALL LETTER NJ] - output[outputPos++] = L'n'; - output[outputPos++] = L'j'; - break; - case 0x00D2: // [LATIN CAPITAL LETTER O WITH GRAVE] - case 0x00D3: // [LATIN CAPITAL LETTER O WITH ACUTE] - case 0x00D4: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX] - case 0x00D5: // [LATIN CAPITAL LETTER O WITH TILDE] - case 0x00D6: // [LATIN CAPITAL LETTER O WITH DIAERESIS] - case 0x00D8: // [LATIN CAPITAL LETTER O WITH STROKE] - case 0x014C: // [LATIN CAPITAL LETTER O WITH MACRON] - case 0x014E: // [LATIN CAPITAL LETTER O WITH BREVE] - case 0x0150: // [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE] - case 0x0186: // [LATIN CAPITAL LETTER OPEN O] - case 0x019F: // [LATIN CAPITAL LETTER O WITH MIDDLE TILDE] - case 0x01A0: // [LATIN CAPITAL LETTER O WITH HORN] - case 0x01D1: // [LATIN CAPITAL LETTER O WITH CARON] - case 0x01EA: // [LATIN CAPITAL LETTER O WITH OGONEK] - case 0x01EC: // [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON] - case 0x01FE: // [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE] - case 0x020C: // [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE] - case 0x020E: // [LATIN CAPITAL LETTER O WITH INVERTED BREVE] - case 0x022A: // [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON] - case 0x022C: // [LATIN CAPITAL LETTER O WITH TILDE AND MACRON] - case 0x022E: // [LATIN CAPITAL LETTER O WITH DOT ABOVE] - case 0x0230: // [LATIN CAPITAL LETTER O WITH DOT ABOVE 
AND MACRON] - case 0x1D0F: // [LATIN LETTER SMALL CAPITAL O] - case 0x1D10: // [LATIN LETTER SMALL CAPITAL OPEN O] - case 0x1E4C: // [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE] - case 0x1E4E: // [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS] - case 0x1E50: // [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE] - case 0x1E52: // [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE] - case 0x1ECC: // [LATIN CAPITAL LETTER O WITH DOT BELOW] - case 0x1ECE: // [LATIN CAPITAL LETTER O WITH HOOK ABOVE] - case 0x1ED0: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE] - case 0x1ED2: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE] - case 0x1ED4: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] - case 0x1ED6: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE] - case 0x1ED8: // [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW] - case 0x1EDA: // [LATIN CAPITAL LETTER O WITH HORN AND ACUTE] - case 0x1EDC: // [LATIN CAPITAL LETTER O WITH HORN AND GRAVE] - case 0x1EDE: // [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE] - case 0x1EE0: // [LATIN CAPITAL LETTER O WITH HORN AND TILDE] - case 0x1EE2: // [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW] - case 0x24C4: // [CIRCLED LATIN CAPITAL LETTER O] - case 0xA74A: // [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY] - case 0xA74C: // [LATIN CAPITAL LETTER O WITH LOOP] - case 0xFF2F: // [FULLWIDTH LATIN CAPITAL LETTER O] - output[outputPos++] = L'O'; - break; - case 0x00F2: // [LATIN SMALL LETTER O WITH GRAVE] - case 0x00F3: // [LATIN SMALL LETTER O WITH ACUTE] - case 0x00F4: // [LATIN SMALL LETTER O WITH CIRCUMFLEX] - case 0x00F5: // [LATIN SMALL LETTER O WITH TILDE] - case 0x00F6: // [LATIN SMALL LETTER O WITH DIAERESIS] - case 0x00F8: // [LATIN SMALL LETTER O WITH STROKE] - case 0x014D: // [LATIN SMALL LETTER O WITH MACRON] - case 0x014F: // [LATIN SMALL LETTER O WITH BREVE] - case 0x0151: // [LATIN SMALL LETTER O WITH DOUBLE ACUTE] - case 0x01A1: // [LATIN SMALL LETTER O WITH HORN] - case 0x01D2: // [LATIN 
SMALL LETTER O WITH CARON] - case 0x01EB: // [LATIN SMALL LETTER O WITH OGONEK] - case 0x01ED: // [LATIN SMALL LETTER O WITH OGONEK AND MACRON] - case 0x01FF: // [LATIN SMALL LETTER O WITH STROKE AND ACUTE] - case 0x020D: // [LATIN SMALL LETTER O WITH DOUBLE GRAVE] - case 0x020F: // [LATIN SMALL LETTER O WITH INVERTED BREVE] - case 0x022B: // [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON] - case 0x022D: // [LATIN SMALL LETTER O WITH TILDE AND MACRON] - case 0x022F: // [LATIN SMALL LETTER O WITH DOT ABOVE] - case 0x0231: // [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON] - case 0x0254: // [LATIN SMALL LETTER OPEN O] - case 0x0275: // [LATIN SMALL LETTER BARRED O] - case 0x1D16: // [LATIN SMALL LETTER TOP HALF O] - case 0x1D17: // [LATIN SMALL LETTER BOTTOM HALF O] - case 0x1D97: // [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK] - case 0x1E4D: // [LATIN SMALL LETTER O WITH TILDE AND ACUTE] - case 0x1E4F: // [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS] - case 0x1E51: // [LATIN SMALL LETTER O WITH MACRON AND GRAVE] - case 0x1E53: // [LATIN SMALL LETTER O WITH MACRON AND ACUTE] - case 0x1ECD: // [LATIN SMALL LETTER O WITH DOT BELOW] - case 0x1ECF: // [LATIN SMALL LETTER O WITH HOOK ABOVE] - case 0x1ED1: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE] - case 0x1ED3: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE] - case 0x1ED5: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE] - case 0x1ED7: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE] - case 0x1ED9: // [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW] - case 0x1EDB: // [LATIN SMALL LETTER O WITH HORN AND ACUTE] - case 0x1EDD: // [LATIN SMALL LETTER O WITH HORN AND GRAVE] - case 0x1EDF: // [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE] - case 0x1EE1: // [LATIN SMALL LETTER O WITH HORN AND TILDE] - case 0x1EE3: // [LATIN SMALL LETTER O WITH HORN AND DOT BELOW] - case 0x2092: // [LATIN SUBSCRIPT SMALL LETTER O] - case 0x24DE: // [CIRCLED LATIN SMALL LETTER O] - case 0x2C7A: // [LATIN SMALL 
LETTER O WITH LOW RING INSIDE] - case 0xA74B: // [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY] - case 0xA74D: // [LATIN SMALL LETTER O WITH LOOP] - case 0xFF4F: // [FULLWIDTH LATIN SMALL LETTER O] - output[outputPos++] = L'o'; - break; - case 0x0152: // [LATIN CAPITAL LIGATURE OE] - case 0x0276: // [LATIN LETTER SMALL CAPITAL OE] - output[outputPos++] = L'O'; - output[outputPos++] = L'E'; - break; - case 0xA74E: // [LATIN CAPITAL LETTER OO] - output[outputPos++] = L'O'; - output[outputPos++] = L'O'; - break; - case 0x0222: // [LATIN CAPITAL LETTER OU] - case 0x1D15: // [LATIN LETTER SMALL CAPITAL OU] - output[outputPos++] = L'O'; - output[outputPos++] = L'U'; - break; - case 0x24AA: // [PARENTHESIZED LATIN SMALL LETTER O] - output[outputPos++] = L'('; - output[outputPos++] = L'o'; - output[outputPos++] = L')'; - break; - case 0x0153: // [LATIN SMALL LIGATURE OE] - case 0x1D14: // [LATIN SMALL LETTER TURNED OE] - output[outputPos++] = L'o'; - output[outputPos++] = L'e'; - break; - case 0xA74F: // [LATIN SMALL LETTER OO] - output[outputPos++] = L'o'; - output[outputPos++] = L'o'; - break; - case 0x0223: // [LATIN SMALL LETTER OU] - output[outputPos++] = L'o'; - output[outputPos++] = L'u'; - break; - case 0x01A4: // [LATIN CAPITAL LETTER P WITH HOOK] - case 0x1D18: // [LATIN LETTER SMALL CAPITAL P] - case 0x1E54: // [LATIN CAPITAL LETTER P WITH ACUTE] - case 0x1E56: // [LATIN CAPITAL LETTER P WITH DOT ABOVE] - case 0x24C5: // [CIRCLED LATIN CAPITAL LETTER P] - case 0x2C63: // [LATIN CAPITAL LETTER P WITH STROKE] - case 0xA750: // [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER] - case 0xA752: // [LATIN CAPITAL LETTER P WITH FLOURISH] - case 0xA754: // [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL] - case 0xFF30: // [FULLWIDTH LATIN CAPITAL LETTER P] - output[outputPos++] = L'P'; - break; - case 0x01A5: // [LATIN SMALL LETTER P WITH HOOK] - case 0x1D71: // [LATIN SMALL LETTER P WITH MIDDLE TILDE] - case 0x1D7D: // [LATIN SMALL LETTER P WITH STROKE] - case 0x1D88: 
// [LATIN SMALL LETTER P WITH PALATAL HOOK] - case 0x1E55: // [LATIN SMALL LETTER P WITH ACUTE] - case 0x1E57: // [LATIN SMALL LETTER P WITH DOT ABOVE] - case 0x24DF: // [CIRCLED LATIN SMALL LETTER P] - case 0xA751: // [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER] - case 0xA753: // [LATIN SMALL LETTER P WITH FLOURISH] - case 0xA755: // [LATIN SMALL LETTER P WITH SQUIRREL TAIL] - case 0xA7FC: // [LATIN EPIGRAPHIC LETTER REVERSED P] - case 0xFF50: // [FULLWIDTH LATIN SMALL LETTER P] - output[outputPos++] = L'p'; - break; - case 0x24AB: // [PARENTHESIZED LATIN SMALL LETTER P] - output[outputPos++] = L'('; - output[outputPos++] = L'p'; - output[outputPos++] = L')'; - break; - case 0x024A: // [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL] - case 0x24C6: // [CIRCLED LATIN CAPITAL LETTER Q] - case 0xA756: // [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER] - case 0xA758: // [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE] - case 0xFF31: // [FULLWIDTH LATIN CAPITAL LETTER Q] - output[outputPos++] = L'Q'; - break; - case 0x0138: // [LATIN SMALL LETTER KRA] - case 0x024B: // [LATIN SMALL LETTER Q WITH HOOK TAIL] - case 0x02A0: // [LATIN SMALL LETTER Q WITH HOOK] - case 0x24E0: // [CIRCLED LATIN SMALL LETTER Q] - case 0xA757: // [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER] - case 0xA759: // [LATIN SMALL LETTER Q WITH DIAGONAL STROKE] - case 0xFF51: // [FULLWIDTH LATIN SMALL LETTER Q] - output[outputPos++] = L'q'; - break; - case 0x24AC: // [PARENTHESIZED LATIN SMALL LETTER Q] - output[outputPos++] = L'('; - output[outputPos++] = L'q'; - output[outputPos++] = L')'; - break; - case 0x0239: // [LATIN SMALL LETTER QP DIGRAPH] - output[outputPos++] = L'q'; - output[outputPos++] = L'p'; - break; - case 0x0154: // [LATIN CAPITAL LETTER R WITH ACUTE] - case 0x0156: // [LATIN CAPITAL LETTER R WITH CEDILLA] - case 0x0158: // [LATIN CAPITAL LETTER R WITH CARON] - case 0x0210: // [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE] - case 0x0212: // [LATIN CAPITAL LETTER R WITH 
INVERTED BREVE] - case 0x024C: // [LATIN CAPITAL LETTER R WITH STROKE] - case 0x0280: // [LATIN LETTER SMALL CAPITAL R] - case 0x0281: // [LATIN LETTER SMALL CAPITAL INVERTED R] - case 0x1D19: // [LATIN LETTER SMALL CAPITAL REVERSED R] - case 0x1D1A: // [LATIN LETTER SMALL CAPITAL TURNED R] - case 0x1E58: // [LATIN CAPITAL LETTER R WITH DOT ABOVE] - case 0x1E5A: // [LATIN CAPITAL LETTER R WITH DOT BELOW] - case 0x1E5C: // [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON] - case 0x1E5E: // [LATIN CAPITAL LETTER R WITH LINE BELOW] - case 0x24C7: // [CIRCLED LATIN CAPITAL LETTER R] - case 0x2C64: // [LATIN CAPITAL LETTER R WITH TAIL] - case 0xA75A: // [LATIN CAPITAL LETTER R ROTUNDA] - case 0xA782: // [LATIN CAPITAL LETTER INSULAR R] - case 0xFF32: // [FULLWIDTH LATIN CAPITAL LETTER R] - output[outputPos++] = L'R'; - break; - case 0x0155: // [LATIN SMALL LETTER R WITH ACUTE] - case 0x0157: // [LATIN SMALL LETTER R WITH CEDILLA] - case 0x0159: // [LATIN SMALL LETTER R WITH CARON] - case 0x0211: // [LATIN SMALL LETTER R WITH DOUBLE GRAVE] - case 0x0213: // [LATIN SMALL LETTER R WITH INVERTED BREVE] - case 0x024D: // [LATIN SMALL LETTER R WITH STROKE] - case 0x027C: // [LATIN SMALL LETTER R WITH LONG LEG] - case 0x027D: // [LATIN SMALL LETTER R WITH TAIL] - case 0x027E: // [LATIN SMALL LETTER R WITH FISHHOOK] - case 0x027F: // [LATIN SMALL LETTER REVERSED R WITH FISHHOOK] - case 0x1D63: // [LATIN SUBSCRIPT SMALL LETTER R] - case 0x1D72: // [LATIN SMALL LETTER R WITH MIDDLE TILDE] - case 0x1D73: // [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE] - case 0x1D89: // [LATIN SMALL LETTER R WITH PALATAL HOOK] - case 0x1E59: // [LATIN SMALL LETTER R WITH DOT ABOVE] - case 0x1E5B: // [LATIN SMALL LETTER R WITH DOT BELOW] - case 0x1E5D: // [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON] - case 0x1E5F: // [LATIN SMALL LETTER R WITH LINE BELOW] - case 0x24E1: // [CIRCLED LATIN SMALL LETTER R] - case 0xA75B: // [LATIN SMALL LETTER R ROTUNDA] - case 0xA783: // [LATIN SMALL 
LETTER INSULAR R] - case 0xFF52: // [FULLWIDTH LATIN SMALL LETTER R] - output[outputPos++] = L'r'; - break; - case 0x24AD: // [PARENTHESIZED LATIN SMALL LETTER R] - output[outputPos++] = L'('; - output[outputPos++] = L'r'; - output[outputPos++] = L')'; - break; - case 0x015A: // [LATIN CAPITAL LETTER S WITH ACUTE] - case 0x015C: // [LATIN CAPITAL LETTER S WITH CIRCUMFLEX] - case 0x015E: // [LATIN CAPITAL LETTER S WITH CEDILLA] - case 0x0160: // [LATIN CAPITAL LETTER S WITH CARON] - case 0x0218: // [LATIN CAPITAL LETTER S WITH COMMA BELOW] - case 0x1E60: // [LATIN CAPITAL LETTER S WITH DOT ABOVE] - case 0x1E62: // [LATIN CAPITAL LETTER S WITH DOT BELOW] - case 0x1E64: // [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE] - case 0x1E66: // [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE] - case 0x1E68: // [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE] - case 0x24C8: // [CIRCLED LATIN CAPITAL LETTER S] - case 0xA731: // [LATIN LETTER SMALL CAPITAL S] - case 0xA785: // [LATIN SMALL LETTER INSULAR S] - case 0xFF33: // [FULLWIDTH LATIN CAPITAL LETTER S] - output[outputPos++] = L'S'; - break; - case 0x015B: // [LATIN SMALL LETTER S WITH ACUTE] - case 0x015D: // [LATIN SMALL LETTER S WITH CIRCUMFLEX] - case 0x015F: // [LATIN SMALL LETTER S WITH CEDILLA] - case 0x0161: // [LATIN SMALL LETTER S WITH CARON] - case 0x017F: // [LATIN SMALL LETTER LONG S] - case 0x0219: // [LATIN SMALL LETTER S WITH COMMA BELOW] - case 0x023F: // [LATIN SMALL LETTER S WITH SWASH TAIL] - case 0x0282: // [LATIN SMALL LETTER S WITH HOOK] - case 0x1D74: // [LATIN SMALL LETTER S WITH MIDDLE TILDE] - case 0x1D8A: // [LATIN SMALL LETTER S WITH PALATAL HOOK] - case 0x1E61: // [LATIN SMALL LETTER S WITH DOT ABOVE] - case 0x1E63: // [LATIN SMALL LETTER S WITH DOT BELOW] - case 0x1E65: // [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE] - case 0x1E67: // [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE] - case 0x1E69: // [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE] - case 0x1E9C: // [LATIN SMALL 
LETTER LONG S WITH DIAGONAL STROKE] - case 0x1E9D: // [LATIN SMALL LETTER LONG S WITH HIGH STROKE] - case 0x24E2: // [CIRCLED LATIN SMALL LETTER S] - case 0xA784: // [LATIN CAPITAL LETTER INSULAR S] - case 0xFF53: // [FULLWIDTH LATIN SMALL LETTER S] - output[outputPos++] = L's'; - break; - case 0x1E9E: // [LATIN CAPITAL LETTER SHARP S] - output[outputPos++] = L'S'; - output[outputPos++] = L'S'; - break; - case 0x24AE: // [PARENTHESIZED LATIN SMALL LETTER S] - output[outputPos++] = L'('; - output[outputPos++] = L's'; - output[outputPos++] = L')'; - break; - case 0x00DF: // [LATIN SMALL LETTER SHARP S] - output[outputPos++] = L's'; - output[outputPos++] = L's'; - break; - case 0xFB06: // [LATIN SMALL LIGATURE ST] - output[outputPos++] = L's'; - output[outputPos++] = L't'; - break; - case 0x0162: // [LATIN CAPITAL LETTER T WITH CEDILLA] - case 0x0164: // [LATIN CAPITAL LETTER T WITH CARON] - case 0x0166: // [LATIN CAPITAL LETTER T WITH STROKE] - case 0x01AC: // [LATIN CAPITAL LETTER T WITH HOOK] - case 0x01AE: // [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK] - case 0x021A: // [LATIN CAPITAL LETTER T WITH COMMA BELOW] - case 0x023E: // [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE] - case 0x1D1B: // [LATIN LETTER SMALL CAPITAL T] - case 0x1E6A: // [LATIN CAPITAL LETTER T WITH DOT ABOVE] - case 0x1E6C: // [LATIN CAPITAL LETTER T WITH DOT BELOW] - case 0x1E6E: // [LATIN CAPITAL LETTER T WITH LINE BELOW] - case 0x1E70: // [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW] - case 0x24C9: // [CIRCLED LATIN CAPITAL LETTER T] - case 0xA786: // [LATIN CAPITAL LETTER INSULAR T] - case 0xFF34: // [FULLWIDTH LATIN CAPITAL LETTER T] - output[outputPos++] = L'T'; - break; - case 0x0163: // [LATIN SMALL LETTER T WITH CEDILLA] - case 0x0165: // [LATIN SMALL LETTER T WITH CARON] - case 0x0167: // [LATIN SMALL LETTER T WITH STROKE] - case 0x01AB: // [LATIN SMALL LETTER T WITH PALATAL HOOK] - case 0x01AD: // [LATIN SMALL LETTER T WITH HOOK] - case 0x021B: // [LATIN SMALL LETTER T WITH COMMA 
BELOW] - case 0x0236: // [LATIN SMALL LETTER T WITH CURL] - case 0x0287: // [LATIN SMALL LETTER TURNED T] - case 0x0288: // [LATIN SMALL LETTER T WITH RETROFLEX HOOK] - case 0x1D75: // [LATIN SMALL LETTER T WITH MIDDLE TILDE] - case 0x1E6B: // [LATIN SMALL LETTER T WITH DOT ABOVE] - case 0x1E6D: // [LATIN SMALL LETTER T WITH DOT BELOW] - case 0x1E6F: // [LATIN SMALL LETTER T WITH LINE BELOW] - case 0x1E71: // [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW] - case 0x1E97: // [LATIN SMALL LETTER T WITH DIAERESIS] - case 0x24E3: // [CIRCLED LATIN SMALL LETTER T] - case 0x2C66: // [LATIN SMALL LETTER T WITH DIAGONAL STROKE] - case 0xFF54: // [FULLWIDTH LATIN SMALL LETTER T] - output[outputPos++] = L't'; - break; - case 0x00DE: // [LATIN CAPITAL LETTER THORN] - case 0xA766: // [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER] - output[outputPos++] = L'T'; - output[outputPos++] = L'H'; - break; - case 0xA728: // [LATIN CAPITAL LETTER TZ] - output[outputPos++] = L'T'; - output[outputPos++] = L'Z'; - break; - case 0x24AF: // [PARENTHESIZED LATIN SMALL LETTER T] - output[outputPos++] = L'('; - output[outputPos++] = L't'; - output[outputPos++] = L')'; - break; - case 0x02A8: // [LATIN SMALL LETTER TC DIGRAPH WITH CURL] - output[outputPos++] = L't'; - output[outputPos++] = L'c'; - break; - case 0x00FE: // [LATIN SMALL LETTER THORN] - case 0x1D7A: // [LATIN SMALL LETTER TH WITH STRIKETHROUGH] - case 0xA767: // [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER] - output[outputPos++] = L't'; - output[outputPos++] = L'h'; - break; - case 0x02A6: // [LATIN SMALL LETTER TS DIGRAPH] - output[outputPos++] = L't'; - output[outputPos++] = L's'; - break; - case 0xA729: // [LATIN SMALL LETTER TZ] - output[outputPos++] = L't'; - output[outputPos++] = L'z'; - break; - case 0x00D9: // [LATIN CAPITAL LETTER U WITH GRAVE] - case 0x00DA: // [LATIN CAPITAL LETTER U WITH ACUTE] - case 0x00DB: // [LATIN CAPITAL LETTER U WITH CIRCUMFLEX] - case 0x00DC: // [LATIN CAPITAL LETTER U WITH 
DIAERESIS] - case 0x0168: // [LATIN CAPITAL LETTER U WITH TILDE] - case 0x016A: // [LATIN CAPITAL LETTER U WITH MACRON] - case 0x016C: // [LATIN CAPITAL LETTER U WITH BREVE] - case 0x016E: // [LATIN CAPITAL LETTER U WITH RING ABOVE] - case 0x0170: // [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE] - case 0x0172: // [LATIN CAPITAL LETTER U WITH OGONEK] - case 0x01AF: // [LATIN CAPITAL LETTER U WITH HORN] - case 0x01D3: // [LATIN CAPITAL LETTER U WITH CARON] - case 0x01D5: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON] - case 0x01D7: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE] - case 0x01D9: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON] - case 0x01DB: // [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE] - case 0x0214: // [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE] - case 0x0216: // [LATIN CAPITAL LETTER U WITH INVERTED BREVE] - case 0x0244: // [LATIN CAPITAL LETTER U BAR] - case 0x1D1C: // [LATIN LETTER SMALL CAPITAL U] - case 0x1D7E: // [LATIN SMALL CAPITAL LETTER U WITH STROKE] - case 0x1E72: // [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW] - case 0x1E74: // [LATIN CAPITAL LETTER U WITH TILDE BELOW] - case 0x1E76: // [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW] - case 0x1E78: // [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE] - case 0x1E7A: // [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS] - case 0x1EE4: // [LATIN CAPITAL LETTER U WITH DOT BELOW] - case 0x1EE6: // [LATIN CAPITAL LETTER U WITH HOOK ABOVE] - case 0x1EE8: // [LATIN CAPITAL LETTER U WITH HORN AND ACUTE] - case 0x1EEA: // [LATIN CAPITAL LETTER U WITH HORN AND GRAVE] - case 0x1EEC: // [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE] - case 0x1EEE: // [LATIN CAPITAL LETTER U WITH HORN AND TILDE] - case 0x1EF0: // [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW] - case 0x24CA: // [CIRCLED LATIN CAPITAL LETTER U] - case 0xFF35: // [FULLWIDTH LATIN CAPITAL LETTER U] - output[outputPos++] = L'U'; - break; - case 0x00F9: // [LATIN SMALL LETTER U WITH GRAVE] - case 0x00FA: // [LATIN SMALL 
LETTER U WITH ACUTE] - case 0x00FB: // [LATIN SMALL LETTER U WITH CIRCUMFLEX] - case 0x00FC: // [LATIN SMALL LETTER U WITH DIAERESIS] - case 0x0169: // [LATIN SMALL LETTER U WITH TILDE] - case 0x016B: // [LATIN SMALL LETTER U WITH MACRON] - case 0x016D: // [LATIN SMALL LETTER U WITH BREVE] - case 0x016F: // [LATIN SMALL LETTER U WITH RING ABOVE] - case 0x0171: // [LATIN SMALL LETTER U WITH DOUBLE ACUTE] - case 0x0173: // [LATIN SMALL LETTER U WITH OGONEK] - case 0x01B0: // [LATIN SMALL LETTER U WITH HORN] - case 0x01D4: // [LATIN SMALL LETTER U WITH CARON] - case 0x01D6: // [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON] - case 0x01D8: // [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE] - case 0x01DA: // [LATIN SMALL LETTER U WITH DIAERESIS AND CARON] - case 0x01DC: // [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE] - case 0x0215: // [LATIN SMALL LETTER U WITH DOUBLE GRAVE] - case 0x0217: // [LATIN SMALL LETTER U WITH INVERTED BREVE] - case 0x0289: // [LATIN SMALL LETTER U BAR] - case 0x1D64: // [LATIN SUBSCRIPT SMALL LETTER U] - case 0x1D99: // [LATIN SMALL LETTER U WITH RETROFLEX HOOK] - case 0x1E73: // [LATIN SMALL LETTER U WITH DIAERESIS BELOW] - case 0x1E75: // [LATIN SMALL LETTER U WITH TILDE BELOW] - case 0x1E77: // [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW] - case 0x1E79: // [LATIN SMALL LETTER U WITH TILDE AND ACUTE] - case 0x1E7B: // [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS] - case 0x1EE5: // [LATIN SMALL LETTER U WITH DOT BELOW] - case 0x1EE7: // [LATIN SMALL LETTER U WITH HOOK ABOVE] - case 0x1EE9: // [LATIN SMALL LETTER U WITH HORN AND ACUTE] - case 0x1EEB: // [LATIN SMALL LETTER U WITH HORN AND GRAVE] - case 0x1EED: // [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE] - case 0x1EEF: // [LATIN SMALL LETTER U WITH HORN AND TILDE] - case 0x1EF1: // [LATIN SMALL LETTER U WITH HORN AND DOT BELOW] - case 0x24E4: // [CIRCLED LATIN SMALL LETTER U] - case 0xFF55: // [FULLWIDTH LATIN SMALL LETTER U] - output[outputPos++] = L'u'; - break; - case 0x24B0: 
// [PARENTHESIZED LATIN SMALL LETTER U] - output[outputPos++] = L'('; - output[outputPos++] = L'u'; - output[outputPos++] = L')'; - break; - case 0x1D6B: // [LATIN SMALL LETTER UE] - output[outputPos++] = L'u'; - output[outputPos++] = L'e'; - break; - case 0x01B2: // [LATIN CAPITAL LETTER V WITH HOOK] - case 0x0245: // [LATIN CAPITAL LETTER TURNED V] - case 0x1D20: // [LATIN LETTER SMALL CAPITAL V] - case 0x1E7C: // [LATIN CAPITAL LETTER V WITH TILDE] - case 0x1E7E: // [LATIN CAPITAL LETTER V WITH DOT BELOW] - case 0x1EFC: // [LATIN CAPITAL LETTER MIDDLE-WELSH V] - case 0x24CB: // [CIRCLED LATIN CAPITAL LETTER V] - case 0xA75E: // [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE] - case 0xA768: // [LATIN CAPITAL LETTER VEND] - case 0xFF36: // [FULLWIDTH LATIN CAPITAL LETTER V] - output[outputPos++] = L'V'; - break; - case 0x028B: // [LATIN SMALL LETTER V WITH HOOK] - case 0x028C: // [LATIN SMALL LETTER TURNED V] - case 0x1D65: // [LATIN SUBSCRIPT SMALL LETTER V] - case 0x1D8C: // [LATIN SMALL LETTER V WITH PALATAL HOOK] - case 0x1E7D: // [LATIN SMALL LETTER V WITH TILDE] - case 0x1E7F: // [LATIN SMALL LETTER V WITH DOT BELOW] - case 0x24E5: // [CIRCLED LATIN SMALL LETTER V] - case 0x2C71: // [LATIN SMALL LETTER V WITH RIGHT HOOK] - case 0x2C74: // [LATIN SMALL LETTER V WITH CURL] - case 0xA75F: // [LATIN SMALL LETTER V WITH DIAGONAL STROKE] - case 0xFF56: // [FULLWIDTH LATIN SMALL LETTER V] - output[outputPos++] = L'v'; - break; - case 0xA760: // [LATIN CAPITAL LETTER VY] - output[outputPos++] = L'V'; - output[outputPos++] = L'Y'; - break; - case 0x24B1: // [PARENTHESIZED LATIN SMALL LETTER V] - output[outputPos++] = L'('; - output[outputPos++] = L'v'; - output[outputPos++] = L')'; - break; - case 0xA761: // [LATIN SMALL LETTER VY] - output[outputPos++] = L'v'; - output[outputPos++] = L'y'; - break; - case 0x0174: // [LATIN CAPITAL LETTER W WITH CIRCUMFLEX] - case 0x01F7: // [LATIN CAPITAL LETTER WYNN] - case 0x1D21: // [LATIN LETTER SMALL CAPITAL W] - case 0x1E80: // 
[LATIN CAPITAL LETTER W WITH GRAVE] - case 0x1E82: // [LATIN CAPITAL LETTER W WITH ACUTE] - case 0x1E84: // [LATIN CAPITAL LETTER W WITH DIAERESIS] - case 0x1E86: // [LATIN CAPITAL LETTER W WITH DOT ABOVE] - case 0x1E88: // [LATIN CAPITAL LETTER W WITH DOT BELOW] - case 0x24CC: // [CIRCLED LATIN CAPITAL LETTER W] - case 0x2C72: // [LATIN CAPITAL LETTER W WITH HOOK] - case 0xFF37: // [FULLWIDTH LATIN CAPITAL LETTER W] - output[outputPos++] = L'W'; - break; - case 0x0175: // [LATIN SMALL LETTER W WITH CIRCUMFLEX] - case 0x01BF: // [LATIN LETTER WYNN] - case 0x028D: // [LATIN SMALL LETTER TURNED W] - case 0x1E81: // [LATIN SMALL LETTER W WITH GRAVE] - case 0x1E83: // [LATIN SMALL LETTER W WITH ACUTE] - case 0x1E85: // [LATIN SMALL LETTER W WITH DIAERESIS] - case 0x1E87: // [LATIN SMALL LETTER W WITH DOT ABOVE] - case 0x1E89: // [LATIN SMALL LETTER W WITH DOT BELOW] - case 0x1E98: // [LATIN SMALL LETTER W WITH RING ABOVE] - case 0x24E6: // [CIRCLED LATIN SMALL LETTER W] - case 0x2C73: // [LATIN SMALL LETTER W WITH HOOK] - case 0xFF57: // [FULLWIDTH LATIN SMALL LETTER W] - output[outputPos++] = L'w'; - break; - case 0x24B2: // [PARENTHESIZED LATIN SMALL LETTER W] - output[outputPos++] = L'('; - output[outputPos++] = L'w'; - output[outputPos++] = L')'; - break; - case 0x1E8A: // [LATIN CAPITAL LETTER X WITH DOT ABOVE] - case 0x1E8C: // [LATIN CAPITAL LETTER X WITH DIAERESIS] - case 0x24CD: // [CIRCLED LATIN CAPITAL LETTER X] - case 0xFF38: // [FULLWIDTH LATIN CAPITAL LETTER X] - output[outputPos++] = L'X'; - break; - case 0x1D8D: // [LATIN SMALL LETTER X WITH PALATAL HOOK] - case 0x1E8B: // [LATIN SMALL LETTER X WITH DOT ABOVE] - case 0x1E8D: // [LATIN SMALL LETTER X WITH DIAERESIS] - case 0x2093: // [LATIN SUBSCRIPT SMALL LETTER X] - case 0x24E7: // [CIRCLED LATIN SMALL LETTER X] - case 0xFF58: // [FULLWIDTH LATIN SMALL LETTER X] - output[outputPos++] = L'x'; - break; - case 0x24B3: // [PARENTHESIZED LATIN SMALL LETTER X] - output[outputPos++] = L'('; - 
output[outputPos++] = L'x'; - output[outputPos++] = L')'; - break; - case 0x00DD: // [LATIN CAPITAL LETTER Y WITH ACUTE] - case 0x0176: // [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX] - case 0x0178: // [LATIN CAPITAL LETTER Y WITH DIAERESIS] - case 0x01B3: // [LATIN CAPITAL LETTER Y WITH HOOK] - case 0x0232: // [LATIN CAPITAL LETTER Y WITH MACRON] - case 0x024E: // [LATIN CAPITAL LETTER Y WITH STROKE] - case 0x028F: // [LATIN LETTER SMALL CAPITAL Y] - case 0x1E8E: // [LATIN CAPITAL LETTER Y WITH DOT ABOVE] - case 0x1EF2: // [LATIN CAPITAL LETTER Y WITH GRAVE] - case 0x1EF4: // [LATIN CAPITAL LETTER Y WITH DOT BELOW] - case 0x1EF6: // [LATIN CAPITAL LETTER Y WITH HOOK ABOVE] - case 0x1EF8: // [LATIN CAPITAL LETTER Y WITH TILDE] - case 0x1EFE: // [LATIN CAPITAL LETTER Y WITH LOOP] - case 0x24CE: // [CIRCLED LATIN CAPITAL LETTER Y] - case 0xFF39: // [FULLWIDTH LATIN CAPITAL LETTER Y] - output[outputPos++] = L'Y'; - break; - case 0x00FD: // [LATIN SMALL LETTER Y WITH ACUTE] - case 0x00FF: // [LATIN SMALL LETTER Y WITH DIAERESIS] - case 0x0177: // [LATIN SMALL LETTER Y WITH CIRCUMFLEX] - case 0x01B4: // [LATIN SMALL LETTER Y WITH HOOK] - case 0x0233: // [LATIN SMALL LETTER Y WITH MACRON] - case 0x024F: // [LATIN SMALL LETTER Y WITH STROKE] - case 0x028E: // [LATIN SMALL LETTER TURNED Y] - case 0x1E8F: // [LATIN SMALL LETTER Y WITH DOT ABOVE] - case 0x1E99: // [LATIN SMALL LETTER Y WITH RING ABOVE] - case 0x1EF3: // [LATIN SMALL LETTER Y WITH GRAVE] - case 0x1EF5: // [LATIN SMALL LETTER Y WITH DOT BELOW] - case 0x1EF7: // [LATIN SMALL LETTER Y WITH HOOK ABOVE] - case 0x1EF9: // [LATIN SMALL LETTER Y WITH TILDE] - case 0x1EFF: // [LATIN SMALL LETTER Y WITH LOOP] - case 0x24E8: // [CIRCLED LATIN SMALL LETTER Y] - case 0xFF59: // [FULLWIDTH LATIN SMALL LETTER Y] - output[outputPos++] = L'y'; - break; - case 0x24B4: // [PARENTHESIZED LATIN SMALL LETTER Y] - output[outputPos++] = L'('; - output[outputPos++] = L'y'; - output[outputPos++] = L')'; - break; - case 0x0179: // [LATIN 
CAPITAL LETTER Z WITH ACUTE] - case 0x017B: // [LATIN CAPITAL LETTER Z WITH DOT ABOVE] - case 0x017D: // [LATIN CAPITAL LETTER Z WITH CARON] - case 0x01B5: // [LATIN CAPITAL LETTER Z WITH STROKE] - case 0x021C: // [LATIN CAPITAL LETTER YOGH] - case 0x0224: // [LATIN CAPITAL LETTER Z WITH HOOK] - case 0x1D22: // [LATIN LETTER SMALL CAPITAL Z] - case 0x1E90: // [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX] - case 0x1E92: // [LATIN CAPITAL LETTER Z WITH DOT BELOW] - case 0x1E94: // [LATIN CAPITAL LETTER Z WITH LINE BELOW] - case 0x24CF: // [CIRCLED LATIN CAPITAL LETTER Z] - case 0x2C6B: // [LATIN CAPITAL LETTER Z WITH DESCENDER] - case 0xA762: // [LATIN CAPITAL LETTER VISIGOTHIC Z] - case 0xFF3A: // [FULLWIDTH LATIN CAPITAL LETTER Z] - output[outputPos++] = L'Z'; - break; - case 0x017A: // [LATIN SMALL LETTER Z WITH ACUTE] - case 0x017C: // [LATIN SMALL LETTER Z WITH DOT ABOVE] - case 0x017E: // [LATIN SMALL LETTER Z WITH CARON] - case 0x01B6: // [LATIN SMALL LETTER Z WITH STROKE] - case 0x021D: // [LATIN SMALL LETTER YOGH] - case 0x0225: // [LATIN SMALL LETTER Z WITH HOOK] - case 0x0240: // [LATIN SMALL LETTER Z WITH SWASH TAIL] - case 0x0290: // [LATIN SMALL LETTER Z WITH RETROFLEX HOOK] - case 0x0291: // [LATIN SMALL LETTER Z WITH CURL] - case 0x1D76: // [LATIN SMALL LETTER Z WITH MIDDLE TILDE] - case 0x1D8E: // [LATIN SMALL LETTER Z WITH PALATAL HOOK] - case 0x1E91: // [LATIN SMALL LETTER Z WITH CIRCUMFLEX] - case 0x1E93: // [LATIN SMALL LETTER Z WITH DOT BELOW] - case 0x1E95: // [LATIN SMALL LETTER Z WITH LINE BELOW] - case 0x24E9: // [CIRCLED LATIN SMALL LETTER Z] - case 0x2C6C: // [LATIN SMALL LETTER Z WITH DESCENDER] - case 0xA763: // [LATIN SMALL LETTER VISIGOTHIC Z] - case 0xFF5A: // [FULLWIDTH LATIN SMALL LETTER Z] - output[outputPos++] = L'z'; - break; - case 0x24B5: // [PARENTHESIZED LATIN SMALL LETTER Z] - output[outputPos++] = L'('; - output[outputPos++] = L'z'; - output[outputPos++] = L')'; - break; - case 0x2070: // [SUPERSCRIPT ZERO] - case 0x2080: // 
[SUBSCRIPT ZERO] - case 0x24EA: // [CIRCLED DIGIT ZERO] - case 0x24FF: // [NEGATIVE CIRCLED DIGIT ZERO] - case 0xFF10: // [FULLWIDTH DIGIT ZERO] - output[outputPos++] = L'0'; - break; - case 0x00B9: // [SUPERSCRIPT ONE] - case 0x2081: // [SUBSCRIPT ONE] - case 0x2460: // [CIRCLED DIGIT ONE] - case 0x24F5: // [DOUBLE CIRCLED DIGIT ONE] - case 0x2776: // [DINGBAT NEGATIVE CIRCLED DIGIT ONE] - case 0x2780: // [DINGBAT CIRCLED SANS-SERIF DIGIT ONE] - case 0x278A: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE] - case 0xFF11: // [FULLWIDTH DIGIT ONE] - output[outputPos++] = L'1'; - break; - case 0x2488: // [DIGIT ONE FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'.'; - break; - case 0x2474: // [PARENTHESIZED DIGIT ONE] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L')'; - break; - case 0x00B2: // [SUPERSCRIPT TWO] - case 0x2082: // [SUBSCRIPT TWO] - case 0x2461: // [CIRCLED DIGIT TWO] - case 0x24F6: // [DOUBLE CIRCLED DIGIT TWO] - case 0x2777: // [DINGBAT NEGATIVE CIRCLED DIGIT TWO] - case 0x2781: // [DINGBAT CIRCLED SANS-SERIF DIGIT TWO] - case 0x278B: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO] - case 0xFF12: // [FULLWIDTH DIGIT TWO] - output[outputPos++] = L'2'; - break; - case 0x2489: // [DIGIT TWO FULL STOP] - output[outputPos++] = L'2'; - output[outputPos++] = L'.'; - break; - case 0x2475: // [PARENTHESIZED DIGIT TWO] - output[outputPos++] = L'('; - output[outputPos++] = L'2'; - output[outputPos++] = L')'; - break; - case 0x00B3: // [SUPERSCRIPT THREE] - case 0x2083: // [SUBSCRIPT THREE] - case 0x2462: // [CIRCLED DIGIT THREE] - case 0x24F7: // [DOUBLE CIRCLED DIGIT THREE] - case 0x2778: // [DINGBAT NEGATIVE CIRCLED DIGIT THREE] - case 0x2782: // [DINGBAT CIRCLED SANS-SERIF DIGIT THREE] - case 0x278C: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE] - case 0xFF13: // [FULLWIDTH DIGIT THREE] - output[outputPos++] = L'3'; - break; - case 0x248A: // [DIGIT THREE FULL STOP] - output[outputPos++] = 
L'3'; - output[outputPos++] = L'.'; - break; - case 0x2476: // [PARENTHESIZED DIGIT THREE] - output[outputPos++] = L'('; - output[outputPos++] = L'3'; - output[outputPos++] = L')'; - break; - case 0x2074: // [SUPERSCRIPT FOUR] - case 0x2084: // [SUBSCRIPT FOUR] - case 0x2463: // [CIRCLED DIGIT FOUR] - case 0x24F8: // [DOUBLE CIRCLED DIGIT FOUR] - case 0x2779: // [DINGBAT NEGATIVE CIRCLED DIGIT FOUR] - case 0x2783: // [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR] - case 0x278D: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR] - case 0xFF14: // [FULLWIDTH DIGIT FOUR] - output[outputPos++] = L'4'; - break; - case 0x248B: // [DIGIT FOUR FULL STOP] - output[outputPos++] = L'4'; - output[outputPos++] = L'.'; - break; - case 0x2477: // [PARENTHESIZED DIGIT FOUR] - output[outputPos++] = L'('; - output[outputPos++] = L'4'; - output[outputPos++] = L')'; - break; - case 0x2075: // [SUPERSCRIPT FIVE] - case 0x2085: // [SUBSCRIPT FIVE] - case 0x2464: // [CIRCLED DIGIT FIVE] - case 0x24F9: // [DOUBLE CIRCLED DIGIT FIVE] - case 0x277A: // [DINGBAT NEGATIVE CIRCLED DIGIT FIVE] - case 0x2784: // [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE] - case 0x278E: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE] - case 0xFF15: // [FULLWIDTH DIGIT FIVE] - output[outputPos++] = L'5'; - break; - case 0x248C: // [DIGIT FIVE FULL STOP] - output[outputPos++] = L'5'; - output[outputPos++] = L'.'; - break; - case 0x2478: // [PARENTHESIZED DIGIT FIVE] - output[outputPos++] = L'('; - output[outputPos++] = L'5'; - output[outputPos++] = L')'; - break; - case 0x2076: // [SUPERSCRIPT SIX] - case 0x2086: // [SUBSCRIPT SIX] - case 0x2465: // [CIRCLED DIGIT SIX] - case 0x24FA: // [DOUBLE CIRCLED DIGIT SIX] - case 0x277B: // [DINGBAT NEGATIVE CIRCLED DIGIT SIX] - case 0x2785: // [DINGBAT CIRCLED SANS-SERIF DIGIT SIX] - case 0x278F: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX] - case 0xFF16: // [FULLWIDTH DIGIT SIX] - output[outputPos++] = L'6'; - break; - case 0x248D: // [DIGIT SIX FULL STOP] - 
output[outputPos++] = L'6'; - output[outputPos++] = L'.'; - break; - case 0x2479: // [PARENTHESIZED DIGIT SIX] - output[outputPos++] = L'('; - output[outputPos++] = L'6'; - output[outputPos++] = L')'; - break; - case 0x2077: // [SUPERSCRIPT SEVEN] - case 0x2087: // [SUBSCRIPT SEVEN] - case 0x2466: // [CIRCLED DIGIT SEVEN] - case 0x24FB: // [DOUBLE CIRCLED DIGIT SEVEN] - case 0x277C: // [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN] - case 0x2786: // [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN] - case 0x2790: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN] - case 0xFF17: // [FULLWIDTH DIGIT SEVEN] - output[outputPos++] = L'7'; - break; - case 0x248E: // [DIGIT SEVEN FULL STOP] - output[outputPos++] = L'7'; - output[outputPos++] = L'.'; - break; - case 0x247A: // [PARENTHESIZED DIGIT SEVEN] - output[outputPos++] = L'('; - output[outputPos++] = L'7'; - output[outputPos++] = L')'; - break; - case 0x2078: // [SUPERSCRIPT EIGHT] - case 0x2088: // [SUBSCRIPT EIGHT] - case 0x2467: // [CIRCLED DIGIT EIGHT] - case 0x24FC: // [DOUBLE CIRCLED DIGIT EIGHT] - case 0x277D: // [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT] - case 0x2787: // [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT] - case 0x2791: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT] - case 0xFF18: // [FULLWIDTH DIGIT EIGHT] - output[outputPos++] = L'8'; - break; - case 0x248F: // [DIGIT EIGHT FULL STOP] - output[outputPos++] = L'8'; - output[outputPos++] = L'.'; - break; - case 0x247B: // [PARENTHESIZED DIGIT EIGHT] - output[outputPos++] = L'('; - output[outputPos++] = L'8'; - output[outputPos++] = L')'; - break; - case 0x2079: // [SUPERSCRIPT NINE] - case 0x2089: // [SUBSCRIPT NINE] - case 0x2468: // [CIRCLED DIGIT NINE] - case 0x24FD: // [DOUBLE CIRCLED DIGIT NINE] - case 0x277E: // [DINGBAT NEGATIVE CIRCLED DIGIT NINE] - case 0x2788: // [DINGBAT CIRCLED SANS-SERIF DIGIT NINE] - case 0x2792: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE] - case 0xFF19: // [FULLWIDTH DIGIT NINE] - output[outputPos++] = L'9'; - break; - case 
0x2490: // [DIGIT NINE FULL STOP] - output[outputPos++] = L'9'; - output[outputPos++] = L'.'; - break; - case 0x247C: // [PARENTHESIZED DIGIT NINE] - output[outputPos++] = L'('; - output[outputPos++] = L'9'; - output[outputPos++] = L')'; - break; - case 0x2469: // [CIRCLED NUMBER TEN] - case 0x24FE: // [DOUBLE CIRCLED NUMBER TEN] - case 0x277F: // [DINGBAT NEGATIVE CIRCLED NUMBER TEN] - case 0x2789: // [DINGBAT CIRCLED SANS-SERIF NUMBER TEN] - case 0x2793: // [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN] - output[outputPos++] = L'1'; - output[outputPos++] = L'0'; - break; - case 0x2491: // [NUMBER TEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'0'; - output[outputPos++] = L'.'; - break; - case 0x247D: // [PARENTHESIZED NUMBER TEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'0'; - output[outputPos++] = L')'; - break; - case 0x246A: // [CIRCLED NUMBER ELEVEN] - case 0x24EB: // [NEGATIVE CIRCLED NUMBER ELEVEN] - output[outputPos++] = L'1'; - output[outputPos++] = L'1'; - break; - case 0x2492: // [NUMBER ELEVEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'1'; - output[outputPos++] = L'.'; - break; - case 0x247E: // [PARENTHESIZED NUMBER ELEVEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'1'; - output[outputPos++] = L')'; - break; - case 0x246B: // [CIRCLED NUMBER TWELVE] - case 0x24EC: // [NEGATIVE CIRCLED NUMBER TWELVE] - output[outputPos++] = L'1'; - output[outputPos++] = L'2'; - break; - case 0x2493: // [NUMBER TWELVE FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'2'; - output[outputPos++] = L'.'; - break; - case 0x247F: // [PARENTHESIZED NUMBER TWELVE] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'2'; - output[outputPos++] = L')'; - break; - case 0x246C: // [CIRCLED NUMBER THIRTEEN] - case 0x24ED: // [NEGATIVE CIRCLED NUMBER THIRTEEN] - output[outputPos++] = L'1'; - 
output[outputPos++] = L'3'; - break; - case 0x2494: // [NUMBER THIRTEEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'3'; - output[outputPos++] = L'.'; - break; - case 0x2480: // [PARENTHESIZED NUMBER THIRTEEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'3'; - output[outputPos++] = L')'; - break; - case 0x246D: // [CIRCLED NUMBER FOURTEEN] - case 0x24EE: // [NEGATIVE CIRCLED NUMBER FOURTEEN] - output[outputPos++] = L'1'; - output[outputPos++] = L'4'; - break; - case 0x2495: // [NUMBER FOURTEEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'4'; - output[outputPos++] = L'.'; - break; - case 0x2481: // [PARENTHESIZED NUMBER FOURTEEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'4'; - output[outputPos++] = L')'; - break; - case 0x246E: // [CIRCLED NUMBER FIFTEEN] - case 0x24EF: // [NEGATIVE CIRCLED NUMBER FIFTEEN] - output[outputPos++] = L'1'; - output[outputPos++] = L'5'; - break; - case 0x2496: // [NUMBER FIFTEEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'5'; - output[outputPos++] = L'.'; - break; - case 0x2482: // [PARENTHESIZED NUMBER FIFTEEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'5'; - output[outputPos++] = L')'; - break; - case 0x246F: // [CIRCLED NUMBER SIXTEEN] - case 0x24F0: // [NEGATIVE CIRCLED NUMBER SIXTEEN] - output[outputPos++] = L'1'; - output[outputPos++] = L'6'; - break; - case 0x2497: // [NUMBER SIXTEEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'6'; - output[outputPos++] = L'.'; - break; - case 0x2483: // [PARENTHESIZED NUMBER SIXTEEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'6'; - output[outputPos++] = L')'; - break; - case 0x2470: // [CIRCLED NUMBER SEVENTEEN] - case 0x24F1: // [NEGATIVE CIRCLED NUMBER SEVENTEEN] - output[outputPos++] = L'1'; - output[outputPos++] = L'7'; - break; - case 
0x2498: // [NUMBER SEVENTEEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'7'; - output[outputPos++] = L'.'; - break; - case 0x2484: // [PARENTHESIZED NUMBER SEVENTEEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'7'; - output[outputPos++] = L')'; - break; - case 0x2471: // [CIRCLED NUMBER EIGHTEEN] - case 0x24F2: // [NEGATIVE CIRCLED NUMBER EIGHTEEN] - output[outputPos++] = L'1'; - output[outputPos++] = L'8'; - break; - case 0x2499: // [NUMBER EIGHTEEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'8'; - output[outputPos++] = L'.'; - break; - case 0x2485: // [PARENTHESIZED NUMBER EIGHTEEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'8'; - output[outputPos++] = L')'; - break; - case 0x2472: // [CIRCLED NUMBER NINETEEN] - case 0x24F3: // [NEGATIVE CIRCLED NUMBER NINETEEN] - output[outputPos++] = L'1'; - output[outputPos++] = L'9'; - break; - case 0x249A: // [NUMBER NINETEEN FULL STOP] - output[outputPos++] = L'1'; - output[outputPos++] = L'9'; - output[outputPos++] = L'.'; - break; - case 0x2486: // [PARENTHESIZED NUMBER NINETEEN] - output[outputPos++] = L'('; - output[outputPos++] = L'1'; - output[outputPos++] = L'9'; - output[outputPos++] = L')'; - break; - case 0x2473: // [CIRCLED NUMBER TWENTY] - case 0x24F4: // [NEGATIVE CIRCLED NUMBER TWENTY] - output[outputPos++] = L'2'; - output[outputPos++] = L'0'; - break; - case 0x249B: // [NUMBER TWENTY FULL STOP] - output[outputPos++] = L'2'; - output[outputPos++] = L'0'; - output[outputPos++] = L'.'; - break; - case 0x2487: // [PARENTHESIZED NUMBER TWENTY] - output[outputPos++] = L'('; - output[outputPos++] = L'2'; - output[outputPos++] = L'0'; - output[outputPos++] = L')'; - break; - case 0x00AB: // [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK] - case 0x00BB: // [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK] - case 0x201C: // [LEFT DOUBLE QUOTATION MARK] - case 0x201D: // [RIGHT DOUBLE QUOTATION 
MARK] - case 0x201E: // [DOUBLE LOW-9 QUOTATION MARK] - case 0x2033: // [DOUBLE PRIME] - case 0x2036: // [REVERSED DOUBLE PRIME] - case 0x275D: // [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT] - case 0x275E: // [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT] - case 0x276E: // [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT] - case 0x276F: // [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT] - case 0xFF02: // [FULLWIDTH QUOTATION MARK] - output[outputPos++] = L'"'; - break; - case 0x2018: // [LEFT SINGLE QUOTATION MARK] - case 0x2019: // [RIGHT SINGLE QUOTATION MARK] - case 0x201A: // [SINGLE LOW-9 QUOTATION MARK] - case 0x201B: // [SINGLE HIGH-REVERSED-9 QUOTATION MARK] - case 0x2032: // [PRIME] - case 0x2035: // [REVERSED PRIME] - case 0x2039: // [SINGLE LEFT-POINTING ANGLE QUOTATION MARK] - case 0x203A: // [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK] - case 0x275B: // [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT] - case 0x275C: // [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT] - case 0xFF07: // [FULLWIDTH APOSTROPHE] - output[outputPos++] = L'\''; - break; - case 0x2010: // [HYPHEN] - case 0x2011: // [NON-BREAKING HYPHEN] - case 0x2012: // [FIGURE DASH] - case 0x2013: // [EN DASH] - case 0x2014: // [EM DASH] - case 0x207B: // [SUPERSCRIPT MINUS] - case 0x208B: // [SUBSCRIPT MINUS] - case 0xFF0D: // [FULLWIDTH HYPHEN-MINUS] - output[outputPos++] = L'-'; - break; - case 0x2045: // [LEFT SQUARE BRACKET WITH QUILL] - case 0x2772: // [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT] - case 0xFF3B: // [FULLWIDTH LEFT SQUARE BRACKET] - output[outputPos++] = L'['; - break; - case 0x2046: // [RIGHT SQUARE BRACKET WITH QUILL] - case 0x2773: // [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT] - case 0xFF3D: // [FULLWIDTH RIGHT SQUARE BRACKET] - output[outputPos++] = L']'; - break; - case 0x207D: // [SUPERSCRIPT LEFT PARENTHESIS] - case 0x208D: // [SUBSCRIPT LEFT PARENTHESIS] - case 0x2768: // [MEDIUM LEFT PARENTHESIS ORNAMENT] - case 0x276A: // [MEDIUM FLATTENED LEFT 
PARENTHESIS ORNAMENT] - case 0xFF08: // [FULLWIDTH LEFT PARENTHESIS] - output[outputPos++] = L'('; - break; - case 0x2E28: // [LEFT DOUBLE PARENTHESIS] - output[outputPos++] = L'('; - output[outputPos++] = L'('; - break; - case 0x207E: // [SUPERSCRIPT RIGHT PARENTHESIS] - case 0x208E: // [SUBSCRIPT RIGHT PARENTHESIS] - case 0x2769: // [MEDIUM RIGHT PARENTHESIS ORNAMENT] - case 0x276B: // [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT] - case 0xFF09: // [FULLWIDTH RIGHT PARENTHESIS] - output[outputPos++] = L')'; - break; - case 0x2E29: // [RIGHT DOUBLE PARENTHESIS] - output[outputPos++] = L')'; - output[outputPos++] = L')'; - break; - case 0x276C: // [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT] - case 0x2770: // [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT] - case 0xFF1C: // [FULLWIDTH LESS-THAN SIGN] - output[outputPos++] = L'<'; - break; - case 0x276D: // [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT] - case 0x2771: // [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT] - case 0xFF1E: // [FULLWIDTH GREATER-THAN SIGN] - output[outputPos++] = L'>'; - break; - case 0x2774: // [MEDIUM LEFT CURLY BRACKET ORNAMENT] - case 0xFF5B: // [FULLWIDTH LEFT CURLY BRACKET] - output[outputPos++] = L'{'; - break; - case 0x2775: // [MEDIUM RIGHT CURLY BRACKET ORNAMENT] - case 0xFF5D: // [FULLWIDTH RIGHT CURLY BRACKET] - output[outputPos++] = L'}'; - break; - case 0x207A: // [SUPERSCRIPT PLUS SIGN] - case 0x208A: // [SUBSCRIPT PLUS SIGN] - case 0xFF0B: // [FULLWIDTH PLUS SIGN] - output[outputPos++] = L'+'; - break; - case 0x207C: // [SUPERSCRIPT EQUALS SIGN] - case 0x208C: // [SUBSCRIPT EQUALS SIGN] - case 0xFF1D: // [FULLWIDTH EQUALS SIGN] - output[outputPos++] = L'='; - break; - case 0xFF01: // [FULLWIDTH EXCLAMATION MARK] - output[outputPos++] = L'!'; - break; - case 0x203C: // [DOUBLE EXCLAMATION MARK] - output[outputPos++] = L'!'; - output[outputPos++] = L'!'; - break; - case 0x2049: // [EXCLAMATION QUESTION MARK] - output[outputPos++] = L'!'; - output[outputPos++] = L'?'; - break; - 
case 0xFF03: // [FULLWIDTH NUMBER SIGN] - output[outputPos++] = L'#'; - break; - case 0xFF04: // [FULLWIDTH DOLLAR SIGN] - output[outputPos++] = L'$'; - break; - case 0x2052: // [COMMERCIAL MINUS SIGN] - case 0xFF05: // [FULLWIDTH PERCENT SIGN] - output[outputPos++] = L'%'; - break; - case 0xFF06: // [FULLWIDTH AMPERSAND] - output[outputPos++] = L'&'; - break; - case 0x204E: // [LOW ASTERISK] - case 0xFF0A: // [FULLWIDTH ASTERISK] - output[outputPos++] = L'*'; - break; - case 0xFF0C: // [FULLWIDTH COMMA] - output[outputPos++] = L','; - break; - case 0xFF0E: // [FULLWIDTH FULL STOP] - output[outputPos++] = L'.'; - break; - case 0x2044: // [FRACTION SLASH] - case 0xFF0F: // [FULLWIDTH SOLIDUS] - output[outputPos++] = L'/'; - break; - case 0xFF1A: // [FULLWIDTH COLON] - output[outputPos++] = L':'; - break; - case 0x204F: // [REVERSED SEMICOLON] - case 0xFF1B: // [FULLWIDTH SEMICOLON] - output[outputPos++] = L';'; - break; - case 0xFF1F: // [FULLWIDTH QUESTION MARK] - output[outputPos++] = L'?'; - break; - case 0x2047: // [DOUBLE QUESTION MARK] - output[outputPos++] = L'?'; - output[outputPos++] = L'?'; - break; - case 0x2048: // [QUESTION EXCLAMATION MARK] - output[outputPos++] = L'?'; - output[outputPos++] = L'!'; - break; - case 0xFF20: // [FULLWIDTH COMMERCIAL AT] - output[outputPos++] = L'@'; - break; - case 0xFF3C: // [FULLWIDTH REVERSE SOLIDUS] - output[outputPos++] = L'\\'; - break; - case 0x2038: // [CARET] - case 0xFF3E: // [FULLWIDTH CIRCUMFLEX ACCENT] - output[outputPos++] = L'^'; - break; - case 0xFF3F: // [FULLWIDTH LOW LINE] - output[outputPos++] = L'_'; - break; - case 0x2053: // [SWUNG DASH] - case 0xFF5E: // [FULLWIDTH TILDE] - output[outputPos++] = L'~'; - break; - default: - output[outputPos++] = c; - break; - } + break; } } } } + +} diff --git a/src/core/analysis/Analyzer.cpp b/src/core/analysis/Analyzer.cpp index 8356783e..f7eb99ac 100644 --- a/src/core/analysis/Analyzer.cpp +++ b/src/core/analysis/Analyzer.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,39 +8,33 @@ #include "Analyzer.h" #include "Fieldable.h" -namespace Lucene -{ - Analyzer::~Analyzer() - { - } - - TokenStreamPtr Analyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - return tokenStream(fieldName, reader); - } - - LuceneObjectPtr Analyzer::getPreviousTokenStream() - { - return tokenStreams.get(); - } - - void Analyzer::setPreviousTokenStream(LuceneObjectPtr stream) - { - tokenStreams.set(stream); - } - - int32_t Analyzer::getPositionIncrementGap(const String& fieldName) - { - return 0; - } - - int32_t Analyzer::getOffsetGap(FieldablePtr field) - { - return field->isTokenized() ? 1 : 0; - } - - void Analyzer::close() - { - tokenStreams.close(); - } +namespace Lucene { + +Analyzer::~Analyzer() { +} + +TokenStreamPtr Analyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + return tokenStream(fieldName, reader); +} + +LuceneObjectPtr Analyzer::getPreviousTokenStream() { + return tokenStreams.get(); +} + +void Analyzer::setPreviousTokenStream(const LuceneObjectPtr& stream) { + tokenStreams.set(stream); +} + +int32_t Analyzer::getPositionIncrementGap(const String& fieldName) { + return 0; +} + +int32_t Analyzer::getOffsetGap(const FieldablePtr& field) { + return field->isTokenized() ? 
1 : 0; +} + +void Analyzer::close() { + tokenStreams.close(); +} + } diff --git a/src/core/analysis/BaseCharFilter.cpp b/src/core/analysis/BaseCharFilter.cpp index 31b87226..0681139f 100644 --- a/src/core/analysis/BaseCharFilter.cpp +++ b/src/core/analysis/BaseCharFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,65 +8,61 @@ #include "BaseCharFilter.h" #include "MiscUtils.h" -namespace Lucene -{ - BaseCharFilter::BaseCharFilter(CharStreamPtr in) : CharFilter(in) - { - size = 0; +namespace Lucene { + +BaseCharFilter::BaseCharFilter(const CharStreamPtr& in) : CharFilter(in) { + size = 0; +} + +BaseCharFilter::~BaseCharFilter() { +} + +int32_t BaseCharFilter::correct(int32_t currentOff) { + if (!offsets || currentOff < offsets[0]) { + return currentOff; } - - BaseCharFilter::~BaseCharFilter() - { + + int32_t hi = size - 1; + if (currentOff >= offsets[hi]) { + return currentOff + diffs[hi]; } - - int32_t BaseCharFilter::correct(int32_t currentOff) - { - if (!offsets || currentOff < offsets[0]) - return currentOff; - - int32_t hi = size - 1; - if (currentOff >= offsets[hi]) - return currentOff + diffs[hi]; - - int32_t lo = 0; - int32_t mid = -1; - - while (hi >= lo) - { - mid = MiscUtils::unsignedShift(lo + hi, 1); - if (currentOff < offsets[mid]) - hi = mid - 1; - else if (currentOff > offsets[mid]) - lo = mid + 1; - else - return currentOff + diffs[mid]; - } - - if (currentOff < offsets[mid]) - return mid == 0 ? 
currentOff : currentOff + diffs[mid - 1]; - else + + int32_t lo = 0; + int32_t mid = -1; + + while (hi >= lo) { + mid = MiscUtils::unsignedShift(lo + hi, 1); + if (currentOff < offsets[mid]) { + hi = mid - 1; + } else if (currentOff > offsets[mid]) { + lo = mid + 1; + } else { return currentOff + diffs[mid]; + } } - - int32_t BaseCharFilter::getLastCumulativeDiff() - { - return !offsets ? 0 : diffs[size - 1]; + + if (currentOff < offsets[mid]) { + return mid == 0 ? currentOff : currentOff + diffs[mid - 1]; + } else { + return currentOff + diffs[mid]; } - - void BaseCharFilter::addOffCorrectMap(int32_t off, int32_t cumulativeDiff) - { - if (!offsets) - { - offsets = IntArray::newInstance(64); - diffs = IntArray::newInstance(64); - } - else if (size == offsets.size()) - { - offsets.resize(MiscUtils::getNextSize(offsets.size())); - diffs.resize(MiscUtils::getNextSize(diffs.size())); - } +} - offsets[size] = off; - diffs[size++] = cumulativeDiff; +int32_t BaseCharFilter::getLastCumulativeDiff() { + return !offsets ? 0 : diffs[size - 1]; +} + +void BaseCharFilter::addOffCorrectMap(int32_t off, int32_t cumulativeDiff) { + if (!offsets) { + offsets = IntArray::newInstance(64); + diffs = IntArray::newInstance(64); + } else if (size == offsets.size()) { + offsets.resize(MiscUtils::getNextSize(offsets.size())); + diffs.resize(MiscUtils::getNextSize(diffs.size())); } + + offsets[size] = off; + diffs[size++] = cumulativeDiff; +} + } diff --git a/src/core/analysis/CachingTokenFilter.cpp b/src/core/analysis/CachingTokenFilter.cpp index c5d42d01..1365705e 100644 --- a/src/core/analysis/CachingTokenFilter.cpp +++ b/src/core/analysis/CachingTokenFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,55 +7,51 @@ #include "LuceneInc.h" #include "CachingTokenFilter.h" -namespace Lucene -{ - CachingTokenFilter::CachingTokenFilter(TokenStreamPtr input) : TokenFilter(input) - { - } - - CachingTokenFilter::~CachingTokenFilter() - { +namespace Lucene { + +CachingTokenFilter::CachingTokenFilter(const TokenStreamPtr& input) : TokenFilter(input) { +} + +CachingTokenFilter::~CachingTokenFilter() { +} + +bool CachingTokenFilter::incrementToken() { + if (!cache) { + // fill cache lazily + cache = Collection::newInstance(); + fillCache(); + iterator = cache.begin(); } - - bool CachingTokenFilter::incrementToken() - { - if (!cache) - { - // fill cache lazily - cache = Collection::newInstance(); - fillCache(); - iterator = cache.begin(); - } - - if (iterator == cache.end()) - { - // the cache is exhausted, return false - return false; - } - - // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. - restoreState(*iterator++); - return true; + + if (iterator == cache.end()) { + // the cache is exhausted, return false + return false; } - - void CachingTokenFilter::end() - { - if (finalState) - restoreState(finalState); + + // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. 
+ restoreState(*iterator++); + return true; +} + +void CachingTokenFilter::end() { + if (finalState) { + restoreState(finalState); } - - void CachingTokenFilter::reset() - { - if (cache) - iterator = cache.begin(); +} + +void CachingTokenFilter::reset() { + if (cache) { + iterator = cache.begin(); } - - void CachingTokenFilter::fillCache() - { - while (input->incrementToken()) - cache.add(captureState()); - // capture final state - input->end(); - finalState = captureState(); +} + +void CachingTokenFilter::fillCache() { + while (input->incrementToken()) { + cache.add(captureState()); } + // capture final state + input->end(); + finalState = captureState(); +} + } diff --git a/src/core/analysis/CharArraySet.cpp b/src/core/analysis/CharArraySet.cpp index ccb07af1..5e36fda5 100644 --- a/src/core/analysis/CharArraySet.cpp +++ b/src/core/analysis/CharArraySet.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,77 +8,66 @@ #include "CharArraySet.h" #include "StringUtils.h" -namespace Lucene -{ - CharArraySet::CharArraySet(bool ignoreCase) - { - this->ignoreCase = ignoreCase; - this->entries = HashSet::newInstance(); - } - - CharArraySet::CharArraySet(HashSet entries, bool ignoreCase) - { - this->ignoreCase = ignoreCase; - this->entries = HashSet::newInstance(); - if (entries) - { - for (HashSet::iterator entry = entries.begin(); entry != entries.end(); ++entry) - add(*entry); +namespace Lucene { + +CharArraySet::CharArraySet(bool ignoreCase) { + this->ignoreCase = ignoreCase; + this->entries = HashSet::newInstance(); +} + +CharArraySet::CharArraySet(HashSet entries, bool ignoreCase) { + this->ignoreCase = ignoreCase; + this->entries = HashSet::newInstance(); + if (entries) { + for (HashSet::iterator entry = entries.begin(); entry != entries.end(); ++entry) { + add(*entry); } } - - CharArraySet::CharArraySet(Collection entries, bool ignoreCase) - { - this->ignoreCase = ignoreCase; - this->entries = HashSet::newInstance(); - if (entries) - { - for (Collection::iterator entry = entries.begin(); entry != entries.end(); ++entry) - add(*entry); +} + +CharArraySet::CharArraySet(Collection entries, bool ignoreCase) { + this->ignoreCase = ignoreCase; + this->entries = HashSet::newInstance(); + if (entries) { + for (Collection::iterator entry = entries.begin(); entry != entries.end(); ++entry) { + add(*entry); } } - - CharArraySet::~CharArraySet() - { - } - - bool CharArraySet::contains(const String& text) - { - return entries.contains(ignoreCase ? StringUtils::toLower(text) : text); - } - - bool CharArraySet::contains(const wchar_t* text, int32_t offset, int32_t length) - { - return contains(String(text + offset, length)); - } - - bool CharArraySet::add(const String& text) - { - return entries.add(ignoreCase ? 
StringUtils::toLower(text) : text); - } - - bool CharArraySet::add(CharArray text) - { - return add(String(text.get(), text.size())); - } - - int32_t CharArraySet::size() - { - return entries.size(); - } - - bool CharArraySet::isEmpty() - { - return entries.empty(); - } - - HashSet::iterator CharArraySet::begin() - { - return entries.begin(); - } - - HashSet::iterator CharArraySet::end() - { - return entries.end(); - } +} + +CharArraySet::~CharArraySet() { +} + +bool CharArraySet::contains(const String& text) { + return entries.contains(ignoreCase ? StringUtils::toLower(text) : text); +} + +bool CharArraySet::contains(const wchar_t* text, int32_t offset, int32_t length) { + return contains(String(text + offset, length)); +} + +bool CharArraySet::add(const String& text) { + return entries.add(ignoreCase ? StringUtils::toLower(text) : text); +} + +bool CharArraySet::add(CharArray text) { + return add(String(text.get(), text.size())); +} + +int32_t CharArraySet::size() { + return entries.size(); +} + +bool CharArraySet::isEmpty() { + return entries.empty(); +} + +HashSet::iterator CharArraySet::begin() { + return entries.begin(); +} + +HashSet::iterator CharArraySet::end() { + return entries.end(); +} + } diff --git a/src/core/analysis/CharFilter.cpp b/src/core/analysis/CharFilter.cpp index 8687f127..eca8caf7 100644 --- a/src/core/analysis/CharFilter.cpp +++ b/src/core/analysis/CharFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,49 +7,41 @@ #include "LuceneInc.h" #include "CharFilter.h" -namespace Lucene -{ - CharFilter::CharFilter(CharStreamPtr in) - { - input = in; - } - - CharFilter::~CharFilter() - { - } - - int32_t CharFilter::correct(int32_t currentOff) - { - return currentOff; - } - - int32_t CharFilter::correctOffset(int32_t currentOff) - { - return input->correctOffset(correct(currentOff)); - } - - void CharFilter::close() - { - input->close(); - } - - int32_t CharFilter::read(wchar_t* buffer, int32_t offset, int32_t length) - { - return input->read(buffer, offset, length); - } - - bool CharFilter::markSupported() - { - return input->markSupported(); - } - - void CharFilter::mark(int32_t readAheadLimit) - { - input->mark(readAheadLimit); - } - - void CharFilter::reset() - { - input->reset(); - } +namespace Lucene { + +CharFilter::CharFilter(const CharStreamPtr& in) { + input = in; +} + +CharFilter::~CharFilter() { +} + +int32_t CharFilter::correct(int32_t currentOff) { + return currentOff; +} + +int32_t CharFilter::correctOffset(int32_t currentOff) { + return input->correctOffset(correct(currentOff)); +} + +void CharFilter::close() { + input->close(); +} + +int32_t CharFilter::read(wchar_t* buffer, int32_t offset, int32_t length) { + return input->read(buffer, offset, length); +} + +bool CharFilter::markSupported() { + return input->markSupported(); +} + +void CharFilter::mark(int32_t readAheadLimit) { + input->mark(readAheadLimit); +} + +void CharFilter::reset() { + input->reset(); +} + } diff --git a/src/core/analysis/CharReader.cpp b/src/core/analysis/CharReader.cpp index cf1fbded..4356d893 100644 --- a/src/core/analysis/CharReader.cpp +++ b/src/core/analysis/CharReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,51 +7,44 @@ #include "LuceneInc.h" #include "CharReader.h" -namespace Lucene -{ - CharReader::CharReader(ReaderPtr in) - { - input = in; - } - - CharReader::~CharReader() - { - } - - CharStreamPtr CharReader::get(ReaderPtr input) - { - CharStreamPtr charStream(boost::dynamic_pointer_cast(input)); - return charStream ? charStream : newLucene(input); - } - - int32_t CharReader::correctOffset(int32_t currentOff) - { - return currentOff; - } - - void CharReader::close() - { - if (input) - input->close(); - } - - int32_t CharReader::read(wchar_t* buffer, int32_t offset, int32_t length) - { - return input->read(buffer, offset, length); - } - - bool CharReader::markSupported() - { - return input->markSupported(); - } - - void CharReader::mark(int32_t readAheadLimit) - { - input->mark(readAheadLimit); - } - - void CharReader::reset() - { - input->reset(); +namespace Lucene { + +CharReader::CharReader(const ReaderPtr& in) { + input = in; +} + +CharReader::~CharReader() { +} + +CharStreamPtr CharReader::get(const ReaderPtr& input) { + CharStreamPtr charStream(boost::dynamic_pointer_cast(input)); + return charStream ? 
charStream : newLucene(input); +} + +int32_t CharReader::correctOffset(int32_t currentOff) { + return currentOff; +} + +void CharReader::close() { + if (input) { + input->close(); } } + +int32_t CharReader::read(wchar_t* buffer, int32_t offset, int32_t length) { + return input->read(buffer, offset, length); +} + +bool CharReader::markSupported() { + return input->markSupported(); +} + +void CharReader::mark(int32_t readAheadLimit) { + input->mark(readAheadLimit); +} + +void CharReader::reset() { + input->reset(); +} + +} diff --git a/src/core/analysis/CharStream.cpp b/src/core/analysis/CharStream.cpp index 47b1b802..3c74f4f8 100644 --- a/src/core/analysis/CharStream.cpp +++ b/src/core/analysis/CharStream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "CharStream.h" -namespace Lucene -{ - CharStream::~CharStream() - { - } +namespace Lucene { + +CharStream::~CharStream() { +} + } diff --git a/src/core/analysis/CharTokenizer.cpp b/src/core/analysis/CharTokenizer.cpp index d7768134..5827dffd 100644 --- a/src/core/analysis/CharTokenizer.cpp +++ b/src/core/analysis/CharTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,112 +10,104 @@ #include "TermAttribute.h" #include "Reader.h" -namespace Lucene -{ - const int32_t CharTokenizer::MAX_WORD_LEN = 255; - const int32_t CharTokenizer::IO_BUFFER_SIZE = 4096; - - CharTokenizer::CharTokenizer(ReaderPtr input) : Tokenizer(input) - { - offset = 0; - bufferIndex = 0; - dataLen = 0; - ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); - - offsetAtt = addAttribute(); - termAtt = addAttribute(); - } - - CharTokenizer::CharTokenizer(AttributeSourcePtr source, ReaderPtr input) : Tokenizer(source, input) - { - offset = 0; - bufferIndex = 0; - dataLen = 0; - ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); - - offsetAtt = addAttribute(); - termAtt = addAttribute(); - } - - CharTokenizer::CharTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : Tokenizer(factory, input) - { - offset = 0; - bufferIndex = 0; - dataLen = 0; - ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); - - offsetAtt = addAttribute(); - termAtt = addAttribute(); - } - - CharTokenizer::~CharTokenizer() - { - } - - wchar_t CharTokenizer::normalize(wchar_t c) - { - return c; - } - - bool CharTokenizer::incrementToken() - { - clearAttributes(); - int32_t length = 0; - int32_t start = bufferIndex; - CharArray buffer(termAtt->termBuffer()); - while (true) - { - if (bufferIndex >= dataLen) - { - offset += dataLen; - dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); - if (dataLen == -1) - { - dataLen = 0; // so next offset += dataLen won't decrement offset - if (length > 0) - break; - else - return false; +namespace Lucene { + +const int32_t CharTokenizer::MAX_WORD_LEN = 255; +const int32_t CharTokenizer::IO_BUFFER_SIZE = 4096; + +CharTokenizer::CharTokenizer(const ReaderPtr& input) : Tokenizer(input) { + offset = 0; + bufferIndex = 0; + dataLen = 0; + ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); + + offsetAtt = addAttribute(); + termAtt = addAttribute(); +} + 
+CharTokenizer::CharTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : Tokenizer(source, input) { + offset = 0; + bufferIndex = 0; + dataLen = 0; + ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); + + offsetAtt = addAttribute(); + termAtt = addAttribute(); +} + +CharTokenizer::CharTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : Tokenizer(factory, input) { + offset = 0; + bufferIndex = 0; + dataLen = 0; + ioBuffer = CharArray::newInstance(IO_BUFFER_SIZE); + + offsetAtt = addAttribute(); + termAtt = addAttribute(); +} + +CharTokenizer::~CharTokenizer() { +} + +wchar_t CharTokenizer::normalize(wchar_t c) { + return c; +} + +bool CharTokenizer::incrementToken() { + clearAttributes(); + int32_t length = 0; + int32_t start = bufferIndex; + CharArray buffer(termAtt->termBuffer()); + while (true) { + if (bufferIndex >= dataLen) { + offset += dataLen; + dataLen = input->read(ioBuffer.get(), 0, ioBuffer.size()); + if (dataLen == -1) { + dataLen = 0; // so next offset += dataLen won't decrement offset + if (length > 0) { + break; + } else { + return false; } - bufferIndex = 0; } - - wchar_t c = ioBuffer[bufferIndex++]; - - if (isTokenChar(c)) // if it's a token char - { - if (length == 0) - start = offset + bufferIndex - 1; - else if (length == buffer.size()) - buffer = termAtt->resizeTermBuffer(1 + length); - - buffer[length++] = normalize(c); // buffer it, normalized - - if (length == MAX_WORD_LEN) // buffer overflow! - break; + bufferIndex = 0; + } + + wchar_t c = ioBuffer[bufferIndex++]; + + if (isTokenChar(c)) { // if it's a token char + if (length == 0) { + start = offset + bufferIndex - 1; + } else if (length == buffer.size()) { + buffer = termAtt->resizeTermBuffer(1 + length); } - else if (length > 0) // at non-Letter with chars - break; // return them + + buffer[length++] = normalize(c); // buffer it, normalized + + if (length == MAX_WORD_LEN) { // buffer overflow! 
+ break; + } + } else if (length > 0) { // at non-Letter with chars + break; // return them } - - termAtt->setTermLength(length); - offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); - - return true; - } - - void CharTokenizer::end() - { - // set final offset - int32_t finalOffset = correctOffset(offset); - offsetAtt->setOffset(finalOffset, finalOffset); - } - - void CharTokenizer::reset(ReaderPtr input) - { - Tokenizer::reset(input); - bufferIndex = 0; - offset = 0; - dataLen = 0; } + + termAtt->setTermLength(length); + offsetAtt->setOffset(correctOffset(start), correctOffset(start + length)); + + return true; +} + +void CharTokenizer::end() { + // set final offset + int32_t finalOffset = correctOffset(offset); + offsetAtt->setOffset(finalOffset, finalOffset); +} + +void CharTokenizer::reset(const ReaderPtr& input) { + Tokenizer::reset(input); + bufferIndex = 0; + offset = 0; + dataLen = 0; +} + } diff --git a/src/core/analysis/ISOLatin1AccentFilter.cpp b/src/core/analysis/ISOLatin1AccentFilter.cpp index 8eea181b..4c31fe13 100644 --- a/src/core/analysis/ISOLatin1AccentFilter.cpp +++ b/src/core/analysis/ISOLatin1AccentFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,226 +8,219 @@ #include "ISOLatin1AccentFilter.h" #include "TermAttribute.h" -namespace Lucene -{ - ISOLatin1AccentFilter::ISOLatin1AccentFilter(TokenStreamPtr input) : TokenFilter(input) - { - output = CharArray::newInstance(256); - outputPos = 0; - termAtt = addAttribute(); - } - - ISOLatin1AccentFilter::~ISOLatin1AccentFilter() - { - } - - bool ISOLatin1AccentFilter::incrementToken() - { - if (input->incrementToken()) - { - wchar_t* buffer = termAtt->termBufferArray(); - int32_t length = termAtt->termLength(); - - // If no characters actually require rewriting then we just return token as-is - for (int32_t i = 0; i < length; ++i) - { - wchar_t c = buffer[i]; - if (c >= 0x00c0 && c <= 0xfb06) - { - removeAccents(buffer, length); - termAtt->setTermBuffer(output.get(), 0, outputPos); - break; - } +namespace Lucene { + +ISOLatin1AccentFilter::ISOLatin1AccentFilter(const TokenStreamPtr& input) : TokenFilter(input) { + output = CharArray::newInstance(256); + outputPos = 0; + termAtt = addAttribute(); +} + +ISOLatin1AccentFilter::~ISOLatin1AccentFilter() { +} + +bool ISOLatin1AccentFilter::incrementToken() { + if (input->incrementToken()) { + wchar_t* buffer = termAtt->termBufferArray(); + int32_t length = termAtt->termLength(); + + // If no characters actually require rewriting then we just return token as-is + for (int32_t i = 0; i < length; ++i) { + wchar_t c = buffer[i]; + if (c >= 0x00c0 && c <= 0xfb06) { + removeAccents(buffer, length); + termAtt->setTermBuffer(output.get(), 0, outputPos); + break; } - return true; } - else - return false; + return true; + } else { + return false; + } +} + +void ISOLatin1AccentFilter::removeAccents(const wchar_t* input, int32_t length) { + // Worst-case length required + int32_t maxSizeNeeded = 2 * length; + + int32_t size = output.size(); + while (size < maxSizeNeeded) { + size *= 2; } - - void ISOLatin1AccentFilter::removeAccents(const wchar_t* 
input, int32_t length) - { - // Worst-case length required - int32_t maxSizeNeeded = 2 * length; - - int32_t size = output.size(); - while (size < maxSizeNeeded) - size *= 2; - - if (size != output.size()) - output.resize(size); - - outputPos = 0; - int32_t pos = 0; - - wchar_t* output = this->output.get(); - - for (int32_t i = 0; i < length; ++i, ++pos) - { - wchar_t c = input[pos]; - - // Quick test: if it's not in range then just keep current character - if (c < 0x00C0 || c > 0xFB06) + + if (size != output.size()) { + output.resize(size); + } + + outputPos = 0; + int32_t pos = 0; + + wchar_t* output = this->output.get(); + + for (int32_t i = 0; i < length; ++i, ++pos) { + wchar_t c = input[pos]; + + // Quick test: if it's not in range then just keep current character + if (c < 0x00C0 || c > 0xFB06) { + output[outputPos++] = c; + } else { + switch (c) { + case 0x00C0: + case 0x00C1: + case 0x00C2: + case 0x00C3: + case 0x00C4: + case 0x00C5: + output[outputPos++] = L'A'; + break; + case 0x00C6: + output[outputPos++] = L'A'; + output[outputPos++] = L'E'; + break; + case 0x00C7: + output[outputPos++] = L'C'; + break; + case 0x00C8: + case 0x00C9: + case 0x00CA: + case 0x00CB: + output[outputPos++] = L'E'; + break; + case 0x00CC: + case 0x00CD: + case 0x00CE: + case 0x00CF: + output[outputPos++] = L'I'; + break; + case 0x0132: + output[outputPos++] = L'I'; + output[outputPos++] = L'J'; + break; + case 0x00D0: + output[outputPos++] = L'D'; + break; + case 0x00D1: + output[outputPos++] = L'N'; + break; + case 0x00D2: + case 0x00D3: + case 0x00D4: + case 0x00D5: + case 0x00D6: + case 0x00D8: + output[outputPos++] = L'O'; + break; + case 0x0152: + output[outputPos++] = L'O'; + output[outputPos++] = L'E'; + break; + case 0x00DE: + output[outputPos++] = L'T'; + output[outputPos++] = L'H'; + break; + case 0x00D9: + case 0x00DA: + case 0x00DB: + case 0x00DC: + output[outputPos++] = L'U'; + break; + case 0x00DD: + case 0x0178: + output[outputPos++] = L'Y'; + break; + case 
0x00E0: + case 0x00E1: + case 0x00E2: + case 0x00E3: + case 0x00E4: + case 0x00E5: + output[outputPos++] = L'a'; + break; + case 0x00E6: + output[outputPos++] = L'a'; + output[outputPos++] = L'e'; + break; + case 0x00E7: + output[outputPos++] = L'c'; + break; + case 0x00E8: + case 0x00E9: + case 0x00EA: + case 0x00EB: + output[outputPos++] = L'e'; + break; + case 0x00EC: + case 0x00ED: + case 0x00EE: + case 0x00EF: + output[outputPos++] = L'i'; + break; + case 0x0133: + output[outputPos++] = L'i'; + output[outputPos++] = L'j'; + break; + case 0x00F0: + output[outputPos++] = L'd'; + break; + case 0x00F1: + output[outputPos++] = L'n'; + break; + case 0x00F2: + case 0x00F3: + case 0x00F4: + case 0x00F5: + case 0x00F6: + case 0x00F8: + output[outputPos++] = L'o'; + break; + case 0x0153: + output[outputPos++] = L'o'; + output[outputPos++] = L'e'; + break; + case 0x00DF: + output[outputPos++] = L's'; + output[outputPos++] = L's'; + break; + case 0x00FE: + output[outputPos++] = L't'; + output[outputPos++] = L'h'; + break; + case 0x00F9: + case 0x00FA: + case 0x00FB: + case 0x00FC: + output[outputPos++] = L'u'; + break; + case 0x00FD: + case 0x00FF: + output[outputPos++] = L'y'; + break; + case 0xFB00: + output[outputPos++] = L'f'; + output[outputPos++] = L'f'; + break; + case 0xFB01: + output[outputPos++] = L'f'; + output[outputPos++] = L'i'; + break; + case 0xFB02: + output[outputPos++] = L'f'; + output[outputPos++] = L'l'; + break; + case 0xFB05: + output[outputPos++] = L'f'; + output[outputPos++] = L't'; + break; + case 0xFB06: + output[outputPos++] = L's'; + output[outputPos++] = L't'; + break; + default : output[outputPos++] = c; - else - { - switch (c) - { - case 0x00C0: - case 0x00C1: - case 0x00C2: - case 0x00C3: - case 0x00C4: - case 0x00C5: - output[outputPos++] = L'A'; - break; - case 0x00C6: - output[outputPos++] = L'A'; - output[outputPos++] = L'E'; - break; - case 0x00C7: - output[outputPos++] = L'C'; - break; - case 0x00C8: - case 0x00C9: - case 0x00CA: - 
case 0x00CB: - output[outputPos++] = L'E'; - break; - case 0x00CC: - case 0x00CD: - case 0x00CE: - case 0x00CF: - output[outputPos++] = L'I'; - break; - case 0x0132: - output[outputPos++] = L'I'; - output[outputPos++] = L'J'; - break; - case 0x00D0: - output[outputPos++] = L'D'; - break; - case 0x00D1: - output[outputPos++] = L'N'; - break; - case 0x00D2: - case 0x00D3: - case 0x00D4: - case 0x00D5: - case 0x00D6: - case 0x00D8: - output[outputPos++] = L'O'; - break; - case 0x0152: - output[outputPos++] = L'O'; - output[outputPos++] = L'E'; - break; - case 0x00DE: - output[outputPos++] = L'T'; - output[outputPos++] = L'H'; - break; - case 0x00D9: - case 0x00DA: - case 0x00DB: - case 0x00DC: - output[outputPos++] = L'U'; - break; - case 0x00DD: - case 0x0178: - output[outputPos++] = L'Y'; - break; - case 0x00E0: - case 0x00E1: - case 0x00E2: - case 0x00E3: - case 0x00E4: - case 0x00E5: - output[outputPos++] = L'a'; - break; - case 0x00E6: - output[outputPos++] = L'a'; - output[outputPos++] = L'e'; - break; - case 0x00E7: - output[outputPos++] = L'c'; - break; - case 0x00E8: - case 0x00E9: - case 0x00EA: - case 0x00EB: - output[outputPos++] = L'e'; - break; - case 0x00EC: - case 0x00ED: - case 0x00EE: - case 0x00EF: - output[outputPos++] = L'i'; - break; - case 0x0133: - output[outputPos++] = L'i'; - output[outputPos++] = L'j'; - break; - case 0x00F0: - output[outputPos++] = L'd'; - break; - case 0x00F1: - output[outputPos++] = L'n'; - break; - case 0x00F2: - case 0x00F3: - case 0x00F4: - case 0x00F5: - case 0x00F6: - case 0x00F8: - output[outputPos++] = L'o'; - break; - case 0x0153: - output[outputPos++] = L'o'; - output[outputPos++] = L'e'; - break; - case 0x00DF: - output[outputPos++] = L's'; - output[outputPos++] = L's'; - break; - case 0x00FE: - output[outputPos++] = L't'; - output[outputPos++] = L'h'; - break; - case 0x00F9: - case 0x00FA: - case 0x00FB: - case 0x00FC: - output[outputPos++] = L'u'; - break; - case 0x00FD: - case 0x00FF: - output[outputPos++] = 
L'y'; - break; - case 0xFB00: - output[outputPos++] = L'f'; - output[outputPos++] = L'f'; - break; - case 0xFB01: - output[outputPos++] = L'f'; - output[outputPos++] = L'i'; - break; - case 0xFB02: - output[outputPos++] = L'f'; - output[outputPos++] = L'l'; - break; - case 0xFB05: - output[outputPos++] = L'f'; - output[outputPos++] = L't'; - break; - case 0xFB06: - output[outputPos++] = L's'; - output[outputPos++] = L't'; - break; - default : - output[outputPos++] = c; - break; - } + break; } } } } + +} diff --git a/src/core/analysis/KeywordAnalyzer.cpp b/src/core/analysis/KeywordAnalyzer.cpp index 61b8d7d9..3f19c83b 100644 --- a/src/core/analysis/KeywordAnalyzer.cpp +++ b/src/core/analysis/KeywordAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,27 +8,24 @@ #include "KeywordAnalyzer.h" #include "KeywordTokenizer.h" -namespace Lucene -{ - KeywordAnalyzer::~KeywordAnalyzer() - { - } - - TokenStreamPtr KeywordAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - return newLucene(reader); - } - - TokenStreamPtr KeywordAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!tokenizer) - { - tokenizer = newLucene(reader); - setPreviousTokenStream(tokenizer); - } - else - tokenizer->reset(reader); - return tokenizer; +namespace Lucene { + +KeywordAnalyzer::~KeywordAnalyzer() { +} + +TokenStreamPtr KeywordAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + return newLucene(reader); +} + +TokenStreamPtr KeywordAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!tokenizer) { + tokenizer = newLucene(reader); + setPreviousTokenStream(tokenizer); + } else { + tokenizer->reset(reader); } + return tokenizer; +} + } diff --git a/src/core/analysis/KeywordTokenizer.cpp b/src/core/analysis/KeywordTokenizer.cpp index b98189a1..7d0d2ee7 100644 --- a/src/core/analysis/KeywordTokenizer.cpp +++ b/src/core/analysis/KeywordTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,77 +10,69 @@ #include "OffsetAttribute.h" #include "Reader.h" -namespace Lucene -{ - const int32_t KeywordTokenizer::DEFAULT_BUFFER_SIZE = 256; - - KeywordTokenizer::KeywordTokenizer(ReaderPtr input) : Tokenizer(input) - { - init(DEFAULT_BUFFER_SIZE); - } - - KeywordTokenizer::KeywordTokenizer(ReaderPtr input, int32_t bufferSize) : Tokenizer(input) - { - init(bufferSize); - } - - KeywordTokenizer::KeywordTokenizer(AttributeSourcePtr source, ReaderPtr input, int32_t bufferSize) : Tokenizer(source, input) - { - init(bufferSize); - } - - KeywordTokenizer::KeywordTokenizer(AttributeFactoryPtr factory, ReaderPtr input, int32_t bufferSize) : Tokenizer(factory, input) - { - init(bufferSize); - } - - KeywordTokenizer::~KeywordTokenizer() - { - } - - void KeywordTokenizer::init(int32_t bufferSize) - { - this->done = false; - this->finalOffset = 0; - this->termAtt = addAttribute(); - this->offsetAtt = addAttribute(); - this->termAtt->resizeTermBuffer(bufferSize); - } - - bool KeywordTokenizer::incrementToken() - { - if (!done) - { - clearAttributes(); - done = true; - int32_t upto = 0; - CharArray buffer(termAtt->termBuffer()); - while (true) - { - int32_t length = input->read(buffer.get(), upto, buffer.size() - upto); - if (length == -1) - break; - upto += length; - if (upto == buffer.size()) - buffer = termAtt->resizeTermBuffer(buffer.size() + 1); +namespace Lucene { + +const int32_t KeywordTokenizer::DEFAULT_BUFFER_SIZE = 256; + +KeywordTokenizer::KeywordTokenizer(const ReaderPtr& input) : Tokenizer(input) { + init(DEFAULT_BUFFER_SIZE); +} + +KeywordTokenizer::KeywordTokenizer(const ReaderPtr& input, int32_t bufferSize) : Tokenizer(input) { + init(bufferSize); +} + +KeywordTokenizer::KeywordTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input, int32_t bufferSize) : Tokenizer(source, input) { + init(bufferSize); +} + +KeywordTokenizer::KeywordTokenizer(const 
AttributeFactoryPtr& factory, const ReaderPtr& input, int32_t bufferSize) : Tokenizer(factory, input) { + init(bufferSize); +} + +KeywordTokenizer::~KeywordTokenizer() { +} + +void KeywordTokenizer::init(int32_t bufferSize) { + this->done = false; + this->finalOffset = 0; + this->termAtt = addAttribute(); + this->offsetAtt = addAttribute(); + this->termAtt->resizeTermBuffer(bufferSize); +} + +bool KeywordTokenizer::incrementToken() { + if (!done) { + clearAttributes(); + done = true; + int32_t upto = 0; + CharArray buffer(termAtt->termBuffer()); + while (true) { + int32_t length = input->read(buffer.get(), upto, buffer.size() - upto); + if (length == -1) { + break; + } + upto += length; + if (upto == buffer.size()) { + buffer = termAtt->resizeTermBuffer(buffer.size() + 1); } - termAtt->setTermLength(upto); - finalOffset = correctOffset(upto); - offsetAtt->setOffset(correctOffset(0), finalOffset); - return true; } - return false; - } - - void KeywordTokenizer::end() - { - // set final offset - offsetAtt->setOffset(finalOffset, finalOffset); - } - - void KeywordTokenizer::reset() - { - Tokenizer::reset(input); - done = false; + termAtt->setTermLength(upto); + finalOffset = correctOffset(upto); + offsetAtt->setOffset(correctOffset(0), finalOffset); + return true; } + return false; +} + +void KeywordTokenizer::end() { + // set final offset + offsetAtt->setOffset(finalOffset, finalOffset); +} + +void KeywordTokenizer::reset() { + Tokenizer::reset(input); + done = false; +} + } diff --git a/src/core/analysis/LengthFilter.cpp b/src/core/analysis/LengthFilter.cpp index 68d71c5b..28dfddab 100644 --- a/src/core/analysis/LengthFilter.cpp +++ b/src/core/analysis/LengthFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,30 +8,28 @@ #include "LengthFilter.h" #include "TermAttribute.h" -namespace Lucene -{ - LengthFilter::LengthFilter(TokenStreamPtr input, int32_t min, int32_t max) : TokenFilter(input) - { - this->min = min; - this->max = max; - this->termAtt = addAttribute(); - } - - LengthFilter::~LengthFilter() - { - } - - bool LengthFilter::incrementToken() - { - // return the first non-stop word found - while (input->incrementToken()) - { - int32_t len = termAtt->termLength(); - if (len >= min && len <= max) - return true; - // note: else we ignore it but should we index each part of it? +namespace Lucene { + +LengthFilter::LengthFilter(const TokenStreamPtr& input, int32_t min, int32_t max) : TokenFilter(input) { + this->min = min; + this->max = max; + this->termAtt = addAttribute(); +} + +LengthFilter::~LengthFilter() { +} + +bool LengthFilter::incrementToken() { + // return the first non-stop word found + while (input->incrementToken()) { + int32_t len = termAtt->termLength(); + if (len >= min && len <= max) { + return true; } - // reached EOS -- return false - return false; + // note: else we ignore it but should we index each part of it? } + // reached EOS -- return false + return false; +} + } diff --git a/src/core/analysis/LetterTokenizer.cpp b/src/core/analysis/LetterTokenizer.cpp index 0bf733f3..953f8afd 100644 --- a/src/core/analysis/LetterTokenizer.cpp +++ b/src/core/analysis/LetterTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,26 +9,22 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - LetterTokenizer::LetterTokenizer(ReaderPtr input) : CharTokenizer(input) - { - } - - LetterTokenizer::LetterTokenizer(AttributeSourcePtr source, ReaderPtr input) : CharTokenizer(source, input) - { - } - - LetterTokenizer::LetterTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : CharTokenizer(factory, input) - { - } - - LetterTokenizer::~LetterTokenizer() - { - } - - bool LetterTokenizer::isTokenChar(wchar_t c) - { - return UnicodeUtil::isAlpha(c); - } +namespace Lucene { + +LetterTokenizer::LetterTokenizer(const ReaderPtr& input) : CharTokenizer(input) { +} + +LetterTokenizer::LetterTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : CharTokenizer(source, input) { +} + +LetterTokenizer::LetterTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : CharTokenizer(factory, input) { +} + +LetterTokenizer::~LetterTokenizer() { +} + +bool LetterTokenizer::isTokenChar(wchar_t c) { + return UnicodeUtil::isAlpha(c); +} + } diff --git a/src/core/analysis/LowerCaseFilter.cpp b/src/core/analysis/LowerCaseFilter.cpp index c52de0bf..2d4afd47 100644 --- a/src/core/analysis/LowerCaseFilter.cpp +++ b/src/core/analysis/LowerCaseFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,25 +9,22 @@ #include "TermAttribute.h" #include "CharFolder.h" -namespace Lucene -{ - LowerCaseFilter::LowerCaseFilter(TokenStreamPtr input) : TokenFilter(input) - { - termAtt = addAttribute(); - } - - LowerCaseFilter::~LowerCaseFilter() - { - } - - bool LowerCaseFilter::incrementToken() - { - if (input->incrementToken()) - { - wchar_t* buffer = termAtt->termBufferArray(); - CharFolder::toLower(buffer, buffer + termAtt->termLength()); - return true; - } - return false; +namespace Lucene { + +LowerCaseFilter::LowerCaseFilter(const TokenStreamPtr& input) : TokenFilter(input) { + termAtt = addAttribute(); +} + +LowerCaseFilter::~LowerCaseFilter() { +} + +bool LowerCaseFilter::incrementToken() { + if (input->incrementToken()) { + wchar_t* buffer = termAtt->termBufferArray(); + CharFolder::toLower(buffer, buffer + termAtt->termLength()); + return true; } + return false; +} + } diff --git a/src/core/analysis/LowerCaseTokenizer.cpp b/src/core/analysis/LowerCaseTokenizer.cpp index 166cd0e2..71ff6556 100644 --- a/src/core/analysis/LowerCaseTokenizer.cpp +++ b/src/core/analysis/LowerCaseTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,26 +8,22 @@ #include "LowerCaseTokenizer.h" #include "CharFolder.h" -namespace Lucene -{ - LowerCaseTokenizer::LowerCaseTokenizer(ReaderPtr input) : LetterTokenizer(input) - { - } - - LowerCaseTokenizer::LowerCaseTokenizer(AttributeSourcePtr source, ReaderPtr input) : LetterTokenizer(source, input) - { - } - - LowerCaseTokenizer::LowerCaseTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : LetterTokenizer(factory, input) - { - } - - LowerCaseTokenizer::~LowerCaseTokenizer() - { - } - - wchar_t LowerCaseTokenizer::normalize(wchar_t c) - { - return CharFolder::toLower(c); - } +namespace Lucene { + +LowerCaseTokenizer::LowerCaseTokenizer(const ReaderPtr& input) : LetterTokenizer(input) { +} + +LowerCaseTokenizer::LowerCaseTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : LetterTokenizer(source, input) { +} + +LowerCaseTokenizer::LowerCaseTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : LetterTokenizer(factory, input) { +} + +LowerCaseTokenizer::~LowerCaseTokenizer() { +} + +wchar_t LowerCaseTokenizer::normalize(wchar_t c) { + return CharFolder::toLower(c); +} + } diff --git a/src/core/analysis/MappingCharFilter.cpp b/src/core/analysis/MappingCharFilter.cpp index a6610a8b..da8761cd 100644 --- a/src/core/analysis/MappingCharFilter.cpp +++ b/src/core/analysis/MappingCharFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,120 +9,118 @@ #include "NormalizeCharMap.h" #include "CharReader.h" -namespace Lucene -{ - MappingCharFilter::MappingCharFilter(NormalizeCharMapPtr normMap, CharStreamPtr in) : BaseCharFilter(in) - { - this->normMap = normMap; - this->charPointer = 0; - this->nextCharCounter = 0; - } - - MappingCharFilter::MappingCharFilter(NormalizeCharMapPtr normMap, ReaderPtr in) : BaseCharFilter(CharReader::get(in)) - { - this->normMap = normMap; - this->charPointer = 0; - this->nextCharCounter = 0; - } - - MappingCharFilter::~MappingCharFilter() - { - } - - int32_t MappingCharFilter::read() - { - while (true) - { - if (charPointer < (int32_t)replacement.length()) - return (int32_t)replacement[charPointer++]; - - int32_t firstChar = nextChar(); - if (firstChar == -1) - return -1; - NormalizeCharMapPtr nm(normMap->submap ? normMap->submap.get((wchar_t)firstChar) : NormalizeCharMapPtr()); - if (!nm) - return firstChar; - NormalizeCharMapPtr result(match(nm)); - if (!result) - return firstChar; - replacement = result->normStr; - charPointer = 0; - if (result->diff != 0) - { - int32_t prevCumulativeDiff = getLastCumulativeDiff(); - if (result->diff < 0) - { - for (int32_t i = 0; i < -result->diff; ++i) - addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i); +namespace Lucene { + +MappingCharFilter::MappingCharFilter(const NormalizeCharMapPtr& normMap, const CharStreamPtr& in) : BaseCharFilter(in) { + this->normMap = normMap; + this->charPointer = 0; + this->nextCharCounter = 0; +} + +MappingCharFilter::MappingCharFilter(const NormalizeCharMapPtr& normMap, const ReaderPtr& in) : BaseCharFilter(CharReader::get(in)) { + this->normMap = normMap; + this->charPointer = 0; + this->nextCharCounter = 0; +} + +MappingCharFilter::~MappingCharFilter() { +} + +int32_t MappingCharFilter::read() { + while (true) { + if (charPointer < (int32_t)replacement.length()) { + return 
(int32_t)replacement[charPointer++]; + } + + int32_t firstChar = nextChar(); + if (firstChar == -1) { + return -1; + } + NormalizeCharMapPtr nm(normMap->submap ? normMap->submap.get((wchar_t)firstChar) : NormalizeCharMapPtr()); + if (!nm) { + return firstChar; + } + NormalizeCharMapPtr result(match(nm)); + if (!result) { + return firstChar; + } + replacement = result->normStr; + charPointer = 0; + if (result->diff != 0) { + int32_t prevCumulativeDiff = getLastCumulativeDiff(); + if (result->diff < 0) { + for (int32_t i = 0; i < -result->diff; ++i) { + addOffCorrectMap(nextCharCounter + i - prevCumulativeDiff, prevCumulativeDiff - 1 - i); } - else - addOffCorrectMap(nextCharCounter - result->diff - prevCumulativeDiff, prevCumulativeDiff + result->diff); + } else { + addOffCorrectMap(nextCharCounter - result->diff - prevCumulativeDiff, prevCumulativeDiff + result->diff); } - } + } - - int32_t MappingCharFilter::nextChar() - { - ++nextCharCounter; - if (buffer && !buffer.empty()) - return buffer.removeFirst(); - return input->read(); +} + +int32_t MappingCharFilter::nextChar() { + ++nextCharCounter; + if (buffer && !buffer.empty()) { + return buffer.removeFirst(); } - - void MappingCharFilter::pushChar(int32_t c) - { - --nextCharCounter; - if (!buffer) - buffer = Collection::newInstance(); - buffer.add(0, (wchar_t)c); + return input->read(); +} + +void MappingCharFilter::pushChar(int32_t c) { + --nextCharCounter; + if (!buffer) { + buffer = Collection::newInstance(); } - - void MappingCharFilter::pushLastChar(int32_t c) - { - if (!buffer) - buffer = Collection::newInstance(); - buffer.add((wchar_t)c); + buffer.add(0, (wchar_t)c); +} + +void MappingCharFilter::pushLastChar(int32_t c) { + if (!buffer) { + buffer = Collection::newInstance(); } - - NormalizeCharMapPtr MappingCharFilter::match(NormalizeCharMapPtr map) - { - NormalizeCharMapPtr result; - if (map->submap) - { - int32_t chr = nextChar(); - if (chr != -1) - { - NormalizeCharMapPtr 
subMap(map->submap.get((wchar_t)chr)); - if (subMap) - result = match(subMap); - if (!result) - pushChar(chr); + buffer.add((wchar_t)c); +} + +NormalizeCharMapPtr MappingCharFilter::match(const NormalizeCharMapPtr& map) { + NormalizeCharMapPtr result; + if (map->submap) { + int32_t chr = nextChar(); + if (chr != -1) { + NormalizeCharMapPtr subMap(map->submap.get((wchar_t)chr)); + if (subMap) { + result = match(subMap); + } + if (!result) { + pushChar(chr); } } - if (!result) - result = map; - return result; } - - int32_t MappingCharFilter::read(wchar_t* buffer, int32_t offset, int32_t length) - { - CharArray tmp(CharArray::newInstance(length)); - int32_t l = input->read(tmp.get(), 0, length); - if (l != -1) - { - for (int32_t i = 0; i < l; ++i) - pushLastChar(tmp[i]); + if (!result) { + result = map; + } + return result; +} + +int32_t MappingCharFilter::read(wchar_t* buffer, int32_t offset, int32_t length) { + CharArray tmp(CharArray::newInstance(length)); + int32_t l = input->read(tmp.get(), 0, length); + if (l != -1) { + for (int32_t i = 0; i < l; ++i) { + pushLastChar(tmp[i]); } - l = 0; - for (int32_t i = offset; i < offset + length; ++i) - { - int32_t c = read(); - if (c == -1) - break; - buffer[i] = (wchar_t)c; - ++l; + } + l = 0; + for (int32_t i = offset; i < offset + length; ++i) { + int32_t c = read(); + if (c == -1) { + break; } - return l == 0 ? -1 : l; + buffer[i] = (wchar_t)c; + ++l; } + return l == 0 ? -1 : l; +} + } diff --git a/src/core/analysis/NormalizeCharMap.cpp b/src/core/analysis/NormalizeCharMap.cpp index d0353ca5..df1a6af9 100644 --- a/src/core/analysis/NormalizeCharMap.cpp +++ b/src/core/analysis/NormalizeCharMap.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,35 +7,33 @@ #include "LuceneInc.h" #include "NormalizeCharMap.h" -namespace Lucene -{ - NormalizeCharMap::NormalizeCharMap() - { - diff = 0; - } - - NormalizeCharMap::~NormalizeCharMap() - { - } - - void NormalizeCharMap::add(const String& singleMatch, const String& replacement) - { - NormalizeCharMapPtr currMap(shared_from_this()); - for (String::const_iterator c = singleMatch.begin(); c != singleMatch.end(); ++c) - { - if (!currMap->submap) - currMap->submap = MapCharNormalizeCharMap::newInstance(); - NormalizeCharMapPtr map(currMap->submap.get(*c)); - if (!map) - { - map = newLucene(); - currMap->submap.put(*c, map); - } - currMap = map; +namespace Lucene { + +NormalizeCharMap::NormalizeCharMap() { + diff = 0; +} + +NormalizeCharMap::~NormalizeCharMap() { +} + +void NormalizeCharMap::add(const String& singleMatch, const String& replacement) { + NormalizeCharMapPtr currMap(shared_from_this()); + for (String::const_iterator c = singleMatch.begin(); c != singleMatch.end(); ++c) { + if (!currMap->submap) { + currMap->submap = MapCharNormalizeCharMap::newInstance(); + } + NormalizeCharMapPtr map(currMap->submap.get(*c)); + if (!map) { + map = newLucene(); + currMap->submap.put(*c, map); } - if (!currMap->normStr.empty()) - boost::throw_exception(RuntimeException(L"MappingCharFilter: there is already a mapping for " + singleMatch)); - currMap->normStr = replacement; - currMap->diff = (int32_t)(singleMatch.length() - replacement.length()); + currMap = map; + } + if (!currMap->normStr.empty()) { + boost::throw_exception(RuntimeException(L"MappingCharFilter: there is already a mapping for " + singleMatch)); } + currMap->normStr = replacement; + currMap->diff = (int32_t)(singleMatch.length() - replacement.length()); +} + } diff --git 
a/src/core/analysis/NumericTokenStream.cpp b/src/core/analysis/NumericTokenStream.cpp index 7524ca63..407f4761 100644 --- a/src/core/analysis/NumericTokenStream.cpp +++ b/src/core/analysis/NumericTokenStream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,135 +12,128 @@ #include "TypeAttribute.h" #include "PositionIncrementAttribute.h" -namespace Lucene -{ - NumericTokenStream::NumericTokenStream() - { - this->shift = 0; - this->valSize = 0; - this->termAtt = addAttribute(); - this->typeAtt = addAttribute(); - this->posIncrAtt = addAttribute(); - this->precisionStep = NumericUtils::PRECISION_STEP_DEFAULT; - } - - NumericTokenStream::NumericTokenStream(int32_t precisionStep) - { - this->shift = 0; - this->valSize = 0; - this->termAtt = addAttribute(); - this->typeAtt = addAttribute(); - this->posIncrAtt = addAttribute(); - this->precisionStep = precisionStep; - if (precisionStep < 1) - boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); - } - - NumericTokenStream::NumericTokenStream(AttributeSourcePtr source, int32_t precisionStep) : TokenStream(source) - { - this->shift = 0; - this->valSize = 0; - this->termAtt = addAttribute(); - this->typeAtt = addAttribute(); - this->posIncrAtt = addAttribute(); - this->precisionStep = precisionStep; - if (precisionStep < 1) - boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); - } - - NumericTokenStream::NumericTokenStream(AttributeFactoryPtr factory, int32_t precisionStep) : TokenStream(factory) - { - this->shift = 0; - this->valSize = 0; - this->termAtt = addAttribute(); - this->typeAtt = 
addAttribute(); - this->posIncrAtt = addAttribute(); - this->precisionStep = precisionStep; - if (precisionStep < 1) - boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); - } - - NumericTokenStream::~NumericTokenStream() - { - } - - const String& NumericTokenStream::TOKEN_TYPE_FULL_PREC() - { - static String _TOKEN_TYPE_FULL_PREC(L"fullPrecNumeric"); - return _TOKEN_TYPE_FULL_PREC; - } - - const String& NumericTokenStream::TOKEN_TYPE_LOWER_PREC() - { - static String _TOKEN_TYPE_LOWER_PREC(L"lowerPrecNumeric"); - return _TOKEN_TYPE_LOWER_PREC; +namespace Lucene { + +NumericTokenStream::NumericTokenStream() { + this->shift = 0; + this->valSize = 0; + this->termAtt = addAttribute(); + this->typeAtt = addAttribute(); + this->posIncrAtt = addAttribute(); + this->precisionStep = NumericUtils::PRECISION_STEP_DEFAULT; +} + +NumericTokenStream::NumericTokenStream(int32_t precisionStep) { + this->shift = 0; + this->valSize = 0; + this->termAtt = addAttribute(); + this->typeAtt = addAttribute(); + this->posIncrAtt = addAttribute(); + this->precisionStep = precisionStep; + if (precisionStep < 1) { + boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } - - NumericTokenStreamPtr NumericTokenStream::setLongValue(int64_t value) - { - this->value = value; - valSize = 64; - shift = 0; - return shared_from_this(); +} + +NumericTokenStream::NumericTokenStream(const AttributeSourcePtr& source, int32_t precisionStep) : TokenStream(source) { + this->shift = 0; + this->valSize = 0; + this->termAtt = addAttribute(); + this->typeAtt = addAttribute(); + this->posIncrAtt = addAttribute(); + this->precisionStep = precisionStep; + if (precisionStep < 1) { + boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } - - NumericTokenStreamPtr NumericTokenStream::setIntValue(int32_t value) - { - this->value = (int64_t)value; - valSize = 32; - shift = 0; - return shared_from_this(); +} + 
+NumericTokenStream::NumericTokenStream(const AttributeFactoryPtr& factory, int32_t precisionStep) : TokenStream(factory) { + this->shift = 0; + this->valSize = 0; + this->termAtt = addAttribute(); + this->typeAtt = addAttribute(); + this->posIncrAtt = addAttribute(); + this->precisionStep = precisionStep; + if (precisionStep < 1) { + boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); } - - NumericTokenStreamPtr NumericTokenStream::setDoubleValue(double value) - { - this->value = (int64_t)value; - valSize = 64; - shift = 0; - return shared_from_this(); +} + +NumericTokenStream::~NumericTokenStream() { +} + +const String& NumericTokenStream::TOKEN_TYPE_FULL_PREC() { + static String _TOKEN_TYPE_FULL_PREC(L"fullPrecNumeric"); + return _TOKEN_TYPE_FULL_PREC; +} + +const String& NumericTokenStream::TOKEN_TYPE_LOWER_PREC() { + static String _TOKEN_TYPE_LOWER_PREC(L"lowerPrecNumeric"); + return _TOKEN_TYPE_LOWER_PREC; +} + +NumericTokenStreamPtr NumericTokenStream::setLongValue(int64_t value) { + this->value = value; + valSize = 64; + shift = 0; + return shared_from_this(); +} + +NumericTokenStreamPtr NumericTokenStream::setIntValue(int32_t value) { + this->value = (int64_t)value; + valSize = 32; + shift = 0; + return shared_from_this(); +} + +NumericTokenStreamPtr NumericTokenStream::setDoubleValue(double value) { + this->value = NumericUtils::doubleToSortableLong(value); + valSize = 64; + shift = 0; + return shared_from_this(); +} + +void NumericTokenStream::reset() { + if (valSize == 0) { + boost::throw_exception(IllegalStateException(L"call setValue() before usage")); } - - void NumericTokenStream::reset() - { - if (valSize == 0) - boost::throw_exception(IllegalStateException(L"call setValue() before usage")); - shift = 0; + shift = 0; +} + +bool NumericTokenStream::incrementToken() { + if (valSize == 0) { + boost::throw_exception(IllegalStateException(L"call setValue() before usage")); } - - bool NumericTokenStream::incrementToken() - { - 
if (valSize == 0) - boost::throw_exception(IllegalStateException(L"call setValue() before usage")); - if (shift >= valSize) - return false; - - clearAttributes(); - CharArray buffer; - switch (valSize) - { - case 64: - buffer = termAtt->resizeTermBuffer(NumericUtils::BUF_SIZE_LONG); - termAtt->setTermLength(NumericUtils::longToPrefixCoded(value, shift, buffer)); - break; - case 32: - buffer = termAtt->resizeTermBuffer(NumericUtils::BUF_SIZE_INT); - termAtt->setTermLength(NumericUtils::intToPrefixCoded((int32_t)value, shift, buffer)); - break; - default: - // should not happen - boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); - } - - typeAtt->setType(shift == 0 ? TOKEN_TYPE_FULL_PREC() : TOKEN_TYPE_LOWER_PREC()); - posIncrAtt->setPositionIncrement(shift == 0 ? 1 : 0); - shift += precisionStep; - return true; + if (shift >= valSize) { + return false; } - - String NumericTokenStream::toString() - { - StringStream buffer; - buffer << L"(numeric,valSize=" << valSize << L",precisionStep=" << precisionStep << L")"; - return buffer.str(); + + clearAttributes(); + CharArray buffer; + switch (valSize) { + case 64: + buffer = termAtt->resizeTermBuffer(NumericUtils::BUF_SIZE_LONG); + termAtt->setTermLength(NumericUtils::longToPrefixCoded(value, shift, buffer)); + break; + case 32: + buffer = termAtt->resizeTermBuffer(NumericUtils::BUF_SIZE_INT); + termAtt->setTermLength(NumericUtils::intToPrefixCoded((int32_t)value, shift, buffer)); + break; + default: + // should not happen + boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } + + typeAtt->setType(shift == 0 ? TOKEN_TYPE_FULL_PREC() : TOKEN_TYPE_LOWER_PREC()); + posIncrAtt->setPositionIncrement(shift == 0 ? 
1 : 0); + shift += precisionStep; + return true; +} + +String NumericTokenStream::toString() { + StringStream buffer; + buffer << L"(numeric,valSize=" << valSize << L",precisionStep=" << precisionStep << L")"; + return buffer.str(); +} + } diff --git a/src/core/analysis/PerFieldAnalyzerWrapper.cpp b/src/core/analysis/PerFieldAnalyzerWrapper.cpp index bc7859a0..bbbec3a1 100644 --- a/src/core/analysis/PerFieldAnalyzerWrapper.cpp +++ b/src/core/analysis/PerFieldAnalyzerWrapper.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,65 +8,62 @@ #include "PerFieldAnalyzerWrapper.h" #include "Fieldable.h" -namespace Lucene -{ - PerFieldAnalyzerWrapper::PerFieldAnalyzerWrapper(AnalyzerPtr defaultAnalyzer) - { - this->defaultAnalyzer = defaultAnalyzer; - this->analyzerMap = MapStringAnalyzer::newInstance(); - } - - PerFieldAnalyzerWrapper::PerFieldAnalyzerWrapper(AnalyzerPtr defaultAnalyzer, MapStringAnalyzer fieldAnalyzers) - { - this->defaultAnalyzer = defaultAnalyzer; - this->analyzerMap = MapStringAnalyzer::newInstance(); - if (fieldAnalyzers) - analyzerMap.putAll(fieldAnalyzers.begin(), fieldAnalyzers.end()); - } - - PerFieldAnalyzerWrapper::~PerFieldAnalyzerWrapper() - { - } - - void PerFieldAnalyzerWrapper::addAnalyzer(const String& fieldName, AnalyzerPtr analyzer) - { - analyzerMap.put(fieldName, analyzer); - } - - TokenStreamPtr PerFieldAnalyzerWrapper::tokenStream(const String& fieldName, ReaderPtr reader) - { - AnalyzerPtr analyzer(analyzerMap.get(fieldName)); - if (!analyzer) - analyzer = defaultAnalyzer; - return analyzer->tokenStream(fieldName, reader); +namespace Lucene { + 
+PerFieldAnalyzerWrapper::PerFieldAnalyzerWrapper(const AnalyzerPtr& defaultAnalyzer) { + this->defaultAnalyzer = defaultAnalyzer; + this->analyzerMap = MapStringAnalyzer::newInstance(); +} + +PerFieldAnalyzerWrapper::PerFieldAnalyzerWrapper(const AnalyzerPtr& defaultAnalyzer, MapStringAnalyzer fieldAnalyzers) { + this->defaultAnalyzer = defaultAnalyzer; + this->analyzerMap = MapStringAnalyzer::newInstance(); + if (fieldAnalyzers) { + analyzerMap.putAll(fieldAnalyzers.begin(), fieldAnalyzers.end()); } - - TokenStreamPtr PerFieldAnalyzerWrapper::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - AnalyzerPtr analyzer(analyzerMap.get(fieldName)); - if (!analyzer) - analyzer = defaultAnalyzer; - return analyzer->reusableTokenStream(fieldName, reader); +} + +PerFieldAnalyzerWrapper::~PerFieldAnalyzerWrapper() { +} + +void PerFieldAnalyzerWrapper::addAnalyzer(const String& fieldName, const AnalyzerPtr& analyzer) { + analyzerMap.put(fieldName, analyzer); +} + +TokenStreamPtr PerFieldAnalyzerWrapper::tokenStream(const String& fieldName, const ReaderPtr& reader) { + AnalyzerPtr analyzer(analyzerMap.get(fieldName)); + if (!analyzer) { + analyzer = defaultAnalyzer; } - - int32_t PerFieldAnalyzerWrapper::getPositionIncrementGap(const String& fieldName) - { - AnalyzerPtr analyzer(analyzerMap.get(fieldName)); - if (!analyzer) - analyzer = defaultAnalyzer; - return analyzer->getPositionIncrementGap(fieldName); + return analyzer->tokenStream(fieldName, reader); +} + +TokenStreamPtr PerFieldAnalyzerWrapper::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + AnalyzerPtr analyzer(analyzerMap.get(fieldName)); + if (!analyzer) { + analyzer = defaultAnalyzer; } - - int32_t PerFieldAnalyzerWrapper::getOffsetGap(FieldablePtr field) - { - AnalyzerPtr analyzer(analyzerMap.get(field->name())); - if (!analyzer) - analyzer = defaultAnalyzer; - return analyzer->getOffsetGap(field); + return analyzer->reusableTokenStream(fieldName, reader); +} + +int32_t 
PerFieldAnalyzerWrapper::getPositionIncrementGap(const String& fieldName) { + AnalyzerPtr analyzer(analyzerMap.get(fieldName)); + if (!analyzer) { + analyzer = defaultAnalyzer; } - - String PerFieldAnalyzerWrapper::toString() - { - return L"PerFieldAnalyzerWrapper(default=" + defaultAnalyzer->toString() + L")"; + return analyzer->getPositionIncrementGap(fieldName); +} + +int32_t PerFieldAnalyzerWrapper::getOffsetGap(const FieldablePtr& field) { + AnalyzerPtr analyzer(analyzerMap.get(field->name())); + if (!analyzer) { + analyzer = defaultAnalyzer; } + return analyzer->getOffsetGap(field); +} + +String PerFieldAnalyzerWrapper::toString() { + return L"PerFieldAnalyzerWrapper(default=" + defaultAnalyzer->toString() + L")"; +} + } diff --git a/src/core/analysis/PorterStemFilter.cpp b/src/core/analysis/PorterStemFilter.cpp index 0d3dc412..55862d31 100644 --- a/src/core/analysis/PorterStemFilter.cpp +++ b/src/core/analysis/PorterStemFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,25 +9,25 @@ #include "PorterStemmer.h" #include "TermAttribute.h" -namespace Lucene -{ - PorterStemFilter::PorterStemFilter(TokenStreamPtr input) : TokenFilter(input) - { - stemmer = newLucene(); - termAtt = addAttribute(); - } - - PorterStemFilter::~PorterStemFilter() - { +namespace Lucene { + +PorterStemFilter::PorterStemFilter(const TokenStreamPtr& input) : TokenFilter(input) { + stemmer = newLucene(); + termAtt = addAttribute(); +} + +PorterStemFilter::~PorterStemFilter() { +} + +bool PorterStemFilter::incrementToken() { + if (!input->incrementToken()) { + return false; } - - bool PorterStemFilter::incrementToken() - { - if (!input->incrementToken()) - return false; - - if (stemmer->stem(termAtt->termBuffer())) - termAtt->setTermBuffer(stemmer->getResultBuffer(), 0, stemmer->getResultLength()); - return true; + + if (stemmer->stem(termAtt->termBufferArray(), termAtt->termLength() - 1)) { + termAtt->setTermBuffer(stemmer->getResultBuffer(), 0, stemmer->getResultLength()); } + return true; +} + } diff --git a/src/core/analysis/PorterStemmer.cpp b/src/core/analysis/PorterStemmer.cpp index 6855f154..8947eeff 100644 --- a/src/core/analysis/PorterStemmer.cpp +++ b/src/core/analysis/PorterStemmer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,482 +7,463 @@ #include "LuceneInc.h" #include "PorterStemmer.h" -namespace Lucene -{ - PorterStemmer::PorterStemmer() - { - b = NULL; - k = 0; - j = 0; - i = 0; - dirty = false; - } - - PorterStemmer::~PorterStemmer() - { - } - - bool PorterStemmer::stem(CharArray word) - { - return stem(word.get(), word.size() - 1); - } - - bool PorterStemmer::stem(wchar_t* b, int32_t k) - { - this->b = b; - this->k = k; - this->j = 0; - this->i = k; - dirty = false; - - if (k <= 1) - return false; // DEPARTURE - - // With these lines, strings of length 1 or 2 don't go through the stemming process, although no mention - // is made of this in the published algorithm. Remove the line to match the published algorithm. - step1ab(); - step1c(); - step2(); - step3(); - step4(); - step5(); - - if (i != this->k) - dirty = true; - return dirty; +namespace Lucene { + +PorterStemmer::PorterStemmer() { + b = NULL; + k = 0; + j = 0; + i = 0; + dirty = false; +} + +PorterStemmer::~PorterStemmer() { +} + +bool PorterStemmer::stem(CharArray word) { + return stem(word.get(), word.size() - 1); +} + +bool PorterStemmer::stem(wchar_t* b, int32_t k) { + this->b = b; + this->k = k; + this->j = 0; + this->i = k; + dirty = false; + + if (k <= 1) { + return false; // DEPARTURE } - - wchar_t* PorterStemmer::getResultBuffer() - { - return b; + + // With these lines, strings of length 1 or 2 don't go through the stemming process, although no mention + // is made of this in the published algorithm. Remove the line to match the published algorithm. 
+ step1ab(); + step1c(); + step2(); + step3(); + step4(); + step5(); + + if (i != this->k) { + dirty = true; } - - int32_t PorterStemmer::getResultLength() - { - return k + 1; + return dirty; +} + +wchar_t* PorterStemmer::getResultBuffer() { + return b; +} + +int32_t PorterStemmer::getResultLength() { + return k + 1; +} + +bool PorterStemmer::cons(int32_t i) { + switch (b[i]) { + case L'a': + case L'e': + case L'i': + case L'o': + case L'u': + return false; + case L'y': + return (i == 0) ? true : !cons(i - 1); + default: + return true; } - - bool PorterStemmer::cons(int32_t i) - { - switch (b[i]) - { - case L'a': - case L'e': - case L'i': - case L'o': - case L'u': - return false; - case L'y': - return (i == 0) ? true : !cons(i - 1); - default: - return true; +} + +int32_t PorterStemmer::m() { + int32_t n = 0; + int32_t i = 0; + while (true) { + if (i > j) { + return n; } + if (!cons(i)) { + break; + } + ++i; } - - int32_t PorterStemmer::m() - { - int32_t n = 0; - int32_t i = 0; - while (true) - { - if (i > j) + ++i; + while (true) { + while (true) { + if (i > j) { return n; - if (!cons(i)) + } + if (cons(i)) { break; + } ++i; } ++i; - while (true) - { - while (true) - { - if (i > j) - return n; - if (cons(i)) - break; - ++i; + ++n; + while (true) { + if (i > j) { + return n; } - ++i; - ++n; - while (true) - { - if (i > j) - return n; - if (!cons(i)) - break; - ++i; + if (!cons(i)) { + break; } ++i; } + ++i; } - - bool PorterStemmer::vowelinstem() - { - for (int32_t i = 0; i <= j; ++i) - { - if (!cons(i)) - return true; +} + +bool PorterStemmer::vowelinstem() { + for (int32_t i = 0; i <= j; ++i) { + if (!cons(i)) { + return true; } + } + return false; +} + +bool PorterStemmer::doublec(int32_t j) { + if (j < 1) { return false; } - - bool PorterStemmer::doublec(int32_t j) - { - if (j < 1) - return false; - if (b[j] != b[j - 1]) - return false; - return cons(j); + if (b[j] != b[j - 1]) { + return false; } - - bool PorterStemmer::cvc(int32_t i) - { - if (i < 2 || 
!cons(i) || cons(i - 1) || !cons(i - 2)) - return false; - int32_t ch = b[i]; - if (ch == L'w' || ch == L'x' || ch == L'y') - return false; - return true; + return cons(j); +} + +bool PorterStemmer::cvc(int32_t i) { + if (i < 2 || !cons(i) || cons(i - 1) || !cons(i - 2)) { + return false; } - - bool PorterStemmer::ends(const wchar_t* s) - { - int32_t length = s[0]; - if (s[length] != b[k]) - return false; // tiny speed-up - if (length > k + 1) - return false; - if (std::memcmp(b + k - length + 1, s + 1, length) != 0) - return false; - j = k - length; - return true; + int32_t ch = b[i]; + if (ch == L'w' || ch == L'x' || ch == L'y') { + return false; } - - void PorterStemmer::setto(const wchar_t* s) - { - int32_t length = s[0]; - std::memmove(b + j + 1, s + 1, length); - k = j + length; - dirty = true; + return true; +} + +bool PorterStemmer::ends(const wchar_t* s) { + int32_t length = s[0]; + if (s[length] != b[k]) { + return false; // tiny speed-up + } + if (length > k + 1) { + return false; + } + if (std::memcmp(b + k - length + 1, s + 1, length) != 0) { + return false; } - - void PorterStemmer::r(const wchar_t* s) - { - if (m() > 0) - setto(s); + j = k - length; + return true; +} + +void PorterStemmer::setto(const wchar_t* s) { + int32_t length = s[0]; + std::memmove(b + j + 1, s + 1, length); + k = j + length; + dirty = true; +} + +void PorterStemmer::r(const wchar_t* s) { + if (m() > 0) { + setto(s); + } +} + +void PorterStemmer::step1ab() { + if (b[k] == L's') { + if (ends(L"\04" L"sses")) { + k -= 2; + } else if (ends(L"\03" L"ies")) { + setto(L"\01" L"i"); + } else if (b[k - 1] != L's') { + --k; + } } - - void PorterStemmer::step1ab() - { - if (b[k] == L's') - { - if (ends(L"\04" L"sses")) - k -= 2; - else if (ends(L"\03" L"ies")) - setto(L"\01" L"i"); - else if (b[k - 1] != L's') - --k; - } - if (ends(L"\03" L"eed")) - { - if (m() > 0) - --k; - } - else if ((ends(L"\02" L"ed") || ends(L"\03" L"ing")) && vowelinstem()) - { - k = j; - if (ends(L"\02" L"at")) 
- setto(L"\03" L"ate"); - else if (ends(L"\02" L"bl")) - setto(L"\03" L"ble"); - else if (ends(L"\02" L"iz")) - setto(L"\03" L"ize"); - else if (doublec(k)) - { - --k; - int32_t ch = b[k]; - if (ch == L'l' || ch == L's' || ch == L'z') - ++k; + if (ends(L"\03" L"eed")) { + if (m() > 0) { + --k; + } + } else if ((ends(L"\02" L"ed") || ends(L"\03" L"ing")) && vowelinstem()) { + k = j; + if (ends(L"\02" L"at")) { + setto(L"\03" L"ate"); + } else if (ends(L"\02" L"bl")) { + setto(L"\03" L"ble"); + } else if (ends(L"\02" L"iz")) { + setto(L"\03" L"ize"); + } else if (doublec(k)) { + --k; + int32_t ch = b[k]; + if (ch == L'l' || ch == L's' || ch == L'z') { + ++k; } - else if (m() == 1 && cvc(k)) - setto(L"\01" L"e"); + } else if (m() == 1 && cvc(k)) { + setto(L"\01" L"e"); } } - - void PorterStemmer::step1c() - { - if (ends(L"\01" L"y") && vowelinstem()) - { - b[k] = L'i'; - dirty = true; +} + +void PorterStemmer::step1c() { + if (ends(L"\01" L"y") && vowelinstem()) { + b[k] = L'i'; + dirty = true; + } +} + +void PorterStemmer::step2() { + if (k == 0) { + return; + } + switch (b[k - 1]) { + case L'a': + if (ends(L"\07" L"ational")) { + r(L"\03" L"ate"); + break; + } + if (ends(L"\06" L"tional")) { + r(L"\04" L"tion"); + break; + } + break; + case L'c': + if (ends(L"\04" L"enci")) { + r(L"\04" L"ence"); + break; + } + if (ends(L"\04" L"anci")) { + r(L"\04" L"ance"); + break; + } + break; + case L'e': + if (ends(L"\04" L"izer")) { + r(L"\03" L"ize"); + break; + } + break; + case L'l': + if (ends(L"\03" L"bli")) { // DEPARTURE + r(L"\03" L"ble"); + break; + } + if (ends(L"\04" L"alli")) { + r(L"\02" L"al"); + break; + } + if (ends(L"\05" L"entli")) { + r(L"\03" L"ent"); + break; + } + if (ends(L"\03" L"eli")) { + r(L"\01" L"e"); + break; + } + if (ends(L"\05" L"ousli")) { + r(L"\03" L"ous"); + break; + } + break; + case L'o': + if (ends(L"\07" L"ization")) { + r(L"\03" L"ize"); + break; + } + if (ends(L"\05" L"ation")) { + r(L"\03" L"ate"); + break; + } + if (ends(L"\04" 
L"ator")) { + r(L"\03" L"ate"); + break; + } + break; + case L's': + if (ends(L"\05" L"alism")) { + r(L"\02" L"al"); + break; + } + if (ends(L"\07" L"iveness")) { + r(L"\03" L"ive"); + break; + } + if (ends(L"\07" L"fulness")) { + r(L"\03" L"ful"); + break; + } + if (ends(L"\07" L"ousness")) { + r(L"\03" L"ous"); + break; + } + break; + case L't': + if (ends(L"\05" L"aliti")) { + r(L"\02" L"al"); + break; + } + if (ends(L"\05" L"iviti")) { + r(L"\03" L"ive"); + break; + } + if (ends(L"\06" L"biliti")) { + r(L"\03" L"ble"); + break; + } + break; + case L'g': + if (ends(L"\04" L"logi")) { // DEPARTURE + r(L"\03" L"log"); + break; } } - - void PorterStemmer::step2() - { - if (k == 0) - return; - switch (b[k - 1]) - { - case L'a': - if (ends(L"\07" L"ational")) - { - r(L"\03" L"ate"); - break; - } - if (ends(L"\06" L"tional")) - { - r(L"\04" L"tion"); - break; - } - break; - case L'c': - if (ends(L"\04" L"enci")) - { - r(L"\04" L"ence"); - break; - } - if (ends(L"\04" L"anci")) - { - r(L"\04" L"ance"); - break; - } - break; - case L'e': - if (ends(L"\04" L"izer")) - { - r(L"\03" L"ize"); - break; - } - break; - case L'l': - if (ends(L"\03" L"bli")) // DEPARTURE - { - r(L"\03" L"ble"); - break; - } - if (ends(L"\04" L"alli")) - { - r(L"\02" L"al"); - break; - } - if (ends(L"\05" L"entli")) - { - r(L"\03" L"ent"); - break; - } - if (ends(L"\03" L"eli")) - { - r(L"\01" L"e"); - break; - } - if (ends(L"\05" L"ousli")) - { - r(L"\03" L"ous"); - break; - } - break; - case L'o': - if (ends(L"\07" L"ization")) - { - r(L"\03" L"ize"); - break; - } - if (ends(L"\05" L"ation")) - { - r(L"\03" L"ate"); - break; - } - if (ends(L"\04" L"ator")) - { - r(L"\03" L"ate"); - break; - } - break; - case L's': - if (ends(L"\05" L"alism")) - { - r(L"\02" L"al"); - break; - } - if (ends(L"\07" L"iveness")) - { - r(L"\03" L"ive"); - break; - } - if (ends(L"\07" L"fulness")) - { - r(L"\03" L"ful"); - break; - } - if (ends(L"\07" L"ousness")) - { - r(L"\03" L"ous"); - break; - } - break; - case 
L't': - if (ends(L"\05" L"aliti")) - { - r(L"\02" L"al"); - break; - } - if (ends(L"\05" L"iviti")) - { - r(L"\03" L"ive"); - break; - } - if (ends(L"\06" L"biliti")) - { - r(L"\03" L"ble"); - break; - } - break; - case L'g': - if (ends(L"\04" L"logi")) // DEPARTURE - { - r(L"\03" L"log"); - break; - } +} + +void PorterStemmer::step3() { + switch (b[k]) { + case L'e': + if (ends(L"\05" L"icate")) { + r(L"\02" L"ic"); + break; + } + if (ends(L"\05" L"ative")) { + r(L"\00" L""); + break; + } + if (ends(L"\05" L"alize")) { + r(L"\02" L"al"); + break; + } + break; + case L'i': + if (ends(L"\05" L"iciti")) { + r(L"\02" L"ic"); + break; } + break; + case L'l': + if (ends(L"\04" L"ical")) { + r(L"\02" L"ic"); + break; + } + if (ends(L"\03" L"ful")) { + r(L"\00" L""); + break; + } + break; + case L's': + if (ends(L"\04" L"ness")) { + r(L"\00" L""); + break; + } + break; } - - void PorterStemmer::step3() - { - switch (b[k]) - { - case L'e': - if (ends(L"\05" L"icate")) - { - r(L"\02" L"ic"); - break; - } - if (ends(L"\05" L"ative")) - { - r(L"\00" L""); - break; - } - if (ends(L"\05" L"alize")) - { - r(L"\02" L"al"); - break; - } - break; - case L'i': - if (ends(L"\05" L"iciti")) - { - r(L"\02" L"ic"); - break; - } - break; - case L'l': - if (ends(L"\04" L"ical")) - { - r(L"\02" L"ic"); - break; - } - if (ends(L"\03" L"ful")) - { - r(L"\00" L""); - break; - } - break; - case L's': - if (ends(L"\04" L"ness")) - { - r(L"\00" L""); - break; - } - break; +} + +void PorterStemmer::step4() { + if (k == 0) { + return; + } + switch (b[k - 1]) { + case L'a': + if (ends(L"\02" L"al")) { + break; + } + return; + case L'c': + if (ends(L"\04" L"ance")) { + break; + } + if (ends(L"\04" L"ence")) { + break; + } + return; + case L'e': + if (ends(L"\02" L"er")) { + break; + } + return; + case L'i': + if (ends(L"\02" L"ic")) { + break; + } + return; + case L'l': + if (ends(L"\04" L"able")) { + break; + } + if (ends(L"\04" L"ible")) { + break; + } + return; + case L'n': + if (ends(L"\03" 
L"ant")) { + break; } + if (ends(L"\05" L"ement")) { + break; + } + if (ends(L"\04" L"ment")) { + break; + } + if (ends(L"\03" L"ent")) { + break; + } + return; + case L'o': + if (ends(L"\03" L"ion") && (b[j] == L's' || b[j] == L't')) { + break; + } + if (ends(L"\02" L"ou")) { + break; + } + return; + // takes care of -ous + case L's': + if (ends(L"\03" L"ism")) { + break; + } + return; + case L't': + if (ends(L"\03" L"ate")) { + break; + } + if (ends(L"\03" L"iti")) { + break; + } + return; + case L'u': + if (ends(L"\03" L"ous")) { + break; + } + return; + case L'v': + if (ends(L"\03" L"ive")) { + break; + } + return; + case L'z': + if (ends(L"\03" L"ize")) { + break; + } + return; + default: + return; } - - void PorterStemmer::step4() - { - if (k == 0) - return; - switch (b[k - 1]) - { - case L'a': - if (ends(L"\02" L"al")) - break; - return; - case L'c': - if (ends(L"\04" L"ance")) - break; - if (ends(L"\04" L"ence")) - break; - return; - case L'e': - if (ends(L"\02" L"er")) - break; - return; - case L'i': - if (ends(L"\02" L"ic")) - break; - return; - case L'l': - if (ends(L"\04" L"able")) - break; - if (ends(L"\04" L"ible")) - break; - return; - case L'n': - if (ends(L"\03" L"ant")) - break; - if (ends(L"\05" L"ement")) - break; - if (ends(L"\04" L"ment")) - break; - if (ends(L"\03" L"ent")) - break; - return; - case L'o': - if (ends(L"\03" L"ion") && (b[j] == L's' || b[j] == L't')) - break; - if (ends(L"\02" L"ou")) - break; - return; - // takes care of -ous - case L's': - if (ends(L"\03" L"ism")) - break; - return; - case L't': - if (ends(L"\03" L"ate")) - break; - if (ends(L"\03" L"iti")) - break; - return; - case L'u': - if (ends(L"\03" L"ous")) - break; - return; - case L'v': - if (ends(L"\03" L"ive")) - break; - return; - case L'z': - if (ends(L"\03" L"ize")) - break; - return; - default: - return; - } - if (m() > 1) - k = j; + if (m() > 1) { + k = j; } - - void PorterStemmer::step5() - { - j = k; - if (b[k] == L'e') - { - int32_t a = m(); - if (a > 1 || 
a == 1 && !cvc(k - 1)) - --k; - } - if (b[k] == L'l' && doublec(k) && m() > 1) +} + +void PorterStemmer::step5() { + j = k; + if (b[k] == L'e') { + int32_t a = m(); + if (a > 1 || (a == 1 && !cvc(k - 1))) { --k; + } + } + if (b[k] == L'l' && doublec(k) && m() > 1) { + --k; } } + +} diff --git a/src/core/analysis/SimpleAnalyzer.cpp b/src/core/analysis/SimpleAnalyzer.cpp index a92cea42..49f55902 100644 --- a/src/core/analysis/SimpleAnalyzer.cpp +++ b/src/core/analysis/SimpleAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,27 +8,24 @@ #include "SimpleAnalyzer.h" #include "LowerCaseTokenizer.h" -namespace Lucene -{ - SimpleAnalyzer::~SimpleAnalyzer() - { - } - - TokenStreamPtr SimpleAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - return newLucene(reader); - } - - TokenStreamPtr SimpleAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!tokenizer) - { - tokenizer = newLucene(reader); - setPreviousTokenStream(tokenizer); - } - else - tokenizer->reset(reader); - return tokenizer; +namespace Lucene { + +SimpleAnalyzer::~SimpleAnalyzer() { +} + +TokenStreamPtr SimpleAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + return newLucene(reader); +} + +TokenStreamPtr SimpleAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!tokenizer) { + tokenizer = newLucene(reader); + setPreviousTokenStream(tokenizer); + } else { + 
tokenizer->reset(reader); } + return tokenizer; +} + } diff --git a/src/core/analysis/StopAnalyzer.cpp b/src/core/analysis/StopAnalyzer.cpp index c0bc7da6..56401cc0 100644 --- a/src/core/analysis/StopAnalyzer.cpp +++ b/src/core/analysis/StopAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,73 +12,64 @@ #include "Reader.h" #include "LowerCaseTokenizer.h" -namespace Lucene -{ - const wchar_t* StopAnalyzer::_ENGLISH_STOP_WORDS_SET[] = - { - L"a", L"an", L"and", L"are", L"as", L"at", L"be", L"but", L"by", - L"for", L"if", L"in", L"into", L"is", L"it", L"no", L"not", L"of", - L"on", L"or", L"such", L"that", L"the", L"their", L"then", L"there", - L"these", L"they", L"this", L"to", L"was", L"will", L"with" - }; - - StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion) - { - stopWords = ENGLISH_STOP_WORDS_SET(); - enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); - } - - StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords) - { - this->stopWords = stopWords; - enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); - } - - StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, const String& stopwordsFile) - { - stopWords = WordlistLoader::getWordSet(stopwordsFile); - enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); - } - - StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, ReaderPtr stopwords) - { - stopWords = WordlistLoader::getWordSet(stopwords); - enablePositionIncrements = 
StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); - } - - StopAnalyzer::~StopAnalyzer() - { - } - - const HashSet StopAnalyzer::ENGLISH_STOP_WORDS_SET() - { - static HashSet __ENGLISH_STOP_WORDS_SET; - if (!__ENGLISH_STOP_WORDS_SET) - __ENGLISH_STOP_WORDS_SET = HashSet::newInstance(_ENGLISH_STOP_WORDS_SET, _ENGLISH_STOP_WORDS_SET + SIZEOF_ARRAY(_ENGLISH_STOP_WORDS_SET)); - return __ENGLISH_STOP_WORDS_SET; - } - - TokenStreamPtr StopAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - return newLucene(enablePositionIncrements, newLucene(reader), stopWords); - } - - TokenStreamPtr StopAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - StopAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!streams) - { - streams = newLucene(); - streams->source = newLucene(reader); - streams->result = newLucene(enablePositionIncrements, streams->source, stopWords); - setPreviousTokenStream(streams); - } - else - streams->source->reset(reader); - return streams->result; - } - - StopAnalyzerSavedStreams::~StopAnalyzerSavedStreams() - { +namespace Lucene { + +const wchar_t* StopAnalyzer::_ENGLISH_STOP_WORDS_SET[] = { + L"a", L"an", L"and", L"are", L"as", L"at", L"be", L"but", L"by", + L"for", L"if", L"in", L"into", L"is", L"it", L"no", L"not", L"of", + L"on", L"or", L"such", L"that", L"the", L"their", L"then", L"there", + L"these", L"they", L"this", L"to", L"was", L"will", L"with" +}; + +StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion) { + stopWords = ENGLISH_STOP_WORDS_SET(); + enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); +} + +StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords) { + this->stopWords = stopWords; + enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); +} + +StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, const 
String& stopwordsFile) { + stopWords = WordlistLoader::getWordSet(stopwordsFile); + enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); +} + +StopAnalyzer::StopAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords) { + stopWords = WordlistLoader::getWordSet(stopwords); + enablePositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); +} + +StopAnalyzer::~StopAnalyzer() { +} + +const HashSet StopAnalyzer::ENGLISH_STOP_WORDS_SET() { + static HashSet __ENGLISH_STOP_WORDS_SET; + LUCENE_RUN_ONCE( + __ENGLISH_STOP_WORDS_SET = HashSet::newInstance(_ENGLISH_STOP_WORDS_SET, _ENGLISH_STOP_WORDS_SET + SIZEOF_ARRAY(_ENGLISH_STOP_WORDS_SET)); + ); + return __ENGLISH_STOP_WORDS_SET; +} + +TokenStreamPtr StopAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + return newLucene(enablePositionIncrements, newLucene(reader), stopWords); +} + +TokenStreamPtr StopAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + StopAnalyzerSavedStreamsPtr streams(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!streams) { + streams = newLucene(); + streams->source = newLucene(reader); + streams->result = newLucene(enablePositionIncrements, streams->source, stopWords); + setPreviousTokenStream(streams); + } else { + streams->source->reset(reader); } + return streams->result; +} + +StopAnalyzerSavedStreams::~StopAnalyzerSavedStreams() { +} + } diff --git a/src/core/analysis/StopFilter.cpp b/src/core/analysis/StopFilter.cpp index 362ed7f9..5cfa6c50 100644 --- a/src/core/analysis/StopFilter.cpp +++ b/src/core/analysis/StopFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,63 +10,55 @@ #include "TermAttribute.h" #include "PositionIncrementAttribute.h" -namespace Lucene -{ - StopFilter::StopFilter(bool enablePositionIncrements, TokenStreamPtr input, HashSet stopWords, bool ignoreCase) : TokenFilter(input) - { - this->stopWords = newLucene(stopWords, ignoreCase); - this->enablePositionIncrements = enablePositionIncrements; - termAtt = addAttribute(); - posIncrAtt = addAttribute(); - } - - StopFilter::StopFilter(bool enablePositionIncrements, TokenStreamPtr input, CharArraySetPtr stopWords, bool ignoreCase) : TokenFilter(input) - { - this->stopWords = stopWords; - this->enablePositionIncrements = enablePositionIncrements; - termAtt = addAttribute(); - posIncrAtt = addAttribute(); - } - - StopFilter::~StopFilter() - { - } - - HashSet StopFilter::makeStopSet(Collection stopWords) - { - return HashSet::newInstance(stopWords.begin(), stopWords.end()); - } - - bool StopFilter::incrementToken() - { - // return the first non-stop word found - int32_t skippedPositions = 0; - while (input->incrementToken()) - { - if (!stopWords->contains(termAtt->termBufferArray(), 0, termAtt->termLength())) - { - if (enablePositionIncrements) - posIncrAtt->setPositionIncrement(posIncrAtt->getPositionIncrement() + skippedPositions); - return true; +namespace Lucene { + +StopFilter::StopFilter(bool enablePositionIncrements, const TokenStreamPtr& input, HashSet stopWords, bool ignoreCase) : TokenFilter(input) { + this->stopWords = newLucene(stopWords, ignoreCase); + this->enablePositionIncrements = enablePositionIncrements; + termAtt = addAttribute(); + posIncrAtt = addAttribute(); +} + +StopFilter::StopFilter(bool enablePositionIncrements, const TokenStreamPtr& input, const CharArraySetPtr& stopWords, bool ignoreCase) : TokenFilter(input) { + this->stopWords = 
stopWords; + this->enablePositionIncrements = enablePositionIncrements; + termAtt = addAttribute(); + posIncrAtt = addAttribute(); +} + +StopFilter::~StopFilter() { +} + +HashSet StopFilter::makeStopSet(Collection stopWords) { + return HashSet::newInstance(stopWords.begin(), stopWords.end()); +} + +bool StopFilter::incrementToken() { + // return the first non-stop word found + int32_t skippedPositions = 0; + while (input->incrementToken()) { + if (!stopWords->contains(termAtt->termBufferArray(), 0, termAtt->termLength())) { + if (enablePositionIncrements) { + posIncrAtt->setPositionIncrement(posIncrAtt->getPositionIncrement() + skippedPositions); } - skippedPositions += posIncrAtt->getPositionIncrement(); + return true; } - // reached EOS -- return false - return false; - } - - bool StopFilter::getEnablePositionIncrementsVersionDefault(LuceneVersion::Version matchVersion) - { - return LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_29); - } - - bool StopFilter::getEnablePositionIncrements() - { - return enablePositionIncrements; - } - - void StopFilter::setEnablePositionIncrements(bool enable) - { - this->enablePositionIncrements = enable; + skippedPositions += posIncrAtt->getPositionIncrement(); } + // reached EOS -- return false + return false; +} + +bool StopFilter::getEnablePositionIncrementsVersionDefault(LuceneVersion::Version matchVersion) { + return LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_29); +} + +bool StopFilter::getEnablePositionIncrements() { + return enablePositionIncrements; +} + +void StopFilter::setEnablePositionIncrements(bool enable) { + this->enablePositionIncrements = enable; +} + } diff --git a/src/core/analysis/TeeSinkTokenFilter.cpp b/src/core/analysis/TeeSinkTokenFilter.cpp index 266863ba..c1bc81bf 100644 --- a/src/core/analysis/TeeSinkTokenFilter.cpp +++ b/src/core/analysis/TeeSinkTokenFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 
2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,162 +8,142 @@ #include "TeeSinkTokenFilter.h" #include "Attribute.h" -namespace Lucene -{ - TeeSinkTokenFilter::TeeSinkTokenFilter(TokenStreamPtr input) : TokenFilter(input) - { - this->sinks = Collection::newInstance(); - } - - TeeSinkTokenFilter::~TeeSinkTokenFilter() - { - } - - SinkTokenStreamPtr TeeSinkTokenFilter::newSinkTokenStream() - { - static SinkFilterPtr ACCEPT_ALL_FILTER; - if (!ACCEPT_ALL_FILTER) - { - ACCEPT_ALL_FILTER = newLucene(); - CycleCheck::addStatic(ACCEPT_ALL_FILTER); - } - return newSinkTokenStream(ACCEPT_ALL_FILTER); - } - - SinkTokenStreamPtr TeeSinkTokenFilter::newSinkTokenStream(SinkFilterPtr filter) - { - SinkTokenStreamPtr sink(newLucene(this->cloneAttributes(), filter)); - this->sinks.add(sink); - return sink; +namespace Lucene { + +TeeSinkTokenFilter::TeeSinkTokenFilter(const TokenStreamPtr& input) : TokenFilter(input) { + this->sinks = Collection::newInstance(); +} + +TeeSinkTokenFilter::~TeeSinkTokenFilter() { +} + +SinkTokenStreamPtr TeeSinkTokenFilter::newSinkTokenStream() { + static SinkFilterPtr ACCEPT_ALL_FILTER; + LUCENE_RUN_ONCE( + ACCEPT_ALL_FILTER = newLucene(); + CycleCheck::addStatic(ACCEPT_ALL_FILTER); + ); + return newSinkTokenStream(ACCEPT_ALL_FILTER); +} + +SinkTokenStreamPtr TeeSinkTokenFilter::newSinkTokenStream(const SinkFilterPtr& filter) { + SinkTokenStreamPtr sink(newLucene(this->cloneAttributes(), filter)); + this->sinks.add(sink); + return sink; +} + +void TeeSinkTokenFilter::addSinkTokenStream(const SinkTokenStreamPtr& sink) { + // check that sink has correct factory + if (this->getAttributeFactory() != sink->getAttributeFactory()) { + boost::throw_exception(IllegalArgumentException(L"The supplied 
sink is not compatible to this tee.")); } - - void TeeSinkTokenFilter::addSinkTokenStream(SinkTokenStreamPtr sink) - { - // check that sink has correct factory - if (this->getAttributeFactory() != sink->getAttributeFactory()) - boost::throw_exception(IllegalArgumentException(L"The supplied sink is not compatible to this tee.")); - // add eventually missing attribute impls to the existing sink - Collection attrImpls(this->cloneAttributes()->getAttributes()); - for (Collection::iterator it = attrImpls.begin(); it != attrImpls.end(); ++it) - sink->addAttribute((*it)->getClassName(), *it); - this->sinks.add(sink); + // add eventually missing attribute impls to the existing sink + Collection attrImpls(this->cloneAttributes()->getAttributes()); + for (Collection::iterator it = attrImpls.begin(); it != attrImpls.end(); ++it) { + sink->addAttribute((*it)->getClassName(), *it); } - - void TeeSinkTokenFilter::consumeAllTokens() - { - while (incrementToken()) - { - } + this->sinks.add(sink); +} + +void TeeSinkTokenFilter::consumeAllTokens() { + while (incrementToken()) { } - - bool TeeSinkTokenFilter::incrementToken() - { - if (input->incrementToken()) - { - // capture state lazily - maybe no SinkFilter accepts this state - AttributeSourceStatePtr state; - for (Collection::iterator ref = sinks.begin(); ref != sinks.end(); ++ref) - { - if (*ref) - { - if ((*ref)->accept(shared_from_this())) - { - if (!state) - state = this->captureState(); - (*ref)->addState(state); +} + +bool TeeSinkTokenFilter::incrementToken() { + if (input->incrementToken()) { + // capture state lazily - maybe no SinkFilter accepts this state + AttributeSourceStatePtr state; + for (Collection::iterator ref = sinks.begin(); ref != sinks.end(); ++ref) { + if (*ref) { + if ((*ref)->accept(shared_from_this())) { + if (!state) { + state = this->captureState(); } + (*ref)->addState(state); } } - return true; - } - - return false; - } - - void TeeSinkTokenFilter::end() - { - TokenFilter::end(); - 
AttributeSourceStatePtr finalState(captureState()); - for (Collection::iterator ref = sinks.begin(); ref != sinks.end(); ++ref) - { - if (*ref) - (*ref)->setFinalState(finalState); } - } - - SinkFilter::~SinkFilter() - { - } - - void SinkFilter::reset() - { - // nothing to do; can be overridden - } - - AcceptAllSinkFilter::~AcceptAllSinkFilter() - { - } - - bool AcceptAllSinkFilter::accept(AttributeSourcePtr source) - { return true; } - - SinkTokenStream::SinkTokenStream(AttributeSourcePtr source, SinkFilterPtr filter) : TokenStream(source) - { - this->filter = filter; - this->cachedStates = Collection::newInstance(); - this->it = cachedStates.begin(); - this->initIterator = false; - } - - SinkTokenStream::~SinkTokenStream() - { - } - - bool SinkTokenStream::accept(AttributeSourcePtr source) - { - return filter->accept(source); - } - - void SinkTokenStream::addState(AttributeSourceStatePtr state) - { - if (initIterator) - boost::throw_exception(IllegalStateException(L"The tee must be consumed before sinks are consumed.")); - cachedStates.add(state); - } - - void SinkTokenStream::setFinalState(AttributeSourceStatePtr finalState) - { - this->finalState = finalState; - } - - bool SinkTokenStream::incrementToken() - { - // lazy init the iterator - if (!initIterator) - { - it = cachedStates.begin(); - initIterator = true; + + return false; +} + +void TeeSinkTokenFilter::end() { + TokenFilter::end(); + AttributeSourceStatePtr finalState(captureState()); + for (Collection::iterator ref = sinks.begin(); ref != sinks.end(); ++ref) { + if (*ref) { + (*ref)->setFinalState(finalState); } - - if (it == cachedStates.end()) - return false; - - AttributeSourceStatePtr state = *it++; - restoreState(state); - return true; } - - void SinkTokenStream::end() - { - if (finalState) - restoreState(finalState); +} + +SinkFilter::~SinkFilter() { +} + +void SinkFilter::reset() { + // nothing to do; can be overridden +} + +AcceptAllSinkFilter::~AcceptAllSinkFilter() { +} + +bool 
AcceptAllSinkFilter::accept(const AttributeSourcePtr& source) { + return true; +} + +SinkTokenStream::SinkTokenStream(const AttributeSourcePtr& source, const SinkFilterPtr& filter) : TokenStream(source) { + this->filter = filter; + this->cachedStates = Collection::newInstance(); + this->it = cachedStates.begin(); + this->initIterator = false; +} + +SinkTokenStream::~SinkTokenStream() { +} + +bool SinkTokenStream::accept(const AttributeSourcePtr& source) { + return filter->accept(source); +} + +void SinkTokenStream::addState(const AttributeSourceStatePtr& state) { + if (initIterator) { + boost::throw_exception(IllegalStateException(L"The tee must be consumed before sinks are consumed.")); } - - void SinkTokenStream::reset() - { + cachedStates.add(state); +} + +void SinkTokenStream::setFinalState(const AttributeSourceStatePtr& finalState) { + this->finalState = finalState; +} + +bool SinkTokenStream::incrementToken() { + // lazy init the iterator + if (!initIterator) { it = cachedStates.begin(); - initIterator = false; + initIterator = true; + } + + if (it == cachedStates.end()) { + return false; } + + AttributeSourceStatePtr state = *it++; + restoreState(state); + return true; +} + +void SinkTokenStream::end() { + if (finalState) { + restoreState(finalState); + } +} + +void SinkTokenStream::reset() { + it = cachedStates.begin(); + initIterator = false; +} + } diff --git a/src/core/analysis/Token.cpp b/src/core/analysis/Token.cpp index 46404ae3..baa86c49 100644 --- a/src/core/analysis/Token.cpp +++ b/src/core/analysis/Token.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -16,522 +16,471 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t Token::MIN_BUFFER_SIZE = 10; - - Token::Token() - { - ConstructToken(0, 0, DEFAULT_TYPE(), 0); - } - - Token::Token(int32_t start, int32_t end) - { - ConstructToken(start, end, DEFAULT_TYPE(), 0); - } - - Token::Token(int32_t start, int32_t end, const String& type) - { - ConstructToken(start, end, type, 0); - } - - Token::Token(int32_t start, int32_t end, int32_t flags) - { - ConstructToken(start, end, DEFAULT_TYPE(), flags); - } - - Token::Token(const String& text, int32_t start, int32_t end) - { - ConstructToken(start, end, DEFAULT_TYPE(), 0); - setTermBuffer(text); - } - - Token::Token(const String& text, int32_t start, int32_t end, const String& type) - { - ConstructToken(start, end, type, 0); - setTermBuffer(text); - } - - Token::Token(const String& text, int32_t start, int32_t end, int32_t flags) - { - ConstructToken(start, end, DEFAULT_TYPE(), flags); - setTermBuffer(text); - } - - Token::Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end) - { - ConstructToken(start, end, DEFAULT_TYPE(), 0); - setTermBuffer(startTermBuffer.get(), termBufferOffset, termBufferLength); - } - - Token::~Token() - { - } - - void Token::ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags) - { - this->_termLength = 0; - this->_startOffset = start; - this->_endOffset = end; - this->_type = type; - this->flags = flags; - this->positionIncrement = 1; - } - - const String& Token::DEFAULT_TYPE() - { - static String _DEFAULT_TYPE(L"word"); - return _DEFAULT_TYPE; - } - - void Token::setPositionIncrement(int32_t positionIncrement) - { - if (positionIncrement < 0) - boost::throw_exception(IllegalArgumentException(L"Increment must be zero or greater: " + StringUtils::toString(positionIncrement))); - this->positionIncrement = 
positionIncrement; - } - - int32_t Token::getPositionIncrement() - { - return positionIncrement; +namespace Lucene { + +const int32_t Token::MIN_BUFFER_SIZE = 10; + +Token::Token() { + ConstructToken(0, 0, DEFAULT_TYPE(), 0); +} + +Token::Token(int32_t start, int32_t end) { + ConstructToken(start, end, DEFAULT_TYPE(), 0); +} + +Token::Token(int32_t start, int32_t end, const String& type) { + ConstructToken(start, end, type, 0); +} + +Token::Token(int32_t start, int32_t end, int32_t flags) { + ConstructToken(start, end, DEFAULT_TYPE(), flags); +} + +Token::Token(const String& text, int32_t start, int32_t end) { + ConstructToken(start, end, DEFAULT_TYPE(), 0); + setTermBuffer(text); +} + +Token::Token(const String& text, int32_t start, int32_t end, const String& type) { + ConstructToken(start, end, type, 0); + setTermBuffer(text); +} + +Token::Token(const String& text, int32_t start, int32_t end, int32_t flags) { + ConstructToken(start, end, DEFAULT_TYPE(), flags); + setTermBuffer(text); +} + +Token::Token(CharArray startTermBuffer, int32_t termBufferOffset, int32_t termBufferLength, int32_t start, int32_t end) { + ConstructToken(start, end, DEFAULT_TYPE(), 0); + setTermBuffer(startTermBuffer.get(), termBufferOffset, termBufferLength); +} + +Token::~Token() { +} + +void Token::ConstructToken(int32_t start, int32_t end, const String& type, int32_t flags) { + this->_termLength = 0; + this->_startOffset = start; + this->_endOffset = end; + this->_type = type; + this->flags = flags; + this->positionIncrement = 1; +} + +const String& Token::DEFAULT_TYPE() { + static String _DEFAULT_TYPE(L"word"); + return _DEFAULT_TYPE; +} + +void Token::setPositionIncrement(int32_t positionIncrement) { + if (positionIncrement < 0) { + boost::throw_exception(IllegalArgumentException(L"Increment must be zero or greater: " + StringUtils::toString(positionIncrement))); } - - String Token::term() - { + this->positionIncrement = positionIncrement; +} + +int32_t Token::getPositionIncrement() { 
+ return positionIncrement; +} + +String Token::term() { + initTermBuffer(); + return String(_termBuffer.get(), _termLength); +} + +void Token::setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length) { + growTermBuffer(length); + MiscUtils::arrayCopy(buffer, offset, _termBuffer.get(), 0, length); + _termLength = length; +} + +void Token::setTermBuffer(const String& buffer) { + int32_t length = (int32_t)buffer.size(); + growTermBuffer(length); + MiscUtils::arrayCopy(buffer.begin(), 0, _termBuffer.get(), 0, length); + _termLength = length; +} + +void Token::setTermBuffer(const String& buffer, int32_t offset, int32_t length) { + BOOST_ASSERT(offset <= (int32_t)buffer.length()); + BOOST_ASSERT(offset + length <= (int32_t)buffer.length()); + growTermBuffer(length); + MiscUtils::arrayCopy(buffer.begin(), offset, _termBuffer.get(), 0, length); + _termLength = length; +} + +CharArray Token::termBuffer() { + if (!_termBuffer) { initTermBuffer(); - return String(_termBuffer.get(), _termLength); - } - - void Token::setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length) - { - growTermBuffer(length); - MiscUtils::arrayCopy(buffer, offset, _termBuffer.get(), 0, length); - _termLength = length; - } - - void Token::setTermBuffer(const String& buffer) - { - int32_t length = (int32_t)buffer.size(); - growTermBuffer(length); - MiscUtils::arrayCopy(buffer.begin(), 0, _termBuffer.get(), 0, length); - _termLength = length; - } - - void Token::setTermBuffer(const String& buffer, int32_t offset, int32_t length) - { - BOOST_ASSERT(offset <= (int32_t)buffer.length()); - BOOST_ASSERT(offset + length <= (int32_t)buffer.length()); - growTermBuffer(length); - MiscUtils::arrayCopy(buffer.begin(), offset, _termBuffer.get(), 0, length); - _termLength = length; - } - - CharArray Token::termBuffer() - { - if (!_termBuffer) - initTermBuffer(); - return _termBuffer; - } - - wchar_t* Token::termBufferArray() - { - if (!_termBuffer) - initTermBuffer(); - return 
_termBuffer.get(); } - - CharArray Token::resizeTermBuffer(int32_t newSize) - { - if (!_termBuffer) - { - // The buffer is always at least MIN_BUFFER_SIZE - _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); - } - else - { - if (_termBuffer.size() < newSize) - { - // Not big enough; create a new array with slight over allocation and preserve content - _termBuffer.resize(MiscUtils::getNextSize(newSize)); - } - } - return _termBuffer; - } - - void Token::growTermBuffer(int32_t newSize) - { - _termBuffer = resizeTermBuffer(newSize); + return _termBuffer; +} + +wchar_t* Token::termBufferArray() { + if (!_termBuffer) { + initTermBuffer(); } - - void Token::initTermBuffer() - { - if (!_termBuffer) - { - _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(MIN_BUFFER_SIZE)); - _termLength = 0; + return _termBuffer.get(); +} + +CharArray Token::resizeTermBuffer(int32_t newSize) { + if (!_termBuffer) { + // The buffer is always at least MIN_BUFFER_SIZE + _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); + } else { + if (_termBuffer.size() < newSize) { + // Not big enough; create a new array with slight over allocation and preserve content + _termBuffer.resize(MiscUtils::getNextSize(newSize)); } } - - int32_t Token::termLength() - { - if (!_termBuffer) - initTermBuffer(); - return _termLength; + return _termBuffer; +} + +void Token::growTermBuffer(int32_t newSize) { + _termBuffer = resizeTermBuffer(newSize); +} + +void Token::initTermBuffer() { + if (!_termBuffer) { + _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(MIN_BUFFER_SIZE)); + _termLength = 0; } - - void Token::setTermLength(int32_t length) - { +} + +int32_t Token::termLength() { + if (!_termBuffer) { initTermBuffer(); - if (length > _termBuffer.size()) - { - boost::throw_exception(IllegalArgumentException(L"length " + StringUtils::toString(length) + - L" exceeds the size of the termBuffer (" + - 
StringUtils::toString(_termBuffer.size()) + L")")); - } - _termLength = length; - } - - int32_t Token::startOffset() - { - return _startOffset; } - - void Token::setStartOffset(int32_t offset) - { - this->_startOffset = offset; - } - - int32_t Token::endOffset() - { - return _endOffset; - } - - void Token::setEndOffset(int32_t offset) - { - this->_endOffset = offset; - } - - void Token::setOffset(int32_t startOffset, int32_t endOffset) - { - this->_startOffset = startOffset; - this->_endOffset = endOffset; - } - - String Token::type() - { - return _type; - } - - void Token::setType(const String& type) - { - this->_type = type; - } - - int32_t Token::getFlags() - { - return flags; - } - - void Token::setFlags(int32_t flags) - { - this->flags = flags; + return _termLength; +} + +void Token::setTermLength(int32_t length) { + initTermBuffer(); + if (length > _termBuffer.size()) { + boost::throw_exception(IllegalArgumentException(L"length " + StringUtils::toString(length) + + L" exceeds the size of the termBuffer (" + + StringUtils::toString(_termBuffer.size()) + L")")); + } + _termLength = length; +} + +int32_t Token::startOffset() { + return _startOffset; +} + +void Token::setStartOffset(int32_t offset) { + this->_startOffset = offset; +} + +int32_t Token::endOffset() { + return _endOffset; +} + +void Token::setEndOffset(int32_t offset) { + this->_endOffset = offset; +} + +void Token::setOffset(int32_t startOffset, int32_t endOffset) { + this->_startOffset = startOffset; + this->_endOffset = endOffset; +} + +String Token::type() { + return _type; +} + +void Token::setType(const String& type) { + this->_type = type; +} + +int32_t Token::getFlags() { + return flags; +} + +void Token::setFlags(int32_t flags) { + this->flags = flags; +} + +PayloadPtr Token::getPayload() { + return this->payload; +} + +void Token::setPayload(const PayloadPtr& payload) { + this->payload = payload; +} + +String Token::toString() { + StringStream buffer; + initTermBuffer(); + buffer << L"("; 
+ if (!_termBuffer) { + buffer << L"null"; + } else { + buffer << term() << L"," << _startOffset << L"," << _endOffset; + } + if (_type != L"word") { + buffer << L",type=" << _type; + } + if (positionIncrement != 1) { + buffer << L",posIncr=" << positionIncrement; + } + buffer << L")"; + return buffer.str(); +} + +void Token::clear() { + payload.reset(); + // Leave termBuffer to allow re-use + _termLength = 0; + positionIncrement = 1; + flags = 0; + _startOffset = 0; + _endOffset = 0; + _type = DEFAULT_TYPE(); +} + +LuceneObjectPtr Token::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = Attribute::clone(other ? other : newLucene()); + TokenPtr cloneToken(boost::dynamic_pointer_cast(clone)); + cloneToken->_termLength = _termLength; + cloneToken->_startOffset = _startOffset; + cloneToken->_endOffset = _endOffset; + cloneToken->_type = _type; + cloneToken->flags = flags; + cloneToken->positionIncrement = positionIncrement; + + // Do a deep clone + if (_termBuffer) { + cloneToken->_termBuffer = CharArray::newInstance(_termBuffer.size()); + MiscUtils::arrayCopy(_termBuffer.get(), 0, cloneToken->_termBuffer.get(), 0, _termBuffer.size()); } - - PayloadPtr Token::getPayload() - { - return this->payload; + if (payload) { + cloneToken->payload = boost::dynamic_pointer_cast(payload->clone()); } - - void Token::setPayload(PayloadPtr payload) - { - this->payload = payload; + + return cloneToken; +} + +TokenPtr Token::clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { + TokenPtr clone(newLucene(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset)); + clone->positionIncrement = positionIncrement; + clone->flags = flags; + clone->_type = _type; + if (payload) { + clone->payload = boost::dynamic_pointer_cast(payload->clone()); + } + return clone; +} + +bool Token::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - String 
Token::toString() - { - StringStream buffer; + + TokenPtr otherToken(boost::dynamic_pointer_cast(other)); + if (otherToken) { initTermBuffer(); - buffer << L"("; - if (!_termBuffer) - buffer << L"null"; - else - buffer << term() << L"," << _startOffset << L"," << _endOffset; - if (_type != L"word") - buffer << L",type=" << _type; - if (positionIncrement != 1) - buffer << L",posIncr=" << positionIncrement; - buffer << L")"; - return buffer.str(); - } - - void Token::clear() - { - payload.reset(); - // Leave termBuffer to allow re-use - _termLength = 0; - positionIncrement = 1; - flags = 0; - _startOffset = 0; - _endOffset = 0; - _type = DEFAULT_TYPE(); - } - - LuceneObjectPtr Token::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = Attribute::clone(other ? other : newLucene()); - TokenPtr cloneToken(boost::dynamic_pointer_cast(clone)); - cloneToken->_termLength = _termLength; - cloneToken->_startOffset = _startOffset; - cloneToken->_endOffset = _endOffset; - cloneToken->_type = _type; - cloneToken->flags = flags; - cloneToken->positionIncrement = positionIncrement; - - // Do a deep clone - if (_termBuffer) - { - cloneToken->_termBuffer = CharArray::newInstance(_termBuffer.size()); - MiscUtils::arrayCopy(_termBuffer.get(), 0, cloneToken->_termBuffer.get(), 0, _termBuffer.size()); - } - if (payload) - cloneToken->payload = boost::dynamic_pointer_cast(payload->clone()); - - return cloneToken; - } - - TokenPtr Token::clone(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) - { - TokenPtr clone(newLucene(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset)); - clone->positionIncrement = positionIncrement; - clone->flags = flags; - clone->_type = _type; - if (payload) - clone->payload = boost::dynamic_pointer_cast(payload->clone()); - return clone; - } - - bool Token::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - TokenPtr 
otherToken(boost::dynamic_pointer_cast(other)); - if (otherToken) - { - initTermBuffer(); - otherToken->initTermBuffer(); - - if (_termLength == otherToken->_termLength && _startOffset == otherToken->_startOffset && + otherToken->initTermBuffer(); + + if (_termLength == otherToken->_termLength && _startOffset == otherToken->_startOffset && _endOffset == otherToken->_endOffset && flags == otherToken->flags && positionIncrement == otherToken->positionIncrement && _type == otherToken->_type && - (payload ? payload->equals(otherToken->payload) : !otherToken->payload)) - { - for (int32_t i = 0; i < _termLength; ++i) - { - if (_termBuffer[i] != otherToken->_termBuffer[i]) - return false; + (payload ? payload->equals(otherToken->payload) : !otherToken->payload)) { + for (int32_t i = 0; i < _termLength; ++i) { + if (_termBuffer[i] != otherToken->_termBuffer[i]) { + return false; } - return true; } - else - return false; - } - else + return true; + } else { return false; + } + } else { + return false; } - - int32_t Token::hashCode() - { +} + +int32_t Token::hashCode() { + initTermBuffer(); + int32_t code = _termLength; + code = code * 31 + _startOffset; + code = code * 31 + _endOffset; + code = code * 31 + flags; + code = code * 31 + positionIncrement; + code = code * 31 + StringUtils::hashCode(_type); + code = payload ? 
code * 31 + payload->hashCode() : code; + code = code * 31 + MiscUtils::hashCode(_termBuffer.get(), 0, _termLength); + return code; +} + +void Token::clearNoTermBuffer() { + payload.reset(); + positionIncrement = 1; + flags = 0; + _startOffset = 0; + _endOffset = 0; + _type = DEFAULT_TYPE(); +} + +TokenPtr Token::reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { + clearNoTermBuffer(); + payload.reset(); + positionIncrement = 1; + setTermBuffer(newTermBuffer.get(), newTermOffset, newTermLength); + _startOffset = newStartOffset; + _endOffset = newEndOffset; + _type = newType; + return shared_from_this(); +} + +TokenPtr Token::reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { + clearNoTermBuffer(); + setTermBuffer(newTermBuffer.get(), newTermOffset, newTermLength); + _startOffset = newStartOffset; + _endOffset = newEndOffset; + _type = DEFAULT_TYPE(); + return shared_from_this(); +} + +TokenPtr Token::reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { + clearNoTermBuffer(); + setTermBuffer(newTerm); + _startOffset = newStartOffset; + _endOffset = newEndOffset; + _type = newType; + return shared_from_this(); +} + +TokenPtr Token::reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType) { + clearNoTermBuffer(); + setTermBuffer(newTerm, newTermOffset, newTermLength); + _startOffset = newStartOffset; + _endOffset = newEndOffset; + _type = newType; + return shared_from_this(); +} + +TokenPtr Token::reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset) { + clearNoTermBuffer(); + setTermBuffer(newTerm); + _startOffset = newStartOffset; + _endOffset = newEndOffset; + _type = DEFAULT_TYPE(); + return shared_from_this(); +} + +TokenPtr 
Token::reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) { + clearNoTermBuffer(); + setTermBuffer(newTerm, newTermOffset, newTermLength); + _startOffset = newStartOffset; + _endOffset = newEndOffset; + _type = DEFAULT_TYPE(); + return shared_from_this(); +} + +void Token::reinit(const TokenPtr& prototype) { + prototype->initTermBuffer(); + setTermBuffer(prototype->_termBuffer.get(), 0, prototype->_termLength); + positionIncrement = prototype->positionIncrement; + flags = prototype->flags; + _startOffset = prototype->_startOffset; + _endOffset = prototype->_endOffset; + _type = prototype->_type; + payload = prototype->payload; +} + +void Token::reinit(const TokenPtr& prototype, const String& newTerm) { + setTermBuffer(newTerm); + positionIncrement = prototype->positionIncrement; + flags = prototype->flags; + _startOffset = prototype->_startOffset; + _endOffset = prototype->_endOffset; + _type = prototype->_type; + payload = prototype->payload; +} + +void Token::reinit(const TokenPtr& prototype, CharArray newTermBuffer, int32_t offset, int32_t length) { + setTermBuffer(newTermBuffer.get(), offset, length); + positionIncrement = prototype->positionIncrement; + flags = prototype->flags; + _startOffset = prototype->_startOffset; + _endOffset = prototype->_endOffset; + _type = prototype->_type; + payload = prototype->payload; +} + +void Token::copyTo(const AttributePtr& target) { + TokenPtr targetToken(boost::dynamic_pointer_cast(target)); + if (targetToken) { + targetToken->reinit(shared_from_this()); + // reinit shares the payload, so clone it + if (payload) { + targetToken->payload = boost::dynamic_pointer_cast(payload->clone()); + } + } else { initTermBuffer(); - int32_t code = _termLength; - code = code * 31 + _startOffset; - code = code * 31 + _endOffset; - code = code * 31 + flags; - code = code * 31 + positionIncrement; - code = code * 31 + StringUtils::hashCode(_type); - code = payload ? 
code * 31 + payload->hashCode() : code; - code = code * 31 + MiscUtils::hashCode(_termBuffer.get(), 0, _termLength); - return code; - } - - void Token::clearNoTermBuffer() - { - payload.reset(); - positionIncrement = 1; - flags = 0; - _startOffset = 0; - _endOffset = 0; - _type = DEFAULT_TYPE(); - } - - TokenPtr Token::reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType) - { - clearNoTermBuffer(); - payload.reset(); - positionIncrement = 1; - setTermBuffer(newTermBuffer.get(), newTermOffset, newTermLength); - _startOffset = newStartOffset; - _endOffset = newEndOffset; - _type = newType; - return shared_from_this(); - } - - TokenPtr Token::reinit(CharArray newTermBuffer, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) - { - clearNoTermBuffer(); - setTermBuffer(newTermBuffer.get(), newTermOffset, newTermLength); - _startOffset = newStartOffset; - _endOffset = newEndOffset; - _type = DEFAULT_TYPE(); - return shared_from_this(); - } - - TokenPtr Token::reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset, const String& newType) - { - clearNoTermBuffer(); - setTermBuffer(newTerm); - _startOffset = newStartOffset; - _endOffset = newEndOffset; - _type = newType; - return shared_from_this(); - } - - TokenPtr Token::reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset, const String& newType) - { - clearNoTermBuffer(); - setTermBuffer(newTerm, newTermOffset, newTermLength); - _startOffset = newStartOffset; - _endOffset = newEndOffset; - _type = newType; - return shared_from_this(); - } - - TokenPtr Token::reinit(const String& newTerm, int32_t newStartOffset, int32_t newEndOffset) - { - clearNoTermBuffer(); - setTermBuffer(newTerm); - _startOffset = newStartOffset; - _endOffset = newEndOffset; - _type = DEFAULT_TYPE(); - return shared_from_this(); - } 
- - TokenPtr Token::reinit(const String& newTerm, int32_t newTermOffset, int32_t newTermLength, int32_t newStartOffset, int32_t newEndOffset) - { - clearNoTermBuffer(); - setTermBuffer(newTerm, newTermOffset, newTermLength); - _startOffset = newStartOffset; - _endOffset = newEndOffset; - _type = DEFAULT_TYPE(); - return shared_from_this(); - } - - void Token::reinit(TokenPtr prototype) - { - prototype->initTermBuffer(); - setTermBuffer(prototype->_termBuffer.get(), 0, prototype->_termLength); - positionIncrement = prototype->positionIncrement; - flags = prototype->flags; - _startOffset = prototype->_startOffset; - _endOffset = prototype->_endOffset; - _type = prototype->_type; - payload = prototype->payload; - } - - void Token::reinit(TokenPtr prototype, const String& newTerm) - { - setTermBuffer(newTerm); - positionIncrement = prototype->positionIncrement; - flags = prototype->flags; - _startOffset = prototype->_startOffset; - _endOffset = prototype->_endOffset; - _type = prototype->_type; - payload = prototype->payload; - } - - void Token::reinit(TokenPtr prototype, CharArray newTermBuffer, int32_t offset, int32_t length) - { - setTermBuffer(newTermBuffer.get(), offset, length); - positionIncrement = prototype->positionIncrement; - flags = prototype->flags; - _startOffset = prototype->_startOffset; - _endOffset = prototype->_endOffset; - _type = prototype->_type; - payload = prototype->payload; - } - - void Token::copyTo(AttributePtr target) - { - TokenPtr targetToken(boost::dynamic_pointer_cast(target)); - if (targetToken) - { - targetToken->reinit(shared_from_this()); - // reinit shares the payload, so clone it - if (payload) - targetToken->payload = boost::dynamic_pointer_cast(payload->clone()); + TermAttributePtr targetTermAttribute(boost::dynamic_pointer_cast(target)); + if (targetTermAttribute) { + targetTermAttribute->setTermBuffer(_termBuffer.get(), 0, _termLength); } - else - { - initTermBuffer(); - TermAttributePtr 
targetTermAttribute(boost::dynamic_pointer_cast(target)); - if (targetTermAttribute) - targetTermAttribute->setTermBuffer(_termBuffer.get(), 0, _termLength); - OffsetAttributePtr targetOffsetAttribute(boost::dynamic_pointer_cast(target)); - if (targetOffsetAttribute) - targetOffsetAttribute->setOffset(_startOffset, _endOffset); - PositionIncrementAttributePtr targetPositionIncrementAttribute(boost::dynamic_pointer_cast(target)); - if (targetPositionIncrementAttribute) - targetPositionIncrementAttribute->setPositionIncrement(positionIncrement); - PayloadAttributePtr targetPayloadAttribute(boost::dynamic_pointer_cast(target)); - if (targetPayloadAttribute) - targetPayloadAttribute->setPayload(payload ? boost::dynamic_pointer_cast(payload->clone()) : PayloadPtr()); - FlagsAttributePtr targetFlagsAttribute(boost::dynamic_pointer_cast(target)); - if (targetFlagsAttribute) - targetFlagsAttribute->setFlags(flags); - TypeAttributePtr targetTypeAttribute(boost::dynamic_pointer_cast(target)); - if (targetTypeAttribute) - targetTypeAttribute->setType(_type); + OffsetAttributePtr targetOffsetAttribute(boost::dynamic_pointer_cast(target)); + if (targetOffsetAttribute) { + targetOffsetAttribute->setOffset(_startOffset, _endOffset); } - } - - AttributeFactoryPtr Token::TOKEN_ATTRIBUTE_FACTORY() - { - static AttributeFactoryPtr _TOKEN_ATTRIBUTE_FACTORY; - if (!_TOKEN_ATTRIBUTE_FACTORY) - { - _TOKEN_ATTRIBUTE_FACTORY = newLucene(AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY()); - CycleCheck::addStatic(_TOKEN_ATTRIBUTE_FACTORY); + PositionIncrementAttributePtr targetPositionIncrementAttribute(boost::dynamic_pointer_cast(target)); + if (targetPositionIncrementAttribute) { + targetPositionIncrementAttribute->setPositionIncrement(positionIncrement); + } + PayloadAttributePtr targetPayloadAttribute(boost::dynamic_pointer_cast(target)); + if (targetPayloadAttribute) { + targetPayloadAttribute->setPayload(payload ? 
boost::dynamic_pointer_cast(payload->clone()) : PayloadPtr()); + } + FlagsAttributePtr targetFlagsAttribute(boost::dynamic_pointer_cast(target)); + if (targetFlagsAttribute) { + targetFlagsAttribute->setFlags(flags); + } + TypeAttributePtr targetTypeAttribute(boost::dynamic_pointer_cast(target)); + if (targetTypeAttribute) { + targetTypeAttribute->setType(_type); } - return _TOKEN_ATTRIBUTE_FACTORY; - } - - TokenAttributeFactory::TokenAttributeFactory(AttributeFactoryPtr delegate) - { - this->delegate = delegate; - } - - TokenAttributeFactory::~TokenAttributeFactory() - { - } - - AttributePtr TokenAttributeFactory::createAttributeInstance(const String& className) - { - return newLucene(); } - - bool TokenAttributeFactory::equals(LuceneObjectPtr other) - { - if (AttributeFactory::equals(other)) - return true; - - TokenAttributeFactoryPtr otherTokenAttributeFactory(boost::dynamic_pointer_cast(other)); - if (otherTokenAttributeFactory) - return this->delegate->equals(otherTokenAttributeFactory->delegate); - return false; +} + +AttributeFactoryPtr Token::TOKEN_ATTRIBUTE_FACTORY() { + static AttributeFactoryPtr _TOKEN_ATTRIBUTE_FACTORY; + LUCENE_RUN_ONCE( + _TOKEN_ATTRIBUTE_FACTORY = newLucene(AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY()); + CycleCheck::addStatic(_TOKEN_ATTRIBUTE_FACTORY); + ); + return _TOKEN_ATTRIBUTE_FACTORY; +} + +TokenAttributeFactory::TokenAttributeFactory(const AttributeFactoryPtr& delegate) { + this->delegate = delegate; +} + +TokenAttributeFactory::~TokenAttributeFactory() { +} + +AttributePtr TokenAttributeFactory::createAttributeInstance(const String& className) { + return newLucene(); +} + +bool TokenAttributeFactory::equals(const LuceneObjectPtr& other) { + if (AttributeFactory::equals(other)) { + return true; } - - int32_t TokenAttributeFactory::hashCode() - { - return (delegate->hashCode() ^ 0x0a45aa31); + + TokenAttributeFactoryPtr otherTokenAttributeFactory(boost::dynamic_pointer_cast(other)); + if (otherTokenAttributeFactory) { + 
return this->delegate->equals(otherTokenAttributeFactory->delegate); } + return false; +} + +int32_t TokenAttributeFactory::hashCode() { + return (delegate->hashCode() ^ 0x0a45aa31); +} + } diff --git a/src/core/analysis/TokenFilter.cpp b/src/core/analysis/TokenFilter.cpp index 563931c5..bba3789d 100644 --- a/src/core/analysis/TokenFilter.cpp +++ b/src/core/analysis/TokenFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,29 +7,25 @@ #include "LuceneInc.h" #include "TokenFilter.h" -namespace Lucene -{ - TokenFilter::TokenFilter(TokenStreamPtr input) : TokenStream(input) - { - this->input = input; - } - - TokenFilter::~TokenFilter() - { - } - - void TokenFilter::end() - { - input->end(); - } - - void TokenFilter::close() - { - input->close(); - } - - void TokenFilter::reset() - { - input->reset(); - } +namespace Lucene { + +TokenFilter::TokenFilter(const TokenStreamPtr& input) : TokenStream(input) { + this->input = input; +} + +TokenFilter::~TokenFilter() { +} + +void TokenFilter::end() { + input->end(); +} + +void TokenFilter::close() { + input->close(); +} + +void TokenFilter::reset() { + input->reset(); +} + } diff --git a/src/core/analysis/TokenStream.cpp b/src/core/analysis/TokenStream.cpp index 67f8d001..d2967ac4 100644 --- a/src/core/analysis/TokenStream.cpp +++ b/src/core/analysis/TokenStream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,34 +7,28 @@ #include "LuceneInc.h" #include "TokenStream.h" -namespace Lucene -{ - TokenStream::TokenStream() - { - } - - TokenStream::TokenStream(AttributeSourcePtr input) : AttributeSource(input) - { - } - - TokenStream::TokenStream(AttributeFactoryPtr factory) : AttributeSource(factory) - { - } - - TokenStream::~TokenStream() - { - } - - void TokenStream::end() - { - // do nothing by default - } - - void TokenStream::reset() - { - } - - void TokenStream::close() - { - } +namespace Lucene { + +TokenStream::TokenStream() { +} + +TokenStream::TokenStream(const AttributeSourcePtr& input) : AttributeSource(input) { +} + +TokenStream::TokenStream(const AttributeFactoryPtr& factory) : AttributeSource(factory) { +} + +TokenStream::~TokenStream() { +} + +void TokenStream::end() { + // do nothing by default +} + +void TokenStream::reset() { +} + +void TokenStream::close() { +} + } diff --git a/src/core/analysis/Tokenizer.cpp b/src/core/analysis/Tokenizer.cpp index d777a010..67ff5afa 100644 --- a/src/core/analysis/Tokenizer.cpp +++ b/src/core/analysis/Tokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,58 +8,48 @@ #include "Tokenizer.h" #include "CharReader.h" -namespace Lucene -{ - Tokenizer::Tokenizer() - { - } - - Tokenizer::Tokenizer(ReaderPtr input) - { - this->input = CharReader::get(input); - this->charStream = boost::dynamic_pointer_cast(this->input); - } - - Tokenizer::Tokenizer(AttributeFactoryPtr factory) : TokenStream(factory) - { - } - - Tokenizer::Tokenizer(AttributeFactoryPtr factory, ReaderPtr input) : TokenStream(factory) - { - this->input = CharReader::get(input); - this->charStream = boost::dynamic_pointer_cast(this->input); - } - - Tokenizer::Tokenizer(AttributeSourcePtr source) : TokenStream(source) - { - } - - Tokenizer::Tokenizer(AttributeSourcePtr source, ReaderPtr input) : TokenStream(source) - { - this->input = CharReader::get(input); - this->charStream = boost::dynamic_pointer_cast(this->input); - } - - Tokenizer::~Tokenizer() - { - } - - void Tokenizer::close() - { - if (input) - { - input->close(); - input.reset(); // don't hold onto Reader after close - } - } - - int32_t Tokenizer::correctOffset(int32_t currentOff) - { - return charStream ? 
charStream->correctOffset(currentOff) : currentOff; - } - - void Tokenizer::reset(ReaderPtr input) - { - this->input = input; +namespace Lucene { + +Tokenizer::Tokenizer() { +} + +Tokenizer::Tokenizer(const ReaderPtr& input) { + this->input = CharReader::get(input); + this->charStream = boost::dynamic_pointer_cast(this->input); +} + +Tokenizer::Tokenizer(const AttributeFactoryPtr& factory) : TokenStream(factory) { +} + +Tokenizer::Tokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : TokenStream(factory) { + this->input = CharReader::get(input); + this->charStream = boost::dynamic_pointer_cast(this->input); +} + +Tokenizer::Tokenizer(const AttributeSourcePtr& source) : TokenStream(source) { +} + +Tokenizer::Tokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : TokenStream(source) { + this->input = CharReader::get(input); + this->charStream = boost::dynamic_pointer_cast(this->input); +} + +Tokenizer::~Tokenizer() { +} + +void Tokenizer::close() { + if (input) { + input->close(); + input.reset(); // don't hold onto Reader after close } } + +int32_t Tokenizer::correctOffset(int32_t currentOff) { + return charStream ? charStream->correctOffset(currentOff) : currentOff; +} + +void Tokenizer::reset(const ReaderPtr& input) { + this->input = input; +} + +} diff --git a/src/core/analysis/WhitespaceAnalyzer.cpp b/src/core/analysis/WhitespaceAnalyzer.cpp index dc21a04f..6353e0ee 100644 --- a/src/core/analysis/WhitespaceAnalyzer.cpp +++ b/src/core/analysis/WhitespaceAnalyzer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,27 +8,24 @@ #include "WhitespaceAnalyzer.h" #include "WhitespaceTokenizer.h" -namespace Lucene -{ - WhitespaceAnalyzer::~WhitespaceAnalyzer() - { - } - - TokenStreamPtr WhitespaceAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - return newLucene(reader); - } - - TokenStreamPtr WhitespaceAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); - if (!tokenizer) - { - tokenizer = newLucene(reader); - setPreviousTokenStream(tokenizer); - } - else - tokenizer->reset(reader); - return tokenizer; +namespace Lucene { + +WhitespaceAnalyzer::~WhitespaceAnalyzer() { +} + +TokenStreamPtr WhitespaceAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + return newLucene(reader); +} + +TokenStreamPtr WhitespaceAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + TokenizerPtr tokenizer(boost::dynamic_pointer_cast(getPreviousTokenStream())); + if (!tokenizer) { + tokenizer = newLucene(reader); + setPreviousTokenStream(tokenizer); + } else { + tokenizer->reset(reader); } + return tokenizer; +} + } diff --git a/src/core/analysis/WhitespaceTokenizer.cpp b/src/core/analysis/WhitespaceTokenizer.cpp index 885e3adb..a5492020 100644 --- a/src/core/analysis/WhitespaceTokenizer.cpp +++ b/src/core/analysis/WhitespaceTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,26 +9,22 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - WhitespaceTokenizer::WhitespaceTokenizer(ReaderPtr input) : CharTokenizer(input) - { - } - - WhitespaceTokenizer::WhitespaceTokenizer(AttributeSourcePtr source, ReaderPtr input) : CharTokenizer(source, input) - { - } - - WhitespaceTokenizer::WhitespaceTokenizer(AttributeFactoryPtr factory, ReaderPtr input) : CharTokenizer(factory, input) - { - } - - WhitespaceTokenizer::~WhitespaceTokenizer() - { - } - - bool WhitespaceTokenizer::isTokenChar(wchar_t c) - { - return !UnicodeUtil::isSpace(c); - } +namespace Lucene { + +WhitespaceTokenizer::WhitespaceTokenizer(const ReaderPtr& input) : CharTokenizer(input) { +} + +WhitespaceTokenizer::WhitespaceTokenizer(const AttributeSourcePtr& source, const ReaderPtr& input) : CharTokenizer(source, input) { +} + +WhitespaceTokenizer::WhitespaceTokenizer(const AttributeFactoryPtr& factory, const ReaderPtr& input) : CharTokenizer(factory, input) { +} + +WhitespaceTokenizer::~WhitespaceTokenizer() { +} + +bool WhitespaceTokenizer::isTokenChar(wchar_t c) { + return !UnicodeUtil::isSpace(c); +} + } diff --git a/src/core/analysis/WordlistLoader.cpp b/src/core/analysis/WordlistLoader.cpp index a2530950..6db85c96 100644 --- a/src/core/analysis/WordlistLoader.cpp +++ b/src/core/analysis/WordlistLoader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,88 +10,79 @@ #include "FileReader.h" #include "BufferedReader.h" -namespace Lucene -{ - WordlistLoader::~WordlistLoader() - { +namespace Lucene { + +WordlistLoader::~WordlistLoader() { +} + +HashSet WordlistLoader::getWordSet(const String& wordfile, const String& comment) { + HashSet result(HashSet::newInstance()); + FileReaderPtr reader; + LuceneException finally; + try { + reader = newLucene(wordfile); + result = getWordSet(reader, comment); + } catch (LuceneException& e) { + finally = e; } - - HashSet WordlistLoader::getWordSet(const String& wordfile, const String& comment) - { - HashSet result(HashSet::newInstance()); - FileReaderPtr reader; - LuceneException finally; - try - { - reader = newLucene(wordfile); - result = getWordSet(reader, comment); - } - catch (LuceneException& e) - { - finally = e; - } - if (reader) - reader->close(); - finally.throwException(); - return result; + if (reader) { + reader->close(); } - - HashSet WordlistLoader::getWordSet(ReaderPtr reader, const String& comment) - { - HashSet result(HashSet::newInstance()); - LuceneException finally; - BufferedReaderPtr bufferedReader(boost::dynamic_pointer_cast(reader)); - try - { - if (!bufferedReader) - bufferedReader = newLucene(reader); - String word; - while (bufferedReader->readLine(word)) - { - if (comment.empty() || !boost::starts_with(word, comment)) - { - boost::trim(word); - result.add(word); - } - } + finally.throwException(); + return result; +} + +HashSet WordlistLoader::getWordSet(const ReaderPtr& reader, const String& comment) { + HashSet result(HashSet::newInstance()); + LuceneException finally; + BufferedReaderPtr bufferedReader(boost::dynamic_pointer_cast(reader)); + try { + if (!bufferedReader) { + bufferedReader = newLucene(reader); } - catch (LuceneException& e) - { - finally = e; + String word; + while (bufferedReader->readLine(word)) { + if (comment.empty() || !boost::starts_with(word, 
comment)) { + boost::trim(word); + result.add(word); + } } - if (bufferedReader) - bufferedReader->close(); - finally.throwException(); - return result; + } catch (LuceneException& e) { + finally = e; } - - MapStringString WordlistLoader::getStemDict(const String& wordstemfile) - { - MapStringString result(MapStringString::newInstance()); - BufferedReaderPtr bufferedReader; - FileReaderPtr reader; - LuceneException finally; - try - { - reader = newLucene(wordstemfile); - bufferedReader = newLucene(reader); - String line; - while (bufferedReader->readLine(line)) - { - String::size_type sep = line.find(L'\t'); - if (sep != String::npos) - result.put(line.substr(0, sep), line.substr(sep + 1)); + if (bufferedReader) { + bufferedReader->close(); + } + finally.throwException(); + return result; +} + +MapStringString WordlistLoader::getStemDict(const String& wordstemfile) { + MapStringString result(MapStringString::newInstance()); + BufferedReaderPtr bufferedReader; + FileReaderPtr reader; + LuceneException finally; + try { + reader = newLucene(wordstemfile); + bufferedReader = newLucene(reader); + String line; + while (bufferedReader->readLine(line)) { + String::size_type sep = line.find(L'\t'); + if (sep != String::npos) { + result.put(line.substr(0, sep), line.substr(sep + 1)); } } - catch (LuceneException& e) - { - finally = e; - } - if (reader) - reader->close(); - if (bufferedReader) - bufferedReader->close(); - finally.throwException(); - return result; + } catch (LuceneException& e) { + finally = e; } + if (reader) { + reader->close(); + } + if (bufferedReader) { + bufferedReader->close(); + } + finally.throwException(); + return result; +} + } diff --git a/src/core/analysis/standard/StandardAnalyzer.cpp b/src/core/analysis/standard/StandardAnalyzer.cpp index 738a544b..d1ec2eb7 100644 --- a/src/core/analysis/standard/StandardAnalyzer.cpp +++ b/src/core/analysis/standard/StandardAnalyzer.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,86 +14,75 @@ #include "StopFilter.h" #include "WordlistLoader.h" -namespace Lucene -{ - /// Construct an analyzer with the given stop words. - const int32_t StandardAnalyzer::DEFAULT_MAX_TOKEN_LENGTH = 255; - - StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion) - { - ConstructAnalyser(matchVersion, StopAnalyzer::ENGLISH_STOP_WORDS_SET()); - } - - StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords) - { - ConstructAnalyser(matchVersion, stopWords); - } - - StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords) - { - ConstructAnalyser(matchVersion, WordlistLoader::getWordSet(stopwords)); - } - - StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, ReaderPtr stopwords) - { - ConstructAnalyser(matchVersion, WordlistLoader::getWordSet(stopwords)); - } - - StandardAnalyzer::~StandardAnalyzer() - { - } - - void StandardAnalyzer::ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet stopWords) - { - stopSet = stopWords; - enableStopPositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); - replaceInvalidAcronym = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_24); - this->matchVersion = matchVersion; - this->maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; - } - - TokenStreamPtr StandardAnalyzer::tokenStream(const String& fieldName, ReaderPtr reader) - { - StandardTokenizerPtr tokenStream(newLucene(matchVersion, reader)); - tokenStream->setMaxTokenLength(maxTokenLength); - TokenStreamPtr 
result(newLucene(tokenStream)); - result = newLucene(result); - result = newLucene(enableStopPositionIncrements, result, stopSet); - return result; - } - - void StandardAnalyzer::setMaxTokenLength(int32_t length) - { - maxTokenLength = length; - } - - int32_t StandardAnalyzer::getMaxTokenLength() - { - return maxTokenLength; - } - - TokenStreamPtr StandardAnalyzer::reusableTokenStream(const String& fieldName, ReaderPtr reader) - { - StandardAnalyzerSavedStreamsPtr streams = boost::dynamic_pointer_cast(getPreviousTokenStream()); - if (!streams) - { - streams = newLucene(); - setPreviousTokenStream(streams); - streams->tokenStream = newLucene(matchVersion, reader); - streams->filteredTokenStream = newLucene(streams->tokenStream); - streams->filteredTokenStream = newLucene(streams->filteredTokenStream); - streams->filteredTokenStream = newLucene(enableStopPositionIncrements, streams->filteredTokenStream, stopSet); - } - else - streams->tokenStream->reset(reader); - streams->tokenStream->setMaxTokenLength(maxTokenLength); - - streams->tokenStream->setReplaceInvalidAcronym(replaceInvalidAcronym); - - return streams->filteredTokenStream; - } - - StandardAnalyzerSavedStreams::~StandardAnalyzerSavedStreams() - { +namespace Lucene { + +/// Construct an analyzer with the given stop words. 
+const int32_t StandardAnalyzer::DEFAULT_MAX_TOKEN_LENGTH = 255; + +StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion) { + ConstructAnalyser(matchVersion, StopAnalyzer::ENGLISH_STOP_WORDS_SET()); +} + +StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, HashSet stopWords) { + ConstructAnalyser(matchVersion, stopWords); +} + +StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, const String& stopwords) { + ConstructAnalyser(matchVersion, WordlistLoader::getWordSet(stopwords)); +} + +StandardAnalyzer::StandardAnalyzer(LuceneVersion::Version matchVersion, const ReaderPtr& stopwords) { + ConstructAnalyser(matchVersion, WordlistLoader::getWordSet(stopwords)); +} + +StandardAnalyzer::~StandardAnalyzer() { +} + +void StandardAnalyzer::ConstructAnalyser(LuceneVersion::Version matchVersion, HashSet stopWords) { + stopSet = stopWords; + enableStopPositionIncrements = StopFilter::getEnablePositionIncrementsVersionDefault(matchVersion); + replaceInvalidAcronym = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_24); + this->matchVersion = matchVersion; + this->maxTokenLength = DEFAULT_MAX_TOKEN_LENGTH; +} + +TokenStreamPtr StandardAnalyzer::tokenStream(const String& fieldName, const ReaderPtr& reader) { + StandardTokenizerPtr tokenStream(newLucene(matchVersion, reader)); + tokenStream->setMaxTokenLength(maxTokenLength); + TokenStreamPtr result(newLucene(tokenStream)); + result = newLucene(result); + result = newLucene(enableStopPositionIncrements, result, stopSet); + return result; +} + +void StandardAnalyzer::setMaxTokenLength(int32_t length) { + maxTokenLength = length; +} + +int32_t StandardAnalyzer::getMaxTokenLength() { + return maxTokenLength; +} + +TokenStreamPtr StandardAnalyzer::reusableTokenStream(const String& fieldName, const ReaderPtr& reader) { + StandardAnalyzerSavedStreamsPtr streams = boost::dynamic_pointer_cast(getPreviousTokenStream()); + if (!streams) { + streams = newLucene(); + 
setPreviousTokenStream(streams); + streams->tokenStream = newLucene(matchVersion, reader); + streams->filteredTokenStream = newLucene(streams->tokenStream); + streams->filteredTokenStream = newLucene(streams->filteredTokenStream); + streams->filteredTokenStream = newLucene(enableStopPositionIncrements, streams->filteredTokenStream, stopSet); + } else { + streams->tokenStream->reset(reader); } + streams->tokenStream->setMaxTokenLength(maxTokenLength); + + streams->tokenStream->setReplaceInvalidAcronym(replaceInvalidAcronym); + + return streams->filteredTokenStream; +} + +StandardAnalyzerSavedStreams::~StandardAnalyzerSavedStreams() { +} + } diff --git a/src/core/analysis/standard/StandardFilter.cpp b/src/core/analysis/standard/StandardFilter.cpp index 80b2474a..be1abd04 100644 --- a/src/core/analysis/standard/StandardFilter.cpp +++ b/src/core/analysis/standard/StandardFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,61 +10,61 @@ #include "TermAttribute.h" #include "TypeAttribute.h" -namespace Lucene -{ - StandardFilter::StandardFilter(TokenStreamPtr input) : TokenFilter(input) - { - termAtt = addAttribute(); - typeAtt = addAttribute(); - } +namespace Lucene { + +StandardFilter::StandardFilter(const TokenStreamPtr& input) : TokenFilter(input) { + termAtt = addAttribute(); + typeAtt = addAttribute(); +} + +StandardFilter::~StandardFilter() { +} + +const String& StandardFilter::APOSTROPHE_TYPE() { + static String _APOSTROPHE_TYPE; - StandardFilter::~StandardFilter() - { - } + LUCENE_RUN_ONCE( + _APOSTROPHE_TYPE = StandardTokenizer::TOKEN_TYPES()[StandardTokenizer::APOSTROPHE]; + ); - const String& StandardFilter::APOSTROPHE_TYPE() - { - static String _APOSTROPHE_TYPE; - if (_APOSTROPHE_TYPE.empty()) - _APOSTROPHE_TYPE = StandardTokenizer::TOKEN_TYPES()[StandardTokenizer::APOSTROPHE]; - return _APOSTROPHE_TYPE; - } + return _APOSTROPHE_TYPE; +} + +const String& StandardFilter::ACRONYM_TYPE() { + static String _ACRONYM_TYPE; + + LUCENE_RUN_ONCE( + _ACRONYM_TYPE = StandardTokenizer::TOKEN_TYPES()[StandardTokenizer::ACRONYM] + ); - const String& StandardFilter::ACRONYM_TYPE() - { - static String _ACRONYM_TYPE; - if (_ACRONYM_TYPE.empty()) - _ACRONYM_TYPE = StandardTokenizer::TOKEN_TYPES()[StandardTokenizer::ACRONYM]; - return _ACRONYM_TYPE; + return _ACRONYM_TYPE; +} + +bool StandardFilter::incrementToken() { + if (!input->incrementToken()) { + return false; } - - bool StandardFilter::incrementToken() - { - if (!input->incrementToken()) - return false; - - wchar_t* termBuffer = termAtt->termBufferArray(); - int32_t bufferLength = termAtt->termLength(); - String type(typeAtt->type()); - - if (type == APOSTROPHE_TYPE() && bufferLength >= 2 && termBuffer[bufferLength - 2] == L'\'' && - (termBuffer[bufferLength - 1] == L's' || termBuffer[bufferLength - 1] == L'S')) // remove 's - { - // Strip last 2 
characters off - termAtt->setTermLength(bufferLength - 2); - } - else if (type == ACRONYM_TYPE()) // remove dots - { - int32_t upto = 0; - for (int32_t i = 0; i < bufferLength; ++i) - { - wchar_t c = termBuffer[i]; - if (c != L'.') - termBuffer[upto++] = c; + + wchar_t* termBuffer = termAtt->termBufferArray(); + int32_t bufferLength = termAtt->termLength(); + String type(typeAtt->type()); + + if (type == APOSTROPHE_TYPE() && bufferLength >= 2 && termBuffer[bufferLength - 2] == L'\'' && + (termBuffer[bufferLength - 1] == L's' || termBuffer[bufferLength - 1] == L'S')) { // remove 's + // Strip last 2 characters off + termAtt->setTermLength(bufferLength - 2); + } else if (type == ACRONYM_TYPE()) { // remove dots + int32_t upto = 0; + for (int32_t i = 0; i < bufferLength; ++i) { + wchar_t c = termBuffer[i]; + if (c != L'.') { + termBuffer[upto++] = c; } - termAtt->setTermLength(upto); } - - return true; + termAtt->setTermLength(upto); } + + return true; +} + } diff --git a/src/core/analysis/standard/StandardTokenizer.cpp b/src/core/analysis/standard/StandardTokenizer.cpp index 4b94b40b..1f0c66a3 100644 --- a/src/core/analysis/standard/StandardTokenizer.cpp +++ b/src/core/analysis/standard/StandardTokenizer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,146 +13,128 @@ #include "PositionIncrementAttribute.h" #include "TypeAttribute.h" -namespace Lucene -{ - const int32_t StandardTokenizer::ALPHANUM = 0; - const int32_t StandardTokenizer::APOSTROPHE = 1; - const int32_t StandardTokenizer::ACRONYM = 2; - const int32_t StandardTokenizer::COMPANY = 3; - const int32_t StandardTokenizer::EMAIL = 4; - const int32_t StandardTokenizer::HOST = 5; - const int32_t StandardTokenizer::NUM = 6; - const int32_t StandardTokenizer::CJ = 7; - - /// @deprecated this solves a bug where HOSTs that end with '.' are identified as ACRONYMs. - const int32_t StandardTokenizer::ACRONYM_DEP = 8; - - StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, ReaderPtr input) - { - this->scanner = newLucene(input); - init(input, matchVersion); - } - - StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, AttributeSourcePtr source, ReaderPtr input) : Tokenizer(source) - { - this->scanner = newLucene(input); - init(input, matchVersion); - } - - StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, AttributeFactoryPtr factory, ReaderPtr input) : Tokenizer(factory) - { - this->scanner = newLucene(input); - init(input, matchVersion); - } - - StandardTokenizer::~StandardTokenizer() - { - } - - const Collection StandardTokenizer::TOKEN_TYPES() - { - static Collection _TOKEN_TYPES; - if (!_TOKEN_TYPES) - { - _TOKEN_TYPES = newCollection( - L"", - L"", - L"", - L"", - L"", - L"", - L"", - L"", - L"" - ); +namespace Lucene { + +const int32_t StandardTokenizer::ALPHANUM = 0; +const int32_t StandardTokenizer::APOSTROPHE = 1; +const int32_t StandardTokenizer::ACRONYM = 2; +const int32_t StandardTokenizer::COMPANY = 3; +const int32_t StandardTokenizer::EMAIL = 4; +const int32_t StandardTokenizer::HOST = 5; +const int32_t StandardTokenizer::NUM = 6; +const int32_t StandardTokenizer::CJ = 7; + +/// @deprecated this solves a 
bug where HOSTs that end with '.' are identified as ACRONYMs. +const int32_t StandardTokenizer::ACRONYM_DEP = 8; + +StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, const ReaderPtr& input) { + this->scanner = newLucene(input); + init(input, matchVersion); +} + +StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeSourcePtr& source, const ReaderPtr& input) : Tokenizer(source) { + this->scanner = newLucene(input); + init(input, matchVersion); +} + +StandardTokenizer::StandardTokenizer(LuceneVersion::Version matchVersion, const AttributeFactoryPtr& factory, const ReaderPtr& input) : Tokenizer(factory) { + this->scanner = newLucene(input); + init(input, matchVersion); +} + +StandardTokenizer::~StandardTokenizer() { +} + +const Collection StandardTokenizer::TOKEN_TYPES() { + static Collection _TOKEN_TYPES; + LUCENE_RUN_ONCE( + _TOKEN_TYPES = newCollection( + L"", + L"", + L"", + L"", + L"", + L"", + L"", + L"", + L"" + ); + ); + return _TOKEN_TYPES; +} + +void StandardTokenizer::init(const ReaderPtr& input, LuceneVersion::Version matchVersion) { + replaceInvalidAcronym = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_24); + maxTokenLength = StandardAnalyzer::DEFAULT_MAX_TOKEN_LENGTH; + this->input = input; + termAtt = addAttribute(); + offsetAtt = addAttribute(); + posIncrAtt = addAttribute(); + typeAtt = addAttribute(); +} + +void StandardTokenizer::setMaxTokenLength(int32_t length) { + this->maxTokenLength = length; +} + +int32_t StandardTokenizer::getMaxTokenLength() { + return maxTokenLength; +} + +bool StandardTokenizer::incrementToken() { + clearAttributes(); + int32_t posIncr = 1; + + while (true) { + int32_t tokenType = scanner->getNextToken(); + + if (tokenType == StandardTokenizerImpl::YYEOF) { + return false; } - return _TOKEN_TYPES; - } - - void StandardTokenizer::init(ReaderPtr input, LuceneVersion::Version matchVersion) - { - replaceInvalidAcronym = 
LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_24); - maxTokenLength = StandardAnalyzer::DEFAULT_MAX_TOKEN_LENGTH; - this->input = input; - termAtt = addAttribute(); - offsetAtt = addAttribute(); - posIncrAtt = addAttribute(); - typeAtt = addAttribute(); - } - - void StandardTokenizer::setMaxTokenLength(int32_t length) - { - this->maxTokenLength = length; - } - - int32_t StandardTokenizer::getMaxTokenLength() - { - return maxTokenLength; - } - - bool StandardTokenizer::incrementToken() - { - clearAttributes(); - int32_t posIncr = 1; - - while (true) - { - int32_t tokenType = scanner->getNextToken(); - - if (tokenType == StandardTokenizerImpl::YYEOF) - return false; - - if (scanner->yylength() <= maxTokenLength) - { - posIncrAtt->setPositionIncrement(posIncr); - scanner->getText(termAtt); - int32_t start = scanner->yychar(); - offsetAtt->setOffset(correctOffset(start), correctOffset(start + termAtt->termLength())); - - // This 'if' should be removed in the next release. For now, it converts invalid acronyms to HOST. - /// When removed, only the 'else' part should remain. - if (tokenType == ACRONYM_DEP) - { - if (replaceInvalidAcronym) - { - typeAtt->setType(TOKEN_TYPES()[HOST]); - termAtt->setTermLength(termAtt->termLength() - 1); // remove extra '.' - } - else - typeAtt->setType(TOKEN_TYPES()[ACRONYM]); + + if (scanner->yylength() <= maxTokenLength) { + posIncrAtt->setPositionIncrement(posIncr); + scanner->getText(termAtt); + int32_t start = scanner->yychar(); + offsetAtt->setOffset(correctOffset(start), correctOffset(start + termAtt->termLength())); + + // This 'if' should be removed in the next release. For now, it converts invalid acronyms to HOST. + /// When removed, only the 'else' part should remain. + if (tokenType == ACRONYM_DEP) { + if (replaceInvalidAcronym) { + typeAtt->setType(TOKEN_TYPES()[HOST]); + termAtt->setTermLength(termAtt->termLength() - 1); // remove extra '.' 
+ } else { + typeAtt->setType(TOKEN_TYPES()[ACRONYM]); } - else - typeAtt->setType(TOKEN_TYPES()[tokenType]); - return true; - } - else - { - // When we skip a too-long term, we still increment the position increment - ++posIncr; + } else { + typeAtt->setType(TOKEN_TYPES()[tokenType]); } + return true; + } else { + // When we skip a too-long term, we still increment the position increment + ++posIncr; } } - - void StandardTokenizer::end() - { - // set final offset - int32_t finalOffset = correctOffset(scanner->yychar() + scanner->yylength()); - offsetAtt->setOffset(finalOffset, finalOffset); - } - - void StandardTokenizer::reset(ReaderPtr input) - { - Tokenizer::reset(input); - scanner->reset(input); - } - - bool StandardTokenizer::isReplaceInvalidAcronym() - { - return replaceInvalidAcronym; - } - - void StandardTokenizer::setReplaceInvalidAcronym(bool replaceInvalidAcronym) - { - this->replaceInvalidAcronym = replaceInvalidAcronym; - } +} + +void StandardTokenizer::end() { + // set final offset + int32_t finalOffset = correctOffset(scanner->yychar() + scanner->yylength()); + offsetAtt->setOffset(finalOffset, finalOffset); +} + +void StandardTokenizer::reset(const ReaderPtr& input) { + Tokenizer::reset(input); + scanner->reset(input); +} + +bool StandardTokenizer::isReplaceInvalidAcronym() { + return replaceInvalidAcronym; +} + +void StandardTokenizer::setReplaceInvalidAcronym(bool replaceInvalidAcronym) { + this->replaceInvalidAcronym = replaceInvalidAcronym; +} + } diff --git a/src/core/analysis/standard/StandardTokenizerImpl.cpp b/src/core/analysis/standard/StandardTokenizerImpl.cpp index 95ed1976..3828dc76 100644 --- a/src/core/analysis/standard/StandardTokenizerImpl.cpp +++ b/src/core/analysis/standard/StandardTokenizerImpl.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,547 +12,536 @@ #include "TermAttribute.h" #include "MiscUtils.h" -namespace Lucene -{ - /// Initial size of the lookahead buffer - const int32_t StandardTokenizerImpl::ZZ_BUFFERSIZE = 16384; - - /// Translates characters to character classes - const wchar_t StandardTokenizerImpl::ZZ_CMAP_PACKED[] = - { - L"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5" - L"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12" - L"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12" - L"\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12\34\0\136\12" - L"\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12\11\0\1\12" - L"\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12\1\0\24\12" - L"\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12\12\0\71\12" - L"\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12\67\0\46\12" - L"\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12\56\0\32\12" - L"\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12\17\0\2\12" - L"\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0\46\12\u015f\0" - L"\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2\25\0" - L"\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12\3\0" - L"\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12\23\0\6\12" - L"\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12\1\0\2\12" - L"\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2\2\0\3\12" - L"\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12\1\0\7\12" - L"\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0\1\12" - L"\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12" - L"\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12" - L"\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12\3\0\2\12" - L"\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12\3\0\10\12" - L"\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12\1\0\27\12" - L"\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2\25\0\10\12" - L"\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\44\0\1\12" - L"\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12" - L"\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12\3\0\30\12" - 
L"\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1\60\12\1\1" - L"\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0\1\12\2\0" - L"\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0\7\12\1\0" - L"\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0\4\12\1\0" - L"\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0\12\2\2\0" - L"\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0\42\12\35\0" - L"\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0\12\2\6\0" - L"\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0\104\12\5\0" - L"\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0\4\12\2\0" - L"\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0\1\12\1\0" - L"\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0" - L"\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0\27\12\1\0" - L"\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\47\12\1\0" - L"\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0" - L"\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0\12\2\6\0" - L"\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0\26\12\2\0" - L"\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0\1\12\1\0" - L"\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0\7\12\1\0" - L"\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0\6\12\4\0" - L"\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0\1\12\4\0" - L"\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0\1\12\1\0" - L"\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0\7\12\u0ecb\0" - L"\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13\2\13\132\13" - L"\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0" - L"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12" - L"\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12\5\0\1\12" - L"\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12\1\0\2\12" - L"\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12\2\0\66\12" - L"\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12\23\0\12\2" - L"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12" - L"\2\0\6\12\2\0\6\12\2\0\3\12\43\0" - }; - - const int32_t StandardTokenizerImpl::ZZ_CMAP_LENGTH = 65536; - const int32_t StandardTokenizerImpl::ZZ_CMAP_PACKED_LENGTH = 1154; - - const wchar_t StandardTokenizerImpl::ZZ_ACTION_PACKED_0[] = - { - L"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4" - 
L"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4" - L"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12" - L"\1\4" - }; - - const int32_t StandardTokenizerImpl::ZZ_ACTION_LENGTH = 51; - const int32_t StandardTokenizerImpl::ZZ_ACTION_PACKED_LENGTH = 50; - - const wchar_t StandardTokenizerImpl::ZZ_ROWMAP_PACKED_0[] = - { - L"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124" - L"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304" - L"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134" - L"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4" - L"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206" - L"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214" - L"\0\u0268\0\u0276\0\u0284" - }; - - const int32_t StandardTokenizerImpl::ZZ_ROWMAP_LENGTH = 51; - const int32_t StandardTokenizerImpl::ZZ_ROWMAP_PACKED_LENGTH = 102; - - const wchar_t StandardTokenizerImpl::ZZ_TRANS_PACKED_0[] = - { - L"\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2" - L"\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13" - L"\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11" - L"\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20" - L"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0" - L"\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27" - L"\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0" - L"\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37" - L"\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44" - L"\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0" - L"\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4" - L"\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0" - L"\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24" - L"\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54" - L"\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0" - L"\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56" - L"\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52" - L"\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31" - L"\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0" - L"\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0" - L"\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33" - L"\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13" - L"\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11" - L"\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57" - L"\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0" - 
L"\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37" - L"\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40" - L"\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12" - L"\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13" - L"\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16" - L"\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13" - L"\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25" - L"\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0" - L"\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0" - L"\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0" - L"\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0" - L"\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0" - L"\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0" - L"\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0" - L"\1\11\2\52\1\0\1\24\3\0" - }; - - const int32_t StandardTokenizerImpl::ZZ_TRANS_LENGTH = 658; - const int32_t StandardTokenizerImpl::ZZ_TRANS_PACKED_LENGTH = 634; - - const int32_t StandardTokenizerImpl::ZZ_UNKNOWN_ERROR = 0; - const int32_t StandardTokenizerImpl::ZZ_NO_MATCH = 1; - const int32_t StandardTokenizerImpl::ZZ_PUSHBACK_2BIG = 2; - - const wchar_t* StandardTokenizerImpl::ZZ_ERROR_MSG[] = - { - L"Unknown internal scanner error", - L"Error: could not match input", - L"Error: pushback value was too large" - }; - - const wchar_t StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_0[] = - { - L"\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0" - L"\1\1\1\0\17\1\1\0\1\1\3\0\5\1" - }; - - const int32_t StandardTokenizerImpl::ZZ_ATTRIBUTE_LENGTH = 51; - const int32_t StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_LENGTH = 30; - - /// This character denotes the end of file - const int32_t StandardTokenizerImpl::YYEOF = -1; - - /// Lexical states - const int32_t StandardTokenizerImpl::YYINITIAL = 0; - - StandardTokenizerImpl::StandardTokenizerImpl(ReaderPtr in) - { - this->zzState = 0; - this->zzLexicalState = YYINITIAL; - this->zzBuffer = CharArray::newInstance(ZZ_BUFFERSIZE); - this->zzMarkedPos = 0; - this->zzPushbackPos = 0; - this->zzCurrentPos = 0; - this->zzStartRead = 0; - this->zzEndRead = 0; - this->yyline = 0; - this->_yychar = 0; - this->yycolumn = 0; - this->zzAtBOL = true; - this->zzAtEOF 
= false; - this->zzReader = in; - } - - StandardTokenizerImpl::~StandardTokenizerImpl() - { - } - - const wchar_t* StandardTokenizerImpl::ZZ_CMAP() - { - static CharArray _ZZ_CMAP; - if (!_ZZ_CMAP) - { - _ZZ_CMAP = CharArray::newInstance(ZZ_CMAP_LENGTH); - wchar_t* result = _ZZ_CMAP.get(); - - int32_t i = 0; // index in packed string - int32_t j = 0; // index in unpacked array - while (i < ZZ_CMAP_PACKED_LENGTH) - { - int32_t count = ZZ_CMAP_PACKED[i++]; - wchar_t value = ZZ_CMAP_PACKED[i++]; - do - result[j++] = value; - while (--count > 0); - } - } - return _ZZ_CMAP.get(); - } - - const int32_t* StandardTokenizerImpl::ZZ_ACTION() - { - static IntArray _ZZ_ACTION; - if (!_ZZ_ACTION) - { - _ZZ_ACTION = IntArray::newInstance(ZZ_ACTION_LENGTH); - int32_t* result = _ZZ_ACTION.get(); - - int32_t i = 0; // index in packed string - int32_t j = 0; // index in unpacked array - while (i < ZZ_ACTION_PACKED_LENGTH) - { - int32_t count = ZZ_ACTION_PACKED_0[i++]; - int32_t value = ZZ_ACTION_PACKED_0[i++]; - do - result[j++] = value; - while (--count > 0); - } - } - return _ZZ_ACTION.get(); - } - - const int32_t* StandardTokenizerImpl::ZZ_ROWMAP() - { - static IntArray _ZZ_ROWMAP; - if (!_ZZ_ROWMAP) - { - _ZZ_ROWMAP = IntArray::newInstance(ZZ_ROWMAP_LENGTH); - int32_t* result = _ZZ_ROWMAP.get(); - - int32_t i = 0; // index in packed string - int32_t j = 0; // index in unpacked array - while (i < ZZ_ROWMAP_PACKED_LENGTH) - { - int32_t high = ZZ_ROWMAP_PACKED_0[i++] << 16; - result[j++] = high | ZZ_ROWMAP_PACKED_0[i++]; - } - } - return _ZZ_ROWMAP.get(); - } - - const int32_t* StandardTokenizerImpl::ZZ_TRANS() - { - static IntArray _ZZ_TRANS; - if (!_ZZ_TRANS) - { - _ZZ_TRANS = IntArray::newInstance(ZZ_TRANS_LENGTH); - int32_t* result = _ZZ_TRANS.get(); - - int32_t i = 0; // index in packed string - int32_t j = 0; // index in unpacked array - while (i < ZZ_TRANS_PACKED_LENGTH) - { - int32_t count = ZZ_TRANS_PACKED_0[i++]; - int32_t value = ZZ_TRANS_PACKED_0[i++]; - --value; - do - 
result[j++] = value; - while (--count > 0); - } - } - return _ZZ_TRANS.get(); - } - - const int32_t* StandardTokenizerImpl::ZZ_ATTRIBUTE() - { - static IntArray _ZZ_ATTRIBUTE; - if (!_ZZ_ATTRIBUTE) - { - _ZZ_ATTRIBUTE = IntArray::newInstance(ZZ_ATTRIBUTE_LENGTH); - int32_t* result = _ZZ_ATTRIBUTE.get(); - - int32_t i = 0; // index in packed string - int32_t j = 0; // index in unpacked array - while (i < ZZ_ATTRIBUTE_PACKED_LENGTH) - { - int32_t count = ZZ_ATTRIBUTE_PACKED_0[i++]; - int32_t value = ZZ_ATTRIBUTE_PACKED_0[i++]; - do - result[j++] = value; - while (--count > 0); - } - } - return _ZZ_ATTRIBUTE.get(); - } - - int32_t StandardTokenizerImpl::yychar() - { - return _yychar; +#include + +namespace Lucene { + +/// Initial size of the lookahead buffer +const int32_t StandardTokenizerImpl::ZZ_BUFFERSIZE = 16384; + +/// Translates characters to character classes +CharArray StandardTokenizerImpl::_ZZ_CMAP; +const wchar_t StandardTokenizerImpl::ZZ_CMAP_PACKED[] = { + L"\11\0\1\0\1\15\1\0\1\0\1\14\22\0\1\0\5\0\1\5" + L"\1\3\4\0\1\11\1\7\1\4\1\11\12\2\6\0\1\6\32\12" + L"\4\0\1\10\1\0\32\12\57\0\1\12\12\0\1\12\4\0\1\12" + L"\5\0\27\12\1\0\37\12\1\0\u0128\12\2\0\22\12\34\0\136\12" + L"\2\0\11\12\2\0\7\12\16\0\2\12\16\0\5\12\11\0\1\12" + L"\213\0\1\12\13\0\1\12\1\0\3\12\1\0\1\12\1\0\24\12" + L"\1\0\54\12\1\0\10\12\2\0\32\12\14\0\202\12\12\0\71\12" + L"\2\0\2\12\2\0\2\12\3\0\46\12\2\0\2\12\67\0\46\12" + L"\2\0\1\12\7\0\47\12\110\0\33\12\5\0\3\12\56\0\32\12" + L"\5\0\13\12\25\0\12\2\7\0\143\12\1\0\1\12\17\0\2\12" + L"\11\0\12\2\3\12\23\0\1\12\1\0\33\12\123\0\46\12\u015f\0" + L"\65\12\3\0\1\12\22\0\1\12\7\0\12\12\4\0\12\2\25\0" + L"\10\12\2\0\2\12\2\0\26\12\1\0\7\12\1\0\1\12\3\0" + L"\4\12\42\0\2\12\1\0\3\12\4\0\12\2\2\12\23\0\6\12" + L"\4\0\2\12\2\0\26\12\1\0\7\12\1\0\2\12\1\0\2\12" + L"\1\0\2\12\37\0\4\12\1\0\1\12\7\0\12\2\2\0\3\12" + L"\20\0\7\12\1\0\1\12\1\0\3\12\1\0\26\12\1\0\7\12" + L"\1\0\2\12\1\0\5\12\3\0\1\12\22\0\1\12\17\0\1\12" + 
L"\5\0\12\2\25\0\10\12\2\0\2\12\2\0\26\12\1\0\7\12" + L"\1\0\2\12\2\0\4\12\3\0\1\12\36\0\2\12\1\0\3\12" + L"\4\0\12\2\25\0\6\12\3\0\3\12\1\0\4\12\3\0\2\12" + L"\1\0\1\12\1\0\2\12\3\0\2\12\3\0\3\12\3\0\10\12" + L"\1\0\3\12\55\0\11\2\25\0\10\12\1\0\3\12\1\0\27\12" + L"\1\0\12\12\1\0\5\12\46\0\2\12\4\0\12\2\25\0\10\12" + L"\1\0\3\12\1\0\27\12\1\0\12\12\1\0\5\12\44\0\1\12" + L"\1\0\2\12\4\0\12\2\25\0\10\12\1\0\3\12\1\0\27\12" + L"\1\0\20\12\46\0\2\12\4\0\12\2\25\0\22\12\3\0\30\12" + L"\1\0\11\12\1\0\1\12\2\0\7\12\71\0\1\1\60\12\1\1" + L"\2\12\14\1\7\12\11\1\12\2\47\0\2\12\1\0\1\12\2\0" + L"\2\12\1\0\1\12\2\0\1\12\6\0\4\12\1\0\7\12\1\0" + L"\3\12\1\0\1\12\1\0\1\12\2\0\2\12\1\0\4\12\1\0" + L"\2\12\11\0\1\12\2\0\5\12\1\0\1\12\11\0\12\2\2\0" + L"\2\12\42\0\1\12\37\0\12\2\26\0\10\12\1\0\42\12\35\0" + L"\4\12\164\0\42\12\1\0\5\12\1\0\2\12\25\0\12\2\6\0" + L"\6\12\112\0\46\12\12\0\47\12\11\0\132\12\5\0\104\12\5\0" + L"\122\12\6\0\7\12\1\0\77\12\1\0\1\12\1\0\4\12\2\0" + L"\7\12\1\0\1\12\1\0\4\12\2\0\47\12\1\0\1\12\1\0" + L"\4\12\2\0\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0" + L"\1\12\1\0\4\12\2\0\7\12\1\0\7\12\1\0\27\12\1\0" + L"\37\12\1\0\1\12\1\0\4\12\2\0\7\12\1\0\47\12\1\0" + L"\23\12\16\0\11\2\56\0\125\12\14\0\u026c\12\2\0\10\12\12\0" + L"\32\12\5\0\113\12\225\0\64\12\54\0\12\2\46\0\12\2\6\0" + L"\130\12\10\0\51\12\u0557\0\234\12\4\0\132\12\6\0\26\12\2\0" + L"\6\12\2\0\46\12\2\0\6\12\2\0\10\12\1\0\1\12\1\0" + L"\1\12\1\0\1\12\1\0\37\12\2\0\65\12\1\0\7\12\1\0" + L"\1\12\3\0\3\12\1\0\7\12\3\0\4\12\2\0\6\12\4\0" + L"\15\12\5\0\3\12\1\0\7\12\202\0\1\12\202\0\1\12\4\0" + L"\1\12\2\0\12\12\1\0\1\12\3\0\5\12\6\0\1\12\1\0" + L"\1\12\1\0\1\12\1\0\4\12\1\0\3\12\1\0\7\12\u0ecb\0" + L"\2\12\52\0\5\12\12\0\1\13\124\13\10\13\2\13\2\13\132\13" + L"\1\13\3\13\6\13\50\13\3\13\1\0\136\12\21\0\30\12\70\0" + L"\20\13\u0100\0\200\13\200\0\u19b6\13\12\13\100\0\u51a6\13\132\13\u048d\12" + L"\u0773\0\u2ba4\12\u215c\0\u012e\13\322\13\7\12\14\0\5\12\5\0\1\12" + 
L"\1\0\12\12\1\0\15\12\1\0\5\12\1\0\1\12\1\0\2\12" + L"\1\0\2\12\1\0\154\12\41\0\u016b\12\22\0\100\12\2\0\66\12" + L"\50\0\14\12\164\0\3\12\1\0\1\12\1\0\207\12\23\0\12\2" + L"\7\0\32\12\6\0\32\12\12\0\1\13\72\13\37\12\3\0\6\12" + L"\2\0\6\12\2\0\6\12\2\0\3\12\43\0" +}; + +const int32_t StandardTokenizerImpl::ZZ_CMAP_LENGTH = 65536; +const int32_t StandardTokenizerImpl::ZZ_CMAP_PACKED_LENGTH = 1154; + +IntArray StandardTokenizerImpl::_ZZ_ACTION; +const wchar_t StandardTokenizerImpl::ZZ_ACTION_PACKED_0[] = { + L"\1\0\1\1\3\2\1\3\1\1\13\0\1\2\3\4" + L"\2\0\1\5\1\0\1\5\3\4\6\5\1\6\1\4" + L"\2\7\1\10\1\0\1\10\3\0\2\10\1\11\1\12" + L"\1\4" +}; + +const int32_t StandardTokenizerImpl::ZZ_ACTION_LENGTH = 51; +const int32_t StandardTokenizerImpl::ZZ_ACTION_PACKED_LENGTH = 50; + +IntArray StandardTokenizerImpl::_ZZ_ROWMAP; +const wchar_t StandardTokenizerImpl::ZZ_ROWMAP_PACKED_0[] = { + L"\0\0\0\16\0\34\0\52\0\70\0\16\0\106\0\124" + L"\0\142\0\160\0\176\0\214\0\232\0\250\0\266\0\304" + L"\0\322\0\340\0\356\0\374\0\u010a\0\u0118\0\u0126\0\u0134" + L"\0\u0142\0\u0150\0\u015e\0\u016c\0\u017a\0\u0188\0\u0196\0\u01a4" + L"\0\u01b2\0\u01c0\0\u01ce\0\u01dc\0\u01ea\0\u01f8\0\322\0\u0206" + L"\0\u0214\0\u0222\0\u0230\0\u023e\0\u024c\0\u025a\0\124\0\214" + L"\0\u0268\0\u0276\0\u0284" +}; + +const int32_t StandardTokenizerImpl::ZZ_ROWMAP_LENGTH = 51; +const int32_t StandardTokenizerImpl::ZZ_ROWMAP_PACKED_LENGTH = 102; + +IntArray StandardTokenizerImpl::_ZZ_TRANS; +const wchar_t StandardTokenizerImpl::ZZ_TRANS_PACKED_0[] = { + L"\1\2\1\3\1\4\7\2\1\5\1\6\1\7\1\2" + L"\17\0\2\3\1\0\1\10\1\0\1\11\2\12\1\13" + L"\1\3\4\0\1\3\1\4\1\0\1\14\1\0\1\11" + L"\2\15\1\16\1\4\4\0\1\3\1\4\1\17\1\20" + L"\1\21\1\22\2\12\1\13\1\23\20\0\1\2\1\0" + L"\1\24\1\25\7\0\1\26\4\0\2\27\7\0\1\27" + L"\4\0\1\30\1\31\7\0\1\32\5\0\1\33\7\0" + L"\1\13\4\0\1\34\1\35\7\0\1\36\4\0\1\37" + L"\1\40\7\0\1\41\4\0\1\42\1\43\7\0\1\44" + L"\15\0\1\45\4\0\1\24\1\25\7\0\1\46\15\0" + L"\1\47\4\0\2\27\7\0\1\50\4\0\1\3\1\4" + 
L"\1\17\1\10\1\21\1\22\2\12\1\13\1\23\4\0" + L"\2\24\1\0\1\51\1\0\1\11\2\52\1\0\1\24" + L"\4\0\1\24\1\25\1\0\1\53\1\0\1\11\2\54" + L"\1\55\1\25\4\0\1\24\1\25\1\0\1\51\1\0" + L"\1\11\2\52\1\0\1\26\4\0\2\27\1\0\1\56" + L"\2\0\1\56\2\0\1\27\4\0\2\30\1\0\1\52" + L"\1\0\1\11\2\52\1\0\1\30\4\0\1\30\1\31" + L"\1\0\1\54\1\0\1\11\2\54\1\55\1\31\4\0" + L"\1\30\1\31\1\0\1\52\1\0\1\11\2\52\1\0" + L"\1\32\5\0\1\33\1\0\1\55\2\0\3\55\1\33" + L"\4\0\2\34\1\0\1\57\1\0\1\11\2\12\1\13" + L"\1\34\4\0\1\34\1\35\1\0\1\60\1\0\1\11" + L"\2\15\1\16\1\35\4\0\1\34\1\35\1\0\1\57" + L"\1\0\1\11\2\12\1\13\1\36\4\0\2\37\1\0" + L"\1\12\1\0\1\11\2\12\1\13\1\37\4\0\1\37" + L"\1\40\1\0\1\15\1\0\1\11\2\15\1\16\1\40" + L"\4\0\1\37\1\40\1\0\1\12\1\0\1\11\2\12" + L"\1\13\1\41\4\0\2\42\1\0\1\13\2\0\3\13" + L"\1\42\4\0\1\42\1\43\1\0\1\16\2\0\3\16" + L"\1\43\4\0\1\42\1\43\1\0\1\13\2\0\3\13" + L"\1\44\6\0\1\17\6\0\1\45\4\0\1\24\1\25" + L"\1\0\1\61\1\0\1\11\2\52\1\0\1\26\4\0" + L"\2\27\1\0\1\56\2\0\1\56\2\0\1\50\4\0" + L"\2\24\7\0\1\24\4\0\2\30\7\0\1\30\4\0" + L"\2\34\7\0\1\34\4\0\2\37\7\0\1\37\4\0" + L"\2\42\7\0\1\42\4\0\2\62\7\0\1\62\4\0" + L"\2\24\7\0\1\63\4\0\2\62\1\0\1\56\2\0" + L"\1\56\2\0\1\62\4\0\2\24\1\0\1\61\1\0" + L"\1\11\2\52\1\0\1\24\3\0" +}; + +const int32_t StandardTokenizerImpl::ZZ_TRANS_LENGTH = 658; +const int32_t StandardTokenizerImpl::ZZ_TRANS_PACKED_LENGTH = 634; + +const int32_t StandardTokenizerImpl::ZZ_UNKNOWN_ERROR = 0; +const int32_t StandardTokenizerImpl::ZZ_NO_MATCH = 1; +const int32_t StandardTokenizerImpl::ZZ_PUSHBACK_2BIG = 2; + +const wchar_t* StandardTokenizerImpl::ZZ_ERROR_MSG[] = { + L"Unknown internal scanner error", + L"Error: could not match input", + L"Error: pushback value was too large" +}; + +IntArray StandardTokenizerImpl::_ZZ_ATTRIBUTE; +const wchar_t StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_0[] = { + L"\1\0\1\11\3\1\1\11\1\1\13\0\4\1\2\0" + L"\1\1\1\0\17\1\1\0\1\1\3\0\5\1" +}; + +const int32_t StandardTokenizerImpl::ZZ_ATTRIBUTE_LENGTH = 51; +const int32_t 
StandardTokenizerImpl::ZZ_ATTRIBUTE_PACKED_LENGTH = 30; + +/// This character denotes the end of file +const int32_t StandardTokenizerImpl::YYEOF = -1; + +/// Lexical states +const int32_t StandardTokenizerImpl::YYINITIAL = 0; + +StandardTokenizerImpl::StandardTokenizerImpl(const ReaderPtr& in) { + this->zzState = 0; + this->zzLexicalState = YYINITIAL; + this->zzBuffer = CharArray::newInstance(ZZ_BUFFERSIZE); + this->zzMarkedPos = 0; + this->zzPushbackPos = 0; + this->zzCurrentPos = 0; + this->zzStartRead = 0; + this->zzEndRead = 0; + this->yyline = 0; + this->_yychar = 0; + this->yycolumn = 0; + this->zzAtBOL = true; + this->zzAtEOF = false; + this->zzReader = in; +} + +StandardTokenizerImpl::~StandardTokenizerImpl() { +} + +void StandardTokenizerImpl::ZZ_CMAP_INIT() { + _ZZ_CMAP = CharArray::newInstance(ZZ_CMAP_LENGTH); + wchar_t* result = _ZZ_CMAP.get(); + + int32_t i = 0; // index in packed string + int32_t j = 0; // index in unpacked array + while (i < ZZ_CMAP_PACKED_LENGTH) { + int32_t count = ZZ_CMAP_PACKED[i++]; + wchar_t value = ZZ_CMAP_PACKED[i++]; + do { + result[j++] = value; + } while (--count > 0); } - - void StandardTokenizerImpl::reset(ReaderPtr r) - { - // reset to default buffer size, if buffer has grown - if (zzBuffer.size() > ZZ_BUFFERSIZE) - zzBuffer.resize(ZZ_BUFFERSIZE); - yyreset(r); +} + +const wchar_t* StandardTokenizerImpl::ZZ_CMAP() { + static boost::once_flag once = BOOST_ONCE_INIT; + boost::call_once(once, ZZ_CMAP_INIT); + return _ZZ_CMAP.get(); +} + +void StandardTokenizerImpl::ZZ_ACTION_INIT() { + _ZZ_ACTION = IntArray::newInstance(ZZ_ACTION_LENGTH); + int32_t* result = _ZZ_ACTION.get(); + + int32_t i = 0; // index in packed string + int32_t j = 0; // index in unpacked array + while (i < ZZ_ACTION_PACKED_LENGTH) { + int32_t count = ZZ_ACTION_PACKED_0[i++]; + int32_t value = ZZ_ACTION_PACKED_0[i++]; + do { + result[j++] = value; + } while (--count > 0); } - - void StandardTokenizerImpl::getText(TokenPtr t) - { - 
t->setTermBuffer(zzBuffer.get(), zzStartRead, zzMarkedPos - zzStartRead); +} + +const int32_t* StandardTokenizerImpl::ZZ_ACTION() { + static boost::once_flag once = BOOST_ONCE_INIT; + boost::call_once(once, ZZ_ACTION_INIT); + return _ZZ_ACTION.get(); +} + +void StandardTokenizerImpl::ZZ_ROWMAP_INIT() { + _ZZ_ROWMAP = IntArray::newInstance(ZZ_ROWMAP_LENGTH); + int32_t* result = _ZZ_ROWMAP.get(); + + int32_t i = 0; // index in packed string + int32_t j = 0; // index in unpacked array + while (i < ZZ_ROWMAP_PACKED_LENGTH) { + int32_t high = ZZ_ROWMAP_PACKED_0[i++] << 16; + result[j++] = high | ZZ_ROWMAP_PACKED_0[i++]; } - - void StandardTokenizerImpl::getText(TermAttributePtr t) - { - t->setTermBuffer(zzBuffer.get(), zzStartRead, zzMarkedPos - zzStartRead); +} + +const int32_t* StandardTokenizerImpl::ZZ_ROWMAP() { + static boost::once_flag once = BOOST_ONCE_INIT; + boost::call_once(once, ZZ_ROWMAP_INIT); + return _ZZ_ROWMAP.get(); +} + +void StandardTokenizerImpl::ZZ_TRANS_INIT() { + _ZZ_TRANS = IntArray::newInstance(ZZ_TRANS_LENGTH); + int32_t* result = _ZZ_TRANS.get(); + + int32_t i = 0; // index in packed string + int32_t j = 0; // index in unpacked array + while (i < ZZ_TRANS_PACKED_LENGTH) { + int32_t count = ZZ_TRANS_PACKED_0[i++]; + int32_t value = ZZ_TRANS_PACKED_0[i++]; + --value; + do { + result[j++] = value; + } while (--count > 0); } - - bool StandardTokenizerImpl::zzRefill() - { - // first: make room (if you can) - if (zzStartRead > 0) - { - MiscUtils::arrayCopy(zzBuffer.get(), zzStartRead, zzBuffer.get(), 0, zzEndRead - zzStartRead); - - // translate stored positions - zzEndRead -= zzStartRead; - zzCurrentPos -= zzStartRead; - zzMarkedPos -= zzStartRead; - zzPushbackPos -= zzStartRead; - zzStartRead = 0; - } - - // is the buffer big enough? 
- if (zzCurrentPos >= zzBuffer.size()) - zzBuffer.resize(zzCurrentPos * 2); - - // finally: fill the buffer with new input - int32_t numRead = zzReader->read(zzBuffer.get(), zzEndRead, zzBuffer.size() - zzEndRead); - - if (numRead < 0) - return true; - else - { - zzEndRead += numRead; - return false; - } +} + +const int32_t* StandardTokenizerImpl::ZZ_TRANS() { + static boost::once_flag once = BOOST_ONCE_INIT; + boost::call_once(once, ZZ_TRANS_INIT); + return _ZZ_TRANS.get(); +} + +void StandardTokenizerImpl::ZZ_ATTRIBUTE_INIT() { + _ZZ_ATTRIBUTE = IntArray::newInstance(ZZ_ATTRIBUTE_LENGTH); + int32_t* result = _ZZ_ATTRIBUTE.get(); + + int32_t i = 0; // index in packed string + int32_t j = 0; // index in unpacked array + while (i < ZZ_ATTRIBUTE_PACKED_LENGTH) { + int32_t count = ZZ_ATTRIBUTE_PACKED_0[i++]; + int32_t value = ZZ_ATTRIBUTE_PACKED_0[i++]; + do { + result[j++] = value; + } while (--count > 0); } - - void StandardTokenizerImpl::yyclose() - { - zzAtEOF = true; // indicate end of file - zzEndRead = zzStartRead; // invalidate buffer - - if (zzReader) - zzReader->close(); +} + +const int32_t* StandardTokenizerImpl::ZZ_ATTRIBUTE() { + static boost::once_flag once = BOOST_ONCE_INIT; + boost::call_once(once, ZZ_ATTRIBUTE_INIT); + return _ZZ_ATTRIBUTE.get(); +} + +int32_t StandardTokenizerImpl::yychar() { + return _yychar; +} + +void StandardTokenizerImpl::reset(const ReaderPtr& r) { + // reset to default buffer size, if buffer has grown + if (zzBuffer.size() > ZZ_BUFFERSIZE) { + zzBuffer.resize(ZZ_BUFFERSIZE); } - - void StandardTokenizerImpl::yyreset(ReaderPtr reader) - { - zzReader = reader; - zzAtBOL = true; - zzAtEOF = false; - zzEndRead = 0; + yyreset(r); +} + +void StandardTokenizerImpl::getText(const TokenPtr& t) { + t->setTermBuffer(zzBuffer.get(), zzStartRead, zzMarkedPos - zzStartRead); +} + +void StandardTokenizerImpl::getText(const TermAttributePtr& t) { + t->setTermBuffer(zzBuffer.get(), zzStartRead, zzMarkedPos - zzStartRead); +} + +bool 
StandardTokenizerImpl::zzRefill() { + // first: make room (if you can) + if (zzStartRead > 0) { + MiscUtils::arrayCopy(zzBuffer.get(), zzStartRead, zzBuffer.get(), 0, zzEndRead - zzStartRead); + + // translate stored positions + zzEndRead -= zzStartRead; + zzCurrentPos -= zzStartRead; + zzMarkedPos -= zzStartRead; + zzPushbackPos -= zzStartRead; zzStartRead = 0; - zzCurrentPos = 0; - zzMarkedPos = 0; - zzPushbackPos = 0; - yyline = 0; - _yychar = 0; - yycolumn = 0; - zzLexicalState = YYINITIAL; - } - - int32_t StandardTokenizerImpl::yystate() - { - return zzLexicalState; - } - - void StandardTokenizerImpl::yybegin(int32_t newState) - { - zzLexicalState = newState; } - - String StandardTokenizerImpl::yytext() - { - return String(zzBuffer.get() + zzStartRead, zzMarkedPos - zzStartRead); - } - - wchar_t StandardTokenizerImpl::yycharat(int32_t pos) - { - return zzBuffer[zzStartRead + pos]; + + // is the buffer big enough? + if (zzCurrentPos >= zzBuffer.size()) { + zzBuffer.resize(zzCurrentPos * 2); } - - int32_t StandardTokenizerImpl::yylength() - { - return zzMarkedPos - zzStartRead; + + // finally: fill the buffer with new input + int32_t numRead = zzReader->read(zzBuffer.get(), zzEndRead, zzBuffer.size() - zzEndRead); + + if (numRead < 0) { + return true; + } else { + zzEndRead += numRead; + return false; } - - void StandardTokenizerImpl::zzScanError(int32_t errorCode) - { - boost::throw_exception(ParseException(ZZ_ERROR_MSG[errorCode])); +} + +void StandardTokenizerImpl::yyclose() { + zzAtEOF = true; // indicate end of file + zzEndRead = zzStartRead; // invalidate buffer + + if (zzReader) { + zzReader->close(); } - - void StandardTokenizerImpl::yypushback(int32_t number) - { - if (number > yylength()) - zzScanError(ZZ_PUSHBACK_2BIG); - zzMarkedPos -= number; +} + +void StandardTokenizerImpl::yyreset(const ReaderPtr& reader) { + zzReader = reader; + zzAtBOL = true; + zzAtEOF = false; + zzEndRead = 0; + zzStartRead = 0; + zzCurrentPos = 0; + zzMarkedPos = 0; + 
zzPushbackPos = 0; + yyline = 0; + _yychar = 0; + yycolumn = 0; + zzLexicalState = YYINITIAL; +} + +int32_t StandardTokenizerImpl::yystate() { + return zzLexicalState; +} + +void StandardTokenizerImpl::yybegin(int32_t newState) { + zzLexicalState = newState; +} + +String StandardTokenizerImpl::yytext() { + return String(zzBuffer.get() + zzStartRead, zzMarkedPos - zzStartRead); +} + +wchar_t StandardTokenizerImpl::yycharat(int32_t pos) { + return zzBuffer[zzStartRead + pos]; +} + +int32_t StandardTokenizerImpl::yylength() { + return zzMarkedPos - zzStartRead; +} + +void StandardTokenizerImpl::zzScanError(int32_t errorCode) { + boost::throw_exception(ParseException(ZZ_ERROR_MSG[errorCode])); +} + +void StandardTokenizerImpl::yypushback(int32_t number) { + if (number > yylength()) { + zzScanError(ZZ_PUSHBACK_2BIG); } - - int32_t StandardTokenizerImpl::getNextToken() - { - int32_t zzInput; - int32_t zzAction; - - // cached fields - int32_t zzCurrentPosL; - int32_t zzMarkedPosL; - int32_t zzEndReadL = zzEndRead; - wchar_t* zzBufferL = zzBuffer.get(); - const wchar_t* zzCMapL = ZZ_CMAP(); - - const int32_t* zzTransL = ZZ_TRANS(); - const int32_t* zzRowMapL = ZZ_ROWMAP(); - const int32_t* zzAttrL = ZZ_ATTRIBUTE(); - const int32_t* zzActionL = ZZ_ACTION(); - - while (true) - { - zzMarkedPosL = zzMarkedPos; - _yychar += zzMarkedPosL - zzStartRead; - zzAction = -1; - zzCurrentPosL = zzMarkedPosL; - zzCurrentPos = zzMarkedPosL; - zzStartRead = zzMarkedPosL; - zzState = zzLexicalState; - - while (true) - { - if (zzCurrentPosL < zzEndReadL) - zzInput = zzBufferL[zzCurrentPosL++]; - else if (zzAtEOF) - { + zzMarkedPos -= number; +} + +int32_t StandardTokenizerImpl::getNextToken() { + int32_t zzInput; + int32_t zzAction; + + // cached fields + int32_t zzCurrentPosL; + int32_t zzMarkedPosL; + int32_t zzEndReadL = zzEndRead; + wchar_t* zzBufferL = zzBuffer.get(); + const wchar_t* zzCMapL = ZZ_CMAP(); + + // This code was originally written in Java, which uses UTF-16, and it can't + 
// correctly deal with 32bit wchar_t and characters outside of the Basic + // Multilingual Plane. As a workaround to prevent crashes, treat all + // characters above U+FFFF as letters in the tokenizer. + // See https://github.com/luceneplusplus/LucenePlusPlus/issues/57 + const wchar_t zzCMapFallback = zzCMapL['A']; +#ifdef LPP_UNICODE_CHAR_SIZE_4 + #define zzCMap_at(n) ((n) > 0xFFFF ? zzCMapFallback : zzCMapL[n]) +#else + // If the 16-bit value is in [0xD800, 0xDFFF], it is part of a multi-byte + // UTF-16 character and its UTF code point is > U+FFFF, so handle as above. + #define zzCMap_at(n) (((n) & 0xF800) == 0xD800 ? zzCMapFallback : zzCMapL[n]) +#endif + + const int32_t* zzTransL = ZZ_TRANS(); + const int32_t* zzRowMapL = ZZ_ROWMAP(); + const int32_t* zzAttrL = ZZ_ATTRIBUTE(); + const int32_t* zzActionL = ZZ_ACTION(); + + while (true) { + zzMarkedPosL = zzMarkedPos; + _yychar += zzMarkedPosL - zzStartRead; + zzAction = -1; + zzCurrentPosL = zzMarkedPosL; + zzCurrentPos = zzMarkedPosL; + zzStartRead = zzMarkedPosL; + zzState = zzLexicalState; + + while (true) { + if (zzCurrentPosL < zzEndReadL) { + zzInput = zzBufferL[zzCurrentPosL++]; + } else if (zzAtEOF) { + zzInput = YYEOF; + break; + } else { + // store back cached positions + zzCurrentPos = zzCurrentPosL; + zzMarkedPos = zzMarkedPosL; + bool eof = zzRefill(); + // get translated positions and possibly new buffer + zzCurrentPosL = zzCurrentPos; + zzMarkedPosL = zzMarkedPos; + zzBufferL = zzBuffer.get(); + zzEndReadL = zzEndRead; + if (eof) { zzInput = YYEOF; break; + } else { + zzInput = zzBufferL[zzCurrentPosL++]; } - else - { - // store back cached positions - zzCurrentPos = zzCurrentPosL; - zzMarkedPos = zzMarkedPosL; - bool eof = zzRefill(); - // get translated positions and possibly new buffer - zzCurrentPosL = zzCurrentPos; - zzMarkedPosL = zzMarkedPos; - zzBufferL = zzBuffer.get(); - zzEndReadL = zzEndRead; - if (eof) - { - zzInput = YYEOF; - break; - } - else - zzInput = zzBufferL[zzCurrentPosL++]; 
- } - - int32_t zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]; - if (zzNext == -1) + } + + int32_t zzNext = zzTransL[zzRowMapL[zzState] + zzCMap_at(zzInput)]; + if (zzNext == -1) { + break; + } + zzState = zzNext; + + int32_t zzAttributes = zzAttrL[zzState]; + if ((zzAttributes & 1) == 1) { + zzAction = zzState; + zzMarkedPosL = zzCurrentPosL; + if ((zzAttributes & 8) == 8) { break; - zzState = zzNext; - - int32_t zzAttributes = zzAttrL[zzState]; - if ((zzAttributes & 1) == 1) - { - zzAction = zzState; - zzMarkedPosL = zzCurrentPosL; - if ((zzAttributes & 8) == 8) - break; } } + } - // store back cached position - zzMarkedPos = zzMarkedPosL; - - switch (zzAction < 0 ? zzAction : zzActionL[zzAction]) - { - case 4: - return StandardTokenizer::HOST; - case 11: - break; - case 9: - return StandardTokenizer::ACRONYM; - case 12: - break; - case 8: - return StandardTokenizer::ACRONYM_DEP; - case 13: - break; - case 1: // ignore - case 14: - break; - case 5: - return StandardTokenizer::NUM; - case 15: - break; - case 3: - return StandardTokenizer::CJ; - case 16: - break; - case 2: - return StandardTokenizer::ALPHANUM; - case 17: - break; - case 7: - return StandardTokenizer::COMPANY; - case 18: - break; - case 6: - return StandardTokenizer::APOSTROPHE; - case 19: - break; - case 10: - return StandardTokenizer::EMAIL; - case 20: - break; - default: - if (zzInput == YYEOF && zzStartRead == zzCurrentPos) - { - zzAtEOF = true; - return YYEOF; - } - else - zzScanError(ZZ_NO_MATCH); + // store back cached position + zzMarkedPos = zzMarkedPosL; + + switch (zzAction < 0 ? 
zzAction : zzActionL[zzAction]) { + case 4: + return StandardTokenizer::HOST; + case 11: + break; + case 9: + return StandardTokenizer::ACRONYM; + case 12: + break; + case 8: + return StandardTokenizer::ACRONYM_DEP; + case 13: + break; + case 1: // ignore + case 14: + break; + case 5: + return StandardTokenizer::NUM; + case 15: + break; + case 3: + return StandardTokenizer::CJ; + case 16: + break; + case 2: + return StandardTokenizer::ALPHANUM; + case 17: + break; + case 7: + return StandardTokenizer::COMPANY; + case 18: + break; + case 6: + return StandardTokenizer::APOSTROPHE; + case 19: + break; + case 10: + return StandardTokenizer::EMAIL; + case 20: + break; + default: + if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { + zzAtEOF = true; + return YYEOF; + } else { + zzScanError(ZZ_NO_MATCH); } } - - return YYINITIAL; } + + return YYINITIAL; +} + } diff --git a/src/core/analysis/tokenattributes/FlagsAttribute.cpp b/src/core/analysis/tokenattributes/FlagsAttribute.cpp index b8e0857f..67a260cd 100644 --- a/src/core/analysis/tokenattributes/FlagsAttribute.cpp +++ b/src/core/analysis/tokenattributes/FlagsAttribute.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,64 +8,57 @@ #include "FlagsAttribute.h" #include "StringUtils.h" -namespace Lucene -{ - FlagsAttribute::FlagsAttribute() - { - flags = 0; - } - - FlagsAttribute::~FlagsAttribute() - { - } - - String FlagsAttribute::toString() - { - return L"flags=" + StringUtils::toString(flags); - } - - int32_t FlagsAttribute::getFlags() - { - return flags; - } - - void FlagsAttribute::setFlags(int32_t flags) - { - this->flags = flags; - } - - void FlagsAttribute::clear() - { - flags = 0; - } - - bool FlagsAttribute::equals(LuceneObjectPtr other) - { - if (Attribute::equals(other)) - return true; - - FlagsAttributePtr otherFlagsAttribute(boost::dynamic_pointer_cast(other)); - if (otherFlagsAttribute) - return (otherFlagsAttribute->flags == flags); - - return false; - } - - int32_t FlagsAttribute::hashCode() - { - return flags; - } - - void FlagsAttribute::copyTo(AttributePtr target) - { - boost::dynamic_pointer_cast(target)->setFlags(flags); +namespace Lucene { + +FlagsAttribute::FlagsAttribute() { + flags = 0; +} + +FlagsAttribute::~FlagsAttribute() { +} + +String FlagsAttribute::toString() { + return L"flags=" + StringUtils::toString(flags); +} + +int32_t FlagsAttribute::getFlags() { + return flags; +} + +void FlagsAttribute::setFlags(int32_t flags) { + this->flags = flags; +} + +void FlagsAttribute::clear() { + flags = 0; +} + +bool FlagsAttribute::equals(const LuceneObjectPtr& other) { + if (Attribute::equals(other)) { + return true; } - - LuceneObjectPtr FlagsAttribute::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - FlagsAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); - cloneAttribute->flags = flags; - return cloneAttribute; + + FlagsAttributePtr otherFlagsAttribute(boost::dynamic_pointer_cast(other)); + if (otherFlagsAttribute) { + return (otherFlagsAttribute->flags == flags); } + + return false; +} + +int32_t FlagsAttribute::hashCode() { + return flags; +} + +void FlagsAttribute::copyTo(const AttributePtr& target) { + boost::dynamic_pointer_cast(target)->setFlags(flags); +} + +LuceneObjectPtr FlagsAttribute::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + FlagsAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); + cloneAttribute->flags = flags; + return cloneAttribute; +} + } diff --git a/src/core/analysis/tokenattributes/OffsetAttribute.cpp b/src/core/analysis/tokenattributes/OffsetAttribute.cpp index 32a7ca45..4f20c61c 100644 --- a/src/core/analysis/tokenattributes/OffsetAttribute.cpp +++ b/src/core/analysis/tokenattributes/OffsetAttribute.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,76 +8,68 @@ #include "OffsetAttribute.h" #include "StringUtils.h" -namespace Lucene -{ - OffsetAttribute::OffsetAttribute() - { - _startOffset = 0; - _endOffset = 0; - } - - OffsetAttribute::~OffsetAttribute() - { - } - - String OffsetAttribute::toString() - { - return L"startOffset=" + StringUtils::toString(_startOffset) + L";endOffset=" + StringUtils::toString(_endOffset); - } - - int32_t OffsetAttribute::startOffset() - { - return _startOffset; - } - - void OffsetAttribute::setOffset(int32_t startOffset, int32_t endOffset) - { - this->_startOffset = startOffset; - this->_endOffset = endOffset; - } - - int32_t OffsetAttribute::endOffset() - { - return _endOffset; - } - - void OffsetAttribute::clear() - { - _startOffset = 0; - _endOffset = 0; - } - - bool OffsetAttribute::equals(LuceneObjectPtr other) - { - if (Attribute::equals(other)) - return true; - - OffsetAttributePtr otherOffsetAttribute(boost::dynamic_pointer_cast(other)); - if (otherOffsetAttribute) - return (otherOffsetAttribute->_startOffset == _startOffset && otherOffsetAttribute->_endOffset == _endOffset); - - return false; - } - - int32_t OffsetAttribute::hashCode() - { - int32_t code = _startOffset; - code = code * 31 + _endOffset; - return code; - } - - void OffsetAttribute::copyTo(AttributePtr target) - { - OffsetAttributePtr targetOffsetAttribute(boost::dynamic_pointer_cast(target)); - targetOffsetAttribute->setOffset(_startOffset, _endOffset); +namespace Lucene { + +OffsetAttribute::OffsetAttribute() { + _startOffset = 0; + _endOffset = 0; +} + +OffsetAttribute::~OffsetAttribute() { +} + +String OffsetAttribute::toString() { + return L"startOffset=" + StringUtils::toString(_startOffset) + L";endOffset=" + StringUtils::toString(_endOffset); +} + +int32_t OffsetAttribute::startOffset() { + return _startOffset; +} + +void OffsetAttribute::setOffset(int32_t startOffset, int32_t endOffset) { + this->_startOffset = 
startOffset; + this->_endOffset = endOffset; +} + +int32_t OffsetAttribute::endOffset() { + return _endOffset; +} + +void OffsetAttribute::clear() { + _startOffset = 0; + _endOffset = 0; +} + +bool OffsetAttribute::equals(const LuceneObjectPtr& other) { + if (Attribute::equals(other)) { + return true; } - - LuceneObjectPtr OffsetAttribute::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? other : newLucene(); - OffsetAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); - cloneAttribute->_startOffset = _startOffset; - cloneAttribute->_endOffset = _endOffset; - return cloneAttribute; + + OffsetAttributePtr otherOffsetAttribute(boost::dynamic_pointer_cast(other)); + if (otherOffsetAttribute) { + return (otherOffsetAttribute->_startOffset == _startOffset && otherOffsetAttribute->_endOffset == _endOffset); } + + return false; +} + +int32_t OffsetAttribute::hashCode() { + int32_t code = _startOffset; + code = code * 31 + _endOffset; + return code; +} + +void OffsetAttribute::copyTo(const AttributePtr& target) { + OffsetAttributePtr targetOffsetAttribute(boost::dynamic_pointer_cast(target)); + targetOffsetAttribute->setOffset(_startOffset, _endOffset); +} + +LuceneObjectPtr OffsetAttribute::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + OffsetAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); + cloneAttribute->_startOffset = _startOffset; + cloneAttribute->_endOffset = _endOffset; + return cloneAttribute; +} + } diff --git a/src/core/analysis/tokenattributes/PayloadAttribute.cpp b/src/core/analysis/tokenattributes/PayloadAttribute.cpp index 14c87828..217056da 100644 --- a/src/core/analysis/tokenattributes/PayloadAttribute.cpp +++ b/src/core/analysis/tokenattributes/PayloadAttribute.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,74 +9,66 @@ #include "Payload.h" #include "StringUtils.h" -namespace Lucene -{ - PayloadAttribute::PayloadAttribute() - { - } - - PayloadAttribute::PayloadAttribute(PayloadPtr payload) - { - this->payload = payload; - } - - PayloadAttribute::~PayloadAttribute() - { - } - - String PayloadAttribute::toString() - { - return L"payload(length)=" + StringUtils::toString(payload->length()); - } - - PayloadPtr PayloadAttribute::getPayload() - { - return this->payload; - } - - void PayloadAttribute::setPayload(PayloadPtr payload) - { - this->payload = payload; - } - - void PayloadAttribute::clear() - { - payload.reset(); +namespace Lucene { + +PayloadAttribute::PayloadAttribute() { +} + +PayloadAttribute::PayloadAttribute(const PayloadPtr& payload) { + this->payload = payload; +} + +PayloadAttribute::~PayloadAttribute() { +} + +String PayloadAttribute::toString() { + return L"payload(length)=" + StringUtils::toString(payload->length()); +} + +PayloadPtr PayloadAttribute::getPayload() { + return this->payload; +} + +void PayloadAttribute::setPayload(const PayloadPtr& payload) { + this->payload = payload; +} + +void PayloadAttribute::clear() { + payload.reset(); +} + +LuceneObjectPtr PayloadAttribute::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = Attribute::clone(other ? other : newLucene()); + PayloadAttributePtr cloneAttribute(boost::dynamic_pointer_cast(clone)); + if (payload) { + cloneAttribute->payload = boost::dynamic_pointer_cast(payload->clone()); } - - LuceneObjectPtr PayloadAttribute::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = Attribute::clone(other ? 
other : newLucene()); - PayloadAttributePtr cloneAttribute(boost::dynamic_pointer_cast(clone)); - if (payload) - cloneAttribute->payload = boost::dynamic_pointer_cast(payload->clone()); - return cloneAttribute; + return cloneAttribute; +} + +bool PayloadAttribute::equals(const LuceneObjectPtr& other) { + if (Attribute::equals(other)) { + return true; } - - bool PayloadAttribute::equals(LuceneObjectPtr other) - { - if (Attribute::equals(other)) + + PayloadAttributePtr otherAttribute(boost::dynamic_pointer_cast(other)); + if (otherAttribute) { + if (!otherAttribute->payload && !payload) { return true; - - PayloadAttributePtr otherAttribute(boost::dynamic_pointer_cast(other)); - if (otherAttribute) - { - if (!otherAttribute->payload && !payload) - return true; - return otherAttribute->payload->equals(payload); } - - return false; - } - - int32_t PayloadAttribute::hashCode() - { - return payload ? payload->hashCode() : 0; - } - - void PayloadAttribute::copyTo(AttributePtr target) - { - PayloadAttributePtr targetPayloadAttribute(boost::dynamic_pointer_cast(target)); - targetPayloadAttribute->setPayload(payload ? boost::dynamic_pointer_cast(payload->clone()) : PayloadPtr()); + return otherAttribute->payload->equals(payload); } + + return false; +} + +int32_t PayloadAttribute::hashCode() { + return payload ? payload->hashCode() : 0; +} + +void PayloadAttribute::copyTo(const AttributePtr& target) { + PayloadAttributePtr targetPayloadAttribute(boost::dynamic_pointer_cast(target)); + targetPayloadAttribute->setPayload(payload ? 
boost::dynamic_pointer_cast(payload->clone()) : PayloadPtr()); +} + } diff --git a/src/core/analysis/tokenattributes/PositionIncrementAttribute.cpp b/src/core/analysis/tokenattributes/PositionIncrementAttribute.cpp index f8fde549..a6610aa3 100644 --- a/src/core/analysis/tokenattributes/PositionIncrementAttribute.cpp +++ b/src/core/analysis/tokenattributes/PositionIncrementAttribute.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,67 +8,61 @@ #include "PositionIncrementAttribute.h" #include "StringUtils.h" -namespace Lucene -{ - PositionIncrementAttribute::PositionIncrementAttribute() - { - positionIncrement = 1; - } - - PositionIncrementAttribute::~PositionIncrementAttribute() - { - } - - String PositionIncrementAttribute::toString() - { - return L"positionIncrement=" + StringUtils::toString(positionIncrement); - } - - void PositionIncrementAttribute::setPositionIncrement(int32_t positionIncrement) - { - if (positionIncrement < 0) - boost::throw_exception(IllegalArgumentException(L"Increment must be zero or greater: " + StringUtils::toString(positionIncrement))); - this->positionIncrement = positionIncrement; - } - - int32_t PositionIncrementAttribute::getPositionIncrement() - { - return positionIncrement; - } - - void PositionIncrementAttribute::clear() - { - this->positionIncrement = 1; - } - - bool PositionIncrementAttribute::equals(LuceneObjectPtr other) - { - if (Attribute::equals(other)) - return true; - - PositionIncrementAttributePtr otherPositionIncrementAttribute(boost::dynamic_pointer_cast(other)); - if (otherPositionIncrementAttribute) - return positionIncrement == 
otherPositionIncrementAttribute->positionIncrement; - - return false; - } - - int32_t PositionIncrementAttribute::hashCode() - { - return positionIncrement; +namespace Lucene { + +PositionIncrementAttribute::PositionIncrementAttribute() { + positionIncrement = 1; +} + +PositionIncrementAttribute::~PositionIncrementAttribute() { +} + +String PositionIncrementAttribute::toString() { + return L"positionIncrement=" + StringUtils::toString(positionIncrement); +} + +void PositionIncrementAttribute::setPositionIncrement(int32_t positionIncrement) { + if (positionIncrement < 0) { + boost::throw_exception(IllegalArgumentException(L"Increment must be zero or greater: " + StringUtils::toString(positionIncrement))); } - - void PositionIncrementAttribute::copyTo(AttributePtr target) - { - PositionIncrementAttributePtr targetPositionIncrementAttribute(boost::dynamic_pointer_cast(target)); - targetPositionIncrementAttribute->setPositionIncrement(positionIncrement); + this->positionIncrement = positionIncrement; +} + +int32_t PositionIncrementAttribute::getPositionIncrement() { + return positionIncrement; +} + +void PositionIncrementAttribute::clear() { + this->positionIncrement = 1; +} + +bool PositionIncrementAttribute::equals(const LuceneObjectPtr& other) { + if (Attribute::equals(other)) { + return true; } - - LuceneObjectPtr PositionIncrementAttribute::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - PositionIncrementAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); - cloneAttribute->positionIncrement = positionIncrement; - return cloneAttribute; + + PositionIncrementAttributePtr otherPositionIncrementAttribute(boost::dynamic_pointer_cast(other)); + if (otherPositionIncrementAttribute) { + return positionIncrement == otherPositionIncrementAttribute->positionIncrement; } + + return false; +} + +int32_t PositionIncrementAttribute::hashCode() { + return positionIncrement; +} + +void PositionIncrementAttribute::copyTo(const AttributePtr& target) { + PositionIncrementAttributePtr targetPositionIncrementAttribute(boost::dynamic_pointer_cast(target)); + targetPositionIncrementAttribute->setPositionIncrement(positionIncrement); +} + +LuceneObjectPtr PositionIncrementAttribute::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + PositionIncrementAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); + cloneAttribute->positionIncrement = positionIncrement; + return cloneAttribute; +} + } diff --git a/src/core/analysis/tokenattributes/TermAttribute.cpp b/src/core/analysis/tokenattributes/TermAttribute.cpp index ccc01162..e98fe628 100644 --- a/src/core/analysis/tokenattributes/TermAttribute.cpp +++ b/src/core/analysis/tokenattributes/TermAttribute.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,159 +9,141 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t TermAttribute::MIN_BUFFER_SIZE = 10; - - TermAttribute::TermAttribute() - { - _termLength = 0; - } - - TermAttribute::~TermAttribute() - { - } - - String TermAttribute::toString() - { - return L"term=" + term(); - } - - String TermAttribute::term() - { +namespace Lucene { + +const int32_t TermAttribute::MIN_BUFFER_SIZE = 10; + +TermAttribute::TermAttribute() { + _termLength = 0; +} + +TermAttribute::~TermAttribute() { +} + +String TermAttribute::toString() { + return L"term=" + term(); +} + +String TermAttribute::term() { + initTermBuffer(); + return String(_termBuffer.get(), _termLength); +} + +void TermAttribute::setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length) { + growTermBuffer(length); + MiscUtils::arrayCopy(buffer, offset, _termBuffer.get(), 0, length); + _termLength = length; +} + +void TermAttribute::setTermBuffer(const String& buffer) { + int32_t length = (int32_t)buffer.size(); + growTermBuffer(length); + MiscUtils::arrayCopy(buffer.begin(), 0, _termBuffer.get(), 0, length); + _termLength = length; +} + +CharArray TermAttribute::termBuffer() { + if (!_termBuffer) { initTermBuffer(); - return String(_termBuffer.get(), _termLength); - } - - void TermAttribute::setTermBuffer(const wchar_t* buffer, int32_t offset, int32_t length) - { - growTermBuffer(length); - MiscUtils::arrayCopy(buffer, offset, _termBuffer.get(), 0, length); - _termLength = length; - } - - void TermAttribute::setTermBuffer(const String& buffer) - { - int32_t length = (int32_t)buffer.size(); - growTermBuffer(length); - MiscUtils::arrayCopy(buffer.begin(), 0, _termBuffer.get(), 0, length); - _termLength = length; - } - - CharArray TermAttribute::termBuffer() - { - if (!_termBuffer) - initTermBuffer(); - return _termBuffer; - } - - wchar_t* TermAttribute::termBufferArray() - { - if 
(!_termBuffer) - initTermBuffer(); - return _termBuffer.get(); - } - - CharArray TermAttribute::resizeTermBuffer(int32_t newSize) - { - if (!_termBuffer) - { - // The buffer is always at least MIN_BUFFER_SIZE - _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); - } - else if (_termBuffer.size() < newSize) - _termBuffer.resize(MiscUtils::getNextSize(newSize)); - return _termBuffer; } - - void TermAttribute::growTermBuffer(int32_t newSize) - { - if (!_termBuffer) - { - // The buffer is always at least MIN_BUFFER_SIZE - _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); - } - else if (_termBuffer.size() < newSize) - _termBuffer.resize(MiscUtils::getNextSize(newSize)); - } - - void TermAttribute::initTermBuffer() - { - if (!_termBuffer) - { - _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(MIN_BUFFER_SIZE)); - _termLength = 0; - } + return _termBuffer; +} + +wchar_t* TermAttribute::termBufferArray() { + if (!_termBuffer) { + initTermBuffer(); } - - int32_t TermAttribute::termLength() - { - return _termLength; + return _termBuffer.get(); +} + +CharArray TermAttribute::resizeTermBuffer(int32_t newSize) { + if (!_termBuffer) { + // The buffer is always at least MIN_BUFFER_SIZE + _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); + } else if (_termBuffer.size() < newSize) { + _termBuffer.resize(MiscUtils::getNextSize(newSize)); + } + return _termBuffer; +} + +void TermAttribute::growTermBuffer(int32_t newSize) { + if (!_termBuffer) { + // The buffer is always at least MIN_BUFFER_SIZE + _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(std::max(newSize, MIN_BUFFER_SIZE))); + } else if (_termBuffer.size() < newSize) { + _termBuffer.resize(MiscUtils::getNextSize(newSize)); } - - void TermAttribute::setTermLength(int32_t length) - { - if (!_termBuffer) - initTermBuffer(); - if (length > _termBuffer.size()) - { - 
boost::throw_exception(IllegalArgumentException(L"length " + StringUtils::toString(length) + - L" exceeds the size of the termBuffer (" + - StringUtils::toString(_termBuffer.size()) + L")")); - } - _termLength = length; +} + +void TermAttribute::initTermBuffer() { + if (!_termBuffer) { + _termBuffer = CharArray::newInstance(MiscUtils::getNextSize(MIN_BUFFER_SIZE)); + _termLength = 0; } - - int32_t TermAttribute::hashCode() - { +} + +int32_t TermAttribute::termLength() { + return _termLength; +} + +void TermAttribute::setTermLength(int32_t length) { + if (!_termBuffer) { initTermBuffer(); - int32_t code = _termLength; - code = code * 31 + MiscUtils::hashCode(_termBuffer.get(), 0, _termLength); - return code; - } - - void TermAttribute::clear() - { - _termLength = 0; } - - LuceneObjectPtr TermAttribute::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = Attribute::clone(other ? other : newLucene()); - TermAttributePtr cloneAttribute(boost::dynamic_pointer_cast(clone)); - cloneAttribute->_termLength = _termLength; - if (_termBuffer) - { - cloneAttribute->_termBuffer = CharArray::newInstance(_termBuffer.size()); - MiscUtils::arrayCopy(_termBuffer.get(), 0, cloneAttribute->_termBuffer.get(), 0, _termBuffer.size()); - } - return cloneAttribute; + if (length > _termBuffer.size()) { + boost::throw_exception(IllegalArgumentException(L"length " + StringUtils::toString(length) + + L" exceeds the size of the termBuffer (" + + StringUtils::toString(_termBuffer.size()) + L")")); } - - bool TermAttribute::equals(LuceneObjectPtr other) - { - if (Attribute::equals(other)) - return true; - - TermAttributePtr otherTermAttribute(boost::dynamic_pointer_cast(other)); - if (otherTermAttribute) - { - initTermBuffer(); - otherTermAttribute->initTermBuffer(); - - if (_termLength != otherTermAttribute->_termLength) - return false; - - return (std::memcmp(_termBuffer.get(), otherTermAttribute->_termBuffer.get(), _termLength) == 0); - } - - return false; + _termLength = length; +} + 
+int32_t TermAttribute::hashCode() { + initTermBuffer(); + int32_t code = _termLength; + code = code * 31 + MiscUtils::hashCode(_termBuffer.get(), 0, _termLength); + return code; +} + +void TermAttribute::clear() { + _termLength = 0; +} + +LuceneObjectPtr TermAttribute::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = Attribute::clone(other ? other : newLucene()); + TermAttributePtr cloneAttribute(boost::dynamic_pointer_cast(clone)); + cloneAttribute->_termLength = _termLength; + if (_termBuffer) { + cloneAttribute->_termBuffer = CharArray::newInstance(_termBuffer.size()); + MiscUtils::arrayCopy(_termBuffer.get(), 0, cloneAttribute->_termBuffer.get(), 0, _termBuffer.size()); + } + return cloneAttribute; +} + +bool TermAttribute::equals(const LuceneObjectPtr& other) { + if (Attribute::equals(other)) { + return true; } - - void TermAttribute::copyTo(AttributePtr target) - { + + TermAttributePtr otherTermAttribute(boost::dynamic_pointer_cast(other)); + if (otherTermAttribute) { initTermBuffer(); - TermAttributePtr targetTermAttribute(boost::dynamic_pointer_cast(target)); - targetTermAttribute->setTermBuffer(_termBuffer.get(), 0, _termLength); + otherTermAttribute->initTermBuffer(); + + if (_termLength != otherTermAttribute->_termLength) { + return false; + } + + return (std::memcmp(_termBuffer.get(), otherTermAttribute->_termBuffer.get(), _termLength) == 0); } + + return false; +} + +void TermAttribute::copyTo(const AttributePtr& target) { + initTermBuffer(); + TermAttributePtr targetTermAttribute(boost::dynamic_pointer_cast(target)); + targetTermAttribute->setTermBuffer(_termBuffer.get(), 0, _termLength); +} + } diff --git a/src/core/analysis/tokenattributes/TypeAttribute.cpp b/src/core/analysis/tokenattributes/TypeAttribute.cpp index 0417b702..9da99b61 100644 --- a/src/core/analysis/tokenattributes/TypeAttribute.cpp +++ b/src/core/analysis/tokenattributes/TypeAttribute.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,75 +8,66 @@ #include "TypeAttribute.h" #include "StringUtils.h" -namespace Lucene -{ - TypeAttribute::TypeAttribute() - { - _type = DEFAULT_TYPE(); - } - - TypeAttribute::TypeAttribute(const String& type) - { - _type = type; - } - - TypeAttribute::~TypeAttribute() - { - } - - const String& TypeAttribute::DEFAULT_TYPE() - { - static String _DEFAULT_TYPE(L"word"); - return _DEFAULT_TYPE; - } - - String TypeAttribute::toString() - { - return L"type=" + _type; - } - - String TypeAttribute::type() - { - return _type; - } - - void TypeAttribute::setType(const String& type) - { - _type = type; - } - - void TypeAttribute::clear() - { - _type = DEFAULT_TYPE(); - } - - bool TypeAttribute::equals(LuceneObjectPtr other) - { - if (Attribute::equals(other)) - return true; - - TypeAttributePtr otherTypeAttribute(boost::dynamic_pointer_cast(other)); - if (otherTypeAttribute) - return (otherTypeAttribute->_type == _type); - - return false; - } - - int32_t TypeAttribute::hashCode() - { - return StringUtils::hashCode(_type); - } - - void TypeAttribute::copyTo(AttributePtr target) - { - boost::dynamic_pointer_cast(target)->setType(_type); +namespace Lucene { + +TypeAttribute::TypeAttribute() { + _type = DEFAULT_TYPE(); +} + +TypeAttribute::TypeAttribute(const String& type) { + _type = type; +} + +TypeAttribute::~TypeAttribute() { +} + +const String& TypeAttribute::DEFAULT_TYPE() { + static String _DEFAULT_TYPE(L"word"); + return _DEFAULT_TYPE; +} + +String TypeAttribute::toString() { + return L"type=" + _type; +} + +String TypeAttribute::type() { + return _type; +} + +void 
TypeAttribute::setType(const String& type) { + _type = type; +} + +void TypeAttribute::clear() { + _type = DEFAULT_TYPE(); +} + +bool TypeAttribute::equals(const LuceneObjectPtr& other) { + if (Attribute::equals(other)) { + return true; } - - LuceneObjectPtr TypeAttribute::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? other : newLucene(); - TypeAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); - cloneAttribute->_type = _type; - return cloneAttribute; + + TypeAttributePtr otherTypeAttribute(boost::dynamic_pointer_cast(other)); + if (otherTypeAttribute) { + return (otherTypeAttribute->_type == _type); } + + return false; +} + +int32_t TypeAttribute::hashCode() { + return StringUtils::hashCode(_type); +} + +void TypeAttribute::copyTo(const AttributePtr& target) { + boost::dynamic_pointer_cast(target)->setType(_type); +} + +LuceneObjectPtr TypeAttribute::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + TypeAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); + cloneAttribute->_type = _type; + return cloneAttribute; +} + } diff --git a/src/core/document/AbstractField.cpp b/src/core/document/AbstractField.cpp index 6c54d390..a7ca3e46 100644 --- a/src/core/document/AbstractField.cpp +++ b/src/core/document/AbstractField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,215 +10,198 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - AbstractField::AbstractField() - { - this->_name = L"body"; - this->storeTermVector = false; - this->storeOffsetWithTermVector = false; - this->storePositionWithTermVector = false; - this->_omitNorms = false; - this->_isStored = false; - this->_isIndexed = true; - this->_isTokenized = true; - this->_isBinary = false; - - this->lazy = false; - this->omitTermFreqAndPositions = false; - this->boost = 1.0; - this->fieldsData = VariantUtils::null(); - - this->binaryLength = 0; - this->binaryOffset = 0; - } - - AbstractField::AbstractField(const String& name, Field::Store store, Field::Index index, Field::TermVector termVector) - { - this->_name = name; - this->_isStored = Field::isStored(store); - this->_isIndexed = Field::isIndexed(index); - this->_isTokenized = Field::isAnalyzed(index); - this->_omitNorms = Field::omitNorms(index); - this->_isBinary = false; - - this->lazy = false; - this->omitTermFreqAndPositions = false; - this->boost = 1.0; - this->fieldsData = VariantUtils::null(); - - this->binaryLength = 0; - this->binaryOffset = 0; - - setStoreTermVector(termVector); - } - - AbstractField::~AbstractField() - { - } - - void AbstractField::setBoost(double boost) - { - this->boost = boost; - } - - double AbstractField::getBoost() - { - return boost; - } - - String AbstractField::name() - { - return _name; - } - - void AbstractField::setStoreTermVector(Field::TermVector termVector) - { - this->storeTermVector = Field::isStored(termVector); - this->storePositionWithTermVector = Field::withPositions(termVector); - this->storeOffsetWithTermVector = Field::withOffsets(termVector); - } - - bool AbstractField::isStored() - { - return _isStored; - } - - bool AbstractField::isIndexed() - { - return _isIndexed; - } - - bool AbstractField::isTokenized() - { - return _isTokenized; - } - - bool 
AbstractField::isTermVectorStored() - { - return storeTermVector; - } - - bool AbstractField::isStoreOffsetWithTermVector() - { - return storeOffsetWithTermVector; - } - - bool AbstractField::isStorePositionWithTermVector() - { - return storePositionWithTermVector; - } - - bool AbstractField::isBinary() - { - return _isBinary; +namespace Lucene { + +AbstractField::AbstractField() { + this->_name = L"body"; + this->storeTermVector = false; + this->storeOffsetWithTermVector = false; + this->storePositionWithTermVector = false; + this->_omitNorms = false; + this->_isStored = false; + this->_isIndexed = true; + this->_isTokenized = true; + this->_isBinary = false; + + this->lazy = false; + this->omitTermFreqAndPositions = false; + this->boost = 1.0; + this->fieldsData = VariantUtils::null(); + + this->binaryLength = 0; + this->binaryOffset = 0; +} + +AbstractField::AbstractField(const String& name, Field::Store store, Field::Index index, Field::TermVector termVector) { + this->_name = name; + this->_isStored = Field::isStored(store); + this->_isIndexed = Field::isIndexed(index); + this->_isTokenized = Field::isAnalyzed(index); + this->_omitNorms = Field::omitNorms(index); + this->_isBinary = false; + + this->lazy = false; + this->omitTermFreqAndPositions = false; + this->boost = 1.0; + this->fieldsData = VariantUtils::null(); + + this->binaryLength = 0; + this->binaryOffset = 0; + + setStoreTermVector(termVector); +} + +AbstractField::~AbstractField() { +} + +void AbstractField::setBoost(double boost) { + this->boost = boost; +} + +double AbstractField::getBoost() { + return boost; +} + +String AbstractField::name() { + return _name; +} + +void AbstractField::setStoreTermVector(Field::TermVector termVector) { + this->storeTermVector = Field::isStored(termVector); + this->storePositionWithTermVector = Field::withPositions(termVector); + this->storeOffsetWithTermVector = Field::withOffsets(termVector); +} + +bool AbstractField::isStored() { + return _isStored; +} + +bool 
AbstractField::isIndexed() { + return _isIndexed; +} + +bool AbstractField::isTokenized() { + return _isTokenized; +} + +bool AbstractField::isTermVectorStored() { + return storeTermVector; +} + +bool AbstractField::isStoreOffsetWithTermVector() { + return storeOffsetWithTermVector; +} + +bool AbstractField::isStorePositionWithTermVector() { + return storePositionWithTermVector; +} + +bool AbstractField::isBinary() { + return _isBinary; +} + +ByteArray AbstractField::getBinaryValue() { + return getBinaryValue(ByteArray()); +} + +ByteArray AbstractField::getBinaryValue(ByteArray result) { + return VariantUtils::get(fieldsData); +} + +int32_t AbstractField::getBinaryLength() { + if (_isBinary) { + return binaryLength; } - - ByteArray AbstractField::getBinaryValue() - { - return getBinaryValue(ByteArray()); + ByteArray binary(VariantUtils::get(fieldsData)); + return binary ? binary.size() : 0; +} + +int32_t AbstractField::getBinaryOffset() { + return binaryOffset; +} + +bool AbstractField::getOmitNorms() { + return _omitNorms; +} + +bool AbstractField::getOmitTermFreqAndPositions() { + return omitTermFreqAndPositions; +} + +void AbstractField::setOmitNorms(bool omitNorms) { + this->_omitNorms = omitNorms; +} + +void AbstractField::setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) { + this->omitTermFreqAndPositions = omitTermFreqAndPositions; +} + +bool AbstractField::isLazy() { + return lazy; +} + +String AbstractField::toString() { + StringStream result; + if (_isStored) { + result << L"stored"; + } + if (_isIndexed) { + if (!result.str().empty()) { + result << L","; + } + result << L"indexed"; } - - ByteArray AbstractField::getBinaryValue(ByteArray result) - { - return VariantUtils::get(fieldsData); + if (_isTokenized) { + if (!result.str().empty()) { + result << L","; + } + result << L"tokenized"; } - - int32_t AbstractField::getBinaryLength() - { - if (_isBinary) - return binaryLength; - ByteArray binary(VariantUtils::get(fieldsData)); - return binary ? 
binary.size() : 0; + if (storeTermVector) { + if (!result.str().empty()) { + result << L","; + } + result << L"termVector"; } - - int32_t AbstractField::getBinaryOffset() - { - return binaryOffset; + if (storeOffsetWithTermVector) { + if (!result.str().empty()) { + result << L","; + } + result << L"termVectorOffsets"; } - - bool AbstractField::getOmitNorms() - { - return _omitNorms; + if (storePositionWithTermVector) { + if (!result.str().empty()) { + result << L","; + } + result << L"termVectorPosition"; } - - bool AbstractField::getOmitTermFreqAndPositions() - { - return omitTermFreqAndPositions; + if (_isBinary) { + if (!result.str().empty()) { + result << L","; + } + result << L"binary"; } - - void AbstractField::setOmitNorms(bool omitNorms) - { - this->_omitNorms = omitNorms; + if (_omitNorms) { + result << L",omitNorms"; } - - void AbstractField::setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) - { - this->omitTermFreqAndPositions = omitTermFreqAndPositions; + if (omitTermFreqAndPositions) { + result << L",omitTermFreqAndPositions"; } - - bool AbstractField::isLazy() - { - return lazy; + if (lazy) { + result << L",lazy"; } - - String AbstractField::toString() - { - StringStream result; - if (_isStored) - result << L"stored"; - if (_isIndexed) - { - if (!result.str().empty()) - result << L","; - result << L"indexed"; - } - if (_isTokenized) - { - if (!result.str().empty()) - result << L","; - result << L"tokenized"; - } - if (storeTermVector) - { - if (!result.str().empty()) - result << L","; - result << L"termVector"; - } - if (storeOffsetWithTermVector) - { - if (!result.str().empty()) - result << L","; - result << L"termVectorOffsets"; - } - if (storePositionWithTermVector) - { - if (!result.str().empty()) - result << L","; - result << L"termVectorPosition"; - } - if (_isBinary) - { - if (!result.str().empty()) - result << L","; - result << L"binary"; - } - if (_omitNorms) - result << L",omitNorms"; - if (omitTermFreqAndPositions) - result << 
L",omitTermFreqAndPositions"; - if (lazy) - result << L",lazy"; - result << L"<" << _name << L":"; - - if (VariantUtils::typeOf(fieldsData)) - result << VariantUtils::get(fieldsData); - else if (VariantUtils::typeOf(fieldsData)) - result << L"Reader"; - else if (VariantUtils::typeOf(fieldsData)) - result << L"Binary [size=" << StringUtils::toString(VariantUtils::get(fieldsData).size()) << L"]"; - - result << L">"; - return result.str(); + result << L"<" << _name << L":"; + + if (VariantUtils::typeOf(fieldsData)) { + result << VariantUtils::get(fieldsData); + } else if (VariantUtils::typeOf(fieldsData)) { + result << L"Reader"; + } else if (VariantUtils::typeOf(fieldsData)) { + result << L"Binary [size=" << StringUtils::toString(VariantUtils::get(fieldsData).size()) << L"]"; } + + result << L">"; + return result.str(); +} + } diff --git a/src/core/document/CompressionTools.cpp b/src/core/document/CompressionTools.cpp index ff644995..227fb618 100644 --- a/src/core/document/CompressionTools.cpp +++ b/src/core/document/CompressionTools.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,147 +15,130 @@ #include #include -namespace Lucene -{ - const int32_t CompressionTools::COMPRESS_BUFFER = 4096; - - String ZLibToMessage(int32_t error) - { - if (error == boost::iostreams::zlib::okay) - return L"okay"; - else if (error == boost::iostreams::zlib::stream_end) - return L"stream_end"; - else if (error == boost::iostreams::zlib::stream_error) - return L"stream_error"; - else if (error == boost::iostreams::zlib::version_error) - return L"version_error"; - else if (error == boost::iostreams::zlib::data_error) - return L"data_error"; - else if (error == boost::iostreams::zlib::mem_error) - return L"mem_error"; - else if (error == boost::iostreams::zlib::buf_error ) - return L"buf_error"; - else - return L"unknown"; +namespace Lucene { + +const int32_t CompressionTools::COMPRESS_BUFFER = 4096; + +String ZLibToMessage(int32_t error) { + if (error == boost::iostreams::zlib::okay) { + return L"okay"; + } else if (error == boost::iostreams::zlib::stream_end) { + return L"stream_end"; + } else if (error == boost::iostreams::zlib::stream_error) { + return L"stream_error"; + } else if (error == boost::iostreams::zlib::version_error) { + return L"version_error"; + } else if (error == boost::iostreams::zlib::data_error) { + return L"data_error"; + } else if (error == boost::iostreams::zlib::mem_error) { + return L"mem_error"; + } else if (error == boost::iostreams::zlib::buf_error ) { + return L"buf_error"; + } else { + return L"unknown"; } - - class BufferArraySink : public boost::iostreams::sink - { - public: - BufferArraySink(ByteArray& _buffer, std::streamsize& _position, size_t allocSize) : buffer(_buffer), position(_position) - { - this->allocSize = allocSize; - this->buffer.resize((int32_t)allocSize); - } - - public: - ByteArray& buffer; - std::streamsize& position; - - private: - size_t allocSize; - - public: - std::streamsize write(const char* s, std::streamsize n) - { - if 
(position + n >= (std::streamsize)allocSize) - { - // grow buffer - allocSize <<= 1; - buffer.resize((int32_t)allocSize); - } - MiscUtils::arrayCopy(s, 0, buffer.get(), position, n); - position += n; - return n; - } - }; +} - CompressionTools::~CompressionTools() - { +class BufferArraySink : public boost::iostreams::sink { +public: + BufferArraySink(ByteArray& _buffer, std::streamsize& _position, size_t allocSize) : buffer(_buffer), position(_position) { + this->allocSize = allocSize; + this->buffer.resize((int32_t)allocSize); } - ByteArray CompressionTools::compress(uint8_t* value, int32_t offset, int32_t length, int32_t compressionLevel) - { - // setup the outStream - boost::iostreams::filtering_ostreambuf outStream; - boost::iostreams::zlib_compressor zcompressor(compressionLevel); - outStream.push(zcompressor); - - // and the output buffer - ByteArray buffer(ByteArray::newInstance(COMPRESS_BUFFER)); - std::streamsize position = 0; - outStream.push(BufferArraySink(buffer, position, COMPRESS_BUFFER)); - - // setup the source stream, and then copy it to the outStream - boost::iostreams::stream< boost::iostreams::array_source > source((char*)(value + offset), length); - - try - { - boost::iostreams::copy(source, outStream); - } - catch (boost::iostreams::zlib_error& err) - { - boost::throw_exception(CompressionException(L"deflate failure: " + ZLibToMessage(err.error()))); +public: + ByteArray& buffer; + std::streamsize& position; + +private: + size_t allocSize; + +public: + std::streamsize write(const char* s, std::streamsize n) { + if (position + n >= (std::streamsize)allocSize) { + // grow buffer + allocSize <<= 1; + buffer.resize((int32_t)allocSize); } - - buffer.resize((int32_t)position); - - return buffer; - } - - ByteArray CompressionTools::compress(uint8_t* value, int32_t offset, int32_t length) - { - return compress(value, offset, length, boost::iostreams::zlib::best_compression); - } - - ByteArray CompressionTools::compress(ByteArray value) - { - return 
compress(value.get(), 0, value.size(), boost::iostreams::zlib::best_compression); + MiscUtils::arrayCopy(s, 0, buffer.get(), position, n); + position += n; + return n; } - - ByteArray CompressionTools::compressString(const String& value) - { - return compressString(value, boost::iostreams::zlib::best_compression); - } - - ByteArray CompressionTools::compressString(const String& value, int32_t compressionLevel) - { - UTF8ResultPtr utf8Result(newLucene()); - StringUtils::toUTF8(value.c_str(), (int32_t)value.length(), utf8Result); - return compress(utf8Result->result.get(), 0, utf8Result->length, compressionLevel); - } - - ByteArray CompressionTools::decompress(ByteArray value) - { - // setup the outStream - boost::iostreams::filtering_ostreambuf outStream; - outStream.push(boost::iostreams::zlib_decompressor()); - - // and the output buffer - ByteArray buffer(ByteArray::newInstance(COMPRESS_BUFFER)); - std::streamsize position = 0; - outStream.push(BufferArraySink(buffer, position, COMPRESS_BUFFER)); - - //setup the source stream, and then copy it to the outStream - boost::iostreams::stream< boost::iostreams::array_source > source((char*)value.get(), value.size()); - - try - { - boost::iostreams::copy(source, outStream); - } - catch (boost::iostreams::zlib_error& err) - { - boost::throw_exception(CompressionException(L"deflate failure: " + ZLibToMessage(err.error()))); - } - - buffer.resize((int32_t)position); - - return buffer; +}; + +CompressionTools::~CompressionTools() { +} + +ByteArray CompressionTools::compress(uint8_t* value, int32_t offset, int32_t length, int32_t compressionLevel) { + // setup the outStream + boost::iostreams::filtering_ostreambuf outStream; + boost::iostreams::zlib_compressor zcompressor(compressionLevel); + outStream.push(zcompressor); + + // and the output buffer + ByteArray buffer(ByteArray::newInstance(COMPRESS_BUFFER)); + std::streamsize position = 0; + outStream.push(BufferArraySink(buffer, position, COMPRESS_BUFFER)); + + // setup 
the source stream, and then copy it to the outStream + boost::iostreams::stream< boost::iostreams::array_source > source((char*)(value + offset), length); + + try { + boost::iostreams::copy(source, outStream); + } catch (boost::iostreams::zlib_error& err) { + boost::throw_exception(CompressionException(L"deflate failure: " + ZLibToMessage(err.error()))); } - - String CompressionTools::decompressString(ByteArray value) - { - ByteArray bytes(decompress(value)); - return StringUtils::toUnicode(bytes.get(), bytes.size()); + + buffer.resize((int32_t)position); + + return buffer; +} + +ByteArray CompressionTools::compress(uint8_t* value, int32_t offset, int32_t length) { + return compress(value, offset, length, boost::iostreams::zlib::best_compression); +} + +ByteArray CompressionTools::compress(ByteArray value) { + return compress(value.get(), 0, value.size(), boost::iostreams::zlib::best_compression); +} + +ByteArray CompressionTools::compressString(const String& value) { + return compressString(value, boost::iostreams::zlib::best_compression); +} + +ByteArray CompressionTools::compressString(const String& value, int32_t compressionLevel) { + UTF8ResultPtr utf8Result(newLucene()); + StringUtils::toUTF8(value.c_str(), (int32_t)value.length(), utf8Result); + return compress(utf8Result->result.get(), 0, utf8Result->length, compressionLevel); +} + +ByteArray CompressionTools::decompress(ByteArray value) { + // setup the outStream + boost::iostreams::filtering_ostreambuf outStream; + outStream.push(boost::iostreams::zlib_decompressor()); + + // and the output buffer + ByteArray buffer(ByteArray::newInstance(COMPRESS_BUFFER)); + std::streamsize position = 0; + outStream.push(BufferArraySink(buffer, position, COMPRESS_BUFFER)); + + //setup the source stream, and then copy it to the outStream + boost::iostreams::stream< boost::iostreams::array_source > source((char*)value.get(), value.size()); + + try { + boost::iostreams::copy(source, outStream); + } catch 
(boost::iostreams::zlib_error& err) { + boost::throw_exception(CompressionException(L"deflate failure: " + ZLibToMessage(err.error()))); } + + buffer.resize((int32_t)position); + + return buffer; +} + +String CompressionTools::decompressString(ByteArray value) { + ByteArray bytes(decompress(value)); + return StringUtils::toUnicode(bytes.get(), bytes.size()); +} + } diff --git a/src/core/document/DateField.cpp b/src/core/document/DateField.cpp index 545c6717..6cbc92a7 100644 --- a/src/core/document/DateField.cpp +++ b/src/core/document/DateField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,60 +9,55 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - DateField::~DateField() - { - } - - int32_t DateField::DATE_LEN() - { - static int32_t _DATE_LEN = 0; - if (_DATE_LEN == 0) - { - // make date strings long enough to last a millennium - _DATE_LEN = (int32_t)StringUtils::toString((int64_t)(1000 * 365 * 24) * (int64_t)(60 * 60 * 1000), StringUtils::CHARACTER_MAX_RADIX).length(); - } - return _DATE_LEN; - } - - const String& DateField::MIN_DATE_STRING() - { - static String _MIN_DATE_STRING; - if (_MIN_DATE_STRING.empty()) - _MIN_DATE_STRING = timeToString(0); - return _MIN_DATE_STRING; - } - - const String& DateField::MAX_DATE_STRING() - { - static String _MAX_DATE_STRING; - if (_MAX_DATE_STRING.empty()) - { - _MAX_DATE_STRING.resize(DATE_LEN()); - std::fill(_MAX_DATE_STRING.begin(), _MAX_DATE_STRING.end(), L'z'); - } - return _MAX_DATE_STRING; - } - - String DateField::dateToString(const boost::posix_time::ptime& date) - { - return timeToString(MiscUtils::getTimeMillis(date)); 
- } - - String DateField::timeToString(int64_t time) - { - if (time < 0) - boost::throw_exception(RuntimeException(L"time '" + StringUtils::toString(time) + L"' is too early, must be >= 0")); - - String timeString(DATE_LEN(), L'0'); - timeString += StringUtils::toString(time, StringUtils::CHARACTER_MAX_RADIX); - - return timeString.substr(timeString.length() - DATE_LEN(), DATE_LEN()); - } - - int64_t DateField::stringToTime(const String& s) - { - return StringUtils::toLong(s, StringUtils::CHARACTER_MAX_RADIX); +namespace Lucene { + +DateField::~DateField() { +} + +int32_t DateField::DATE_LEN() { + static int32_t _DATE_LEN = 0; + + // make date strings long enough to last a millennium + LUCENE_RUN_ONCE( + _DATE_LEN = (int32_t)StringUtils::toString((int64_t)(1000 * 365 * 24) * (int64_t)(60 * 60 * 1000), StringUtils::CHARACTER_MAX_RADIX).length(); + ); + return _DATE_LEN; +} + +const String& DateField::MIN_DATE_STRING() { + static String _MIN_DATE_STRING; + LUCENE_RUN_ONCE( + _MIN_DATE_STRING = timeToString(0); + ); + return _MIN_DATE_STRING; +} + +const String& DateField::MAX_DATE_STRING() { + static String _MAX_DATE_STRING; + LUCENE_RUN_ONCE( + _MAX_DATE_STRING.resize(DATE_LEN()); + std::fill(_MAX_DATE_STRING.begin(), _MAX_DATE_STRING.end(), L'z'); + ); + return _MAX_DATE_STRING; +} + +String DateField::dateToString(const boost::posix_time::ptime& date) { + return timeToString(MiscUtils::getTimeMillis(date)); +} + +String DateField::timeToString(int64_t time) { + if (time < 0) { + boost::throw_exception(RuntimeException(L"time '" + StringUtils::toString(time) + L"' is too early, must be >= 0")); } + + String timeString(DATE_LEN(), L'0'); + timeString += StringUtils::toString(time, StringUtils::CHARACTER_MAX_RADIX); + + return timeString.substr(timeString.length() - DATE_LEN(), DATE_LEN()); +} + +int64_t DateField::stringToTime(const String& s) { + return StringUtils::toLong(s, StringUtils::CHARACTER_MAX_RADIX); +} + } diff --git a/src/core/document/DateTools.cpp 
b/src/core/document/DateTools.cpp index 4ad1f4f0..87f7927f 100644 --- a/src/core/document/DateTools.cpp +++ b/src/core/document/DateTools.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,215 +11,200 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - DateTools::DateOrder DateTools::dateOrder = DateTools::DATEORDER_LOCALE; - - DateTools::~DateTools() - { - } - - String DateTools::dateToString(const boost::posix_time::ptime& date, Resolution resolution) - { - return timeToString(MiscUtils::getTimeMillis(date), resolution); +namespace Lucene { + +DateTools::DateOrder DateTools::dateOrder = DateTools::DATEORDER_LOCALE; + +DateTools::~DateTools() { +} + +String DateTools::dateToString(const boost::posix_time::ptime& date, Resolution resolution) { + return timeToString(MiscUtils::getTimeMillis(date), resolution); +} + +String DateTools::timeToString(int64_t time, Resolution resolution) { + std::string timeString(boost::posix_time::to_iso_string(boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1), boost::posix_time::milliseconds(time)))); + switch (resolution) { + case RESOLUTION_YEAR: + return StringUtils::toUnicode(timeString.substr(0, 4).c_str()); + case RESOLUTION_MONTH: + return StringUtils::toUnicode(timeString.substr(0, 6).c_str()); + case RESOLUTION_DAY: + return StringUtils::toUnicode(timeString.substr(0, 8).c_str()); + case RESOLUTION_HOUR: + return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 2)).c_str()); + case RESOLUTION_MINUTE: + return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 
4)).c_str()); + case RESOLUTION_SECOND: + return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 6)).c_str()); + case RESOLUTION_MILLISECOND: { + std::string fraction(timeString.length() > 16 ? timeString.substr(16, 3) : "000" ); + return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 6) + fraction).c_str()); } - - String DateTools::timeToString(int64_t time, Resolution resolution) - { - std::string timeString(boost::posix_time::to_iso_string(boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1), boost::posix_time::milliseconds(time)))); - switch (resolution) - { - case RESOLUTION_YEAR: - return StringUtils::toUnicode(timeString.substr(0, 4).c_str()); - case RESOLUTION_MONTH: - return StringUtils::toUnicode(timeString.substr(0, 6).c_str()); - case RESOLUTION_DAY: - return StringUtils::toUnicode(timeString.substr(0, 8).c_str()); - case RESOLUTION_HOUR: - return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 2)).c_str()); - case RESOLUTION_MINUTE: - return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 4)).c_str()); - case RESOLUTION_SECOND: - return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 6)).c_str()); - case RESOLUTION_MILLISECOND: - { - std::string fraction(timeString.length() > 16 ? 
timeString.substr(16, 3) : "000" ); - return StringUtils::toUnicode(std::string(timeString.substr(0, 8) + timeString.substr(9, 6) + fraction).c_str()); - } - case RESOLUTION_NULL: - // silence static analyzers - break; - } - - boost::throw_exception(IllegalArgumentException(L"unknown resolution '" + StringUtils::toString(resolution) + L"'")); - return L""; + case RESOLUTION_NULL: + // silence static analyzers + break; } - - int64_t DateTools::stringToTime(const String& dateString) - { - return MiscUtils::getTimeMillis(stringToDate(dateString)); + + boost::throw_exception(IllegalArgumentException(L"unknown resolution '" + StringUtils::toString(resolution) + L"'")); + return L""; +} + +int64_t DateTools::stringToTime(const String& dateString) { + return MiscUtils::getTimeMillis(stringToDate(dateString)); +} + +boost::posix_time::ptime DateTools::stringToDate(const String& dateString) { + uint16_t year = dateString.length() >= 4 ? (uint16_t)wcstol(dateString.substr(0, 4).c_str(), 0, 10) : 1970; + uint16_t month = dateString.length() >= 6 ? (uint16_t)wcstol(dateString.substr(4, 2).c_str(), 0, 10) : 1; + uint16_t day = dateString.length() >= 8 ? (uint16_t)wcstol(dateString.substr(6, 2).c_str(), 0, 10) : 1; + uint16_t hour = dateString.length() >= 10 ? (uint16_t)wcstol(dateString.substr(8, 2).c_str(), 0, 10) : 0; + uint16_t minute = dateString.length() >= 12 ? (uint16_t)wcstol(dateString.substr(10, 2).c_str(), 0, 10) : 0; + uint16_t second = dateString.length() >= 14 ? (uint16_t)wcstol(dateString.substr(12, 2).c_str(), 0, 10) : 0; + uint16_t millisecond = dateString.length() >= 16 ? (uint16_t)wcstol(dateString.substr(14, 3).c_str(), 0, 10) : 0; + boost::posix_time::ptime date; + try { + date = boost::posix_time::ptime(boost::gregorian::date(year, month, day), + boost::posix_time::hours(hour) + + boost::posix_time::minutes(minute) + + boost::posix_time::seconds(second) + + boost::posix_time::milliseconds(millisecond)); + } catch (...) 
{ + boost::throw_exception(ParseException(L"Input is not valid date string: " + dateString)); } - - boost::posix_time::ptime DateTools::stringToDate(const String& dateString) - { - uint16_t year = dateString.length() >= 4 ? (uint16_t)wcstol(dateString.substr(0, 4).c_str(), 0, 10) : 1970; - uint16_t month = dateString.length() >= 6 ? (uint16_t)wcstol(dateString.substr(4, 2).c_str(), 0, 10) : 1; - uint16_t day = dateString.length() >= 8 ? (uint16_t)wcstol(dateString.substr(6, 2).c_str(), 0, 10) : 1; - uint16_t hour = dateString.length() >= 10 ? (uint16_t)wcstol(dateString.substr(8, 2).c_str(), 0, 10) : 0; - uint16_t minute = dateString.length() >= 12 ? (uint16_t)wcstol(dateString.substr(10, 2).c_str(), 0, 10) : 0; - uint16_t second = dateString.length() >= 14 ? (uint16_t)wcstol(dateString.substr(12, 2).c_str(), 0, 10) : 0; - uint16_t millisecond = dateString.length() >= 16 ? (uint16_t)wcstol(dateString.substr(14, 3).c_str(), 0, 10) : 0; - boost::posix_time::ptime date; - try - { - date = boost::posix_time::ptime(boost::gregorian::date(year, month, day), - boost::posix_time::hours(hour) + - boost::posix_time::minutes(minute) + - boost::posix_time::seconds(second) + - boost::posix_time::milliseconds(millisecond)); - } - catch (...) 
- { - boost::throw_exception(ParseException(L"Input is not valid date string: " + dateString)); - } + return date; +} + +boost::posix_time::ptime DateTools::round(const boost::posix_time::ptime& date, Resolution resolution) { + boost::posix_time::ptime roundDate; + + switch (resolution) { + case RESOLUTION_YEAR: + return boost::posix_time::ptime(boost::gregorian::date(date.date().year(), 1, 1)); + case RESOLUTION_MONTH: + return boost::posix_time::ptime(boost::gregorian::date(date.date().year(), date.date().month(), 1)); + case RESOLUTION_DAY: + return boost::posix_time::ptime(date.date()); + case RESOLUTION_HOUR: + return boost::posix_time::ptime(date.date(), + boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours())); + case RESOLUTION_MINUTE: + return boost::posix_time::ptime(date.date(), + boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours()) + + boost::posix_time::minutes(boost::posix_time::time_duration(date.time_of_day()).minutes())); + case RESOLUTION_SECOND: + return boost::posix_time::ptime(date.date(), + boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours()) + + boost::posix_time::minutes(boost::posix_time::time_duration(date.time_of_day()).minutes()) + + boost::posix_time::seconds(boost::posix_time::time_duration(date.time_of_day()).seconds())); + case RESOLUTION_MILLISECOND: return date; + case RESOLUTION_NULL: + // silence static analyzers + break; } - - boost::posix_time::ptime DateTools::round(const boost::posix_time::ptime& date, Resolution resolution) - { - boost::posix_time::ptime roundDate; - - switch (resolution) - { - case RESOLUTION_YEAR: - return boost::posix_time::ptime(boost::gregorian::date(date.date().year(), 1, 1)); - case RESOLUTION_MONTH: - return boost::posix_time::ptime(boost::gregorian::date(date.date().year(), date.date().month(), 1)); - case RESOLUTION_DAY: - return boost::posix_time::ptime(date.date()); - case RESOLUTION_HOUR: - return 
boost::posix_time::ptime(date.date(), - boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours())); - case RESOLUTION_MINUTE: - return boost::posix_time::ptime(date.date(), - boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours()) + - boost::posix_time::minutes(boost::posix_time::time_duration(date.time_of_day()).minutes())); - case RESOLUTION_SECOND: - return boost::posix_time::ptime(date.date(), - boost::posix_time::hours(boost::posix_time::time_duration(date.time_of_day()).hours()) + - boost::posix_time::minutes(boost::posix_time::time_duration(date.time_of_day()).minutes()) + - boost::posix_time::seconds(boost::posix_time::time_duration(date.time_of_day()).seconds())); - case RESOLUTION_MILLISECOND: - return date; - case RESOLUTION_NULL: - // silence static analyzers - break; - } - - return boost::posix_time::ptime(); - } - - int64_t DateTools::round(int64_t time, Resolution resolution) - { - return MiscUtils::getTimeMillis(round(boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1), boost::posix_time::milliseconds(time)), resolution)); + + return boost::posix_time::ptime(); +} + +int64_t DateTools::round(int64_t time, Resolution resolution) { + return MiscUtils::getTimeMillis(round(boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1), boost::posix_time::milliseconds(time)), resolution)); +} + +void DateTools::setDateOrder(DateTools::DateOrder order) { + dateOrder = order; +} + +DateTools::DateOrder DateTools::getDateOrder(std::locale locale) { + if (dateOrder != DATEORDER_LOCALE) { + return dateOrder; } - - void DateTools::setDateOrder(DateTools::DateOrder order) - { - dateOrder = order; + + std::locale localeDate(std::locale(locale, new boost::gregorian::date_facet("%x"))); + SingleStringStream controlStream; + + controlStream.imbue(localeDate); + controlStream << boost::gregorian::date(1974, 10, 20); // Oct 20th 1974 + + SingleString controlDate(controlStream.str()); + 
SingleString::size_type year = controlDate.find("74"); + SingleString::size_type month = controlDate.find("10"); + if (month == SingleString::npos) { + month = controlDate.find("O"); // safety } - - DateTools::DateOrder DateTools::getDateOrder(std::locale locale) - { - if (dateOrder != DATEORDER_LOCALE) - return dateOrder; - - std::locale localeDate(std::locale(locale, new boost::gregorian::date_facet("%x"))); - SingleStringStream controlStream; - - controlStream.imbue(localeDate); - controlStream << boost::gregorian::date(1974, 10, 20); // Oct 20th 1974 - - SingleString controlDate(controlStream.str()); - SingleString::size_type year = controlDate.find("74"); - SingleString::size_type month = controlDate.find("10"); - if (month == SingleString::npos) - month = controlDate.find("O"); // safety - SingleString::size_type day = controlDate.find("20"); - - if (year < month) - return DATEORDER_YMD; - else if (month < day) - return DATEORDER_MDY; - else - return DATEORDER_DMY; + SingleString::size_type day = controlDate.find("20"); + + if (year < month) { + return DATEORDER_YMD; + } else if (month < day) { + return DATEORDER_MDY; + } else { + return DATEORDER_DMY; } - - boost::posix_time::ptime DateTools::parseDate(const String& dateString, std::locale locale) - { - Collection dateTokens(StringUtils::split(dateString, L",-. /")); - String delimiter(dateTokens.size() == 1 ? L"" : L"/"); - String paddedDate; - for (Collection::iterator token = dateTokens.begin(); token != dateTokens.end(); ++token) - { - if (token != dateTokens.begin()) - paddedDate += delimiter; - if (token->length() == 1) - paddedDate += L"0" + *token; - else - paddedDate += *token; +} + +boost::posix_time::ptime DateTools::parseDate(const String& dateString, std::locale locale) { + Collection dateTokens(StringUtils::split(dateString, L",-. /")); + String delimiter(dateTokens.size() == 1 ? 
L"" : L"/"); + String paddedDate; + for (Collection::iterator token = dateTokens.begin(); token != dateTokens.end(); ++token) { + if (token != dateTokens.begin()) { + paddedDate += delimiter; } - - Collection dateFormats(Collection::newInstance()); - - switch (getDateOrder(locale)) - { - case DATEORDER_DMY: - dateFormats.add(L"%d" + delimiter + L"%m" + delimiter + L"%Y"); - dateFormats.add(L"%d" + delimiter + L"%m" + delimiter + L"%y"); - dateFormats.add(L"%d" + delimiter + L"%b" + delimiter + L"%Y"); - dateFormats.add(L"%d" + delimiter + L"%b" + delimiter + L"%y"); - dateFormats.add(L"%d" + delimiter + L"%B" + delimiter + L"%Y"); - dateFormats.add(L"%d" + delimiter + L"%B" + delimiter + L"%y"); - break; - case DATEORDER_MDY: - dateFormats.add(L"%m" + delimiter + L"%d" + delimiter + L"%Y"); - dateFormats.add(L"%m" + delimiter + L"%d" + delimiter + L"%y"); - dateFormats.add(L"%b" + delimiter + L"%d" + delimiter + L"%Y"); - dateFormats.add(L"%b" + delimiter + L"%d" + delimiter + L"%y"); - dateFormats.add(L"%B" + delimiter + L"%d" + delimiter + L"%Y"); - dateFormats.add(L"%B" + delimiter + L"%d" + delimiter + L"%y"); - break; - case DATEORDER_YMD: - dateFormats.add(L"%Y" + delimiter + L"%m" + delimiter + L"%d"); - dateFormats.add(L"%y" + delimiter + L"%m" + delimiter + L"%d"); - dateFormats.add(L"%Y" + delimiter + L"%b" + delimiter + L"%d"); - dateFormats.add(L"%y" + delimiter + L"%b" + delimiter + L"%d"); - dateFormats.add(L"%Y" + delimiter + L"%B" + delimiter + L"%d"); - dateFormats.add(L"%y" + delimiter + L"%B" + delimiter + L"%d"); - break; - case DATEORDER_LOCALE: - // silence static analyzers - break; + if (token->length() == 1) { + paddedDate += L"0" + *token; + } else { + paddedDate += *token; } - - boost::date_time::format_date_parser parser(L"", locale); - boost::date_time::special_values_parser svp; - - for (Collection::iterator dateFormat = dateFormats.begin(); dateFormat != dateFormats.end(); ++dateFormat) - { - try - { - boost::gregorian::date date = 
parser.parse_date(paddedDate.c_str(), dateFormat->c_str(), svp); - if (!date.is_not_a_date()) - return boost::posix_time::ptime(date); - } - catch (...) - { + } + + Collection dateFormats(Collection::newInstance()); + + switch (getDateOrder(locale)) { + case DATEORDER_DMY: + dateFormats.add(L"%d" + delimiter + L"%m" + delimiter + L"%Y"); + dateFormats.add(L"%d" + delimiter + L"%m" + delimiter + L"%y"); + dateFormats.add(L"%d" + delimiter + L"%b" + delimiter + L"%Y"); + dateFormats.add(L"%d" + delimiter + L"%b" + delimiter + L"%y"); + dateFormats.add(L"%d" + delimiter + L"%B" + delimiter + L"%Y"); + dateFormats.add(L"%d" + delimiter + L"%B" + delimiter + L"%y"); + break; + case DATEORDER_MDY: + dateFormats.add(L"%m" + delimiter + L"%d" + delimiter + L"%Y"); + dateFormats.add(L"%m" + delimiter + L"%d" + delimiter + L"%y"); + dateFormats.add(L"%b" + delimiter + L"%d" + delimiter + L"%Y"); + dateFormats.add(L"%b" + delimiter + L"%d" + delimiter + L"%y"); + dateFormats.add(L"%B" + delimiter + L"%d" + delimiter + L"%Y"); + dateFormats.add(L"%B" + delimiter + L"%d" + delimiter + L"%y"); + break; + case DATEORDER_YMD: + dateFormats.add(L"%Y" + delimiter + L"%m" + delimiter + L"%d"); + dateFormats.add(L"%y" + delimiter + L"%m" + delimiter + L"%d"); + dateFormats.add(L"%Y" + delimiter + L"%b" + delimiter + L"%d"); + dateFormats.add(L"%y" + delimiter + L"%b" + delimiter + L"%d"); + dateFormats.add(L"%Y" + delimiter + L"%B" + delimiter + L"%d"); + dateFormats.add(L"%y" + delimiter + L"%B" + delimiter + L"%d"); + break; + case DATEORDER_LOCALE: + // silence static analyzers + break; + } + + boost::date_time::format_date_parser parser(L"", locale); + boost::date_time::special_values_parser svp; + + for (Collection::iterator dateFormat = dateFormats.begin(); dateFormat != dateFormats.end(); ++dateFormat) { + try { + boost::gregorian::date date = parser.parse_date(paddedDate.c_str(), dateFormat->c_str(), svp); + if (!date.is_not_a_date()) { + return boost::posix_time::ptime(date); 
} + } catch (...) { } - - boost::throw_exception(ParseException(L"Invalid date '" + dateString + L"'")); - return boost::posix_time::ptime(); } + + boost::throw_exception(ParseException(L"Invalid date '" + dateString + L"'")); + return boost::posix_time::ptime(); +} + } diff --git a/src/core/document/Document.cpp b/src/core/document/Document.cpp index 715ae908..21e6f203 100644 --- a/src/core/document/Document.cpp +++ b/src/core/document/Document.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,148 +9,131 @@ #include "Fieldable.h" #include "Field.h" -namespace Lucene -{ - Document::Document() - { - fields = Collection::newInstance(); - boost = 1.0; - } - - Document::~Document() - { - } - - void Document::setBoost(double boost) - { - this->boost = boost; - } - - double Document::getBoost() - { - return boost; - } - - void Document::add(FieldablePtr field) - { - fields.add(field); - } - - /// Utility functor for comparing fieldable names. - /// see {@link Document}. 
- struct equalFieldableName - { - equalFieldableName(const String& name) : equalName(name) {} - inline bool operator()(const FieldablePtr& other) const - { - return (equalName == other->name()); - } - const String& equalName; - }; - - void Document::removeField(const String& name) - { - Collection::iterator field = fields.find_if(equalFieldableName(name)); - if (field != fields.end()) - fields.remove(field); - } - - void Document::removeFields(const String& name) - { - fields.remove_if(equalFieldableName(name)); - } - - FieldPtr Document::getField(const String& name) - { - return boost::static_pointer_cast(getFieldable(name)); - } - - FieldablePtr Document::getFieldable(const String& name) - { - Collection::iterator field = fields.find_if(equalFieldableName(name)); - return field == fields.end() ? FieldablePtr() : *field; +namespace Lucene { + +Document::Document() { + fields = Collection::newInstance(); + boost = 1.0; +} + +Document::~Document() { +} + +void Document::setBoost(double boost) { + this->boost = boost; +} + +double Document::getBoost() { + return boost; +} + +void Document::add(const FieldablePtr& field) { + fields.add(field); +} + +/// Utility functor for comparing fieldable names. +/// see {@link Document}. 
+struct equalFieldableName { + equalFieldableName(const String& name) : equalName(name) {} + inline bool operator()(const FieldablePtr& other) const { + return (equalName == other->name()); + } + const String& equalName; +}; + +void Document::removeField(const String& name) { + Collection::iterator field = fields.find_if(equalFieldableName(name)); + if (field != fields.end()) { + fields.remove(field); } - - String Document::get(const String& name) - { - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if ((*field)->name() == name && !(*field)->isBinary()) - return (*field)->stringValue(); +} + +void Document::removeFields(const String& name) { + fields.remove_if(equalFieldableName(name)); +} + +FieldPtr Document::getField(const String& name) { + return boost::static_pointer_cast(getFieldable(name)); +} + +FieldablePtr Document::getFieldable(const String& name) { + Collection::iterator field = fields.find_if(equalFieldableName(name)); + return field == fields.end() ? 
FieldablePtr() : *field; +} + +String Document::get(const String& name) { + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if ((*field)->name() == name && !(*field)->isBinary()) { + return (*field)->stringValue(); } - return L""; } - - Collection Document::getFields() - { - return fields; - } - - Collection Document::getFields(const String& name) - { - Collection result(Collection::newInstance()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if ((*field)->name() == name) - result.add(boost::static_pointer_cast(*field)); + return L""; +} + +Collection Document::getFields() { + return fields; +} + +Collection Document::getFields(const String& name) { + Collection result(Collection::newInstance()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if ((*field)->name() == name) { + result.add(boost::static_pointer_cast(*field)); } - return result; } - - Collection Document::getFieldables(const String& name) - { - Collection result(Collection::newInstance()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if ((*field)->name() == name) - result.add(*field); + return result; +} + +Collection Document::getFieldables(const String& name) { + Collection result(Collection::newInstance()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if ((*field)->name() == name) { + result.add(*field); } - return result; } - - Collection Document::getValues(const String& name) - { - Collection result(Collection::newInstance()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if ((*field)->name() == name && !(*field)->isBinary()) - result.add((*field)->stringValue()); + return result; +} + +Collection Document::getValues(const String& name) { + Collection result(Collection::newInstance()); + for (Collection::iterator field = fields.begin(); field != 
fields.end(); ++field) { + if ((*field)->name() == name && !(*field)->isBinary()) { + result.add((*field)->stringValue()); } - return result; } - - Collection Document::getBinaryValues(const String& name) - { - Collection result(Collection::newInstance()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if ((*field)->name() == name && (*field)->isBinary()) - result.add((*field)->getBinaryValue()); + return result; +} + +Collection Document::getBinaryValues(const String& name) { + Collection result(Collection::newInstance()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if ((*field)->name() == name && (*field)->isBinary()) { + result.add((*field)->getBinaryValue()); } - return result; } - - ByteArray Document::getBinaryValue(const String& name) - { - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if ((*field)->name() == name && (*field)->isBinary()) - return (*field)->getBinaryValue(); + return result; +} + +ByteArray Document::getBinaryValue(const String& name) { + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if ((*field)->name() == name && (*field)->isBinary()) { + return (*field)->getBinaryValue(); } - return ByteArray(); } - - String Document::toString() - { - StringStream buffer; - buffer << L"Document<"; - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if (field != fields.begin()) - buffer << L" "; - buffer << (*field)->stringValue(); + return ByteArray(); +} + +String Document::toString() { + StringStream buffer; + buffer << L"Document<"; + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if (field != fields.begin()) { + buffer << L" "; } - buffer << L">"; - return buffer.str(); + buffer << (*field)->stringValue(); } + buffer << L">"; + return buffer.str(); +} + } diff --git a/src/core/document/Field.cpp 
b/src/core/document/Field.cpp index a2407d2a..1d4e439c 100644 --- a/src/core/document/Field.cpp +++ b/src/core/document/Field.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,369 +10,346 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - Field::Field(const String& name, const String& value, Store store, Index index) - { - ConstructField(name, value, store, index, TERM_VECTOR_NO); - } - - Field::Field(const String& name, const String& value, Store store, Index index, TermVector termVector) - { - ConstructField(name, value, store, index, termVector); - } - - Field::Field(const String& name, ReaderPtr reader) - { - ConstructField(name, reader, TERM_VECTOR_NO); - } - - Field::Field(const String& name, ReaderPtr reader, TermVector termVector) - { - ConstructField(name, reader, termVector); - } - - Field::Field(const String& name, TokenStreamPtr tokenStream) - { - ConstructField(name, tokenStream, TERM_VECTOR_NO); - } - - Field::Field(const String& name, TokenStreamPtr tokenStream, TermVector termVector) - { - ConstructField(name, tokenStream, termVector); - } - - Field::Field(const String& name, ByteArray value, Store store) - { - ConstructField(name, value, 0, value.size(), store); - } - - Field::Field(const String& name, ByteArray value, int32_t offset, int32_t length, Store store) - { - ConstructField(name, value, offset, length, store); - } - - void Field::ConstructField(const String& name, const String& value, Store store, Index index, TermVector termVector) - { - if (name.empty() && value.empty()) - boost::throw_exception(IllegalArgumentException(L"name and value 
cannot both be empty")); - if (index == INDEX_NO && store == STORE_NO) - boost::throw_exception(IllegalArgumentException(L"it doesn't make sense to have a field that is neither indexed nor stored")); - if (index == INDEX_NO && termVector != TERM_VECTOR_NO) - boost::throw_exception(IllegalArgumentException(L"cannot store term vector information for a field that is not indexed")); - - this->_name = name; - this->fieldsData = value; - this->_isStored = isStored(store); - this->_isIndexed = isIndexed(index); - this->_isTokenized = isAnalyzed(index); - this->_omitNorms = omitNorms(index); - this->_isBinary = false; - - if (index == INDEX_NO) - this->omitTermFreqAndPositions = false; - - setStoreTermVector(termVector); +namespace Lucene { + +Field::Field(const String& name, const String& value, Store store, Index index) { + ConstructField(name, value, store, index, TERM_VECTOR_NO); +} + +Field::Field(const String& name, const String& value, Store store, Index index, TermVector termVector) { + ConstructField(name, value, store, index, termVector); +} + +Field::Field(const String& name, const ReaderPtr& reader) { + ConstructField(name, reader, TERM_VECTOR_NO); +} + +Field::Field(const String& name, const ReaderPtr& reader, TermVector termVector) { + ConstructField(name, reader, termVector); +} + +Field::Field(const String& name, const TokenStreamPtr& tokenStream) { + ConstructField(name, tokenStream, TERM_VECTOR_NO); +} + +Field::Field(const String& name, const TokenStreamPtr& tokenStream, TermVector termVector) { + ConstructField(name, tokenStream, termVector); +} + +Field::Field(const String& name, ByteArray value, Store store) { + ConstructField(name, value, 0, value.size(), store); +} + +Field::Field(const String& name, ByteArray value, int32_t offset, int32_t length, Store store) { + ConstructField(name, value, offset, length, store); +} + +void Field::ConstructField(const String& name, const String& value, Store store, Index index, TermVector termVector) { + if 
(name.empty() && value.empty()) { + boost::throw_exception(IllegalArgumentException(L"name and value cannot both be empty")); } - - void Field::ConstructField(const String& name, ReaderPtr reader, TermVector termVector) - { - this->_name = name; - this->fieldsData = reader; - this->_isStored = false; - this->_isIndexed = true; - this->_isTokenized = true; - this->_isBinary = false; - - setStoreTermVector(termVector); + if (index == INDEX_NO && store == STORE_NO) { + boost::throw_exception(IllegalArgumentException(L"it doesn't make sense to have a field that is neither indexed nor stored")); } - - void Field::ConstructField(const String& name, TokenStreamPtr tokenStream, TermVector termVector) - { - this->_name = name; - this->fieldsData = VariantUtils::null(); - this->tokenStream = tokenStream; - this->_isStored = false; - this->_isIndexed = true; - this->_isTokenized = true; - this->_isBinary = false; - - setStoreTermVector(termVector); + if (index == INDEX_NO && termVector != TERM_VECTOR_NO) { + boost::throw_exception(IllegalArgumentException(L"cannot store term vector information for a field that is not indexed")); } - - void Field::ConstructField(const String& name, ByteArray value, int32_t offset, int32_t length, Store store) - { - if (store == STORE_NO) - boost::throw_exception(IllegalArgumentException(L"binary values can't be unstored")); - - this->_name = name; - this->fieldsData = value; - this->_isStored = isStored(store); - this->_isIndexed = false; - this->_isTokenized = false; + + this->_name = name; + this->fieldsData = value; + this->_isStored = isStored(store); + this->_isIndexed = isIndexed(index); + this->_isTokenized = isAnalyzed(index); + this->_omitNorms = omitNorms(index); + this->_isBinary = false; + + if (index == INDEX_NO) { this->omitTermFreqAndPositions = false; - this->_omitNorms = true; - this->_isBinary = true; - this->binaryLength = length; - this->binaryOffset = offset; - - setStoreTermVector(TERM_VECTOR_NO); - } - - Field::~Field() 
- { - } - - String Field::stringValue() - { - return VariantUtils::get(fieldsData); } - - ReaderPtr Field::readerValue() - { - return VariantUtils::get(fieldsData); + + setStoreTermVector(termVector); +} + +void Field::ConstructField(const String& name, const ReaderPtr& reader, TermVector termVector) { + this->_name = name; + this->fieldsData = reader; + this->_isStored = false; + this->_isIndexed = true; + this->_isTokenized = true; + this->_isBinary = false; + + setStoreTermVector(termVector); +} + +void Field::ConstructField(const String& name, const TokenStreamPtr& tokenStream, TermVector termVector) { + this->_name = name; + this->fieldsData = VariantUtils::null(); + this->tokenStream = tokenStream; + this->_isStored = false; + this->_isIndexed = true; + this->_isTokenized = true; + this->_isBinary = false; + + setStoreTermVector(termVector); +} + +void Field::ConstructField(const String& name, ByteArray value, int32_t offset, int32_t length, Store store) { + if (store == STORE_NO) { + boost::throw_exception(IllegalArgumentException(L"binary values can't be unstored")); } - - TokenStreamPtr Field::tokenStreamValue() - { - return tokenStream; + + this->_name = name; + this->fieldsData = value; + this->_isStored = isStored(store); + this->_isIndexed = false; + this->_isTokenized = false; + this->omitTermFreqAndPositions = false; + this->_omitNorms = true; + this->_isBinary = true; + this->binaryLength = length; + this->binaryOffset = offset; + + setStoreTermVector(TERM_VECTOR_NO); +} + +Field::~Field() { +} + +String Field::stringValue() { + return VariantUtils::get(fieldsData); +} + +ReaderPtr Field::readerValue() { + return VariantUtils::get(fieldsData); +} + +TokenStreamPtr Field::tokenStreamValue() { + return tokenStream; +} + +void Field::setValue(const String& value) { + if (_isBinary) { + boost::throw_exception(IllegalArgumentException(L"cannot set a String value on a binary field")); } - - void Field::setValue(const String& value) - { - if (_isBinary) - 
boost::throw_exception(IllegalArgumentException(L"cannot set a String value on a binary field")); - fieldsData = value; + fieldsData = value; +} + +void Field::setValue(const ReaderPtr& value) { + if (_isBinary) { + boost::throw_exception(IllegalArgumentException(L"cannot set a Reader value on a binary field")); } - - void Field::setValue(ReaderPtr value) - { - if (_isBinary) - boost::throw_exception(IllegalArgumentException(L"cannot set a Reader value on a binary field")); - if (_isStored) - boost::throw_exception(IllegalArgumentException(L"cannot set a Reader value on a stored field")); - fieldsData = value; + if (_isStored) { + boost::throw_exception(IllegalArgumentException(L"cannot set a Reader value on a stored field")); } - - void Field::setValue(ByteArray value) - { - if (!_isBinary) - boost::throw_exception(IllegalArgumentException(L"cannot set a byte[] value on a non-binary field")); - fieldsData = value; - binaryLength = value.size(); - binaryOffset = 0; + fieldsData = value; +} + +void Field::setValue(ByteArray value) { + if (!_isBinary) { + boost::throw_exception(IllegalArgumentException(L"cannot set a byte[] value on a non-binary field")); } - - void Field::setValue(ByteArray value, int32_t offset, int32_t length) - { - if (!_isBinary) - boost::throw_exception(IllegalArgumentException(L"cannot set a byte[] value on a non-binary field")); - fieldsData = value; - binaryLength = length; - binaryOffset = offset; + fieldsData = value; + binaryLength = value.size(); + binaryOffset = 0; +} + +void Field::setValue(ByteArray value, int32_t offset, int32_t length) { + if (!_isBinary) { + boost::throw_exception(IllegalArgumentException(L"cannot set a byte[] value on a non-binary field")); } - - void Field::setTokenStream(TokenStreamPtr tokenStream) - { - this->_isIndexed = true; - this->_isTokenized = true; - this->tokenStream = tokenStream; + fieldsData = value; + binaryLength = length; + binaryOffset = offset; +} + +void Field::setTokenStream(const 
TokenStreamPtr& tokenStream) { + this->_isIndexed = true; + this->_isTokenized = true; + this->tokenStream = tokenStream; +} + +bool Field::isStored(Store store) { + switch (store) { + case STORE_YES: + return true; + + case STORE_NO: + return false; + + default: + boost::throw_exception(IllegalArgumentException(L"Invalid field store")); + return false; } - - bool Field::isStored(Store store) - { - switch (store) - { - case STORE_YES: - return true; - - case STORE_NO: - return false; - - default: - boost::throw_exception(IllegalArgumentException(L"Invalid field store")); - return false; - } +} + +bool Field::isIndexed(Index index) { + switch (index) { + case INDEX_NO: + return false; + + case INDEX_ANALYZED: + return true; + + case INDEX_NOT_ANALYZED: + return true; + + case INDEX_NOT_ANALYZED_NO_NORMS: + return true; + + case INDEX_ANALYZED_NO_NORMS: + return true; + + default: + boost::throw_exception(IllegalArgumentException(L"Invalid field index")); + return false; } - - bool Field::isIndexed(Index index) - { - switch (index) - { - case INDEX_NO: - return false; - - case INDEX_ANALYZED: - return true; - - case INDEX_NOT_ANALYZED: - return true; - - case INDEX_NOT_ANALYZED_NO_NORMS: - return true; - - case INDEX_ANALYZED_NO_NORMS: - return true; - - default: - boost::throw_exception(IllegalArgumentException(L"Invalid field index")); - return false; - } +} + +bool Field::isAnalyzed(Index index) { + switch (index) { + case INDEX_NO: + return false; + + case INDEX_ANALYZED: + return true; + + case INDEX_NOT_ANALYZED: + return false; + + case INDEX_NOT_ANALYZED_NO_NORMS: + return false; + + case INDEX_ANALYZED_NO_NORMS: + return true; + + default: + boost::throw_exception(IllegalArgumentException(L"Invalid field index")); + return false; } - - bool Field::isAnalyzed(Index index) - { - switch (index) - { - case INDEX_NO: - return false; - - case INDEX_ANALYZED: - return true; - - case INDEX_NOT_ANALYZED: - return false; - - case INDEX_NOT_ANALYZED_NO_NORMS: - return 
false; - - case INDEX_ANALYZED_NO_NORMS: - return true; - - default: - boost::throw_exception(IllegalArgumentException(L"Invalid field index")); - return false; - } +} + +bool Field::omitNorms(Index index) { + switch (index) { + case INDEX_NO: + return true; + + case INDEX_ANALYZED: + return false; + + case INDEX_NOT_ANALYZED: + return false; + + case INDEX_NOT_ANALYZED_NO_NORMS: + return true; + + case INDEX_ANALYZED_NO_NORMS: + return true; + + default: + boost::throw_exception(IllegalArgumentException(L"Invalid field index")); + return false; } - - bool Field::omitNorms(Index index) - { - switch (index) - { - case INDEX_NO: - return true; - - case INDEX_ANALYZED: - return false; - - case INDEX_NOT_ANALYZED: - return false; - - case INDEX_NOT_ANALYZED_NO_NORMS: - return true; - - case INDEX_ANALYZED_NO_NORMS: - return true; - - default: - boost::throw_exception(IllegalArgumentException(L"Invalid field index")); - return false; - } +} + +Field::Index Field::toIndex(bool indexed, bool analyzed) { + return toIndex(indexed, analyzed, false); +} + +Field::Index Field::toIndex(bool indexed, bool analyzed, bool omitNorms) { + // If it is not indexed nothing else matters + if (!indexed) { + return INDEX_NO; } - - Field::Index Field::toIndex(bool indexed, bool analyzed) - { - return toIndex(indexed, analyzed, false); + + // typical, non-expert + if (!omitNorms) { + return analyzed ? INDEX_ANALYZED : INDEX_NOT_ANALYZED; } - - Field::Index Field::toIndex(bool indexed, bool analyzed, bool omitNorms) - { - // If it is not indexed nothing else matters - if (!indexed) - return INDEX_NO; - - // typical, non-expert - if (!omitNorms) - return analyzed ? INDEX_ANALYZED : INDEX_NOT_ANALYZED; - - // Expert: Norms omitted - return analyzed ? INDEX_ANALYZED_NO_NORMS : INDEX_NOT_ANALYZED_NO_NORMS; + + // Expert: Norms omitted + return analyzed ? 
INDEX_ANALYZED_NO_NORMS : INDEX_NOT_ANALYZED_NO_NORMS; +} + +bool Field::isStored(TermVector termVector) { + switch (termVector) { + case TERM_VECTOR_NO: + return false; + + case TERM_VECTOR_YES: + return true; + + case TERM_VECTOR_WITH_POSITIONS: + return true; + + case TERM_VECTOR_WITH_OFFSETS: + return true; + + case TERM_VECTOR_WITH_POSITIONS_OFFSETS: + return true; + + default: + boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); + return false; } - - bool Field::isStored(TermVector termVector) - { - switch (termVector) - { - case TERM_VECTOR_NO: - return false; - - case TERM_VECTOR_YES: - return true; - - case TERM_VECTOR_WITH_POSITIONS: - return true; - - case TERM_VECTOR_WITH_OFFSETS: - return true; - - case TERM_VECTOR_WITH_POSITIONS_OFFSETS: - return true; - - default: - boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); - return false; - } +} + +bool Field::withPositions(TermVector termVector) { + switch (termVector) { + case TERM_VECTOR_NO: + return false; + + case TERM_VECTOR_YES: + return false; + + case TERM_VECTOR_WITH_POSITIONS: + return true; + + case TERM_VECTOR_WITH_OFFSETS: + return false; + + case TERM_VECTOR_WITH_POSITIONS_OFFSETS: + return true; + + default: + boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); + return false; } - - bool Field::withPositions(TermVector termVector) - { - switch (termVector) - { - case TERM_VECTOR_NO: - return false; - - case TERM_VECTOR_YES: - return false; - - case TERM_VECTOR_WITH_POSITIONS: - return true; - - case TERM_VECTOR_WITH_OFFSETS: - return false; - - case TERM_VECTOR_WITH_POSITIONS_OFFSETS: - return true; - - default: - boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); - return false; - } +} + +bool Field::withOffsets(TermVector termVector) { + switch (termVector) { + case TERM_VECTOR_NO: + return false; + + case TERM_VECTOR_YES: + return false; + + case TERM_VECTOR_WITH_POSITIONS: + 
return false; + + case TERM_VECTOR_WITH_OFFSETS: + return true; + + case TERM_VECTOR_WITH_POSITIONS_OFFSETS: + return true; + + default: + boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); + return false; } - - bool Field::withOffsets(TermVector termVector) - { - switch (termVector) - { - case TERM_VECTOR_NO: - return false; - - case TERM_VECTOR_YES: - return false; - - case TERM_VECTOR_WITH_POSITIONS: - return false; - - case TERM_VECTOR_WITH_OFFSETS: - return true; - - case TERM_VECTOR_WITH_POSITIONS_OFFSETS: - return true; - - default: - boost::throw_exception(IllegalArgumentException(L"Invalid field term vector")); - return false; - } +} + +Field::TermVector Field::toTermVector(bool stored, bool withOffsets, bool withPositions) { + // If it is not stored, nothing else matters. + if (!stored) { + return TERM_VECTOR_NO; } - - Field::TermVector Field::toTermVector(bool stored, bool withOffsets, bool withPositions) - { - // If it is not stored, nothing else matters. - if (!stored) - return TERM_VECTOR_NO; - - if (withOffsets) - return withPositions ? TERM_VECTOR_WITH_POSITIONS_OFFSETS : TERM_VECTOR_WITH_OFFSETS; - - return withPositions ? TERM_VECTOR_WITH_POSITIONS : TERM_VECTOR_YES; + + if (withOffsets) { + return withPositions ? TERM_VECTOR_WITH_POSITIONS_OFFSETS : TERM_VECTOR_WITH_OFFSETS; } + + return withPositions ? TERM_VECTOR_WITH_POSITIONS : TERM_VECTOR_YES; +} + } diff --git a/src/core/document/FieldSelector.cpp b/src/core/document/FieldSelector.cpp index 669b17cf..5a7db90a 100644 --- a/src/core/document/FieldSelector.cpp +++ b/src/core/document/FieldSelector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,13 +7,12 @@ #include "LuceneInc.h" #include "FieldSelector.h" -namespace Lucene -{ - FieldSelector::FieldSelector() - { - } +namespace Lucene { + +FieldSelector::FieldSelector() { +} + +FieldSelector::~FieldSelector() { +} - FieldSelector::~FieldSelector() - { - } } diff --git a/src/core/document/Fieldable.cpp b/src/core/document/Fieldable.cpp index 559a0584..1059b04e 100644 --- a/src/core/document/Fieldable.cpp +++ b/src/core/document/Fieldable.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,137 +7,116 @@ #include "LuceneInc.h" #include "Fieldable.h" -namespace Lucene -{ - void Fieldable::setBoost(double boost) - { - BOOST_ASSERT(false); - // override - } - - double Fieldable::getBoost() - { - BOOST_ASSERT(false); - return 0; // override - } - - String Fieldable::name() - { - BOOST_ASSERT(false); - return L""; // override - } - - String Fieldable::stringValue() - { - BOOST_ASSERT(false); - return L""; // override - } - - ReaderPtr Fieldable::readerValue() - { - BOOST_ASSERT(false); - return ReaderPtr(); // override - } - - TokenStreamPtr Fieldable::tokenStreamValue() - { - BOOST_ASSERT(false); - return TokenStreamPtr(); // override - } - - bool Fieldable::isStored() - { - BOOST_ASSERT(false); - return false; // override - } - - bool Fieldable::isIndexed() - { - BOOST_ASSERT(false); - return false; // override - } - - bool Fieldable::isTokenized() - { - BOOST_ASSERT(false); - return false; // override - } - - bool Fieldable::isTermVectorStored() - { - BOOST_ASSERT(false); - return false; // override - } - - 
bool Fieldable::isStoreOffsetWithTermVector() - { - BOOST_ASSERT(false); - return false; // override - } - - bool Fieldable::isStorePositionWithTermVector() - { - BOOST_ASSERT(false); - return false; // override - } - - bool Fieldable::isBinary() - { - BOOST_ASSERT(false); - return false; // override - } - - bool Fieldable::getOmitNorms() - { - BOOST_ASSERT(false); - return false; // override - } - - void Fieldable::setOmitNorms(bool omitNorms) - { - BOOST_ASSERT(false); - // override - } - - bool Fieldable::isLazy() - { - BOOST_ASSERT(false); - return false; // override - } - - int32_t Fieldable::getBinaryOffset() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t Fieldable::getBinaryLength() - { - BOOST_ASSERT(false); - return 0; // override - } - - ByteArray Fieldable::getBinaryValue() - { - BOOST_ASSERT(false); - return ByteArray(); // override - } - - ByteArray Fieldable::getBinaryValue(ByteArray result) - { - BOOST_ASSERT(false); - return ByteArray(); // override - } - - bool Fieldable::getOmitTermFreqAndPositions() - { - BOOST_ASSERT(false); - return false; // override - } - - void Fieldable::setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) - { - BOOST_ASSERT(false); - // override - } +namespace Lucene { + +void Fieldable::setBoost(double boost) { + BOOST_ASSERT(false); + // override +} + +double Fieldable::getBoost() { + BOOST_ASSERT(false); + return 0; // override +} + +String Fieldable::name() { + BOOST_ASSERT(false); + return L""; // override +} + +String Fieldable::stringValue() { + BOOST_ASSERT(false); + return L""; // override +} + +ReaderPtr Fieldable::readerValue() { + BOOST_ASSERT(false); + return ReaderPtr(); // override +} + +TokenStreamPtr Fieldable::tokenStreamValue() { + BOOST_ASSERT(false); + return TokenStreamPtr(); // override +} + +bool Fieldable::isStored() { + BOOST_ASSERT(false); + return false; // override +} + +bool Fieldable::isIndexed() { + BOOST_ASSERT(false); + return false; // override +} + +bool 
Fieldable::isTokenized() { + BOOST_ASSERT(false); + return false; // override +} + +bool Fieldable::isTermVectorStored() { + BOOST_ASSERT(false); + return false; // override +} + +bool Fieldable::isStoreOffsetWithTermVector() { + BOOST_ASSERT(false); + return false; // override +} + +bool Fieldable::isStorePositionWithTermVector() { + BOOST_ASSERT(false); + return false; // override +} + +bool Fieldable::isBinary() { + BOOST_ASSERT(false); + return false; // override +} + +bool Fieldable::getOmitNorms() { + BOOST_ASSERT(false); + return false; // override +} + +void Fieldable::setOmitNorms(bool omitNorms) { + BOOST_ASSERT(false); + // override +} + +bool Fieldable::isLazy() { + BOOST_ASSERT(false); + return false; // override +} + +int32_t Fieldable::getBinaryOffset() { + BOOST_ASSERT(false); + return 0; // override +} + +int32_t Fieldable::getBinaryLength() { + BOOST_ASSERT(false); + return 0; // override +} + +ByteArray Fieldable::getBinaryValue() { + BOOST_ASSERT(false); + return ByteArray(); // override +} + +ByteArray Fieldable::getBinaryValue(ByteArray result) { + BOOST_ASSERT(false); + return ByteArray(); // override +} + +bool Fieldable::getOmitTermFreqAndPositions() { + BOOST_ASSERT(false); + return false; // override +} + +void Fieldable::setOmitTermFreqAndPositions(bool omitTermFreqAndPositions) { + BOOST_ASSERT(false); + // override +} + } diff --git a/src/core/document/LoadFirstFieldSelector.cpp b/src/core/document/LoadFirstFieldSelector.cpp index 5efff051..2f0747c5 100644 --- a/src/core/document/LoadFirstFieldSelector.cpp +++ b/src/core/document/LoadFirstFieldSelector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,14 +7,13 @@ #include "LuceneInc.h" #include "LoadFirstFieldSelector.h" -namespace Lucene -{ - LoadFirstFieldSelector::~LoadFirstFieldSelector() - { - } - - FieldSelector::FieldSelectorResult LoadFirstFieldSelector::accept(const String& fieldName) - { - return FieldSelector::SELECTOR_LOAD_AND_BREAK; - } +namespace Lucene { + +LoadFirstFieldSelector::~LoadFirstFieldSelector() { +} + +FieldSelector::FieldSelectorResult LoadFirstFieldSelector::accept(const String& fieldName) { + return FieldSelector::SELECTOR_LOAD_AND_BREAK; +} + } diff --git a/src/core/document/MapFieldSelector.cpp b/src/core/document/MapFieldSelector.cpp index 0341d970..7b033e67 100644 --- a/src/core/document/MapFieldSelector.cpp +++ b/src/core/document/MapFieldSelector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,27 +7,25 @@ #include "LuceneInc.h" #include "MapFieldSelector.h" -namespace Lucene -{ - MapFieldSelector::MapFieldSelector(MapStringFieldSelectorResult fieldSelections) - { - this->fieldSelections = fieldSelections; - } - - MapFieldSelector::MapFieldSelector(Collection fields) - { - fieldSelections = MapStringFieldSelectorResult::newInstance(); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - fieldSelections.put(*field, FieldSelector::SELECTOR_LOAD); - } - - MapFieldSelector::~MapFieldSelector() - { - } - - FieldSelector::FieldSelectorResult MapFieldSelector::accept(const String& fieldName) - { - MapStringFieldSelectorResult::iterator selection = fieldSelections.find(fieldName); - return selection != fieldSelections.end() ? selection->second : FieldSelector::SELECTOR_NO_LOAD; +namespace Lucene { + +MapFieldSelector::MapFieldSelector(MapStringFieldSelectorResult fieldSelections) { + this->fieldSelections = fieldSelections; +} + +MapFieldSelector::MapFieldSelector(Collection fields) { + fieldSelections = MapStringFieldSelectorResult::newInstance(); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + fieldSelections.put(*field, FieldSelector::SELECTOR_LOAD); } } + +MapFieldSelector::~MapFieldSelector() { +} + +FieldSelector::FieldSelectorResult MapFieldSelector::accept(const String& fieldName) { + MapStringFieldSelectorResult::iterator selection = fieldSelections.find(fieldName); + return selection != fieldSelections.end() ? selection->second : FieldSelector::SELECTOR_NO_LOAD; +} + +} diff --git a/src/core/document/NumberTools.cpp b/src/core/document/NumberTools.cpp index 64eac0f8..35d990fc 100644 --- a/src/core/document/NumberTools.cpp +++ b/src/core/document/NumberTools.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,92 +8,87 @@ #include "NumberTools.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t NumberTools::RADIX = 36; - const wchar_t NumberTools::NEGATIVE_PREFIX = L'-'; - const wchar_t NumberTools::POSITIVE_PREFIX = L'0'; - - NumberTools::~NumberTools() - { +namespace Lucene { + +const int32_t NumberTools::RADIX = 36; +const wchar_t NumberTools::NEGATIVE_PREFIX = L'-'; +const wchar_t NumberTools::POSITIVE_PREFIX = L'0'; + +NumberTools::~NumberTools() { +} + +const String& NumberTools::MIN_STRING_VALUE() { + static String _MIN_STRING_VALUE; + LUCENE_RUN_ONCE( + _MIN_STRING_VALUE += NEGATIVE_PREFIX; + _MIN_STRING_VALUE += L"0000000000000"; + ); + return _MIN_STRING_VALUE; +} + +const String& NumberTools::MAX_STRING_VALUE() { + static String _MAX_STRING_VALUE; + LUCENE_RUN_ONCE( + _MAX_STRING_VALUE += POSITIVE_PREFIX; + _MAX_STRING_VALUE += L"1y2p0ij32e8e7"; + ); + return _MAX_STRING_VALUE; +} + +int32_t NumberTools::STR_SIZE() { + static int32_t _STR_SIZE = 0; + LUCENE_RUN_ONCE( + _STR_SIZE = (int32_t)MIN_STRING_VALUE().length(); + ); + return _STR_SIZE; +} + +String NumberTools::longToString(int64_t l) { + if (l == std::numeric_limits::min()) { + // special case, because long is not symmetric around zero + return MIN_STRING_VALUE(); } - - const String& NumberTools::MIN_STRING_VALUE() - { - static String _MIN_STRING_VALUE; - if (_MIN_STRING_VALUE.empty()) - { - _MIN_STRING_VALUE += NEGATIVE_PREFIX; - _MIN_STRING_VALUE += L"0000000000000"; - } - return _MIN_STRING_VALUE; + + String buf; + buf.reserve(STR_SIZE()); + + if (l < 0) { + buf += NEGATIVE_PREFIX; + l = std::numeric_limits::max() + l + 1; } - - const String& NumberTools::MAX_STRING_VALUE() - { - static String 
_MAX_STRING_VALUE; - if (_MAX_STRING_VALUE.empty()) - { - _MAX_STRING_VALUE += POSITIVE_PREFIX; - _MAX_STRING_VALUE += L"1y2p0ij32e8e7"; - } - return _MAX_STRING_VALUE; + buf += POSITIVE_PREFIX; + + String num(StringUtils::toString(l, RADIX)); + + int32_t padLen = (int32_t)(STR_SIZE() - num.length() - buf.length()); + while (padLen-- > 0) { + buf += L'0'; } - - int32_t NumberTools::STR_SIZE() - { - static int32_t _STR_SIZE = 0; - if (_STR_SIZE == 0) - _STR_SIZE = (int32_t)MIN_STRING_VALUE().length(); - return _STR_SIZE; + + return buf + num; +} + +int64_t NumberTools::stringToLong(const String& str) { + if ((int32_t)str.length() != STR_SIZE()) { + boost::throw_exception(NumberFormatException(L"string is the wrong size")); } - - String NumberTools::longToString(int64_t l) - { - if (l == LLONG_MIN) - { - // special case, because long is not symmetric around zero - return MIN_STRING_VALUE(); - } - - String buf; - buf.reserve(STR_SIZE()); - - if (l < 0) - { - buf += NEGATIVE_PREFIX; - l = LLONG_MAX + l + 1; - } - buf += POSITIVE_PREFIX; - - String num(StringUtils::toString(l, RADIX)); - - int32_t padLen = (int32_t)(STR_SIZE() - num.length() - buf.length()); - while (padLen-- > 0) - buf += L'0'; - - return buf + num; + + if (str == MIN_STRING_VALUE()) { + return std::numeric_limits::min(); } - - int64_t NumberTools::stringToLong(const String& str) - { - if ((int32_t)str.length() != STR_SIZE()) - boost::throw_exception(NumberFormatException(L"string is the wrong size")); - - if (str == MIN_STRING_VALUE()) - return LLONG_MIN; - - wchar_t prefix = str[0]; - int64_t l = StringUtils::toLong(str.substr(1), RADIX); - - if (prefix == POSITIVE_PREFIX) - { // nop - } - else if (prefix == NEGATIVE_PREFIX) - l = l - LLONG_MAX - 1; - else - boost::throw_exception(NumberFormatException(L"string does not begin with the correct prefix")); - - return l; + + wchar_t prefix = str[0]; + int64_t l = StringUtils::toLong(str.substr(1), RADIX); + + if (prefix == POSITIVE_PREFIX) { + // nop + } 
else if (prefix == NEGATIVE_PREFIX) { + l = l - std::numeric_limits::max() - 1; + } else { + boost::throw_exception(NumberFormatException(L"string does not begin with the correct prefix")); } + + return l; +} + } diff --git a/src/core/document/NumericField.cpp b/src/core/document/NumericField.cpp index c9abd20f..da356a55 100644 --- a/src/core/document/NumericField.cpp +++ b/src/core/document/NumericField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,85 +11,73 @@ #include "NumericTokenStream.h" #include "StringUtils.h" -namespace Lucene -{ - NumericField::NumericField(const String& name) - : AbstractField(name, Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS, Field::TERM_VECTOR_NO) - { - setOmitTermFreqAndPositions(true); - tokenStream = newLucene(NumericUtils::PRECISION_STEP_DEFAULT); - } - - NumericField::NumericField(const String& name, Field::Store store, bool index) - : AbstractField(name, store, index ? Field::INDEX_ANALYZED_NO_NORMS : Field::INDEX_NO, Field::TERM_VECTOR_NO) - { - setOmitTermFreqAndPositions(true); - tokenStream = newLucene(NumericUtils::PRECISION_STEP_DEFAULT); - } - - NumericField::NumericField(const String& name, int32_t precisionStep) - : AbstractField(name, Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS, Field::TERM_VECTOR_NO) - { - setOmitTermFreqAndPositions(true); - tokenStream = newLucene(precisionStep); - } - - NumericField::NumericField(const String& name, int32_t precisionStep, Field::Store store, bool index) - : AbstractField(name, store, index ? 
Field::INDEX_ANALYZED_NO_NORMS : Field::INDEX_NO, Field::TERM_VECTOR_NO) - { - setOmitTermFreqAndPositions(true); - tokenStream = newLucene(precisionStep); - } - - NumericField::~NumericField() - { - } - - TokenStreamPtr NumericField::tokenStreamValue() - { - return isIndexed() ? boost::static_pointer_cast(tokenStream) : TokenStreamPtr(); - } - - ByteArray NumericField::getBinaryValue(ByteArray result) - { - return ByteArray(); - } - - ReaderPtr NumericField::readerValue() - { - return ReaderPtr(); - } - - String NumericField::stringValue() - { - StringStream value; - value << fieldsData; - return value.str(); - } - - int64_t NumericField::getNumericValue() - { - return StringUtils::toLong(stringValue()); - } - - NumericFieldPtr NumericField::setLongValue(int64_t value) - { - tokenStream->setLongValue(value); - fieldsData = value; - return shared_from_this(); - } - - NumericFieldPtr NumericField::setIntValue(int32_t value) - { - tokenStream->setIntValue(value); - fieldsData = value; - return shared_from_this(); - } - - NumericFieldPtr NumericField::setDoubleValue(double value) - { - tokenStream->setDoubleValue(value); - fieldsData = value; - return shared_from_this(); - } +namespace Lucene { + +NumericField::NumericField(const String& name) + : AbstractField(name, Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS, Field::TERM_VECTOR_NO) { + setOmitTermFreqAndPositions(true); + tokenStream = newLucene(NumericUtils::PRECISION_STEP_DEFAULT); +} + +NumericField::NumericField(const String& name, Field::Store store, bool index) + : AbstractField(name, store, index ? 
Field::INDEX_ANALYZED_NO_NORMS : Field::INDEX_NO, Field::TERM_VECTOR_NO) { + setOmitTermFreqAndPositions(true); + tokenStream = newLucene(NumericUtils::PRECISION_STEP_DEFAULT); +} + +NumericField::NumericField(const String& name, int32_t precisionStep) + : AbstractField(name, Field::STORE_NO, Field::INDEX_ANALYZED_NO_NORMS, Field::TERM_VECTOR_NO) { + setOmitTermFreqAndPositions(true); + tokenStream = newLucene(precisionStep); +} + +NumericField::NumericField(const String& name, int32_t precisionStep, Field::Store store, bool index) + : AbstractField(name, store, index ? Field::INDEX_ANALYZED_NO_NORMS : Field::INDEX_NO, Field::TERM_VECTOR_NO) { + setOmitTermFreqAndPositions(true); + tokenStream = newLucene(precisionStep); +} + +NumericField::~NumericField() { +} + +TokenStreamPtr NumericField::tokenStreamValue() { + return isIndexed() ? boost::static_pointer_cast(tokenStream) : TokenStreamPtr(); +} + +ByteArray NumericField::getBinaryValue(ByteArray result) { + return ByteArray(); +} + +ReaderPtr NumericField::readerValue() { + return ReaderPtr(); +} + +String NumericField::stringValue() { + StringStream value; + value << fieldsData; + return value.str(); +} + +int64_t NumericField::getNumericValue() { + return StringUtils::toLong(stringValue()); +} + +NumericFieldPtr NumericField::setLongValue(int64_t value) { + tokenStream->setLongValue(value); + fieldsData = value; + return shared_from_this(); +} + +NumericFieldPtr NumericField::setIntValue(int32_t value) { + tokenStream->setIntValue(value); + fieldsData = value; + return shared_from_this(); +} + +NumericFieldPtr NumericField::setDoubleValue(double value) { + tokenStream->setDoubleValue(value); + fieldsData = value; + return shared_from_this(); +} + } diff --git a/src/core/document/SetBasedFieldSelector.cpp b/src/core/document/SetBasedFieldSelector.cpp index 999a23fc..f42be80c 100644 --- a/src/core/document/SetBasedFieldSelector.cpp +++ b/src/core/document/SetBasedFieldSelector.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,25 +7,25 @@ #include "LuceneInc.h" #include "SetBasedFieldSelector.h" -namespace Lucene -{ - SetBasedFieldSelector::SetBasedFieldSelector(HashSet fieldsToLoad, HashSet lazyFieldsToLoad) - { - this->fieldsToLoad = fieldsToLoad; - this->lazyFieldsToLoad = lazyFieldsToLoad; - } - - SetBasedFieldSelector::~SetBasedFieldSelector() - { +namespace Lucene { + +SetBasedFieldSelector::SetBasedFieldSelector(HashSet fieldsToLoad, HashSet lazyFieldsToLoad) { + this->fieldsToLoad = fieldsToLoad; + this->lazyFieldsToLoad = lazyFieldsToLoad; +} + +SetBasedFieldSelector::~SetBasedFieldSelector() { +} + +FieldSelector::FieldSelectorResult SetBasedFieldSelector::accept(const String& fieldName) { + FieldSelector::FieldSelectorResult result = FieldSelector::SELECTOR_NO_LOAD; + if (fieldsToLoad.contains(fieldName)) { + result = FieldSelector::SELECTOR_LOAD; } - - FieldSelector::FieldSelectorResult SetBasedFieldSelector::accept(const String& fieldName) - { - FieldSelector::FieldSelectorResult result = FieldSelector::SELECTOR_NO_LOAD; - if (fieldsToLoad.contains(fieldName)) - result = FieldSelector::SELECTOR_LOAD; - if (lazyFieldsToLoad.contains(fieldName)) - result = FieldSelector::SELECTOR_LAZY_LOAD; - return result; + if (lazyFieldsToLoad.contains(fieldName)) { + result = FieldSelector::SELECTOR_LAZY_LOAD; } + return result; +} + } diff --git a/src/core/include/LuceneInc.h b/src/core/include/LuceneInc.h index 4f1ee0b7..b3943bab 100644 --- a/src/core/include/LuceneInc.h +++ b/src/core/include/LuceneInc.h @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,8 +8,13 @@ #include "targetver.h" +#ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN +#endif + +#ifndef NOMINMAX #define NOMINMAX +#endif #include diff --git a/src/core/include/_BooleanQuery.h b/src/core/include/_BooleanQuery.h index 66db36ea..f23a79ef 100644 --- a/src/core/include/_BooleanQuery.h +++ b/src/core/include/_BooleanQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,46 +9,45 @@ #include "SimilarityDelegator.h" -namespace Lucene -{ - /// The Weight for BooleanQuery, used to normalize, score and explain these queries. - class BooleanWeight : public Weight - { - public: - BooleanWeight(BooleanQueryPtr query, SearcherPtr searcher); - virtual ~BooleanWeight(); - - LUCENE_CLASS(BooleanWeight); - - protected: - BooleanQueryPtr query; - - /// The Similarity implementation. 
- SimilarityPtr similarity; - Collection weights; - - public: - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual bool scoresDocsOutOfOrder(); - }; - - /// Disabled coord Similarity - class SimilarityDisableCoord : public SimilarityDelegator - { - public: - SimilarityDisableCoord(SimilarityPtr delegee); - virtual ~SimilarityDisableCoord(); - - LUCENE_CLASS(SimilarityDisableCoord); - - public: - virtual double coord(int32_t overlap, int32_t maxOverlap); - }; +namespace Lucene { + +/// The Weight for BooleanQuery, used to normalize, score and explain these queries. +class BooleanWeight : public Weight { +public: + BooleanWeight(const BooleanQueryPtr& query, const SearcherPtr& searcher); + virtual ~BooleanWeight(); + + LUCENE_CLASS(BooleanWeight); + +protected: + BooleanQueryPtr query; + + /// The Similarity implementation. 
+ SimilarityPtr similarity; + Collection weights; + +public: + virtual QueryPtr getQuery(); + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual bool scoresDocsOutOfOrder(); +}; + +/// Disabled coord Similarity +class SimilarityDisableCoord : public SimilarityDelegator { +public: + SimilarityDisableCoord(const SimilarityPtr& delegee); + virtual ~SimilarityDisableCoord(); + + LUCENE_CLASS(SimilarityDisableCoord); + +public: + virtual double coord(int32_t overlap, int32_t maxOverlap); +}; + } #endif diff --git a/src/core/include/_ByteFieldSource.h b/src/core/include/_ByteFieldSource.h index a9c2ec2e..0bff03fe 100644 --- a/src/core/include/_ByteFieldSource.h +++ b/src/core/include/_ByteFieldSource.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,26 +9,26 @@ #include "DocValues.h" -namespace Lucene -{ - class ByteDocValues : public DocValues - { - public: - ByteDocValues(ByteFieldSourcePtr source, Collection arr); - virtual ~ByteDocValues(); - - LUCENE_CLASS(ByteDocValues); - - protected: - ByteFieldSourceWeakPtr _source; - Collection arr; - - public: - virtual double doubleVal(int32_t doc); - virtual int32_t intVal(int32_t doc); - virtual String toString(int32_t doc); - virtual CollectionValue getInnerArray(); - }; +namespace Lucene { + +class ByteDocValues : public DocValues { +public: + ByteDocValues(const ByteFieldSourcePtr& source, Collection arr); + virtual ~ByteDocValues(); + + LUCENE_CLASS(ByteDocValues); + +protected: + ByteFieldSourceWeakPtr _source; + Collection arr; + +public: + virtual double doubleVal(int32_t doc); + virtual int32_t intVal(int32_t doc); + virtual String toString(int32_t doc); + virtual CollectionValue getInnerArray(); +}; + } #endif diff --git a/src/core/include/_CachingSpanFilter.h b/src/core/include/_CachingSpanFilter.h index d0ca264f..271db3f3 100644 --- a/src/core/include/_CachingSpanFilter.h +++ b/src/core/include/_CachingSpanFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,19 +9,19 @@ #include "_CachingWrapperFilter.h" -namespace Lucene -{ - class FilterCacheSpanFilterResult : public FilterCache - { - public: - FilterCacheSpanFilterResult(CachingWrapperFilter::DeletesMode deletesMode); - virtual ~FilterCacheSpanFilterResult(); - - LUCENE_CLASS(FilterCacheSpanFilterResult); - - protected: - virtual LuceneObjectPtr mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value); - }; +namespace Lucene { + +class FilterCacheSpanFilterResult : public FilterCache { +public: + FilterCacheSpanFilterResult(CachingWrapperFilter::DeletesMode deletesMode); + virtual ~FilterCacheSpanFilterResult(); + + LUCENE_CLASS(FilterCacheSpanFilterResult); + +protected: + virtual LuceneObjectPtr mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value); +}; + } #endif diff --git a/src/core/include/_CachingWrapperFilter.h b/src/core/include/_CachingWrapperFilter.h index 5ccb1e81..7cf76e65 100644 --- a/src/core/include/_CachingWrapperFilter.h +++ b/src/core/include/_CachingWrapperFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,54 +9,52 @@ #include "FilteredDocIdSet.h" -namespace Lucene -{ - class FilterCache : public LuceneObject - { - public: - FilterCache(CachingWrapperFilter::DeletesMode deletesMode); - virtual ~FilterCache(); - - LUCENE_CLASS(FilterCache); - - public: - WeakMapObjectObject cache; - CachingWrapperFilter::DeletesMode deletesMode; - - public: - virtual LuceneObjectPtr get(IndexReaderPtr reader, LuceneObjectPtr coreKey, LuceneObjectPtr delCoreKey); - virtual void put(LuceneObjectPtr coreKey, LuceneObjectPtr delCoreKey, LuceneObjectPtr value); - - protected: - virtual LuceneObjectPtr mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value) = 0; - }; - - class FilterCacheDocIdSet : public FilterCache - { - public: - FilterCacheDocIdSet(CachingWrapperFilter::DeletesMode deletesMode); - virtual ~FilterCacheDocIdSet(); - - LUCENE_CLASS(FilterCacheDocIdSet); - - protected: - virtual LuceneObjectPtr mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value); - }; - - class FilteredCacheDocIdSet : public FilteredDocIdSet - { - public: - FilteredCacheDocIdSet(IndexReaderPtr reader, DocIdSetPtr innerSet); - virtual ~FilteredCacheDocIdSet(); - - LUCENE_CLASS(FilteredCacheDocIdSet); - - protected: - IndexReaderPtr reader; - - protected: - virtual bool match(int32_t docid); - }; +namespace Lucene { + +class FilterCache : public LuceneObject { +public: + FilterCache(CachingWrapperFilter::DeletesMode deletesMode); + virtual ~FilterCache(); + + LUCENE_CLASS(FilterCache); + +public: + WeakMapObjectObject cache; + CachingWrapperFilter::DeletesMode deletesMode; + +public: + virtual LuceneObjectPtr get(const IndexReaderPtr& reader, const LuceneObjectPtr& coreKey, const LuceneObjectPtr& delCoreKey); + virtual void put(const LuceneObjectPtr& coreKey, const LuceneObjectPtr& delCoreKey, const LuceneObjectPtr& value); + +protected: + virtual LuceneObjectPtr mergeDeletes(const IndexReaderPtr& reader, const 
LuceneObjectPtr& value) = 0; +}; + +class FilterCacheDocIdSet : public FilterCache { +public: + FilterCacheDocIdSet(CachingWrapperFilter::DeletesMode deletesMode); + virtual ~FilterCacheDocIdSet(); + + LUCENE_CLASS(FilterCacheDocIdSet); + +protected: + virtual LuceneObjectPtr mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value); +}; + +class FilteredCacheDocIdSet : public FilteredDocIdSet { +public: + FilteredCacheDocIdSet(const IndexReaderPtr& reader, const DocIdSetPtr& innerSet); + virtual ~FilteredCacheDocIdSet(); + + LUCENE_CLASS(FilteredCacheDocIdSet); + +protected: + IndexReaderPtr reader; + +protected: + virtual bool match(int32_t docid); +}; + } #endif diff --git a/src/core/include/_CheckIndex.h b/src/core/include/_CheckIndex.h index 0f2556a2..ab716487 100644 --- a/src/core/include/_CheckIndex.h +++ b/src/core/include/_CheckIndex.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,23 +9,23 @@ #include "SegmentTermDocs.h" -namespace Lucene -{ - class MySegmentTermDocs : public SegmentTermDocs - { - public: - MySegmentTermDocs(SegmentReaderPtr p); - virtual ~MySegmentTermDocs(); - - LUCENE_CLASS(MySegmentTermDocs); - - public: - int32_t delCount; - - public: - virtual void seek(TermPtr term); - virtual void skippingDoc(); - }; +namespace Lucene { + +class MySegmentTermDocs : public SegmentTermDocs { +public: + MySegmentTermDocs(const SegmentReaderPtr& p); + virtual ~MySegmentTermDocs(); + + LUCENE_CLASS(MySegmentTermDocs); + +public: + int32_t delCount; + +public: + virtual void seek(const TermPtr& term); + virtual void skippingDoc(); +}; + } #endif diff --git a/src/core/include/_ConcurrentMergeScheduler.h b/src/core/include/_ConcurrentMergeScheduler.h index 315859ff..7fb46c56 100644 --- a/src/core/include/_ConcurrentMergeScheduler.h +++ b/src/core/include/_ConcurrentMergeScheduler.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,28 +9,28 @@ #include "LuceneThread.h" -namespace Lucene -{ - class MergeThread : public LuceneThread - { - public: - MergeThread(ConcurrentMergeSchedulerPtr merger, IndexWriterPtr writer, OneMergePtr startMerge); - virtual ~MergeThread(); - - LUCENE_CLASS(MergeThread); - - protected: - ConcurrentMergeSchedulerWeakPtr _merger; - IndexWriterWeakPtr _writer; - OneMergePtr startMerge; - OneMergePtr runningMerge; - - public: - void setRunningMerge(OneMergePtr merge); - OneMergePtr getRunningMerge(); - void setThreadPriority(int32_t pri); - virtual void run(); - }; +namespace Lucene { + +class LPPAPI MergeThread : public LuceneThread { +public: + MergeThread(const ConcurrentMergeSchedulerPtr& merger, const IndexWriterPtr& writer, const OneMergePtr& startMerge); + virtual ~MergeThread(); + + LUCENE_CLASS(MergeThread); + +protected: + ConcurrentMergeSchedulerWeakPtr _merger; + IndexWriterWeakPtr _writer; + OneMergePtr startMerge; + OneMergePtr runningMerge; + +public: + void setRunningMerge(const OneMergePtr& merge); + OneMergePtr getRunningMerge(); + void setThreadPriority(int32_t pri); + virtual void run(); +}; + } #endif diff --git a/src/core/include/_ConstantScoreQuery.h b/src/core/include/_ConstantScoreQuery.h index 456d3bb1..5cba90b3 100644 --- a/src/core/include/_ConstantScoreQuery.h +++ b/src/core/include/_ConstantScoreQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,50 +9,49 @@ #include "Weight.h" -namespace Lucene -{ - class ConstantWeight : public Weight - { - public: - ConstantWeight(ConstantScoreQueryPtr constantScorer, SearcherPtr searcher); - virtual ~ConstantWeight(); - - LUCENE_CLASS(ConstantWeight); - - protected: - ConstantScoreQueryPtr constantScorer; - SimilarityPtr similarity; - double queryNorm; - double queryWeight; - - public: - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - }; - - class ConstantScorer : public Scorer - { - public: - ConstantScorer(ConstantScoreQueryPtr constantScorer, SimilarityPtr similarity, IndexReaderPtr reader, WeightPtr w); - virtual ~ConstantScorer(); - - LUCENE_CLASS(ConstantScorer); - - public: - DocIdSetIteratorPtr docIdSetIterator; - double theScore; - int32_t doc; - - public: - virtual int32_t nextDoc(); - virtual int32_t docID(); - virtual double score(); - virtual int32_t advance(int32_t target); - }; +namespace Lucene { + +class ConstantWeight : public Weight { +public: + ConstantWeight(const ConstantScoreQueryPtr& constantScorer, const SearcherPtr& searcher); + virtual ~ConstantWeight(); + + LUCENE_CLASS(ConstantWeight); + +protected: + ConstantScoreQueryPtr constantScorer; + SimilarityPtr similarity; + double queryNorm; + double queryWeight; + +public: + virtual QueryPtr getQuery(); + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); +}; + +class ConstantScorer : public Scorer { +public: + ConstantScorer(const 
ConstantScoreQueryPtr& constantScorer, const SimilarityPtr& similarity, const IndexReaderPtr& reader, const WeightPtr& w); + virtual ~ConstantScorer(); + + LUCENE_CLASS(ConstantScorer); + +public: + DocIdSetIteratorPtr docIdSetIterator; + double theScore; + int32_t doc; + +public: + virtual int32_t nextDoc(); + virtual int32_t docID(); + virtual double score(); + virtual int32_t advance(int32_t target); +}; + } #endif diff --git a/src/core/include/_CustomScoreQuery.h b/src/core/include/_CustomScoreQuery.h index 9f276823..e9524236 100644 --- a/src/core/include/_CustomScoreQuery.h +++ b/src/core/include/_CustomScoreQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,78 +11,76 @@ #include "Scorer.h" #include "CustomScoreProvider.h" -namespace Lucene -{ - // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider - class DefaultCustomScoreProvider : public CustomScoreProvider - { - public: - DefaultCustomScoreProvider(CustomScoreQueryPtr customQuery, IndexReaderPtr reader); - virtual ~DefaultCustomScoreProvider(); - - LUCENE_CLASS(DefaultCustomScoreProvider); - - protected: - CustomScoreQueryWeakPtr _customQuery; - - public: - virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); - virtual double customScore(int32_t doc, double subQueryScore, double valSrcScore); - virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls); - virtual ExplanationPtr customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl); - }; - - class CustomWeight : public 
Weight - { - public: - CustomWeight(CustomScoreQueryPtr query, SearcherPtr searcher); - virtual ~CustomWeight(); - - LUCENE_CLASS(CustomWeight); - - public: - CustomScoreQueryPtr query; - SimilarityPtr similarity; - WeightPtr subQueryWeight; - Collection valSrcWeights; - bool qStrict; - - public: - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - virtual bool scoresDocsOutOfOrder(); - - protected: - ExplanationPtr doExplain(IndexReaderPtr reader, int32_t doc); - }; - - /// A scorer that applies a (callback) function on scores of the subQuery. - class CustomScorer : public Scorer - { - public: - CustomScorer(SimilarityPtr similarity, IndexReaderPtr reader, CustomWeightPtr weight, ScorerPtr subQueryScorer, Collection valSrcScorers); - virtual ~CustomScorer(); - - LUCENE_CLASS(CustomScorer); - - protected: - double qWeight; - ScorerPtr subQueryScorer; - Collection valSrcScorers; - IndexReaderPtr reader; - CustomScoreProviderPtr provider; - Collection vScores; // reused in score() to avoid allocating this array for each doc - - public: - virtual int32_t nextDoc(); - virtual int32_t docID(); - virtual double score(); - virtual int32_t advance(int32_t target); - }; +namespace Lucene { + +// when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider +class DefaultCustomScoreProvider : public CustomScoreProvider { +public: + DefaultCustomScoreProvider(const CustomScoreQueryPtr& customQuery, const IndexReaderPtr& reader); + virtual ~DefaultCustomScoreProvider(); + + LUCENE_CLASS(DefaultCustomScoreProvider); + +protected: + CustomScoreQueryWeakPtr _customQuery; + +public: + virtual double customScore(int32_t doc, double subQueryScore, Collection valSrcScores); + virtual double 
customScore(int32_t doc, double subQueryScore, double valSrcScore); + virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls); + virtual ExplanationPtr customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl); +}; + +class CustomWeight : public Weight { +public: + CustomWeight(const CustomScoreQueryPtr& query, const SearcherPtr& searcher); + virtual ~CustomWeight(); + + LUCENE_CLASS(CustomWeight); + +public: + CustomScoreQueryPtr query; + SimilarityPtr similarity; + WeightPtr subQueryWeight; + Collection valSrcWeights; + bool qStrict; + +public: + virtual QueryPtr getQuery(); + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); + virtual bool scoresDocsOutOfOrder(); + +protected: + ExplanationPtr doExplain(const IndexReaderPtr& reader, int32_t doc); +}; + +/// A scorer that applies a (callback) function on scores of the subQuery. 
+class CustomScorer : public Scorer { +public: + CustomScorer(const SimilarityPtr& similarity, const IndexReaderPtr& reader, const CustomWeightPtr& weight, const ScorerPtr& subQueryScorer, Collection valSrcScorers); + virtual ~CustomScorer(); + + LUCENE_CLASS(CustomScorer); + +protected: + double qWeight; + ScorerPtr subQueryScorer; + Collection valSrcScorers; + IndexReaderPtr reader; + CustomScoreProviderPtr provider; + Collection vScores; // reused in score() to avoid allocating this array for each doc + +public: + virtual int32_t nextDoc(); + virtual int32_t docID(); + virtual double score(); + virtual int32_t advance(int32_t target); +}; + } #endif diff --git a/src/core/include/_DirectoryReader.h b/src/core/include/_DirectoryReader.h index 20e82732..d0e7e26a 100644 --- a/src/core/include/_DirectoryReader.h +++ b/src/core/include/_DirectoryReader.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,40 +9,39 @@ #include "_SegmentInfos.h" -namespace Lucene -{ - class FindSegmentsOpen : public FindSegmentsFileT - { - public: - FindSegmentsOpen(bool readOnly, IndexDeletionPolicyPtr deletionPolicy, int32_t termInfosIndexDivisor, SegmentInfosPtr infos, DirectoryPtr directory); - virtual ~FindSegmentsOpen(); - - LUCENE_CLASS(FindSegmentsOpen); - - protected: - bool readOnly; - IndexDeletionPolicyPtr deletionPolicy; - int32_t termInfosIndexDivisor; - - public: - virtual IndexReaderPtr doBody(const String& segmentFileName); - }; - - class FindSegmentsReopen : public FindSegmentsFileT - { - public: - FindSegmentsReopen(DirectoryReaderPtr reader, bool openReadOnly, SegmentInfosPtr infos, DirectoryPtr directory); - virtual ~FindSegmentsReopen(); - - LUCENE_CLASS(FindSegmentsReopen); - - protected: - DirectoryReaderWeakPtr _reader; - bool openReadOnly; - - public: - virtual DirectoryReaderPtr doBody(const String& segmentFileName); - }; +namespace Lucene { + +class FindSegmentsOpen : public FindSegmentsFileT { +public: + FindSegmentsOpen(bool readOnly, const IndexDeletionPolicyPtr& deletionPolicy, int32_t termInfosIndexDivisor, const SegmentInfosPtr& infos, const DirectoryPtr& directory); + virtual ~FindSegmentsOpen(); + + LUCENE_CLASS(FindSegmentsOpen); + +protected: + bool readOnly; + IndexDeletionPolicyPtr deletionPolicy; + int32_t termInfosIndexDivisor; + +public: + virtual IndexReaderPtr doBody(const String& segmentFileName); +}; + +class FindSegmentsReopen : public FindSegmentsFileT { +public: + FindSegmentsReopen(const DirectoryReaderPtr& reader, bool openReadOnly, const SegmentInfosPtr& infos, const DirectoryPtr& directory); + virtual ~FindSegmentsReopen(); + + LUCENE_CLASS(FindSegmentsReopen); + +protected: + DirectoryReaderWeakPtr _reader; + bool openReadOnly; + +public: + virtual DirectoryReaderPtr doBody(const String& segmentFileName); +}; + } #endif diff --git 
a/src/core/include/_DisjunctionMaxQuery.h b/src/core/include/_DisjunctionMaxQuery.h index 71da9684..6b047cc3 100644 --- a/src/core/include/_DisjunctionMaxQuery.h +++ b/src/core/include/_DisjunctionMaxQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,46 +9,46 @@ #include "Weight.h" -namespace Lucene -{ - /// The Weight for DisjunctionMaxQuery, used to normalize, score and explain these queries. - class DisjunctionMaxWeight : public Weight - { - public: - /// Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. - DisjunctionMaxWeight(DisjunctionMaxQueryPtr query, SearcherPtr searcher); - virtual ~DisjunctionMaxWeight(); - - LUCENE_CLASS(DisjunctionMaxWeight); - - protected: - DisjunctionMaxQueryPtr query; - - /// The Similarity implementation. - SimilarityPtr similarity; - - /// The Weights for our subqueries, in 1-1 correspondence with disjuncts - Collection weights; - - public: - /// Return our associated DisjunctionMaxQuery - virtual QueryPtr getQuery(); - - /// Return our boost - virtual double getValue(); - - /// Compute the sub of squared weights of us applied to our subqueries. Used for normalization. 
- virtual double sumOfSquaredWeights(); - - /// Apply the computed normalization factor to our subqueries - virtual void normalize(double norm); - - /// Create the scorer used to score our associated DisjunctionMaxQuery - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - - /// Explain the score we computed for doc - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - }; +namespace Lucene { + +/// The Weight for DisjunctionMaxQuery, used to normalize, score and explain these queries. +class DisjunctionMaxWeight : public Weight { +public: + /// Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. + DisjunctionMaxWeight(const DisjunctionMaxQueryPtr& query, const SearcherPtr& searcher); + virtual ~DisjunctionMaxWeight(); + + LUCENE_CLASS(DisjunctionMaxWeight); + +protected: + DisjunctionMaxQueryPtr query; + + /// The Similarity implementation. + SimilarityPtr similarity; + + /// The Weights for our subqueries, in 1-1 correspondence with disjuncts + Collection weights; + +public: + /// Return our associated DisjunctionMaxQuery + virtual QueryPtr getQuery(); + + /// Return our boost + virtual double getValue(); + + /// Compute the sum of squared weights of us applied to our subqueries. Used for normalization. 
+ virtual double sumOfSquaredWeights(); + + /// Apply the computed normalization factor to our subqueries + virtual void normalize(double norm); + + /// Create the scorer used to score our associated DisjunctionMaxQuery + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + + /// Explain the score we computed for doc + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); +}; + } #endif diff --git a/src/core/include/_DocIdBitSet.h b/src/core/include/_DocIdBitSet.h index 92afe0c3..1c6162f1 100644 --- a/src/core/include/_DocIdBitSet.h +++ b/src/core/include/_DocIdBitSet.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,25 +9,25 @@ #include "DocIdSet.h" -namespace Lucene -{ - class DocIdBitSetIterator : public DocIdSetIterator - { - public: - DocIdBitSetIterator(BitSetPtr bitSet); - virtual ~DocIdBitSetIterator(); - - LUCENE_CLASS(DocIdBitSetIterator); - - protected: - int32_t docId; - BitSetPtr bitSet; - - public: - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - }; +namespace Lucene { + +class DocIdBitSetIterator : public DocIdSetIterator { +public: + DocIdBitSetIterator(const BitSetPtr& bitSet); + virtual ~DocIdBitSetIterator(); + + LUCENE_CLASS(DocIdBitSetIterator); + +protected: + int32_t docId; + BitSetPtr bitSet; + +public: + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); +}; + } #endif diff --git a/src/core/include/_DocIdSet.h b/src/core/include/_DocIdSet.h index 8afb44e6..3eb1fd89 100644 --- a/src/core/include/_DocIdSet.h +++ 
b/src/core/include/_DocIdSet.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,31 +9,30 @@ #include "DocIdSetIterator.h" -namespace Lucene -{ - class EmptyDocIdSetIterator : public DocIdSetIterator - { - public: - virtual ~EmptyDocIdSetIterator(); - LUCENE_CLASS(EmptyDocIdSetIterator); - - public: - virtual int32_t advance(int32_t target); - virtual int32_t docID(); - virtual int32_t nextDoc(); - }; - - /// An empty {@code DocIdSet} instance for easy use, eg. in Filters that hit no documents. - class EmptyDocIdSet : public DocIdSet - { - public: - virtual ~EmptyDocIdSet(); - LUCENE_CLASS(EmptyDocIdSet); - - public: - virtual DocIdSetIteratorPtr iterator(); - virtual bool isCacheable(); - }; +namespace Lucene { + +class EmptyDocIdSetIterator : public DocIdSetIterator { +public: + virtual ~EmptyDocIdSetIterator(); + LUCENE_CLASS(EmptyDocIdSetIterator); + +public: + virtual int32_t advance(int32_t target); + virtual int32_t docID(); + virtual int32_t nextDoc(); +}; + +/// An empty {@code DocIdSet} instance for easy use, eg. in Filters that hit no documents. +class EmptyDocIdSet : public DocIdSet { +public: + virtual ~EmptyDocIdSet(); + LUCENE_CLASS(EmptyDocIdSet); + +public: + virtual DocIdSetIteratorPtr iterator(); + virtual bool isCacheable(); +}; + } #endif diff --git a/src/core/include/_FieldCache.h b/src/core/include/_FieldCache.h index 12d8b641..efbfa7c4 100644 --- a/src/core/include/_FieldCache.h +++ b/src/core/include/_FieldCache.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,91 +9,85 @@ #include "LuceneObject.h" -namespace Lucene -{ - /// @see FieldCache#DEFAULT_BYTE_PARSER() - class DefaultByteParser : public ByteParser - { - public: - virtual ~DefaultByteParser(); - LUCENE_CLASS(DefaultByteParser); - - public: - virtual uint8_t parseByte(const String& string); - virtual String toString(); - }; - - /// @see FieldCache#DEFAULT_INT_PARSER() - class DefaultIntParser : public IntParser - { - public: - virtual ~DefaultIntParser(); - LUCENE_CLASS(DefaultIntParser); - - public: - virtual int32_t parseInt(const String& string); - virtual String toString(); - }; - - /// @see FieldCache#NUMERIC_UTILS_INT_PARSER() - class NumericUtilsIntParser : public IntParser - { - public: - virtual ~NumericUtilsIntParser(); - LUCENE_CLASS(NumericUtilsIntParser); - - public: - virtual int32_t parseInt(const String& string); - virtual String toString(); - }; - - /// @see FieldCache#DEFAULT_LONG_PARSER() - class DefaultLongParser : public LongParser - { - public: - virtual ~DefaultLongParser(); - LUCENE_CLASS(DefaultLongParser); - - public: - virtual int64_t parseLong(const String& string); - virtual String toString(); - }; - - /// @see FieldCache#NUMERIC_UTILS_LONG_PARSER() - class NumericUtilsLongParser : public LongParser - { - public: - virtual ~NumericUtilsLongParser(); - LUCENE_CLASS(NumericUtilsLongParser); - - public: - virtual int64_t parseLong(const String& string); - virtual String toString(); - }; - - /// @see FieldCache#DEFAULT_DOUBLE_PARSER() - class DefaultDoubleParser : public DoubleParser - { - public: - virtual ~DefaultDoubleParser(); - LUCENE_CLASS(DefaultDoubleParser); - - public: - virtual double parseDouble(const String& string); - virtual String toString(); - }; - - /// @see 
FieldCache#NUMERIC_UTILS_DOUBLE_PARSER() - class NumericUtilsDoubleParser : public DoubleParser - { - public: - virtual ~NumericUtilsDoubleParser(); - LUCENE_CLASS(NumericUtilsDoubleParser); - - public: - virtual double parseDouble(const String& string); - virtual String toString(); - }; +namespace Lucene { + +/// @see FieldCache#DEFAULT_BYTE_PARSER() +class DefaultByteParser : public ByteParser { +public: + virtual ~DefaultByteParser(); + LUCENE_CLASS(DefaultByteParser); + +public: + virtual uint8_t parseByte(const String& string); + virtual String toString(); +}; + +/// @see FieldCache#DEFAULT_INT_PARSER() +class DefaultIntParser : public IntParser { +public: + virtual ~DefaultIntParser(); + LUCENE_CLASS(DefaultIntParser); + +public: + virtual int32_t parseInt(const String& string); + virtual String toString(); +}; + +/// @see FieldCache#NUMERIC_UTILS_INT_PARSER() +class NumericUtilsIntParser : public IntParser { +public: + virtual ~NumericUtilsIntParser(); + LUCENE_CLASS(NumericUtilsIntParser); + +public: + virtual int32_t parseInt(const String& string); + virtual String toString(); +}; + +/// @see FieldCache#DEFAULT_LONG_PARSER() +class DefaultLongParser : public LongParser { +public: + virtual ~DefaultLongParser(); + LUCENE_CLASS(DefaultLongParser); + +public: + virtual int64_t parseLong(const String& string); + virtual String toString(); +}; + +/// @see FieldCache#NUMERIC_UTILS_LONG_PARSER() +class NumericUtilsLongParser : public LongParser { +public: + virtual ~NumericUtilsLongParser(); + LUCENE_CLASS(NumericUtilsLongParser); + +public: + virtual int64_t parseLong(const String& string); + virtual String toString(); +}; + +/// @see FieldCache#DEFAULT_DOUBLE_PARSER() +class DefaultDoubleParser : public DoubleParser { +public: + virtual ~DefaultDoubleParser(); + LUCENE_CLASS(DefaultDoubleParser); + +public: + virtual double parseDouble(const String& string); + virtual String toString(); +}; + +/// @see FieldCache#NUMERIC_UTILS_DOUBLE_PARSER() +class 
NumericUtilsDoubleParser : public DoubleParser { +public: + virtual ~NumericUtilsDoubleParser(); + LUCENE_CLASS(NumericUtilsDoubleParser); + +public: + virtual double parseDouble(const String& string); + virtual String toString(); +}; + } #endif diff --git a/src/core/include/_FieldCacheRangeFilter.h b/src/core/include/_FieldCacheRangeFilter.h index 43a75d6f..22b52fd6 100644 --- a/src/core/include/_FieldCacheRangeFilter.h +++ b/src/core/include/_FieldCacheRangeFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,261 +13,251 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - class FieldCacheRangeFilterString : public FieldCacheRangeFilter - { - public: - FieldCacheRangeFilterString(const String& field, ParserPtr parser, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper); - virtual ~FieldCacheRangeFilterString(); - - LUCENE_CLASS(FieldCacheRangeFilterString); - - public: - String lowerVal; - String upperVal; - - public: - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); - - virtual String toString(); - virtual bool equals(LuceneObjectPtr other); - virtual int32_t hashCode(); - }; - - class FieldCacheDocIdSet : public DocIdSet - { - public: - FieldCacheDocIdSet(IndexReaderPtr reader, bool mayUseTermDocs); - virtual ~FieldCacheDocIdSet(); - - LUCENE_CLASS(FieldCacheDocIdSet); - - protected: - IndexReaderPtr reader; - bool mayUseTermDocs; - - public: - /// This method checks, if a doc is a hit, should throw ArrayIndexOutOfBounds, when position invalid - virtual bool matchDoc(int32_t doc) = 0; - - /// This DocIdSet is cacheable, if it 
works solely with FieldCache and no TermDocs. - virtual bool isCacheable(); - - virtual DocIdSetIteratorPtr iterator(); - }; - - template - class FieldCacheDocIdSetNumeric : public FieldCacheDocIdSet - { - public: - FieldCacheDocIdSetNumeric(IndexReaderPtr reader, bool mayUseTermDocs, Collection values, TYPE inclusiveLowerPoint, TYPE inclusiveUpperPoint) : FieldCacheDocIdSet(reader, mayUseTermDocs) - { - this->values = values; - this->inclusiveLowerPoint = inclusiveLowerPoint; - this->inclusiveUpperPoint = inclusiveUpperPoint; +namespace Lucene { + +class FieldCacheRangeFilterString : public FieldCacheRangeFilter { +public: + FieldCacheRangeFilterString(const String& field, const ParserPtr& parser, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper); + virtual ~FieldCacheRangeFilterString(); + + LUCENE_CLASS(FieldCacheRangeFilterString); + +public: + String lowerVal; + String upperVal; + +public: + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); + + virtual String toString(); + virtual bool equals(const LuceneObjectPtr& other); + virtual int32_t hashCode(); +}; + +class FieldCacheDocIdSet : public DocIdSet { +public: + FieldCacheDocIdSet(const IndexReaderPtr& reader, bool mayUseTermDocs); + virtual ~FieldCacheDocIdSet(); + + LUCENE_CLASS(FieldCacheDocIdSet); + +protected: + IndexReaderPtr reader; + bool mayUseTermDocs; + +public: + /// This method checks, if a doc is a hit, should throw ArrayIndexOutOfBounds, when position invalid + virtual bool matchDoc(int32_t doc) = 0; + + /// This DocIdSet is cacheable, if it works solely with FieldCache and no TermDocs. 
+ virtual bool isCacheable(); + + virtual DocIdSetIteratorPtr iterator(); +}; + +template +class FieldCacheDocIdSetNumeric : public FieldCacheDocIdSet { +public: + FieldCacheDocIdSetNumeric(const IndexReaderPtr& reader, bool mayUseTermDocs, Collection values, TYPE inclusiveLowerPoint, TYPE inclusiveUpperPoint) : FieldCacheDocIdSet(reader, mayUseTermDocs) { + this->values = values; + this->inclusiveLowerPoint = inclusiveLowerPoint; + this->inclusiveUpperPoint = inclusiveUpperPoint; + } + + virtual ~FieldCacheDocIdSetNumeric() { + } + +protected: + Collection values; + TYPE inclusiveLowerPoint; + TYPE inclusiveUpperPoint; + +public: + virtual bool matchDoc(int32_t doc) { + if (doc < 0 || doc >= values.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } - - virtual ~FieldCacheDocIdSetNumeric() - { + return (values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint); + } +}; + +template +class FieldCacheRangeFilterNumeric : public FieldCacheRangeFilter { +public: + FieldCacheRangeFilterNumeric(const String& field, const ParserPtr& parser, TYPE lowerVal, TYPE upperVal, TYPE maxVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilter(field, parser, includeLower, includeUpper) { + this->lowerVal = lowerVal; + this->upperVal = upperVal; + this->maxVal = maxVal; + } + + virtual ~FieldCacheRangeFilterNumeric() { + } + +public: + TYPE lowerVal; + TYPE upperVal; + TYPE maxVal; + +public: + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader) { + if (!includeLower && lowerVal == maxVal) { + return DocIdSet::EMPTY_DOCIDSET(); } - - protected: - Collection values; - TYPE inclusiveLowerPoint; - TYPE inclusiveUpperPoint; - - public: - virtual bool matchDoc(int32_t doc) - { - if (doc < 0 || doc >= values.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return (values[doc] >= inclusiveLowerPoint && values[doc] <= inclusiveUpperPoint); + int64_t inclusiveLowerPoint = (int64_t)(includeLower ? 
lowerVal : (lowerVal + 1)); + + if (!includeUpper && upperVal == 0) { + return DocIdSet::EMPTY_DOCIDSET(); } - }; - - template - class FieldCacheRangeFilterNumeric : public FieldCacheRangeFilter - { - public: - FieldCacheRangeFilterNumeric(const String& field, ParserPtr parser, TYPE lowerVal, TYPE upperVal, TYPE maxVal, bool includeLower, bool includeUpper) : FieldCacheRangeFilter(field, parser, includeLower, includeUpper) - { - this->lowerVal = lowerVal; - this->upperVal = upperVal; - this->maxVal = maxVal; + int64_t inclusiveUpperPoint = (int64_t)(includeUpper ? upperVal : (upperVal - 1)); + + if (inclusiveLowerPoint > inclusiveUpperPoint) { + return DocIdSet::EMPTY_DOCIDSET(); } - - virtual ~FieldCacheRangeFilterNumeric() - { + + // we only request the usage of termDocs, if the range contains 0 + return newLucene< FieldCacheDocIdSetNumeric >(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0), getValues(reader), inclusiveLowerPoint, inclusiveUpperPoint); + } + + virtual Collection getValues(const IndexReaderPtr& reader) = 0; + + virtual String toString() { + StringStream buffer; + buffer << field << L":" << (includeLower ? L"[" : L"{"); + buffer << lowerVal << L" TO " << lowerVal; + buffer << (includeLower ? L"]" : L"}"); + return buffer.str(); + } + + virtual bool equals(const LuceneObjectPtr& other) { + if (Filter::equals(other)) { + return true; } - - public: - TYPE lowerVal; - TYPE upperVal; - TYPE maxVal; - - public: - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader) - { - if (!includeLower && lowerVal == maxVal) - return DocIdSet::EMPTY_DOCIDSET(); - TYPE inclusiveLowerPoint = (TYPE)(includeLower ? lowerVal : (lowerVal + 1)); - - if (!includeUpper && upperVal == 0) - return DocIdSet::EMPTY_DOCIDSET(); - TYPE inclusiveUpperPoint = (TYPE)(includeUpper ? 
upperVal : (upperVal - 1)); - - if (inclusiveLowerPoint > inclusiveUpperPoint) - return DocIdSet::EMPTY_DOCIDSET(); - - // we only request the usage of termDocs, if the range contains 0 - return newLucene< FieldCacheDocIdSetNumeric >(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0), getValues(reader), inclusiveLowerPoint, inclusiveUpperPoint); + boost::shared_ptr< FieldCacheRangeFilterNumeric > otherFilter(boost::dynamic_pointer_cast< FieldCacheRangeFilterNumeric >(other)); + if (!otherFilter) { + return false; } - - virtual Collection getValues(IndexReaderPtr reader) = 0; - - virtual String toString() - { - StringStream buffer; - buffer << field << L":" << (includeLower ? L"[" : L"{"); - buffer << lowerVal << L" TO " << lowerVal; - buffer << (includeLower ? L"]" : L"}"); - return buffer.str(); + if (field != otherFilter->field || includeLower != otherFilter->includeLower || includeUpper != otherFilter->includeUpper) { + return false; } - - virtual bool equals(LuceneObjectPtr other) - { - if (Filter::equals(other)) - return true; - boost::shared_ptr< FieldCacheRangeFilterNumeric > otherFilter(boost::dynamic_pointer_cast< FieldCacheRangeFilterNumeric >(other)); - if (!otherFilter) - return false; - if (field != otherFilter->field || includeLower != otherFilter->includeLower || includeUpper != otherFilter->includeUpper) - return false; - if (lowerVal != otherFilter->lowerVal || upperVal != otherFilter->upperVal) - return false; - if (parser ? !parser->equals(otherFilter->parser) : otherFilter->parser) - return false; - return true; + if (lowerVal != otherFilter->lowerVal || upperVal != otherFilter->upperVal) { + return false; } - - int32_t hashCode() - { - int32_t code = StringUtils::hashCode(field); - code ^= lowerVal == 0 ? 550356204 : (int32_t)lowerVal; - code = (code << 1) | MiscUtils::unsignedShift(code, 31); // rotate to distinguish lower from upper - code ^= upperVal == 0 ? -1674416163 : (int32_t)upperVal; - code ^= parser ? 
parser->hashCode() : -1572457324; - code ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 1721088258 : 1948649653); - return code; + if (parser.get() != NULL ? !parser->equals(otherFilter->parser) : otherFilter->parser.get() != NULL) { + return false; } - }; - - class FieldCacheRangeFilterByte : public FieldCacheRangeFilterNumeric - { - public: - FieldCacheRangeFilterByte(const String& field, ParserPtr parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); - virtual ~FieldCacheRangeFilterByte(); - - LUCENE_CLASS(FieldCacheRangeFilterByte); - - public: - virtual Collection getValues(IndexReaderPtr reader); - }; - - class FieldCacheRangeFilterInt : public FieldCacheRangeFilterNumeric - { - public: - FieldCacheRangeFilterInt(const String& field, ParserPtr parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); - virtual ~FieldCacheRangeFilterInt(); - - LUCENE_CLASS(FieldCacheRangeFilterInt); - - public: - virtual Collection getValues(IndexReaderPtr reader); - }; - - class FieldCacheRangeFilterLong : public FieldCacheRangeFilterNumeric - { - public: - FieldCacheRangeFilterLong(const String& field, ParserPtr parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); - virtual ~FieldCacheRangeFilterLong(); - - LUCENE_CLASS(FieldCacheRangeFilterLong); - - public: - virtual Collection getValues(IndexReaderPtr reader); - }; - - class FieldCacheRangeFilterDouble : public FieldCacheRangeFilterNumeric - { - public: - FieldCacheRangeFilterDouble(const String& field, ParserPtr parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper); - virtual ~FieldCacheRangeFilterDouble(); - - LUCENE_CLASS(FieldCacheRangeFilterDouble); - - public: - virtual DocIdSetPtr getDocIdSet(IndexReaderPtr reader); - virtual Collection getValues(IndexReaderPtr reader); - }; - - class FieldCacheDocIdSetString : public FieldCacheDocIdSet - { - public: - FieldCacheDocIdSetString(IndexReaderPtr 
reader, bool mayUseTermDocs, StringIndexPtr fcsi, int32_t inclusiveLowerPoint, int32_t inclusiveUpperPoint); - virtual ~FieldCacheDocIdSetString(); - - LUCENE_CLASS(FieldCacheDocIdSetString); - - protected: - StringIndexPtr fcsi; - int32_t inclusiveLowerPoint; - int32_t inclusiveUpperPoint; - - public: - virtual bool matchDoc(int32_t doc); - }; - - /// A DocIdSetIterator using TermDocs to iterate valid docIds - class FieldDocIdSetIteratorTermDocs : public DocIdSetIterator - { - public: - FieldDocIdSetIteratorTermDocs(FieldCacheDocIdSetPtr cacheDocIdSet, TermDocsPtr termDocs); - virtual ~FieldDocIdSetIteratorTermDocs(); - - LUCENE_CLASS(FieldDocIdSetIteratorTermDocs); - - protected: - FieldCacheDocIdSetWeakPtr _cacheDocIdSet; - TermDocsPtr termDocs; - int32_t doc; - - public: - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - }; - - /// A DocIdSetIterator generating docIds by incrementing a variable - this one can be used if there - /// are no deletions are on the index. - class FieldDocIdSetIteratorIncrement : public DocIdSetIterator - { - public: - FieldDocIdSetIteratorIncrement(FieldCacheDocIdSetPtr cacheDocIdSet); - virtual ~FieldDocIdSetIteratorIncrement(); - - LUCENE_CLASS(FieldDocIdSetIteratorIncrement); - - protected: - FieldCacheDocIdSetWeakPtr _cacheDocIdSet; - int32_t doc; - - public: - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - }; + return true; + } + + int32_t hashCode() { + int32_t code = StringUtils::hashCode(field); + code ^= lowerVal == 0 ? 550356204 : (int32_t)lowerVal; + code = (code << 1) | MiscUtils::unsignedShift(code, 31); // rotate to distinguish lower from upper + code ^= upperVal == 0 ? -1674416163 : (int32_t)upperVal; + code ^= parser ? parser->hashCode() : -1572457324; + code ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 
1721088258 : 1948649653); + return code; + } +}; + +class FieldCacheRangeFilterByte : public FieldCacheRangeFilterNumeric { +public: + FieldCacheRangeFilterByte(const String& field, const ParserPtr& parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper); + virtual ~FieldCacheRangeFilterByte(); + + LUCENE_CLASS(FieldCacheRangeFilterByte); + +public: + virtual Collection getValues(const IndexReaderPtr& reader); +}; + +class FieldCacheRangeFilterInt : public FieldCacheRangeFilterNumeric { +public: + FieldCacheRangeFilterInt(const String& field, const ParserPtr& parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper); + virtual ~FieldCacheRangeFilterInt(); + + LUCENE_CLASS(FieldCacheRangeFilterInt); + +public: + virtual Collection getValues(const IndexReaderPtr& reader); +}; + +class FieldCacheRangeFilterLong : public FieldCacheRangeFilterNumeric { +public: + FieldCacheRangeFilterLong(const String& field, const ParserPtr& parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper); + virtual ~FieldCacheRangeFilterLong(); + + LUCENE_CLASS(FieldCacheRangeFilterLong); + +public: + virtual Collection getValues(const IndexReaderPtr& reader); +}; + +class FieldCacheRangeFilterDouble : public FieldCacheRangeFilterNumeric { +public: + FieldCacheRangeFilterDouble(const String& field, const ParserPtr& parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper); + virtual ~FieldCacheRangeFilterDouble(); + + LUCENE_CLASS(FieldCacheRangeFilterDouble); + +public: + virtual DocIdSetPtr getDocIdSet(const IndexReaderPtr& reader); + virtual Collection getValues(const IndexReaderPtr& reader); +}; + +class FieldCacheDocIdSetString : public FieldCacheDocIdSet { +public: + FieldCacheDocIdSetString(const IndexReaderPtr& reader, bool mayUseTermDocs, const StringIndexPtr& fcsi, int32_t inclusiveLowerPoint, int32_t inclusiveUpperPoint); + virtual ~FieldCacheDocIdSetString(); + + 
LUCENE_CLASS(FieldCacheDocIdSetString); + +protected: + StringIndexPtr fcsi; + int32_t inclusiveLowerPoint; + int32_t inclusiveUpperPoint; + +public: + virtual bool matchDoc(int32_t doc); +}; + +/// A DocIdSetIterator using TermDocs to iterate valid docIds +class FieldDocIdSetIteratorTermDocs : public DocIdSetIterator { +public: + FieldDocIdSetIteratorTermDocs(const FieldCacheDocIdSetPtr& cacheDocIdSet, const TermDocsPtr& termDocs); + virtual ~FieldDocIdSetIteratorTermDocs(); + + LUCENE_CLASS(FieldDocIdSetIteratorTermDocs); + +protected: + FieldCacheDocIdSetWeakPtr _cacheDocIdSet; + TermDocsPtr termDocs; + int32_t doc; + +public: + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); +}; + +/// A DocIdSetIterator generating docIds by incrementing a variable - this one can be used if there +/// are no deletions are on the index. +class FieldDocIdSetIteratorIncrement : public DocIdSetIterator { +public: + FieldDocIdSetIteratorIncrement(const FieldCacheDocIdSetPtr& cacheDocIdSet); + virtual ~FieldDocIdSetIteratorIncrement(); + + LUCENE_CLASS(FieldDocIdSetIteratorIncrement); + +protected: + FieldCacheDocIdSetWeakPtr _cacheDocIdSet; + int32_t doc; + +public: + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); +}; + } #endif diff --git a/src/core/include/_FieldCacheSanityChecker.h b/src/core/include/_FieldCacheSanityChecker.h index 423b2333..f6216f67 100644 --- a/src/core/include/_FieldCacheSanityChecker.h +++ b/src/core/include/_FieldCacheSanityChecker.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,26 +9,26 @@ #include "LuceneObject.h" -namespace Lucene -{ - /// Simple pair object for using "readerKey + fieldName" a Map key - class ReaderField : public LuceneObject - { - public: - ReaderField(LuceneObjectPtr readerKey, const String& fieldName); - virtual ~ReaderField(); - - LUCENE_CLASS(ReaderField); - - public: - LuceneObjectPtr readerKey; - String fieldName; - - public: - virtual int32_t hashCode(); - virtual bool equals(LuceneObjectPtr other); - virtual String toString(); - }; +namespace Lucene { + +/// Simple pair object for using "readerKey + fieldName" a Map key +class ReaderField : public LuceneObject { +public: + ReaderField(const LuceneObjectPtr& readerKey, const String& fieldName); + virtual ~ReaderField(); + + LUCENE_CLASS(ReaderField); + +public: + LuceneObjectPtr readerKey; + String fieldName; + +public: + virtual int32_t hashCode(); + virtual bool equals(const LuceneObjectPtr& other); + virtual String toString(); +}; + } #endif diff --git a/src/core/include/_FieldCacheTermsFilter.h b/src/core/include/_FieldCacheTermsFilter.h index aff3fffa..68708bd4 100644 --- a/src/core/include/_FieldCacheTermsFilter.h +++ b/src/core/include/_FieldCacheTermsFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,45 +10,44 @@ #include "DocIdSet.h" #include "DocIdSetIterator.h" -namespace Lucene -{ - class FieldCacheTermsFilterDocIdSet : public DocIdSet - { - public: - FieldCacheTermsFilterDocIdSet(Collection terms, StringIndexPtr fcsi); - virtual ~FieldCacheTermsFilterDocIdSet(); - - LUCENE_CLASS(FieldCacheTermsFilterDocIdSet); - - protected: - StringIndexPtr fcsi; - OpenBitSetPtr openBitSet; - - public: - virtual DocIdSetIteratorPtr iterator(); - - /// This DocIdSet implementation is cacheable. - virtual bool isCacheable(); - }; - - class FieldCacheTermsFilterDocIdSetIterator : public DocIdSetIterator - { - public: - FieldCacheTermsFilterDocIdSetIterator(StringIndexPtr fcsi, OpenBitSetPtr openBitSet); - virtual ~FieldCacheTermsFilterDocIdSetIterator(); - - LUCENE_CLASS(FieldCacheTermsFilterDocIdSetIterator); - - protected: - StringIndexPtr fcsi; - OpenBitSetPtr openBitSet; - int32_t doc; - - public: - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - }; +namespace Lucene { + +class FieldCacheTermsFilterDocIdSet : public DocIdSet { +public: + FieldCacheTermsFilterDocIdSet(Collection terms, const StringIndexPtr& fcsi); + virtual ~FieldCacheTermsFilterDocIdSet(); + + LUCENE_CLASS(FieldCacheTermsFilterDocIdSet); + +protected: + StringIndexPtr fcsi; + OpenBitSetPtr openBitSet; + +public: + virtual DocIdSetIteratorPtr iterator(); + + /// This DocIdSet implementation is cacheable. 
+ virtual bool isCacheable(); +}; + +class FieldCacheTermsFilterDocIdSetIterator : public DocIdSetIterator { +public: + FieldCacheTermsFilterDocIdSetIterator(const StringIndexPtr& fcsi, const OpenBitSetPtr& openBitSet); + virtual ~FieldCacheTermsFilterDocIdSetIterator(); + + LUCENE_CLASS(FieldCacheTermsFilterDocIdSetIterator); + +protected: + StringIndexPtr fcsi; + OpenBitSetPtr openBitSet; + int32_t doc; + +public: + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); +}; + } #endif diff --git a/src/core/include/_FieldValueHitQueue.h b/src/core/include/_FieldValueHitQueue.h index aef20d3f..ea1bba2c 100644 --- a/src/core/include/_FieldValueHitQueue.h +++ b/src/core/include/_FieldValueHitQueue.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,37 +9,36 @@ #include "LuceneObject.h" -namespace Lucene -{ - /// An implementation of {@link FieldValueHitQueue} which is optimized in case there is just one comparator. - class OneComparatorFieldValueHitQueue : public FieldValueHitQueue - { - public: - OneComparatorFieldValueHitQueue(Collection fields, int32_t size); - virtual ~OneComparatorFieldValueHitQueue(); - - LUCENE_CLASS(OneComparatorFieldValueHitQueue); - - public: - FieldComparatorPtr comparator; - int32_t oneReverseMul; - - protected: - virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); - }; - - /// An implementation of {@link FieldValueHitQueue} which is optimized in case there is more than one comparator. 
- class MultiComparatorsFieldValueHitQueue : public FieldValueHitQueue - { - public: - MultiComparatorsFieldValueHitQueue(Collection fields, int32_t size); - virtual ~MultiComparatorsFieldValueHitQueue(); - - LUCENE_CLASS(MultiComparatorsFieldValueHitQueue); - - protected: - virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); - }; +namespace Lucene { + +/// An implementation of {@link FieldValueHitQueue} which is optimized in case there is just one comparator. +class OneComparatorFieldValueHitQueue : public FieldValueHitQueue { +public: + OneComparatorFieldValueHitQueue(Collection fields, int32_t size); + virtual ~OneComparatorFieldValueHitQueue(); + + LUCENE_CLASS(OneComparatorFieldValueHitQueue); + +public: + FieldComparatorPtr comparator; + int32_t oneReverseMul; + +protected: + virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); +}; + +/// An implementation of {@link FieldValueHitQueue} which is optimized in case there is more than one comparator. +class MultiComparatorsFieldValueHitQueue : public FieldValueHitQueue { +public: + MultiComparatorsFieldValueHitQueue(Collection fields, int32_t size); + virtual ~MultiComparatorsFieldValueHitQueue(); + + LUCENE_CLASS(MultiComparatorsFieldValueHitQueue); + +protected: + virtual bool lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second); +}; + } #endif diff --git a/src/core/include/_FilterManager.h b/src/core/include/_FilterManager.h index 2f7d8a34..5ccbe58d 100644 --- a/src/core/include/_FilterManager.h +++ b/src/core/include/_FilterManager.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,47 +9,46 @@ #include "LuceneThread.h" -namespace Lucene -{ - /// Holds the filter and the last time the filter was used, to make LRU-based cache cleaning possible. - class FilterItem : public LuceneObject - { - public: - FilterItem(FilterPtr filter); - virtual ~FilterItem(); - - LUCENE_CLASS(FilterItem); - - public: - FilterPtr filter; - int64_t timestamp; - }; - - /// Keeps the cache from getting too big. - /// - /// The SortedSet sortedFilterItems is used only to sort the items from the cache, so when it's time to clean - /// up we have the TreeSet sort the FilterItems by timestamp. - /// - /// Removes 1.5 * the numbers of items to make the cache smaller. - /// For example: If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 - /// round up to 8. This way we clean the cache a bit more, and avoid having the cache cleaner having to do - /// it frequently. - class FilterCleaner : public LuceneThread - { - public: - FilterCleaner(FilterManagerPtr manager); - virtual ~FilterCleaner(); - - LUCENE_CLASS(FilterCleaner); - - protected: - FilterManagerWeakPtr _manager; - bool running; - MapLongInt sortedFilterItems; - - public: - virtual void run(); - }; +namespace Lucene { + +/// Holds the filter and the last time the filter was used, to make LRU-based cache cleaning possible. +class FilterItem : public LuceneObject { +public: + FilterItem(const FilterPtr& filter); + virtual ~FilterItem(); + + LUCENE_CLASS(FilterItem); + +public: + FilterPtr filter; + int64_t timestamp; +}; + +/// Keeps the cache from getting too big. +/// +/// The SortedSet sortedFilterItems is used only to sort the items from the cache, so when it's time to clean +/// up we have the TreeSet sort the FilterItems by timestamp. +/// +/// Removes 1.5 * the numbers of items to make the cache smaller. 
+/// For example: If cache clean size is 10, and the cache is at 15, we would remove (15 - 10) * 1.5 = 7.5 +/// round up to 8. This way we clean the cache a bit more, and avoid having the cache cleaner having to do +/// it frequently. +class FilterCleaner : public LuceneThread { +public: + FilterCleaner(const FilterManagerPtr& manager); + virtual ~FilterCleaner(); + + LUCENE_CLASS(FilterCleaner); + +protected: + FilterManagerWeakPtr _manager; + bool running; + MapLongInt sortedFilterItems; + +public: + virtual void run(); +}; + } #endif diff --git a/src/core/include/_FilteredDocIdSet.h b/src/core/include/_FilteredDocIdSet.h index 19fde802..c71dc58b 100644 --- a/src/core/include/_FilteredDocIdSet.h +++ b/src/core/include/_FilteredDocIdSet.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,23 +9,23 @@ #include "FilteredDocIdSetIterator.h" -namespace Lucene -{ - /// Implementation of the contract to build a DocIdSetIterator. - class DefaultFilteredDocIdSetIterator : public FilteredDocIdSetIterator - { - public: - DefaultFilteredDocIdSetIterator(FilteredDocIdSetPtr filtered, DocIdSetIteratorPtr innerIter); - virtual ~DefaultFilteredDocIdSetIterator(); - - LUCENE_CLASS(DefaultFilteredDocIdSetIterator); - - protected: - FilteredDocIdSetPtr filtered; - - protected: - virtual bool match(int32_t docid); - }; +namespace Lucene { + +/// Implementation of the contract to build a DocIdSetIterator. 
+class DefaultFilteredDocIdSetIterator : public FilteredDocIdSetIterator { +public: + DefaultFilteredDocIdSetIterator(const FilteredDocIdSetPtr& filtered, const DocIdSetIteratorPtr& innerIter); + virtual ~DefaultFilteredDocIdSetIterator(); + + LUCENE_CLASS(DefaultFilteredDocIdSetIterator); + +protected: + FilteredDocIdSetPtr filtered; + +protected: + virtual bool match(int32_t docid); +}; + } #endif diff --git a/src/core/include/_FilteredQuery.h b/src/core/include/_FilteredQuery.h index 726a3751..80d91d2c 100644 --- a/src/core/include/_FilteredQuery.h +++ b/src/core/include/_FilteredQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,56 +10,55 @@ #include "Weight.h" #include "Scorer.h" -namespace Lucene -{ - class FilteredQueryWeight : public Weight - { - public: - FilteredQueryWeight(FilteredQueryPtr query, WeightPtr weight, SimilarityPtr similarity); - virtual ~FilteredQueryWeight(); - - LUCENE_CLASS(FilteredQueryWeight); - - protected: - FilteredQueryPtr query; - WeightPtr weight; - SimilarityPtr similarity; - double value; - - public: - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - virtual QueryPtr getQuery(); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - - friend class FilteredQueryWeightScorer; - }; - - class FilteredQueryWeightScorer : public Scorer - { - public: - FilteredQueryWeightScorer(FilteredQueryWeightPtr weight, ScorerPtr scorer, DocIdSetIteratorPtr docIdSetIterator, SimilarityPtr similarity); - 
virtual ~FilteredQueryWeightScorer(); - - LUCENE_CLASS(FilteredQueryWeightScorer); - - protected: - FilteredQueryWeightPtr weight; - ScorerPtr scorer; - DocIdSetIteratorPtr docIdSetIterator; - int32_t doc; - - public: - virtual int32_t nextDoc(); - virtual int32_t docID(); - virtual int32_t advance(int32_t target); - virtual double score(); - - protected: - int32_t advanceToCommon(int32_t scorerDoc, int32_t disiDoc); - }; +namespace Lucene { + +class FilteredQueryWeight : public Weight { +public: + FilteredQueryWeight(const FilteredQueryPtr& query, const WeightPtr& weight, const SimilarityPtr& similarity); + virtual ~FilteredQueryWeight(); + + LUCENE_CLASS(FilteredQueryWeight); + +protected: + FilteredQueryPtr query; + WeightPtr weight; + SimilarityPtr similarity; + double value; + +public: + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); + virtual QueryPtr getQuery(); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + + friend class FilteredQueryWeightScorer; +}; + +class FilteredQueryWeightScorer : public Scorer { +public: + FilteredQueryWeightScorer(const FilteredQueryWeightPtr& weight, const ScorerPtr& scorer, const DocIdSetIteratorPtr& docIdSetIterator, const SimilarityPtr& similarity); + virtual ~FilteredQueryWeightScorer(); + + LUCENE_CLASS(FilteredQueryWeightScorer); + +protected: + FilteredQueryWeightPtr weight; + ScorerPtr scorer; + DocIdSetIteratorPtr docIdSetIterator; + int32_t doc; + +public: + virtual int32_t nextDoc(); + virtual int32_t docID(); + virtual int32_t advance(int32_t target); + virtual double score(); + +protected: + int32_t advanceToCommon(int32_t scorerDoc, int32_t disiDoc); +}; + } #endif diff --git a/src/core/include/_FuzzyQuery.h b/src/core/include/_FuzzyQuery.h index d2705298..4e9cc6d1 100644 --- a/src/core/include/_FuzzyQuery.h +++ 
b/src/core/include/_FuzzyQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,33 +9,32 @@ #include "PriorityQueue.h" -namespace Lucene -{ - class ScoreTerm : public LuceneObject - { - public: - virtual ~ScoreTerm(); - LUCENE_CLASS(ScoreTerm); - - public: - TermPtr term; - double score; - - public: - int32_t compareTo(ScoreTermPtr other); - }; - - class ScoreTermQueue : public PriorityQueue - { - public: - ScoreTermQueue(int32_t size); - virtual ~ScoreTermQueue(); - - LUCENE_CLASS(ScoreTermQueue); - - protected: - virtual bool lessThan(const ScoreTermPtr& first, const ScoreTermPtr& second); - }; +namespace Lucene { + +class ScoreTerm : public LuceneObject { +public: + virtual ~ScoreTerm(); + LUCENE_CLASS(ScoreTerm); + +public: + TermPtr term; + double score; + +public: + int32_t compareTo(const ScoreTermPtr& other); +}; + +class ScoreTermQueue : public PriorityQueue { +public: + ScoreTermQueue(int32_t size); + virtual ~ScoreTermQueue(); + + LUCENE_CLASS(ScoreTermQueue); + +protected: + virtual bool lessThan(const ScoreTermPtr& first, const ScoreTermPtr& second); +}; + } #endif diff --git a/src/core/include/_IndexReader.h b/src/core/include/_IndexReader.h index 4f819111..ca252774 100644 --- a/src/core/include/_IndexReader.h +++ b/src/core/include/_IndexReader.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,19 +9,19 @@ #include "_SegmentInfos.h" -namespace Lucene -{ - class FindSegmentsModified : public FindSegmentsFileT - { - public: - FindSegmentsModified(SegmentInfosPtr infos, DirectoryPtr directory); - virtual ~FindSegmentsModified(); - - LUCENE_CLASS(FindSegmentsModified); - - public: - virtual uint64_t doBody(const String& segmentFileName); - }; +namespace Lucene { + +class FindSegmentsModified : public FindSegmentsFileT { +public: + FindSegmentsModified(const SegmentInfosPtr& infos, const DirectoryPtr& directory); + virtual ~FindSegmentsModified(); + + LUCENE_CLASS(FindSegmentsModified); + +public: + virtual uint64_t doBody(const String& segmentFileName); +}; + } #endif diff --git a/src/core/include/_IndexWriter.h b/src/core/include/_IndexWriter.h index 1070cc3b..a6837576 100644 --- a/src/core/include/_IndexWriter.h +++ b/src/core/include/_IndexWriter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,59 +9,59 @@ #include "LuceneObject.h" -namespace Lucene -{ - /// Holds shared SegmentReader instances. IndexWriter uses SegmentReaders for 1) applying deletes, - /// 2) doing merges, 3) handing out a real-time reader. This pool reuses instances of the SegmentReaders - /// in all these places if it is in "near real-time mode" (getReader() has been called on this instance). 
- class ReaderPool : public LuceneObject - { - public: - ReaderPool(IndexWriterPtr writer); - virtual ~ReaderPool(); - - LUCENE_CLASS(ReaderPool); - - protected: - IndexWriterWeakPtr _indexWriter; - MapSegmentInfoSegmentReader readerMap; - - public: - /// Forcefully clear changes for the specified segments, and remove from the pool. - /// This is called on successful merge. - void clear(SegmentInfosPtr infos); - - /// used only by asserts - bool infoIsLive(SegmentInfoPtr info); - SegmentInfoPtr mapToLive(SegmentInfoPtr info); - - /// Release the segment reader (i.e. decRef it and close if there are no more references. - void release(SegmentReaderPtr sr); - - /// Release the segment reader (i.e. decRef it and close if there are no more references. - void release(SegmentReaderPtr sr, bool drop); - - /// Remove all our references to readers, and commits any pending changes. - void close(); - - /// Commit all segment reader in the pool. - void commit(); - - /// Returns a ref to a clone. NOTE: this clone is not enrolled in the pool, so you should - /// simply close() it when you're done (ie, do not call release()). - IndexReaderPtr getReadOnlyClone(const SegmentInfoPtr info, bool doOpenStores, int32_t termInfosIndexDivisor); - - /// Obtain a SegmentReader from the readerPool. The reader must be returned by calling - /// {@link #release(SegmentReader)} - SegmentReaderPtr get(SegmentInfoPtr info, bool doOpenStores); - - /// Obtain a SegmentReader from the readerPool. The reader must be returned by calling - /// {@link #release(SegmentReader)} - SegmentReaderPtr get(SegmentInfoPtr info, bool doOpenStores, int32_t readBufferSize, int32_t termsIndexDivisor); - - /// Returns a ref - SegmentReaderPtr getIfExists(SegmentInfoPtr info); - }; +namespace Lucene { + +/// Holds shared SegmentReader instances. IndexWriter uses SegmentReaders for 1) applying deletes, +/// 2) doing merges, 3) handing out a real-time reader. 
This pool reuses instances of the SegmentReaders +/// in all these places if it is in "near real-time mode" (getReader() has been called on this instance). +class ReaderPool : public LuceneObject { +public: + ReaderPool(const IndexWriterPtr& writer); + virtual ~ReaderPool(); + + LUCENE_CLASS(ReaderPool); + +protected: + IndexWriterWeakPtr _indexWriter; + MapSegmentInfoSegmentReader readerMap; + +public: + /// Forcefully clear changes for the specified segments, and remove from the pool. + /// This is called on successful merge. + void clear(const SegmentInfosPtr& infos); + + /// used only by asserts + bool infoIsLive(const SegmentInfoPtr& info); + SegmentInfoPtr mapToLive(const SegmentInfoPtr& info); + + /// Release the segment reader (i.e. decRef it and close if there are no more references. + void release(const SegmentReaderPtr& sr); + + /// Release the segment reader (i.e. decRef it and close if there are no more references. + void release(const SegmentReaderPtr& sr, bool drop); + + /// Remove all our references to readers, and commits any pending changes. + void close(); + + /// Commit all segment reader in the pool. + void commit(); + + /// Returns a ref to a clone. NOTE: this clone is not enrolled in the pool, so you should + /// simply close() it when you're done (ie, do not call release()). + IndexReaderPtr getReadOnlyClone(const SegmentInfoPtr& info, bool doOpenStores, int32_t termInfosIndexDivisor); + + /// Obtain a SegmentReader from the readerPool. The reader must be returned by calling + /// {@link #release(SegmentReader)} + SegmentReaderPtr get(const SegmentInfoPtr& info, bool doOpenStores); + + /// Obtain a SegmentReader from the readerPool. 
The reader must be returned by calling + /// {@link #release(SegmentReader)} + SegmentReaderPtr get(const SegmentInfoPtr& info, bool doOpenStores, int32_t readBufferSize, int32_t termsIndexDivisor); + + /// Returns a ref + SegmentReaderPtr getIfExists(const SegmentInfoPtr& info); +}; + } #endif diff --git a/src/core/include/_IntFieldSource.h b/src/core/include/_IntFieldSource.h index b016a066..bbf25ae4 100644 --- a/src/core/include/_IntFieldSource.h +++ b/src/core/include/_IntFieldSource.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,26 +9,26 @@ #include "DocValues.h" -namespace Lucene -{ - class IntDocValues : public DocValues - { - public: - IntDocValues(IntFieldSourcePtr source, Collection arr); - virtual ~IntDocValues(); - - LUCENE_CLASS(IntDocValues); - - protected: - IntFieldSourceWeakPtr _source; - Collection arr; - - public: - virtual double doubleVal(int32_t doc); - virtual int32_t intVal(int32_t doc); - virtual String toString(int32_t doc); - virtual CollectionValue getInnerArray(); - }; +namespace Lucene { + +class IntDocValues : public DocValues { +public: + IntDocValues(const IntFieldSourcePtr& source, Collection arr); + virtual ~IntDocValues(); + + LUCENE_CLASS(IntDocValues); + +protected: + IntFieldSourceWeakPtr _source; + Collection arr; + +public: + virtual double doubleVal(int32_t doc); + virtual int32_t intVal(int32_t doc); + virtual String toString(int32_t doc); + virtual CollectionValue getInnerArray(); +}; + } #endif diff --git a/src/core/include/_MMapDirectory.h b/src/core/include/_MMapDirectory.h index 19de2b8e..3438f666 100644 --- a/src/core/include/_MMapDirectory.h +++ 
b/src/core/include/_MMapDirectory.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,51 +10,51 @@ #include #include "IndexInput.h" -namespace Lucene -{ - class MMapIndexInput : public IndexInput - { - public: - MMapIndexInput(const String& path = L""); - virtual ~MMapIndexInput(); - - LUCENE_CLASS(MMapIndexInput); - - protected: - int32_t _length; - bool isClone; - boost::iostreams::mapped_file_source file; - int32_t bufferPosition; // next byte to read - - public: - /// Reads and returns a single byte. - /// @see IndexOutput#writeByte(uint8_t) - virtual uint8_t readByte(); - - /// Reads a specified number of bytes into an array at the specified offset. - /// @param b the array to read bytes into. - /// @param offset the offset in the array to start storing bytes. - /// @param length the number of bytes to read. - /// @see IndexOutput#writeBytes(const uint8_t*,int) - virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); - - /// Returns the current position in this file, where the next read will occur. - /// @see #seek(int64_t) - virtual int64_t getFilePointer(); - - /// Sets current position in this file, where the next read will occur. - /// @see #getFilePointer() - virtual void seek(int64_t pos); - - /// The number of bytes in the file. - virtual int64_t length(); - - /// Closes the stream to further operations. - virtual void close(); - - /// Returns a clone of this stream. 
- virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; +namespace Lucene { + +class MMapIndexInput : public IndexInput { +public: + MMapIndexInput(const String& path = L""); + virtual ~MMapIndexInput(); + + LUCENE_CLASS(MMapIndexInput); + +protected: + int32_t _length; + bool isClone; + boost::iostreams::mapped_file_source file; + int32_t bufferPosition; // next byte to read + +public: + /// Reads and returns a single byte. + /// @see IndexOutput#writeByte(uint8_t) + virtual uint8_t readByte(); + + /// Reads a specified number of bytes into an array at the specified offset. + /// @param b the array to read bytes into. + /// @param offset the offset in the array to start storing bytes. + /// @param length the number of bytes to read. + /// @see IndexOutput#writeBytes(const uint8_t*,int) + virtual void readBytes(uint8_t* b, int32_t offset, int32_t length); + + /// Returns the current position in this file, where the next read will occur. + /// @see #seek(int64_t) + virtual int64_t getFilePointer(); + + /// Sets current position in this file, where the next read will occur. + /// @see #getFilePointer() + virtual void seek(int64_t pos); + + /// The number of bytes in the file. + virtual int64_t length(); + + /// Closes the stream to further operations. + virtual void close(); + + /// Returns a clone of this stream. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + } #endif diff --git a/src/core/include/_MatchAllDocsQuery.h b/src/core/include/_MatchAllDocsQuery.h index 058e5dc3..6ac81734 100644 --- a/src/core/include/_MatchAllDocsQuery.h +++ b/src/core/include/_MatchAllDocsQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,55 +10,54 @@ #include "Weight.h" #include "Scorer.h" -namespace Lucene -{ - class MatchAllDocsWeight : public Weight - { - public: - MatchAllDocsWeight(MatchAllDocsQueryPtr query, SearcherPtr searcher); - virtual ~MatchAllDocsWeight(); - - LUCENE_CLASS(MatchAllDocsWeight); - - protected: - MatchAllDocsQueryPtr query; - SimilarityPtr similarity; - double queryWeight; - double queryNorm; - - public: - virtual String toString(); - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - }; - - class MatchAllScorer : public Scorer - { - public: - MatchAllScorer(MatchAllDocsQueryPtr query, IndexReaderPtr reader, SimilarityPtr similarity, WeightPtr weight, ByteArray norms); - virtual ~MatchAllScorer(); - - LUCENE_CLASS(MatchAllScorer); - - public: - TermDocsPtr termDocs; - double _score; - ByteArray norms; - - protected: - MatchAllDocsQueryPtr query; - int32_t doc; - - public: - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual double score(); - virtual int32_t advance(int32_t target); - }; +namespace Lucene { + +class MatchAllDocsWeight : public Weight { +public: + MatchAllDocsWeight(const MatchAllDocsQueryPtr& query, const SearcherPtr& searcher); + virtual ~MatchAllDocsWeight(); + + LUCENE_CLASS(MatchAllDocsWeight); + +protected: + MatchAllDocsQueryPtr query; + SimilarityPtr similarity; + double queryWeight; + double queryNorm; + +public: + virtual String toString(); + virtual QueryPtr getQuery(); + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual 
ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); +}; + +class MatchAllScorer : public Scorer { +public: + MatchAllScorer(const MatchAllDocsQueryPtr& query, const IndexReaderPtr& reader, const SimilarityPtr& similarity, const WeightPtr& weight, ByteArray norms); + virtual ~MatchAllScorer(); + + LUCENE_CLASS(MatchAllScorer); + +public: + TermDocsPtr termDocs; + double _score; + ByteArray norms; + +protected: + MatchAllDocsQueryPtr query; + int32_t doc; + +public: + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual double score(); + virtual int32_t advance(int32_t target); +}; + } #endif diff --git a/src/core/include/_MultiPhraseQuery.h b/src/core/include/_MultiPhraseQuery.h index 207eb56b..d161dc6e 100644 --- a/src/core/include/_MultiPhraseQuery.h +++ b/src/core/include/_MultiPhraseQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,32 +9,32 @@ #include "Weight.h" -namespace Lucene -{ - class MultiPhraseWeight : public Weight - { - public: - MultiPhraseWeight(MultiPhraseQueryPtr query, SearcherPtr searcher); - virtual ~MultiPhraseWeight(); - - LUCENE_CLASS(MultiPhraseWeight); - - protected: - MultiPhraseQueryPtr query; - SimilarityPtr similarity; - double value; - double idf; - double queryNorm; - double queryWeight; - - public: - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - }; +namespace Lucene { + +class MultiPhraseWeight : public Weight { +public: + MultiPhraseWeight(const MultiPhraseQueryPtr& query, const SearcherPtr& searcher); + virtual ~MultiPhraseWeight(); + + LUCENE_CLASS(MultiPhraseWeight); + +protected: + MultiPhraseQueryPtr query; + SimilarityPtr similarity; + double value; + double idf; + double queryNorm; + double queryWeight; + +public: + virtual QueryPtr getQuery(); + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); +}; + } #endif diff --git a/src/core/include/_MultiSearcher.h b/src/core/include/_MultiSearcher.h index 5d41ea8e..0b2b3433 100644 --- a/src/core/include/_MultiSearcher.h +++ b/src/core/include/_MultiSearcher.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,103 +10,100 @@ #include "Searcher.h" #include "Collector.h" -namespace Lucene -{ - /// Document Frequency cache acting as a Dummy-Searcher. This class is not a full-fledged Searcher, but - /// only supports the methods necessary to initialize Weights. - class CachedDfSource : public Searcher - { - public: - CachedDfSource(MapTermInt dfMap, int32_t maxDoc, SimilarityPtr similarity); - virtual ~CachedDfSource(); - - LUCENE_CLASS(CachedDfSource); - - protected: - MapTermInt dfMap; // Map from Terms to corresponding doc freqs - int32_t _maxDoc; // document count - - public: - virtual int32_t docFreq(TermPtr term); - virtual Collection docFreqs(Collection terms); - virtual int32_t maxDoc(); - virtual QueryPtr rewrite(QueryPtr query); - virtual void close(); - virtual DocumentPtr doc(int32_t n); - virtual DocumentPtr doc(int32_t n, FieldSelectorPtr fieldSelector); - virtual ExplanationPtr explain(WeightPtr weight, int32_t doc); - virtual void search(WeightPtr weight, FilterPtr filter, CollectorPtr results); - virtual TopDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n); - virtual TopFieldDocsPtr search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort); - }; - - /// A subclass for searching a single searchable - class MultiSearcherCallableNoSort : public LuceneObject - { - public: - MultiSearcherCallableNoSort(SynchronizePtr lock, SearchablePtr searchable, WeightPtr weight, FilterPtr filter, int32_t nDocs, - HitQueuePtr hq, int32_t i, Collection starts); - virtual ~MultiSearcherCallableNoSort(); - - LUCENE_CLASS(MultiSearcherCallableNoSort); - - protected: - SynchronizePtr lock; - SearchablePtr searchable; - WeightPtr weight; - FilterPtr filter; - int32_t nDocs; - int32_t i; - HitQueuePtr hq; - Collection starts; - - public: - TopDocsPtr call(); - 
}; - - /// A subclass for searching a single searchable - class MultiSearcherCallableWithSort : public LuceneObject - { - public: - MultiSearcherCallableWithSort(SynchronizePtr lock, SearchablePtr searchable, WeightPtr weight, FilterPtr filter, - int32_t nDocs, FieldDocSortedHitQueuePtr hq, SortPtr sort, int32_t i, Collection starts); - virtual ~MultiSearcherCallableWithSort(); - - LUCENE_CLASS(MultiSearcherCallableWithSort); - - protected: - SynchronizePtr lock; - SearchablePtr searchable; - WeightPtr weight; - FilterPtr filter; - int32_t nDocs; - int32_t i; - FieldDocSortedHitQueuePtr hq; - Collection starts; - SortPtr sort; - - public: - TopFieldDocsPtr call(); - }; - - class MultiSearcherCollector : public Collector - { - public: - MultiSearcherCollector(CollectorPtr collector, int32_t start); - virtual ~MultiSearcherCollector(); - - LUCENE_CLASS(MultiSearcherCollector); - - protected: - CollectorPtr collector; - int32_t start; - - public: - virtual void setScorer(ScorerPtr scorer); - virtual void collect(int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual bool acceptsDocsOutOfOrder(); - }; +namespace Lucene { + +/// Document Frequency cache acting as a Dummy-Searcher. This class is not a full-fledged Searcher, but +/// only supports the methods necessary to initialize Weights. 
+class CachedDfSource : public Searcher { +public: + CachedDfSource(MapTermInt dfMap, int32_t maxDoc, const SimilarityPtr& similarity); + virtual ~CachedDfSource(); + + LUCENE_CLASS(CachedDfSource); + +protected: + MapTermInt dfMap; // Map from Terms to corresponding doc freqs + int32_t _maxDoc; // document count + +public: + virtual int32_t docFreq(const TermPtr& term); + virtual Collection docFreqs(Collection terms); + virtual int32_t maxDoc(); + virtual QueryPtr rewrite(const QueryPtr& query); + virtual void close(); + virtual DocumentPtr doc(int32_t n); + virtual DocumentPtr doc(int32_t n, const FieldSelectorPtr& fieldSelector); + virtual ExplanationPtr explain(const WeightPtr& weight, int32_t doc); + virtual void search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results); + virtual TopDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n); + virtual TopFieldDocsPtr search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort); +}; + +/// A subclass for searching a single searchable +class MultiSearcherCallableNoSort : public LuceneObject { +public: + MultiSearcherCallableNoSort(const SynchronizePtr& lock, const SearchablePtr& searchable, const WeightPtr& weight, const FilterPtr& filter, int32_t nDocs, + const HitQueuePtr& hq, int32_t i, Collection starts); + virtual ~MultiSearcherCallableNoSort(); + + LUCENE_CLASS(MultiSearcherCallableNoSort); + +protected: + SynchronizePtr lock; + SearchablePtr searchable; + WeightPtr weight; + FilterPtr filter; + int32_t nDocs; + int32_t i; + HitQueuePtr hq; + Collection starts; + +public: + TopDocsPtr call(); +}; + +/// A subclass for searching a single searchable +class MultiSearcherCallableWithSort : public LuceneObject { +public: + MultiSearcherCallableWithSort(const SynchronizePtr& lock, const SearchablePtr& searchable, const WeightPtr& weight, const FilterPtr& filter, + int32_t nDocs, const FieldDocSortedHitQueuePtr& hq, const SortPtr& sort, 
int32_t i, Collection starts); + virtual ~MultiSearcherCallableWithSort(); + + LUCENE_CLASS(MultiSearcherCallableWithSort); + +protected: + SynchronizePtr lock; + SearchablePtr searchable; + WeightPtr weight; + FilterPtr filter; + int32_t nDocs; + int32_t i; + FieldDocSortedHitQueuePtr hq; + Collection starts; + SortPtr sort; + +public: + TopFieldDocsPtr call(); +}; + +class MultiSearcherCollector : public Collector { +public: + MultiSearcherCollector(const CollectorPtr& collector, int32_t start); + virtual ~MultiSearcherCollector(); + + LUCENE_CLASS(MultiSearcherCollector); + +protected: + CollectorPtr collector; + int32_t start; + +public: + virtual void setScorer(const ScorerPtr& scorer); + virtual void collect(int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual bool acceptsDocsOutOfOrder(); +}; + } #endif diff --git a/src/core/include/_MultiTermQuery.h b/src/core/include/_MultiTermQuery.h index 3f906fb9..27e8635d 100644 --- a/src/core/include/_MultiTermQuery.h +++ b/src/core/include/_MultiTermQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,48 +9,45 @@ #include "LuceneObject.h" -namespace Lucene -{ - class ConstantScoreFilterRewrite : public RewriteMethod - { - public: - virtual ~ConstantScoreFilterRewrite(); - LUCENE_CLASS(ConstantScoreFilterRewrite); - - public: - virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query); - }; - - class ScoringBooleanQueryRewrite : public RewriteMethod - { - public: - virtual ~ScoringBooleanQueryRewrite(); - LUCENE_CLASS(ScoringBooleanQueryRewrite); - - public: - virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query); - }; - - class ConstantScoreBooleanQueryRewrite : public ScoringBooleanQueryRewrite - { - public: - virtual ~ConstantScoreBooleanQueryRewrite(); - LUCENE_CLASS(ConstantScoreBooleanQueryRewrite); - - public: - virtual QueryPtr rewrite(IndexReaderPtr reader, MultiTermQueryPtr query); - }; - - class ConstantScoreAutoRewriteDefault : public ConstantScoreAutoRewrite - { - public: - virtual ~ConstantScoreAutoRewriteDefault(); - LUCENE_CLASS(ConstantScoreAutoRewriteDefault); - - public: - virtual void setTermCountCutoff(int32_t count); - virtual void setDocCountPercent(double percent); - }; +namespace Lucene { + +class ConstantScoreFilterRewrite : public RewriteMethod { +public: + virtual ~ConstantScoreFilterRewrite(); + LUCENE_CLASS(ConstantScoreFilterRewrite); + +public: + virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query); +}; + +class ScoringBooleanQueryRewrite : public RewriteMethod { +public: + virtual ~ScoringBooleanQueryRewrite(); + LUCENE_CLASS(ScoringBooleanQueryRewrite); + +public: + virtual QueryPtr rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query); +}; + +class ConstantScoreBooleanQueryRewrite : public ScoringBooleanQueryRewrite { +public: + virtual ~ConstantScoreBooleanQueryRewrite(); + LUCENE_CLASS(ConstantScoreBooleanQueryRewrite); + +public: + virtual QueryPtr 
rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query); +}; + +class ConstantScoreAutoRewriteDefault : public ConstantScoreAutoRewrite { +public: + virtual ~ConstantScoreAutoRewriteDefault(); + LUCENE_CLASS(ConstantScoreAutoRewriteDefault); + +public: + virtual void setTermCountCutoff(int32_t count); + virtual void setDocCountPercent(double percent); +}; + } #endif diff --git a/src/core/include/_MultipleTermPositions.h b/src/core/include/_MultipleTermPositions.h index c5e7f197..b87536b3 100644 --- a/src/core/include/_MultipleTermPositions.h +++ b/src/core/include/_MultipleTermPositions.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,50 +9,49 @@ #include "PriorityQueue.h" -namespace Lucene -{ - class TermPositionsQueue : public PriorityQueue - { - public: - TermPositionsQueue(Collection termPositions); - virtual ~TermPositionsQueue(); - - LUCENE_CLASS(TermPositionsQueue); - - protected: - Collection termPositions; - - public: - virtual void initialize(); - - protected: - virtual bool lessThan(const TermPositionsPtr& first, const TermPositionsPtr& second); - }; - - class IntQueue : public LuceneObject - { - public: - IntQueue(); - virtual ~IntQueue(); - - LUCENE_CLASS(IntQueue); - - protected: - int32_t arraySize; - int32_t index; - int32_t lastIndex; - Collection array; - - public: - void add(int32_t i); - int32_t next(); - void sort(); - void clear(); - int32_t size(); - - protected: - void growArray(); - }; +namespace Lucene { + +class TermPositionsQueue : public PriorityQueue { +public: + TermPositionsQueue(Collection termPositions); + virtual ~TermPositionsQueue(); + + 
LUCENE_CLASS(TermPositionsQueue); + +protected: + Collection termPositions; + +public: + virtual void initialize(); + +protected: + virtual bool lessThan(const TermPositionsPtr& first, const TermPositionsPtr& second); +}; + +class IntQueue : public LuceneObject { +public: + IntQueue(); + virtual ~IntQueue(); + + LUCENE_CLASS(IntQueue); + +protected: + int32_t arraySize; + int32_t index; + int32_t lastIndex; + Collection array; + +public: + void add(int32_t i); + int32_t next(); + void sort(); + void clear(); + int32_t size(); + +protected: + void growArray(); +}; + } #endif diff --git a/src/core/include/_NativeFSLockFactory.h b/src/core/include/_NativeFSLockFactory.h index ae3f3c81..0a2ef999 100644 --- a/src/core/include/_NativeFSLockFactory.h +++ b/src/core/include/_NativeFSLockFactory.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,33 +9,33 @@ #include "Lock.h" -namespace Lucene -{ - class NativeFSLock : public Lock - { - public: - NativeFSLock(const String& lockDir, const String& lockFileName); - virtual ~NativeFSLock(); - - LUCENE_CLASS(NativeFSLock); - - protected: - String lockDir; - String path; - filelockPtr lock; - - static SynchronizePtr LOCK_HELD_LOCK(); - static HashSet LOCK_HELD(); - - public: - virtual bool obtain(); - virtual void release(); - virtual bool isLocked(); - virtual String toString(); - - protected: - bool lockExists(); - }; +namespace Lucene { + +class NativeFSLock : public Lock { +public: + NativeFSLock(const String& lockDir, const String& lockFileName); + virtual ~NativeFSLock(); + + LUCENE_CLASS(NativeFSLock); + +protected: + String lockDir; + String path; + filelockPtr lock; + + static SynchronizePtr LOCK_HELD_LOCK(); + static HashSet LOCK_HELD(); + +public: + virtual bool obtain(); + virtual void release(); + virtual bool isLocked(); + virtual String toString(); + +protected: + bool lockExists(); +}; + } #endif diff --git a/src/core/include/_NearSpansUnordered.h b/src/core/include/_NearSpansUnordered.h index 98329370..02ab5406 100644 --- a/src/core/include/_NearSpansUnordered.h +++ b/src/core/include/_NearSpansUnordered.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,51 +10,50 @@ #include "Spans.h" #include "PriorityQueue.h" -namespace Lucene -{ - /// Wraps a Spans, and can be used to form a linked list. 
- class SpansCell : public Spans - { - public: - SpansCell(NearSpansUnorderedPtr unordered, SpansPtr spans, int32_t index); - virtual ~SpansCell(); - - LUCENE_CLASS(SpansCell); - - protected: - NearSpansUnorderedWeakPtr _unordered; - SpansPtr spans; - SpansCellPtr _next; - int32_t length; - int32_t index; - - public: - virtual bool next(); - virtual bool skipTo(int32_t target); - virtual int32_t doc(); - virtual int32_t start(); - virtual int32_t end(); - virtual Collection getPayload(); - virtual bool isPayloadAvailable(); - virtual String toString(); - - protected: - bool adjust(bool condition); - - friend class NearSpansUnordered; - }; - - class CellQueue : public PriorityQueue - { - public: - CellQueue(int32_t size); - virtual ~CellQueue(); - - LUCENE_CLASS(CellQueue); - - protected: - virtual bool lessThan(const SpansCellPtr& first, const SpansCellPtr& second); - }; +namespace Lucene { + +/// Wraps a Spans, and can be used to form a linked list. +class SpansCell : public Spans { +public: + SpansCell(const NearSpansUnorderedPtr& unordered, const SpansPtr& spans, int32_t index); + virtual ~SpansCell(); + + LUCENE_CLASS(SpansCell); + +protected: + NearSpansUnorderedWeakPtr _unordered; + SpansPtr spans; + SpansCellPtr _next; + int32_t length; + int32_t index; + +public: + virtual bool next(); + virtual bool skipTo(int32_t target); + virtual int32_t doc(); + virtual int32_t start(); + virtual int32_t end(); + virtual Collection getPayload(); + virtual bool isPayloadAvailable(); + virtual String toString(); + +protected: + bool adjust(bool condition); + + friend class NearSpansUnordered; +}; + +class CellQueue : public PriorityQueue { +public: + CellQueue(int32_t size); + virtual ~CellQueue(); + + LUCENE_CLASS(CellQueue); + +protected: + virtual bool lessThan(const SpansCellPtr& first, const SpansCellPtr& second); +}; + } #endif diff --git a/src/core/include/_NoLockFactory.h b/src/core/include/_NoLockFactory.h index 5de9edf7..cc80befc 100644 --- 
a/src/core/include/_NoLockFactory.h +++ b/src/core/include/_NoLockFactory.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,21 +9,21 @@ #include "Lock.h" -namespace Lucene -{ - class NoLock : public Lock - { - public: - virtual ~NoLock(); - - LUCENE_CLASS(NoLock); - - public: - virtual bool obtain(); - virtual void release(); - virtual bool isLocked(); - virtual String toString(); - }; +namespace Lucene { + +class NoLock : public Lock { +public: + virtual ~NoLock(); + + LUCENE_CLASS(NoLock); + +public: + virtual bool obtain(); + virtual void release(); + virtual bool isLocked(); + virtual String toString(); +}; + } #endif diff --git a/src/core/include/_NumericRangeQuery.h b/src/core/include/_NumericRangeQuery.h index 57122fa6..66b0b389 100644 --- a/src/core/include/_NumericRangeQuery.h +++ b/src/core/include/_NumericRangeQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,79 +10,77 @@ #include "FilteredTermEnum.h" #include "NumericUtils.h" -namespace Lucene -{ - /// Subclass of FilteredTermEnum for enumerating all terms that match the sub-ranges for trie range queries. - /// - /// Warning: This term enumeration is not guaranteed to be always ordered by {@link Term#compareTo}. 
The - /// ordering depends on how {@link NumericUtils#splitLongRange} and {@link NumericUtils#splitIntRange} - /// generates the sub-ranges. For {@link MultiTermQuery} ordering is not relevant. - class NumericRangeTermEnum : public FilteredTermEnum - { - public: - NumericRangeTermEnum(NumericRangeQueryPtr query, IndexReaderPtr reader); - virtual ~NumericRangeTermEnum(); - - LUCENE_CLASS(NumericRangeTermEnum); - - protected: - NumericRangeQueryWeakPtr _query; - IndexReaderPtr reader; - Collection rangeBounds; - TermPtr termTemplate; - String currentUpperBound; - - public: - virtual double difference(); - - /// Increments the enumeration to the next element. True if one exists. - virtual bool next(); - - /// Closes the enumeration to further activity, freeing resources. - virtual void close(); - - protected: - /// This is a dummy, it is not used by this class. - virtual bool endEnum(); - - /// This is a dummy, it is not used by this class. - virtual void setEnum(TermEnumPtr actualEnum); - - /// Compares if current upper bound is reached, this also updates the term count for statistics. - /// In contrast to {@link FilteredTermEnum}, a return value of false ends iterating the current enum - /// and forwards to the next sub-range. 
- virtual bool termCompare(TermPtr term); - }; - - class NumericLongRangeBuilder : public LongRangeBuilder - { - public: - NumericLongRangeBuilder(Collection rangeBounds); - virtual ~NumericLongRangeBuilder(); - - LUCENE_CLASS(NumericLongRangeBuilder); - - protected: - Collection rangeBounds; - - public: - virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); - }; - - class NumericIntRangeBuilder : public IntRangeBuilder - { - public: - NumericIntRangeBuilder(Collection rangeBounds); - virtual ~NumericIntRangeBuilder(); - - LUCENE_CLASS(NumericIntRangeBuilder); - - protected: - Collection rangeBounds; - - public: - virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); - }; +namespace Lucene { + +/// Subclass of FilteredTermEnum for enumerating all terms that match the sub-ranges for trie range queries. +/// +/// Warning: This term enumeration is not guaranteed to be always ordered by {@link Term#compareTo}. The +/// ordering depends on how {@link NumericUtils#splitLongRange} and {@link NumericUtils#splitIntRange} +/// generates the sub-ranges. For {@link MultiTermQuery} ordering is not relevant. +class LPPAPI NumericRangeTermEnum : public FilteredTermEnum { +public: + NumericRangeTermEnum(const NumericRangeQueryPtr& query, const IndexReaderPtr& reader); + virtual ~NumericRangeTermEnum(); + + LUCENE_CLASS(NumericRangeTermEnum); + +protected: + NumericRangeQueryWeakPtr _query; + IndexReaderPtr reader; + Collection rangeBounds; + TermPtr termTemplate; + String currentUpperBound; + +public: + virtual double difference(); + + /// Increments the enumeration to the next element. True if one exists. + virtual bool next(); + + /// Closes the enumeration to further activity, freeing resources. + virtual void close(); + +protected: + /// This is a dummy, it is not used by this class. + virtual bool endEnum(); + + /// This is a dummy, it is not used by this class. 
+ virtual void setEnum(const TermEnumPtr& actualEnum); + + /// Compares if current upper bound is reached, this also updates the term count for statistics. + /// In contrast to {@link FilteredTermEnum}, a return value of false ends iterating the current enum + /// and forwards to the next sub-range. + virtual bool termCompare(const TermPtr& term); +}; + +class NumericLongRangeBuilder : public LongRangeBuilder { +public: + NumericLongRangeBuilder(Collection rangeBounds); + virtual ~NumericLongRangeBuilder(); + + LUCENE_CLASS(NumericLongRangeBuilder); + +protected: + Collection rangeBounds; + +public: + virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); +}; + +class NumericIntRangeBuilder : public IntRangeBuilder { +public: + NumericIntRangeBuilder(Collection rangeBounds); + virtual ~NumericIntRangeBuilder(); + + LUCENE_CLASS(NumericIntRangeBuilder); + +protected: + Collection rangeBounds; + +public: + virtual void addRange(const String& minPrefixCoded, const String& maxPrefixCoded); +}; + } #endif diff --git a/src/core/include/_OrdFieldSource.h b/src/core/include/_OrdFieldSource.h index 8438054a..df0687ff 100644 --- a/src/core/include/_OrdFieldSource.h +++ b/src/core/include/_OrdFieldSource.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,26 +9,26 @@ #include "DocValues.h" -namespace Lucene -{ - class LPPAPI OrdDocValues : public DocValues - { - public: - OrdDocValues(OrdFieldSourcePtr source, Collection arr); - virtual ~OrdDocValues(); - - LUCENE_CLASS(OrdDocValues); - - protected: - OrdFieldSourceWeakPtr _source; - Collection arr; - - public: - virtual double doubleVal(int32_t doc); - virtual String strVal(int32_t doc); - virtual String toString(int32_t doc); - virtual CollectionValue getInnerArray(); - }; +namespace Lucene { + +class LPPAPI OrdDocValues : public DocValues { +public: + OrdDocValues(const OrdFieldSourcePtr& source, Collection arr); + virtual ~OrdDocValues(); + + LUCENE_CLASS(OrdDocValues); + +protected: + OrdFieldSourceWeakPtr _source; + Collection arr; + +public: + virtual double doubleVal(int32_t doc); + virtual String strVal(int32_t doc); + virtual String toString(int32_t doc); + virtual CollectionValue getInnerArray(); +}; + } #endif diff --git a/src/core/include/_ParallelReader.h b/src/core/include/_ParallelReader.h index daeb8a57..53f6f599 100644 --- a/src/core/include/_ParallelReader.h +++ b/src/core/include/_ParallelReader.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,79 +11,77 @@ #include "TermDocs.h" #include "TermPositions.h" -namespace Lucene -{ - class ParallelTermEnum : public TermEnum - { - public: - ParallelTermEnum(ParallelReaderPtr reader); - ParallelTermEnum(ParallelReaderPtr reader, TermPtr term); - virtual ~ParallelTermEnum(); - - LUCENE_CLASS(ParallelTermEnum); - - protected: - ParallelReaderWeakPtr _reader; - String field; - MapStringIndexReader::iterator fieldIterator; - bool setIterator; - TermEnumPtr termEnum; - - public: - /// Increments the enumeration to the next element. True if one exists. - virtual bool next(); - - /// Returns the current Term in the enumeration. - virtual TermPtr term(); - - /// Returns the docFreq of the current Term in the enumeration. - virtual int32_t docFreq(); - - /// Closes the enumeration to further activity, freeing resources. - virtual void close(); - }; - - /// Wrap a TermDocs in order to support seek(Term) - class ParallelTermDocs : public TermPositions, public LuceneObject - { - public: - ParallelTermDocs(ParallelReaderPtr reader); - ParallelTermDocs(ParallelReaderPtr reader, TermPtr term); - virtual ~ParallelTermDocs(); - - LUCENE_CLASS(ParallelTermDocs); - - protected: - ParallelReaderWeakPtr _reader; - TermDocsPtr termDocs; - - public: - virtual int32_t doc(); - virtual int32_t freq(); - virtual void seek(TermPtr term); - virtual void seek(TermEnumPtr termEnum); - virtual bool next(); - virtual int32_t read(Collection docs, Collection freqs); - virtual bool skipTo(int32_t target); - virtual void close(); - }; - - class ParallelTermPositions : public ParallelTermDocs - { - public: - ParallelTermPositions(ParallelReaderPtr reader); - ParallelTermPositions(ParallelReaderPtr reader, TermPtr term); - virtual ~ParallelTermPositions(); - - LUCENE_CLASS(ParallelTermPositions); - - public: - virtual void seek(TermPtr term); - virtual int32_t nextPosition(); - virtual int32_t getPayloadLength(); - 
virtual ByteArray getPayload(ByteArray data, int32_t offset); - virtual bool isPayloadAvailable(); - }; +namespace Lucene { + +class ParallelTermEnum : public TermEnum { +public: + ParallelTermEnum(const ParallelReaderPtr& reader); + ParallelTermEnum(const ParallelReaderPtr& reader, const TermPtr& term); + virtual ~ParallelTermEnum(); + + LUCENE_CLASS(ParallelTermEnum); + +protected: + ParallelReaderWeakPtr _reader; + String field; + MapStringIndexReader::iterator fieldIterator; + bool setIterator; + TermEnumPtr termEnum; + +public: + /// Increments the enumeration to the next element. True if one exists. + virtual bool next(); + + /// Returns the current Term in the enumeration. + virtual TermPtr term(); + + /// Returns the docFreq of the current Term in the enumeration. + virtual int32_t docFreq(); + + /// Closes the enumeration to further activity, freeing resources. + virtual void close(); +}; + +/// Wrap a TermDocs in order to support seek(Term) +class ParallelTermDocs : public TermPositions, public LuceneObject { +public: + ParallelTermDocs(const ParallelReaderPtr& reader); + ParallelTermDocs(const ParallelReaderPtr& reader, const TermPtr& term); + virtual ~ParallelTermDocs(); + + LUCENE_CLASS(ParallelTermDocs); + +protected: + ParallelReaderWeakPtr _reader; + TermDocsPtr termDocs; + +public: + virtual int32_t doc(); + virtual int32_t freq(); + virtual void seek(const TermPtr& term); + virtual void seek(const TermEnumPtr& termEnum); + virtual bool next(); + virtual int32_t read(Collection& docs, Collection& freqs); + virtual bool skipTo(int32_t target); + virtual void close(); +}; + +class ParallelTermPositions : public ParallelTermDocs { +public: + ParallelTermPositions(const ParallelReaderPtr& reader); + ParallelTermPositions(const ParallelReaderPtr& reader, const TermPtr& term); + virtual ~ParallelTermPositions(); + + LUCENE_CLASS(ParallelTermPositions); + +public: + virtual void seek(const TermPtr& term); + virtual int32_t nextPosition(); + virtual 
int32_t getPayloadLength(); + virtual ByteArray getPayload(ByteArray data, int32_t offset); + virtual bool isPayloadAvailable(); +}; + } #endif diff --git a/src/core/include/_PayloadTermQuery.h b/src/core/include/_PayloadTermQuery.h index 309e9ae5..d82a7d0d 100644 --- a/src/core/include/_PayloadTermQuery.h +++ b/src/core/include/_PayloadTermQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,58 +10,57 @@ #include "SpanWeight.h" #include "SpanScorer.h" -namespace Lucene -{ - class PayloadTermWeight : public SpanWeight - { - public: - PayloadTermWeight(PayloadTermQueryPtr query, SearcherPtr searcher); - virtual ~PayloadTermWeight(); - - LUCENE_CLASS(PayloadTermWeight); - - public: - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - }; - - class PayloadTermSpanScorer : public SpanScorer - { - public: - PayloadTermSpanScorer(TermSpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms); - virtual ~PayloadTermSpanScorer(); - - LUCENE_CLASS(PayloadTermSpanScorer); - - protected: - ByteArray payload; - TermPositionsPtr positions; - double payloadScore; - int32_t payloadsSeen; - - public: - virtual double score(); - - protected: - virtual bool setFreqCurrentDoc(); - - void processPayload(SimilarityPtr similarity); - - /// Returns the SpanScorer score only. 
- /// - /// Should not be overridden without good cause - /// - /// @return the score for just the Span part without the payload - /// @see #score() - virtual double getSpanScore(); - - /// The score for the payload - /// - /// @return The score, as calculated by {@link PayloadFunction#docScore(int32_t, const String&, - /// int32_t, double)} - virtual double getPayloadScore(); - - virtual ExplanationPtr explain(int32_t doc); - }; +namespace Lucene { + +class PayloadTermWeight : public SpanWeight { +public: + PayloadTermWeight(const PayloadTermQueryPtr& query, const SearcherPtr& searcher); + virtual ~PayloadTermWeight(); + + LUCENE_CLASS(PayloadTermWeight); + +public: + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); +}; + +class PayloadTermSpanScorer : public SpanScorer { +public: + PayloadTermSpanScorer(const TermSpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms); + virtual ~PayloadTermSpanScorer(); + + LUCENE_CLASS(PayloadTermSpanScorer); + +protected: + ByteArray payload; + TermPositionsPtr positions; + double payloadScore; + int32_t payloadsSeen; + +public: + virtual double score(); + +protected: + virtual bool setFreqCurrentDoc(); + + void processPayload(const SimilarityPtr& similarity); + + /// Returns the SpanScorer score only. 
+ /// + /// Should not be overridden without good cause + /// + /// @return the score for just the Span part without the payload + /// @see #score() + virtual double getSpanScore(); + + /// The score for the payload + /// + /// @return The score, as calculated by {@link PayloadFunction#docScore(int32_t, const String&, + /// int32_t, double)} + virtual double getPayloadScore(); + + virtual ExplanationPtr explain(int32_t doc); +}; + } #endif diff --git a/src/core/include/_PhraseQuery.h b/src/core/include/_PhraseQuery.h index 1246eaca..c82bf289 100644 --- a/src/core/include/_PhraseQuery.h +++ b/src/core/include/_PhraseQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,34 +9,34 @@ #include "Weight.h" -namespace Lucene -{ - class PhraseWeight : public Weight - { - public: - PhraseWeight(PhraseQueryPtr query, SearcherPtr searcher); - virtual ~PhraseWeight(); - - LUCENE_CLASS(PhraseWeight); - - protected: - PhraseQueryPtr query; - SimilarityPtr similarity; - double value; - double idf; - double queryNorm; - double queryWeight; - IDFExplanationPtr idfExp; - - public: - virtual String toString(); - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - }; +namespace Lucene { + +class PhraseWeight : public Weight { +public: + PhraseWeight(const PhraseQueryPtr& query, const SearcherPtr& searcher); + virtual ~PhraseWeight(); + + LUCENE_CLASS(PhraseWeight); + 
+protected: + PhraseQueryPtr query; + SimilarityPtr similarity; + double value; + double idf; + double queryNorm; + double queryWeight; + IDFExplanationPtr idfExp; + +public: + virtual String toString(); + virtual QueryPtr getQuery(); + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); +}; + } #endif diff --git a/src/core/include/_QueryWrapperFilter.h b/src/core/include/_QueryWrapperFilter.h index 145408d1..658a3d1b 100644 --- a/src/core/include/_QueryWrapperFilter.h +++ b/src/core/include/_QueryWrapperFilter.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,24 +9,24 @@ #include "DocIdSet.h" -namespace Lucene -{ - class QueryWrapperFilterDocIdSet : public DocIdSet - { - public: - QueryWrapperFilterDocIdSet(IndexReaderPtr reader, WeightPtr weight); - virtual ~QueryWrapperFilterDocIdSet(); - - LUCENE_CLASS(QueryWrapperFilterDocIdSet); - - protected: - IndexReaderPtr reader; - WeightPtr weight; - - public: - virtual DocIdSetIteratorPtr iterator(); - virtual bool isCacheable(); - }; +namespace Lucene { + +class QueryWrapperFilterDocIdSet : public DocIdSet { +public: + QueryWrapperFilterDocIdSet(const IndexReaderPtr& reader, const WeightPtr& weight); + virtual ~QueryWrapperFilterDocIdSet(); + + LUCENE_CLASS(QueryWrapperFilterDocIdSet); + +protected: + IndexReaderPtr reader; + WeightPtr weight; + +public: + virtual DocIdSetIteratorPtr iterator(); + virtual bool isCacheable(); +}; + } #endif diff --git a/src/core/include/_ReverseOrdFieldSource.h b/src/core/include/_ReverseOrdFieldSource.h index 8eb5f8cd..18c8e28f 100644 --- a/src/core/include/_ReverseOrdFieldSource.h +++ b/src/core/include/_ReverseOrdFieldSource.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,28 +9,28 @@ #include "DocValues.h" -namespace Lucene -{ - class ReverseOrdDocValues : public DocValues - { - public: - ReverseOrdDocValues(ReverseOrdFieldSourcePtr source, Collection arr, int32_t end); - virtual ~ReverseOrdDocValues(); - - LUCENE_CLASS(ReverseOrdDocValues); - - protected: - ReverseOrdFieldSourceWeakPtr _source; - Collection arr; - int32_t end; - - public: - virtual double doubleVal(int32_t doc); - virtual int32_t intVal(int32_t doc); - virtual String strVal(int32_t doc); - virtual String toString(int32_t doc); - virtual CollectionValue getInnerArray(); - }; +namespace Lucene { + +class ReverseOrdDocValues : public DocValues { +public: + ReverseOrdDocValues(const ReverseOrdFieldSourcePtr& source, Collection arr, int32_t end); + virtual ~ReverseOrdDocValues(); + + LUCENE_CLASS(ReverseOrdDocValues); + +protected: + ReverseOrdFieldSourceWeakPtr _source; + Collection arr; + int32_t end; + +public: + virtual double doubleVal(int32_t doc); + virtual int32_t intVal(int32_t doc); + virtual String strVal(int32_t doc); + virtual String toString(int32_t doc); + virtual CollectionValue getInnerArray(); +}; + } #endif diff --git a/src/core/include/_ScorerDocQueue.h b/src/core/include/_ScorerDocQueue.h index 6352fa6e..e994af3c 100644 --- a/src/core/include/_ScorerDocQueue.h +++ b/src/core/include/_ScorerDocQueue.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,24 +9,24 @@ #include "LuceneObject.h" -namespace Lucene -{ - class HeapedScorerDoc : public LuceneObject - { - public: - HeapedScorerDoc(ScorerPtr scorer); - HeapedScorerDoc(ScorerPtr scorer, int32_t doc); - virtual ~HeapedScorerDoc(); - - LUCENE_CLASS(HeapedScorerDoc); - - public: - ScorerPtr scorer; - int32_t doc; - - public: - void adjust(); - }; +namespace Lucene { + +class HeapedScorerDoc : public LuceneObject { +public: + HeapedScorerDoc(const ScorerPtr& scorer); + HeapedScorerDoc(const ScorerPtr& scorer, int32_t doc); + virtual ~HeapedScorerDoc(); + + LUCENE_CLASS(HeapedScorerDoc); + +public: + ScorerPtr scorer; + int32_t doc; + +public: + void adjust(); +}; + } #endif diff --git a/src/core/include/_SegmentInfos.h b/src/core/include/_SegmentInfos.h index ed2c72ad..ec714ebf 100644 --- a/src/core/include/_SegmentInfos.h +++ b/src/core/include/_SegmentInfos.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,65 +9,61 @@ #include "LuceneObject.h" -namespace Lucene -{ - /// Utility class for executing code that needs to do something with the current segments file. 
- class FindSegmentsFile : public LuceneObject - { - public: - FindSegmentsFile(SegmentInfosPtr infos, DirectoryPtr directory); - virtual ~FindSegmentsFile(); - - LUCENE_CLASS(FindSegmentsFile); - - protected: - SegmentInfosWeakPtr _segmentInfos; - DirectoryPtr directory; - - public: - void doRun(IndexCommitPtr commit = IndexCommitPtr()); - virtual void runBody(const String& segmentFileName) = 0; - }; - - template - class FindSegmentsFileT : public FindSegmentsFile - { - public: - FindSegmentsFileT(SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFile(infos, directory) {} - virtual ~FindSegmentsFileT() {} - - protected: - TYPE result; - - public: - virtual TYPE run(IndexCommitPtr commit = IndexCommitPtr()) - { - doRun(commit); - return result; - } - - virtual void runBody(const String& segmentFileName) - { - result = doBody(segmentFileName); - } - - virtual TYPE doBody(const String& segmentFileName) = 0; - }; - - /// Utility class for executing code that needs to do something with the current segments file. This is necessary with - /// lock-less commits because from the time you locate the current segments file name, until you actually open it, read - /// its contents, or check modified time, etc., it could have been deleted due to a writer commit finishing. - class FindSegmentsRead : public FindSegmentsFileT - { - public: - FindSegmentsRead(SegmentInfosPtr infos, DirectoryPtr directory); - virtual ~FindSegmentsRead(); - - LUCENE_CLASS(FindSegmentsRead); - - public: - virtual int64_t doBody(const String& segmentFileName); - }; +namespace Lucene { + +/// Utility class for executing code that needs to do something with the current segments file. 
+class FindSegmentsFile : public LuceneObject { +public: + FindSegmentsFile(const SegmentInfosPtr& infos, const DirectoryPtr& directory); + virtual ~FindSegmentsFile(); + + LUCENE_CLASS(FindSegmentsFile); + +protected: + SegmentInfosWeakPtr _segmentInfos; + DirectoryPtr directory; + +public: + void doRun(const IndexCommitPtr& commit = IndexCommitPtr()); + virtual void runBody(const String& segmentFileName) = 0; +}; + +template +class FindSegmentsFileT : public FindSegmentsFile { +public: + FindSegmentsFileT(const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFile(infos, directory) {} + virtual ~FindSegmentsFileT() {} + +protected: + TYPE result; + +public: + virtual TYPE run(const IndexCommitPtr& commit = IndexCommitPtr()) { + doRun(commit); + return result; + } + + virtual void runBody(const String& segmentFileName) { + result = doBody(segmentFileName); + } + + virtual TYPE doBody(const String& segmentFileName) = 0; +}; + +/// Utility class for executing code that needs to do something with the current segments file. This is necessary with +/// lock-less commits because from the time you locate the current segments file name, until you actually open it, read +/// its contents, or check modified time, etc., it could have been deleted due to a writer commit finishing. +class FindSegmentsRead : public FindSegmentsFileT { +public: + FindSegmentsRead(const SegmentInfosPtr& infos, const DirectoryPtr& directory); + virtual ~FindSegmentsRead(); + + LUCENE_CLASS(FindSegmentsRead); + +public: + virtual int64_t doBody(const String& segmentFileName); +}; + } #endif diff --git a/src/core/include/_SegmentReader.h b/src/core/include/_SegmentReader.h index c4d9b40b..505f8c79 100644 --- a/src/core/include/_SegmentReader.h +++ b/src/core/include/_SegmentReader.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,154 +9,151 @@ #include "CloseableThreadLocal.h" -namespace Lucene -{ - /// Holds core readers that are shared (unchanged) when SegmentReader is cloned or reopened - class CoreReaders : public LuceneObject - { - public: - CoreReaders(SegmentReaderPtr origInstance, DirectoryPtr dir, SegmentInfoPtr si, int32_t readBufferSize, int32_t termsIndexDivisor); - virtual ~CoreReaders(); - - LUCENE_CLASS(CoreReaders); - - protected: - /// Counts how many other reader share the core objects (freqStream, proxStream, tis, etc.) of this reader; - /// when coreRef drops to 0, these core objects may be closed. A given instance of SegmentReader may be - /// closed, even those it shares core objects with other SegmentReaders - SegmentReaderRefPtr ref; - - SegmentReaderWeakPtr _origInstance; - - public: - String segment; - FieldInfosPtr fieldInfos; - IndexInputPtr freqStream; - IndexInputPtr proxStream; - TermInfosReaderPtr tisNoIndex; - - DirectoryPtr dir; - DirectoryPtr cfsDir; - int32_t readBufferSize; - int32_t termsIndexDivisor; - - TermInfosReaderPtr tis; - FieldsReaderPtr fieldsReaderOrig; - TermVectorsReaderPtr termVectorsReaderOrig; - CompoundFileReaderPtr cfsReader; - CompoundFileReaderPtr storeCFSReader; - - public: - TermVectorsReaderPtr getTermVectorsReaderOrig(); - FieldsReaderPtr getFieldsReaderOrig(); - void incRef(); - DirectoryPtr getCFSReader(); - TermInfosReaderPtr getTermsReader(); - bool termsIndexIsLoaded(); - - /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run, - /// sharing a segment that's still being merged. 
This method is not fully thread safe, and relies on the - /// synchronization in IndexWriter - void loadTermsIndex(SegmentInfoPtr si, int32_t termsIndexDivisor); - - void openDocStores(SegmentInfoPtr si); - - void decRef(); - - friend class SegmentReader; - }; - - /// Sets the initial value - class FieldsReaderLocal : public CloseableThreadLocal - { - public: - FieldsReaderLocal(SegmentReaderPtr reader); - - protected: - SegmentReaderWeakPtr _reader; - - protected: - virtual FieldsReaderPtr initialValue(); - }; - - class SegmentReaderRef : public LuceneObject - { - public: - SegmentReaderRef(); - virtual ~SegmentReaderRef(); - - LUCENE_CLASS(SegmentReaderRef); - - protected: - int32_t _refCount; - - public: - virtual String toString(); - int32_t refCount(); - int32_t incRef(); - int32_t decRef(); - - friend class SegmentReader; - }; - - /// Byte[] referencing is used because a new norm object needs to be created for each clone, and the byte - /// array is all that is needed for sharing between cloned readers. The current norm referencing is for - /// sharing between readers whereas the byte[] referencing is for copy on write which is independent of - /// reader references (i.e. incRef, decRef). - class Norm : public LuceneObject - { - public: - Norm(); - Norm(SegmentReaderPtr reader, IndexInputPtr in, int32_t number, int64_t normSeek); - virtual ~Norm(); - - LUCENE_CLASS(Norm); - - protected: - SegmentReaderWeakPtr _reader; - int32_t refCount; - - /// If this instance is a clone, the originalNorm references the Norm that has a real open IndexInput - NormPtr origNorm; - SegmentReaderPtr origReader; - - IndexInputPtr in; - int64_t normSeek; - - SegmentReaderRefPtr _bytesRef; - ByteArray _bytes; - bool dirty; - int32_t number; - bool rollbackDirty; - - public: - void incRef(); - void decRef(); - - /// Load bytes but do not cache them if they were not already cached - void bytes(uint8_t* bytesOut, int32_t offset, int32_t length); - - /// Load & cache full bytes array. 
Returns bytes. - ByteArray bytes(); - - /// Only for testing - SegmentReaderRefPtr bytesRef(); - - /// Called if we intend to change a norm value. We make a private copy of bytes if it's shared - // with others - ByteArray copyOnWrite(); - - /// Returns a copy of this Norm instance that shares IndexInput & bytes with the original one - virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - - /// Flush all pending changes to the next generation separate norms file. - void reWrite(SegmentInfoPtr si); - - protected: - void closeInput(); - - friend class SegmentReader; - }; +namespace Lucene { + +/// Holds core readers that are shared (unchanged) when SegmentReader is cloned or reopened +class CoreReaders : public LuceneObject { +public: + CoreReaders(const SegmentReaderPtr& origInstance, const DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, int32_t termsIndexDivisor); + virtual ~CoreReaders(); + + LUCENE_CLASS(CoreReaders); + +protected: + /// Counts how many other reader share the core objects (freqStream, proxStream, tis, etc.) of this reader; + /// when coreRef drops to 0, these core objects may be closed. 
A given instance of SegmentReader may be + /// closed, even those it shares core objects with other SegmentReaders + SegmentReaderRefPtr ref; + + SegmentReaderWeakPtr _origInstance; + +public: + String segment; + FieldInfosPtr fieldInfos; + IndexInputPtr freqStream; + IndexInputPtr proxStream; + TermInfosReaderPtr tisNoIndex; + + DirectoryPtr dir; + DirectoryPtr cfsDir; + int32_t readBufferSize; + int32_t termsIndexDivisor; + + TermInfosReaderPtr tis; + FieldsReaderPtr fieldsReaderOrig; + TermVectorsReaderPtr termVectorsReaderOrig; + CompoundFileReaderPtr cfsReader; + CompoundFileReaderPtr storeCFSReader; + +public: + TermVectorsReaderPtr getTermVectorsReaderOrig(); + FieldsReaderPtr getFieldsReaderOrig(); + void incRef(); + DirectoryPtr getCFSReader(); + TermInfosReaderPtr getTermsReader(); + bool termsIndexIsLoaded(); + + /// NOTE: only called from IndexWriter when a near real-time reader is opened, or applyDeletes is run, + /// sharing a segment that's still being merged. This method is not fully thread safe, and relies on the + /// synchronization in IndexWriter + void loadTermsIndex(const SegmentInfoPtr& si, int32_t termsIndexDivisor); + + void openDocStores(const SegmentInfoPtr& si); + + void decRef(); + + friend class SegmentReader; +}; + +/// Sets the initial value +class LPPAPI FieldsReaderLocal : public CloseableThreadLocal { +public: + FieldsReaderLocal(const SegmentReaderPtr& reader); + +protected: + SegmentReaderWeakPtr _reader; + +protected: + virtual FieldsReaderPtr initialValue(); +}; + +class LPPAPI SegmentReaderRef : public LuceneObject { +public: + SegmentReaderRef(); + virtual ~SegmentReaderRef(); + + LUCENE_CLASS(SegmentReaderRef); + +protected: + int32_t _refCount; + +public: + virtual String toString(); + int32_t refCount(); + int32_t incRef(); + int32_t decRef(); + + friend class SegmentReader; +}; + +/// Byte[] referencing is used because a new norm object needs to be created for each clone, and the byte +/// array is all that is needed for 
sharing between cloned readers. The current norm referencing is for +/// sharing between readers whereas the byte[] referencing is for copy on write which is independent of +/// reader references (i.e. incRef, decRef). +class LPPAPI Norm : public LuceneObject { +public: + Norm(); + Norm(const SegmentReaderPtr& reader, const IndexInputPtr& in, int32_t number, int64_t normSeek); + virtual ~Norm(); + + LUCENE_CLASS(Norm); + +protected: + SegmentReaderWeakPtr _reader; + int32_t refCount; + + /// If this instance is a clone, the originalNorm references the Norm that has a real open IndexInput + NormPtr origNorm; + SegmentReaderPtr origReader; + + IndexInputPtr in; + int64_t normSeek; + + SegmentReaderRefPtr _bytesRef; + ByteArray _bytes; + bool dirty; + int32_t number; + bool rollbackDirty; + +public: + void incRef(); + void decRef(); + + /// Load bytes but do not cache them if they were not already cached + void bytes(uint8_t* bytesOut, int32_t offset, int32_t length); + + /// Load & cache full bytes array. Returns bytes. + ByteArray bytes(); + + /// Only for testing + SegmentReaderRefPtr bytesRef(); + + /// Called if we intend to change a norm value. We make a private copy of bytes if it's shared + // with others + ByteArray copyOnWrite(); + + /// Returns a copy of this Norm instance that shares IndexInput & bytes with the original one + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); + + /// Flush all pending changes to the next generation separate norms file. + void reWrite(const SegmentInfoPtr& si); + +protected: + void closeInput(); + + friend class SegmentReader; +}; + } #endif diff --git a/src/core/include/_Similarity.h b/src/core/include/_Similarity.h index 2ec5cd07..a3d189bd 100644 --- a/src/core/include/_Similarity.h +++ b/src/core/include/_Similarity.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,28 +9,28 @@ #include "Explanation.h" -namespace Lucene -{ - class SimilarityIDFExplanation : public IDFExplanation - { - public: - SimilarityIDFExplanation(int32_t df, int32_t max, double idf); - SimilarityIDFExplanation(const String& exp, double idf); - - virtual ~SimilarityIDFExplanation(); - - LUCENE_CLASS(SimilarityIDFExplanation); - - protected: - String exp; - int32_t df; - int32_t max; - double idf; - - public: - virtual String explain(); - virtual double getIdf(); - }; +namespace Lucene { + +class SimilarityIDFExplanation : public IDFExplanation { +public: + SimilarityIDFExplanation(int32_t df, int32_t max, double idf); + SimilarityIDFExplanation(const String& exp, double idf); + + virtual ~SimilarityIDFExplanation(); + + LUCENE_CLASS(SimilarityIDFExplanation); + +protected: + String exp; + int32_t df; + int32_t max; + double idf; + +public: + virtual String explain(); + virtual double getIdf(); +}; + } #endif diff --git a/src/core/include/_SimpleFSDirectory.h b/src/core/include/_SimpleFSDirectory.h index e56e276d..26acd38f 100644 --- a/src/core/include/_SimpleFSDirectory.h +++ b/src/core/include/_SimpleFSDirectory.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,105 +10,102 @@ #include "BufferedIndexInput.h" #include "BufferedIndexOutput.h" -namespace Lucene -{ - class InputFile : public LuceneObject - { - public: - InputFile(const String& path); - virtual ~InputFile(); - - LUCENE_CLASS(InputFile); - - public: - static const int32_t FILE_EOF; - static const int32_t FILE_ERROR; - - protected: - ifstreamPtr file; - int64_t position; - int64_t length; - - public: - void setPosition(int64_t position); - int64_t getPosition(); - int64_t getLength(); - int32_t read(uint8_t* b, int32_t offset, int32_t length); - void close(); - bool isValid(); - }; - - class SimpleFSIndexInput : public BufferedIndexInput - { - public: - SimpleFSIndexInput(); - SimpleFSIndexInput(const String& path, int32_t bufferSize, int32_t chunkSize); - virtual ~SimpleFSIndexInput(); - - LUCENE_CLASS(SimpleFSIndexInput); - - protected: - String path; - InputFilePtr file; - bool isClone; - int32_t chunkSize; - - protected: - virtual void readInternal(uint8_t* b, int32_t offset, int32_t length); - virtual void seekInternal(int64_t pos); - - public: - virtual int64_t length(); - virtual void close(); - - /// Method used for testing. - bool isValid(); - - /// Returns a clone of this stream. 
- virtual LuceneObjectPtr clone(LuceneObjectPtr other = LuceneObjectPtr()); - }; - - class OutputFile : public LuceneObject - { - public: - OutputFile(const String& path); - virtual ~OutputFile(); - - LUCENE_CLASS(OutputFile); - - protected: - ofstreamPtr file; - String path; - - public: - bool write(const uint8_t* b, int32_t offset, int32_t length); - void close(); - void setPosition(int64_t position); - int64_t getLength(); - void setLength(int64_t length); - void flush(); - bool isValid(); - }; - - class SimpleFSIndexOutput : public BufferedIndexOutput - { - public: - SimpleFSIndexOutput(const String& path); - virtual ~SimpleFSIndexOutput(); - - LUCENE_CLASS(SimpleFSIndexOutput); - - protected: - OutputFilePtr file; - bool isOpen; - - public: - virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length); - virtual void close(); - virtual void seek(int64_t pos); - virtual int64_t length(); - virtual void setLength(int64_t length); - }; +namespace Lucene { + +class LPPAPI InputFile : public LuceneObject { +public: + InputFile(const String& path); + virtual ~InputFile(); + + LUCENE_CLASS(InputFile); + +public: + static const int32_t FILE_EOF; + static const int32_t FILE_ERROR; + +protected: + ifstreamPtr file; + int64_t position; + int64_t length; + +public: + void setPosition(int64_t position); + int64_t getPosition(); + int64_t getLength(); + int32_t read(uint8_t* b, int32_t offset, int32_t length); + void close(); + bool isValid(); +}; + +class LPPAPI SimpleFSIndexInput : public BufferedIndexInput { +public: + SimpleFSIndexInput(); + SimpleFSIndexInput(const String& path, int32_t bufferSize, int32_t chunkSize); + virtual ~SimpleFSIndexInput(); + + LUCENE_CLASS(SimpleFSIndexInput); + +protected: + String path; + InputFilePtr file; + bool isClone; + int32_t chunkSize; + +protected: + virtual void readInternal(uint8_t* b, int32_t offset, int32_t length); + virtual void seekInternal(int64_t pos); + +public: + virtual int64_t length(); + virtual void 
close(); + + /// Method used for testing. + bool isValid(); + + /// Returns a clone of this stream. + virtual LuceneObjectPtr clone(const LuceneObjectPtr& other = LuceneObjectPtr()); +}; + +class OutputFile : public LuceneObject { +public: + OutputFile(const String& path); + virtual ~OutputFile(); + + LUCENE_CLASS(OutputFile); + +protected: + ofstreamPtr file; + String path; + +public: + bool write(const uint8_t* b, int32_t offset, int32_t length); + void close(); + void setPosition(int64_t position); + int64_t getLength(); + void setLength(int64_t length); + void flush(); + bool isValid(); +}; + +class SimpleFSIndexOutput : public BufferedIndexOutput { +public: + SimpleFSIndexOutput(const String& path); + virtual ~SimpleFSIndexOutput(); + + LUCENE_CLASS(SimpleFSIndexOutput); + +protected: + OutputFilePtr file; + bool isOpen; + +public: + virtual void flushBuffer(const uint8_t* b, int32_t offset, int32_t length); + virtual void close(); + virtual void seek(int64_t pos); + virtual int64_t length(); + virtual void setLength(int64_t length); +}; + } #endif diff --git a/src/core/include/_SimpleFSLockFactory.h b/src/core/include/_SimpleFSLockFactory.h index a0de522b..fdb4c1d5 100644 --- a/src/core/include/_SimpleFSLockFactory.h +++ b/src/core/include/_SimpleFSLockFactory.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,35 +9,35 @@ #include "Lock.h" -namespace Lucene -{ - class SimpleFSLock : public Lock - { - public: - SimpleFSLock(const String& lockDir, const String& lockFileName); - virtual ~SimpleFSLock(); - - LUCENE_CLASS(SimpleFSLock); - - public: - String lockDir; - String lockFile; - - public: - /// Attempts to obtain exclusive access and immediately return upon success or failure. - /// @return true if exclusive access is obtained. - virtual bool obtain(); - - /// Releases exclusive access. - virtual void release(); - - /// Returns true if the resource is currently locked. Note that one must still call {@link #obtain()} - /// before using the resource. - virtual bool isLocked(); - - /// Returns derived object name. - virtual String toString(); - }; +namespace Lucene { + +class SimpleFSLock : public Lock { +public: + SimpleFSLock(const String& lockDir, const String& lockFileName); + virtual ~SimpleFSLock(); + + LUCENE_CLASS(SimpleFSLock); + +public: + String lockDir; + String lockFile; + +public: + /// Attempts to obtain exclusive access and immediately return upon success or failure. + /// @return true if exclusive access is obtained. + virtual bool obtain(); + + /// Releases exclusive access. + virtual void release(); + + /// Returns true if the resource is currently locked. Note that one must still call {@link #obtain()} + /// before using the resource. + virtual bool isLocked(); + + /// Returns derived object name. + virtual String toString(); +}; + } #endif diff --git a/src/core/include/_SingleInstanceLockFactory.h b/src/core/include/_SingleInstanceLockFactory.h index b26d5657..77259fca 100644 --- a/src/core/include/_SingleInstanceLockFactory.h +++ b/src/core/include/_SingleInstanceLockFactory.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,36 +9,36 @@ #include "Lock.h" -namespace Lucene -{ - class SingleInstanceLock : public Lock - { - public: - SingleInstanceLock(HashSet locks, const String& lockName); - virtual ~SingleInstanceLock(); - - LUCENE_CLASS(SingleInstanceLock); - - protected: - HashSet locks; - String lockName; - - public: - /// Attempts to obtain exclusive access and immediately return - /// upon success or failure. - /// @return true if exclusive access is obtained. - virtual bool obtain(); - - /// Releases exclusive access. - virtual void release(); - - /// Returns true if the resource is currently locked. Note that - /// one must still call {@link #obtain()} before using the resource. - virtual bool isLocked(); - - /// Returns derived object name. - virtual String toString(); - }; +namespace Lucene { + +class SingleInstanceLock : public Lock { +public: + SingleInstanceLock(HashSet locks, const String& lockName); + virtual ~SingleInstanceLock(); + + LUCENE_CLASS(SingleInstanceLock); + +protected: + HashSet locks; + String lockName; + +public: + /// Attempts to obtain exclusive access and immediately return + /// upon success or failure. + /// @return true if exclusive access is obtained. + virtual bool obtain(); + + /// Releases exclusive access. + virtual void release(); + + /// Returns true if the resource is currently locked. Note that + /// one must still call {@link #obtain()} before using the resource. + virtual bool isLocked(); + + /// Returns derived object name. 
+ virtual String toString(); +}; + } #endif diff --git a/src/core/include/_SnapshotDeletionPolicy.h b/src/core/include/_SnapshotDeletionPolicy.h index 7b1da72b..9327fa5f 100644 --- a/src/core/include/_SnapshotDeletionPolicy.h +++ b/src/core/include/_SnapshotDeletionPolicy.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,50 +9,50 @@ #include "IndexCommit.h" -namespace Lucene -{ - class MyCommitPoint : public IndexCommit - { - public: - MyCommitPoint(SnapshotDeletionPolicyPtr deletionPolicy, IndexCommitPtr cp); - virtual ~MyCommitPoint(); - - LUCENE_CLASS(MyCommitPoint); - - protected: - SnapshotDeletionPolicyWeakPtr _deletionPolicy; - - public: - IndexCommitPtr cp; - - public: - virtual String toString(); - - /// Get the segments file (segments_N) associated with this commit point. - virtual String getSegmentsFileName(); - - /// Returns all index files referenced by this commit point. - virtual HashSet getFileNames(); - - /// Returns the {@link Directory} for the index. - virtual DirectoryPtr getDirectory(); - - /// Delete this commit point. - virtual void deleteCommit(); - - virtual bool isDeleted(); - - /// Returns the version for this IndexCommit. - virtual int64_t getVersion(); - - /// Returns the generation (the _N in segments_N) for this IndexCommit. - virtual int64_t getGeneration(); - - /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. 
- virtual MapStringString getUserData(); - - virtual bool isOptimized(); - }; +namespace Lucene { + +class MyCommitPoint : public IndexCommit { +public: + MyCommitPoint(const SnapshotDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& cp); + virtual ~MyCommitPoint(); + + LUCENE_CLASS(MyCommitPoint); + +protected: + SnapshotDeletionPolicyWeakPtr _deletionPolicy; + +public: + IndexCommitPtr cp; + +public: + virtual String toString(); + + /// Get the segments file (segments_N) associated with this commit point. + virtual String getSegmentsFileName(); + + /// Returns all index files referenced by this commit point. + virtual HashSet getFileNames(); + + /// Returns the {@link Directory} for the index. + virtual DirectoryPtr getDirectory(); + + /// Delete this commit point. + virtual void deleteCommit(); + + virtual bool isDeleted(); + + /// Returns the version for this IndexCommit. + virtual int64_t getVersion(); + + /// Returns the generation (the _N in segments_N) for this IndexCommit. + virtual int64_t getGeneration(); + + /// Returns userData, previously passed to {@link IndexWriter#commit(Map)} for this commit. + virtual MapStringString getUserData(); + + virtual bool isOptimized(); +}; + } #endif diff --git a/src/core/include/_SortedVIntList.h b/src/core/include/_SortedVIntList.h index 12e39779..c5318ef3 100644 --- a/src/core/include/_SortedVIntList.h +++ b/src/core/include/_SortedVIntList.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,30 +9,30 @@ #include "DocIdSetIterator.h" -namespace Lucene -{ - class SortedDocIdSetIterator : public DocIdSetIterator - { - public: - SortedDocIdSetIterator(SortedVIntListPtr list); - virtual ~SortedDocIdSetIterator(); - - LUCENE_CLASS(SortedDocIdSetIterator); - - public: - SortedVIntListWeakPtr _list; - int32_t bytePos; - int32_t lastInt; - int32_t doc; - - public: - virtual int32_t docID(); - virtual int32_t nextDoc(); - virtual int32_t advance(int32_t target); - - protected: - void advance(); - }; +namespace Lucene { + +class SortedDocIdSetIterator : public DocIdSetIterator { +public: + SortedDocIdSetIterator(const SortedVIntListPtr& list); + virtual ~SortedDocIdSetIterator(); + + LUCENE_CLASS(SortedDocIdSetIterator); + +public: + SortedVIntListWeakPtr _list; + int32_t bytePos; + int32_t lastInt; + int32_t doc; + +public: + virtual int32_t docID(); + virtual int32_t nextDoc(); + virtual int32_t advance(int32_t target); + +protected: + void advance(); +}; + } #endif diff --git a/src/core/include/_SpanFirstQuery.h b/src/core/include/_SpanFirstQuery.h index 3c00980d..399e54a1 100644 --- a/src/core/include/_SpanFirstQuery.h +++ b/src/core/include/_SpanFirstQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,29 +9,29 @@ #include "Spans.h" -namespace Lucene -{ - class FirstSpans : public Spans - { - public: - FirstSpans(SpanFirstQueryPtr query, SpansPtr spans); - virtual ~FirstSpans(); - - LUCENE_CLASS(FirstSpans); - - protected: - SpanFirstQueryPtr query; - SpansPtr spans; - - public: - virtual bool next(); - virtual bool skipTo(int32_t target); - virtual int32_t doc(); - virtual int32_t start(); - virtual int32_t end(); - virtual Collection getPayload(); - virtual bool isPayloadAvailable(); - }; +namespace Lucene { + +class FirstSpans : public Spans { +public: + FirstSpans(const SpanFirstQueryPtr& query, const SpansPtr& spans); + virtual ~FirstSpans(); + + LUCENE_CLASS(FirstSpans); + +protected: + SpanFirstQueryPtr query; + SpansPtr spans; + +public: + virtual bool next(); + virtual bool skipTo(int32_t target); + virtual int32_t doc(); + virtual int32_t start(); + virtual int32_t end(); + virtual Collection getPayload(); + virtual bool isPayloadAvailable(); +}; + } #endif diff --git a/src/core/include/_SpanNotQuery.h b/src/core/include/_SpanNotQuery.h index 281d11ad..584f238b 100644 --- a/src/core/include/_SpanNotQuery.h +++ b/src/core/include/_SpanNotQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,33 +9,33 @@ #include "Spans.h" -namespace Lucene -{ - class NotSpans : public Spans - { - public: - NotSpans(SpanNotQueryPtr query, SpansPtr includeSpans, SpansPtr excludeSpans); - virtual ~NotSpans(); - - LUCENE_CLASS(NotSpans); - - protected: - SpanNotQueryPtr query; - SpansPtr includeSpans; - bool moreInclude; - SpansPtr excludeSpans; - bool moreExclude; - - public: - virtual bool next(); - virtual bool skipTo(int32_t target); - virtual int32_t doc(); - virtual int32_t start(); - virtual int32_t end(); - virtual Collection getPayload(); - virtual bool isPayloadAvailable(); - virtual String toString(); - }; +namespace Lucene { + +class NotSpans : public Spans { +public: + NotSpans(const SpanNotQueryPtr& query, const SpansPtr& includeSpans, const SpansPtr& excludeSpans); + virtual ~NotSpans(); + + LUCENE_CLASS(NotSpans); + +protected: + SpanNotQueryPtr query; + SpansPtr includeSpans; + bool moreInclude; + SpansPtr excludeSpans; + bool moreExclude; + +public: + virtual bool next(); + virtual bool skipTo(int32_t target); + virtual int32_t doc(); + virtual int32_t start(); + virtual int32_t end(); + virtual Collection getPayload(); + virtual bool isPayloadAvailable(); + virtual String toString(); +}; + } #endif diff --git a/src/core/include/_SpanOrQuery.h b/src/core/include/_SpanOrQuery.h index 9f569abb..92763ab3 100644 --- a/src/core/include/_SpanOrQuery.h +++ b/src/core/include/_SpanOrQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,47 +10,46 @@ #include "PriorityQueue.h" #include "Spans.h" -namespace Lucene -{ - class SpanQueue : public PriorityQueue - { - public: - SpanQueue(int32_t size); - virtual ~SpanQueue(); - - LUCENE_CLASS(SpanQueue); - - protected: - virtual bool lessThan(const SpansPtr& first, const SpansPtr& second); - }; - - class OrSpans : public Spans - { - public: - OrSpans(SpanOrQueryPtr query, IndexReaderPtr reader); - virtual ~OrSpans(); - - LUCENE_CLASS(OrSpans); - - protected: - SpanOrQueryPtr query; - IndexReaderPtr reader; - SpanQueuePtr queue; - - public: - virtual bool next(); - virtual bool skipTo(int32_t target); - virtual int32_t doc(); - virtual int32_t start(); - virtual int32_t end(); - virtual Collection getPayload(); - virtual bool isPayloadAvailable(); - virtual String toString(); - - protected: - bool initSpanQueue(int32_t target); - SpansPtr top(); - }; +namespace Lucene { + +class SpanQueue : public PriorityQueue { +public: + SpanQueue(int32_t size); + virtual ~SpanQueue(); + + LUCENE_CLASS(SpanQueue); + +protected: + virtual bool lessThan(const SpansPtr& first, const SpansPtr& second); +}; + +class OrSpans : public Spans { +public: + OrSpans(const SpanOrQueryPtr& query, const IndexReaderPtr& reader); + virtual ~OrSpans(); + + LUCENE_CLASS(OrSpans); + +protected: + SpanOrQueryPtr query; + IndexReaderPtr reader; + SpanQueuePtr queue; + +public: + virtual bool next(); + virtual bool skipTo(int32_t target); + virtual int32_t doc(); + virtual int32_t start(); + virtual int32_t end(); + virtual Collection getPayload(); + virtual bool isPayloadAvailable(); + virtual String toString(); + +protected: + bool initSpanQueue(int32_t target); + SpansPtr top(); +}; + } #endif diff --git a/src/core/include/_StandardAnalyzer.h b/src/core/include/_StandardAnalyzer.h index e09aac4d..f9dbf51a 100644 --- a/src/core/include/_StandardAnalyzer.h +++ b/src/core/include/_StandardAnalyzer.h @@ -1,5 
+1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,17 +9,17 @@ #include "LuceneObject.h" -namespace Lucene -{ - class StandardAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~StandardAnalyzerSavedStreams(); - - public: - StandardTokenizerPtr tokenStream; - TokenStreamPtr filteredTokenStream; - }; +namespace Lucene { + +class StandardAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~StandardAnalyzerSavedStreams(); + +public: + StandardTokenizerPtr tokenStream; + TokenStreamPtr filteredTokenStream; +}; + } #endif diff --git a/src/core/include/_StopAnalyzer.h b/src/core/include/_StopAnalyzer.h index 57ac15bb..678aace5 100644 --- a/src/core/include/_StopAnalyzer.h +++ b/src/core/include/_StopAnalyzer.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,20 +9,20 @@ #include "LuceneObject.h" -namespace Lucene -{ - /// Filters LowerCaseTokenizer with StopFilter. - class StopAnalyzerSavedStreams : public LuceneObject - { - public: - virtual ~StopAnalyzerSavedStreams(); - - LUCENE_CLASS(StopAnalyzerSavedStreams); - - public: - TokenizerPtr source; - TokenStreamPtr result; - }; +namespace Lucene { + +/// Filters LowerCaseTokenizer with StopFilter. 
+class StopAnalyzerSavedStreams : public LuceneObject { +public: + virtual ~StopAnalyzerSavedStreams(); + + LUCENE_CLASS(StopAnalyzerSavedStreams); + +public: + TokenizerPtr source; + TokenStreamPtr result; +}; + } #endif diff --git a/src/core/include/_TermQuery.h b/src/core/include/_TermQuery.h index 46c5ebde..d6589ab5 100644 --- a/src/core/include/_TermQuery.h +++ b/src/core/include/_TermQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,34 +9,34 @@ #include "Weight.h" -namespace Lucene -{ - class TermWeight : public Weight - { - public: - TermWeight(TermQueryPtr query, SearcherPtr searcher); - virtual ~TermWeight(); - - LUCENE_CLASS(TermWeight); - - protected: - TermQueryPtr query; - SimilarityPtr similarity; - double value; - double idf; - double queryNorm; - double queryWeight; - IDFExplanationPtr idfExp; - - public: - virtual String toString(); - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - }; +namespace Lucene { + +class TermWeight : public Weight { +public: + TermWeight(const TermQueryPtr& query, const SearcherPtr& searcher); + virtual ~TermWeight(); + + LUCENE_CLASS(TermWeight); + +protected: + TermQueryPtr query; + SimilarityPtr similarity; + double value; + double idf; + double queryNorm; + double queryWeight; + IDFExplanationPtr idfExp; + +public: + virtual String toString(); + virtual QueryPtr getQuery(); + virtual double 
getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); +}; + } #endif diff --git a/src/core/include/_TimeLimitingCollector.h b/src/core/include/_TimeLimitingCollector.h index 4729e7ca..a0cd94fd 100644 --- a/src/core/include/_TimeLimitingCollector.h +++ b/src/core/include/_TimeLimitingCollector.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,30 +9,30 @@ #include "LuceneThread.h" -namespace Lucene -{ - class TimerThread : public LuceneThread - { - public: - TimerThread(); - virtual ~TimerThread(); - - LUCENE_CLASS(TimerThread); - - protected: - int64_t time; - bool _stopThread; - - public: - virtual void start(); - virtual void run(); - - /// Get the timer value in milliseconds. - int64_t getMilliseconds(); - - /// Stop timer thread. - void stopThread(); - }; +namespace Lucene { + +class TimerThread : public LuceneThread { +public: + TimerThread(); + virtual ~TimerThread(); + + LUCENE_CLASS(TimerThread); + +protected: + int64_t time; + bool _stopThread; + +public: + virtual void start(); + virtual void run(); + + /// Get the timer value in milliseconds. + int64_t getMilliseconds(); + + /// Stop timer thread. 
+ void stopThread(); +}; + } #endif diff --git a/src/core/include/_TopFieldCollector.h b/src/core/include/_TopFieldCollector.h index 2f362b60..611dc3f4 100644 --- a/src/core/include/_TopFieldCollector.h +++ b/src/core/include/_TopFieldCollector.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,210 +9,199 @@ #include "TopDocsCollector.h" -namespace Lucene -{ - /// Implements a TopFieldCollector over one SortField criteria, without tracking document scores and maxScore. - class OneComparatorNonScoringCollector : public TopFieldCollector - { - public: - OneComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OneComparatorNonScoringCollector(); - - LUCENE_CLASS(OneComparatorNonScoringCollector); - - public: - FieldComparatorPtr comparator; - int32_t reverseMul; - - public: - virtual void initialize(); - virtual void updateBottom(int32_t doc); - virtual void collect(int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setScorer(ScorerPtr scorer); - }; - - /// Implements a TopFieldCollector over one SortField criteria, without tracking document scores and maxScore, - /// and assumes out of orderness in doc Ids collection. 
- class OutOfOrderOneComparatorNonScoringCollector : public OneComparatorNonScoringCollector - { - public: - OutOfOrderOneComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OutOfOrderOneComparatorNonScoringCollector(); - - LUCENE_CLASS(OutOfOrderOneComparatorNonScoringCollector); - - public: - virtual void collect(int32_t doc); - virtual bool acceptsDocsOutOfOrder(); - }; - - /// Implements a TopFieldCollector over one SortField criteria, while tracking document scores but no maxScore. - class OneComparatorScoringNoMaxScoreCollector : public OneComparatorNonScoringCollector - { - public: - OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OneComparatorScoringNoMaxScoreCollector(); - - LUCENE_CLASS(OneComparatorScoringNoMaxScoreCollector); - - public: - ScorerPtr scorer; - - public: - virtual void updateBottom(int32_t doc, double score); - virtual void collect(int32_t doc); - virtual void setScorer(ScorerPtr scorer); - }; - - /// Implements a TopFieldCollector over one SortField criteria, while tracking document scores but no maxScore, - /// and assumes out of orderness in doc Ids collection. - class OutOfOrderOneComparatorScoringNoMaxScoreCollector : public OneComparatorScoringNoMaxScoreCollector - { - public: - OutOfOrderOneComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OutOfOrderOneComparatorScoringNoMaxScoreCollector(); - - LUCENE_CLASS(OutOfOrderOneComparatorScoringNoMaxScoreCollector); - - public: - virtual void collect(int32_t doc); - virtual bool acceptsDocsOutOfOrder(); - }; - - /// Implements a TopFieldCollector over one SortField criteria, with tracking document scores and maxScore. 
- class OneComparatorScoringMaxScoreCollector : public OneComparatorNonScoringCollector - { - public: - OneComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OneComparatorScoringMaxScoreCollector(); - - LUCENE_CLASS(OneComparatorScoringMaxScoreCollector); - - public: - ScorerPtr scorer; - - public: - virtual void updateBottom(int32_t doc, double score); - virtual void collect(int32_t doc); - virtual void setScorer(ScorerPtr scorer); - }; - - /// Implements a TopFieldCollector over one SortField criteria, with tracking document scores and maxScore, - /// and assumes out of orderness in doc Ids collection. - class OutOfOrderOneComparatorScoringMaxScoreCollector : public OneComparatorScoringMaxScoreCollector - { - public: - OutOfOrderOneComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OutOfOrderOneComparatorScoringMaxScoreCollector(); - - LUCENE_CLASS(OutOfOrderOneComparatorScoringMaxScoreCollector); - - public: - virtual void collect(int32_t doc); - virtual bool acceptsDocsOutOfOrder(); - }; - - /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. - class MultiComparatorNonScoringCollector : public TopFieldCollector - { - public: - MultiComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~MultiComparatorNonScoringCollector(); - - LUCENE_CLASS(MultiComparatorNonScoringCollector); - - public: - Collection comparators; - Collection reverseMul; - - public: - virtual void initialize(); - virtual void updateBottom(int32_t doc); - virtual void collect(int32_t doc); - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase); - virtual void setScorer(ScorerPtr scorer); - }; - - /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. 
- class OutOfOrderMultiComparatorNonScoringCollector : public MultiComparatorNonScoringCollector - { - public: - OutOfOrderMultiComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OutOfOrderMultiComparatorNonScoringCollector(); - - LUCENE_CLASS(OutOfOrderMultiComparatorNonScoringCollector); - - public: - virtual void collect(int32_t doc); - virtual bool acceptsDocsOutOfOrder(); - }; - - /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore. - class MultiComparatorScoringMaxScoreCollector : public MultiComparatorNonScoringCollector - { - public: - MultiComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~MultiComparatorScoringMaxScoreCollector(); - - LUCENE_CLASS(MultiComparatorScoringMaxScoreCollector); - - public: - ScorerWeakPtr _scorer; - - public: - virtual void updateBottom(int32_t doc, double score); - virtual void collect(int32_t doc); - virtual void setScorer(ScorerPtr scorer); - }; - - /// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. - class OutOfOrderMultiComparatorScoringMaxScoreCollector : public MultiComparatorScoringMaxScoreCollector - { - public: - OutOfOrderMultiComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OutOfOrderMultiComparatorScoringMaxScoreCollector(); - - LUCENE_CLASS(OutOfOrderMultiComparatorScoringMaxScoreCollector); - - public: - virtual void collect(int32_t doc); - virtual bool acceptsDocsOutOfOrder(); - }; - - /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore. 
- class MultiComparatorScoringNoMaxScoreCollector : public MultiComparatorNonScoringCollector - { - public: - MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~MultiComparatorScoringNoMaxScoreCollector(); - - LUCENE_CLASS(MultiComparatorScoringNoMaxScoreCollector); - - public: - ScorerWeakPtr _scorer; - - public: - virtual void updateBottom(int32_t doc, double score); - virtual void collect(int32_t doc); - virtual void setScorer(ScorerPtr scorer); - }; - - /// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore, - /// and assumes out of orderness in doc Ids collection. - class OutOfOrderMultiComparatorScoringNoMaxScoreCollector : public MultiComparatorScoringNoMaxScoreCollector - { - public: - OutOfOrderMultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields); - virtual ~OutOfOrderMultiComparatorScoringNoMaxScoreCollector(); - - LUCENE_CLASS(OutOfOrderMultiComparatorScoringNoMaxScoreCollector); - - public: - virtual void collect(int32_t doc); - virtual void setScorer(ScorerPtr scorer); - virtual bool acceptsDocsOutOfOrder(); - }; +namespace Lucene { + +/// Implements a TopFieldCollector over one SortField criteria, without tracking document scores and maxScore. 
+class OneComparatorNonScoringCollector : public TopFieldCollector { +public: + OneComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OneComparatorNonScoringCollector(); + + LUCENE_CLASS(OneComparatorNonScoringCollector); + +public: + FieldComparatorPtr comparator; + int32_t reverseMul; + +public: + virtual void initialize(); + virtual void updateBottom(int32_t doc); + virtual void collect(int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setScorer(const ScorerPtr& scorer); +}; + +/// Implements a TopFieldCollector over one SortField criteria, without tracking document scores and maxScore, +/// and assumes out of orderness in doc Ids collection. +class OutOfOrderOneComparatorNonScoringCollector : public OneComparatorNonScoringCollector { +public: + OutOfOrderOneComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OutOfOrderOneComparatorNonScoringCollector(); + + LUCENE_CLASS(OutOfOrderOneComparatorNonScoringCollector); + +public: + virtual void collect(int32_t doc); + virtual bool acceptsDocsOutOfOrder(); +}; + +/// Implements a TopFieldCollector over one SortField criteria, while tracking document scores but no maxScore. 
+class OneComparatorScoringNoMaxScoreCollector : public OneComparatorNonScoringCollector { +public: + OneComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OneComparatorScoringNoMaxScoreCollector(); + + LUCENE_CLASS(OneComparatorScoringNoMaxScoreCollector); + +public: + ScorerPtr scorer; + +public: + virtual void updateBottom(int32_t doc, double score); + virtual void collect(int32_t doc); + virtual void setScorer(const ScorerPtr& scorer); +}; + +/// Implements a TopFieldCollector over one SortField criteria, while tracking document scores but no maxScore, +/// and assumes out of orderness in doc Ids collection. +class OutOfOrderOneComparatorScoringNoMaxScoreCollector : public OneComparatorScoringNoMaxScoreCollector { +public: + OutOfOrderOneComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OutOfOrderOneComparatorScoringNoMaxScoreCollector(); + + LUCENE_CLASS(OutOfOrderOneComparatorScoringNoMaxScoreCollector); + +public: + virtual void collect(int32_t doc); + virtual bool acceptsDocsOutOfOrder(); +}; + +/// Implements a TopFieldCollector over one SortField criteria, with tracking document scores and maxScore. +class OneComparatorScoringMaxScoreCollector : public OneComparatorNonScoringCollector { +public: + OneComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OneComparatorScoringMaxScoreCollector(); + + LUCENE_CLASS(OneComparatorScoringMaxScoreCollector); + +public: + ScorerPtr scorer; + +public: + virtual void updateBottom(int32_t doc, double score); + virtual void collect(int32_t doc); + virtual void setScorer(const ScorerPtr& scorer); +}; + +/// Implements a TopFieldCollector over one SortField criteria, with tracking document scores and maxScore, +/// and assumes out of orderness in doc Ids collection. 
+class OutOfOrderOneComparatorScoringMaxScoreCollector : public OneComparatorScoringMaxScoreCollector { +public: + OutOfOrderOneComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OutOfOrderOneComparatorScoringMaxScoreCollector(); + + LUCENE_CLASS(OutOfOrderOneComparatorScoringMaxScoreCollector); + +public: + virtual void collect(int32_t doc); + virtual bool acceptsDocsOutOfOrder(); +}; + +/// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. +class MultiComparatorNonScoringCollector : public TopFieldCollector { +public: + MultiComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~MultiComparatorNonScoringCollector(); + + LUCENE_CLASS(MultiComparatorNonScoringCollector); + +public: + Collection comparators; + Collection reverseMul; + +public: + virtual void initialize(); + virtual void updateBottom(int32_t doc); + virtual void collect(int32_t doc); + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase); + virtual void setScorer(const ScorerPtr& scorer); +}; + +/// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. +class OutOfOrderMultiComparatorNonScoringCollector : public MultiComparatorNonScoringCollector { +public: + OutOfOrderMultiComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OutOfOrderMultiComparatorNonScoringCollector(); + + LUCENE_CLASS(OutOfOrderMultiComparatorNonScoringCollector); + +public: + virtual void collect(int32_t doc); + virtual bool acceptsDocsOutOfOrder(); +}; + +/// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore. 
+class MultiComparatorScoringMaxScoreCollector : public MultiComparatorNonScoringCollector { +public: + MultiComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~MultiComparatorScoringMaxScoreCollector(); + + LUCENE_CLASS(MultiComparatorScoringMaxScoreCollector); + +public: + ScorerWeakPtr _scorer; + +public: + virtual void updateBottom(int32_t doc, double score); + virtual void collect(int32_t doc); + virtual void setScorer(const ScorerPtr& scorer); +}; + +/// Implements a TopFieldCollector over multiple SortField criteria, without tracking document scores and maxScore. +class OutOfOrderMultiComparatorScoringMaxScoreCollector : public MultiComparatorScoringMaxScoreCollector { +public: + OutOfOrderMultiComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OutOfOrderMultiComparatorScoringMaxScoreCollector(); + + LUCENE_CLASS(OutOfOrderMultiComparatorScoringMaxScoreCollector); + +public: + virtual void collect(int32_t doc); + virtual bool acceptsDocsOutOfOrder(); +}; + +/// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore. +class MultiComparatorScoringNoMaxScoreCollector : public MultiComparatorNonScoringCollector { +public: + MultiComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~MultiComparatorScoringNoMaxScoreCollector(); + + LUCENE_CLASS(MultiComparatorScoringNoMaxScoreCollector); + +public: + ScorerWeakPtr _scorer; + +public: + virtual void updateBottom(int32_t doc, double score); + virtual void collect(int32_t doc); + virtual void setScorer(const ScorerPtr& scorer); +}; + +/// Implements a TopFieldCollector over multiple SortField criteria, with tracking document scores and maxScore, +/// and assumes out of orderness in doc Ids collection. 
+class OutOfOrderMultiComparatorScoringNoMaxScoreCollector : public MultiComparatorScoringNoMaxScoreCollector { +public: + OutOfOrderMultiComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields); + virtual ~OutOfOrderMultiComparatorScoringNoMaxScoreCollector(); + + LUCENE_CLASS(OutOfOrderMultiComparatorScoringNoMaxScoreCollector); + +public: + virtual void collect(int32_t doc); + virtual void setScorer(const ScorerPtr& scorer); + virtual bool acceptsDocsOutOfOrder(); +}; + } #endif diff --git a/src/core/include/_TopScoreDocCollector.h b/src/core/include/_TopScoreDocCollector.h index 92fa00e7..668b2d7a 100644 --- a/src/core/include/_TopScoreDocCollector.h +++ b/src/core/include/_TopScoreDocCollector.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,35 +9,34 @@ #include "TopDocsCollector.h" -namespace Lucene -{ - /// Assumes docs are scored in order. - class InOrderTopScoreDocCollector : public TopScoreDocCollector - { - public: - InOrderTopScoreDocCollector(int32_t numHits); - virtual ~InOrderTopScoreDocCollector(); - - LUCENE_CLASS(InOrderTopScoreDocCollector); - - public: - virtual void collect(int32_t doc); - virtual bool acceptsDocsOutOfOrder(); - }; - - /// Assumes docs are scored out of order. 
- class OutOfOrderTopScoreDocCollector : public TopScoreDocCollector - { - public: - OutOfOrderTopScoreDocCollector(int32_t numHits); - virtual ~OutOfOrderTopScoreDocCollector(); - - LUCENE_CLASS(OutOfOrderTopScoreDocCollector); - - public: - virtual void collect(int32_t doc); - virtual bool acceptsDocsOutOfOrder(); - }; +namespace Lucene { + +/// Assumes docs are scored in order. +class InOrderTopScoreDocCollector : public TopScoreDocCollector { +public: + InOrderTopScoreDocCollector(int32_t numHits); + virtual ~InOrderTopScoreDocCollector(); + + LUCENE_CLASS(InOrderTopScoreDocCollector); + +public: + virtual void collect(int32_t doc); + virtual bool acceptsDocsOutOfOrder(); +}; + +/// Assumes docs are scored out of order. +class OutOfOrderTopScoreDocCollector : public TopScoreDocCollector { +public: + OutOfOrderTopScoreDocCollector(int32_t numHits); + virtual ~OutOfOrderTopScoreDocCollector(); + + LUCENE_CLASS(OutOfOrderTopScoreDocCollector); + +public: + virtual void collect(int32_t doc); + virtual bool acceptsDocsOutOfOrder(); +}; + } #endif diff --git a/src/core/include/_ValueSourceQuery.h b/src/core/include/_ValueSourceQuery.h index bfe4bf97..004722e3 100644 --- a/src/core/include/_ValueSourceQuery.h +++ b/src/core/include/_ValueSourceQuery.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,55 +10,54 @@ #include "Weight.h" #include "Scorer.h" -namespace Lucene -{ - class ValueSourceWeight : public Weight - { - public: - ValueSourceWeight(ValueSourceQueryPtr query, SearcherPtr searcher); - virtual ~ValueSourceWeight(); - - LUCENE_CLASS(ValueSourceWeight); - - public: - ValueSourceQueryPtr query; - SimilarityPtr similarity; - double queryNorm; - double queryWeight; - - public: - virtual QueryPtr getQuery(); - virtual double getValue(); - virtual double sumOfSquaredWeights(); - virtual void normalize(double norm); - virtual ScorerPtr scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer); - virtual ExplanationPtr explain(IndexReaderPtr reader, int32_t doc); - }; - - /// A scorer that (simply) matches all documents, and scores each document with the value of the value - /// source in effect. As an example, if the value source is a (cached) field source, then value of that - /// field in that document will be used. (assuming field is indexed for this doc, with a single token.) 
- class ValueSourceScorer : public Scorer - { - public: - ValueSourceScorer(SimilarityPtr similarity, IndexReaderPtr reader, ValueSourceWeightPtr weight); - virtual ~ValueSourceScorer(); - - LUCENE_CLASS(ValueSourceScorer); - - public: - ValueSourceWeightPtr weight; - double qWeight; - DocValuesPtr vals; - TermDocsPtr termDocs; - int32_t doc; - - public: - virtual int32_t nextDoc(); - virtual int32_t docID(); - virtual int32_t advance(int32_t target); - virtual double score(); - }; +namespace Lucene { + +class ValueSourceWeight : public Weight { +public: + ValueSourceWeight(const ValueSourceQueryPtr& query, const SearcherPtr& searcher); + virtual ~ValueSourceWeight(); + + LUCENE_CLASS(ValueSourceWeight); + +public: + ValueSourceQueryPtr query; + SimilarityPtr similarity; + double queryNorm; + double queryWeight; + +public: + virtual QueryPtr getQuery(); + virtual double getValue(); + virtual double sumOfSquaredWeights(); + virtual void normalize(double norm); + virtual ScorerPtr scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer); + virtual ExplanationPtr explain(const IndexReaderPtr& reader, int32_t doc); +}; + +/// A scorer that (simply) matches all documents, and scores each document with the value of the value +/// source in effect. As an example, if the value source is a (cached) field source, then value of that +/// field in that document will be used. (assuming field is indexed for this doc, with a single token.) 
+class ValueSourceScorer : public Scorer { +public: + ValueSourceScorer(const SimilarityPtr& similarity, const IndexReaderPtr& reader, const ValueSourceWeightPtr& weight); + virtual ~ValueSourceScorer(); + + LUCENE_CLASS(ValueSourceScorer); + +public: + ValueSourceWeightPtr weight; + double qWeight; + DocValuesPtr vals; + TermDocsPtr termDocs; + int32_t doc; + +public: + virtual int32_t nextDoc(); + virtual int32_t docID(); + virtual int32_t advance(int32_t target); + virtual double score(); +}; + } #endif diff --git a/src/core/index/AbstractAllTermDocs.cpp b/src/core/index/AbstractAllTermDocs.cpp index 73057625..aa6edccf 100644 --- a/src/core/index/AbstractAllTermDocs.cpp +++ b/src/core/index/AbstractAllTermDocs.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,76 +7,66 @@ #include "LuceneInc.h" #include "AbstractAllTermDocs.h" -namespace Lucene -{ - AbstractAllTermDocs::AbstractAllTermDocs(int32_t maxDoc) - { - this->maxDoc = maxDoc; - this->_doc = -1; - } - - AbstractAllTermDocs::~AbstractAllTermDocs() - { - } - - void AbstractAllTermDocs::seek(TermPtr term) - { - if (!term) - _doc = -1; - else - boost::throw_exception(UnsupportedOperationException()); - } - - void AbstractAllTermDocs::seek(TermEnumPtr termEnum) - { +namespace Lucene { + +AbstractAllTermDocs::AbstractAllTermDocs(int32_t maxDoc) { + this->maxDoc = maxDoc; + this->_doc = -1; +} + +AbstractAllTermDocs::~AbstractAllTermDocs() { +} + +void AbstractAllTermDocs::seek(const TermPtr& term) { + if (!term) { + _doc = -1; + } else { boost::throw_exception(UnsupportedOperationException()); } - - int32_t AbstractAllTermDocs::doc() - { - return _doc; - } - - int32_t AbstractAllTermDocs::freq() - { - return 1; - } - - bool AbstractAllTermDocs::next() - { - return skipTo(_doc + 1); - } - - int32_t AbstractAllTermDocs::read(Collection docs, Collection freqs) - { - int32_t length = docs.size(); - int32_t i = 0; - while (i < length && _doc < maxDoc) - { - if (!isDeleted(_doc)) - { - docs[i] = _doc; - freqs[i] = 1; - ++i; - } - ++_doc; +} + +void AbstractAllTermDocs::seek(const TermEnumPtr& termEnum) { + boost::throw_exception(UnsupportedOperationException()); +} + +int32_t AbstractAllTermDocs::doc() { + return _doc; +} + +int32_t AbstractAllTermDocs::freq() { + return 1; +} + +bool AbstractAllTermDocs::next() { + return skipTo(_doc + 1); +} + +int32_t AbstractAllTermDocs::read(Collection& docs, Collection& freqs) { + int32_t length = docs.size(); + int32_t i = 0; + while (i < length && _doc < maxDoc) { + if (!isDeleted(_doc)) { + docs[i] = _doc; + freqs[i] = 1; + ++i; } - return i; + ++_doc; } - - bool AbstractAllTermDocs::skipTo(int32_t target) - { - _doc = target; - while (_doc < maxDoc) 
- { - if (!isDeleted(_doc)) - return true; - ++_doc; + return i; +} + +bool AbstractAllTermDocs::skipTo(int32_t target) { + _doc = target; + while (_doc < maxDoc) { + if (!isDeleted(_doc)) { + return true; } - return false; - } - - void AbstractAllTermDocs::close() - { + ++_doc; } + return false; +} + +void AbstractAllTermDocs::close() { +} + } diff --git a/src/core/index/AllTermDocs.cpp b/src/core/index/AllTermDocs.cpp index d29a83ae..96186ea8 100644 --- a/src/core/index/AllTermDocs.cpp +++ b/src/core/index/AllTermDocs.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,21 +9,19 @@ #include "SegmentReader.h" #include "BitVector.h" -namespace Lucene -{ - AllTermDocs::AllTermDocs(SegmentReaderPtr parent) : AbstractAllTermDocs(parent->maxDoc()) - { - SyncLock parentLock(parent); - this->_deletedDocs = parent->deletedDocs; - } - - AllTermDocs::~AllTermDocs() - { - } - - bool AllTermDocs::isDeleted(int32_t doc) - { - BitVectorPtr deletedDocs(_deletedDocs.lock()); - return (deletedDocs && deletedDocs->get(_doc)); - } +namespace Lucene { + +AllTermDocs::AllTermDocs(const SegmentReaderPtr& parent) : AbstractAllTermDocs(parent->maxDoc()) { + SyncLock parentLock(parent); + this->_deletedDocs = parent->deletedDocs; +} + +AllTermDocs::~AllTermDocs() { +} + +bool AllTermDocs::isDeleted(int32_t doc) { + BitVectorPtr deletedDocs(_deletedDocs.lock()); + return (deletedDocs && deletedDocs->get(_doc)); +} + } diff --git a/src/core/index/BufferedDeletes.cpp b/src/core/index/BufferedDeletes.cpp index bd50ae8e..97d941da 100644 --- a/src/core/index/BufferedDeletes.cpp +++ b/src/core/index/BufferedDeletes.cpp @@ -1,5 
+1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,116 +8,109 @@ #include "BufferedDeletes.h" #include "MergeDocIDRemapper.h" -namespace Lucene -{ - BufferedDeletes::BufferedDeletes(bool doTermSort) - { - // doTermSort not used: always use sorted term map - terms = MapTermNum::newInstance(); - queries = MapQueryInt::newInstance(); - docIDs = Collection::newInstance(); - numTerms = 0; - bytesUsed = 0; - } - - BufferedDeletes::~BufferedDeletes() - { - } - - int32_t BufferedDeletes::size() - { - // We use numTerms not terms.size() intentionally, so that deletes by the same term - // multiple times "count", ie if you ask to flush every 1000 deletes then even dup'd - // terms are counted towards that 1000 - return numTerms + queries.size() + docIDs.size(); - } - - void BufferedDeletes::update(BufferedDeletesPtr in) - { - numTerms += in->numTerms; - bytesUsed += in->bytesUsed; - terms.putAll(in->terms.begin(), in->terms.end()); - queries.putAll(in->queries.begin(), in->queries.end()); - docIDs.addAll(in->docIDs.begin(), in->docIDs.end()); - in->clear(); - } - - void BufferedDeletes::clear() - { - terms.clear(); - queries.clear(); - docIDs.clear(); - numTerms = 0; - bytesUsed = 0; - } - - void BufferedDeletes::addBytesUsed(int64_t b) - { - bytesUsed += b; - } - - bool BufferedDeletes::any() - { - return (!terms.empty() || !docIDs.empty() || !queries.empty()); - } - - void BufferedDeletes::remap(MergeDocIDRemapperPtr mapper, SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergedDocCount) - { - SyncLock syncLock(this); - - MapTermNum newDeleteTerms; - - // 
Remap delete-by-term - if (!terms.empty()) - { - newDeleteTerms = MapTermNum::newInstance(); - for (MapTermNum::iterator entry = terms.begin(); entry != terms.end(); ++entry) - newDeleteTerms.put(entry->first, newLucene(mapper->remap(entry->second->getNum()))); +namespace Lucene { + +BufferedDeletes::BufferedDeletes(bool doTermSort) { + // doTermSort not used: always use sorted term map + terms = MapTermNum::newInstance(); + queries = MapQueryInt::newInstance(); + docIDs = Collection::newInstance(); + numTerms = 0; + bytesUsed = 0; +} + +BufferedDeletes::~BufferedDeletes() { +} + +int32_t BufferedDeletes::size() { + // We use numTerms not terms.size() intentionally, so that deletes by the same term + // multiple times "count", ie if you ask to flush every 1000 deletes then even dup'd + // terms are counted towards that 1000 + return numTerms + queries.size() + docIDs.size(); +} + +void BufferedDeletes::update(const BufferedDeletesPtr& in) { + numTerms += in->numTerms; + bytesUsed += in->bytesUsed; + terms.putAll(in->terms.begin(), in->terms.end()); + queries.putAll(in->queries.begin(), in->queries.end()); + docIDs.addAll(in->docIDs.begin(), in->docIDs.end()); + in->clear(); +} + +void BufferedDeletes::clear() { + terms.clear(); + queries.clear(); + docIDs.clear(); + numTerms = 0; + bytesUsed = 0; +} + +void BufferedDeletes::addBytesUsed(int64_t b) { + bytesUsed += b; +} + +bool BufferedDeletes::any() { + return (!terms.empty() || !docIDs.empty() || !queries.empty()); +} + +void BufferedDeletes::remap(const MergeDocIDRemapperPtr& mapper, const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergedDocCount) { + SyncLock syncLock(this); + + MapTermNum newDeleteTerms; + + // Remap delete-by-term + if (!terms.empty()) { + newDeleteTerms = MapTermNum::newInstance(); + for (MapTermNum::iterator entry = terms.begin(); entry != terms.end(); ++entry) { + newDeleteTerms.put(entry->first, 
newLucene(mapper->remap(entry->second->getNum()))); } - - // Remap delete-by-docID - Collection newDeleteDocIDs; - - if (!docIDs.empty()) - { - newDeleteDocIDs = Collection::newInstance(); - for (Collection::iterator num = docIDs.begin(); num != docIDs.end(); ++num) - newDeleteDocIDs.add(mapper->remap(*num)); + } + + // Remap delete-by-docID + Collection newDeleteDocIDs; + + if (!docIDs.empty()) { + newDeleteDocIDs = Collection::newInstance(); + for (Collection::iterator num = docIDs.begin(); num != docIDs.end(); ++num) { + newDeleteDocIDs.add(mapper->remap(*num)); } - - // Remap delete-by-query - MapQueryInt newDeleteQueries; - - if (!queries.empty()) - { - newDeleteQueries = MapQueryInt::newInstance(); - for (MapQueryInt::iterator entry = queries.begin(); entry != queries.end(); ++entry) - newDeleteQueries.put(entry->first, mapper->remap(entry->second)); + } + + // Remap delete-by-query + MapQueryInt newDeleteQueries; + + if (!queries.empty()) { + newDeleteQueries = MapQueryInt::newInstance(); + for (MapQueryInt::iterator entry = queries.begin(); entry != queries.end(); ++entry) { + newDeleteQueries.put(entry->first, mapper->remap(entry->second)); } - - if (newDeleteTerms) - terms = newDeleteTerms; - if (newDeleteDocIDs) - docIDs = newDeleteDocIDs; - if (newDeleteQueries) - queries = newDeleteQueries; } - - Num::Num(int32_t num) - { - this->num = num; + + if (newDeleteTerms) { + terms = newDeleteTerms; } - - int32_t Num::getNum() - { - return num; + if (newDeleteDocIDs) { + docIDs = newDeleteDocIDs; } - - void Num::setNum(int32_t num) - { - // Only record the new number if it's greater than the current one. This is important - // because if multiple threads are replacing the same doc at nearly the same time, it's - // possible that one thread that got a higher docID is scheduled before the other threads. 
- this->num = std::max(this->num, num); + if (newDeleteQueries) { + queries = newDeleteQueries; } } + +Num::Num(int32_t num) { + this->num = num; +} + +int32_t Num::getNum() { + return num; +} + +void Num::setNum(int32_t num) { + // Only record the new number if it's greater than the current one. This is important + // because if multiple threads are replacing the same doc at nearly the same time, it's + // possible that one thread that got a higher docID is scheduled before the other threads. + this->num = std::max(this->num, num); +} + +} diff --git a/src/core/index/ByteBlockPool.cpp b/src/core/index/ByteBlockPool.cpp index bed7d73e..c8f344dc 100644 --- a/src/core/index/ByteBlockPool.cpp +++ b/src/core/index/ByteBlockPool.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,116 +9,109 @@ #include "DocumentsWriter.h" #include "MiscUtils.h" -namespace Lucene -{ - // Size of each slice. These arrays should be at most 16 elements (index is encoded with 4 bits). First array - // is just a compact way to encode X+1 with a max. Second array is the length of each slice, ie first slice is - // 5 bytes, next slice is 14 bytes, etc. 
- const int32_t ByteBlockPool::nextLevelArray[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; - const int32_t ByteBlockPool::levelSizeArray[] = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200}; - - ByteBlockPool::ByteBlockPool(ByteBlockPoolAllocatorBasePtr allocator, bool trackAllocations) - { - buffers = Collection::newInstance(10); - bufferUpto = -1; - byteUpto = DocumentsWriter::BYTE_BLOCK_SIZE; - byteOffset = -DocumentsWriter::BYTE_BLOCK_SIZE; - - this->allocator = allocator; - this->trackAllocations = trackAllocations; - } - - ByteBlockPool::~ByteBlockPool() - { - } - - int32_t ByteBlockPool::FIRST_LEVEL_SIZE() - { - return levelSizeArray[0]; - } - - void ByteBlockPool::reset() - { - if (bufferUpto != -1) - { - // We allocated at least one buffer - for (int32_t i = 0; i < bufferUpto; ++i) - { - // Fully zero fill buffers that we fully used - MiscUtils::arrayFill(buffers[i].get(), 0, buffers[i].size(), 0); - } - - // Partial zero fill the final buffer - MiscUtils::arrayFill(buffers[bufferUpto].get(), 0, byteUpto, 0); - - if (bufferUpto > 0) - { - // Recycle all but the first buffer - allocator->recycleByteBlocks(buffers, 1, 1 + bufferUpto); - } - - // Re-use the first buffer - bufferUpto = 0; - byteUpto = 0; - byteOffset = 0; - buffer = buffers[0]; +namespace Lucene { + +// Size of each slice. These arrays should be at most 16 elements (index is encoded with 4 bits). First array +// is just a compact way to encode X+1 with a max. Second array is the length of each slice, ie first slice is +// 5 bytes, next slice is 14 bytes, etc. 
+const int32_t ByteBlockPool::nextLevelArray[] = {1, 2, 3, 4, 5, 6, 7, 8, 9, 9}; +const int32_t ByteBlockPool::levelSizeArray[] = {5, 14, 20, 30, 40, 40, 80, 80, 120, 200}; + +ByteBlockPool::ByteBlockPool(const ByteBlockPoolAllocatorBasePtr& allocator, bool trackAllocations) { + buffers = Collection::newInstance(10); + bufferUpto = -1; + byteUpto = DocumentsWriter::BYTE_BLOCK_SIZE; + byteOffset = -DocumentsWriter::BYTE_BLOCK_SIZE; + + this->allocator = allocator; + this->trackAllocations = trackAllocations; +} + +ByteBlockPool::~ByteBlockPool() { +} + +int32_t ByteBlockPool::FIRST_LEVEL_SIZE() { + return levelSizeArray[0]; +} + +void ByteBlockPool::reset() { + if (bufferUpto != -1) { + // We allocated at least one buffer + for (int32_t i = 0; i < bufferUpto; ++i) { + // Fully zero fill buffers that we fully used + MiscUtils::arrayFill(buffers[i].get(), 0, buffers[i].size(), 0); } - } - - void ByteBlockPool::nextBuffer() - { - if (1 + bufferUpto == buffers.size()) - buffers.resize((int32_t)((double)buffers.size() * 1.5)); - buffers[1 + bufferUpto] = allocator->getByteBlock(trackAllocations); - buffer = buffers[1 + bufferUpto]; - ++bufferUpto; - + + // Partial zero fill the final buffer + MiscUtils::arrayFill(buffers[bufferUpto].get(), 0, byteUpto, 0); + + if (bufferUpto > 0) { + // Recycle all but the first buffer + allocator->recycleByteBlocks(buffers, 1, 1 + bufferUpto); + } + + // Re-use the first buffer + bufferUpto = 0; byteUpto = 0; - byteOffset += DocumentsWriter::BYTE_BLOCK_SIZE; + byteOffset = 0; + buffer = buffers[0]; } - - int32_t ByteBlockPool::newSlice(int32_t size) - { - if (byteUpto > DocumentsWriter::BYTE_BLOCK_SIZE - size) - nextBuffer(); - int32_t upto = byteUpto; - byteUpto += size; - buffer[byteUpto - 1] = 16; - return upto; +} + +void ByteBlockPool::nextBuffer() { + if (1 + bufferUpto == buffers.size()) { + buffers.resize((int32_t)((double)buffers.size() * 1.5)); } - - int32_t ByteBlockPool::allocSlice(ByteArray slice, int32_t upto) - { - 
int32_t level = slice[upto] & 15; - int32_t newLevel = nextLevelArray[level]; - int32_t newSize = levelSizeArray[newLevel]; - - // Maybe allocate another block - if (byteUpto > DocumentsWriter::BYTE_BLOCK_SIZE - newSize) - nextBuffer(); - - int32_t newUpto = byteUpto; - int32_t offset = newUpto + byteOffset; - byteUpto += newSize; - - // Copy forward the past 3 bytes (which we are about to overwrite with the forwarding address) - buffer[newUpto] = slice[upto - 3]; - buffer[newUpto + 1] = slice[upto - 2]; - buffer[newUpto + 2] = slice[upto - 1]; - - // Write forwarding address at end of last slice - slice[upto - 3] = (uint8_t)MiscUtils::unsignedShift(offset, 24); - slice[upto - 2] = (uint8_t)MiscUtils::unsignedShift(offset, 16); - slice[upto - 1] = (uint8_t)MiscUtils::unsignedShift(offset, 8); - slice[upto] = (uint8_t)offset; - - // Write new level - buffer[byteUpto - 1] = (uint8_t)(16 | newLevel); - - return (newUpto + 3); + buffers[1 + bufferUpto] = allocator->getByteBlock(trackAllocations); + buffer = buffers[1 + bufferUpto]; + ++bufferUpto; + + byteUpto = 0; + byteOffset += DocumentsWriter::BYTE_BLOCK_SIZE; +} + +int32_t ByteBlockPool::newSlice(int32_t size) { + if (byteUpto > DocumentsWriter::BYTE_BLOCK_SIZE - size) { + nextBuffer(); } - - ByteBlockPoolAllocatorBase::~ByteBlockPoolAllocatorBase() - { + int32_t upto = byteUpto; + byteUpto += size; + buffer[byteUpto - 1] = 16; + return upto; +} + +int32_t ByteBlockPool::allocSlice(ByteArray slice, int32_t upto) { + int32_t level = slice[upto] & 15; + int32_t newLevel = nextLevelArray[level]; + int32_t newSize = levelSizeArray[newLevel]; + + // Maybe allocate another block + if (byteUpto > DocumentsWriter::BYTE_BLOCK_SIZE - newSize) { + nextBuffer(); } + + int32_t newUpto = byteUpto; + int32_t offset = newUpto + byteOffset; + byteUpto += newSize; + + // Copy forward the past 3 bytes (which we are about to overwrite with the forwarding address) + buffer[newUpto] = slice[upto - 3]; + buffer[newUpto + 1] = slice[upto 
- 2]; + buffer[newUpto + 2] = slice[upto - 1]; + + // Write forwarding address at end of last slice + slice[upto - 3] = (uint8_t)MiscUtils::unsignedShift(offset, 24); + slice[upto - 2] = (uint8_t)MiscUtils::unsignedShift(offset, 16); + slice[upto - 1] = (uint8_t)MiscUtils::unsignedShift(offset, 8); + slice[upto] = (uint8_t)offset; + + // Write new level + buffer[byteUpto - 1] = (uint8_t)(16 | newLevel); + + return (newUpto + 3); +} + +ByteBlockPoolAllocatorBase::~ByteBlockPoolAllocatorBase() { +} + } diff --git a/src/core/index/ByteSliceReader.cpp b/src/core/index/ByteSliceReader.cpp index 43bfcc9b..b609510f 100644 --- a/src/core/index/ByteSliceReader.cpp +++ b/src/core/index/ByteSliceReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,155 +10,133 @@ #include "IndexOutput.h" #include "MiscUtils.h" -namespace Lucene -{ - ByteSliceReader::ByteSliceReader() - { - bufferUpto = 0; - upto = 0; - limit = 0; - level = 0; - bufferOffset = 0; - endIndex = 0; - } - - ByteSliceReader::~ByteSliceReader() - { - } - - void ByteSliceReader::init(ByteBlockPoolPtr pool, int32_t startIndex, int32_t endIndex) - { - BOOST_ASSERT(endIndex - startIndex >= 0); - BOOST_ASSERT(startIndex >= 0); - BOOST_ASSERT(endIndex >= 0); - - this->pool = pool; - this->endIndex = endIndex; - - level = 0; - bufferUpto = startIndex / DocumentsWriter::BYTE_BLOCK_SIZE; - bufferOffset = bufferUpto * DocumentsWriter::BYTE_BLOCK_SIZE; - buffer = pool->buffers[bufferUpto]; - upto = startIndex & DocumentsWriter::BYTE_BLOCK_MASK; - - int32_t firstSize = ByteBlockPool::levelSizeArray[0]; - - if (startIndex + firstSize >= endIndex) - { - // There is only this one slice to read - limit = endIndex & DocumentsWriter::BYTE_BLOCK_MASK; - } - else - limit = upto + firstSize - 4; +namespace Lucene { + +ByteSliceReader::ByteSliceReader() { + bufferUpto = 0; + upto = 0; + limit = 0; + level = 0; + bufferOffset = 0; + endIndex = 0; +} + +ByteSliceReader::~ByteSliceReader() { +} + +void ByteSliceReader::init(const ByteBlockPoolPtr& pool, int32_t startIndex, int32_t endIndex) { + BOOST_ASSERT(endIndex - startIndex >= 0); + BOOST_ASSERT(startIndex >= 0); + BOOST_ASSERT(endIndex >= 0); + + this->pool = pool; + this->endIndex = endIndex; + + level = 0; + bufferUpto = startIndex / DocumentsWriter::BYTE_BLOCK_SIZE; + bufferOffset = bufferUpto * DocumentsWriter::BYTE_BLOCK_SIZE; + buffer = pool->buffers[bufferUpto]; + upto = startIndex & DocumentsWriter::BYTE_BLOCK_MASK; + + int32_t firstSize = ByteBlockPool::levelSizeArray[0]; + + if (startIndex + firstSize >= endIndex) { + // There is only this one slice to read + limit = endIndex & DocumentsWriter::BYTE_BLOCK_MASK; + } else { + 
limit = upto + firstSize - 4; } - - bool ByteSliceReader::eof() - { - BOOST_ASSERT(upto + bufferOffset <= endIndex); - return (upto + bufferOffset == endIndex); +} + +bool ByteSliceReader::eof() { + BOOST_ASSERT(upto + bufferOffset <= endIndex); + return (upto + bufferOffset == endIndex); +} + +uint8_t ByteSliceReader::readByte() { + BOOST_ASSERT(!eof()); + BOOST_ASSERT(upto <= limit); + if (upto == limit) { + nextSlice(); } - - uint8_t ByteSliceReader::readByte() - { - BOOST_ASSERT(!eof()); - BOOST_ASSERT(upto <= limit); - if (upto == limit) + return buffer[upto++]; +} + +int64_t ByteSliceReader::writeTo(const IndexOutputPtr& out) { + int64_t size = 0; + while (true) { + if (limit + bufferOffset == endIndex) { + BOOST_ASSERT(endIndex - bufferOffset >= upto); + out->writeBytes(buffer.get(), upto, limit - upto); + size += limit - upto; + break; + } else { + out->writeBytes(buffer.get(), upto, limit - upto); + size += limit-upto; nextSlice(); - return buffer[upto++]; - } - - int64_t ByteSliceReader::writeTo(IndexOutputPtr out) - { - int64_t size = 0; - while (true) - { - if (limit + bufferOffset == endIndex) - { - BOOST_ASSERT(endIndex - bufferOffset >= upto); - out->writeBytes(buffer.get(), upto, limit - upto); - size += limit - upto; - break; - } - else - { - out->writeBytes(buffer.get(), upto, limit - upto); - size += limit-upto; - nextSlice(); - } } - return size; } - - void ByteSliceReader::nextSlice() - { - // Skip to our next slice - int32_t nextIndex = ((buffer[limit] & 0xff) << 24) + ((buffer[1 + limit] & 0xff) << 16) + - ((buffer[2 + limit] & 0xff) << 8) + (buffer[3 + limit] & 0xff); - - level = ByteBlockPool::nextLevelArray[level]; - int32_t newSize = ByteBlockPool::levelSizeArray[level]; - - bufferUpto = nextIndex / DocumentsWriter::BYTE_BLOCK_SIZE; - bufferOffset = bufferUpto * DocumentsWriter::BYTE_BLOCK_SIZE; - - this->buffer = pool->buffers[bufferUpto]; - upto = nextIndex & DocumentsWriter::BYTE_BLOCK_MASK; - - if (nextIndex + newSize >= endIndex) - { 
- // We are advancing to the final slice - BOOST_ASSERT(endIndex - nextIndex > 0); - limit = endIndex - bufferOffset; - } - else - { - // This is not the final slice (subtract 4 for the forwarding address at the end of this new slice) - limit = upto + newSize - 4; - } + return size; +} + +void ByteSliceReader::nextSlice() { + // Skip to our next slice + int32_t nextIndex = ((buffer[limit] & 0xff) << 24) + ((buffer[1 + limit] & 0xff) << 16) + + ((buffer[2 + limit] & 0xff) << 8) + (buffer[3 + limit] & 0xff); + + level = ByteBlockPool::nextLevelArray[level]; + int32_t newSize = ByteBlockPool::levelSizeArray[level]; + + bufferUpto = nextIndex / DocumentsWriter::BYTE_BLOCK_SIZE; + bufferOffset = bufferUpto * DocumentsWriter::BYTE_BLOCK_SIZE; + + this->buffer = pool->buffers[bufferUpto]; + upto = nextIndex & DocumentsWriter::BYTE_BLOCK_MASK; + + if (nextIndex + newSize >= endIndex) { + // We are advancing to the final slice + BOOST_ASSERT(endIndex - nextIndex > 0); + limit = endIndex - bufferOffset; + } else { + // This is not the final slice (subtract 4 for the forwarding address at the end of this new slice) + limit = upto + newSize - 4; } - - void ByteSliceReader::readBytes(uint8_t* b, int32_t offset, int32_t length) - { - while (length > 0) - { - int32_t numLeft = limit - upto; - if (numLeft < length) - { - // Read entire slice - MiscUtils::arrayCopy(buffer.get(), upto, b, offset, numLeft); - offset += numLeft; - length -= numLeft; - nextSlice(); - } - else - { - // This slice is the last one - MiscUtils::arrayCopy(buffer.get(), upto, b, offset, length); - upto += length; - break; - } +} + +void ByteSliceReader::readBytes(uint8_t* b, int32_t offset, int32_t length) { + while (length > 0) { + int32_t numLeft = limit - upto; + if (numLeft < length) { + // Read entire slice + MiscUtils::arrayCopy(buffer.get(), upto, b, offset, numLeft); + offset += numLeft; + length -= numLeft; + nextSlice(); + } else { + // This slice is the last one + 
MiscUtils::arrayCopy(buffer.get(), upto, b, offset, length); + upto += length; + break; } } - - int64_t ByteSliceReader::getFilePointer() - { - boost::throw_exception(RuntimeException(L"not implemented")); - return 0; - } - - int64_t ByteSliceReader::length() - { - boost::throw_exception(RuntimeException(L"not implemented")); - return 0; - } - - void ByteSliceReader::seek(int64_t pos) - { - boost::throw_exception(RuntimeException(L"not implemented")); - } - - void ByteSliceReader::close() - { - boost::throw_exception(RuntimeException(L"not implemented")); - } +} + +int64_t ByteSliceReader::getFilePointer() { + boost::throw_exception(RuntimeException(L"not implemented")); + return 0; +} + +int64_t ByteSliceReader::length() { + boost::throw_exception(RuntimeException(L"not implemented")); + return 0; +} + +void ByteSliceReader::seek(int64_t pos) { + boost::throw_exception(RuntimeException(L"not implemented")); +} + +void ByteSliceReader::close() { + boost::throw_exception(RuntimeException(L"not implemented")); +} + } diff --git a/src/core/index/ByteSliceWriter.cpp b/src/core/index/ByteSliceWriter.cpp index 8994b31f..c68bb2b1 100644 --- a/src/core/index/ByteSliceWriter.cpp +++ b/src/core/index/ByteSliceWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,72 +9,62 @@ #include "DocumentsWriter.h" #include "MiscUtils.h" -namespace Lucene -{ - ByteSliceWriter::ByteSliceWriter(ByteBlockPoolPtr pool) - { - this->pool = pool; - upto = 0; - offset0 = 0; - } - - ByteSliceWriter::~ByteSliceWriter() - { - } - - void ByteSliceWriter::init(int32_t address) - { - slice = pool->buffers[address >> DocumentsWriter::BYTE_BLOCK_SHIFT]; +namespace Lucene { + +ByteSliceWriter::ByteSliceWriter(const ByteBlockPoolPtr& pool) { + this->pool = pool; + upto = 0; + offset0 = 0; +} + +ByteSliceWriter::~ByteSliceWriter() { +} + +void ByteSliceWriter::init(int32_t address) { + slice = pool->buffers[address >> DocumentsWriter::BYTE_BLOCK_SHIFT]; + BOOST_ASSERT(slice); + upto = (address & DocumentsWriter::BYTE_BLOCK_MASK); + offset0 = address; + BOOST_ASSERT(upto < slice.size()); +} + +void ByteSliceWriter::writeByte(uint8_t b) { + BOOST_ASSERT(slice); + if (slice[upto] != 0) { + upto = pool->allocSlice(slice, upto); + slice = pool->buffer; + offset0 = pool->byteOffset; BOOST_ASSERT(slice); - upto = (address & DocumentsWriter::BYTE_BLOCK_MASK); - offset0 = address; - BOOST_ASSERT(upto < slice.size()); } - - void ByteSliceWriter::writeByte(uint8_t b) - { - BOOST_ASSERT(slice); - if (slice[upto] != 0) - { + slice[upto++] = b; + BOOST_ASSERT(upto != slice.size()); +} + +void ByteSliceWriter::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { + int32_t offsetEnd = offset + length; + while (offset < offsetEnd) { + if (slice[upto] != 0) { + // End marker upto = pool->allocSlice(slice, upto); slice = pool->buffer; offset0 = pool->byteOffset; - BOOST_ASSERT(slice); } - slice[upto++] = b; + + slice[upto++] = b[offset++]; BOOST_ASSERT(upto != slice.size()); } - - void ByteSliceWriter::writeBytes(const uint8_t* b, int32_t offset, int32_t length) - { - int32_t offsetEnd = offset + length; - while (offset < offsetEnd) - { - if (slice[upto] != 0) - { - // End marker 
- upto = pool->allocSlice(slice, upto); - slice = pool->buffer; - offset0 = pool->byteOffset; - } - - slice[upto++] = b[offset++]; - BOOST_ASSERT(upto != slice.size()); - } - } - - int32_t ByteSliceWriter::getAddress() - { - return upto + (offset0 & DocumentsWriter::BYTE_BLOCK_NOT_MASK); - } - - void ByteSliceWriter::writeVInt(int32_t i) - { - while ((i & ~0x7f) != 0) - { - writeByte((uint8_t)((i & 0x7f) | 0x80)); - i = MiscUtils::unsignedShift(i, 7); - } - writeByte((uint8_t)i); +} + +int32_t ByteSliceWriter::getAddress() { + return upto + (offset0 & DocumentsWriter::BYTE_BLOCK_NOT_MASK); +} + +void ByteSliceWriter::writeVInt(int32_t i) { + while ((i & ~0x7f) != 0) { + writeByte((uint8_t)((i & 0x7f) | 0x80)); + i = MiscUtils::unsignedShift(i, 7); } + writeByte((uint8_t)i); +} + } diff --git a/src/core/index/CharBlockPool.cpp b/src/core/index/CharBlockPool.cpp index a97fc2a6..b552d7cd 100644 --- a/src/core/index/CharBlockPool.cpp +++ b/src/core/index/CharBlockPool.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,39 +8,37 @@ #include "CharBlockPool.h" #include "DocumentsWriter.h" -namespace Lucene -{ - CharBlockPool::CharBlockPool(DocumentsWriterPtr docWriter) - { - numBuffer = 0; - bufferUpto = -1; - charUpto = DocumentsWriter::CHAR_BLOCK_SIZE; - charOffset = -DocumentsWriter::CHAR_BLOCK_SIZE; - buffers = Collection::newInstance(10); - this->_docWriter = docWriter; - } - - CharBlockPool::~CharBlockPool() - { - } - - void CharBlockPool::reset() - { - DocumentsWriterPtr(_docWriter)->recycleCharBlocks(buffers, 1 + bufferUpto); - bufferUpto = -1; - charUpto = DocumentsWriter::CHAR_BLOCK_SIZE; - charOffset = -DocumentsWriter::CHAR_BLOCK_SIZE; - } - - void CharBlockPool::nextBuffer() - { - if (1 + bufferUpto == buffers.size()) - buffers.resize((int32_t)((double)buffers.size() * 1.5)); - buffers[1 + bufferUpto] = DocumentsWriterPtr(_docWriter)->getCharBlock(); - buffer = buffers[1 + bufferUpto]; - ++bufferUpto; - - charUpto = 0; - charOffset += DocumentsWriter::CHAR_BLOCK_SIZE; +namespace Lucene { + +CharBlockPool::CharBlockPool(const DocumentsWriterPtr& docWriter) { + numBuffer = 0; + bufferUpto = -1; + charUpto = DocumentsWriter::CHAR_BLOCK_SIZE; + charOffset = -DocumentsWriter::CHAR_BLOCK_SIZE; + buffers = Collection::newInstance(10); + this->_docWriter = docWriter; +} + +CharBlockPool::~CharBlockPool() { +} + +void CharBlockPool::reset() { + DocumentsWriterPtr(_docWriter)->recycleCharBlocks(buffers, 1 + bufferUpto); + bufferUpto = -1; + charUpto = DocumentsWriter::CHAR_BLOCK_SIZE; + charOffset = -DocumentsWriter::CHAR_BLOCK_SIZE; +} + +void CharBlockPool::nextBuffer() { + if (1 + bufferUpto == buffers.size()) { + buffers.resize((int32_t)((double)buffers.size() * 1.5)); } + buffers[1 + bufferUpto] = DocumentsWriterPtr(_docWriter)->getCharBlock(); + buffer = buffers[1 + bufferUpto]; + ++bufferUpto; + + charUpto = 0; + charOffset += DocumentsWriter::CHAR_BLOCK_SIZE; +} + } diff --git 
a/src/core/index/CheckIndex.cpp b/src/core/index/CheckIndex.cpp index 9e910f1f..289cb27e 100644 --- a/src/core/index/CheckIndex.cpp +++ b/src/core/index/CheckIndex.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -22,738 +22,648 @@ #include "InfoStream.h" #include "StringUtils.h" -namespace Lucene -{ - bool CheckIndex::_assertsOn = false; - - CheckIndex::CheckIndex(DirectoryPtr dir) - { - this->dir = dir; +namespace Lucene { + +bool CheckIndex::_assertsOn = false; + +CheckIndex::CheckIndex(const DirectoryPtr& dir) { + this->dir = dir; +} + +CheckIndex::~CheckIndex() { +} + +void CheckIndex::setInfoStream(const InfoStreamPtr& out) { + infoStream = out; +} + +void CheckIndex::msg(const String& msg) { + if (infoStream) { + *infoStream << msg << L"\n"; } - - CheckIndex::~CheckIndex() - { +} + +IndexStatusPtr CheckIndex::checkIndex() { + return checkIndex(Collection()); +} + +IndexStatusPtr CheckIndex::checkIndex(Collection onlySegments) { + SegmentInfosPtr sis(newLucene()); + IndexStatusPtr result(newLucene()); + result->dir = dir; + try { + sis->read(dir); + } catch (...) { + msg(L"ERROR: could not read any segments file in directory"); + result->missingSegments = true; + return result; } - - void CheckIndex::setInfoStream(InfoStreamPtr out) - { - infoStream = out; + + int32_t numSegments = sis->size(); + String segmentsFileName(sis->getCurrentSegmentFileName()); + IndexInputPtr input; + + try { + input = dir->openInput(segmentsFileName); + } catch (...) 
{ + msg(L"ERROR: could not open segments file in directory"); + result->cantOpenSegments = true; + return result; } - - void CheckIndex::msg(const String& msg) - { - if (infoStream) - *infoStream << msg << L"\n"; + + int32_t format = 0; + try { + format = input->readInt(); + } catch (...) { + msg(L"ERROR: could not read segment file version in directory"); + result->missingSegmentVersion = true; + if (input) { + input->close(); + } + return result; } - - IndexStatusPtr CheckIndex::checkIndex() - { - return checkIndex(Collection()); + if (input) { + input->close(); } - - IndexStatusPtr CheckIndex::checkIndex(Collection onlySegments) - { - SegmentInfosPtr sis(newLucene()); - IndexStatusPtr result(newLucene()); - result->dir = dir; - try - { - sis->read(dir); - } - catch (...) - { - msg(L"ERROR: could not read any segments file in directory"); - result->missingSegments = true; - return result; - } - - int32_t numSegments = sis->size(); - String segmentsFileName(sis->getCurrentSegmentFileName()); - IndexInputPtr input; - - try - { - input = dir->openInput(segmentsFileName); - } - catch (...) 
- { - msg(L"ERROR: could not open segments file in directory"); - result->cantOpenSegments = true; - return result; + + String sFormat; + bool skip = false; + + if (format == SegmentInfos::FORMAT) { + sFormat = L"FORMAT [Lucene Pre-2.1]"; + } + if (format == SegmentInfos::FORMAT_LOCKLESS) { + sFormat = L"FORMAT_LOCKLESS [Lucene 2.1]"; + } else if (format == SegmentInfos::FORMAT_SINGLE_NORM_FILE) { + sFormat = L"FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; + } else if (format == SegmentInfos::FORMAT_SHARED_DOC_STORE) { + sFormat = L"FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; + } else { + if (format == SegmentInfos::FORMAT_CHECKSUM) { + sFormat = L"FORMAT_CHECKSUM [Lucene 2.4]"; + } else if (format == SegmentInfos::FORMAT_DEL_COUNT) { + sFormat = L"FORMAT_DEL_COUNT [Lucene 2.4]"; + } else if (format == SegmentInfos::FORMAT_HAS_PROX) { + sFormat = L"FORMAT_HAS_PROX [Lucene 2.4]"; + } else if (format == SegmentInfos::FORMAT_USER_DATA) { + sFormat = L"FORMAT_USER_DATA [Lucene 2.9]"; + } else if (format == SegmentInfos::FORMAT_DIAGNOSTICS) { + sFormat = L"FORMAT_DIAGNOSTICS [Lucene 2.9]"; + } else if (format < SegmentInfos::CURRENT_FORMAT) { + sFormat = L"int=" + StringUtils::toString(format) + L" [newer version of Lucene than this tool]"; + skip = true; + } else { + sFormat = StringUtils::toString(format) + L" [Lucene 1.3 or prior]"; } - - int32_t format = 0; - try - { - format = input->readInt(); + } + + result->segmentsFileName = segmentsFileName; + result->numSegments = numSegments; + result->segmentFormat = sFormat; + result->userData = sis->getUserData(); + String userDataString; + if (!sis->getUserData().empty()) { + userDataString = L" userData(size)=" + StringUtils::toString(sis->getUserData().size()); + } + + msg(L"Segments file=" + segmentsFileName + L" numSegments=" + StringUtils::toString(numSegments) + + L" version=" + sFormat + userDataString); + + if (onlySegments) { + result->partial = true; + msg(L"\nChecking only these segments:"); + for (Collection::iterator s 
= onlySegments.begin(); s != onlySegments.end(); ++s) { + msg(L" " + *s); } - catch (...) - { - msg(L"ERROR: could not read segment file version in directory"); - result->missingSegmentVersion = true; - if (input) - input->close(); - return result; + result->segmentsChecked.addAll(onlySegments.begin(), onlySegments.end()); + msg(L":"); + } + + if (skip) { + msg(L"\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on;" \ + L" please re-compile this tool on the matching version of Lucene; exiting"); + result->toolOutOfDate = true; + return result; + } + + result->newSegments = boost::dynamic_pointer_cast(sis->clone()); + result->newSegments->clear(); + + for (int32_t i = 0; i < numSegments; ++i) { + SegmentInfoPtr info(sis->info(i)); + if (onlySegments && !onlySegments.contains(info->name)) { + continue; } - if (input) - input->close(); - - String sFormat; - bool skip = false; - - if (format == SegmentInfos::FORMAT) - sFormat = L"FORMAT [Lucene Pre-2.1]"; - if (format == SegmentInfos::FORMAT_LOCKLESS) - sFormat = L"FORMAT_LOCKLESS [Lucene 2.1]"; - else if (format == SegmentInfos::FORMAT_SINGLE_NORM_FILE) - sFormat = L"FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; - else if (format == SegmentInfos::FORMAT_SHARED_DOC_STORE) - sFormat = L"FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; - else - { - if (format == SegmentInfos::FORMAT_CHECKSUM) - sFormat = L"FORMAT_CHECKSUM [Lucene 2.4]"; - else if (format == SegmentInfos::FORMAT_DEL_COUNT) - sFormat = L"FORMAT_DEL_COUNT [Lucene 2.4]"; - else if (format == SegmentInfos::FORMAT_HAS_PROX) - sFormat = L"FORMAT_HAS_PROX [Lucene 2.4]"; - else if (format == SegmentInfos::FORMAT_USER_DATA) - sFormat = L"FORMAT_USER_DATA [Lucene 2.9]"; - else if (format == SegmentInfos::FORMAT_DIAGNOSTICS) - sFormat = L"FORMAT_DIAGNOSTICS [Lucene 2.9]"; - else if (format < SegmentInfos::CURRENT_FORMAT) - { - sFormat = L"int=" + StringUtils::toString(format) + L" [newer version of Lucene than this tool]"; - skip = 
true; + SegmentInfoStatusPtr segInfoStat(newLucene()); + result->segmentInfos.add(segInfoStat); + msg(L" name=" + info->name + L" docCount=" + StringUtils::toString(info->docCount)); + segInfoStat->name = info->name; + segInfoStat->docCount = info->docCount; + + int32_t toLoseDocCount = info->docCount; + + SegmentReaderPtr reader; + + try { + msg(L" compound=" + StringUtils::toString(info->getUseCompoundFile())); + segInfoStat->compound = info->getUseCompoundFile(); + msg(L" hasProx=" + StringUtils::toString(info->getHasProx())); + segInfoStat->hasProx = info->getHasProx(); + msg(L" numFiles=" + StringUtils::toString(info->files().size())); + segInfoStat->numFiles = info->files().size(); + msg(L" size (MB)=" + StringUtils::toString((double)info->sizeInBytes() / (double)(1024 * 1024))); + segInfoStat->sizeMB = (double)info->sizeInBytes() / (double)(1024 * 1024); + MapStringString diagnostics(info->getDiagnostics()); + segInfoStat->diagnostics = diagnostics; + if (!diagnostics.empty()) { + msg(L" diagnostics (size)= " + StringUtils::toString(diagnostics.size())); } - else - sFormat = StringUtils::toString(format) + L" [Lucene 1.3 or prior]"; - } - - result->segmentsFileName = segmentsFileName; - result->numSegments = numSegments; - result->segmentFormat = sFormat; - result->userData = sis->getUserData(); - String userDataString; - if (!sis->getUserData().empty()) - userDataString = L" userData(size)=" + StringUtils::toString(sis->getUserData().size()); - - msg(L"Segments file=" + segmentsFileName + L" numSegments=" + StringUtils::toString(numSegments) + - L" version=" + sFormat + userDataString); - - if (onlySegments) - { - result->partial = true; - msg(L"\nChecking only these segments:"); - for (Collection::iterator s = onlySegments.begin(); s != onlySegments.end(); ++s) - msg(L" " + *s); - result->segmentsChecked.addAll(onlySegments.begin(), onlySegments.end()); - msg(L":"); - } - - if (skip) - { - msg(L"\nERROR: this index appears to be created by a newer version 
of Lucene than this tool was compiled on;" \ - L" please re-compile this tool on the matching version of Lucene; exiting"); - result->toolOutOfDate = true; - return result; - } - - result->newSegments = boost::dynamic_pointer_cast(sis->clone()); - result->newSegments->clear(); - - for (int32_t i = 0; i < numSegments; ++i) - { - SegmentInfoPtr info(sis->info(i)); - if (onlySegments && !onlySegments.contains(info->name)) - continue; - SegmentInfoStatusPtr segInfoStat(newLucene()); - result->segmentInfos.add(segInfoStat); - msg(L" name=" + info->name + L" docCount=" + StringUtils::toString(info->docCount)); - segInfoStat->name = info->name; - segInfoStat->docCount = info->docCount; - - int32_t toLoseDocCount = info->docCount; - - SegmentReaderPtr reader; - - try - { - msg(L" compound=" + StringUtils::toString(info->getUseCompoundFile())); - segInfoStat->compound = info->getUseCompoundFile(); - msg(L" hasProx=" + StringUtils::toString(info->getHasProx())); - segInfoStat->hasProx = info->getHasProx(); - msg(L" numFiles=" + StringUtils::toString(info->files().size())); - segInfoStat->numFiles = info->files().size(); - msg(L" size (MB)=" + StringUtils::toString((double)info->sizeInBytes() / (double)(1024 * 1024))); - segInfoStat->sizeMB = (double)info->sizeInBytes() / (double)(1024 * 1024); - MapStringString diagnostics(info->getDiagnostics()); - segInfoStat->diagnostics = diagnostics; - if (!diagnostics.empty()) - msg(L" diagnostics (size)= " + StringUtils::toString(diagnostics.size())); - - int32_t docStoreOffset = info->getDocStoreOffset(); - if (docStoreOffset != -1) - { - msg(L" docStoreOffset=" + StringUtils::toString(docStoreOffset)); - segInfoStat->docStoreOffset = docStoreOffset; - msg(L" docStoreSegment=" + info->getDocStoreSegment()); - segInfoStat->docStoreSegment = info->getDocStoreSegment(); - msg(L" docStoreIsCompoundFile=" + StringUtils::toString(info->getDocStoreIsCompoundFile())); - segInfoStat->docStoreCompoundFile = info->getDocStoreIsCompoundFile(); - 
} - String delFileName(info->getDelFileName()); - if (delFileName.empty()) - { - msg(L" no deletions"); - segInfoStat->hasDeletions = false; - } - else - { - msg(L" has deletions [delFileName=" + delFileName + L"]"); - segInfoStat->hasDeletions = true; - segInfoStat->deletionsFileName = delFileName; + + int32_t docStoreOffset = info->getDocStoreOffset(); + if (docStoreOffset != -1) { + msg(L" docStoreOffset=" + StringUtils::toString(docStoreOffset)); + segInfoStat->docStoreOffset = docStoreOffset; + msg(L" docStoreSegment=" + info->getDocStoreSegment()); + segInfoStat->docStoreSegment = info->getDocStoreSegment(); + msg(L" docStoreIsCompoundFile=" + StringUtils::toString(info->getDocStoreIsCompoundFile())); + segInfoStat->docStoreCompoundFile = info->getDocStoreIsCompoundFile(); + } + String delFileName(info->getDelFileName()); + if (delFileName.empty()) { + msg(L" no deletions"); + segInfoStat->hasDeletions = false; + } else { + msg(L" has deletions [delFileName=" + delFileName + L"]"); + segInfoStat->hasDeletions = true; + segInfoStat->deletionsFileName = delFileName; + } + msg(L" test: open reader........."); + reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); + + segInfoStat->openReaderPassed = true; + + int32_t numDocs = reader->numDocs(); + toLoseDocCount = numDocs; + if (reader->hasDeletions()) { + if (reader->deletedDocs->count() != info->getDelCount()) { + boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + + L" vs deletedDocs.count()=" + StringUtils::toString(reader->deletedDocs->count()))); } - msg(L" test: open reader........."); - reader = SegmentReader::get(true, info, IndexReader::DEFAULT_TERMS_INDEX_DIVISOR); - - segInfoStat->openReaderPassed = true; - - int32_t numDocs = reader->numDocs(); - toLoseDocCount = numDocs; - if (reader->hasDeletions()) - { - if (reader->deletedDocs->count() != info->getDelCount()) - { - 
boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + - L" vs deletedDocs.count()=" + StringUtils::toString(reader->deletedDocs->count()))); - } - if (reader->deletedDocs->count() > reader->maxDoc()) - { - boost::throw_exception(RuntimeException(L"too many deleted docs: maxDoc()=" + StringUtils::toString(reader->maxDoc()) + - L" vs deletedDocs.count()=" + StringUtils::toString(reader->deletedDocs->count()))); - } - if (info->docCount - numDocs != info->getDelCount()) - { - boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + - L" vs reader=" + StringUtils::toString((info->docCount - numDocs)))); - } - segInfoStat->numDeleted = info->docCount - numDocs; - msg(L"OK [" + StringUtils::toString(segInfoStat->numDeleted) + L" deleted docs]"); + if (reader->deletedDocs->count() > reader->maxDoc()) { + boost::throw_exception(RuntimeException(L"too many deleted docs: maxDoc()=" + StringUtils::toString(reader->maxDoc()) + + L" vs deletedDocs.count()=" + StringUtils::toString(reader->deletedDocs->count()))); } - else - { - if (info->getDelCount() != 0) - { - boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + - L" vs reader=" + StringUtils::toString(info->docCount - numDocs))); - } - msg(L"OK"); + if (info->docCount - numDocs != info->getDelCount()) { + boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + + L" vs reader=" + StringUtils::toString((info->docCount - numDocs)))); } - if (reader->maxDoc() != info->docCount) - { - boost::throw_exception(RuntimeException(L"SegmentReader.maxDoc() " + StringUtils::toString(reader->maxDoc()) + - L" != SegmentInfos.docCount " + StringUtils::toString(info->docCount))); + segInfoStat->numDeleted = info->docCount - numDocs; + msg(L"OK [" + 
StringUtils::toString(segInfoStat->numDeleted) + L" deleted docs]"); + } else { + if (info->getDelCount() != 0) { + boost::throw_exception(RuntimeException(L"delete count mismatch: info=" + StringUtils::toString(info->getDelCount()) + + L" vs reader=" + StringUtils::toString(info->docCount - numDocs))); } - msg(L" test: fields.............."); - HashSet fieldNames(reader->getFieldNames(IndexReader::FIELD_OPTION_ALL)); - msg(L"OK [" + StringUtils::toString(fieldNames.size()) + L" fields]"); - segInfoStat->numFields = fieldNames.size(); - - // Test Field Norms - segInfoStat->fieldNormStatus = testFieldNorms(Collection::newInstance(fieldNames.begin(), fieldNames.end()), reader); - - // Test the Term Index - segInfoStat->termIndexStatus = testTermIndex(info, reader); - - // Test Stored Fields - segInfoStat->storedFieldStatus = testStoredFields(info, reader); - - // Test Term Vectors - segInfoStat->termVectorStatus = testTermVectors(info, reader); - - // Rethrow the first exception we encountered. This will cause stats for failed segments to be incremented properly - if (!segInfoStat->fieldNormStatus->error.isNull()) - boost::throw_exception(RuntimeException(L"Field Norm test failed")); - else if (!segInfoStat->termIndexStatus->error.isNull()) - boost::throw_exception(RuntimeException(L"Term Index test failed")); - else if (!segInfoStat->storedFieldStatus->error.isNull()) - boost::throw_exception(RuntimeException(L"Stored Field test failed")); - else if (!segInfoStat->termVectorStatus->error.isNull()) - boost::throw_exception(RuntimeException(L"Term Vector test failed")); - - msg(L""); + msg(L"OK"); } - catch (...) 
- { - msg(L"FAILED"); - String comment(L"fixIndex() would remove reference to this segment"); - msg(L" WARNING: " + comment + L"; full exception:"); - msg(L""); - result->totLoseDocCount += toLoseDocCount; - ++result->numBadSegments; - - if (reader) - reader->close(); - - continue; + if (reader->maxDoc() != info->docCount) { + boost::throw_exception(RuntimeException(L"SegmentReader.maxDoc() " + StringUtils::toString(reader->maxDoc()) + + L" != SegmentInfos.docCount " + StringUtils::toString(info->docCount))); } - if (reader) + msg(L" test: fields.............."); + HashSet fieldNames(reader->getFieldNames(IndexReader::FIELD_OPTION_ALL)); + msg(L"OK [" + StringUtils::toString(fieldNames.size()) + L" fields]"); + segInfoStat->numFields = fieldNames.size(); + + // Test Field Norms + segInfoStat->fieldNormStatus = testFieldNorms(Collection::newInstance(fieldNames.begin(), fieldNames.end()), reader); + + // Test the Term Index + segInfoStat->termIndexStatus = testTermIndex(info, reader); + + // Test Stored Fields + segInfoStat->storedFieldStatus = testStoredFields(info, reader); + + // Test Term Vectors + segInfoStat->termVectorStatus = testTermVectors(info, reader); + + // Rethrow the first exception we encountered. This will cause stats for failed segments to be incremented properly + if (!segInfoStat->fieldNormStatus->error.isNull()) { + boost::throw_exception(RuntimeException(L"Field Norm test failed")); + } else if (!segInfoStat->termIndexStatus->error.isNull()) { + boost::throw_exception(RuntimeException(L"Term Index test failed")); + } else if (!segInfoStat->storedFieldStatus->error.isNull()) { + boost::throw_exception(RuntimeException(L"Stored Field test failed")); + } else if (!segInfoStat->termVectorStatus->error.isNull()) { + boost::throw_exception(RuntimeException(L"Term Vector test failed")); + } + + msg(L""); + } catch (...) 
{ + msg(L"FAILED"); + String comment(L"fixIndex() would remove reference to this segment"); + msg(L" WARNING: " + comment + L"; full exception:"); + msg(L""); + result->totLoseDocCount += toLoseDocCount; + ++result->numBadSegments; + + if (reader) { reader->close(); - - // Keeper - result->newSegments->add(boost::dynamic_pointer_cast(info->clone())); - } - - if (result->numBadSegments == 0) - { - result->clean = true; - msg(L"No problems were detected with this index.\n"); + } + + continue; } - else - { - msg(L"WARNING: " + StringUtils::toString(result->numBadSegments) + - L" broken segments (containing " + StringUtils::toString(result->totLoseDocCount) + - L" documents) detected"); + if (reader) { + reader->close(); } - - return result; + + // Keeper + result->newSegments->add(boost::dynamic_pointer_cast(info->clone())); } - - FieldNormStatusPtr CheckIndex::testFieldNorms(Collection fieldNames, SegmentReaderPtr reader) - { - FieldNormStatusPtr status(newLucene()); - - try - { - // Test Field Norms - msg(L" test: field norms........."); - - ByteArray b(ByteArray::newInstance(reader->maxDoc())); - for (Collection::iterator fieldName = fieldNames.begin(); fieldName != fieldNames.end(); ++fieldName) - { - if (reader->hasNorms(*fieldName)) - { - reader->norms(*fieldName, b, 0); - ++status->totFields; - } + + if (result->numBadSegments == 0) { + result->clean = true; + msg(L"No problems were detected with this index.\n"); + } else { + msg(L"WARNING: " + StringUtils::toString(result->numBadSegments) + + L" broken segments (containing " + StringUtils::toString(result->totLoseDocCount) + + L" documents) detected"); + } + + return result; +} + +FieldNormStatusPtr CheckIndex::testFieldNorms(Collection fieldNames, const SegmentReaderPtr& reader) { + FieldNormStatusPtr status(newLucene()); + + try { + // Test Field Norms + msg(L" test: field norms........."); + + ByteArray b(ByteArray::newInstance(reader->maxDoc())); + for (Collection::iterator fieldName = fieldNames.begin(); 
fieldName != fieldNames.end(); ++fieldName) { + if (reader->hasNorms(*fieldName)) { + reader->norms(*fieldName, b, 0); + ++status->totFields; } - - msg(L"OK [" + StringUtils::toString(status->totFields) + L" fields]"); } - catch (LuceneException& e) - { - msg(L"ERROR [" + e.getError() + L"]"); - status->error = e; - } - - return status; + + msg(L"OK [" + StringUtils::toString(status->totFields) + L" fields]"); + } catch (LuceneException& e) { + msg(L"ERROR [" + e.getError() + L"]"); + status->error = e; } - - TermIndexStatusPtr CheckIndex::testTermIndex(SegmentInfoPtr info, SegmentReaderPtr reader) - { - TermIndexStatusPtr status(newLucene()); - - try - { - msg(L" test: terms, freq, prox..."); - - TermEnumPtr termEnum(reader->terms()); - TermPositionsPtr termPositions(reader->termPositions()); - - // Used only to count up # deleted docs for this term - MySegmentTermDocsPtr myTermDocs(newLucene(reader)); - - int32_t maxDoc = reader->maxDoc(); - - while (termEnum->next()) - { - ++status->termCount; - TermPtr term(termEnum->term()); - int32_t docFreq = termEnum->docFreq(); - termPositions->seek(term); - int32_t lastDoc = -1; - int32_t freq0 = 0; - status->totFreq += docFreq; - while (termPositions->next()) - { - ++freq0; - int32_t doc = termPositions->doc(); - int32_t freq = termPositions->freq(); - if (doc <= lastDoc) - { - boost::throw_exception(RuntimeException(L"term " + term->toString() + - L": doc " + StringUtils::toString(doc) + - L" <= lastDoc " + StringUtils::toString(lastDoc))); - } - if (doc >= maxDoc) - { - boost::throw_exception(RuntimeException(L"term " + term->toString() + - L": doc " + StringUtils::toString(doc) + - L" >= maxDoc " + StringUtils::toString(maxDoc))); - } - - lastDoc = doc; - if (freq <= 0) - { - boost::throw_exception(RuntimeException(L"term " + term->toString() + - L": doc " + StringUtils::toString(doc) + - L": freq " + StringUtils::toString(freq) + + + return status; +} + +TermIndexStatusPtr CheckIndex::testTermIndex(const 
SegmentInfoPtr& info, const SegmentReaderPtr& reader) { + TermIndexStatusPtr status(newLucene()); + + try { + msg(L" test: terms, freq, prox..."); + + TermEnumPtr termEnum(reader->terms()); + TermPositionsPtr termPositions(reader->termPositions()); + + // Used only to count up # deleted docs for this term + MySegmentTermDocsPtr myTermDocs(newLucene(reader)); + + int32_t maxDoc = reader->maxDoc(); + + while (termEnum->next()) { + ++status->termCount; + TermPtr term(termEnum->term()); + int32_t docFreq = termEnum->docFreq(); + termPositions->seek(term); + int32_t lastDoc = -1; + int32_t freq0 = 0; + status->totFreq += docFreq; + while (termPositions->next()) { + ++freq0; + int32_t doc = termPositions->doc(); + int32_t freq = termPositions->freq(); + if (doc <= lastDoc) { + boost::throw_exception(RuntimeException(L"term " + term->toString() + + L": doc " + StringUtils::toString(doc) + + L" <= lastDoc " + StringUtils::toString(lastDoc))); + } + if (doc >= maxDoc) { + boost::throw_exception(RuntimeException(L"term " + term->toString() + + L": doc " + StringUtils::toString(doc) + + L" >= maxDoc " + StringUtils::toString(maxDoc))); + } + + lastDoc = doc; + if (freq <= 0) { + boost::throw_exception(RuntimeException(L"term " + term->toString() + + L": doc " + StringUtils::toString(doc) + + L": freq " + StringUtils::toString(freq) + + L" is out of bounds")); + } + + int32_t lastPos = -1; + status->totPos += freq; + for (int32_t j = 0; j < freq; ++j) { + int32_t pos = termPositions->nextPosition(); + if (pos < -1) { + boost::throw_exception(RuntimeException(L"term " + term->toString() + + L": doc " + StringUtils::toString(doc) + + L": pos " + StringUtils::toString(pos) + L" is out of bounds")); } - - int32_t lastPos = -1; - status->totPos += freq; - for (int32_t j = 0; j < freq; ++j) - { - int32_t pos = termPositions->nextPosition(); - if (pos < -1) - { - boost::throw_exception(RuntimeException(L"term " + term->toString() + - L": doc " + StringUtils::toString(doc) + - L": pos 
" + StringUtils::toString(pos) + - L" is out of bounds")); - } - if (pos < lastPos) - { - boost::throw_exception(RuntimeException(L"term " + term->toString() + - L": doc " + StringUtils::toString(doc) + - L": pos " + StringUtils::toString(pos) + - L" < lastPos " + StringUtils::toString(lastPos))); - } - lastPos = pos; - } - } - - // Now count how many deleted docs occurred in this term - int32_t delCount; - if (reader->hasDeletions()) - { - myTermDocs->seek(term); - while (myTermDocs->next()) - { + if (pos < lastPos) { + boost::throw_exception(RuntimeException(L"term " + term->toString() + + L": doc " + StringUtils::toString(doc) + + L": pos " + StringUtils::toString(pos) + + L" < lastPos " + StringUtils::toString(lastPos))); } - delCount = myTermDocs->delCount; + lastPos = pos; } - else - delCount = 0; - - if (freq0 + delCount != docFreq) - { - boost::throw_exception(RuntimeException(L"term " + term->toString() + - L"docFreq=" + StringUtils::toString(docFreq) + - L" != num docs seen " + StringUtils::toString(freq0) + - L" + num docs deleted " + StringUtils::toString(delCount))); + } + + // Now count how many deleted docs occurred in this term + int32_t delCount; + if (reader->hasDeletions()) { + myTermDocs->seek(term); + while (myTermDocs->next()) { } + delCount = myTermDocs->delCount; + } else { + delCount = 0; + } + + if (freq0 + delCount != docFreq) { + boost::throw_exception(RuntimeException(L"term " + term->toString() + + L"docFreq=" + StringUtils::toString(docFreq) + + L" != num docs seen " + StringUtils::toString(freq0) + + L" + num docs deleted " + StringUtils::toString(delCount))); } - - msg(L"OK [" + StringUtils::toString(status->termCount) + L" terms; " + StringUtils::toString(status->totFreq) + - L" terms/docs pairs; " + StringUtils::toString(status->totPos) + L" tokens]"); - } - catch (LuceneException& e) - { - msg(L"ERROR [" + e.getError() + L"]"); - status->error = e; } - - return status; + + msg(L"OK [" + StringUtils::toString(status->termCount) + 
L" terms; " + StringUtils::toString(status->totFreq) + + L" terms/docs pairs; " + StringUtils::toString(status->totPos) + L" tokens]"); + } catch (LuceneException& e) { + msg(L"ERROR [" + e.getError() + L"]"); + status->error = e; } - - StoredFieldStatusPtr CheckIndex::testStoredFields(SegmentInfoPtr info, SegmentReaderPtr reader) - { - StoredFieldStatusPtr status(newLucene()); - - try - { - msg(L" test: stored fields......."); - - // Scan stored fields for all documents - for (int32_t j = 0; j < info->docCount; ++j) - { - if (!reader->isDeleted(j)) - { - ++status->docCount; - DocumentPtr doc(reader->document(j, FieldSelectorPtr())); - status->totFields += doc->getFields().size(); - } - } - - // Validate docCount - if (status->docCount != reader->numDocs()) - { - boost::throw_exception(RuntimeException(L"docCount=" + StringUtils::toString(status->docCount) + - L" but saw " + StringUtils::toString(status->docCount) + - L" undeleted docs")); + + return status; +} + +StoredFieldStatusPtr CheckIndex::testStoredFields(const SegmentInfoPtr& info, const SegmentReaderPtr& reader) { + StoredFieldStatusPtr status(newLucene()); + + try { + msg(L" test: stored fields......."); + + // Scan stored fields for all documents + for (int32_t j = 0; j < info->docCount; ++j) { + if (!reader->isDeleted(j)) { + ++status->docCount; + DocumentPtr doc(reader->document(j, FieldSelectorPtr())); + status->totFields += doc->getFields().size(); } - - msg(L"OK [" + StringUtils::toString(status->totFields) + L" total field count; avg " + - StringUtils::toString((double)status->totFields / (double)status->docCount) + L" fields per doc]"); } - catch (LuceneException& e) - { - msg(L"ERROR [" + e.getError() + L"]"); - status->error = e; + + // Validate docCount + if (status->docCount != reader->numDocs()) { + boost::throw_exception(RuntimeException(L"docCount=" + StringUtils::toString(status->docCount) + + L" but saw " + StringUtils::toString(status->docCount) + + L" undeleted docs")); } - - return 
status; + + msg(L"OK [" + StringUtils::toString(status->totFields) + L" total field count; avg " + + StringUtils::toString((double)status->totFields / (double)status->docCount) + L" fields per doc]"); + } catch (LuceneException& e) { + msg(L"ERROR [" + e.getError() + L"]"); + status->error = e; } - - TermVectorStatusPtr CheckIndex::testTermVectors(SegmentInfoPtr info, SegmentReaderPtr reader) - { - TermVectorStatusPtr status(newLucene()); - - try - { - msg(L" test: term vectors........"); - - for (int32_t j = 0; j < info->docCount; ++j) - { - if (!reader->isDeleted(j)) - { - ++status->docCount; - Collection tfv(reader->getTermFreqVectors(j)); - if (tfv) - status->totVectors += tfv.size(); + + return status; +} + +TermVectorStatusPtr CheckIndex::testTermVectors(const SegmentInfoPtr& info, const SegmentReaderPtr& reader) { + TermVectorStatusPtr status(newLucene()); + + try { + msg(L" test: term vectors........"); + + for (int32_t j = 0; j < info->docCount; ++j) { + if (!reader->isDeleted(j)) { + ++status->docCount; + Collection tfv(reader->getTermFreqVectors(j)); + if (tfv) { + status->totVectors += tfv.size(); } } - - msg(L"OK [" + StringUtils::toString(status->totVectors) + L" total vector count; avg " + - StringUtils::toString((double)status->totVectors / (double)status->docCount) + L" term/freq vector fields per doc]"); - } - catch (LuceneException& e) - { - msg(L"ERROR [" + e.getError() + L"]"); - status->error = e; } - - return status; - } - - void CheckIndex::fixIndex(IndexStatusPtr result) - { - if (result->partial) - boost::throw_exception(IllegalArgumentException(L"can only fix an index that was fully checked (this status checked a subset of segments)")); - result->newSegments->commit(result->dir); - } - - bool CheckIndex::testAsserts() - { - _assertsOn = true; - return true; + + msg(L"OK [" + StringUtils::toString(status->totVectors) + L" total vector count; avg " + + StringUtils::toString((double)status->totVectors / (double)status->docCount) + L" 
term/freq vector fields per doc]"); + } catch (LuceneException& e) { + msg(L"ERROR [" + e.getError() + L"]"); + status->error = e; } - - bool CheckIndex::assertsOn() - { - BOOST_ASSERT(testAsserts()); - return _assertsOn; + + return status; +} + +void CheckIndex::fixIndex(const IndexStatusPtr& result) { + if (result->partial) { + boost::throw_exception(IllegalArgumentException(L"can only fix an index that was fully checked (this status checked a subset of segments)")); } - - int CheckIndex::main(Collection args) - { - bool doFix = false; - Collection onlySegments(Collection::newInstance()); - String indexPath; - for (Collection::iterator arg = args.begin(); arg != args.end(); ++arg) - { - if (*arg == L"-fix") - doFix = true; - else if (*arg == L"-segment") - { - if (arg + 1 == args.end()) - { - std::wcout << L"ERROR: missing name for -segment option\n"; - return 1; - } - ++arg; - onlySegments.add(*arg); + result->newSegments->commit(result->dir); +} + +bool CheckIndex::testAsserts() { + _assertsOn = true; + return true; +} + +bool CheckIndex::assertsOn() { + BOOST_ASSERT(testAsserts()); + return _assertsOn; +} + +int CheckIndex::main(Collection args) { + bool doFix = false; + Collection onlySegments(Collection::newInstance()); + String indexPath; + for (Collection::iterator arg = args.begin(); arg != args.end(); ++arg) { + if (*arg == L"-fix") { + doFix = true; + } else if (*arg == L"-segment") { + if (arg + 1 == args.end()) { + std::wcout << L"ERROR: missing name for -segment option\n"; + return 1; } - else - { - if (!indexPath.empty()) - { - std::wcout << L"ERROR: unexpected extra argument '" << *arg << L"'\n"; - return 1; - } - indexPath = *arg; + ++arg; + onlySegments.add(*arg); + } else { + if (!indexPath.empty()) { + std::wcout << L"ERROR: unexpected extra argument '" << *arg << L"'\n"; + return 1; } + indexPath = *arg; } - - if (indexPath.empty()) - { - std::wcout << L"\nERROR: index path not specified\n"; - std::wcout << L"Usage: CheckIndex pathToIndex 
[-fix] [-segment X] [-segment Y]\n"; - std::wcout << L"\n"; - std::wcout << L" -fix: actually write a new segments_N file, removing any problematic segments\n"; - std::wcout << L" -segment X: only check the specified segments. This can be specified multiple\n"; - std::wcout << L" times, to check more than one segment, eg '-segment _2 -segment _a'.\n"; - std::wcout << L" You can't use this with the -fix option\n"; - std::wcout << L"\n"; - std::wcout << L"**WARNING**: -fix should only be used on an emergency basis as it will cause\n"; - std::wcout << L"documents (perhaps many) to be permanently removed from the index. Always make\n"; - std::wcout << L"a backup copy of your index before running this! Do not run this tool on an index\n"; - std::wcout << L"that is actively being written to. You have been warned!\n"; - std::wcout << L"\n"; - std::wcout << L"Run without -fix, this tool will open the index, report version information\n"; - std::wcout << L"and report any exceptions it hits and what action it would take if -fix were\n"; - std::wcout << L"specified. With -fix, this tool will remove any segments that have issues and\n"; - std::wcout << L"write a new segments_N file. This means all documents contained in the affected\n"; - std::wcout << L"segments will be removed.\n"; - std::wcout << L"\n"; - std::wcout << L"This tool exits with exit code 1 if the index cannot be opened or has any\n"; - std::wcout << L"corruption, else 0.\n\n"; - return 1; - } - - if (!assertsOn()) - std::wcout << L"\nNOTE: testing will be more thorough if you run with '-ea', so assertions are enabled\n"; - - if (onlySegments.empty()) - onlySegments.reset(); - else if (doFix) - { - std::wcout << L"ERROR: cannot specify both -fix and -segment\n"; - return 1; - } - - std::wcout << L"\nOpening index @ " << indexPath << L"\n\n"; - DirectoryPtr dir; - try - { - dir = FSDirectory::open(indexPath); - } - catch (...) 
- { - std::wcout << L"ERROR: could not open directory \"" << indexPath << L"\"; exiting\n"; - return 1; - } - - CheckIndexPtr checker(newLucene(dir)); - checker->setInfoStream(newLucene()); - - IndexStatusPtr result(checker->checkIndex(onlySegments)); - if (result->missingSegments) - return 1; - - if (!result->clean) - { - if (!doFix) - std::wcout << L"WARNING: would write new segments file, and " << result->totLoseDocCount << L" documents would be lost, if -fix were specified\n\n"; - else - { - std::wcout << L"WARNING: " << result->totLoseDocCount + L" documents will be lost\n"; - std::wcout << L"NOTE: will write new segments file in 5 seconds; this will remove " << result->totLoseDocCount; - std::wcout << L" docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!\n"; - for (int32_t sec = 0; sec < 5; ++sec) - { - LuceneThread::threadSleep(1000); - std::wcout << L" " << (5 - sec) << L"...\n"; - } - std::wcout << L"Writing...\n"; - checker->fixIndex(result); - std::wcout << L"OK\n"; - std::wcout << L"Wrote new segments file \"" << result->newSegments->getCurrentSegmentFileName() << L"\"\n"; - } - } - - std::wcout << L"\n"; - return ((result && result->clean) ? 
0 : 1); - } - - IndexStatus::IndexStatus() - { - clean = false; - missingSegments = false; - cantOpenSegments = false; - missingSegmentVersion = false; - numSegments = false; - segmentInfos = Collection::newInstance(); - segmentsChecked = Collection::newInstance(); - toolOutOfDate = false; - totLoseDocCount = 0; - numBadSegments = 0; - partial = false; - } - - IndexStatus::~IndexStatus() - { - } - - SegmentInfoStatus::SegmentInfoStatus() - { - docCount = 0; - compound = false; - numFiles = 0; - sizeMB = 0; - docStoreOffset = -1; - docStoreCompoundFile = false; - hasDeletions = false; - numDeleted = 0; - openReaderPassed = false; - numFields = 0; - hasProx = false; - } - - SegmentInfoStatus::~SegmentInfoStatus() - { - } - - FieldNormStatus::FieldNormStatus() - { - totFields = 0; - } - - FieldNormStatus::~FieldNormStatus() - { - } - - TermIndexStatus::TermIndexStatus() - { - termCount = 0; - totFreq = 0; - totPos = 0; - } - - TermIndexStatus::~TermIndexStatus() - { - } - - StoredFieldStatus::StoredFieldStatus() - { - docCount = 0; - totFields = 0; } - - StoredFieldStatus::~StoredFieldStatus() - { - } - - TermVectorStatus::TermVectorStatus() - { - docCount = 0; - totVectors = 0; + + if (indexPath.empty()) { + std::wcout << L"\nERROR: index path not specified\n"; + std::wcout << L"Usage: CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n"; + std::wcout << L"\n"; + std::wcout << L" -fix: actually write a new segments_N file, removing any problematic segments\n"; + std::wcout << L" -segment X: only check the specified segments. This can be specified multiple\n"; + std::wcout << L" times, to check more than one segment, eg '-segment _2 -segment _a'.\n"; + std::wcout << L" You can't use this with the -fix option\n"; + std::wcout << L"\n"; + std::wcout << L"**WARNING**: -fix should only be used on an emergency basis as it will cause\n"; + std::wcout << L"documents (perhaps many) to be permanently removed from the index. 
Always make\n"; + std::wcout << L"a backup copy of your index before running this! Do not run this tool on an index\n"; + std::wcout << L"that is actively being written to. You have been warned!\n"; + std::wcout << L"\n"; + std::wcout << L"Run without -fix, this tool will open the index, report version information\n"; + std::wcout << L"and report any exceptions it hits and what action it would take if -fix were\n"; + std::wcout << L"specified. With -fix, this tool will remove any segments that have issues and\n"; + std::wcout << L"write a new segments_N file. This means all documents contained in the affected\n"; + std::wcout << L"segments will be removed.\n"; + std::wcout << L"\n"; + std::wcout << L"This tool exits with exit code 1 if the index cannot be opened or has any\n"; + std::wcout << L"corruption, else 0.\n\n"; + return 1; } - - TermVectorStatus::~TermVectorStatus() - { + + if (!assertsOn()) { + std::wcout << L"\nNOTE: testing will be more thorough if you run with '-ea', so assertions are enabled\n"; } - - MySegmentTermDocs::MySegmentTermDocs(SegmentReaderPtr p) : SegmentTermDocs(p) - { - delCount = 0; + + if (onlySegments.empty()) { + onlySegments.reset(); + } else if (doFix) { + std::wcout << L"ERROR: cannot specify both -fix and -segment\n"; + return 1; } - - MySegmentTermDocs::~MySegmentTermDocs() - { + + std::wcout << L"\nOpening index @ " << indexPath << L"\n\n"; + DirectoryPtr dir; + try { + dir = FSDirectory::open(indexPath); + } catch (...) 
{ + std::wcout << L"ERROR: could not open directory \"" << indexPath << L"\"; exiting\n"; + return 1; } - - void MySegmentTermDocs::seek(TermPtr term) - { - SegmentTermDocs::seek(term); - delCount = 0; + + CheckIndexPtr checker(newLucene(dir)); + checker->setInfoStream(newLucene()); + + IndexStatusPtr result(checker->checkIndex(onlySegments)); + if (result->missingSegments) { + return 1; } - - void MySegmentTermDocs::skippingDoc() - { - ++delCount; + + if (!result->clean) { + if (!doFix) { + std::wcout << L"WARNING: would write new segments file, and " << result->totLoseDocCount << L" documents would be lost, if -fix were specified\n\n"; + } else { + std::wcout << L"WARNING: " << result->totLoseDocCount << L" documents will be lost\n"; + std::wcout << L"NOTE: will write new segments file in 5 seconds; this will remove " << result->totLoseDocCount; + std::wcout << L" docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!\n"; + for (int32_t sec = 0; sec < 5; ++sec) { + LuceneThread::threadSleep(1000); + std::wcout << L" " << (5 - sec) << L"...\n"; + } + std::wcout << L"Writing...\n"; + checker->fixIndex(result); + std::wcout << L"OK\n"; + std::wcout << L"Wrote new segments file \"" << result->newSegments->getCurrentSegmentFileName() << L"\"\n"; + } } + + std::wcout << L"\n"; + return ((result && result->clean) ? 
0 : 1); +} + +IndexStatus::IndexStatus() { + clean = false; + missingSegments = false; + cantOpenSegments = false; + missingSegmentVersion = false; + numSegments = false; + segmentInfos = Collection::newInstance(); + segmentsChecked = Collection::newInstance(); + toolOutOfDate = false; + totLoseDocCount = 0; + numBadSegments = 0; + partial = false; +} + +IndexStatus::~IndexStatus() { +} + +SegmentInfoStatus::SegmentInfoStatus() { + docCount = 0; + compound = false; + numFiles = 0; + sizeMB = 0; + docStoreOffset = -1; + docStoreCompoundFile = false; + hasDeletions = false; + numDeleted = 0; + openReaderPassed = false; + numFields = 0; + hasProx = false; +} + +SegmentInfoStatus::~SegmentInfoStatus() { +} + +FieldNormStatus::FieldNormStatus() { + totFields = 0; +} + +FieldNormStatus::~FieldNormStatus() { +} + +TermIndexStatus::TermIndexStatus() { + termCount = 0; + totFreq = 0; + totPos = 0; +} + +TermIndexStatus::~TermIndexStatus() { +} + +StoredFieldStatus::StoredFieldStatus() { + docCount = 0; + totFields = 0; +} + +StoredFieldStatus::~StoredFieldStatus() { +} + +TermVectorStatus::TermVectorStatus() { + docCount = 0; + totVectors = 0; +} + +TermVectorStatus::~TermVectorStatus() { +} + +MySegmentTermDocs::MySegmentTermDocs(const SegmentReaderPtr& p) : SegmentTermDocs(p) { + delCount = 0; +} + +MySegmentTermDocs::~MySegmentTermDocs() { +} + +void MySegmentTermDocs::seek(const TermPtr& term) { + SegmentTermDocs::seek(term); + delCount = 0; +} + +void MySegmentTermDocs::skippingDoc() { + ++delCount; +} + } diff --git a/src/core/index/CompoundFileReader.cpp b/src/core/index/CompoundFileReader.cpp index b2c81219..48d8accc 100644 --- a/src/core/index/CompoundFileReader.cpp +++ b/src/core/index/CompoundFileReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,229 +7,201 @@ #include "LuceneInc.h" #include "CompoundFileReader.h" -namespace Lucene -{ - CompoundFileReader::CompoundFileReader(DirectoryPtr dir, const String& name) - { - ConstructReader(dir, name, BufferedIndexInput::BUFFER_SIZE); - } - - CompoundFileReader::CompoundFileReader(DirectoryPtr dir, const String& name, int32_t readBufferSize) - { - ConstructReader(dir, name, readBufferSize); - } - - CompoundFileReader::~CompoundFileReader() - { - } - - void CompoundFileReader::ConstructReader(DirectoryPtr dir, const String& name, int32_t readBufferSize) - { - directory = dir; - fileName = name; - this->readBufferSize = readBufferSize; - this->entries = MapStringFileEntryPtr::newInstance(); - - bool success = false; - - LuceneException finally; - try - { - stream = dir->openInput(name, readBufferSize); - - // read the directory and init files - int32_t count = stream->readVInt(); - - FileEntryPtr entry; - for (int32_t i = 0; i < count; ++i) - { - int64_t offset = stream->readLong(); - String id(stream->readString()); - - if (entry) - { - // set length of the previous entry - entry->length = offset - entry->offset; - } - - entry = newInstance(); - entry->offset = offset; - entries.put(id, entry); +namespace Lucene { + +CompoundFileReader::CompoundFileReader(const DirectoryPtr& dir, const String& name) { + ConstructReader(dir, name, BufferedIndexInput::BUFFER_SIZE); +} + +CompoundFileReader::CompoundFileReader(const DirectoryPtr& dir, const String& name, int32_t readBufferSize) { + ConstructReader(dir, name, readBufferSize); +} + +CompoundFileReader::~CompoundFileReader() { +} + +void CompoundFileReader::ConstructReader(const DirectoryPtr& dir, const String& name, int32_t readBufferSize) { + directory = dir; + fileName = name; + this->readBufferSize = readBufferSize; 
+ this->entries = MapStringFileEntryPtr::newInstance(); + + bool success = false; + + LuceneException finally; + try { + stream = dir->openInput(name, readBufferSize); + + // read the directory and init files + int32_t count = stream->readVInt(); + + FileEntryPtr entry; + for (int32_t i = 0; i < count; ++i) { + int64_t offset = stream->readLong(); + String id(stream->readString()); + + if (entry) { + // set length of the previous entry + entry->length = offset - entry->offset; } - - // set the length of the final entry - if (entry) - entry->length = stream->length() - entry->offset; - - success = true; - } - catch (LuceneException& e) - { - finally = e; + + entry = newInstance(); + entry->offset = offset; + entries.put(id, entry); } - - if (!success && stream) - { - try - { - stream->close(); - } - catch (...) - { - } + + // set the length of the final entry + if (entry) { + entry->length = stream->length() - entry->offset; } - - finally.throwException(); - } - - DirectoryPtr CompoundFileReader::getDirectory() - { - return directory; - } - - String CompoundFileReader::getName() - { - return fileName; - } - - void CompoundFileReader::close() - { - SyncLock syncLock(this); - if (!stream) - boost::throw_exception(IOException(L"Already closed")); - - entries.clear(); - stream->close(); - stream.reset(); - } - - IndexInputPtr CompoundFileReader::openInput(const String& name) - { - SyncLock syncLock(this); - // Default to readBufferSize passed in when we were opened - return openInput(name, readBufferSize); - } - - IndexInputPtr CompoundFileReader::openInput(const String& name, int32_t bufferSize) - { - SyncLock syncLock(this); - if (!stream) - boost::throw_exception(IOException(L"Stream closed")); - - MapStringFileEntryPtr::iterator entry = entries.find(name); - if (entry == entries.end()) - boost::throw_exception(IOException(L"No sub-file with id " + name + L" found")); - - return newLucene(stream, entry->second->offset, entry->second->length, readBufferSize); - } - - 
HashSet CompoundFileReader::listAll() - { - HashSet res(HashSet::newInstance()); - for (MapStringFileEntryPtr::iterator entry = entries.begin(); entry != entries.end(); ++entry) - res.add(entry->first); - return res; - } - - bool CompoundFileReader::fileExists(const String& name) - { - return entries.contains(name); - } - - uint64_t CompoundFileReader::fileModified(const String& name) - { - return directory->fileModified(fileName); - } - - void CompoundFileReader::touchFile(const String& name) - { - directory->touchFile(fileName); - } - - void CompoundFileReader::deleteFile(const String& name) - { - boost::throw_exception(UnsupportedOperationException()); - } - - void CompoundFileReader::renameFile(const String& from, const String& to) - { - boost::throw_exception(UnsupportedOperationException()); - } - - int64_t CompoundFileReader::fileLength(const String& name) - { - MapStringFileEntryPtr::iterator entry = entries.find(name); - if (entry == entries.end()) - boost::throw_exception(IOException(L"File " + name + L" does not exist")); - return entry->second->length; - } - - IndexOutputPtr CompoundFileReader::createOutput(const String& name) - { - boost::throw_exception(UnsupportedOperationException()); - return IndexOutputPtr(); - } - - LockPtr CompoundFileReader::makeLock(const String& name) - { - boost::throw_exception(UnsupportedOperationException()); - return LockPtr(); - } - - CSIndexInput::CSIndexInput() - { - fileOffset = 0; - _length = 0; - } - - CSIndexInput::CSIndexInput(IndexInputPtr base, int64_t fileOffset, int64_t length) : BufferedIndexInput(BufferedIndexInput::BUFFER_SIZE) - { - this->base = boost::dynamic_pointer_cast(base->clone()); - this->fileOffset = fileOffset; - this->_length = length; + + success = true; + } catch (LuceneException& e) { + finally = e; } - - CSIndexInput::CSIndexInput(IndexInputPtr base, int64_t fileOffset, int64_t length, int32_t readBufferSize) : BufferedIndexInput(readBufferSize) - { - this->base = 
boost::dynamic_pointer_cast(base->clone()); - this->fileOffset = fileOffset; - this->_length = length; + + if (!success && stream) { + try { + stream->close(); + } catch (...) { + } } - - CSIndexInput::~CSIndexInput() - { + + finally.throwException(); +} + +DirectoryPtr CompoundFileReader::getDirectory() { + return directory; +} + +String CompoundFileReader::getName() { + return fileName; +} + +void CompoundFileReader::close() { + SyncLock syncLock(this); + if (!stream) { + boost::throw_exception(IOException(L"Already closed")); } - - void CSIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) - { - int64_t start = getFilePointer(); - if (start + length > _length) - boost::throw_exception(IOException(L"read past EOF")); - base->seek(fileOffset + start); - base->readBytes(b, offset, length, false); + + entries.clear(); + stream->close(); + stream.reset(); +} + +IndexInputPtr CompoundFileReader::openInput(const String& name) { + SyncLock syncLock(this); + // Default to readBufferSize passed in when we were opened + return openInput(name, readBufferSize); +} + +IndexInputPtr CompoundFileReader::openInput(const String& name, int32_t bufferSize) { + SyncLock syncLock(this); + if (!stream) { + boost::throw_exception(IOException(L"Stream closed")); } - - void CSIndexInput::seekInternal(int64_t pos) - { + + MapStringFileEntryPtr::iterator entry = entries.find(name); + if (entry == entries.end()) { + boost::throw_exception(IOException(L"No sub-file with id " + name + L" found")); } - - void CSIndexInput::close() - { - base->close(); + + return newLucene(stream, entry->second->offset, entry->second->length, readBufferSize); +} + +HashSet CompoundFileReader::listAll() { + HashSet res(HashSet::newInstance()); + for (MapStringFileEntryPtr::iterator entry = entries.begin(); entry != entries.end(); ++entry) { + res.add(entry->first); } - - int64_t CSIndexInput::length() - { - return _length; + return res; +} + +bool CompoundFileReader::fileExists(const String& 
name) { + return entries.contains(name); +} + +uint64_t CompoundFileReader::fileModified(const String& name) { + return directory->fileModified(fileName); +} + +void CompoundFileReader::touchFile(const String& name) { + directory->touchFile(fileName); +} + +void CompoundFileReader::deleteFile(const String& name) { + boost::throw_exception(UnsupportedOperationException()); +} + +void CompoundFileReader::renameFile(const String& from, const String& to) { + boost::throw_exception(UnsupportedOperationException()); +} + +int64_t CompoundFileReader::fileLength(const String& name) { + MapStringFileEntryPtr::iterator entry = entries.find(name); + if (entry == entries.end()) { + boost::throw_exception(IOException(L"File " + name + L" does not exist")); } - - LuceneObjectPtr CSIndexInput::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? other : newLucene(); - CSIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(BufferedIndexInput::clone(clone))); - cloneIndexInput->base = boost::dynamic_pointer_cast(this->base->clone()); - cloneIndexInput->fileOffset = fileOffset; - cloneIndexInput->_length = _length; - return cloneIndexInput; + return entry->second->length; +} + +IndexOutputPtr CompoundFileReader::createOutput(const String& name) { + boost::throw_exception(UnsupportedOperationException()); + return IndexOutputPtr(); +} + +LockPtr CompoundFileReader::makeLock(const String& name) { + boost::throw_exception(UnsupportedOperationException()); + return LockPtr(); +} + +CSIndexInput::CSIndexInput() { + fileOffset = 0; + _length = 0; +} + +CSIndexInput::CSIndexInput(const IndexInputPtr& base, int64_t fileOffset, int64_t length) : BufferedIndexInput(BufferedIndexInput::BUFFER_SIZE) { + this->base = boost::dynamic_pointer_cast(base->clone()); + this->fileOffset = fileOffset; + this->_length = length; +} + +CSIndexInput::CSIndexInput(const IndexInputPtr& base, int64_t fileOffset, int64_t length, int32_t readBufferSize) : BufferedIndexInput(readBufferSize) { + 
this->base = boost::dynamic_pointer_cast(base->clone()); + this->fileOffset = fileOffset; + this->_length = length; +} + +CSIndexInput::~CSIndexInput() { +} + +void CSIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) { + int64_t start = getFilePointer(); + if (start + length > _length) { + boost::throw_exception(IOException(L"read past EOF")); } + base->seek(fileOffset + start); + base->readBytes(b, offset, length, false); +} + +void CSIndexInput::seekInternal(int64_t pos) { +} + +void CSIndexInput::close() { + base->close(); +} + +int64_t CSIndexInput::length() { + return _length; +} + +LuceneObjectPtr CSIndexInput::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + CSIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(BufferedIndexInput::clone(clone))); + cloneIndexInput->base = boost::dynamic_pointer_cast(this->base->clone()); + cloneIndexInput->fileOffset = fileOffset; + cloneIndexInput->_length = _length; + return cloneIndexInput; +} + } diff --git a/src/core/index/CompoundFileWriter.cpp b/src/core/index/CompoundFileWriter.cpp index 81f26b53..002c8677 100644 --- a/src/core/index/CompoundFileWriter.cpp +++ b/src/core/index/CompoundFileWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,183 +12,168 @@ #include "IndexOutput.h" #include "StringUtils.h" -namespace Lucene -{ - CompoundFileWriter::CompoundFileWriter(DirectoryPtr dir, const String& name, CheckAbortPtr checkAbort) - { - if (!dir) - boost::throw_exception(IllegalArgumentException(L"directory cannot be empty")); - if (name.empty()) - boost::throw_exception(IllegalArgumentException(L"name cannot be empty")); - this->checkAbort = checkAbort; - _directory = dir; - fileName = name; - ids = HashSet::newInstance(); - entries = Collection::newInstance(); - merged = false; +namespace Lucene { + +CompoundFileWriter::CompoundFileWriter(const DirectoryPtr& dir, const String& name, const CheckAbortPtr& checkAbort) { + if (!dir) { + boost::throw_exception(IllegalArgumentException(L"directory cannot be empty")); } - - CompoundFileWriter::~CompoundFileWriter() - { + if (name.empty()) { + boost::throw_exception(IllegalArgumentException(L"name cannot be empty")); } - - DirectoryPtr CompoundFileWriter::getDirectory() - { - return DirectoryPtr(_directory); + this->checkAbort = checkAbort; + _directory = dir; + fileName = name; + ids = HashSet::newInstance(); + entries = Collection::newInstance(); + merged = false; +} + +CompoundFileWriter::~CompoundFileWriter() { +} + +DirectoryPtr CompoundFileWriter::getDirectory() { + return DirectoryPtr(_directory); +} + +String CompoundFileWriter::getName() { + return fileName; +} + +void CompoundFileWriter::addFile(const String& file) { + if (merged) { + boost::throw_exception(IllegalStateException(L"Can't add extensions after merge has been called")); } - - String CompoundFileWriter::getName() - { - return fileName; + + if (file.empty()) { + boost::throw_exception(IllegalArgumentException(L"file cannot be empty")); } - - void CompoundFileWriter::addFile(const String& file) - { - if (merged) - boost::throw_exception(IllegalStateException(L"Can't add extensions after merge has been 
called")); - - if (file.empty()) - boost::throw_exception(IllegalArgumentException(L"file cannot be empty")); - - if (!ids.add(file)) - boost::throw_exception(IllegalArgumentException(L"File " + file + L" already added")); - - FileEntry entry; - entry.file = file; - entries.add(entry); + + if (!ids.add(file)) { + boost::throw_exception(IllegalArgumentException(L"File " + file + L" already added")); } - - void CompoundFileWriter::close() - { - if (merged) - boost::throw_exception(IllegalStateException(L"Merge already performed")); - - if (entries.empty()) - boost::throw_exception(IllegalStateException(L"No entries to merge have been defined")); - - merged = true; - - DirectoryPtr directory(_directory); - - // open the compound stream - IndexOutputPtr os; - LuceneException finally; - try - { - os = directory->createOutput(fileName); - - // Write the number of entries - os->writeVInt(entries.size()); - - // Write the directory with all offsets at 0. Remember the positions of directory entries so that we - // can adjust the offsets later - int64_t totalSize = 0; - for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) - { - fe->directoryOffset = os->getFilePointer(); - os->writeLong(0); // for now - os->writeString(fe->file); - totalSize += directory->fileLength(fe->file); - } - - // Pre-allocate size of file as optimization - this can potentially help IO performance as we write the - // file and also later during searching. It also uncovers a disk-full situation earlier and hopefully - // without actually filling disk to 100% - int64_t finalLength = totalSize + os->getFilePointer(); - os->setLength(finalLength); - - // Open the files and copy their data into the stream. Remember the locations of each file's data section. 
- ByteArray buffer(ByteArray::newInstance(16384)); - for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) - { - fe->dataOffset = os->getFilePointer(); - copyFile(*fe, os, buffer); - } - - // Write the data offsets into the directory of the compound stream - for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) - { - os->seek(fe->directoryOffset); - os->writeLong(fe->dataOffset); - } - - BOOST_ASSERT(finalLength == os->length()); - - // Close the output stream. Set the os to null before trying to close so that if an exception occurs during - // the close, the finally clause below will not attempt to close the stream the second time. - IndexOutputPtr tmp(os); - os.reset(); - tmp->close(); + + FileEntry entry; + entry.file = file; + entries.add(entry); +} + +void CompoundFileWriter::close() { + if (merged) { + boost::throw_exception(IllegalStateException(L"Merge already performed")); + } + + if (entries.empty()) { + boost::throw_exception(IllegalStateException(L"No entries to merge have been defined")); + } + + merged = true; + + DirectoryPtr directory(_directory); + + // open the compound stream + IndexOutputPtr os; + LuceneException finally; + try { + os = directory->createOutput(fileName); + + // Write the number of entries + os->writeVInt(entries.size()); + + // Write the directory with all offsets at 0. Remember the positions of directory entries so that we + // can adjust the offsets later + int64_t totalSize = 0; + for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { + fe->directoryOffset = os->getFilePointer(); + os->writeLong(0); // for now + os->writeString(fe->file); + totalSize += directory->fileLength(fe->file); } - catch (LuceneException& e) - { - finally = e; + + // Pre-allocate size of file as optimization - this can potentially help IO performance as we write the + // file and also later during searching. 
It also uncovers a disk-full situation earlier and hopefully + // without actually filling disk to 100% + int64_t finalLength = totalSize + os->getFilePointer(); + os->setLength(finalLength); + + // Open the files and copy their data into the stream. Remember the locations of each file's data section. + ByteArray buffer(ByteArray::newInstance(16384)); + for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { + fe->dataOffset = os->getFilePointer(); + copyFile(*fe, os, buffer); } - - if (os) - { - try - { - os->close(); - } - catch (LuceneException&) - { - } + + // Write the data offsets into the directory of the compound stream + for (Collection::iterator fe = entries.begin(); fe != entries.end(); ++fe) { + os->seek(fe->directoryOffset); + os->writeLong(fe->dataOffset); } - finally.throwException(); + + BOOST_ASSERT(finalLength == os->length()); + + // Close the output stream. Set the os to null before trying to close so that if an exception occurs during + // the close, the finally clause below will not attempt to close the stream the second time. 
+ IndexOutputPtr tmp(os); + os.reset(); + tmp->close(); + } catch (LuceneException& e) { + finally = e; } - - void CompoundFileWriter::copyFile(const FileEntry& source, IndexOutputPtr os, ByteArray buffer) - { - IndexInputPtr is; - DirectoryPtr directory(_directory); - LuceneException finally; - try - { - int64_t startPtr = os->getFilePointer(); - - is = directory->openInput(source.file); - int64_t length = is->length(); - int64_t remainder = length; - int32_t chunk = buffer.size(); - - while (remainder > 0) - { - int32_t len = std::min(chunk, (int32_t)remainder); - is->readBytes(buffer.get(), 0, len, false); - os->writeBytes(buffer.get(), len); - remainder -= len; - if (checkAbort) - { - // Roughly every 2 MB we will check if it's time to abort - checkAbort->work(80); - } - } - - // Verify that remainder is 0 - if (remainder != 0) - { - boost::throw_exception(IOException(L"Non-zero remainder length after copying: " + StringUtils::toString(remainder) + - L" (id: " + source.file + L", length: " + StringUtils::toString(length) + - L", buffer size: " + StringUtils::toString(chunk) + L")")); - } - - // Verify that the output length diff is equal to original file - int64_t endPtr = os->getFilePointer(); - int64_t diff = endPtr - startPtr; - if (diff != length) - { - boost::throw_exception(IOException(L"Difference in the output file offsets " + StringUtils::toString(diff) + - L" does not match the original file length " + StringUtils::toString(length))); + + if (os) { + try { + os->close(); + } catch (LuceneException&) { + } + } + finally.throwException(); +} + +void CompoundFileWriter::copyFile(const FileEntry& source, const IndexOutputPtr& os, ByteArray buffer) { + IndexInputPtr is; + DirectoryPtr directory(_directory); + LuceneException finally; + try { + int64_t startPtr = os->getFilePointer(); + + is = directory->openInput(source.file); + int64_t length = is->length(); + int64_t remainder = length; + int64_t chunk = buffer.size(); + + while (remainder > 0) { + 
int32_t len = (int32_t)std::min(chunk, remainder); + is->readBytes(buffer.get(), 0, len, false); + os->writeBytes(buffer.get(), len); + remainder -= len; + if (checkAbort) { + // Roughly every 2 MB we will check if it's time to abort + checkAbort->work(80); } } - catch (LuceneException& e) - { - finally = e; + + // Verify that remainder is 0 + if (remainder != 0) { + boost::throw_exception(IOException(L"Non-zero remainder length after copying: " + StringUtils::toString(remainder) + + L" (id: " + source.file + L", length: " + StringUtils::toString(length) + + L", buffer size: " + StringUtils::toString(chunk) + L")")); + } + + // Verify that the output length diff is equal to original file + int64_t endPtr = os->getFilePointer(); + int64_t diff = endPtr - startPtr; + if (diff != length) { + boost::throw_exception(IOException(L"Difference in the output file offsets " + StringUtils::toString(diff) + + L" does not match the original file length " + StringUtils::toString(length))); } - - if (is) - is->close(); - finally.throwException(); + } catch (LuceneException& e) { + finally = e; + } + + if (is) { + is->close(); } + finally.throwException(); +} + } diff --git a/src/core/index/ConcurrentMergeScheduler.cpp b/src/core/index/ConcurrentMergeScheduler.cpp index fe4933ee..61c1ab15 100644 --- a/src/core/index/ConcurrentMergeScheduler.cpp +++ b/src/core/index/ConcurrentMergeScheduler.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,341 +11,298 @@ #include "TestPoint.h" #include "StringUtils.h" -namespace Lucene -{ - Collection ConcurrentMergeScheduler::allInstances; - bool ConcurrentMergeScheduler::anyExceptions = false; - - ConcurrentMergeScheduler::ConcurrentMergeScheduler() - { - mergeThreadPriority = -1; - mergeThreads = SetMergeThread::newInstance(); - maxThreadCount = 1; - suppressExceptions = false; - closed = false; - } - - ConcurrentMergeScheduler::~ConcurrentMergeScheduler() - { - } - - void ConcurrentMergeScheduler::initialize() - { - // Only for testing - if (allInstances) - addMyself(); - } - - void ConcurrentMergeScheduler::setMaxThreadCount(int32_t count) - { - if (count < 1) - boost::throw_exception(IllegalArgumentException(L"count should be at least 1")); - maxThreadCount = count; - } - - int32_t ConcurrentMergeScheduler::getMaxThreadCount() - { - return maxThreadCount; +namespace Lucene { + +Collection ConcurrentMergeScheduler::allInstances; +bool ConcurrentMergeScheduler::anyExceptions = false; + +ConcurrentMergeScheduler::ConcurrentMergeScheduler() { + mergeThreadPriority = -1; + mergeThreads = SetMergeThread::newInstance(); + maxThreadCount = 1; + suppressExceptions = false; + closed = false; +} + +ConcurrentMergeScheduler::~ConcurrentMergeScheduler() { +} + +void ConcurrentMergeScheduler::initialize() { + // Only for testing + if (allInstances) { + addMyself(); } - - int32_t ConcurrentMergeScheduler::getMergeThreadPriority() - { - SyncLock syncLock(this); - initMergeThreadPriority(); - return mergeThreadPriority; +} + +void ConcurrentMergeScheduler::setMaxThreadCount(int32_t count) { + if (count < 1) { + boost::throw_exception(IllegalArgumentException(L"count should be at least 1")); } - - void ConcurrentMergeScheduler::setMergeThreadPriority(int32_t pri) - { - SyncLock syncLock(this); - if (pri > LuceneThread::MAX_PRIORITY || pri < LuceneThread::MIN_PRIORITY) - { - 
boost::throw_exception(IllegalArgumentException(L"priority must be in range " + StringUtils::toString(LuceneThread::MIN_PRIORITY) + - L" .. " + StringUtils::toString(LuceneThread::MAX_PRIORITY) + L" inclusive")); - } - mergeThreadPriority = pri; - - for (SetMergeThread::iterator merge = mergeThreads.begin(); merge != mergeThreads.end(); ++merge) - (*merge)->setThreadPriority(pri); + maxThreadCount = count; +} + +int32_t ConcurrentMergeScheduler::getMaxThreadCount() { + return maxThreadCount; +} + +int32_t ConcurrentMergeScheduler::getMergeThreadPriority() { + SyncLock syncLock(this); + initMergeThreadPriority(); + return mergeThreadPriority; +} + +void ConcurrentMergeScheduler::setMergeThreadPriority(int32_t pri) { + SyncLock syncLock(this); + if (pri > LuceneThread::MAX_THREAD_PRIORITY || pri < LuceneThread::MIN_THREAD_PRIORITY) { + boost::throw_exception(IllegalArgumentException(L"priority must be in range " + StringUtils::toString(LuceneThread::MIN_THREAD_PRIORITY) + + L" .. " + StringUtils::toString(LuceneThread::MAX_THREAD_PRIORITY) + L" inclusive")); } - - bool ConcurrentMergeScheduler::verbose() - { - return (!_writer.expired() && IndexWriterPtr(_writer)->verbose()); + mergeThreadPriority = pri; + + for (SetMergeThread::iterator merge = mergeThreads.begin(); merge != mergeThreads.end(); ++merge) { + (*merge)->setThreadPriority(pri); } - - void ConcurrentMergeScheduler::message(const String& message) - { - if (verbose() && !_writer.expired()) - IndexWriterPtr(_writer)->message(L"CMS: " + message); +} + +bool ConcurrentMergeScheduler::verbose() { + return (!_writer.expired() && IndexWriterPtr(_writer)->verbose()); +} + +void ConcurrentMergeScheduler::message(const String& message) { + if (verbose() && !_writer.expired()) { + IndexWriterPtr(_writer)->message(L"CMS: " + message); } - - void ConcurrentMergeScheduler::initMergeThreadPriority() - { - SyncLock syncLock(this); - if (mergeThreadPriority == -1) - { - // Default to slightly higher priority than our 
calling thread - mergeThreadPriority = std::min(LuceneThread::NORM_PRIORITY + 1, LuceneThread::MAX_PRIORITY); - } +} + +void ConcurrentMergeScheduler::initMergeThreadPriority() { + SyncLock syncLock(this); + if (mergeThreadPriority == -1) { + // Default to slightly higher priority than our calling thread + mergeThreadPriority = std::min(LuceneThread::NORM_THREAD_PRIORITY + 1, LuceneThread::MAX_THREAD_PRIORITY); } - - void ConcurrentMergeScheduler::close() - { - sync(); - closed = true; +} + +void ConcurrentMergeScheduler::close() { + sync(); + closed = true; +} + +void ConcurrentMergeScheduler::sync() { + SyncLock syncLock(this); + while (mergeThreadCount() > 0) { + message(L"now wait for threads; currently " + StringUtils::toString(mergeThreads.size()) + L" still running"); + wait(1000); } - - void ConcurrentMergeScheduler::sync() - { - SyncLock syncLock(this); - while (mergeThreadCount() > 0) - { - message(L"now wait for threads; currently " + StringUtils::toString(mergeThreads.size()) + L" still running"); - wait(1000); + mergeThreads.clear(); +} + +int32_t ConcurrentMergeScheduler::mergeThreadCount() { + SyncLock syncLock(this); + int32_t count = 0; + for (SetMergeThread::iterator merge = mergeThreads.begin(); merge != mergeThreads.end(); ++merge) { + if ((*merge)->isAlive()) { + ++count; } - mergeThreads.clear(); } - - int32_t ConcurrentMergeScheduler::mergeThreadCount() - { - SyncLock syncLock(this); - int32_t count = 0; - for (SetMergeThread::iterator merge = mergeThreads.begin(); merge != mergeThreads.end(); ++merge) - { - if ((*merge)->isAlive()) - ++count; + return count; +} + +void ConcurrentMergeScheduler::merge(const IndexWriterPtr& writer) { + BOOST_ASSERT(!writer->holdsLock()); + + this->_writer = writer; + + initMergeThreadPriority(); + + dir = writer->getDirectory(); + + // First, quickly run through the newly proposed merges and add any orthogonal merges (ie a merge not + // involving segments already pending to be merged) to the queue. 
If we are way behind on merging, + // many of these newly proposed merges will likely already be registered. + message(L"now merge"); + message(L" index: " + writer->segString()); + + // Iterate, pulling from the IndexWriter's queue of pending merges, until it's empty + while (true) { + OneMergePtr merge(writer->getNextMerge()); + if (!merge) { + message(L" no more merges pending; now return"); + return; } - return count; - } - - void ConcurrentMergeScheduler::merge(IndexWriterPtr writer) - { - BOOST_ASSERT(!writer->holdsLock()); - - this->_writer = writer; - - initMergeThreadPriority(); - - dir = writer->getDirectory(); - - // First, quickly run through the newly proposed merges and add any orthogonal merges (ie a merge not - // involving segments already pending to be merged) to the queue. If we are way behind on merging, - // many of these newly proposed merges will likely already be registered. - message(L"now merge"); - message(L" index: " + writer->segString()); - - // Iterate, pulling from the IndexWriter's queue of pending merges, until it's empty - while (true) - { - OneMergePtr merge(writer->getNextMerge()); - if (!merge) - { - message(L" no more merges pending; now return"); - return; - } - - // We do this with the primary thread to keep deterministic assignment of segment names - writer->mergeInit(merge); - - bool success = false; - LuceneException finally; - try - { - SyncLock syncLock(this); - MergeThreadPtr merger; - while (mergeThreadCount() >= maxThreadCount) - { - message(L" too many merge threads running; stalling..."); - wait(1000); - } - - message(L" consider merge " + merge->segString(dir)); - - BOOST_ASSERT(mergeThreadCount() < maxThreadCount); - - // OK to spawn a new merge thread to handle this merge - merger = getMergeThread(writer, merge); - mergeThreads.add(merger); - message(L" launch new thread"); - - merger->start(); - success = true; - } - catch (LuceneException& e) - { - finally = e; + + // We do this with the primary thread to keep 
deterministic assignment of segment names + writer->mergeInit(merge); + + bool success = false; + LuceneException finally; + try { + SyncLock syncLock(this); + MergeThreadPtr merger; + while (mergeThreadCount() >= maxThreadCount) { + message(L" too many merge threads running; stalling..."); + wait(1000); } - if (!success) - writer->mergeFinish(merge); - finally.throwException(); + + message(L" consider merge " + merge->segString(dir)); + + BOOST_ASSERT(mergeThreadCount() < maxThreadCount); + + // OK to spawn a new merge thread to handle this merge + merger = getMergeThread(writer, merge); + mergeThreads.add(merger); + message(L" launch new thread"); + + merger->start(); + success = true; + } catch (LuceneException& e) { + finally = e; } - } - - void ConcurrentMergeScheduler::doMerge(OneMergePtr merge) - { - TestScope testScope(L"ConcurrentMergeScheduler", L"doMerge"); - IndexWriterPtr(_writer)->merge(merge); - } - - MergeThreadPtr ConcurrentMergeScheduler::getMergeThread(IndexWriterPtr writer, OneMergePtr merge) - { - SyncLock syncLock(this); - MergeThreadPtr thread(newLucene(shared_from_this(), writer, merge)); - thread->setThreadPriority(mergeThreadPriority); - return thread; - } - - void ConcurrentMergeScheduler::handleMergeException(const LuceneException& exc) - { - // When an exception is hit during merge, IndexWriter removes any partial files and then - // allows another merge to run. 
If whatever caused the error is not transient then the - // exception will keep happening, so, we sleep here to avoid saturating CPU in such cases - LuceneThread::threadSleep(250); // pause 250 msec - boost::throw_exception(MergeException()); - } - - bool ConcurrentMergeScheduler::anyUnhandledExceptions() - { - if (!allInstances) - boost::throw_exception(RuntimeException(L"setTestMode() was not called")); - SyncLock instancesLock(&allInstances); - for (Collection::iterator instance = allInstances.begin(); instance != allInstances.end(); ++instance) - (*instance)->sync(); - bool v = anyExceptions; - anyExceptions = false; - return v; - } - - void ConcurrentMergeScheduler::clearUnhandledExceptions() - { - SyncLock instancesLock(&allInstances); - anyExceptions = false; - } - - void ConcurrentMergeScheduler::addMyself() - { - SyncLock instancesLock(&allInstances); - int32_t size = allInstances.size(); - int32_t upto = 0; - for (int32_t i = 0; i < size; ++i) - { - ConcurrentMergeSchedulerPtr other(allInstances[i]); - if (!(other->closed && other->mergeThreadCount() == 0)) - { - // Keep this one for now: it still has threads or may spawn new threads - allInstances[upto++] = other; - } - - allInstances.remove(allInstances.begin() + upto, allInstances.end()); - allInstances.add(shared_from_this()); + if (!success) { + writer->mergeFinish(merge); } + finally.throwException(); } - - void ConcurrentMergeScheduler::setSuppressExceptions() - { - suppressExceptions = true; - } - - void ConcurrentMergeScheduler::clearSuppressExceptions() - { - suppressExceptions = false; - } - - void ConcurrentMergeScheduler::setTestMode() - { - allInstances = Collection::newInstance(); - } - - MergeThread::MergeThread(ConcurrentMergeSchedulerPtr merger, IndexWriterPtr writer, OneMergePtr startMerge) - { - this->_merger = merger; - this->_writer = writer; - this->startMerge = startMerge; +} + +void ConcurrentMergeScheduler::doMerge(const OneMergePtr& merge) { + TestScope 
testScope(L"ConcurrentMergeScheduler", L"doMerge"); + IndexWriterPtr(_writer)->merge(merge); +} + +MergeThreadPtr ConcurrentMergeScheduler::getMergeThread(const IndexWriterPtr& writer, const OneMergePtr& merge) { + SyncLock syncLock(this); + MergeThreadPtr thread(newLucene(shared_from_this(), writer, merge)); + thread->setThreadPriority(mergeThreadPriority); + return thread; +} + +void ConcurrentMergeScheduler::handleMergeException(const LuceneException& exc) { + // When an exception is hit during merge, IndexWriter removes any partial files and then + // allows another merge to run. If whatever caused the error is not transient then the + // exception will keep happening, so, we sleep here to avoid saturating CPU in such cases + LuceneThread::threadSleep(250); // pause 250 msec + boost::throw_exception(MergeException()); +} + +bool ConcurrentMergeScheduler::anyUnhandledExceptions() { + if (!allInstances) { + boost::throw_exception(RuntimeException(L"setTestMode() was not called")); } - - MergeThread::~MergeThread() - { + SyncLock instancesLock(&allInstances); + for (Collection::iterator instance = allInstances.begin(); instance != allInstances.end(); ++instance) { + (*instance)->sync(); } - - void MergeThread::setRunningMerge(OneMergePtr merge) - { - ConcurrentMergeSchedulerPtr merger(_merger); - SyncLock syncLock(merger); - runningMerge = merge; + bool v = anyExceptions; + anyExceptions = false; + return v; +} + +void ConcurrentMergeScheduler::clearUnhandledExceptions() { + SyncLock instancesLock(&allInstances); + anyExceptions = false; +} + +void ConcurrentMergeScheduler::addMyself() { + SyncLock instancesLock(&allInstances); + int32_t size = allInstances.size(); + int32_t upto = 0; + for (int32_t i = 0; i < size; ++i) { + ConcurrentMergeSchedulerPtr other(allInstances[i]); + if (!(other->closed && other->mergeThreadCount() == 0)) { + // Keep this one for now: it still has threads or may spawn new threads + allInstances[upto++] = other; + } + + 
allInstances.remove(allInstances.begin() + upto, allInstances.end()); + allInstances.add(shared_from_this()); } - - OneMergePtr MergeThread::getRunningMerge() - { - ConcurrentMergeSchedulerPtr merger(_merger); - SyncLock syncLock(merger); - return runningMerge; +} + +void ConcurrentMergeScheduler::setSuppressExceptions() { + suppressExceptions = true; +} + +void ConcurrentMergeScheduler::clearSuppressExceptions() { + suppressExceptions = false; +} + +void ConcurrentMergeScheduler::setTestMode() { + allInstances = Collection::newInstance(); +} + +MergeThread::MergeThread(const ConcurrentMergeSchedulerPtr& merger, const IndexWriterPtr& writer, const OneMergePtr& startMerge) { + this->_merger = merger; + this->_writer = writer; + this->startMerge = startMerge; +} + +MergeThread::~MergeThread() { +} + +void MergeThread::setRunningMerge(const OneMergePtr& merge) { + ConcurrentMergeSchedulerPtr merger(_merger); + SyncLock syncLock(merger); + runningMerge = merge; +} + +OneMergePtr MergeThread::getRunningMerge() { + ConcurrentMergeSchedulerPtr merger(_merger); + SyncLock syncLock(merger); + return runningMerge; +} + +void MergeThread::setThreadPriority(int32_t pri) { + try { + setPriority(pri); + } catch (...) { } - - void MergeThread::setThreadPriority(int32_t pri) - { - try - { - setPriority(pri); +} + +void MergeThread::run() { + // First time through the while loop we do the merge that we were started with + OneMergePtr merge(this->startMerge); + ConcurrentMergeSchedulerPtr merger(_merger); + + LuceneException finally; + try { + merger->message(L" merge thread: start"); + IndexWriterPtr writer(_writer); + + while (true) { + setRunningMerge(merge); + merger->doMerge(merge); + + // Subsequent times through the loop we do any new merge that writer says is necessary + merge = writer->getNextMerge(); + if (merge) { + writer->mergeInit(merge); + merger->message(L" merge thread: do another merge " + merge->segString(merger->dir)); + } else { + break; + } } - catch (...) 
- { + + merger->message(L" merge thread: done"); + } catch (MergeAbortedException&) { + // Ignore the exception if it was due to abort + } catch (LuceneException& e) { + if (!merger->suppressExceptions) { + // suppressExceptions is normally only set during testing. + merger->anyExceptions = true; + merger->handleMergeException(e); + } else { + finally = e; } } - - void MergeThread::run() + { - // First time through the while loop we do the merge that we were started with - OneMergePtr merge(this->startMerge); - ConcurrentMergeSchedulerPtr merger(_merger); - - LuceneException finally; - try - { - merger->message(L" merge thread: start"); - IndexWriterPtr writer(_writer); - - while (true) - { - setRunningMerge(merge); - merger->doMerge(merge); - - // Subsequent times through the loop we do any new merge that writer says is necessary - merge = writer->getNextMerge(); - if (merge) - { - writer->mergeInit(merge); - merger->message(L" merge thread: do another merge " + merge->segString(merger->dir)); - } - else - break; - } - - merger->message(L" merge thread: done"); - } - catch (MergeAbortedException&) - { - // Ignore the exception if it was due to abort - } - catch (LuceneException& e) - { - if (!merger->suppressExceptions) - { - // suppressExceptions is normally only set during testing. 
- merger->anyExceptions = true; - merger->handleMergeException(e); - } - else - finally = e; - } - - { - SyncLock syncLock(merger); - merger->notifyAll(); - - bool removed = merger->mergeThreads.remove(shared_from_this()); - BOOST_ASSERT(removed); - } - finally.throwException(); + SyncLock syncLock(merger); + merger->notifyAll(); + + bool removed = merger->mergeThreads.remove(shared_from_this()); + BOOST_ASSERT(removed); } + finally.throwException(); +} + } diff --git a/src/core/index/DefaultSkipListReader.cpp b/src/core/index/DefaultSkipListReader.cpp index 805556b5..486f5e9a 100644 --- a/src/core/index/DefaultSkipListReader.cpp +++ b/src/core/index/DefaultSkipListReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,90 +8,82 @@ #include "DefaultSkipListReader.h" #include "MiscUtils.h" -namespace Lucene -{ - DefaultSkipListReader::DefaultSkipListReader(IndexInputPtr skipStream, int32_t maxSkipLevels, int32_t skipInterval) - : MultiLevelSkipListReader(skipStream, maxSkipLevels, skipInterval) - { - currentFieldStoresPayloads = false; - lastFreqPointer = 0; - lastProxPointer = 0; - lastPayloadLength = 0; - - freqPointer = Collection::newInstance(maxSkipLevels); - proxPointer = Collection::newInstance(maxSkipLevels); - payloadLength = Collection::newInstance(maxSkipLevels); - - MiscUtils::arrayFill(freqPointer.begin(), 0, freqPointer.size(), 0); - MiscUtils::arrayFill(proxPointer.begin(), 0, proxPointer.size(), 0); - MiscUtils::arrayFill(payloadLength.begin(), 0, payloadLength.size(), 0); - } - - DefaultSkipListReader::~DefaultSkipListReader() - { - } - - void DefaultSkipListReader::init(int64_t skipPointer, int64_t freqBasePointer, int64_t proxBasePointer, int32_t df, bool storesPayloads) - { - MultiLevelSkipListReader::init(skipPointer, df); - this->currentFieldStoresPayloads = storesPayloads; - lastFreqPointer = freqBasePointer; - lastProxPointer = proxBasePointer; - - MiscUtils::arrayFill(freqPointer.begin(), 0, freqPointer.size(), freqBasePointer); - MiscUtils::arrayFill(proxPointer.begin(), 0, proxPointer.size(), proxBasePointer); - MiscUtils::arrayFill(payloadLength.begin(), 0, payloadLength.size(), 0); - } - - int64_t DefaultSkipListReader::getFreqPointer() - { - return lastFreqPointer; - } - - int64_t DefaultSkipListReader::getProxPointer() - { - return lastProxPointer; - } - - int32_t DefaultSkipListReader::getPayloadLength() - { - return lastPayloadLength; - } - - void DefaultSkipListReader::seekChild(int32_t level) - { - MultiLevelSkipListReader::seekChild(level); - freqPointer[level] = lastFreqPointer; - proxPointer[level] = lastProxPointer; - payloadLength[level] = lastPayloadLength; - 
} - - void DefaultSkipListReader::setLastSkipData(int32_t level) - { - MultiLevelSkipListReader::setLastSkipData(level); - lastFreqPointer = freqPointer[level]; - lastProxPointer = proxPointer[level]; - lastPayloadLength = payloadLength[level]; - } - - int32_t DefaultSkipListReader::readSkipData(int32_t level, IndexInputPtr skipStream) - { - int32_t delta; - if (currentFieldStoresPayloads) - { - // The current field stores payloads. If the doc delta is odd then we have to read the current - // payload length because it differs from the length of the previous payload - delta = skipStream->readVInt(); - if ((delta & 1) != 0) - payloadLength[level] = skipStream->readVInt(); - delta = MiscUtils::unsignedShift(delta, 1); +namespace Lucene { + +DefaultSkipListReader::DefaultSkipListReader(const IndexInputPtr& skipStream, int32_t maxSkipLevels, int32_t skipInterval) + : MultiLevelSkipListReader(skipStream, maxSkipLevels, skipInterval) { + currentFieldStoresPayloads = false; + lastFreqPointer = 0; + lastProxPointer = 0; + lastPayloadLength = 0; + + freqPointer = Collection::newInstance(maxSkipLevels); + proxPointer = Collection::newInstance(maxSkipLevels); + payloadLength = Collection::newInstance(maxSkipLevels); + + MiscUtils::arrayFill(freqPointer.begin(), 0, freqPointer.size(), 0); + MiscUtils::arrayFill(proxPointer.begin(), 0, proxPointer.size(), 0); + MiscUtils::arrayFill(payloadLength.begin(), 0, payloadLength.size(), 0); +} + +DefaultSkipListReader::~DefaultSkipListReader() { +} + +void DefaultSkipListReader::init(int64_t skipPointer, int64_t freqBasePointer, int64_t proxBasePointer, int32_t df, bool storesPayloads) { + MultiLevelSkipListReader::init(skipPointer, df); + this->currentFieldStoresPayloads = storesPayloads; + lastFreqPointer = freqBasePointer; + lastProxPointer = proxBasePointer; + + MiscUtils::arrayFill(freqPointer.begin(), 0, freqPointer.size(), freqBasePointer); + MiscUtils::arrayFill(proxPointer.begin(), 0, proxPointer.size(), proxBasePointer); + 
MiscUtils::arrayFill(payloadLength.begin(), 0, payloadLength.size(), 0); +} + +int64_t DefaultSkipListReader::getFreqPointer() { + return lastFreqPointer; +} + +int64_t DefaultSkipListReader::getProxPointer() { + return lastProxPointer; +} + +int32_t DefaultSkipListReader::getPayloadLength() { + return lastPayloadLength; +} + +void DefaultSkipListReader::seekChild(int32_t level) { + MultiLevelSkipListReader::seekChild(level); + freqPointer[level] = lastFreqPointer; + proxPointer[level] = lastProxPointer; + payloadLength[level] = lastPayloadLength; +} + +void DefaultSkipListReader::setLastSkipData(int32_t level) { + MultiLevelSkipListReader::setLastSkipData(level); + lastFreqPointer = freqPointer[level]; + lastProxPointer = proxPointer[level]; + lastPayloadLength = payloadLength[level]; +} + +int32_t DefaultSkipListReader::readSkipData(int32_t level, const IndexInputPtr& skipStream) { + int32_t delta; + if (currentFieldStoresPayloads) { + // The current field stores payloads. If the doc delta is odd then we have to read the current + // payload length because it differs from the length of the previous payload + delta = skipStream->readVInt(); + if ((delta & 1) != 0) { + payloadLength[level] = skipStream->readVInt(); } - else - delta = skipStream->readVInt(); - - freqPointer[level] += skipStream->readVInt(); - proxPointer[level] += skipStream->readVInt(); - - return delta; + delta = MiscUtils::unsignedShift(delta, 1); + } else { + delta = skipStream->readVInt(); } + + freqPointer[level] += skipStream->readVInt(); + proxPointer[level] += skipStream->readVInt(); + + return delta; +} + } diff --git a/src/core/index/DefaultSkipListWriter.cpp b/src/core/index/DefaultSkipListWriter.cpp index 3963ced3..64c5a2cb 100644 --- a/src/core/index/DefaultSkipListWriter.cpp +++ b/src/core/index/DefaultSkipListWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,110 +9,100 @@ #include "IndexOutput.h" #include "MiscUtils.h" -namespace Lucene -{ - DefaultSkipListWriter::DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, IndexOutputPtr freqOutput, IndexOutputPtr proxOutput) : MultiLevelSkipListWriter(skipInterval, numberOfSkipLevels, docCount) - { - curDoc = 0; - curStorePayloads = false; - curPayloadLength = 0; - curFreqPointer = 0; - curProxPointer = 0; - - this->freqOutput = freqOutput; - this->proxOutput = proxOutput; +namespace Lucene { - lastSkipDoc = Collection::newInstance(numberOfSkipLevels); - lastSkipPayloadLength = Collection::newInstance(numberOfSkipLevels); - lastSkipFreqPointer = Collection::newInstance(numberOfSkipLevels); - lastSkipProxPointer = Collection::newInstance(numberOfSkipLevels); - } - - DefaultSkipListWriter::~DefaultSkipListWriter() - { - } - - void DefaultSkipListWriter::setFreqOutput(IndexOutputPtr freqOutput) - { - this->freqOutput = freqOutput; - } - - void DefaultSkipListWriter::setProxOutput(IndexOutputPtr proxOutput) - { - this->proxOutput = proxOutput; - } - - void DefaultSkipListWriter::setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength) - { - this->curDoc = doc; - this->curStorePayloads = storePayloads; - this->curPayloadLength = payloadLength; - this->curFreqPointer = freqOutput->getFilePointer(); - if (proxOutput) - this->curProxPointer = proxOutput->getFilePointer(); +DefaultSkipListWriter::DefaultSkipListWriter(int32_t skipInterval, int32_t numberOfSkipLevels, int32_t docCount, const IndexOutputPtr& freqOutput, const IndexOutputPtr& proxOutput) : MultiLevelSkipListWriter(skipInterval, numberOfSkipLevels, docCount) { + curDoc = 0; + curStorePayloads = false; + 
curPayloadLength = 0; + curFreqPointer = 0; + curProxPointer = 0; + + this->freqOutput = freqOutput; + this->proxOutput = proxOutput; + + lastSkipDoc = Collection::newInstance(numberOfSkipLevels); + lastSkipPayloadLength = Collection::newInstance(numberOfSkipLevels); + lastSkipFreqPointer = Collection::newInstance(numberOfSkipLevels); + lastSkipProxPointer = Collection::newInstance(numberOfSkipLevels); +} + +DefaultSkipListWriter::~DefaultSkipListWriter() { +} + +void DefaultSkipListWriter::setFreqOutput(const IndexOutputPtr& freqOutput) { + this->freqOutput = freqOutput; +} + +void DefaultSkipListWriter::setProxOutput(const IndexOutputPtr& proxOutput) { + this->proxOutput = proxOutput; +} + +void DefaultSkipListWriter::setSkipData(int32_t doc, bool storePayloads, int32_t payloadLength) { + this->curDoc = doc; + this->curStorePayloads = storePayloads; + this->curPayloadLength = payloadLength; + this->curFreqPointer = freqOutput->getFilePointer(); + if (proxOutput) { + this->curProxPointer = proxOutput->getFilePointer(); } - - void DefaultSkipListWriter::resetSkip() - { - MultiLevelSkipListWriter::resetSkip(); - MiscUtils::arrayFill(lastSkipDoc.begin(), 0, lastSkipDoc.size(), 0); - MiscUtils::arrayFill(lastSkipPayloadLength.begin(), 0, lastSkipPayloadLength.size(), -1); // we don't have to write the first length in the skip list - MiscUtils::arrayFill(lastSkipFreqPointer.begin(), 0, lastSkipFreqPointer.size(), freqOutput->getFilePointer()); - if (proxOutput) - MiscUtils::arrayFill(lastSkipProxPointer.begin(), 0, lastSkipProxPointer.size(), proxOutput->getFilePointer()); +} + +void DefaultSkipListWriter::resetSkip() { + MultiLevelSkipListWriter::resetSkip(); + MiscUtils::arrayFill(lastSkipDoc.begin(), 0, lastSkipDoc.size(), 0); + MiscUtils::arrayFill(lastSkipPayloadLength.begin(), 0, lastSkipPayloadLength.size(), -1); // we don't have to write the first length in the skip list + MiscUtils::arrayFill(lastSkipFreqPointer.begin(), 0, lastSkipFreqPointer.size(), 
freqOutput->getFilePointer()); + if (proxOutput) { + MiscUtils::arrayFill(lastSkipProxPointer.begin(), 0, lastSkipProxPointer.size(), proxOutput->getFilePointer()); } - - void DefaultSkipListWriter::writeSkipData(int32_t level, IndexOutputPtr skipBuffer) - { - // To efficiently store payloads in the posting lists we do not store the length of - // every payload. Instead we omit the length for a payload if the previous payload had - // the same length. - // However, in order to support skipping the payload length at every skip point must be known. - // So we use the same length encoding that we use for the posting lists for the skip data as well: - // Case 1: current field does not store payloads - // SkipDatum --> DocSkip, FreqSkip, ProxSkip - // DocSkip,FreqSkip,ProxSkip --> VInt - // DocSkip records the document number before every SkipInterval th document in TermFreqs. - // Document numbers are represented as differences from the previous value in the sequence. - // Case 2: current field stores payloads - // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip - // DocSkip,FreqSkip,ProxSkip --> VInt - // PayloadLength --> VInt - // In this case DocSkip/2 is the difference between - // the current and the previous value. If DocSkip - // is odd, then a PayloadLength encoded as VInt follows, - // if DocSkip is even, then it is assumed that the - // current payload length equals the length at the previous - // skip point - if (curStorePayloads) - { - int32_t delta = curDoc - lastSkipDoc[level]; - if (curPayloadLength == lastSkipPayloadLength[level]) - { - // the current payload length equals the length at the previous skip point, so we don't store - // the length again - skipBuffer->writeVInt(delta * 2); - } - else - { - // the payload length is different from the previous one. We shift the DocSkip, set the lowest - // bit and store the current payload length as VInt. 
- skipBuffer->writeVInt(delta * 2 + 1); - skipBuffer->writeVInt(curPayloadLength); - lastSkipPayloadLength[level] = curPayloadLength; - } - } - else - { - // current field does not store payloads - skipBuffer->writeVInt(curDoc - lastSkipDoc[level]); - } - skipBuffer->writeVInt((int32_t)(curFreqPointer - lastSkipFreqPointer[level])); - skipBuffer->writeVInt((int32_t)(curProxPointer - lastSkipProxPointer[level])); - - lastSkipDoc[level] = curDoc; +} - lastSkipFreqPointer[level] = curFreqPointer; - lastSkipProxPointer[level] = curProxPointer; +void DefaultSkipListWriter::writeSkipData(int32_t level, const IndexOutputPtr& skipBuffer) { + // To efficiently store payloads in the posting lists we do not store the length of + // every payload. Instead we omit the length for a payload if the previous payload had + // the same length. + // However, in order to support skipping the payload length at every skip point must be known. + // So we use the same length encoding that we use for the posting lists for the skip data as well: + // Case 1: current field does not store payloads + // SkipDatum --> DocSkip, FreqSkip, ProxSkip + // DocSkip,FreqSkip,ProxSkip --> VInt + // DocSkip records the document number before every SkipInterval th document in TermFreqs. + // Document numbers are represented as differences from the previous value in the sequence. + // Case 2: current field stores payloads + // SkipDatum --> DocSkip, PayloadLength?, FreqSkip,ProxSkip + // DocSkip,FreqSkip,ProxSkip --> VInt + // PayloadLength --> VInt + // In this case DocSkip/2 is the difference between + // the current and the previous value. 
If DocSkip + // is odd, then a PayloadLength encoded as VInt follows, + // if DocSkip is even, then it is assumed that the + // current payload length equals the length at the previous + // skip point + if (curStorePayloads) { + int32_t delta = curDoc - lastSkipDoc[level]; + if (curPayloadLength == lastSkipPayloadLength[level]) { + // the current payload length equals the length at the previous skip point, so we don't store + // the length again + skipBuffer->writeVInt(delta * 2); + } else { + // the payload length is different from the previous one. We shift the DocSkip, set the lowest + // bit and store the current payload length as VInt. + skipBuffer->writeVInt(delta * 2 + 1); + skipBuffer->writeVInt(curPayloadLength); + lastSkipPayloadLength[level] = curPayloadLength; + } + } else { + // current field does not store payloads + skipBuffer->writeVInt(curDoc - lastSkipDoc[level]); } + skipBuffer->writeVInt((int32_t)(curFreqPointer - lastSkipFreqPointer[level])); + skipBuffer->writeVInt((int32_t)(curProxPointer - lastSkipProxPointer[level])); + + lastSkipDoc[level] = curDoc; + + lastSkipFreqPointer[level] = curFreqPointer; + lastSkipProxPointer[level] = curProxPointer; +} + } diff --git a/src/core/index/DirectoryReader.cpp b/src/core/index/DirectoryReader.cpp index 2d2d1cea..8704c32c 100644 --- a/src/core/index/DirectoryReader.cpp +++ b/src/core/index/DirectoryReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -27,1286 +27,1120 @@ #include "FieldCache.h" #include "MiscUtils.h" -namespace Lucene -{ - DirectoryReader::DirectoryReader(DirectoryPtr directory, SegmentInfosPtr sis, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) - { - normsCache = MapStringByteArray::newInstance(); - _maxDoc = 0; - _numDocs = -1; - _hasDeletions = false; - synced = HashSet::newInstance(); - stale = false; - rollbackHasChanges = false; - - this->_directory = directory; - this->readOnly = readOnly; - this->segmentInfos = sis; - this->deletionPolicy = deletionPolicy; - this->termInfosIndexDivisor = termInfosIndexDivisor; - - if (!readOnly) - { - // We assume that this segments_N was previously properly sync'd - HashSet files(sis->files(directory, true)); - synced.addAll(files.begin(), files.end()); +namespace Lucene { + +DirectoryReader::DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { + normsCache = MapStringByteArray::newInstance(); + _maxDoc = 0; + _numDocs = -1; + _hasDeletions = false; + synced = HashSet::newInstance(); + stale = false; + rollbackHasChanges = false; + + this->_directory = directory; + this->readOnly = readOnly; + this->segmentInfos = sis; + this->deletionPolicy = deletionPolicy; + this->termInfosIndexDivisor = termInfosIndexDivisor; + + if (!readOnly) { + // We assume that this segments_N was previously properly sync'd + HashSet files(sis->files(directory, true)); + synced.addAll(files.begin(), files.end()); + } + + // To reduce the chance of hitting FileNotFound (and having to retry), we open segments in + // reverse because IndexWriter merges & deletes the newest segments first. 
+ Collection readers(Collection::newInstance(sis->size())); + + for (int32_t i = sis->size() - 1; i >= 0; --i) { + bool success = false; + LuceneException finally; + try { + readers[i] = SegmentReader::get(readOnly, sis->info(i), termInfosIndexDivisor); + success = true; + } catch (LuceneException& e) { + finally = e; } - - // To reduce the chance of hitting FileNotFound (and having to retry), we open segments in - // reverse because IndexWriter merges & deletes the newest segments first. - Collection readers(Collection::newInstance(sis->size())); - - for (int32_t i = sis->size() - 1; i >= 0; --i) - { - bool success = false; - LuceneException finally; - try - { - readers[i] = SegmentReader::get(readOnly, sis->info(i), termInfosIndexDivisor); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success) - { - // Close all readers we had opened - for (Collection::iterator closeReader = readers.begin(); closeReader != readers.end(); ++closeReader) - { - try - { - if (*closeReader) - (*closeReader)->close(); - } - catch (...) - { - // keep going - we want to clean up as much as possible + if (!success) { + // Close all readers we had opened + for (Collection::iterator closeReader = readers.begin(); closeReader != readers.end(); ++closeReader) { + try { + if (*closeReader) { + (*closeReader)->close(); } + } catch (...) 
{ + // keep going - we want to clean up as much as possible } } - finally.throwException(); } - - _initialize(readers); + finally.throwException(); } - - DirectoryReader::DirectoryReader(IndexWriterPtr writer, SegmentInfosPtr infos, int32_t termInfosIndexDivisor) - { - normsCache = MapStringByteArray::newInstance(); - _maxDoc = 0; - _numDocs = -1; - _hasDeletions = false; - synced = HashSet::newInstance(); - stale = false; - rollbackHasChanges = false; - - this->_directory = writer->getDirectory(); - this->readOnly = true; - this->segmentInfos = infos; - this->segmentInfosStart = boost::dynamic_pointer_cast(infos->clone()); - this->termInfosIndexDivisor = termInfosIndexDivisor; - - if (!readOnly) - { - // We assume that this segments_N was previously properly sync'd - HashSet files(infos->files(_directory, true)); - synced.addAll(files.begin(), files.end()); - } - - // IndexWriter synchronizes externally before calling us, which ensures infos will not change; so there's - // no need to process segments in reverse order - int32_t numSegments = infos->size(); - Collection readers(Collection::newInstance(numSegments)); - DirectoryPtr dir(writer->getDirectory()); - int32_t upto = 0; - - for (int32_t i = 0; i < numSegments; ++i) - { - bool success = false; - LuceneException finally; - try - { - SegmentInfoPtr info(infos->info(i)); - if (info->dir == dir) - readers[upto++] = boost::dynamic_pointer_cast(writer->readerPool->getReadOnlyClone(info, true, termInfosIndexDivisor)); - success = true; - } - catch (LuceneException& e) - { - finally = e; + + _initialize(readers); +} + +DirectoryReader::DirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor) { + normsCache = MapStringByteArray::newInstance(); + _maxDoc = 0; + _numDocs = -1; + _hasDeletions = false; + synced = HashSet::newInstance(); + stale = false; + rollbackHasChanges = false; + + this->_directory = writer->getDirectory(); + this->readOnly = true; + 
this->segmentInfos = infos; + this->segmentInfosStart = boost::dynamic_pointer_cast(infos->clone()); + this->termInfosIndexDivisor = termInfosIndexDivisor; + + if (!readOnly) { + // We assume that this segments_N was previously properly sync'd + HashSet files(infos->files(_directory, true)); + synced.addAll(files.begin(), files.end()); + } + + // IndexWriter synchronizes externally before calling us, which ensures infos will not change; so there's + // no need to process segments in reverse order + int32_t numSegments = infos->size(); + Collection readers(Collection::newInstance(numSegments)); + DirectoryPtr dir(writer->getDirectory()); + int32_t upto = 0; + + for (int32_t i = 0; i < numSegments; ++i) { + bool success = false; + LuceneException finally; + try { + SegmentInfoPtr info(infos->info(i)); + if (info->dir == dir) { + readers[upto++] = boost::dynamic_pointer_cast(writer->readerPool->getReadOnlyClone(info, true, termInfosIndexDivisor)); } - if (!success) - { - // Close all readers we had opened - for (--upto; upto >= 0; --upto) - { - try - { - if (readers[upto]) - readers[upto]->close(); - } - catch (...) - { - // keep going - we want to clean up as much as possible + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + // Close all readers we had opened + for (--upto; upto >= 0; --upto) { + try { + if (readers[upto]) { + readers[upto]->close(); } + } catch (...) 
{ + // keep going - we want to clean up as much as possible } } - finally.throwException(); - } - - this->_writer = writer; - - if (upto < readers.size()) - { - // This means some segments were in a foreign Directory - readers.resize(upto); } - - _initialize(readers); + finally.throwException(); } - - DirectoryReader::DirectoryReader(DirectoryPtr directory, SegmentInfosPtr infos, Collection oldReaders, - Collection oldStarts, MapStringByteArray oldNormsCache, bool readOnly, - bool doClone, int32_t termInfosIndexDivisor) - { - normsCache = MapStringByteArray::newInstance(); - _maxDoc = 0; - _numDocs = -1; - _hasDeletions = false; - synced = HashSet::newInstance(); - stale = false; - rollbackHasChanges = false; - - this->_directory = directory; - this->readOnly = readOnly; - this->segmentInfos = infos; - this->termInfosIndexDivisor = termInfosIndexDivisor; - if (!readOnly) - { - // We assume that this segments_N was previously properly sync'd - HashSet files(infos->files(directory, true)); - synced.addAll(files.begin(), files.end()); + + this->_writer = writer; + + if (upto < readers.size()) { + // This means some segments were in a foreign Directory + readers.resize(upto); + } + + _initialize(readers); +} + +DirectoryReader::DirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, Collection oldReaders, + Collection oldStarts, MapStringByteArray oldNormsCache, bool readOnly, + bool doClone, int32_t termInfosIndexDivisor) { + normsCache = MapStringByteArray::newInstance(); + _maxDoc = 0; + _numDocs = -1; + _hasDeletions = false; + synced = HashSet::newInstance(); + stale = false; + rollbackHasChanges = false; + + this->_directory = directory; + this->readOnly = readOnly; + this->segmentInfos = infos; + this->termInfosIndexDivisor = termInfosIndexDivisor; + if (!readOnly) { + // We assume that this segments_N was previously properly sync'd + HashSet files(infos->files(directory, true)); + synced.addAll(files.begin(), files.end()); + } + + // we put 
the old SegmentReaders in a map, that allows us to lookup a reader using its segment name + MapStringInt segmentReaders(MapStringInt::newInstance()); + + if (oldReaders) { + int32_t segReader = 0; + // create a Map SegmentName->SegmentReader + for (Collection::iterator reader = oldReaders.begin(); reader != oldReaders.end(); ++reader) { + segmentReaders.put((*reader)->getSegmentName(), segReader++); } - - // we put the old SegmentReaders in a map, that allows us to lookup a reader using its segment name - MapStringInt segmentReaders(MapStringInt::newInstance()); - - if (oldReaders) - { - int32_t segReader = 0; - // create a Map SegmentName->SegmentReader - for (Collection::iterator reader = oldReaders.begin(); reader != oldReaders.end(); ++reader) - segmentReaders.put((*reader)->getSegmentName(), segReader++); + } + + Collection newReaders(Collection::newInstance(infos->size())); + + // remember which readers are shared between the old and the re-opened DirectoryReader - we have to incRef those readers + Collection readerShared(Collection::newInstance(infos->size())); + + for (int32_t i = infos->size() - 1; i >= 0; --i) { + // find SegmentReader for this segment + MapStringInt::iterator oldReaderIndex = segmentReaders.find(infos->info(i)->name); + if (oldReaderIndex == segmentReaders.end()) { + // this is a new segment, no old SegmentReader can be reused + newReaders[i].reset(); + } else { + // there is an old reader for this segment - we'll try to reopen it + newReaders[i] = oldReaders[oldReaderIndex->second]; } - - Collection newReaders(Collection::newInstance(infos->size())); - - // remember which readers are shared between the old and the re-opened DirectoryReader - we have to incRef those readers - Collection readerShared(Collection::newInstance(infos->size())); - - for (int32_t i = infos->size() - 1; i >= 0; --i) - { - // find SegmentReader for this segment - MapStringInt::iterator oldReaderIndex = segmentReaders.find(infos->info(i)->name); - if 
(oldReaderIndex == segmentReaders.end()) - { - // this is a new segment, no old SegmentReader can be reused - newReaders[i].reset(); - } - else - { - // there is an old reader for this segment - we'll try to reopen it - newReaders[i] = oldReaders[oldReaderIndex->second]; - } - - bool success = false; - LuceneException finally; - try - { - SegmentReaderPtr newReader; - if (!newReaders[i] || infos->info(i)->getUseCompoundFile() != newReaders[i]->getSegmentInfo()->getUseCompoundFile()) - { - // We should never see a totally new segment during cloning - BOOST_ASSERT(!doClone); - - // this is a new reader; in case we hit an exception we can close it safely - newReader = SegmentReader::get(readOnly, infos->info(i), termInfosIndexDivisor); - } - else - newReader = newReaders[i]->reopenSegment(infos->info(i), doClone, readOnly); - - if (newReader == newReaders[i]) - { - // this reader will be shared between the old and the new one, so we must incRef it - readerShared[i] = true; - newReader->incRef(); - } - else - { - readerShared[i] = false; - newReaders[i] = newReader; - } - success = true; + + bool success = false; + LuceneException finally; + try { + SegmentReaderPtr newReader; + if (!newReaders[i] || infos->info(i)->getUseCompoundFile() != newReaders[i]->getSegmentInfo()->getUseCompoundFile()) { + // We should never see a totally new segment during cloning + BOOST_ASSERT(!doClone); + + // this is a new reader; in case we hit an exception we can close it safely + newReader = SegmentReader::get(readOnly, infos->info(i), termInfosIndexDivisor); + } else { + newReader = newReaders[i]->reopenSegment(infos->info(i), doClone, readOnly); } - catch (LuceneException& e) - { - finally = e; + + if (newReader == newReaders[i]) { + // this reader will be shared between the old and the new one, so we must incRef it + readerShared[i] = true; + newReader->incRef(); + } else { + readerShared[i] = false; + newReaders[i] = newReader; } - if (!success) - { - for (++i; i < infos->size(); 
++i) - { - if (newReaders[i]) - { - try - { - if (!readerShared[i]) - { - // this is a new subReader that is not used by the old one, we can close it - newReaders[i]->close(); - } - else - { - // this subReader is also used by the old reader, so instead closing we must decRef it - newReaders[i]->decRef(); - } - } - catch (...) - { - // keep going - we want to clean up as much as possible + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + for (++i; i < infos->size(); ++i) { + if (newReaders[i]) { + try { + if (!readerShared[i]) { + // this is a new subReader that is not used by the old one, we can close it + newReaders[i]->close(); + } else { + // this subReader is also used by the old reader, so instead closing we must decRef it + newReaders[i]->decRef(); } + } catch (...) { + // keep going - we want to clean up as much as possible } } } - finally.throwException(); } - - // initialize the readers to calculate maxDoc before we try to reuse the old normsCache - _initialize(newReaders); - - // try to copy unchanged norms from the old normsCache to the new one - if (oldNormsCache) - { - for (MapStringByteArray::iterator entry = oldNormsCache.begin(); entry != oldNormsCache.end(); ++entry) - { - if (!hasNorms(entry->first)) - continue; - - ByteArray bytes(ByteArray::newInstance(maxDoc())); - - for (int32_t i = 0; i < subReaders.size(); ++i) - { - MapStringInt::iterator oldReaderIndex = segmentReaders.find(subReaders[i]->getSegmentName()); - - // this SegmentReader was not re-opened, we can copy all of its norms - if (oldReaderIndex != segmentReaders.end() && (oldReaders[oldReaderIndex->second] == subReaders[i] - || oldReaders[oldReaderIndex->second]->_norms.get(entry->first) == subReaders[i]->_norms.get(entry->first))) - { - // we don't have to synchronize here: either this constructor is called from a SegmentReader, in which - // case no old norms cache is present, or it is called from MultiReader.reopen(), which is synchronized - 
MiscUtils::arrayCopy(entry->second.get(), oldStarts[oldReaderIndex->second], bytes.get(), starts[i], starts[i + 1] - starts[i]); - } - else - subReaders[i]->norms(entry->first, bytes, starts[i]); + finally.throwException(); + } + + // initialize the readers to calculate maxDoc before we try to reuse the old normsCache + _initialize(newReaders); + + // try to copy unchanged norms from the old normsCache to the new one + if (oldNormsCache) { + for (MapStringByteArray::iterator entry = oldNormsCache.begin(); entry != oldNormsCache.end(); ++entry) { + if (!hasNorms(entry->first)) { + continue; + } + + ByteArray bytes(ByteArray::newInstance(maxDoc())); + + for (int32_t i = 0; i < subReaders.size(); ++i) { + MapStringInt::iterator oldReaderIndex = segmentReaders.find(subReaders[i]->getSegmentName()); + + // this SegmentReader was not re-opened, we can copy all of its norms + if (oldReaderIndex != segmentReaders.end() && (oldReaders[oldReaderIndex->second] == subReaders[i] + || oldReaders[oldReaderIndex->second]->_norms.get(entry->first) == subReaders[i]->_norms.get(entry->first))) { + // we don't have to synchronize here: either this constructor is called from a SegmentReader, in which + // case no old norms cache is present, or it is called from MultiReader.reopen(), which is synchronized + MiscUtils::arrayCopy(entry->second.get(), oldStarts[oldReaderIndex->second], bytes.get(), starts[i], starts[i + 1] - starts[i]); + } else { + subReaders[i]->norms(entry->first, bytes, starts[i]); } - - normsCache.put(entry->first, bytes); // update cache } + + normsCache.put(entry->first, bytes); // update cache } } - - DirectoryReader::~DirectoryReader() - { - } - - void DirectoryReader::_initialize(Collection subReaders) - { - this->subReaders = subReaders; - starts = Collection::newInstance(subReaders.size() + 1); - for (int32_t i = 0; i < subReaders.size(); ++i) - { - starts[i] = _maxDoc; - _maxDoc += subReaders[i]->maxDoc(); // compute maxDocs - - if 
(subReaders[i]->hasDeletions()) - _hasDeletions = true; +} + +DirectoryReader::~DirectoryReader() { +} + +void DirectoryReader::_initialize(Collection subReaders) { + this->subReaders = subReaders; + starts = Collection::newInstance(subReaders.size() + 1); + for (int32_t i = 0; i < subReaders.size(); ++i) { + starts[i] = _maxDoc; + _maxDoc += subReaders[i]->maxDoc(); // compute maxDocs + + if (subReaders[i]->hasDeletions()) { + _hasDeletions = true; } - starts[subReaders.size()] = _maxDoc; - - if (!readOnly) - maxIndexVersion = SegmentInfos::readCurrentVersion(_directory); - } - - IndexReaderPtr DirectoryReader::open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, IndexCommitPtr commit, bool readOnly, int32_t termInfosIndexDivisor) - { - return newLucene(readOnly, deletionPolicy, termInfosIndexDivisor, newLucene(), directory)->run(commit); } - - LuceneObjectPtr DirectoryReader::clone(LuceneObjectPtr other) - { - try - { - return DirectoryReader::clone(readOnly, other); // Preserve current readOnly - } - catch (LuceneException& e) - { - boost::throw_exception(RuntimeException(e.getError())); - } - return DirectoryReaderPtr(); + starts[subReaders.size()] = _maxDoc; + + if (!readOnly) { + maxIndexVersion = SegmentInfos::readCurrentVersion(_directory); } - - LuceneObjectPtr DirectoryReader::clone(bool openReadOnly, LuceneObjectPtr other) - { - SyncLock syncLock(this); - DirectoryReaderPtr newReader(doReopen(boost::dynamic_pointer_cast(segmentInfos->clone()), true, openReadOnly)); - - if (shared_from_this() != newReader) - newReader->deletionPolicy = deletionPolicy; - - newReader->_writer = _writer; - - // If we're cloning a non-readOnly reader, move the writeLock (if there is one) to the new reader - if (!openReadOnly && writeLock) - { - // In near real-time search, reader is always readonly - BOOST_ASSERT(_writer.expired()); - newReader->writeLock = writeLock; - newReader->_hasChanges = _hasChanges; - newReader->_hasDeletions = _hasDeletions; - 
writeLock.reset(); - _hasChanges = false; - } - - return newReader; +} + +IndexReaderPtr DirectoryReader::open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor) { + return newLucene(readOnly, deletionPolicy, termInfosIndexDivisor, newLucene(), directory)->run(commit); +} + +LuceneObjectPtr DirectoryReader::clone(const LuceneObjectPtr& other) { + try { + return DirectoryReader::clone(readOnly, other); // Preserve current readOnly + } catch (LuceneException& e) { + boost::throw_exception(RuntimeException(e.getError())); } - - IndexReaderPtr DirectoryReader::reopen() - { - // Preserve current readOnly - return doReopen(readOnly, IndexCommitPtr()); + return DirectoryReaderPtr(); +} + +LuceneObjectPtr DirectoryReader::clone(bool openReadOnly, const LuceneObjectPtr& other) { + SyncLock syncLock(this); + DirectoryReaderPtr newReader(doReopen(boost::dynamic_pointer_cast(segmentInfos->clone()), true, openReadOnly)); + + if (shared_from_this() != newReader) { + newReader->deletionPolicy = deletionPolicy; } - - IndexReaderPtr DirectoryReader::reopen(bool openReadOnly) - { - return doReopen(openReadOnly, IndexCommitPtr()); + + newReader->_writer = _writer; + + // If we're cloning a non-readOnly reader, move the writeLock (if there is one) to the new reader + if (!openReadOnly && writeLock) { + // In near real-time search, reader is always readonly + BOOST_ASSERT(_writer.expired()); + newReader->writeLock = writeLock; + newReader->_hasChanges = _hasChanges; + newReader->_hasDeletions = _hasDeletions; + writeLock.reset(); + _hasChanges = false; } - - IndexReaderPtr DirectoryReader::reopen(IndexCommitPtr commit) - { - return doReopen(true, commit); + + return newReader; +} + +IndexReaderPtr DirectoryReader::reopen() { + // Preserve current readOnly + return doReopen(readOnly, IndexCommitPtr()); +} + +IndexReaderPtr DirectoryReader::reopen(bool openReadOnly) { + return 
doReopen(openReadOnly, IndexCommitPtr()); +} + +IndexReaderPtr DirectoryReader::reopen(const IndexCommitPtr& commit) { + return doReopen(true, commit); +} + +IndexReaderPtr DirectoryReader::doReopenFromWriter(bool openReadOnly, const IndexCommitPtr& commit) { + BOOST_ASSERT(readOnly); + + if (!openReadOnly) { + boost::throw_exception(IllegalArgumentException(L"a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)")); } - - IndexReaderPtr DirectoryReader::doReopenFromWriter(bool openReadOnly, IndexCommitPtr commit) - { - BOOST_ASSERT(readOnly); - - if (!openReadOnly) - boost::throw_exception(IllegalArgumentException(L"a reader obtained from IndexWriter.getReader() can only be reopened with openReadOnly=true (got false)")); - - if (commit) - boost::throw_exception(IllegalArgumentException(L"a reader obtained from IndexWriter.getReader() cannot currently accept a commit")); - - return IndexWriterPtr(_writer)->getReader(); + + if (commit) { + boost::throw_exception(IllegalArgumentException(L"a reader obtained from IndexWriter.getReader() cannot currently accept a commit")); } - - IndexReaderPtr DirectoryReader::doReopen(bool openReadOnly, IndexCommitPtr commit) - { - ensureOpen(); - - BOOST_ASSERT(!commit || openReadOnly); - - IndexWriterPtr writer(_writer.lock()); - - // If we were obtained by writer.getReader(), re-ask the writer to get a new reader. - if (writer) - return doReopenFromWriter(openReadOnly, commit); - else - return doReopenNoWriter(openReadOnly, commit); + + return IndexWriterPtr(_writer)->getReader(); +} + +IndexReaderPtr DirectoryReader::doReopen(bool openReadOnly, const IndexCommitPtr& commit) { + ensureOpen(); + + BOOST_ASSERT(!commit || openReadOnly); + + IndexWriterPtr writer(_writer.lock()); + + // If we were obtained by writer.getReader(), re-ask the writer to get a new reader. 
+ if (writer) { + return doReopenFromWriter(openReadOnly, commit); + } else { + return doReopenNoWriter(openReadOnly, commit); } - - IndexReaderPtr DirectoryReader::doReopenNoWriter(bool openReadOnly, IndexCommitPtr commit) - { - SyncLock syncLock(this); - if (!commit) - { - if (_hasChanges) - { - // We have changes, which means we are not readOnly - BOOST_ASSERT(!readOnly); - // and we hold the write lock - BOOST_ASSERT(writeLock); - // so no other writer holds the write lock, which means no changes could have been done to the index - BOOST_ASSERT(isCurrent()); - - if (openReadOnly) - return boost::dynamic_pointer_cast(clone(openReadOnly)); - else - return shared_from_this(); +} + +IndexReaderPtr DirectoryReader::doReopenNoWriter(bool openReadOnly, const IndexCommitPtr& commit) { + SyncLock syncLock(this); + if (!commit) { + if (_hasChanges) { + // We have changes, which means we are not readOnly + BOOST_ASSERT(!readOnly); + // and we hold the write lock + BOOST_ASSERT(writeLock); + // so no other writer holds the write lock, which means no changes could have been done to the index + BOOST_ASSERT(isCurrent()); + + if (openReadOnly) { + return boost::dynamic_pointer_cast(clone(openReadOnly)); + } else { + return shared_from_this(); } - else if (isCurrent()) - { - if (openReadOnly != readOnly) - { - // Just fallback to clone - return boost::dynamic_pointer_cast(clone(openReadOnly)); - } - else - return shared_from_this(); + } else if (isCurrent()) { + if (openReadOnly != readOnly) { + // Just fallback to clone + return boost::dynamic_pointer_cast(clone(openReadOnly)); + } else { + return shared_from_this(); } } - else - { - if (_directory != commit->getDirectory()) - boost::throw_exception(IOException(L"the specified commit does not match the specified Directory")); - if (segmentInfos && commit->getSegmentsFileName() == segmentInfos->getCurrentSegmentFileName()) - { - if (readOnly != openReadOnly) - { - // Just fallback to clone - return 
boost::dynamic_pointer_cast(clone(openReadOnly)); - } - else - return shared_from_this(); + } else { + if (_directory != commit->getDirectory()) { + boost::throw_exception(IOException(L"the specified commit does not match the specified Directory")); + } + if (segmentInfos && commit->getSegmentsFileName() == segmentInfos->getCurrentSegmentFileName()) { + if (readOnly != openReadOnly) { + // Just fallback to clone + return boost::dynamic_pointer_cast(clone(openReadOnly)); + } else { + return shared_from_this(); } } - - return newLucene(shared_from_this(), openReadOnly, newLucene(), _directory)->run(commit); - } - - DirectoryReaderPtr DirectoryReader::doReopen(SegmentInfosPtr infos, bool doClone, bool openReadOnly) - { - SyncLock syncLock(this); - if (openReadOnly) - return newLucene(_directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor); - else - return newLucene(_directory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor); - } - - int64_t DirectoryReader::getVersion() - { - ensureOpen(); - return segmentInfos->getVersion(); - } - - Collection DirectoryReader::getTermFreqVectors(int32_t docNumber) - { - ensureOpen(); - int32_t i = readerIndex(docNumber); // find segment num - return subReaders[i]->getTermFreqVectors(docNumber - starts[i]); // dispatch to segment - } - - TermFreqVectorPtr DirectoryReader::getTermFreqVector(int32_t docNumber, const String& field) - { - ensureOpen(); - int32_t i = readerIndex(docNumber); // find segment num - return subReaders[i]->getTermFreqVector(docNumber - starts[i], field); - } - - void DirectoryReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) - { - ensureOpen(); - int32_t i = readerIndex(docNumber); // find segment num - subReaders[i]->getTermFreqVector(docNumber - starts[i], field, mapper); } - - void DirectoryReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) - { - ensureOpen(); - int32_t i = 
readerIndex(docNumber); // find segment num - subReaders[i]->getTermFreqVector(docNumber - starts[i], mapper); - } - - bool DirectoryReader::isOptimized() - { - ensureOpen(); - return (segmentInfos->size() == 1 && !hasDeletions()); + + return newLucene(shared_from_this(), openReadOnly, newLucene(), _directory)->run(commit); +} + +DirectoryReaderPtr DirectoryReader::doReopen(const SegmentInfosPtr& infos, bool doClone, bool openReadOnly) { + SyncLock syncLock(this); + if (openReadOnly) { + return newLucene(_directory, infos, subReaders, starts, normsCache, doClone, termInfosIndexDivisor); + } else { + return newLucene(_directory, infos, subReaders, starts, normsCache, false, doClone, termInfosIndexDivisor); } - - int32_t DirectoryReader::numDocs() - { - // Don't call ensureOpen() here (it could affect performance) - - // NOTE: multiple threads may wind up init'ing numDocs... but that's harmless - if (_numDocs == -1) // check cache - { - int32_t n = 0; // cache miss - recompute - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - n += (*reader)->numDocs(); // sum from readers - _numDocs = n; +} + +int64_t DirectoryReader::getVersion() { + ensureOpen(); + return segmentInfos->getVersion(); +} + +Collection DirectoryReader::getTermFreqVectors(int32_t docNumber) { + ensureOpen(); + int32_t i = readerIndex(docNumber); // find segment num + return subReaders[i]->getTermFreqVectors(docNumber - starts[i]); // dispatch to segment +} + +TermFreqVectorPtr DirectoryReader::getTermFreqVector(int32_t docNumber, const String& field) { + ensureOpen(); + int32_t i = readerIndex(docNumber); // find segment num + return subReaders[i]->getTermFreqVector(docNumber - starts[i], field); +} + +void DirectoryReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { + ensureOpen(); + int32_t i = readerIndex(docNumber); // find segment num + subReaders[i]->getTermFreqVector(docNumber - starts[i], field, 
mapper); +} + +void DirectoryReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { + ensureOpen(); + int32_t i = readerIndex(docNumber); // find segment num + subReaders[i]->getTermFreqVector(docNumber - starts[i], mapper); +} + +bool DirectoryReader::isOptimized() { + ensureOpen(); + return (segmentInfos->size() == 1 && !hasDeletions()); +} + +int32_t DirectoryReader::numDocs() { + // Don't call ensureOpen() here (it could affect performance) + + // NOTE: multiple threads may wind up init'ing numDocs... but that's harmless + if (_numDocs == -1) { // check cache + int32_t n = 0; // cache miss - recompute + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + n += (*reader)->numDocs(); // sum from readers } - return _numDocs; - } - - int32_t DirectoryReader::maxDoc() - { - // Don't call ensureOpen() here (it could affect performance) - return _maxDoc; - } - - DocumentPtr DirectoryReader::document(int32_t n, FieldSelectorPtr fieldSelector) - { - ensureOpen(); - int32_t i = readerIndex(n); // find segment num - return subReaders[i]->document(n - starts[i], fieldSelector); // dispatch to segment reader - } - - bool DirectoryReader::isDeleted(int32_t n) - { - // Don't call ensureOpen() here (it could affect performance) - int32_t i = readerIndex(n); // find segment num - return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader - } - - bool DirectoryReader::hasDeletions() - { - // Don't call ensureOpen() here (it could affect performance) - return _hasDeletions; - } - - void DirectoryReader::doDelete(int32_t docNum) - { - _numDocs = -1; // invalidate cache - int32_t i = readerIndex(docNum); // find segment num - subReaders[i]->deleteDocument(docNum - starts[i]); // dispatch to segment reader - _hasDeletions = true; - } - - void DirectoryReader::doUndeleteAll() - { - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - 
(*reader)->undeleteAll(); - _hasDeletions = false; - _numDocs = -1; // invalidate cache - } - - int32_t DirectoryReader::readerIndex(int32_t n) - { - return readerIndex(n, this->starts, this->subReaders.size()); + _numDocs = n; } - - int32_t DirectoryReader::readerIndex(int32_t n, Collection starts, int32_t numSubReaders) - { - // Binary search to locate reader - Collection::iterator reader = std::upper_bound(starts.begin(), starts.begin() + numSubReaders, n); - return (int32_t)(std::distance(starts.begin(), reader) - 1); + return _numDocs; +} + +int32_t DirectoryReader::maxDoc() { + // Don't call ensureOpen() here (it could affect performance) + return _maxDoc; +} + +DocumentPtr DirectoryReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { + ensureOpen(); + int32_t i = readerIndex(n); // find segment num + return subReaders[i]->document(n - starts[i], fieldSelector); // dispatch to segment reader +} + +bool DirectoryReader::isDeleted(int32_t n) { + // Don't call ensureOpen() here (it could affect performance) + int32_t i = readerIndex(n); // find segment num + return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader +} + +bool DirectoryReader::hasDeletions() { + // Don't call ensureOpen() here (it could affect performance) + return _hasDeletions; +} + +void DirectoryReader::doDelete(int32_t docNum) { + _numDocs = -1; // invalidate cache + int32_t i = readerIndex(docNum); // find segment num + subReaders[i]->deleteDocument(docNum - starts[i]); // dispatch to segment reader + _hasDeletions = true; +} + +void DirectoryReader::doUndeleteAll() { + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + (*reader)->undeleteAll(); } - - bool DirectoryReader::hasNorms(const String& field) - { - ensureOpen(); - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - { - if ((*reader)->hasNorms(field)) - return true; + _hasDeletions = false; + _numDocs = -1; 
// invalidate cache +} + +int32_t DirectoryReader::readerIndex(int32_t n) { + return readerIndex(n, this->starts, this->subReaders.size()); +} + +int32_t DirectoryReader::readerIndex(int32_t n, Collection starts, int32_t numSubReaders) { + // Binary search to locate reader + Collection::iterator reader = std::upper_bound(starts.begin(), starts.begin() + numSubReaders, n); + return (int32_t)(std::distance(starts.begin(), reader) - 1); +} + +bool DirectoryReader::hasNorms(const String& field) { + ensureOpen(); + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + if ((*reader)->hasNorms(field)) { + return true; } - return false; } - - ByteArray DirectoryReader::norms(const String& field) - { - SyncLock syncLock(this); - ensureOpen(); - ByteArray bytes(normsCache.get(field)); - if (bytes) - return bytes; // cache hit - if (!hasNorms(field)) - return ByteArray(); - - bytes = ByteArray::newInstance(maxDoc()); - for (int32_t i = 0; i < subReaders.size(); ++i) - subReaders[i]->norms(field, bytes, starts[i]); - normsCache.put(field, bytes); // update cache - return bytes; + return false; +} + +ByteArray DirectoryReader::norms(const String& field) { + SyncLock syncLock(this); + ensureOpen(); + ByteArray bytes(normsCache.get(field)); + if (bytes) { + return bytes; // cache hit } - - void DirectoryReader::norms(const String& field, ByteArray norms, int32_t offset) - { - SyncLock syncLock(this); - ensureOpen(); - ByteArray bytes(normsCache.get(field)); - if (!bytes && !hasNorms(field)) - MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); - else if (bytes) // cache hit - MiscUtils::arrayCopy(bytes.get(), 0, norms.get(), offset, maxDoc()); - else - { - for (int32_t i = 0; i < subReaders.size(); ++i) // read from segments - subReaders[i]->norms(field, norms, offset + starts[i]); - } + if (!hasNorms(field)) { + return ByteArray(); } - - void DirectoryReader::doSetNorm(int32_t doc, const String& 
field, uint8_t value) - { - { - SyncLock normsLock(&normsCache); - normsCache.remove(field); // clear cache - } - int32_t i = readerIndex(doc); // find segment num - subReaders[i]->setNorm(doc - starts[i], field, value); // dispatch + + bytes = ByteArray::newInstance(maxDoc()); + for (int32_t i = 0; i < subReaders.size(); ++i) { + subReaders[i]->norms(field, bytes, starts[i]); } - - TermEnumPtr DirectoryReader::terms() - { - ensureOpen(); - return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts, TermPtr()); + normsCache.put(field, bytes); // update cache + return bytes; +} + +void DirectoryReader::norms(const String& field, ByteArray norms, int32_t offset) { + SyncLock syncLock(this); + ensureOpen(); + ByteArray bytes(normsCache.get(field)); + if (!bytes && !hasNorms(field)) { + MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); + } else if (bytes) { // cache hit + MiscUtils::arrayCopy(bytes.get(), 0, norms.get(), offset, maxDoc()); + } else { + for (int32_t i = 0; i < subReaders.size(); ++i) { // read from segments + subReaders[i]->norms(field, norms, offset + starts[i]); + } } - - TermEnumPtr DirectoryReader::terms(TermPtr t) +} + +void DirectoryReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { { - ensureOpen(); - return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts, t); + SyncLock normsLock(&normsCache); + normsCache.remove(field); // clear cache } - - int32_t DirectoryReader::docFreq(TermPtr t) - { - ensureOpen(); - int32_t total = 0; // sum freqs in segments - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - total += (*reader)->docFreq(t); - return total; + int32_t i = readerIndex(doc); // find segment num + subReaders[i]->setNorm(doc - starts[i], field, value); // dispatch +} + +TermEnumPtr DirectoryReader::terms() { + ensureOpen(); + return 
newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts, TermPtr()); +} + +TermEnumPtr DirectoryReader::terms(const TermPtr& t) { + ensureOpen(); + return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts, t); +} + +int32_t DirectoryReader::docFreq(const TermPtr& t) { + ensureOpen(); + int32_t total = 0; // sum freqs in segments + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + total += (*reader)->docFreq(t); } - - TermDocsPtr DirectoryReader::termDocs() - { - ensureOpen(); - return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts); + return total; +} + +TermDocsPtr DirectoryReader::termDocs() { + ensureOpen(); + return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts); +} + +TermPositionsPtr DirectoryReader::termPositions() { + ensureOpen(); + return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts); +} + +void DirectoryReader::acquireWriteLock() { + if (readOnly) { + // NOTE: we should not reach this code with the core IndexReader classes; + // however, an external subclass of IndexReader could reach this. + ReadOnlySegmentReader::noWrite(); } - - TermPositionsPtr DirectoryReader::termPositions() - { + + if (segmentInfos) { ensureOpen(); - return newLucene(shared_from_this(), Collection::newInstance(subReaders.begin(), subReaders.end()), starts); - } - - void DirectoryReader::acquireWriteLock() - { - if (readOnly) - { - // NOTE: we should not reach this code with the core IndexReader classes; - // however, an external subclass of IndexReader could reach this. 
- ReadOnlySegmentReader::noWrite(); + if (stale) { + boost::throw_exception(StaleReaderException(L"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations")); } - - if (segmentInfos) - { - ensureOpen(); - if (stale) + + if (!writeLock) { + LockPtr writeLock(_directory->makeLock(IndexWriter::WRITE_LOCK_NAME)); + if (!writeLock->obtain((int32_t)IndexWriter::WRITE_LOCK_TIMEOUT)) { // obtain write lock + boost::throw_exception(LockObtainFailedException(L"Index locked for write: " + writeLock->toString())); + } + this->writeLock = writeLock; + + // we have to check whether index has changed since this reader was opened. + // if so, this reader is no longer valid for deletion + if (SegmentInfos::readCurrentVersion(_directory) > maxIndexVersion) { + stale = true; + this->writeLock->release(); + this->writeLock.reset(); boost::throw_exception(StaleReaderException(L"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations")); - - if (!writeLock) - { - LockPtr writeLock(_directory->makeLock(IndexWriter::WRITE_LOCK_NAME)); - if (!writeLock->obtain((int32_t)IndexWriter::WRITE_LOCK_TIMEOUT)) // obtain write lock - boost::throw_exception(LockObtainFailedException(L"Index locked for write: " + writeLock->toString())); - this->writeLock = writeLock; - - // we have to check whether index has changed since this reader was opened. 
- // if so, this reader is no longer valid for deletion - if (SegmentInfos::readCurrentVersion(_directory) > maxIndexVersion) - { - stale = true; - this->writeLock->release(); - this->writeLock.reset(); - boost::throw_exception(StaleReaderException(L"IndexReader out of date and no longer valid for delete, undelete, or setNorm operations")); - } } } } - - void DirectoryReader::doCommit(MapStringString commitUserData) - { - if (_hasChanges) - { - segmentInfos->setUserData(commitUserData); - - // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter - IndexFileDeleterPtr deleter(newLucene(_directory, deletionPolicy ? deletionPolicy : newLucene(), segmentInfos, InfoStreamPtr(), DocumentsWriterPtr(), synced)); - segmentInfos->updateGeneration(deleter->getLastSegmentInfos()); - - // Checkpoint the state we are about to change, in case we have to roll back - startCommit(); - - bool success = false; - LuceneException finally; - try - { - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - (*reader)->commit(); - - // Sync all files we just wrote - HashSet files(segmentInfos->files(_directory, false)); - for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) - { - if (!synced.contains(*fileName)) - { - BOOST_ASSERT(_directory->fileExists(*fileName)); - _directory->sync(*fileName); - synced.add(*fileName); - } - } - - segmentInfos->commit(_directory); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - - if (!success) - { - // Rollback changes that were made to SegmentInfos but failed to get [fully] - // committed. 
This way this reader instance remains consistent (matched to what's - // actually in the index) - rollbackCommit(); - - // Recompute deletable files & remove them (so partially written .del files, etc, - // are removed) - deleter->refresh(); +} + +void DirectoryReader::doCommit(MapStringString commitUserData) { + if (_hasChanges) { + segmentInfos->setUserData(commitUserData); + + // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter + IndexFileDeleterPtr deleter(newLucene(_directory, deletionPolicy ? deletionPolicy : newLucene(), segmentInfos, InfoStreamPtr(), DocumentsWriterPtr(), synced)); + segmentInfos->updateGeneration(deleter->getLastSegmentInfos()); + + // Checkpoint the state we are about to change, in case we have to roll back + startCommit(); + + bool success = false; + LuceneException finally; + try { + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + (*reader)->commit(); } - finally.throwException(); - - // Have the deleter remove any now unreferenced files due to this commit - deleter->checkpoint(segmentInfos, true); - deleter->close(); - - maxIndexVersion = segmentInfos->getVersion(); - - if (writeLock) - { - writeLock->release(); // release write lock - writeLock.reset(); + + // Sync all files we just wrote + HashSet files(segmentInfos->files(_directory, false)); + for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { + if (!synced.contains(*fileName)) { + BOOST_ASSERT(_directory->fileExists(*fileName)); + _directory->sync(*fileName); + synced.add(*fileName); + } } + + segmentInfos->commit(_directory); + success = true; + } catch (LuceneException& e) { + finally = e; } - _hasChanges = false; - } - - void DirectoryReader::startCommit() - { - rollbackHasChanges = _hasChanges; - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - (*reader)->startCommit(); - } - - void DirectoryReader::rollbackCommit() - { - 
_hasChanges = rollbackHasChanges; - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - (*reader)->rollbackCommit(); - } - - MapStringString DirectoryReader::getCommitUserData() - { - ensureOpen(); - return segmentInfos->getUserData(); - } - - bool DirectoryReader::isCurrent() - { - ensureOpen(); - IndexWriterPtr writer(_writer.lock()); - if (!writer || writer->isClosed()) - { - // we loaded SegmentInfos from the directory - return (SegmentInfos::readCurrentVersion(_directory) == segmentInfos->getVersion()); + + if (!success) { + // Rollback changes that were made to SegmentInfos but failed to get [fully] + // committed. This way this reader instance remains consistent (matched to what's + // actually in the index) + rollbackCommit(); + + // Recompute deletable files & remove them (so partially written .del files, etc, + // are removed) + deleter->refresh(); } - else - return writer->nrtIsCurrent(segmentInfosStart); - } - - void DirectoryReader::doClose() - { - SyncLock syncLock(this); - LuceneException ioe; - normsCache.reset(); - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - { - // try to close each reader, even if an exception is thrown - try - { - (*reader)->decRef(); - } - catch (LuceneException& e) - { - if (ioe.isNull()) - ioe = e; - } + finally.throwException(); + + // Have the deleter remove any now unreferenced files due to this commit + deleter->checkpoint(segmentInfos, true); + deleter->close(); + + maxIndexVersion = segmentInfos->getVersion(); + + if (writeLock) { + writeLock->release(); // release write lock + writeLock.reset(); } - - // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is - // generally not a good idea): - FieldCache::DEFAULT()->purge(shared_from_this()); - - // throw the first exception - ioe.throwException(); - } - - HashSet DirectoryReader::getFieldNames(FieldOption fieldOption) - { - ensureOpen(); - 
return getFieldNames(fieldOption, Collection::newInstance(subReaders.begin(), subReaders.end())); } - - HashSet DirectoryReader::getFieldNames(FieldOption fieldOption, Collection subReaders) - { - // maintain a unique set of field names - HashSet fieldSet(HashSet::newInstance()); - - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - { - HashSet names((*reader)->getFieldNames(fieldOption)); - fieldSet.addAll(names.begin(), names.end()); - } - return fieldSet; + _hasChanges = false; +} + +void DirectoryReader::startCommit() { + rollbackHasChanges = _hasChanges; + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + (*reader)->startCommit(); } - - Collection DirectoryReader::getSequentialSubReaders() - { - return Collection::newInstance(subReaders.begin(), subReaders.end()); +} + +void DirectoryReader::rollbackCommit() { + _hasChanges = rollbackHasChanges; + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + (*reader)->rollbackCommit(); } - - DirectoryPtr DirectoryReader::directory() - { - // Don't ensureOpen here -- in certain cases, when a cloned/reopened reader needs to commit, it may call - // this method on the closed original reader - return _directory; +} + +MapStringString DirectoryReader::getCommitUserData() { + ensureOpen(); + return segmentInfos->getUserData(); +} + +bool DirectoryReader::isCurrent() { + ensureOpen(); + IndexWriterPtr writer(_writer.lock()); + if (!writer || writer->isClosed()) { + // we loaded SegmentInfos from the directory + return (SegmentInfos::readCurrentVersion(_directory) == segmentInfos->getVersion()); + } else { + return writer->nrtIsCurrent(segmentInfosStart); } - - int32_t DirectoryReader::getTermInfosIndexDivisor() - { - return termInfosIndexDivisor; +} + +void DirectoryReader::doClose() { + SyncLock syncLock(this); + LuceneException ioe; + normsCache.reset(); + for (Collection::iterator reader 
= subReaders.begin(); reader != subReaders.end(); ++reader) { + // try to close each reader, even if an exception is thrown + try { + (*reader)->decRef(); + } catch (LuceneException& e) { + if (ioe.isNull()) { + ioe = e; + } + } } - - IndexCommitPtr DirectoryReader::getIndexCommit() - { - return newLucene(segmentInfos, _directory); + + // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is + // generally not a good idea): + FieldCache::DEFAULT()->purge(shared_from_this()); + + // throw the first exception + ioe.throwException(); +} + +HashSet DirectoryReader::getFieldNames(FieldOption fieldOption) { + ensureOpen(); + return getFieldNames(fieldOption, Collection::newInstance(subReaders.begin(), subReaders.end())); +} + +HashSet DirectoryReader::getFieldNames(FieldOption fieldOption, Collection subReaders) { + // maintain a unique set of field names + HashSet fieldSet(HashSet::newInstance()); + + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + HashSet names((*reader)->getFieldNames(fieldOption)); + fieldSet.addAll(names.begin(), names.end()); } - - Collection DirectoryReader::listCommits(DirectoryPtr dir) - { - HashSet files(dir->listAll()); - - Collection commits(Collection::newInstance()); - - SegmentInfosPtr latest(newLucene()); - latest->read(dir); - int64_t currentGen = latest->getGeneration(); - - commits.add(newLucene(latest, dir)); - - for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) - { - if (boost::starts_with(*fileName, IndexFileNames::SEGMENTS()) && + return fieldSet; +} + +Collection DirectoryReader::getSequentialSubReaders() { + return Collection::newInstance(subReaders.begin(), subReaders.end()); +} + +DirectoryPtr DirectoryReader::directory() { + // Don't ensureOpen here -- in certain cases, when a cloned/reopened reader needs to commit, it may call + // this method on the closed original reader + return _directory; +} + +int32_t 
DirectoryReader::getTermInfosIndexDivisor() { + return termInfosIndexDivisor; +} + +IndexCommitPtr DirectoryReader::getIndexCommit() { + return newLucene(segmentInfos, _directory); +} + +Collection DirectoryReader::listCommits(const DirectoryPtr& dir) { + HashSet files(dir->listAll()); + + Collection commits(Collection::newInstance()); + + SegmentInfosPtr latest(newLucene()); + latest->read(dir); + int64_t currentGen = latest->getGeneration(); + + commits.add(newLucene(latest, dir)); + + for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { + if (boost::starts_with(*fileName, IndexFileNames::SEGMENTS()) && *fileName != IndexFileNames::SEGMENTS_GEN() && - SegmentInfos::generationFromSegmentsFileName(*fileName) < currentGen) - { - SegmentInfosPtr sis(newLucene()); - try - { - // IOException allowed to throw there, in case segments_N is corrupt - sis->read(dir, *fileName); - } - catch (FileNotFoundException&) - { - sis.reset(); - } - - if (sis) - commits.add(newLucene(sis, dir)); + SegmentInfos::generationFromSegmentsFileName(*fileName) < currentGen) { + SegmentInfosPtr sis(newLucene()); + try { + // IOException allowed to throw there, in case segments_N is corrupt + sis->read(dir, *fileName); + } catch (FileNotFoundException&) { + sis.reset(); + } + + if (sis) { + commits.add(newLucene(sis, dir)); } } - - return commits; - } - - FindSegmentsOpen::FindSegmentsOpen(bool readOnly, IndexDeletionPolicyPtr deletionPolicy, int32_t termInfosIndexDivisor, SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFileT(infos, directory) - { - this->readOnly = readOnly; - this->deletionPolicy = deletionPolicy; - this->termInfosIndexDivisor = termInfosIndexDivisor; - } - - FindSegmentsOpen::~FindSegmentsOpen() - { } - - IndexReaderPtr FindSegmentsOpen::doBody(const String& segmentFileName) - { - SegmentInfosPtr segmentInfos(_segmentInfos); - segmentInfos->read(directory, segmentFileName); - if (readOnly) - return newLucene(directory, 
segmentInfos, deletionPolicy, termInfosIndexDivisor); - else - return newLucene(directory, segmentInfos, deletionPolicy, false, termInfosIndexDivisor); - } - - FindSegmentsReopen::FindSegmentsReopen(DirectoryReaderPtr reader, bool openReadOnly, SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFileT(infos, directory) - { - this->_reader = reader; - this->openReadOnly = openReadOnly; - } - - FindSegmentsReopen::~FindSegmentsReopen() - { - } - - DirectoryReaderPtr FindSegmentsReopen::doBody(const String& segmentFileName) - { - SegmentInfosPtr segmentInfos(_segmentInfos); - segmentInfos->read(directory, segmentFileName); - return DirectoryReaderPtr(_reader)->doReopen(segmentInfos, false, openReadOnly); + + return commits; +} + +FindSegmentsOpen::FindSegmentsOpen(bool readOnly, const IndexDeletionPolicyPtr& deletionPolicy, int32_t termInfosIndexDivisor, const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFileT(infos, directory) { + this->readOnly = readOnly; + this->deletionPolicy = deletionPolicy; + this->termInfosIndexDivisor = termInfosIndexDivisor; +} + +FindSegmentsOpen::~FindSegmentsOpen() { +} + +IndexReaderPtr FindSegmentsOpen::doBody(const String& segmentFileName) { + SegmentInfosPtr segmentInfos(_segmentInfos); + segmentInfos->read(directory, segmentFileName); + if (readOnly) { + return newLucene(directory, segmentInfos, deletionPolicy, termInfosIndexDivisor); + } else { + return newLucene(directory, segmentInfos, deletionPolicy, false, termInfosIndexDivisor); } - - MultiTermEnum::MultiTermEnum(IndexReaderPtr topReader, Collection readers, Collection starts, TermPtr t) - { - _docFreq = 0; - this->_topReader = topReader; - queue = newLucene(readers.size()); - matchingSegments = Collection::newInstance(readers.size() + 1); - for (int32_t i = 0; i < readers.size(); ++i) - { - IndexReaderPtr reader(readers[i]); - TermEnumPtr termEnum; - - if (t) - termEnum = reader->terms(t); - else - termEnum = reader->terms(); - - 
SegmentMergeInfoPtr smi(newLucene(starts[i], termEnum, reader)); - smi->ord = i; - if (t ? termEnum->term() : smi->next()) - queue->add(smi); // initialize queue - else - smi->close(); +} + +FindSegmentsReopen::FindSegmentsReopen(const DirectoryReaderPtr& reader, bool openReadOnly, const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFileT(infos, directory) { + this->_reader = reader; + this->openReadOnly = openReadOnly; +} + +FindSegmentsReopen::~FindSegmentsReopen() { +} + +DirectoryReaderPtr FindSegmentsReopen::doBody(const String& segmentFileName) { + SegmentInfosPtr segmentInfos(_segmentInfos); + segmentInfos->read(directory, segmentFileName); + return DirectoryReaderPtr(_reader)->doReopen(segmentInfos, false, openReadOnly); +} + +MultiTermEnum::MultiTermEnum(const IndexReaderPtr& topReader, Collection readers, Collection starts, const TermPtr& t) { + _docFreq = 0; + this->_topReader = topReader; + queue = newLucene(readers.size()); + matchingSegments = Collection::newInstance(readers.size() + 1); + for (int32_t i = 0; i < readers.size(); ++i) { + IndexReaderPtr reader(readers[i]); + TermEnumPtr termEnum; + + if (t) { + termEnum = reader->terms(t); + } else { + termEnum = reader->terms(); + } + + SegmentMergeInfoPtr smi(newLucene(starts[i], termEnum, reader)); + smi->ord = i; + if (t.get() != NULL ? 
termEnum->term().get() != NULL : smi->next()) { + queue->add(smi); // initialize queue + } else { + smi->close(); } - - if (t && !queue->empty()) - next(); } - - MultiTermEnum::~MultiTermEnum() - { + + if (t && !queue->empty()) { + next(); } - - bool MultiTermEnum::next() - { - for (Collection::iterator smi = matchingSegments.begin(); smi != matchingSegments.end(); ++smi) - { - if (!(*smi)) - break; - if ((*smi)->next()) - queue->add(*smi); - else - (*smi)->close(); // done with segment - } - - int32_t numMatchingSegments = 0; - matchingSegments[0].reset(); - - SegmentMergeInfoPtr top(queue->top()); - - if (!top) - { - _term.reset(); - return false; +} + +MultiTermEnum::~MultiTermEnum() { +} + +bool MultiTermEnum::next() { + for (Collection::iterator smi = matchingSegments.begin(); smi != matchingSegments.end(); ++smi) { + if (!(*smi)) { + break; } - - _term = top->term; - _docFreq = 0; - - while (top && _term->compareTo(top->term) == 0) - { - matchingSegments[numMatchingSegments++] = top; - queue->pop(); - _docFreq += top->termEnum->docFreq(); // increment freq - top = queue->top(); + if ((*smi)->next()) { + queue->add(*smi); + } else { + (*smi)->close(); // done with segment } - - matchingSegments[numMatchingSegments].reset(); - return true; - } - - TermPtr MultiTermEnum::term() - { - return _term; - } - - int32_t MultiTermEnum::docFreq() - { - return _docFreq; - } - - void MultiTermEnum::close() - { - queue->close(); - } - - MultiTermDocs::MultiTermDocs(IndexReaderPtr topReader, Collection r, Collection s) - { - this->_topReader = topReader; - readers = r; - starts = s; - base = 0; - pointer = 0; - readerTermDocs = Collection::newInstance(r.size()); - } - - MultiTermDocs::~MultiTermDocs() - { } - - int32_t MultiTermDocs::doc() - { - return base + current->doc(); + + int32_t numMatchingSegments = 0; + matchingSegments[0].reset(); + + SegmentMergeInfoPtr top(queue->top()); + + if (!top) { + _term.reset(); + return false; } - - int32_t MultiTermDocs::freq() - { - 
return current->freq(); + + _term = top->term; + _docFreq = 0; + + while (top && _term->compareTo(top->term) == 0) { + matchingSegments[numMatchingSegments++] = top; + queue->pop(); + _docFreq += top->termEnum->docFreq(); // increment freq + top = queue->top(); } - - void MultiTermDocs::seek(TermPtr term) - { - this->term = term; - this->base = 0; - this->pointer = 0; - this->current.reset(); - this->tenum.reset(); - this->smi.reset(); - this->matchingSegmentPos = 0; + + matchingSegments[numMatchingSegments].reset(); + return true; +} + +TermPtr MultiTermEnum::term() { + return _term; +} + +int32_t MultiTermEnum::docFreq() { + return _docFreq; +} + +void MultiTermEnum::close() { + queue->close(); +} + +MultiTermDocs::MultiTermDocs(const IndexReaderPtr& topReader, Collection r, Collection s) { + this->_topReader = topReader; + readers = r; + starts = s; + base = 0; + pointer = 0; + readerTermDocs = Collection::newInstance(r.size()); +} + +MultiTermDocs::~MultiTermDocs() { +} + +int32_t MultiTermDocs::doc() { + return base + current->doc(); +} + +int32_t MultiTermDocs::freq() { + return current->freq(); +} + +void MultiTermDocs::seek(const TermPtr& term) { + this->term = term; + this->base = 0; + this->pointer = 0; + this->current.reset(); + this->tenum.reset(); + this->smi.reset(); + this->matchingSegmentPos = 0; +} + +void MultiTermDocs::seek(const TermEnumPtr& termEnum) { + seek(termEnum->term()); + MultiTermEnumPtr multiTermEnum(boost::dynamic_pointer_cast(termEnum)); + if (multiTermEnum) { + tenum = multiTermEnum; + if (IndexReaderPtr(_topReader) != IndexReaderPtr(tenum->_topReader)) { + tenum.reset(); + } } - - void MultiTermDocs::seek(TermEnumPtr termEnum) - { - seek(termEnum->term()); - MultiTermEnumPtr multiTermEnum(boost::dynamic_pointer_cast(termEnum)); - if (multiTermEnum) - { - tenum = multiTermEnum; - if (IndexReaderPtr(_topReader) != IndexReaderPtr(tenum->_topReader)) - tenum.reset(); +} + +bool MultiTermDocs::next() { + while (true) { + if (current && 
current->next()) { + return true; + } else if (pointer < readers.size()) { + if (tenum) { + smi = tenum->matchingSegments[matchingSegmentPos++]; + if (!smi) { + pointer = readers.size(); + return false; + } + pointer = smi->ord; + } + base = starts[pointer]; + current = termDocs(pointer++); + } else { + return false; } } - - bool MultiTermDocs::next() - { - while (true) - { - if (current && current->next()) - return true; - else if (pointer < readers.size()) - { - if (tenum) - { +} + +int32_t MultiTermDocs::read(Collection& docs, Collection& freqs) { + while (true) { + while (!current) { + if (pointer < readers.size()) { // try next segment + if (tenum) { smi = tenum->matchingSegments[matchingSegmentPos++]; - if (!smi) - { + if (!smi) { pointer = readers.size(); - return false; + return 0; } pointer = smi->ord; } base = starts[pointer]; current = termDocs(pointer++); + } else { + return 0; } - else - return false; } - } - - int32_t MultiTermDocs::read(Collection docs, Collection freqs) - { - while (true) - { - while (!current) - { - if (pointer < readers.size()) // try next segment - { - if (tenum) - { - smi = tenum->matchingSegments[matchingSegmentPos++]; - if (!smi) - { - pointer = readers.size(); - return 0; - } - pointer = smi->ord; - } - base = starts[pointer]; - current = termDocs(pointer++); - } - else - return 0; - } - int32_t end = current->read(docs, freqs); - if (end == 0) // none left in segment - current.reset(); - else // got some - { - for (int32_t i = 0; i < end; ++i) // adjust doc numbers - docs[i] += base; - return end; + int32_t end = current->read(docs, freqs); + if (end == 0) { // none left in segment + current.reset(); + } else { // got some + for (int32_t i = 0; i < end; ++i) { // adjust doc numbers + docs[i] += base; } + return end; } } - - bool MultiTermDocs::skipTo(int32_t target) - { - while (true) - { - if (current && current->skipTo(target - base)) - return true; - else if (pointer < readers.size()) - { - if (tenum) - { - smi = 
tenum->matchingSegments[matchingSegmentPos++]; - if (!smi) - { - pointer = readers.size(); - return false; - } - pointer = smi->ord; +} + +bool MultiTermDocs::skipTo(int32_t target) { + while (true) { + if (current && current->skipTo(target - base)) { + return true; + } else if (pointer < readers.size()) { + if (tenum) { + smi = tenum->matchingSegments[matchingSegmentPos++]; + if (!smi) { + pointer = readers.size(); + return false; } - base = starts[pointer]; - current = termDocs(pointer++); + pointer = smi->ord; } - else - return false; - } - } - - TermDocsPtr MultiTermDocs::termDocs(int32_t i) - { - TermDocsPtr result(readerTermDocs[i]); - if (!result) - { - readerTermDocs[i] = termDocs(readers[i]); - result = readerTermDocs[i]; - } - if (smi) - { - BOOST_ASSERT(smi->ord == i); - BOOST_ASSERT(smi->termEnum->term()->equals(term)); - result->seek(smi->termEnum); + base = starts[pointer]; + current = termDocs(pointer++); + } else { + return false; } - else - result->seek(term); - return result; - } - - TermDocsPtr MultiTermDocs::termDocs(IndexReaderPtr reader) - { - return term ? reader->termDocs() : reader->termDocs(TermPtr()); } - - void MultiTermDocs::close() - { - for (Collection::iterator termDoc = readerTermDocs.begin(); termDoc != readerTermDocs.end(); ++termDoc) - { - if (*termDoc) - (*termDoc)->close(); +} + +TermDocsPtr MultiTermDocs::termDocs(int32_t i) { + TermDocsPtr result(readerTermDocs[i]); + if (!result) { + readerTermDocs[i] = termDocs(readers[i]); + result = readerTermDocs[i]; + } + if (smi) { + BOOST_ASSERT(smi->ord == i); + BOOST_ASSERT(smi->termEnum->term()->equals(term)); + result->seek(smi->termEnum); + } else { + result->seek(term); + } + return result; +} + +TermDocsPtr MultiTermDocs::termDocs(const IndexReaderPtr& reader) { + return term ? 
reader->termDocs() : reader->termDocs(TermPtr()); +} + +void MultiTermDocs::close() { + for (Collection::iterator termDoc = readerTermDocs.begin(); termDoc != readerTermDocs.end(); ++termDoc) { + if (*termDoc) { + (*termDoc)->close(); } } - - MultiTermPositions::MultiTermPositions(IndexReaderPtr topReader, Collection r, Collection s) : MultiTermDocs(topReader, r, s) - { - } - - MultiTermPositions::~MultiTermPositions() - { - } - - TermDocsPtr MultiTermPositions::termDocs(IndexReaderPtr reader) - { - return reader->termPositions(); - } - - int32_t MultiTermPositions::nextPosition() - { - return boost::static_pointer_cast(current)->nextPosition(); - } - - int32_t MultiTermPositions::getPayloadLength() - { - return boost::static_pointer_cast(current)->getPayloadLength(); - } - - ByteArray MultiTermPositions::getPayload(ByteArray data, int32_t offset) - { - return boost::static_pointer_cast(current)->getPayload(data, offset); - } - - bool MultiTermPositions::isPayloadAvailable() - { - return boost::static_pointer_cast(current)->isPayloadAvailable(); - } - - ReaderCommit::ReaderCommit(SegmentInfosPtr infos, DirectoryPtr dir) - { - segmentsFileName = infos->getCurrentSegmentFileName(); - this->dir = dir; - userData = infos->getUserData(); - HashSet files(infos->files(dir, true)); - this->files = HashSet::newInstance(files.begin(), files.end()); - version = infos->getVersion(); - generation = infos->getGeneration(); - _isOptimized = infos->size() == 1 && !infos->info(0)->hasDeletions(); - } - - ReaderCommit::~ReaderCommit() - { - } - - String ReaderCommit::toString() - { - return L"DirectoryReader::ReaderCommit(" + segmentsFileName + L")"; - } - - bool ReaderCommit::isOptimized() - { - return _isOptimized; - } - - String ReaderCommit::getSegmentsFileName() - { - return segmentsFileName; - } - - HashSet ReaderCommit::getFileNames() - { - return files; - } - - DirectoryPtr ReaderCommit::getDirectory() - { - return dir; - } - - int64_t ReaderCommit::getVersion() - { - return 
version; - } - - int64_t ReaderCommit::getGeneration() - { - return generation; - } - - bool ReaderCommit::isDeleted() - { - return false; - } - - MapStringString ReaderCommit::getUserData() - { - return userData; - } - - void ReaderCommit::deleteCommit() - { - boost::throw_exception(UnsupportedOperationException(L"This IndexCommit does not support deletions.")); - } +} + +MultiTermPositions::MultiTermPositions(const IndexReaderPtr& topReader, Collection r, Collection s) : MultiTermDocs(topReader, r, s) { +} + +MultiTermPositions::~MultiTermPositions() { +} + +TermDocsPtr MultiTermPositions::termDocs(const IndexReaderPtr& reader) { + return reader->termPositions(); +} + +int32_t MultiTermPositions::nextPosition() { + return boost::static_pointer_cast(current)->nextPosition(); +} + +int32_t MultiTermPositions::getPayloadLength() { + return boost::static_pointer_cast(current)->getPayloadLength(); +} + +ByteArray MultiTermPositions::getPayload(ByteArray data, int32_t offset) { + return boost::static_pointer_cast(current)->getPayload(data, offset); +} + +bool MultiTermPositions::isPayloadAvailable() { + return boost::static_pointer_cast(current)->isPayloadAvailable(); +} + +ReaderCommit::ReaderCommit(const SegmentInfosPtr& infos, const DirectoryPtr& dir) { + segmentsFileName = infos->getCurrentSegmentFileName(); + this->dir = dir; + userData = infos->getUserData(); + HashSet files(infos->files(dir, true)); + this->files = HashSet::newInstance(files.begin(), files.end()); + version = infos->getVersion(); + generation = infos->getGeneration(); + _isOptimized = infos->size() == 1 && !infos->info(0)->hasDeletions(); +} + +ReaderCommit::~ReaderCommit() { +} + +String ReaderCommit::toString() { + return L"DirectoryReader::ReaderCommit(" + segmentsFileName + L")"; +} + +bool ReaderCommit::isOptimized() { + return _isOptimized; +} + +String ReaderCommit::getSegmentsFileName() { + return segmentsFileName; +} + +HashSet ReaderCommit::getFileNames() { + return files; +} + 
+DirectoryPtr ReaderCommit::getDirectory() { + return dir; +} + +int64_t ReaderCommit::getVersion() { + return version; +} + +int64_t ReaderCommit::getGeneration() { + return generation; +} + +bool ReaderCommit::isDeleted() { + return false; +} + +MapStringString ReaderCommit::getUserData() { + return userData; +} + +void ReaderCommit::deleteCommit() { + boost::throw_exception(UnsupportedOperationException(L"This IndexCommit does not support deletions.")); +} + } diff --git a/src/core/index/DocConsumer.cpp b/src/core/index/DocConsumer.cpp index 568fe334..8004ff73 100644 --- a/src/core/index/DocConsumer.cpp +++ b/src/core/index/DocConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "DocConsumer.h" -namespace Lucene -{ - DocConsumer::~DocConsumer() - { - } +namespace Lucene { + +DocConsumer::~DocConsumer() { +} + } diff --git a/src/core/index/DocConsumerPerThread.cpp b/src/core/index/DocConsumerPerThread.cpp index e13c6709..28997c0d 100644 --- a/src/core/index/DocConsumerPerThread.cpp +++ b/src/core/index/DocConsumerPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "DocConsumerPerThread.h" -namespace Lucene -{ - DocConsumerPerThread::~DocConsumerPerThread() - { - } +namespace Lucene { + +DocConsumerPerThread::~DocConsumerPerThread() { +} + } diff --git a/src/core/index/DocFieldConsumer.cpp b/src/core/index/DocFieldConsumer.cpp index 7203eef2..bee555bf 100644 --- a/src/core/index/DocFieldConsumer.cpp +++ b/src/core/index/DocFieldConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,14 +7,13 @@ #include "LuceneInc.h" #include "DocFieldConsumer.h" -namespace Lucene -{ - DocFieldConsumer::~DocFieldConsumer() - { - } - - void DocFieldConsumer::setFieldInfos(FieldInfosPtr fieldInfos) - { - this->fieldInfos = fieldInfos; - } +namespace Lucene { + +DocFieldConsumer::~DocFieldConsumer() { +} + +void DocFieldConsumer::setFieldInfos(const FieldInfosPtr& fieldInfos) { + this->fieldInfos = fieldInfos; +} + } diff --git a/src/core/index/DocFieldConsumerPerField.cpp b/src/core/index/DocFieldConsumerPerField.cpp index 7a771fa3..0fb3b5a8 100644 --- a/src/core/index/DocFieldConsumerPerField.cpp +++ b/src/core/index/DocFieldConsumerPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "DocFieldConsumerPerField.h" -namespace Lucene -{ - DocFieldConsumerPerField::~DocFieldConsumerPerField() - { - } +namespace Lucene { + +DocFieldConsumerPerField::~DocFieldConsumerPerField() { +} + } diff --git a/src/core/index/DocFieldConsumerPerThread.cpp b/src/core/index/DocFieldConsumerPerThread.cpp index b0818e9b..ecbbec23 100644 --- a/src/core/index/DocFieldConsumerPerThread.cpp +++ b/src/core/index/DocFieldConsumerPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "DocFieldConsumerPerThread.h" -namespace Lucene -{ - DocFieldConsumerPerThread::~DocFieldConsumerPerThread() - { - } +namespace Lucene { + +DocFieldConsumerPerThread::~DocFieldConsumerPerThread() { +} + } diff --git a/src/core/index/DocFieldConsumers.cpp b/src/core/index/DocFieldConsumers.cpp index c8a39525..5198962b 100644 --- a/src/core/index/DocFieldConsumers.cpp +++ b/src/core/index/DocFieldConsumers.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,168 +10,133 @@ #include "DocFieldConsumersPerThread.h" #include "MiscUtils.h" -namespace Lucene -{ - DocFieldConsumers::DocFieldConsumers(DocFieldConsumerPtr one, DocFieldConsumerPtr two) - { - freeCount = 0; - allocCount = 0; - docFreeList = Collection::newInstance(1); - - this->one = one; - this->two = two; - } - - DocFieldConsumers::~DocFieldConsumers() - { - } - - void DocFieldConsumers::setFieldInfos(FieldInfosPtr fieldInfos) - { - DocFieldConsumer::setFieldInfos(fieldInfos); - one->setFieldInfos(fieldInfos); - two->setFieldInfos(fieldInfos); - } - - void DocFieldConsumers::flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state) - { - MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField oneThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); - MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField twoThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); - - for (MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) - { - Collection oneFields(Collection::newInstance()); - Collection twoFields(Collection::newInstance()); - - for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) - { - oneFields.add(boost::static_pointer_cast(*perField)->one); - twoFields.add(boost::static_pointer_cast(*perField)->two); - } - - oneThreadsAndFields.put(boost::static_pointer_cast(entry->first)->one, oneFields); - twoThreadsAndFields.put(boost::static_pointer_cast(entry->first)->two, oneFields); - } +namespace Lucene { - one->flush(oneThreadsAndFields, state); - two->flush(twoThreadsAndFields, state); - } - - void DocFieldConsumers::closeDocStore(SegmentWriteStatePtr state) - { - LuceneException 
finally; - try - { - one->closeDocStore(state); - } - catch (LuceneException& e) - { - finally = e; - } - try - { - two->closeDocStore(state); - } - catch (LuceneException& e) - { - finally = e; +DocFieldConsumers::DocFieldConsumers(const DocFieldConsumerPtr& one, const DocFieldConsumerPtr& two) { + freeCount = 0; + allocCount = 0; + docFreeList = Collection::newInstance(1); + + this->one = one; + this->two = two; +} + +DocFieldConsumers::~DocFieldConsumers() { +} + +void DocFieldConsumers::setFieldInfos(const FieldInfosPtr& fieldInfos) { + DocFieldConsumer::setFieldInfos(fieldInfos); + one->setFieldInfos(fieldInfos); + two->setFieldInfos(fieldInfos); +} + +void DocFieldConsumers::flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { + MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField oneThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); + MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField twoThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); + + for (MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { + Collection oneFields(Collection::newInstance()); + Collection twoFields(Collection::newInstance()); + + for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { + oneFields.add(boost::static_pointer_cast(*perField)->one); + twoFields.add(boost::static_pointer_cast(*perField)->two); } - finally.throwException(); + + oneThreadsAndFields.put(boost::static_pointer_cast(entry->first)->one, oneFields); + twoThreadsAndFields.put(boost::static_pointer_cast(entry->first)->two, oneFields); } - - bool DocFieldConsumers::freeRAM() - { - return (one->freeRAM() || two->freeRAM()); + + one->flush(oneThreadsAndFields, state); + 
two->flush(twoThreadsAndFields, state); +} + +void DocFieldConsumers::closeDocStore(const SegmentWriteStatePtr& state) { + LuceneException finally; + try { + one->closeDocStore(state); + } catch (LuceneException& e) { + finally = e; } - - DocFieldConsumerPerThreadPtr DocFieldConsumers::addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread) - { - return newLucene(docFieldProcessorPerThread, shared_from_this(), one->addThread(docFieldProcessorPerThread), two->addThread(docFieldProcessorPerThread)); + try { + two->closeDocStore(state); + } catch (LuceneException& e) { + finally = e; } - - DocFieldConsumersPerDocPtr DocFieldConsumers::getPerDoc() - { - SyncLock syncLock(this); - if (freeCount == 0) - { - ++allocCount; - if (allocCount > docFreeList.size()) - { - // Grow our free list up front to make sure we have enough space to recycle all outstanding - // PerDoc instances - BOOST_ASSERT(allocCount == 1 + docFreeList.size()); - docFreeList.resize(MiscUtils::getNextSize(allocCount)); - } - return newLucene(shared_from_this()); + finally.throwException(); +} + +bool DocFieldConsumers::freeRAM() { + return (one->freeRAM() || two->freeRAM()); +} + +DocFieldConsumerPerThreadPtr DocFieldConsumers::addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread) { + return newLucene(docFieldProcessorPerThread, shared_from_this(), one->addThread(docFieldProcessorPerThread), two->addThread(docFieldProcessorPerThread)); +} + +DocFieldConsumersPerDocPtr DocFieldConsumers::getPerDoc() { + SyncLock syncLock(this); + if (freeCount == 0) { + ++allocCount; + if (allocCount > docFreeList.size()) { + // Grow our free list up front to make sure we have enough space to recycle all outstanding + // PerDoc instances + BOOST_ASSERT(allocCount == 1 + docFreeList.size()); + docFreeList.resize(MiscUtils::getNextSize(allocCount)); } - else - return docFreeList[--freeCount]; - } - - void DocFieldConsumers::freePerDoc(DocFieldConsumersPerDocPtr perDoc) - { - SyncLock 
syncLock(this); - BOOST_ASSERT(freeCount < docFreeList.size()); - docFreeList[freeCount++] = perDoc; + return newLucene(shared_from_this()); + } else { + return docFreeList[--freeCount]; } - - DocFieldConsumersPerDoc::DocFieldConsumersPerDoc(DocFieldConsumersPtr fieldConsumers) - { - this->_fieldConsumers = fieldConsumers; - } - - DocFieldConsumersPerDoc::~DocFieldConsumersPerDoc() - { +} + +void DocFieldConsumers::freePerDoc(const DocFieldConsumersPerDocPtr& perDoc) { + SyncLock syncLock(this); + BOOST_ASSERT(freeCount < docFreeList.size()); + docFreeList[freeCount++] = perDoc; +} + +DocFieldConsumersPerDoc::DocFieldConsumersPerDoc(const DocFieldConsumersPtr& fieldConsumers) { + this->_fieldConsumers = fieldConsumers; +} + +DocFieldConsumersPerDoc::~DocFieldConsumersPerDoc() { +} + +int64_t DocFieldConsumersPerDoc::sizeInBytes() { + return one->sizeInBytes() + two->sizeInBytes(); +} + +void DocFieldConsumersPerDoc::finish() { + LuceneException finally; + try { + one->finish(); + } catch (LuceneException& e) { + finally = e; } - - int64_t DocFieldConsumersPerDoc::sizeInBytes() - { - return one->sizeInBytes() + two->sizeInBytes(); + try { + two->finish(); + } catch (LuceneException& e) { + finally = e; } - - void DocFieldConsumersPerDoc::finish() - { - LuceneException finally; - try - { - one->finish(); - } - catch (LuceneException& e) - { - finally = e; - } - try - { - two->finish(); - } - catch (LuceneException& e) - { - finally = e; - } - DocFieldConsumersPtr(_fieldConsumers)->freePerDoc(shared_from_this()); - finally.throwException(); + DocFieldConsumersPtr(_fieldConsumers)->freePerDoc(shared_from_this()); + finally.throwException(); +} + +void DocFieldConsumersPerDoc::abort() { + LuceneException finally; + try { + one->abort(); + } catch (LuceneException& e) { + finally = e; } - - void DocFieldConsumersPerDoc::abort() - { - LuceneException finally; - try - { - one->abort(); - } - catch (LuceneException& e) - { - finally = e; - } - try - { - two->abort(); - } - 
catch (LuceneException& e) - { - finally = e; - } - DocFieldConsumersPtr(_fieldConsumers)->freePerDoc(shared_from_this()); - finally.throwException(); + try { + two->abort(); + } catch (LuceneException& e) { + finally = e; } + DocFieldConsumersPtr(_fieldConsumers)->freePerDoc(shared_from_this()); + finally.throwException(); +} + } diff --git a/src/core/index/DocFieldConsumersPerField.cpp b/src/core/index/DocFieldConsumersPerField.cpp index eebc532b..a2f80951 100644 --- a/src/core/index/DocFieldConsumersPerField.cpp +++ b/src/core/index/DocFieldConsumersPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,44 +7,35 @@ #include "LuceneInc.h" #include "DocFieldConsumersPerField.h" -namespace Lucene -{ - DocFieldConsumersPerField::DocFieldConsumersPerField(DocFieldConsumersPerThreadPtr perThread, DocFieldConsumerPerFieldPtr one, DocFieldConsumerPerFieldPtr two) - { - this->_perThread = perThread; - this->one = one; - this->two = two; - } - - DocFieldConsumersPerField::~DocFieldConsumersPerField() - { - } - - void DocFieldConsumersPerField::processFields(Collection fields, int32_t count) - { - one->processFields(fields, count); - two->processFields(fields, count); +namespace Lucene { + +DocFieldConsumersPerField::DocFieldConsumersPerField(const DocFieldConsumersPerThreadPtr& perThread, const DocFieldConsumerPerFieldPtr& one, const DocFieldConsumerPerFieldPtr& two) { + this->_perThread = perThread; + this->one = one; + this->two = two; +} + +DocFieldConsumersPerField::~DocFieldConsumersPerField() { +} + +void DocFieldConsumersPerField::processFields(Collection fields, int32_t count) { + 
one->processFields(fields, count); + two->processFields(fields, count); +} + +void DocFieldConsumersPerField::abort() { + LuceneException finally; + try { + one->abort(); + } catch (LuceneException& e) { + finally = e; } - - void DocFieldConsumersPerField::abort() - { - LuceneException finally; - try - { - one->abort(); - } - catch (LuceneException& e) - { - finally = e; - } - try - { - two->abort(); - } - catch (LuceneException& e) - { - finally = e; - } - finally.throwException(); + try { + two->abort(); + } catch (LuceneException& e) { + finally = e; } + finally.throwException(); +} + } diff --git a/src/core/index/DocFieldConsumersPerThread.cpp b/src/core/index/DocFieldConsumersPerThread.cpp index 408342ef..6a7a9bd8 100644 --- a/src/core/index/DocFieldConsumersPerThread.cpp +++ b/src/core/index/DocFieldConsumersPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,72 +10,60 @@ #include "DocFieldConsumers.h" #include "DocFieldConsumersPerField.h" -namespace Lucene -{ - DocFieldConsumersPerThread::DocFieldConsumersPerThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread, - DocFieldConsumersPtr parent, - DocFieldConsumerPerThreadPtr one, DocFieldConsumerPerThreadPtr two) - { - this->_parent = parent; - this->one = one; - this->two = two; - docState = docFieldProcessorPerThread->docState; - } - - DocFieldConsumersPerThread::~DocFieldConsumersPerThread() - { - } - - void DocFieldConsumersPerThread::startDocument() - { - one->startDocument(); - two->startDocument(); - } - - void DocFieldConsumersPerThread::abort() - { - LuceneException finally; - try - { - one->abort(); - } - catch (LuceneException& e) - { - finally = e; - } - try - { - two->abort(); - } - catch (LuceneException& e) - { - finally = e; - } - finally.throwException(); +namespace Lucene { + +DocFieldConsumersPerThread::DocFieldConsumersPerThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread, + const DocFieldConsumersPtr& parent, + const DocFieldConsumerPerThreadPtr& one, const DocFieldConsumerPerThreadPtr& two) { + this->_parent = parent; + this->one = one; + this->two = two; + docState = docFieldProcessorPerThread->docState; +} + +DocFieldConsumersPerThread::~DocFieldConsumersPerThread() { +} + +void DocFieldConsumersPerThread::startDocument() { + one->startDocument(); + two->startDocument(); +} + +void DocFieldConsumersPerThread::abort() { + LuceneException finally; + try { + one->abort(); + } catch (LuceneException& e) { + finally = e; } - - DocWriterPtr DocFieldConsumersPerThread::finishDocument() - { - DocWriterPtr oneDoc(one->finishDocument()); - DocWriterPtr twoDoc(two->finishDocument()); - if (!oneDoc) - return twoDoc; - else if (!twoDoc) - return oneDoc; - else - { - DocFieldConsumersPerDocPtr both(DocFieldConsumersPtr(_parent)->getPerDoc()); - 
both->docID = docState->docID; - BOOST_ASSERT(oneDoc->docID == docState->docID); - BOOST_ASSERT(twoDoc->docID == docState->docID); - both->one = oneDoc; - both->two = twoDoc; - return both; - } + try { + two->abort(); + } catch (LuceneException& e) { + finally = e; } - - DocFieldConsumerPerFieldPtr DocFieldConsumersPerThread::addField(FieldInfoPtr fi) - { - return newLucene(shared_from_this(), one->addField(fi), two->addField(fi)); + finally.throwException(); +} + +DocWriterPtr DocFieldConsumersPerThread::finishDocument() { + DocWriterPtr oneDoc(one->finishDocument()); + DocWriterPtr twoDoc(two->finishDocument()); + if (!oneDoc) { + return twoDoc; + } else if (!twoDoc) { + return oneDoc; + } else { + DocFieldConsumersPerDocPtr both(DocFieldConsumersPtr(_parent)->getPerDoc()); + both->docID = docState->docID; + BOOST_ASSERT(oneDoc->docID == docState->docID); + BOOST_ASSERT(twoDoc->docID == docState->docID); + both->one = oneDoc; + both->two = twoDoc; + return both; } } + +DocFieldConsumerPerFieldPtr DocFieldConsumersPerThread::addField(const FieldInfoPtr& fi) { + return newLucene(shared_from_this(), one->addField(fi), two->addField(fi)); +} + +} diff --git a/src/core/index/DocFieldProcessor.cpp b/src/core/index/DocFieldProcessor.cpp index 73c6731d..d0c18317 100644 --- a/src/core/index/DocFieldProcessor.cpp +++ b/src/core/index/DocFieldProcessor.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,61 +15,54 @@ #include "FieldInfos.h" #include "TestPoint.h" -namespace Lucene -{ - DocFieldProcessor::DocFieldProcessor(DocumentsWriterPtr docWriter, DocFieldConsumerPtr consumer) - { - this->fieldInfos = newLucene(); - this->_docWriter = docWriter; - this->consumer = consumer; - consumer->setFieldInfos(fieldInfos); - fieldsWriter = newLucene(docWriter, fieldInfos); - } - - DocFieldProcessor::~DocFieldProcessor() - { - } - - void DocFieldProcessor::closeDocStore(SegmentWriteStatePtr state) - { - consumer->closeDocStore(state); - fieldsWriter->closeDocStore(state); - } - - void DocFieldProcessor::flush(Collection threads, SegmentWriteStatePtr state) - { - TestScope testScope(L"DocFieldProcessor", L"flush"); - MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField childThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); - - for (Collection::iterator thread = threads.begin(); thread != threads.end(); ++thread) - { - DocFieldProcessorPerThreadPtr perThread(boost::static_pointer_cast(*thread)); - childThreadsAndFields.put(perThread->consumer, perThread->fields()); - perThread->trimFields(state); - } - fieldsWriter->flush(state); - consumer->flush(childThreadsAndFields, state); - - // Important to save after asking consumer to flush so consumer can alter the FieldInfo* if necessary. - // eg FreqProxTermsWriter does this with FieldInfo.storePayload. 
- String fileName(state->segmentFileName(IndexFileNames::FIELD_INFOS_EXTENSION())); - fieldInfos->write(state->directory, fileName); - state->flushedFiles.add(fileName); - } - - void DocFieldProcessor::abort() - { - fieldsWriter->abort(); - consumer->abort(); - } - - bool DocFieldProcessor::freeRAM() - { - return consumer->freeRAM(); - } - - DocConsumerPerThreadPtr DocFieldProcessor::addThread(DocumentsWriterThreadStatePtr perThread) - { - return newLucene(perThread, shared_from_this()); +namespace Lucene { + +DocFieldProcessor::DocFieldProcessor(const DocumentsWriterPtr& docWriter, const DocFieldConsumerPtr& consumer) { + this->fieldInfos = newLucene(); + this->_docWriter = docWriter; + this->consumer = consumer; + consumer->setFieldInfos(fieldInfos); + fieldsWriter = newLucene(docWriter, fieldInfos); +} + +DocFieldProcessor::~DocFieldProcessor() { +} + +void DocFieldProcessor::closeDocStore(const SegmentWriteStatePtr& state) { + consumer->closeDocStore(state); + fieldsWriter->closeDocStore(state); +} + +void DocFieldProcessor::flush(Collection threads, const SegmentWriteStatePtr& state) { + TestScope testScope(L"DocFieldProcessor", L"flush"); + MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField childThreadsAndFields(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::newInstance()); + + for (Collection::iterator thread = threads.begin(); thread != threads.end(); ++thread) { + DocFieldProcessorPerThreadPtr perThread(boost::static_pointer_cast(*thread)); + childThreadsAndFields.put(perThread->consumer, perThread->fields()); + perThread->trimFields(state); } + fieldsWriter->flush(state); + consumer->flush(childThreadsAndFields, state); + + // Important to save after asking consumer to flush so consumer can alter the FieldInfo* if necessary. + // eg FreqProxTermsWriter does this with FieldInfo.storePayload. 
+ String fileName(state->segmentFileName(IndexFileNames::FIELD_INFOS_EXTENSION())); + fieldInfos->write(state->directory, fileName); + state->flushedFiles.add(fileName); +} + +void DocFieldProcessor::abort() { + fieldsWriter->abort(); + consumer->abort(); +} + +bool DocFieldProcessor::freeRAM() { + return consumer->freeRAM(); +} + +DocConsumerPerThreadPtr DocFieldProcessor::addThread(const DocumentsWriterThreadStatePtr& perThread) { + return newLucene(perThread, shared_from_this()); +} + } diff --git a/src/core/index/DocFieldProcessorPerField.cpp b/src/core/index/DocFieldProcessorPerField.cpp index 702f0ec5..63f1044a 100644 --- a/src/core/index/DocFieldProcessorPerField.cpp +++ b/src/core/index/DocFieldProcessorPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,23 +10,21 @@ #include "DocFieldConsumerPerThread.h" #include "DocFieldConsumerPerField.h" -namespace Lucene -{ - DocFieldProcessorPerField::DocFieldProcessorPerField(DocFieldProcessorPerThreadPtr perThread, FieldInfoPtr fieldInfo) - { - lastGen = -1; - fieldCount = 0; - fields = Collection::newInstance(1); - this->consumer = perThread->consumer->addField(fieldInfo); - this->fieldInfo = fieldInfo; - } - - DocFieldProcessorPerField::~DocFieldProcessorPerField() - { - } - - void DocFieldProcessorPerField::abort() - { - consumer->abort(); - } +namespace Lucene { + +DocFieldProcessorPerField::DocFieldProcessorPerField(const DocFieldProcessorPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { + lastGen = -1; + fieldCount = 0; + fields = Collection::newInstance(1); + this->consumer = perThread->consumer->addField(fieldInfo); + this->fieldInfo = fieldInfo; +} + +DocFieldProcessorPerField::~DocFieldProcessorPerField() { +} + +void DocFieldProcessorPerField::abort() { + consumer->abort(); +} + } diff --git a/src/core/index/DocFieldProcessorPerThread.cpp b/src/core/index/DocFieldProcessorPerThread.cpp index 745aa791..827ee32c 100644 --- a/src/core/index/DocFieldProcessorPerThread.cpp +++ b/src/core/index/DocFieldProcessorPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -25,329 +25,289 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - DocFieldProcessorPerThread::DocFieldProcessorPerThread(DocumentsWriterThreadStatePtr threadState, DocFieldProcessorPtr docFieldProcessor) - { - _fields = Collection::newInstance(1); - fieldHash = Collection::newInstance(2); - hashMask = 1; - fieldGen = 0; - fieldCount = 0; - totalFieldCount = 0; - - this->docState = threadState->docState; - this->_docFieldProcessor = docFieldProcessor; - this->fieldInfos = docFieldProcessor->fieldInfos; - - docFreeList = Collection::newInstance(1); - freeCount = 0; - allocCount = 0; - } - - DocFieldProcessorPerThread::~DocFieldProcessorPerThread() - { - } - - void DocFieldProcessorPerThread::initialize() - { - DocFieldProcessorPtr docFieldProcessor(_docFieldProcessor); - consumer = docFieldProcessor->consumer->addThread(shared_from_this()); - fieldsWriter = docFieldProcessor->fieldsWriter->addThread(docState); - } - - void DocFieldProcessorPerThread::abort() - { - for (Collection::iterator field = fieldHash.begin(); field != fieldHash.end(); ++field) - { - DocFieldProcessorPerFieldPtr current(*field); - while (current) - { - DocFieldProcessorPerFieldPtr next(current->next); - current->abort(); - current = next; - } +namespace Lucene { + +DocFieldProcessorPerThread::DocFieldProcessorPerThread(const DocumentsWriterThreadStatePtr& threadState, const DocFieldProcessorPtr& docFieldProcessor) { + _fields = Collection::newInstance(1); + fieldHash = Collection::newInstance(2); + hashMask = 1; + fieldGen = 0; + fieldCount = 0; + totalFieldCount = 0; + + this->docState = threadState->docState; + this->_docFieldProcessor = docFieldProcessor; + this->fieldInfos = docFieldProcessor->fieldInfos; + + docFreeList = Collection::newInstance(1); + freeCount = 0; + allocCount = 0; +} + +DocFieldProcessorPerThread::~DocFieldProcessorPerThread() { +} + +void 
DocFieldProcessorPerThread::initialize() { + DocFieldProcessorPtr docFieldProcessor(_docFieldProcessor); + consumer = docFieldProcessor->consumer->addThread(shared_from_this()); + fieldsWriter = docFieldProcessor->fieldsWriter->addThread(docState); +} + +void DocFieldProcessorPerThread::abort() { + for (Collection::iterator field = fieldHash.begin(); field != fieldHash.end(); ++field) { + DocFieldProcessorPerFieldPtr current(*field); + while (current) { + DocFieldProcessorPerFieldPtr next(current->next); + current->abort(); + current = next; } - fieldsWriter->abort(); - consumer->abort(); } - - Collection DocFieldProcessorPerThread::fields() - { - Collection fields(Collection::newInstance()); - for (Collection::iterator field = fieldHash.begin(); field != fieldHash.end(); ++field) - { - DocFieldProcessorPerFieldPtr current(*field); - while (current) - { - fields.add(current->consumer); - current = current->next; - } + fieldsWriter->abort(); + consumer->abort(); +} + +Collection DocFieldProcessorPerThread::fields() { + Collection fields(Collection::newInstance()); + for (Collection::iterator field = fieldHash.begin(); field != fieldHash.end(); ++field) { + DocFieldProcessorPerFieldPtr current(*field); + while (current) { + fields.add(current->consumer); + current = current->next; } - BOOST_ASSERT(fields.size() == totalFieldCount); - return fields; } - - void DocFieldProcessorPerThread::trimFields(SegmentWriteStatePtr state) - { - for (Collection::iterator perField = fieldHash.begin(); perField != fieldHash.end(); ++perField) - { - DocFieldProcessorPerFieldPtr current(*perField); - DocFieldProcessorPerFieldPtr lastPerField; - - while (current) - { - if (current->lastGen == -1) - { - // This field was not seen since the previous flush, so, free up its resources now - - // Unhash - if (!lastPerField) - *perField = current->next; - else - lastPerField->next = current->next; - - DocumentsWriterPtr docWriter(state->_docWriter); - if (docWriter->infoStream) - 
*(docWriter->infoStream) << L" purge field=" << current->fieldInfo->name << L"\n"; - - --totalFieldCount; + BOOST_ASSERT(fields.size() == totalFieldCount); + return fields; +} + +void DocFieldProcessorPerThread::trimFields(const SegmentWriteStatePtr& state) { + for (Collection::iterator perField = fieldHash.begin(); perField != fieldHash.end(); ++perField) { + DocFieldProcessorPerFieldPtr current(*perField); + DocFieldProcessorPerFieldPtr lastPerField; + + while (current) { + if (current->lastGen == -1) { + // This field was not seen since the previous flush, so, free up its resources now + + // Unhash + if (!lastPerField) { + *perField = current->next; + } else { + lastPerField->next = current->next; } - else - { - // Reset - current->lastGen = -1; - lastPerField = current; + + DocumentsWriterPtr docWriter(state->_docWriter); + if (docWriter->infoStream) { + *(docWriter->infoStream) << L" purge field=" << current->fieldInfo->name << L"\n"; } - - current = current->next; + + --totalFieldCount; + } else { + // Reset + current->lastGen = -1; + lastPerField = current; } + + current = current->next; } } - - void DocFieldProcessorPerThread::rehash() - { - int32_t newHashSize = (fieldHash.size() * 2); - BOOST_ASSERT(newHashSize > fieldHash.size()); - - Collection newHashArray(Collection::newInstance(newHashSize)); - - // Rehash - int32_t newHashMask = newHashSize - 1; - for (Collection::iterator fp0 = fieldHash.begin(); fp0 != fieldHash.end(); ++fp0) - { - DocFieldProcessorPerFieldPtr current(*fp0); - while (current) - { - int32_t hashPos2 = StringUtils::hashCode(current->fieldInfo->name) & newHashMask; - DocFieldProcessorPerFieldPtr nextFP0(current->next); - current->next = newHashArray[hashPos2]; - newHashArray[hashPos2] = current; - current = nextFP0; - } +} + +void DocFieldProcessorPerThread::rehash() { + int32_t newHashSize = (fieldHash.size() * 2); + BOOST_ASSERT(newHashSize > fieldHash.size()); + + Collection newHashArray(Collection::newInstance(newHashSize)); + + 
// Rehash + int32_t newHashMask = newHashSize - 1; + for (Collection::iterator fp0 = fieldHash.begin(); fp0 != fieldHash.end(); ++fp0) { + DocFieldProcessorPerFieldPtr current(*fp0); + while (current) { + int32_t hashPos2 = StringUtils::hashCode(current->fieldInfo->name) & newHashMask; + DocFieldProcessorPerFieldPtr nextFP0(current->next); + current->next = newHashArray[hashPos2]; + newHashArray[hashPos2] = current; + current = nextFP0; } - - fieldHash = newHashArray; - hashMask = newHashMask; } - - struct lessFieldInfoName - { - inline bool operator()(const DocFieldProcessorPerFieldPtr& first, const DocFieldProcessorPerFieldPtr& second) const - { - return (first->fieldInfo->name < second->fieldInfo->name); + + fieldHash = newHashArray; + hashMask = newHashMask; +} + +struct lessFieldInfoName { + inline bool operator()(const DocFieldProcessorPerFieldPtr& first, const DocFieldProcessorPerFieldPtr& second) const { + return (first->fieldInfo->name < second->fieldInfo->name); + } +}; + +DocWriterPtr DocFieldProcessorPerThread::processDocument() { + consumer->startDocument(); + fieldsWriter->startDocument(); + + DocumentPtr doc(docState->doc); + + DocFieldProcessorPtr docFieldProcessor(_docFieldProcessor); + DocumentsWriterPtr docWriter(docFieldProcessor->_docWriter); + bool testPoint = IndexWriterPtr(docWriter->_writer)->testPoint(L"DocumentsWriter.ThreadState.init start"); + BOOST_ASSERT(testPoint); + + fieldCount = 0; + int32_t thisFieldGen = fieldGen++; + + Collection docFields(doc->getFields()); + + // Absorb any new fields first seen in this document. + // Also absorb any changes to fields we had already seen before (eg suddenly turning on norms or + // vectors, etc.) 
+ for (Collection::iterator field = docFields.begin(); field != docFields.end(); ++field) { + String fieldName((*field)->name()); + + // Make sure we have a PerField allocated + int32_t hashPos = StringUtils::hashCode(fieldName) & hashMask; + + DocFieldProcessorPerFieldPtr fp(fieldHash[hashPos]); + while (fp && fp->fieldInfo->name != fieldName) { + fp = fp->next; } - }; - - DocWriterPtr DocFieldProcessorPerThread::processDocument() - { - consumer->startDocument(); - fieldsWriter->startDocument(); - - DocumentPtr doc(docState->doc); - - DocFieldProcessorPtr docFieldProcessor(_docFieldProcessor); - DocumentsWriterPtr docWriter(docFieldProcessor->_docWriter); - bool testPoint = IndexWriterPtr(docWriter->_writer)->testPoint(L"DocumentsWriter.ThreadState.init start"); - BOOST_ASSERT(testPoint); - - fieldCount = 0; - int32_t thisFieldGen = fieldGen++; - - Collection docFields(doc->getFields()); - - // Absorb any new fields first seen in this document. - // Also absorb any changes to fields we had already seen before (eg suddenly turning on norms or - // vectors, etc.) 
- for (Collection::iterator field = docFields.begin(); field != docFields.end(); ++field) - { - String fieldName((*field)->name()); - - // Make sure we have a PerField allocated - int32_t hashPos = StringUtils::hashCode(fieldName) & hashMask; - - DocFieldProcessorPerFieldPtr fp(fieldHash[hashPos]); - while (fp && fp->fieldInfo->name != fieldName) - fp = fp->next; - - if (!fp) - { - FieldInfoPtr fi(fieldInfos->add(fieldName, (*field)->isIndexed(), (*field)->isTermVectorStored(), - (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), - (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions())); - - fp = newLucene(shared_from_this(), fi); - fp->next = fieldHash[hashPos]; - fieldHash[hashPos] = fp; - ++totalFieldCount; - - if (totalFieldCount >= fieldHash.size() / 2) - rehash(); - } - else - { - fp->fieldInfo->update((*field)->isIndexed(), (*field)->isTermVectorStored(), - (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), - (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions()); - } - - if (thisFieldGen != fp->lastGen) - { - // First time we're seeing this field for this doc - fp->fieldCount = 0; - - if (fieldCount == _fields.size()) - _fields.resize(_fields.size() * 2); - - _fields[fieldCount++] = fp; - fp->lastGen = thisFieldGen; + + if (!fp) { + FieldInfoPtr fi(fieldInfos->add(fieldName, (*field)->isIndexed(), (*field)->isTermVectorStored(), + (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), + (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions())); + + fp = newLucene(shared_from_this(), fi); + fp->next = fieldHash[hashPos]; + fieldHash[hashPos] = fp; + ++totalFieldCount; + + if (totalFieldCount >= fieldHash.size() / 2) { + rehash(); } - - if (fp->fieldCount == fp->fields.size()) - fp->fields.resize(fp->fields.size() * 2); - - fp->fields[fp->fieldCount++] = *field; - if ((*field)->isStored()) - 
fieldsWriter->addField(*field, fp->fieldInfo); + } else { + fp->fieldInfo->update((*field)->isIndexed(), (*field)->isTermVectorStored(), + (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), + (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions()); } - - // If we are writing vectors then we must visit fields in sorted order so they are written in sorted order. - std::sort(_fields.begin(), _fields.begin() + fieldCount, lessFieldInfoName()); - - for (int32_t i = 0; i < fieldCount; ++i) - _fields[i]->consumer->processFields(_fields[i]->fields, _fields[i]->fieldCount); - - if (!docState->maxTermPrefix.empty() && docState->infoStream) - { - *(docState->infoStream) << L"WARNING: document contains at least one immense term (longer than the max length " << - StringUtils::toString(DocumentsWriter::MAX_TERM_LENGTH) << L"), all of which were skipped. " << - L"Please correct the analyzer to not produce such terms. The prefix of the first immense " << - L"term is: '" << StringUtils::toString(docState->maxTermPrefix) << L"...'\n"; - docState->maxTermPrefix.clear(); + + if (thisFieldGen != fp->lastGen) { + // First time we're seeing this field for this doc + fp->fieldCount = 0; + + if (fieldCount == _fields.size()) { + _fields.resize(_fields.size() * 2); + } + + _fields[fieldCount++] = fp; + fp->lastGen = thisFieldGen; } - - DocWriterPtr one(fieldsWriter->finishDocument()); - DocWriterPtr two(consumer->finishDocument()); - - if (!one) - return two; - else if (!two) - return one; - else - { - DocFieldProcessorPerThreadPerDocPtr both(getPerDoc()); - both->docID = docState->docID; - BOOST_ASSERT(one->docID == docState->docID); - BOOST_ASSERT(two->docID == docState->docID); - both->one = one; - both->two = two; - return both; + + if (fp->fieldCount == fp->fields.size()) { + fp->fields.resize(fp->fields.size() * 2); } - } - - DocFieldProcessorPerThreadPerDocPtr DocFieldProcessorPerThread::getPerDoc() - { - SyncLock syncLock(this); - 
if (freeCount == 0) - { - ++allocCount; - if (allocCount > docFreeList.size()) - { - // Grow our free list up front to make sure we have enough space to recycle all - // outstanding PerDoc instances - BOOST_ASSERT(allocCount == docFreeList.size() + 1); - docFreeList.resize(MiscUtils::getNextSize(allocCount)); - } - return newLucene(shared_from_this()); + + fp->fields[fp->fieldCount++] = *field; + if ((*field)->isStored()) { + fieldsWriter->addField(*field, fp->fieldInfo); } - else - return docFreeList[--freeCount]; - } - - void DocFieldProcessorPerThread::freePerDoc(DocFieldProcessorPerThreadPerDocPtr perDoc) - { - SyncLock syncLock(this); - BOOST_ASSERT(freeCount < docFreeList.size()); - docFreeList[freeCount++] = perDoc; } - - DocFieldProcessorPerThreadPerDoc::DocFieldProcessorPerThreadPerDoc(DocFieldProcessorPerThreadPtr docProcessor) - { - this->_docProcessor = docProcessor; + + // If we are writing vectors then we must visit fields in sorted order so they are written in sorted order. + std::sort(_fields.begin(), _fields.begin() + fieldCount, lessFieldInfoName()); + + for (int32_t i = 0; i < fieldCount; ++i) { + _fields[i]->consumer->processFields(_fields[i]->fields, _fields[i]->fieldCount); } - - DocFieldProcessorPerThreadPerDoc::~DocFieldProcessorPerThreadPerDoc() - { + + if (!docState->maxTermPrefix.empty() && docState->infoStream) { + *(docState->infoStream) << L"WARNING: document contains at least one immense term (longer than the max length " << + StringUtils::toString(DocumentsWriter::MAX_TERM_LENGTH) << L"), all of which were skipped. " << + L"Please correct the analyzer to not produce such terms. 
The prefix of the first immense " << + L"term is: '" << StringUtils::toString(docState->maxTermPrefix) << L"...'\n"; + docState->maxTermPrefix.clear(); } - - int64_t DocFieldProcessorPerThreadPerDoc::sizeInBytes() - { - return one->sizeInBytes() + two->sizeInBytes(); + + DocWriterPtr one(fieldsWriter->finishDocument()); + DocWriterPtr two(consumer->finishDocument()); + + if (!one) { + return two; + } else if (!two) { + return one; + } else { + DocFieldProcessorPerThreadPerDocPtr both(getPerDoc()); + both->docID = docState->docID; + BOOST_ASSERT(one->docID == docState->docID); + BOOST_ASSERT(two->docID == docState->docID); + both->one = one; + both->two = two; + return both; } - - void DocFieldProcessorPerThreadPerDoc::finish() - { - LuceneException finally; - try - { - try - { - one->finish(); - } - catch (LuceneException& e) - { - finally = e; - } - two->finish(); +} + +DocFieldProcessorPerThreadPerDocPtr DocFieldProcessorPerThread::getPerDoc() { + SyncLock syncLock(this); + if (freeCount == 0) { + ++allocCount; + if (allocCount > docFreeList.size()) { + // Grow our free list up front to make sure we have enough space to recycle all + // outstanding PerDoc instances + BOOST_ASSERT(allocCount == docFreeList.size() + 1); + docFreeList.resize(MiscUtils::getNextSize(allocCount)); } - catch (LuceneException& e) - { + return newLucene(shared_from_this()); + } else { + return docFreeList[--freeCount]; + } +} + +void DocFieldProcessorPerThread::freePerDoc(const DocFieldProcessorPerThreadPerDocPtr& perDoc) { + SyncLock syncLock(this); + BOOST_ASSERT(freeCount < docFreeList.size()); + docFreeList[freeCount++] = perDoc; +} + +DocFieldProcessorPerThreadPerDoc::DocFieldProcessorPerThreadPerDoc(const DocFieldProcessorPerThreadPtr& docProcessor) { + this->_docProcessor = docProcessor; +} + +DocFieldProcessorPerThreadPerDoc::~DocFieldProcessorPerThreadPerDoc() { +} + +int64_t DocFieldProcessorPerThreadPerDoc::sizeInBytes() { + return one->sizeInBytes() + two->sizeInBytes(); +} + 
+void DocFieldProcessorPerThreadPerDoc::finish() { + LuceneException finally; + try { + try { + one->finish(); + } catch (LuceneException& e) { finally = e; } - DocFieldProcessorPerThreadPtr(_docProcessor)->freePerDoc(shared_from_this()); - finally.throwException(); + two->finish(); + } catch (LuceneException& e) { + finally = e; } - - void DocFieldProcessorPerThreadPerDoc::abort() - { - LuceneException finally; - try - { - try - { - one->abort(); - } - catch (LuceneException& e) - { - finally = e; - } - two->abort(); - } - catch (LuceneException& e) - { + DocFieldProcessorPerThreadPtr(_docProcessor)->freePerDoc(shared_from_this()); + finally.throwException(); +} + +void DocFieldProcessorPerThreadPerDoc::abort() { + LuceneException finally; + try { + try { + one->abort(); + } catch (LuceneException& e) { finally = e; } - DocFieldProcessorPerThreadPtr(_docProcessor)->freePerDoc(shared_from_this()); - finally.throwException(); + two->abort(); + } catch (LuceneException& e) { + finally = e; } + DocFieldProcessorPerThreadPtr(_docProcessor)->freePerDoc(shared_from_this()); + finally.throwException(); +} + } diff --git a/src/core/index/DocInverter.cpp b/src/core/index/DocInverter.cpp index ad54a084..ed7a39c7 100644 --- a/src/core/index/DocInverter.cpp +++ b/src/core/index/DocInverter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,68 +15,59 @@ #include "DocInverterPerField.h" #include "DocInverterPerThread.h" -namespace Lucene -{ - DocInverter::DocInverter(InvertedDocConsumerPtr consumer, InvertedDocEndConsumerPtr endConsumer) - { - this->consumer = consumer; - this->endConsumer = endConsumer; - } - - DocInverter::~DocInverter() - { - } - - void DocInverter::setFieldInfos(FieldInfosPtr fieldInfos) - { - DocFieldConsumer::setFieldInfos(fieldInfos); - consumer->setFieldInfos(fieldInfos); - endConsumer->setFieldInfos(fieldInfos); - } - - void DocInverter::flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, SegmentWriteStatePtr state) - { - MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField childThreadsAndFields(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::newInstance()); - MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField endChildThreadsAndFields(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField::newInstance()); - - for (MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) - { - Collection childFields(Collection::newInstance()); - Collection endChildFields(Collection::newInstance()); - - for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) - { - childFields.add(boost::static_pointer_cast(*perField)->consumer); - endChildFields.add(boost::static_pointer_cast(*perField)->endConsumer); - } - - childThreadsAndFields.put(boost::static_pointer_cast(entry->first)->consumer, childFields); - endChildThreadsAndFields.put(boost::static_pointer_cast(entry->first)->endConsumer, endChildFields); +namespace Lucene { + +DocInverter::DocInverter(const InvertedDocConsumerPtr& consumer, const InvertedDocEndConsumerPtr& endConsumer) { + this->consumer = 
consumer; + this->endConsumer = endConsumer; +} + +DocInverter::~DocInverter() { +} + +void DocInverter::setFieldInfos(const FieldInfosPtr& fieldInfos) { + DocFieldConsumer::setFieldInfos(fieldInfos); + consumer->setFieldInfos(fieldInfos); + endConsumer->setFieldInfos(fieldInfos); +} + +void DocInverter::flush(MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { + MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField childThreadsAndFields(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::newInstance()); + MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField endChildThreadsAndFields(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField::newInstance()); + + for (MapDocFieldConsumerPerThreadCollectionDocFieldConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { + Collection childFields(Collection::newInstance()); + Collection endChildFields(Collection::newInstance()); + + for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { + childFields.add(boost::static_pointer_cast(*perField)->consumer); + endChildFields.add(boost::static_pointer_cast(*perField)->endConsumer); } - - consumer->flush(childThreadsAndFields, state); - endConsumer->flush(endChildThreadsAndFields, state); - } - - void DocInverter::closeDocStore(SegmentWriteStatePtr state) - { - consumer->closeDocStore(state); - endConsumer->closeDocStore(state); - } - - void DocInverter::abort() - { - consumer->abort(); - endConsumer->abort(); - } - - bool DocInverter::freeRAM() - { - return consumer->freeRAM(); - } - - DocFieldConsumerPerThreadPtr DocInverter::addThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread) - { - return newLucene(docFieldProcessorPerThread, shared_from_this()); + + 
childThreadsAndFields.put(boost::static_pointer_cast(entry->first)->consumer, childFields); + endChildThreadsAndFields.put(boost::static_pointer_cast(entry->first)->endConsumer, endChildFields); } + + consumer->flush(childThreadsAndFields, state); + endConsumer->flush(endChildThreadsAndFields, state); +} + +void DocInverter::closeDocStore(const SegmentWriteStatePtr& state) { + consumer->closeDocStore(state); + endConsumer->closeDocStore(state); +} + +void DocInverter::abort() { + consumer->abort(); + endConsumer->abort(); +} + +bool DocInverter::freeRAM() { + return consumer->freeRAM(); +} + +DocFieldConsumerPerThreadPtr DocInverter::addThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread) { + return newLucene(docFieldProcessorPerThread, shared_from_this()); +} + } diff --git a/src/core/index/DocInverterPerField.cpp b/src/core/index/DocInverterPerField.cpp index af983722..669c72b9 100644 --- a/src/core/index/DocInverterPerField.cpp +++ b/src/core/index/DocInverterPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -25,194 +25,180 @@ #include "InfoStream.h" #include "StringUtils.h" -namespace Lucene -{ - DocInverterPerField::DocInverterPerField(DocInverterPerThreadPtr perThread, FieldInfoPtr fieldInfo) - { - this->_perThread = perThread; - this->fieldInfo = fieldInfo; - docState = perThread->docState; - fieldState = perThread->fieldState; - } - - DocInverterPerField::~DocInverterPerField() - { - } - - void DocInverterPerField::initialize() - { - DocInverterPerThreadPtr perThread(_perThread); - consumer = perThread->consumer->addField(shared_from_this(), fieldInfo); - endConsumer = perThread->endConsumer->addField(shared_from_this(), fieldInfo); - } - - void DocInverterPerField::abort() - { - consumer->abort(); - endConsumer->abort(); - } - - void DocInverterPerField::processFields(Collection fields, int32_t count) - { - fieldState->reset(docState->doc->getBoost()); - - int32_t maxFieldLength = docState->maxFieldLength; - bool doInvert = consumer->start(fields, count); - DocumentsWriterPtr docWriter(docState->_docWriter); - DocInverterPerThreadPtr perThread(_perThread); - - for (int32_t i = 0; i < count; ++i) - { - FieldablePtr field = fields[i]; - if (field->isIndexed() && doInvert) - { - bool anyToken; - - if (fieldState->length > 0) - fieldState->position += docState->analyzer->getPositionIncrementGap(fieldInfo->name); - - if (!field->isTokenized()) - { - // un-tokenized field - String stringValue(field->stringValue()); - int32_t valueLength = (int32_t)stringValue.length(); - perThread->singleToken->reinit(stringValue, 0, valueLength); - fieldState->attributeSource = perThread->singleToken; - consumer->start(field); +namespace Lucene { - bool success = false; - LuceneException finally; - try - { - consumer->add(); - success = true; - } - catch (LuceneException& e) - { - finally = e; +DocInverterPerField::DocInverterPerField(const DocInverterPerThreadPtr& perThread, const FieldInfoPtr& 
fieldInfo) { + this->_perThread = perThread; + this->fieldInfo = fieldInfo; + docState = perThread->docState; + fieldState = perThread->fieldState; +} + +DocInverterPerField::~DocInverterPerField() { +} + +void DocInverterPerField::initialize() { + DocInverterPerThreadPtr perThread(_perThread); + consumer = perThread->consumer->addField(shared_from_this(), fieldInfo); + endConsumer = perThread->endConsumer->addField(shared_from_this(), fieldInfo); +} + +void DocInverterPerField::abort() { + consumer->abort(); + endConsumer->abort(); +} + +void DocInverterPerField::processFields(Collection fields, int32_t count) { + fieldState->reset(docState->doc->getBoost()); + + int32_t maxFieldLength = docState->maxFieldLength; + bool doInvert = consumer->start(fields, count); + DocumentsWriterPtr docWriter(docState->_docWriter); + DocInverterPerThreadPtr perThread(_perThread); + + for (int32_t i = 0; i < count; ++i) { + FieldablePtr field = fields[i]; + if (field->isIndexed() && doInvert) { + bool anyToken; + + if (fieldState->length > 0) { + fieldState->position += docState->analyzer->getPositionIncrementGap(fieldInfo->name); + } + + if (!field->isTokenized()) { + // un-tokenized field + String stringValue(field->stringValue()); + int32_t valueLength = (int32_t)stringValue.length(); + perThread->singleToken->reinit(stringValue, 0, valueLength); + fieldState->attributeSource = perThread->singleToken; + consumer->start(field); + + bool success = false; + LuceneException finally; + try { + consumer->add(); + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + docWriter->setAborting(); + } + finally.throwException(); + fieldState->offset += valueLength; + ++fieldState->length; + ++fieldState->position; + anyToken = (valueLength > 0); + } else { + // tokenized field + TokenStreamPtr stream; + TokenStreamPtr streamValue(field->tokenStreamValue()); + + if (streamValue) { + stream = streamValue; + } else { + // the field does not have a TokenStream, 
so we have to obtain one from the analyzer + ReaderPtr reader; // find or make Reader + ReaderPtr readerValue(field->readerValue()); + + if (readerValue) { + reader = readerValue; + } else { + String stringValue(field->stringValue()); + perThread->stringReader->init(stringValue); + reader = perThread->stringReader; } - if (!success) - docWriter->setAborting(); - finally.throwException(); - fieldState->offset += valueLength; - ++fieldState->length; - ++fieldState->position; - anyToken = (valueLength > 0); + + // Tokenize field and add to postingTable + stream = docState->analyzer->reusableTokenStream(fieldInfo->name, reader); } - else - { - // tokenized field - TokenStreamPtr stream; - TokenStreamPtr streamValue(field->tokenStreamValue()); - - if (streamValue) - stream = streamValue; - else - { - // the field does not have a TokenStream, so we have to obtain one from the analyzer - ReaderPtr reader; // find or make Reader - ReaderPtr readerValue(field->readerValue()); - - if (readerValue) - reader = readerValue; - else - { - String stringValue(field->stringValue()); - perThread->stringReader->init(stringValue); - reader = perThread->stringReader; + + // reset the TokenStream to the first token + stream->reset(); + + int32_t startLength = fieldState->length; + + LuceneException finally; + try { + int32_t offsetEnd = fieldState->offset - 1; + + bool hasMoreTokens = stream->incrementToken(); + + fieldState->attributeSource = stream; + + OffsetAttributePtr offsetAttribute(fieldState->attributeSource->addAttribute()); + PositionIncrementAttributePtr posIncrAttribute(fieldState->attributeSource->addAttribute()); + + consumer->start(field); + + while (true) { + // If we hit an exception in stream.next below (which is fairly common, eg if analyzer + // chokes on a given document), then it's non-aborting and (above) this one document + // will be marked as deleted, but still consume a docID + if (!hasMoreTokens) { + break; } - - // Tokenize field and add to postingTable - 
stream = docState->analyzer->reusableTokenStream(fieldInfo->name, reader); - } - - // reset the TokenStream to the first token - stream->reset(); - - int32_t startLength = fieldState->length; - - LuceneException finally; - try - { - int32_t offsetEnd = fieldState->offset - 1; - - bool hasMoreTokens = stream->incrementToken(); - - fieldState->attributeSource = stream; - - OffsetAttributePtr offsetAttribute(fieldState->attributeSource->addAttribute()); - PositionIncrementAttributePtr posIncrAttribute(fieldState->attributeSource->addAttribute()); - - consumer->start(field); - - while (true) - { - // If we hit an exception in stream.next below (which is fairly common, eg if analyzer - // chokes on a given document), then it's non-aborting and (above) this one document - // will be marked as deleted, but still consume a docID - if (!hasMoreTokens) - break; - - int32_t posIncr = posIncrAttribute->getPositionIncrement(); - fieldState->position += posIncr; - if (fieldState->position > 0) - --fieldState->position; - - if (posIncr == 0) - ++fieldState->numOverlap; - - bool success = false; - try - { - // If we hit an exception in here, we abort all buffered documents since the last - // flush, on the likelihood that the internal state of the consumer is now corrupt - // and should not be flushed to a new segment - consumer->add(); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success) - docWriter->setAborting(); - finally.throwException(); - ++fieldState->position; - offsetEnd = fieldState->offset + offsetAttribute->endOffset(); - if (++fieldState->length >= maxFieldLength) - { - if (docState->infoStream) - *docState->infoStream << L"maxFieldLength " << StringUtils::toString(maxFieldLength) << L" reached for field " << fieldInfo->name << L", ignoring following tokens\n"; - break; + + int32_t posIncr = posIncrAttribute->getPositionIncrement(); + fieldState->position += posIncr; + if (fieldState->position > 0) { + --fieldState->position; + } 
+ + if (posIncr == 0) { + ++fieldState->numOverlap; + } + + bool success = false; + try { + // If we hit an exception in here, we abort all buffered documents since the last + // flush, on the likelihood that the internal state of the consumer is now corrupt + // and should not be flushed to a new segment + consumer->add(); + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + docWriter->setAborting(); + } + finally.throwException(); + ++fieldState->position; + offsetEnd = fieldState->offset + offsetAttribute->endOffset(); + if (++fieldState->length >= maxFieldLength) { + if (docState->infoStream) { + *docState->infoStream << L"maxFieldLength " << StringUtils::toString(maxFieldLength) << L" reached for field " << fieldInfo->name << L", ignoring following tokens\n"; } - - hasMoreTokens = stream->incrementToken(); + break; } - - // trigger streams to perform end-of-stream operations - stream->end(); - - fieldState->offset += offsetAttribute->endOffset(); - anyToken = (fieldState->length > startLength); - } - catch (LuceneException& e) - { - finally = e; + + hasMoreTokens = stream->incrementToken(); } - stream->close(); - finally.throwException(); + + // trigger streams to perform end-of-stream operations + stream->end(); + + fieldState->offset += offsetAttribute->endOffset(); + anyToken = (fieldState->length > startLength); + } catch (LuceneException& e) { + finally = e; } - - if (anyToken) - fieldState->offset += docState->analyzer->getOffsetGap(field); - fieldState->boost *= field->getBoost(); + stream->close(); + finally.throwException(); } - - // don't hang onto the field - fields[i].reset(); + + if (anyToken) { + fieldState->offset += docState->analyzer->getOffsetGap(field); + } + fieldState->boost *= field->getBoost(); } - - consumer->finish(); - endConsumer->finish(); + + // don't hang onto the field + fields[i].reset(); } + + consumer->finish(); + endConsumer->finish(); +} + } diff --git 
a/src/core/index/DocInverterPerThread.cpp b/src/core/index/DocInverterPerThread.cpp index 2c8dc6cb..c02747d3 100644 --- a/src/core/index/DocInverterPerThread.cpp +++ b/src/core/index/DocInverterPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -18,80 +18,65 @@ #include "FieldInvertState.h" #include "ReusableStringReader.h" -namespace Lucene -{ - DocInverterPerThread::DocInverterPerThread(DocFieldProcessorPerThreadPtr docFieldProcessorPerThread, DocInverterPtr docInverter) - { - this->fieldState = newLucene(); - this->stringReader = newLucene(); - this->singleToken = newLucene(); - this->_docInverter = docInverter; - this->docState = docFieldProcessorPerThread->docState; - } - - DocInverterPerThread::~DocInverterPerThread() - { - } - - void DocInverterPerThread::initialize() - { - DocInverterPtr docInverter(_docInverter); - consumer = docInverter->consumer->addThread(shared_from_this()); - endConsumer = docInverter->endConsumer->addThread(shared_from_this()); - } - - void DocInverterPerThread::startDocument() - { - consumer->startDocument(); - endConsumer->startDocument(); - } - - DocWriterPtr DocInverterPerThread::finishDocument() - { - endConsumer->finishDocument(); - return consumer->finishDocument(); - } - - void DocInverterPerThread::abort() - { - LuceneException finally; - try - { - consumer->abort(); - } - catch (LuceneException& e) - { - finally = e; - } - try - { - endConsumer->abort(); - } - catch (LuceneException& e) - { - finally = e; - } - finally.throwException(); - } - - DocFieldConsumerPerFieldPtr DocInverterPerThread::addField(FieldInfoPtr fi) - { - return 
newLucene(shared_from_this(), fi); - } - - SingleTokenAttributeSource::SingleTokenAttributeSource() - { - termAttribute = addAttribute(); - offsetAttribute = addAttribute(); - } - - SingleTokenAttributeSource::~SingleTokenAttributeSource() - { +namespace Lucene { + +DocInverterPerThread::DocInverterPerThread(const DocFieldProcessorPerThreadPtr& docFieldProcessorPerThread, const DocInverterPtr& docInverter) { + this->fieldState = newLucene(); + this->stringReader = newLucene(); + this->singleToken = newLucene(); + this->_docInverter = docInverter; + this->docState = docFieldProcessorPerThread->docState; +} + +DocInverterPerThread::~DocInverterPerThread() { +} + +void DocInverterPerThread::initialize() { + DocInverterPtr docInverter(_docInverter); + consumer = docInverter->consumer->addThread(shared_from_this()); + endConsumer = docInverter->endConsumer->addThread(shared_from_this()); +} + +void DocInverterPerThread::startDocument() { + consumer->startDocument(); + endConsumer->startDocument(); +} + +DocWriterPtr DocInverterPerThread::finishDocument() { + endConsumer->finishDocument(); + return consumer->finishDocument(); +} + +void DocInverterPerThread::abort() { + LuceneException finally; + try { + consumer->abort(); + } catch (LuceneException& e) { + finally = e; } - - void SingleTokenAttributeSource::reinit(const String& stringValue, int32_t startOffset, int32_t endOffset) - { - termAttribute->setTermBuffer(stringValue); - offsetAttribute->setOffset(startOffset, endOffset); + try { + endConsumer->abort(); + } catch (LuceneException& e) { + finally = e; } + finally.throwException(); +} + +DocFieldConsumerPerFieldPtr DocInverterPerThread::addField(const FieldInfoPtr& fi) { + return newLucene(shared_from_this(), fi); +} + +SingleTokenAttributeSource::SingleTokenAttributeSource() { + termAttribute = addAttribute(); + offsetAttribute = addAttribute(); +} + +SingleTokenAttributeSource::~SingleTokenAttributeSource() { +} + +void SingleTokenAttributeSource::reinit(const 
String& stringValue, int32_t startOffset, int32_t endOffset) { + termAttribute->setTermBuffer(stringValue); + offsetAttribute->setOffset(startOffset, endOffset); +} + } diff --git a/src/core/index/DocumentsWriter.cpp b/src/core/index/DocumentsWriter.cpp index 6d94c951..67d107e4 100644 --- a/src/core/index/DocumentsWriter.cpp +++ b/src/core/index/DocumentsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -40,1603 +40,1411 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// Max # ThreadState instances; if there are more threads than this they share ThreadStates - const int32_t DocumentsWriter::MAX_THREAD_STATE = 5; - - /// Coarse estimates used to measure RAM usage of buffered deletes - const int32_t DocumentsWriter::OBJECT_HEADER_BYTES = 8; - #ifdef LPP_BUILD_64 - const int32_t DocumentsWriter::POINTER_NUM_BYTE = 8; - #else - const int32_t DocumentsWriter::POINTER_NUM_BYTE = 4; - #endif - const int32_t DocumentsWriter::INT_NUM_BYTE = 4; - #ifdef LPP_UNICODE_CHAR_SIZE_4 - const int32_t DocumentsWriter::CHAR_NUM_BYTE = 4; - #else - const int32_t DocumentsWriter::CHAR_NUM_BYTE = 2; - #endif - - /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object - /// with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is - /// object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since - /// it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). - /// BufferedDeletes.num is OBJ_HEADER + INT. 
- const int32_t DocumentsWriter::BYTES_PER_DEL_TERM = 8 * DocumentsWriter::POINTER_NUM_BYTE + 5 * - DocumentsWriter::OBJECT_HEADER_BYTES + 6 * - DocumentsWriter::INT_NUM_BYTE; - - /// Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). Integer is - /// OBJ_HEADER + int - const int32_t DocumentsWriter::BYTES_PER_DEL_DOCID = 2 * DocumentsWriter::POINTER_NUM_BYTE + - DocumentsWriter::OBJECT_HEADER_BYTES + - DocumentsWriter::INT_NUM_BYTE; - - /// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object - /// with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount - /// (say 24 bytes). Integer is OBJ_HEADER + INT. - const int32_t DocumentsWriter::BYTES_PER_DEL_QUERY = 5 * DocumentsWriter::POINTER_NUM_BYTE + 2 * - DocumentsWriter::OBJECT_HEADER_BYTES + 2 * - DocumentsWriter::INT_NUM_BYTE + 24; - - /// Initial chunks size of the shared byte[] blocks used to store postings data - const int32_t DocumentsWriter::BYTE_BLOCK_SHIFT = 15; - const int32_t DocumentsWriter::BYTE_BLOCK_SIZE = 1 << DocumentsWriter::BYTE_BLOCK_SHIFT; - const int32_t DocumentsWriter::BYTE_BLOCK_MASK = DocumentsWriter::BYTE_BLOCK_SIZE - 1; - const int32_t DocumentsWriter::BYTE_BLOCK_NOT_MASK = ~DocumentsWriter::BYTE_BLOCK_MASK; - - /// Initial chunk size of the shared char[] blocks used to store term text - const int32_t DocumentsWriter::CHAR_BLOCK_SHIFT = 14; - const int32_t DocumentsWriter::CHAR_BLOCK_SIZE = 1 << DocumentsWriter::CHAR_BLOCK_SHIFT; - const int32_t DocumentsWriter::CHAR_BLOCK_MASK = DocumentsWriter::CHAR_BLOCK_SIZE - 1; - - const int32_t DocumentsWriter::MAX_TERM_LENGTH = DocumentsWriter::CHAR_BLOCK_SIZE - 1; - - /// Initial chunks size of the shared int[] blocks used to store postings data - const int32_t DocumentsWriter::INT_BLOCK_SHIFT = 13; - const int32_t DocumentsWriter::INT_BLOCK_SIZE = 1 << DocumentsWriter::INT_BLOCK_SHIFT; - const int32_t 
DocumentsWriter::INT_BLOCK_MASK = DocumentsWriter::INT_BLOCK_SIZE - 1; - - const int32_t DocumentsWriter::PER_DOC_BLOCK_SIZE = 1024; - - DocumentsWriter::DocumentsWriter(DirectoryPtr directory, IndexWriterPtr writer, IndexingChainPtr indexingChain) - { - this->threadStates = Collection::newInstance(); - this->threadBindings = MapThreadDocumentsWriterThreadState::newInstance(); - this->_openFiles = HashSet::newInstance(); - this->_closedFiles = HashSet::newInstance(); - this->freeIntBlocks = Collection::newInstance(); - this->freeCharBlocks = Collection::newInstance(); - - this->directory = directory; - this->_writer = writer; - this->indexingChain = indexingChain; +namespace Lucene { + +/// Max # ThreadState instances; if there are more threads than this they share ThreadStates +const int32_t DocumentsWriter::MAX_THREAD_STATE = 5; + +/// Coarse estimates used to measure RAM usage of buffered deletes +const int32_t DocumentsWriter::OBJECT_HEADER_BYTES = 8; +#ifdef LPP_BUILD_64 +const int32_t DocumentsWriter::POINTER_NUM_BYTE = 8; +#else +const int32_t DocumentsWriter::POINTER_NUM_BYTE = 4; +#endif +const int32_t DocumentsWriter::INT_NUM_BYTE = 4; +#ifdef LPP_UNICODE_CHAR_SIZE_4 +const int32_t DocumentsWriter::CHAR_NUM_BYTE = 4; +#else +const int32_t DocumentsWriter::CHAR_NUM_BYTE = 2; +#endif + +/// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object +/// with Term key, BufferedDeletes.Num val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Term is +/// object with String field and String text (OBJ_HEADER + 2*POINTER). We don't count Term's field since +/// it's interned. Term's text is String (OBJ_HEADER + 4*INT + POINTER + OBJ_HEADER + string.length*CHAR). +/// BufferedDeletes.num is OBJ_HEADER + INT. 
+const int32_t DocumentsWriter::BYTES_PER_DEL_TERM = 8 * DocumentsWriter::POINTER_NUM_BYTE + 5 * + DocumentsWriter::OBJECT_HEADER_BYTES + 6 * + DocumentsWriter::INT_NUM_BYTE; + +/// Rough logic: del docIDs are List. Say list allocates ~2X size (2*POINTER). Integer is +/// OBJ_HEADER + int +const int32_t DocumentsWriter::BYTES_PER_DEL_DOCID = 2 * DocumentsWriter::POINTER_NUM_BYTE + + DocumentsWriter::OBJECT_HEADER_BYTES + + DocumentsWriter::INT_NUM_BYTE; + +/// Rough logic: HashMap has an array[Entry] with varying load factor (say 2 * POINTER). Entry is object +/// with Query key, Integer val, int hash, Entry next (OBJ_HEADER + 3*POINTER + INT). Query we often undercount +/// (say 24 bytes). Integer is OBJ_HEADER + INT. +const int32_t DocumentsWriter::BYTES_PER_DEL_QUERY = 5 * DocumentsWriter::POINTER_NUM_BYTE + 2 * + DocumentsWriter::OBJECT_HEADER_BYTES + 2 * + DocumentsWriter::INT_NUM_BYTE + 24; + +/// Initial chunks size of the shared byte[] blocks used to store postings data +const int32_t DocumentsWriter::BYTE_BLOCK_SHIFT = 15; +const int32_t DocumentsWriter::BYTE_BLOCK_SIZE = 1 << DocumentsWriter::BYTE_BLOCK_SHIFT; +const int32_t DocumentsWriter::BYTE_BLOCK_MASK = DocumentsWriter::BYTE_BLOCK_SIZE - 1; +const int32_t DocumentsWriter::BYTE_BLOCK_NOT_MASK = ~DocumentsWriter::BYTE_BLOCK_MASK; + +/// Initial chunk size of the shared char[] blocks used to store term text +const int32_t DocumentsWriter::CHAR_BLOCK_SHIFT = 14; +const int32_t DocumentsWriter::CHAR_BLOCK_SIZE = 1 << DocumentsWriter::CHAR_BLOCK_SHIFT; +const int32_t DocumentsWriter::CHAR_BLOCK_MASK = DocumentsWriter::CHAR_BLOCK_SIZE - 1; + +const int32_t DocumentsWriter::MAX_TERM_LENGTH = DocumentsWriter::CHAR_BLOCK_SIZE - 1; + +/// Initial chunks size of the shared int[] blocks used to store postings data +const int32_t DocumentsWriter::INT_BLOCK_SHIFT = 13; +const int32_t DocumentsWriter::INT_BLOCK_SIZE = 1 << DocumentsWriter::INT_BLOCK_SHIFT; +const int32_t DocumentsWriter::INT_BLOCK_MASK = 
DocumentsWriter::INT_BLOCK_SIZE - 1; + +const int32_t DocumentsWriter::PER_DOC_BLOCK_SIZE = 1024; + +DocumentsWriter::DocumentsWriter(const DirectoryPtr& directory, const IndexWriterPtr& writer, const IndexingChainPtr& indexingChain) { + this->threadStates = Collection::newInstance(); + this->threadBindings = MapThreadDocumentsWriterThreadState::newInstance(); + this->_openFiles = HashSet::newInstance(); + this->_closedFiles = HashSet::newInstance(); + this->freeIntBlocks = Collection::newInstance(); + this->freeCharBlocks = Collection::newInstance(); + + this->directory = directory; + this->_writer = writer; + this->indexingChain = indexingChain; +} + +DocumentsWriter::~DocumentsWriter() { +} + +void DocumentsWriter::initialize() { + docStoreOffset = 0; + nextDocID = 0; + numDocsInRAM = 0; + numDocsInStore = 0; + pauseThreads = 0; + flushPending = false; + bufferIsFull = false; + aborting = false; + maxFieldLength = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; + deletesInRAM = newLucene(false); + deletesFlushed = newLucene(true); + maxBufferedDeleteTerms = IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS; + ramBufferSize = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024); + waitQueuePauseBytes = (int64_t)((double)ramBufferSize * 0.1); + waitQueueResumeBytes = (int64_t)((double)ramBufferSize * 0.05); + freeTrigger = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024.0 * 1024.0 * 1.05); + freeLevel = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024.0 * 1024.0 * 0.95); + maxBufferedDocs = IndexWriter::DEFAULT_MAX_BUFFERED_DOCS; + flushedDocCount = 0; + closed = false; + waitQueue = newLucene(shared_from_this()); + skipDocWriter = newLucene(); + numBytesAlloc = 0; + numBytesUsed = 0; + byteBlockAllocator = newLucene(shared_from_this(), BYTE_BLOCK_SIZE); + perDocAllocator = newLucene(shared_from_this(), PER_DOC_BLOCK_SIZE); + + IndexWriterPtr writer(_writer); + this->similarity = writer->getSimilarity(); + flushedDocCount = writer->maxDoc(); + + 
consumer = indexingChain->getChain(shared_from_this()); + docFieldProcessor = boost::dynamic_pointer_cast(consumer); +} + +PerDocBufferPtr DocumentsWriter::newPerDocBuffer() { + return newLucene(shared_from_this()); +} + +IndexingChainPtr DocumentsWriter::getDefaultIndexingChain() { + static DefaultIndexingChainPtr defaultIndexingChain; + LUCENE_RUN_ONCE( + defaultIndexingChain = newLucene(); + CycleCheck::addStatic(defaultIndexingChain); + ); + return defaultIndexingChain; +} + +void DocumentsWriter::updateFlushedDocCount(int32_t n) { + SyncLock syncLock(this); + flushedDocCount += n; +} + +int32_t DocumentsWriter::getFlushedDocCount() { + SyncLock syncLock(this); + return flushedDocCount; +} + +void DocumentsWriter::setFlushedDocCount(int32_t n) { + SyncLock syncLock(this); + flushedDocCount = n; +} + +bool DocumentsWriter::hasProx() { + return docFieldProcessor ? docFieldProcessor->fieldInfos->hasProx() : true; +} + +void DocumentsWriter::setInfoStream(const InfoStreamPtr& infoStream) { + SyncLock syncLock(this); + this->infoStream = infoStream; + for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { + (*threadState)->docState->infoStream = infoStream; } - - DocumentsWriter::~DocumentsWriter() - { +} + +void DocumentsWriter::setMaxFieldLength(int32_t maxFieldLength) { + SyncLock syncLock(this); + this->maxFieldLength = maxFieldLength; + for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { + (*threadState)->docState->maxFieldLength = maxFieldLength; } - - void DocumentsWriter::initialize() - { - docStoreOffset = 0; - nextDocID = 0; - numDocsInRAM = 0; - numDocsInStore = 0; - pauseThreads = 0; - flushPending = false; - bufferIsFull = false; - aborting = false; - maxFieldLength = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; - deletesInRAM = newLucene(false); - deletesFlushed = newLucene(true); - maxBufferedDeleteTerms = 
IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS; - ramBufferSize = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024 * 1024); +} + +void DocumentsWriter::setSimilarity(const SimilarityPtr& similarity) { + SyncLock syncLock(this); + this->similarity = similarity; + for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { + (*threadState)->docState->similarity = similarity; + } +} + +void DocumentsWriter::setRAMBufferSizeMB(double mb) { + SyncLock syncLock(this); + if (mb == IndexWriter::DISABLE_AUTO_FLUSH) { + ramBufferSize = IndexWriter::DISABLE_AUTO_FLUSH; + waitQueuePauseBytes = 4 * 1024 * 1024; + waitQueueResumeBytes = 2 * 1024 * 1024; + } else { + ramBufferSize = (int64_t)(mb * 1024.0 * 1024.0); waitQueuePauseBytes = (int64_t)((double)ramBufferSize * 0.1); waitQueueResumeBytes = (int64_t)((double)ramBufferSize * 0.05); - freeTrigger = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024.0 * 1024.0 * 1.05); - freeLevel = (int64_t)(IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB * 1024.0 * 1024.0 * 0.95); - maxBufferedDocs = IndexWriter::DEFAULT_MAX_BUFFERED_DOCS; - flushedDocCount = 0; - closed = false; - waitQueue = newLucene(shared_from_this()); - skipDocWriter = newLucene(); - numBytesAlloc = 0; - numBytesUsed = 0; - byteBlockAllocator = newLucene(shared_from_this(), BYTE_BLOCK_SIZE); - perDocAllocator = newLucene(shared_from_this(), PER_DOC_BLOCK_SIZE); - - IndexWriterPtr writer(_writer); - this->similarity = writer->getSimilarity(); - flushedDocCount = writer->maxDoc(); - - consumer = indexingChain->getChain(shared_from_this()); - docFieldProcessor = boost::dynamic_pointer_cast(consumer); - } - - PerDocBufferPtr DocumentsWriter::newPerDocBuffer() - { - return newLucene(shared_from_this()); - } - - IndexingChainPtr DocumentsWriter::getDefaultIndexingChain() - { - static DefaultIndexingChainPtr defaultIndexingChain; - if (!defaultIndexingChain) - { - defaultIndexingChain = newLucene(); - 
CycleCheck::addStatic(defaultIndexingChain); - } - return defaultIndexingChain; - } - - void DocumentsWriter::updateFlushedDocCount(int32_t n) - { - SyncLock syncLock(this); - flushedDocCount += n; - } - - int32_t DocumentsWriter::getFlushedDocCount() - { - SyncLock syncLock(this); - return flushedDocCount; - } - - void DocumentsWriter::setFlushedDocCount(int32_t n) - { - SyncLock syncLock(this); - flushedDocCount = n; - } - - bool DocumentsWriter::hasProx() - { - return docFieldProcessor ? docFieldProcessor->fieldInfos->hasProx() : true; + freeTrigger = (int64_t)(1.05 * (double)ramBufferSize); + freeLevel = (int64_t)(0.95 * (double)ramBufferSize); } - - void DocumentsWriter::setInfoStream(InfoStreamPtr infoStream) - { - SyncLock syncLock(this); - this->infoStream = infoStream; - for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) - (*threadState)->docState->infoStream = infoStream; - } - - void DocumentsWriter::setMaxFieldLength(int32_t maxFieldLength) - { - SyncLock syncLock(this); - this->maxFieldLength = maxFieldLength; - for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) - (*threadState)->docState->maxFieldLength = maxFieldLength; - } - - void DocumentsWriter::setSimilarity(SimilarityPtr similarity) - { - SyncLock syncLock(this); - this->similarity = similarity; - for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) - (*threadState)->docState->similarity = similarity; - } - - void DocumentsWriter::setRAMBufferSizeMB(double mb) - { - SyncLock syncLock(this); - if (mb == IndexWriter::DISABLE_AUTO_FLUSH) - { - ramBufferSize = IndexWriter::DISABLE_AUTO_FLUSH; - waitQueuePauseBytes = 4 * 1024 * 1024; - waitQueueResumeBytes = 2 * 1024 * 1024; - } - else - { - ramBufferSize = (int64_t)(mb * 1024.0 * 1024.0); - waitQueuePauseBytes = (int64_t)((double)ramBufferSize * 0.1); - 
waitQueueResumeBytes = (int64_t)((double)ramBufferSize * 0.05); - freeTrigger = (int64_t)(1.05 * (double)ramBufferSize); - freeLevel = (int64_t)(0.95 * (double)ramBufferSize); - } - } - - double DocumentsWriter::getRAMBufferSizeMB() - { - SyncLock syncLock(this); - if (ramBufferSize == IndexWriter::DISABLE_AUTO_FLUSH) - return (double)ramBufferSize; - else - return (double)ramBufferSize / 1024.0 / 1024.0; - } - - void DocumentsWriter::setMaxBufferedDocs(int32_t count) - { - maxBufferedDocs = count; - } - - int32_t DocumentsWriter::getMaxBufferedDocs() - { - return maxBufferedDocs; +} + +double DocumentsWriter::getRAMBufferSizeMB() { + SyncLock syncLock(this); + if (ramBufferSize == IndexWriter::DISABLE_AUTO_FLUSH) { + return (double)ramBufferSize; + } else { + return (double)ramBufferSize / 1024.0 / 1024.0; } - - String DocumentsWriter::getSegment() - { - return segment; +} + +void DocumentsWriter::setMaxBufferedDocs(int32_t count) { + maxBufferedDocs = count; +} + +int32_t DocumentsWriter::getMaxBufferedDocs() { + return maxBufferedDocs; +} + +String DocumentsWriter::getSegment() { + return segment; +} + +int32_t DocumentsWriter::getNumDocsInRAM() { + return numDocsInRAM; +} + +String DocumentsWriter::getDocStoreSegment() { + SyncLock syncLock(this); + return docStoreSegment; +} + +int32_t DocumentsWriter::getDocStoreOffset() { + return docStoreOffset; +} + +String DocumentsWriter::closeDocStore() { + TestScope testScope(L"DocumentsWriter", L"closeDocStore"); + SyncLock syncLock(this); + BOOST_ASSERT(allThreadsIdle()); + + if (infoStream) { + message(L"closeDocStore: " + StringUtils::toString(_openFiles.size()) + L" files to flush to segment " + + docStoreSegment + L" numDocs=" + StringUtils::toString(numDocsInStore)); } - - int32_t DocumentsWriter::getNumDocsInRAM() - { - return numDocsInRAM; + + bool success = false; + LuceneException finally; + String s; + try { + initFlushState(true); + _closedFiles.clear(); + + consumer->closeDocStore(flushState); + 
BOOST_ASSERT(_openFiles.empty()); + + s = docStoreSegment; + docStoreSegment.clear(); + docStoreOffset = 0; + numDocsInStore = 0; + success = true; + } catch (LuceneException& e) { + finally = e; } - - String DocumentsWriter::getDocStoreSegment() - { - SyncLock syncLock(this); - return docStoreSegment; + if (!success) { + abort(); } - - int32_t DocumentsWriter::getDocStoreOffset() - { - return docStoreOffset; + finally.throwException(); + return s; +} + +HashSet DocumentsWriter::abortedFiles() { + return _abortedFiles; +} + +void DocumentsWriter::message(const String& message) { + if (infoStream) { + *infoStream << L"DW " << message << L"\n"; } - - String DocumentsWriter::closeDocStore() - { - TestScope testScope(L"DocumentsWriter", L"closeDocStore"); - SyncLock syncLock(this); - BOOST_ASSERT(allThreadsIdle()); - - if (infoStream) - { - message(L"closeDocStore: " + StringUtils::toString(_openFiles.size()) + L" files to flush to segment " + - docStoreSegment + L" numDocs=" + StringUtils::toString(numDocsInStore)); +} + +HashSet DocumentsWriter::openFiles() { + SyncLock syncLock(this); + return HashSet::newInstance(_openFiles.begin(), _openFiles.end()); +} + +HashSet DocumentsWriter::closedFiles() { + SyncLock syncLock(this); + return HashSet::newInstance(_closedFiles.begin(), _closedFiles.end()); +} + +void DocumentsWriter::addOpenFile(const String& name) { + SyncLock syncLock(this); + BOOST_ASSERT(!_openFiles.contains(name)); + _openFiles.add(name); +} + +void DocumentsWriter::removeOpenFile(const String& name) { + SyncLock syncLock(this); + BOOST_ASSERT(_openFiles.contains(name)); + _openFiles.remove(name); + _closedFiles.add(name); +} + +void DocumentsWriter::setAborting() { + SyncLock syncLock(this); + aborting = true; +} + +void DocumentsWriter::abort() { + TestScope testScope(L"DocumentsWriter", L"abort"); + SyncLock syncLock(this); + LuceneException finally; + try { + if (infoStream) { + message(L"docWriter: now abort"); } - - bool success = false; - 
LuceneException finally; - String s; - try - { - initFlushState(true); - _closedFiles.clear(); - - consumer->closeDocStore(flushState); - BOOST_ASSERT(_openFiles.empty()); - - s = docStoreSegment; + + // Forcefully remove waiting ThreadStates from line + waitQueue->abort(); + + // Wait for all other threads to finish with DocumentsWriter + pauseAllThreads(); + + try { + BOOST_ASSERT(waitQueue->numWaiting == 0); + + waitQueue->waitingBytes = 0; + + try { + _abortedFiles = openFiles(); + } catch (...) { + _abortedFiles.reset(); + } + + deletesInRAM->clear(); + deletesFlushed->clear(); + _openFiles.clear(); + + for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { + try { + (*threadState)->consumer->abort(); + } catch (...) { + } + } + + try { + consumer->abort(); + } catch (...) { + } + docStoreSegment.clear(); - docStoreOffset = 0; numDocsInStore = 0; - success = true; + docStoreOffset = 0; + + // Reset all postings data + doAfterFlush(); + } catch (LuceneException& e) { + finally = e; } - catch (LuceneException& e) - { + resumeAllThreads(); + } catch (LuceneException& e) { + if (finally.isNull()) { finally = e; } - if (!success) - abort(); - finally.throwException(); - return s; - } - - HashSet DocumentsWriter::abortedFiles() - { - return _abortedFiles; - } - - void DocumentsWriter::message(const String& message) - { - if (infoStream) - *infoStream << L"DW " << message << L"\n"; } - - HashSet DocumentsWriter::openFiles() - { - SyncLock syncLock(this); - return HashSet::newInstance(_openFiles.begin(), _openFiles.end()); + aborting = false; + notifyAll(); + if (infoStream) { + message(L"docWriter: done abort"); } - - HashSet DocumentsWriter::closedFiles() - { - SyncLock syncLock(this); - return HashSet::newInstance(_closedFiles.begin(), _closedFiles.end()); + finally.throwException(); +} + +void DocumentsWriter::doAfterFlush() { + // All ThreadStates should be idle when we are called + 
BOOST_ASSERT(allThreadsIdle()); + threadBindings.clear(); + waitQueue->reset(); + segment.clear(); + numDocsInRAM = 0; + nextDocID = 0; + bufferIsFull = false; + flushPending = false; + for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { + (*threadState)->doAfterFlush(); + } + numBytesUsed = 0; +} + +bool DocumentsWriter::pauseAllThreads() { + SyncLock syncLock(this); + ++pauseThreads; + while (!allThreadsIdle()) { + wait(1000); } - - void DocumentsWriter::addOpenFile(const String& name) - { - SyncLock syncLock(this); - BOOST_ASSERT(!_openFiles.contains(name)); - _openFiles.add(name); + return aborting; +} + +void DocumentsWriter::resumeAllThreads() { + SyncLock syncLock(this); + --pauseThreads; + BOOST_ASSERT(pauseThreads >= 0); + if (pauseThreads == 0) { + notifyAll(); } - - void DocumentsWriter::removeOpenFile(const String& name) - { - SyncLock syncLock(this); - BOOST_ASSERT(_openFiles.contains(name)); - _openFiles.remove(name); - _closedFiles.add(name); +} + +bool DocumentsWriter::allThreadsIdle() { + SyncLock syncLock(this); + for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { + if (!(*threadState)->isIdle) { + return false; + } } - - void DocumentsWriter::setAborting() - { - SyncLock syncLock(this); - aborting = true; + return true; +} + +bool DocumentsWriter::anyChanges() { + SyncLock syncLock(this); + return (numDocsInRAM != 0 || deletesInRAM->numTerms != 0 || !deletesInRAM->docIDs.empty() || !deletesInRAM->queries.empty()); +} + +void DocumentsWriter::initFlushState(bool onlyDocStore) { + SyncLock syncLock(this); + initSegmentName(onlyDocStore); + flushState = newLucene(shared_from_this(), directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, IndexWriterPtr(_writer)->getTermIndexInterval()); +} + +int32_t DocumentsWriter::flush(bool _closeDocStore) { + SyncLock syncLock(this); + BOOST_ASSERT(allThreadsIdle()); + + 
BOOST_ASSERT(numDocsInRAM > 0); + + BOOST_ASSERT(nextDocID == numDocsInRAM); + BOOST_ASSERT(waitQueue->numWaiting == 0); + BOOST_ASSERT(waitQueue->waitingBytes == 0); + + initFlushState(false); + + docStoreOffset = numDocsInStore; + + if (infoStream) { + message(L"flush postings as segment " + flushState->segmentName + L" numDocs=" + StringUtils::toString(numDocsInRAM)); } - - void DocumentsWriter::abort() - { - TestScope testScope(L"DocumentsWriter", L"abort"); - SyncLock syncLock(this); - LuceneException finally; - try - { - if (infoStream) - message(L"docWriter: now abort"); - - // Forcefully remove waiting ThreadStates from line - waitQueue->abort(); - - // Wait for all other threads to finish with DocumentsWriter - pauseAllThreads(); - - try - { - BOOST_ASSERT(waitQueue->numWaiting == 0); - - waitQueue->waitingBytes = 0; - - try - { - _abortedFiles = openFiles(); - } - catch (...) - { - _abortedFiles.reset(); - } - - deletesInRAM->clear(); - deletesFlushed->clear(); - _openFiles.clear(); - - for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) - { - try - { - (*threadState)->consumer->abort(); - } - catch (...) - { - } - } - - try - { - consumer->abort(); - } - catch (...) 
- { - } - - docStoreSegment.clear(); - numDocsInStore = 0; - docStoreOffset = 0; - - // Reset all postings data - doAfterFlush(); - } - catch (LuceneException& e) - { - finally = e; - } - resumeAllThreads(); + + bool success = false; + LuceneException finally; + + try { + if (_closeDocStore) { + BOOST_ASSERT(!flushState->docStoreSegmentName.empty()); + BOOST_ASSERT(flushState->docStoreSegmentName == flushState->segmentName); + + closeDocStore(); + flushState->numDocsInStore = 0; } - catch (LuceneException& e) - { - if (finally.isNull()) - finally = e; + + Collection threads(Collection::newInstance()); + for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { + threads.add((*threadState)->consumer); } - aborting = false; - notifyAll(); - if (infoStream) - message(L"docWriter: done abort"); - finally.throwException(); - } - - void DocumentsWriter::doAfterFlush() - { - // All ThreadStates should be idle when we are called - BOOST_ASSERT(allThreadsIdle()); - threadBindings.clear(); - waitQueue->reset(); - segment.clear(); - numDocsInRAM = 0; - nextDocID = 0; - bufferIsFull = false; - flushPending = false; - for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) - (*threadState)->doAfterFlush(); - numBytesUsed = 0; + consumer->flush(threads, flushState); + + if (infoStream) { + SegmentInfoPtr si(newLucene(flushState->segmentName, flushState->numDocs, directory)); + int64_t newSegmentSize = si->sizeInBytes(); + if (infoStream) { + message(L" oldRAMSize=" + StringUtils::toString(numBytesUsed) + L" newFlushedSize=" + + StringUtils::toString(newSegmentSize) + L" docs/MB=" + + StringUtils::toString((double)numDocsInRAM / ((double)newSegmentSize / 1024.0 / 1024.0)) + + L" new/old=" + StringUtils::toString(100.0 * (double)newSegmentSize / (double)numBytesUsed) + L"%"); + } + } + + flushedDocCount += flushState->numDocs; + + doAfterFlush(); + + success = true; + } 
catch (LuceneException& e) { + finally = e; } - - bool DocumentsWriter::pauseAllThreads() - { - SyncLock syncLock(this); - ++pauseThreads; - while (!allThreadsIdle()) - wait(1000); - return aborting; + if (!success) { + abort(); } - - void DocumentsWriter::resumeAllThreads() - { - SyncLock syncLock(this); - --pauseThreads; - BOOST_ASSERT(pauseThreads >= 0); - if (pauseThreads == 0) - notifyAll(); + finally.throwException(); + + BOOST_ASSERT(waitQueue->waitingBytes == 0); + + return flushState->numDocs; +} + +HashSet DocumentsWriter::getFlushedFiles() { + return flushState->flushedFiles; +} + +void DocumentsWriter::createCompoundFile(const String& segment) { + CompoundFileWriterPtr cfsWriter(newLucene(directory, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION())); + for (HashSet::iterator flushedFile = flushState->flushedFiles.begin(); flushedFile != flushState->flushedFiles.end(); ++flushedFile) { + cfsWriter->addFile(*flushedFile); } - - bool DocumentsWriter::allThreadsIdle() - { - SyncLock syncLock(this); - for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) - { - if (!(*threadState)->isIdle) - return false; - } + + // Perform the merge + cfsWriter->close(); +} + +bool DocumentsWriter::setFlushPending() { + SyncLock syncLock(this); + if (flushPending) { + return false; + } else { + flushPending = true; return true; } - - bool DocumentsWriter::anyChanges() - { - SyncLock syncLock(this); - return (numDocsInRAM != 0 || deletesInRAM->numTerms != 0 || !deletesInRAM->docIDs.empty() || !deletesInRAM->queries.empty()); +} + +void DocumentsWriter::clearFlushPending() { + SyncLock syncLock(this); + flushPending = false; +} + +void DocumentsWriter::pushDeletes() { + SyncLock syncLock(this); + deletesFlushed->update(deletesInRAM); +} + +void DocumentsWriter::close() { + SyncLock syncLock(this); + closed = true; + notifyAll(); +} + +void DocumentsWriter::initSegmentName(bool onlyDocStore) { + SyncLock 
syncLock(this); + if (segment.empty() && (!onlyDocStore || docStoreSegment.empty())) { + segment = IndexWriterPtr(_writer)->newSegmentName(); + BOOST_ASSERT(numDocsInRAM == 0); } - - void DocumentsWriter::initFlushState(bool onlyDocStore) - { - SyncLock syncLock(this); - initSegmentName(onlyDocStore); - flushState = newLucene(shared_from_this(), directory, segment, docStoreSegment, numDocsInRAM, numDocsInStore, IndexWriterPtr(_writer)->getTermIndexInterval()); + if (docStoreSegment.empty()) { + docStoreSegment = segment; + BOOST_ASSERT(numDocsInStore == 0); } - - int32_t DocumentsWriter::flush(bool _closeDocStore) - { - SyncLock syncLock(this); - BOOST_ASSERT(allThreadsIdle()); - - BOOST_ASSERT(numDocsInRAM > 0); - - BOOST_ASSERT(nextDocID == numDocsInRAM); - BOOST_ASSERT(waitQueue->numWaiting == 0); - BOOST_ASSERT(waitQueue->waitingBytes == 0); - - initFlushState(false); - - docStoreOffset = numDocsInStore; - - if (infoStream) - message(L"flush postings as segment " + flushState->segmentName + L" numDocs=" + StringUtils::toString(numDocsInRAM)); - - bool success = false; - LuceneException finally; +} - try - { - if (_closeDocStore) - { - BOOST_ASSERT(!flushState->docStoreSegmentName.empty()); - BOOST_ASSERT(flushState->docStoreSegmentName == flushState->segmentName); - - closeDocStore(); - flushState->numDocsInStore = 0; - } - - Collection threads(Collection::newInstance()); - for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) - threads.add((*threadState)->consumer); - consumer->flush(threads, flushState); - - if (infoStream) - { - SegmentInfoPtr si(newLucene(flushState->segmentName, flushState->numDocs, directory)); - int64_t newSegmentSize = si->sizeInBytes(); - if (infoStream) - { - message(L" oldRAMSize=" + StringUtils::toString(numBytesUsed) + L" newFlushedSize=" + - StringUtils::toString(newSegmentSize) + L" docs/MB=" + - StringUtils::toString((double)numDocsInRAM / ((double)newSegmentSize / 1024.0 
/ 1024.0)) + - L" new/old=" + StringUtils::toString(100.0 * (double)newSegmentSize / (double)numBytesUsed) + L"%"); - } +DocumentsWriterThreadStatePtr DocumentsWriter::getThreadState(const DocumentPtr& doc, const TermPtr& delTerm) { + SyncLock syncLock(this); + // First, find a thread state. If this thread already has affinity to a specific ThreadState, use that one again. + DocumentsWriterThreadStatePtr state(threadBindings.get(LuceneThread::currentId())); + if (!state) { + // First time this thread has called us since last flush. Find the least loaded thread state + DocumentsWriterThreadStatePtr minThreadState; + for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) { + if (!minThreadState || (*threadState)->numThreads < minThreadState->numThreads) { + minThreadState = *threadState; } - - flushedDocCount += flushState->numDocs; - - doAfterFlush(); - - success = true; } - catch (LuceneException& e) - { - finally = e; + if (minThreadState && (minThreadState->numThreads == 0 || threadStates.size() >= MAX_THREAD_STATE)) { + state = minThreadState; + ++state->numThreads; + } else { + // Just create a new "private" thread state + threadStates.resize(threadStates.size() + 1); + state = newLucene(shared_from_this()); + threadStates[threadStates.size() - 1] = state; } - if (!success) - abort(); - finally.throwException(); - - BOOST_ASSERT(waitQueue->waitingBytes == 0); - - return flushState->numDocs; - } - - HashSet DocumentsWriter::getFlushedFiles() - { - return flushState->flushedFiles; - } - - void DocumentsWriter::createCompoundFile(const String& segment) - { - CompoundFileWriterPtr cfsWriter(newLucene(directory, segment + L"." 
+ IndexFileNames::COMPOUND_FILE_EXTENSION())); - for (HashSet::iterator flushedFile = flushState->flushedFiles.begin(); flushedFile != flushState->flushedFiles.end(); ++flushedFile) - cfsWriter->addFile(*flushedFile); - - // Perform the merge - cfsWriter->close(); + threadBindings.put(LuceneThread::currentId(), state); } - - bool DocumentsWriter::setFlushPending() - { - SyncLock syncLock(this); - if (flushPending) - return false; - else - { + + // Next, wait until my thread state is idle (in case it's shared with other threads) and for threads to + // not be paused nor a flush pending + waitReady(state); + + // Allocate segment name if this is the first doc since last flush + initSegmentName(false); + + state->isIdle = false; + + bool success = false; + LuceneException finally; + try { + state->docState->docID = nextDocID; + + BOOST_ASSERT(IndexWriterPtr(_writer)->testPoint(L"DocumentsWriter.ThreadState.init start")); + + if (delTerm) { + addDeleteTerm(delTerm, state->docState->docID); + state->doFlushAfter = timeToFlushDeletes(); + } + + BOOST_ASSERT(IndexWriterPtr(_writer)->testPoint(L"DocumentsWriter.ThreadState.init after delTerm")); + + ++nextDocID; + ++numDocsInRAM; + + // We must at this point commit to flushing to ensure we always get N docs when we flush by doc + // count, even if > 1 thread is adding documents + if (!flushPending && maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) { flushPending = true; - return true; + state->doFlushAfter = true; } + + success = true; + } catch (LuceneException& e) { + finally = e; } - - void DocumentsWriter::clearFlushPending() - { - SyncLock syncLock(this); - flushPending = false; - } - - void DocumentsWriter::pushDeletes() - { - SyncLock syncLock(this); - deletesFlushed->update(deletesInRAM); - } - - void DocumentsWriter::close() - { - SyncLock syncLock(this); - closed = true; + if (!success) { + // Forcefully idle this ThreadState + state->isIdle = true; notifyAll(); - } - - void 
DocumentsWriter::initSegmentName(bool onlyDocStore) - { - SyncLock syncLock(this); - if (segment.empty() && (!onlyDocStore || docStoreSegment.empty())) - { - segment = IndexWriterPtr(_writer)->newSegmentName(); - BOOST_ASSERT(numDocsInRAM == 0); - } - if (docStoreSegment.empty()) - { - docStoreSegment = segment; - BOOST_ASSERT(numDocsInStore == 0); + if (state->doFlushAfter) { + state->doFlushAfter = false; + flushPending = false; } } - - DocumentsWriterThreadStatePtr DocumentsWriter::getThreadState(DocumentPtr doc, TermPtr delTerm) - { - SyncLock syncLock(this); - // First, find a thread state. If this thread already has affinity to a specific ThreadState, use that one again. - DocumentsWriterThreadStatePtr state(threadBindings.get(LuceneThread::currentId())); - if (!state) - { - // First time this thread has called us since last flush. Find the least loaded thread state - DocumentsWriterThreadStatePtr minThreadState; - for (Collection::iterator threadState = threadStates.begin(); threadState != threadStates.end(); ++threadState) - { - if (!minThreadState || (*threadState)->numThreads < minThreadState->numThreads) - minThreadState = *threadState; - } - if (minThreadState && (minThreadState->numThreads == 0 || threadStates.size() >= MAX_THREAD_STATE)) - { - state = minThreadState; - ++state->numThreads; - } - else - { - // Just create a new "private" thread state - threadStates.resize(threadStates.size() + 1); - state = newLucene(shared_from_this()); - threadStates[threadStates.size() - 1] = state; - } - threadBindings.put(LuceneThread::currentId(), state); - } - - // Next, wait until my thread state is idle (in case it's shared with other threads) and for threads to - // not be paused nor a flush pending - waitReady(state); - - // Allocate segment name if this is the first doc since last flush - initSegmentName(false); - - state->isIdle = false; - - bool success = false; - LuceneException finally; - try - { - state->docState->docID = nextDocID; - - 
BOOST_ASSERT(IndexWriterPtr(_writer)->testPoint(L"DocumentsWriter.ThreadState.init start")); - - if (delTerm) - { - addDeleteTerm(delTerm, state->docState->docID); - state->doFlushAfter = timeToFlushDeletes(); - } - - BOOST_ASSERT(IndexWriterPtr(_writer)->testPoint(L"DocumentsWriter.ThreadState.init after delTerm")); - - ++nextDocID; - ++numDocsInRAM; - - // We must at this point commit to flushing to ensure we always get N docs when we flush by doc - // count, even if > 1 thread is adding documents - if (!flushPending && maxBufferedDocs != IndexWriter::DISABLE_AUTO_FLUSH && numDocsInRAM >= maxBufferedDocs) - { - flushPending = true; - state->doFlushAfter = true; - } - - success = true; - } - catch (LuceneException& e) - { + finally.throwException(); + + return state; +} + +bool DocumentsWriter::addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer) { + return updateDocument(doc, analyzer, TermPtr()); +} + +bool DocumentsWriter::updateDocument(const TermPtr& t, const DocumentPtr& doc, const AnalyzerPtr& analyzer) { + return updateDocument(doc, analyzer, t); +} + +bool DocumentsWriter::updateDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer, const TermPtr& delTerm) { + // This call is synchronized but fast + DocumentsWriterThreadStatePtr state(getThreadState(doc, delTerm)); + + DocStatePtr docState(state->docState); + docState->doc = doc; + docState->analyzer = analyzer; + + bool success = false; + LuceneException finally; + try { + // This call is not synchronized and does all the work + DocWriterPtr perDoc; + try { + perDoc = state->consumer->processDocument(); + } catch (LuceneException& e) { finally = e; } - if (!success) - { - // Forcefully idle this ThreadState - state->isIdle = true; - notifyAll(); - if (state->doFlushAfter) - { - state->doFlushAfter = false; - flushPending = false; - } - } + docState->clear(); finally.throwException(); - - return state; - } - - bool DocumentsWriter::addDocument(DocumentPtr doc, AnalyzerPtr analyzer) - { 
- return updateDocument(doc, analyzer, TermPtr()); - } - - bool DocumentsWriter::updateDocument(TermPtr t, DocumentPtr doc, AnalyzerPtr analyzer) - { - return updateDocument(doc, analyzer, t); - } - - bool DocumentsWriter::updateDocument(DocumentPtr doc, AnalyzerPtr analyzer, TermPtr delTerm) - { + // This call is synchronized but fast - DocumentsWriterThreadStatePtr state(getThreadState(doc, delTerm)); - - DocStatePtr docState(state->docState); - docState->doc = doc; - docState->analyzer = analyzer; - - bool success = false; - LuceneException finally; - try - { - // This call is not synchronized and does all the work - DocWriterPtr perDoc; - try - { - perDoc = state->consumer->processDocument(); - } - catch (LuceneException& e) - { + finishDocument(state, perDoc); + + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + SyncLock syncLock(this); + if (aborting) { + state->isIdle = true; + notifyAll(); + abort(); + } else { + skipDocWriter->docID = docState->docID; + bool success2 = false; + try { + waitQueue->add(skipDocWriter); + success2 = true; + } catch (LuceneException& e) { finally = e; } - docState->clear(); - finally.throwException(); - - // This call is synchronized but fast - finishDocument(state, perDoc); - - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success) - { - SyncLock syncLock(this); - if (aborting) - { + if (!success2) { state->isIdle = true; notifyAll(); abort(); + return false; } - else - { - skipDocWriter->docID = docState->docID; - bool success2 = false; - try - { - waitQueue->add(skipDocWriter); - success2 = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success2) - { - state->isIdle = true; - notifyAll(); - abort(); - return false; - } - - state->isIdle = true; + + state->isIdle = true; + notifyAll(); + + // If this thread state had decided to flush, we must clear it so another thread can flush + if (state->doFlushAfter) { + state->doFlushAfter = 
false; + flushPending = false; notifyAll(); - - // If this thread state had decided to flush, we must clear it so another thread can flush - if (state->doFlushAfter) - { - state->doFlushAfter = false; - flushPending = false; - notifyAll(); - } - - // Immediately mark this document as deleted since likely it was partially added. This keeps - // indexing as "all or none" (atomic) when adding a document - addDeleteDocID(state->docState->docID); } - } - finally.throwException(); - - return (state->doFlushAfter || timeToFlushDeletes()); - } - - int32_t DocumentsWriter::getNumBufferedDeleteTerms() - { - SyncLock syncLock(this); - return deletesInRAM->numTerms; - } - - MapTermNum DocumentsWriter::getBufferedDeleteTerms() - { - SyncLock syncLock(this); - return deletesInRAM->terms; - } - - void DocumentsWriter::remapDeletes(SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergeDocCount) - { - SyncLock syncLock(this); - if (!docMaps) - { - // The merged segments had no deletes so docIDs did not change and we have nothing to do - return; + // Immediately mark this document as deleted since likely it was partially added. 
This keeps + // indexing as "all or none" (atomic) when adding a document + addDeleteDocID(state->docState->docID); } - MergeDocIDRemapperPtr mapper(newLucene(infos, docMaps, delCounts, merge, mergeDocCount)); - deletesInRAM->remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); - deletesFlushed->remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); - flushedDocCount -= mapper->docShift; - } - - void DocumentsWriter::waitReady(DocumentsWriterThreadStatePtr state) - { - SyncLock syncLock(this); - while (!closed && ((state && !state->isIdle) || pauseThreads != 0 || flushPending || aborting)) - wait(1000); - if (closed) - boost::throw_exception(AlreadyClosedException(L"this IndexWriter is closed")); - } - - bool DocumentsWriter::bufferDeleteTerms(Collection terms) - { - SyncLock syncLock(this); - waitReady(DocumentsWriterThreadStatePtr()); - for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) - addDeleteTerm(*term, numDocsInRAM); - return timeToFlushDeletes(); - } - - bool DocumentsWriter::bufferDeleteTerm(TermPtr term) - { - SyncLock syncLock(this); - waitReady(DocumentsWriterThreadStatePtr()); - addDeleteTerm(term, numDocsInRAM); - return timeToFlushDeletes(); - } - - bool DocumentsWriter::bufferDeleteQueries(Collection queries) - { - SyncLock syncLock(this); - waitReady(DocumentsWriterThreadStatePtr()); - for (Collection::iterator query = queries.begin(); query != queries.end(); ++query) - addDeleteQuery(*query, numDocsInRAM); - return timeToFlushDeletes(); - } - - bool DocumentsWriter::bufferDeleteQuery(QueryPtr query) - { - SyncLock syncLock(this); - waitReady(DocumentsWriterThreadStatePtr()); - addDeleteQuery(query, numDocsInRAM); - return timeToFlushDeletes(); } - - bool DocumentsWriter::deletesFull() - { - SyncLock syncLock(this); - return ((ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && - (deletesInRAM->bytesUsed + deletesFlushed->bytesUsed + numBytesUsed) >= ramBufferSize) || - (maxBufferedDeleteTerms != 
IndexWriter::DISABLE_AUTO_FLUSH && - ((deletesInRAM->size() + deletesFlushed->size()) >= maxBufferedDeleteTerms))); - } - - bool DocumentsWriter::doApplyDeletes() - { - SyncLock syncLock(this); - // Very similar to deletesFull(), except we don't count numBytesAlloc, because we are checking whether - // deletes (alone) are consuming too many resources now and thus should be applied. We apply deletes - // if RAM usage is > 1/2 of our allowed RAM buffer, to prevent too-frequent flushing of a long tail of - // tiny segments when merges (which always apply deletes) are infrequent. - return ((ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && - (deletesInRAM->bytesUsed + deletesFlushed->bytesUsed) >= ramBufferSize / 2) || - (maxBufferedDeleteTerms != IndexWriter::DISABLE_AUTO_FLUSH && - ((deletesInRAM->size() + deletesFlushed->size()) >= maxBufferedDeleteTerms))); + + finally.throwException(); + + return (state->doFlushAfter || timeToFlushDeletes()); +} + +int32_t DocumentsWriter::getNumBufferedDeleteTerms() { + SyncLock syncLock(this); + return deletesInRAM->numTerms; +} + +MapTermNum DocumentsWriter::getBufferedDeleteTerms() { + SyncLock syncLock(this); + return deletesInRAM->terms; +} + +void DocumentsWriter::remapDeletes(const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergeDocCount) { + SyncLock syncLock(this); + if (!docMaps) { + // The merged segments had no deletes so docIDs did not change and we have nothing to do + return; + } + MergeDocIDRemapperPtr mapper(newLucene(infos, docMaps, delCounts, merge, mergeDocCount)); + deletesInRAM->remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); + deletesFlushed->remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); + flushedDocCount -= mapper->docShift; +} + +void DocumentsWriter::waitReady(const DocumentsWriterThreadStatePtr& state) { + SyncLock syncLock(this); + while (!closed && ((state && !state->isIdle) || pauseThreads != 0 
|| flushPending || aborting)) { + wait(1000); } - - bool DocumentsWriter::timeToFlushDeletes() - { - SyncLock syncLock(this); - return ((bufferIsFull || deletesFull()) && setFlushPending()); + if (closed) { + boost::throw_exception(AlreadyClosedException(L"this IndexWriter is closed")); } - - bool DocumentsWriter::checkDeleteTerm(TermPtr term) - { - if (term) - BOOST_ASSERT(!lastDeleteTerm || term->compareTo(lastDeleteTerm) > 0); - lastDeleteTerm = term; - return true; +} + +bool DocumentsWriter::bufferDeleteTerms(Collection terms) { + SyncLock syncLock(this); + waitReady(DocumentsWriterThreadStatePtr()); + for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { + addDeleteTerm(*term, numDocsInRAM); } - - void DocumentsWriter::setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms) - { - this->maxBufferedDeleteTerms = maxBufferedDeleteTerms; + return timeToFlushDeletes(); +} + +bool DocumentsWriter::bufferDeleteTerm(const TermPtr& term) { + SyncLock syncLock(this); + waitReady(DocumentsWriterThreadStatePtr()); + addDeleteTerm(term, numDocsInRAM); + return timeToFlushDeletes(); +} + +bool DocumentsWriter::bufferDeleteQueries(Collection queries) { + SyncLock syncLock(this); + waitReady(DocumentsWriterThreadStatePtr()); + for (Collection::iterator query = queries.begin(); query != queries.end(); ++query) { + addDeleteQuery(*query, numDocsInRAM); } - - int32_t DocumentsWriter::getMaxBufferedDeleteTerms() - { - return maxBufferedDeleteTerms; + return timeToFlushDeletes(); +} + +bool DocumentsWriter::bufferDeleteQuery(const QueryPtr& query) { + SyncLock syncLock(this); + waitReady(DocumentsWriterThreadStatePtr()); + addDeleteQuery(query, numDocsInRAM); + return timeToFlushDeletes(); +} + +bool DocumentsWriter::deletesFull() { + SyncLock syncLock(this); + return ((ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && + (deletesInRAM->bytesUsed + deletesFlushed->bytesUsed + numBytesUsed) >= ramBufferSize) || + (maxBufferedDeleteTerms != 
IndexWriter::DISABLE_AUTO_FLUSH && + ((deletesInRAM->size() + deletesFlushed->size()) >= maxBufferedDeleteTerms))); +} + +bool DocumentsWriter::doApplyDeletes() { + SyncLock syncLock(this); + // Very similar to deletesFull(), except we don't count numBytesAlloc, because we are checking whether + // deletes (alone) are consuming too many resources now and thus should be applied. We apply deletes + // if RAM usage is > 1/2 of our allowed RAM buffer, to prevent too-frequent flushing of a long tail of + // tiny segments when merges (which always apply deletes) are infrequent. + return ((ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && + (deletesInRAM->bytesUsed + deletesFlushed->bytesUsed) >= ramBufferSize / 2) || + (maxBufferedDeleteTerms != IndexWriter::DISABLE_AUTO_FLUSH && + ((deletesInRAM->size() + deletesFlushed->size()) >= maxBufferedDeleteTerms))); +} + +bool DocumentsWriter::timeToFlushDeletes() { + SyncLock syncLock(this); + return ((bufferIsFull || deletesFull()) && setFlushPending()); +} + +bool DocumentsWriter::checkDeleteTerm(const TermPtr& term) { + if (term) { + BOOST_ASSERT(!lastDeleteTerm || term->compareTo(lastDeleteTerm) > 0); } - - bool DocumentsWriter::hasDeletes() - { - SyncLock syncLock(this); - return deletesFlushed->any(); + lastDeleteTerm = term; + return true; +} + +void DocumentsWriter::setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms) { + this->maxBufferedDeleteTerms = maxBufferedDeleteTerms; +} + +int32_t DocumentsWriter::getMaxBufferedDeleteTerms() { + return maxBufferedDeleteTerms; +} + +bool DocumentsWriter::hasDeletes() { + SyncLock syncLock(this); + return deletesFlushed->any(); +} + +bool DocumentsWriter::applyDeletes(const SegmentInfosPtr& infos) { + SyncLock syncLock(this); + if (!hasDeletes()) { + return false; } - - bool DocumentsWriter::applyDeletes(SegmentInfosPtr infos) - { - SyncLock syncLock(this); - if (!hasDeletes()) - return false; - - if (infoStream) - { - message(L"apply " + 
StringUtils::toString(deletesFlushed->numTerms) + L" buffered deleted terms and " + - StringUtils::toString(deletesFlushed->docIDs.size()) + L" deleted docIDs and " + - StringUtils::toString(deletesFlushed->queries.size()) + L" deleted queries on " + - StringUtils::toString(infos->size()) + L" segments."); - } - - int32_t infosEnd = infos->size(); - - int32_t docStart = 0; - bool any = false; - IndexWriterPtr writer(_writer); - - for (int32_t i = 0; i < infosEnd; ++i) - { - // Make sure we never attempt to apply deletes to segment in external dir - BOOST_ASSERT(infos->info(i)->dir == directory); - - SegmentReaderPtr reader(writer->readerPool->get(infos->info(i), false)); - LuceneException finally; - try - { - if (applyDeletes(reader, docStart)) - any = true; - docStart += reader->maxDoc(); - } - catch (LuceneException& e) - { - finally = e; - } - writer->readerPool->release(reader); - finally.throwException(); - } - - deletesFlushed->clear(); - - return any; + + if (infoStream) { + message(L"apply " + StringUtils::toString(deletesFlushed->numTerms) + L" buffered deleted terms and " + + StringUtils::toString(deletesFlushed->docIDs.size()) + L" deleted docIDs and " + + StringUtils::toString(deletesFlushed->queries.size()) + L" deleted queries on " + + StringUtils::toString(infos->size()) + L" segments."); } - - bool DocumentsWriter::applyDeletes(IndexReaderPtr reader, int32_t docIDStart) - { - SyncLock syncLock(this); - int32_t docEnd = docIDStart + reader->maxDoc(); - bool any = false; - - BOOST_ASSERT(checkDeleteTerm(TermPtr())); - - // Delete by term - TermDocsPtr docs(reader->termDocs()); + + int32_t infosEnd = infos->size(); + + int32_t docStart = 0; + bool any = false; + IndexWriterPtr writer(_writer); + + for (int32_t i = 0; i < infosEnd; ++i) { + // Make sure we never attempt to apply deletes to segment in external dir + BOOST_ASSERT(infos->info(i)->dir == directory); + + SegmentReaderPtr reader(writer->readerPool->get(infos->info(i), false)); LuceneException 
finally; - try - { - for (MapTermNum::iterator entry = deletesFlushed->terms.begin(); entry != deletesFlushed->terms.end(); ++entry) - { - // we should be iterating a Map here, so terms better be in order - BOOST_ASSERT(checkDeleteTerm(entry->first)); - docs->seek(entry->first); - int32_t limit = entry->second->getNum(); - while (docs->next()) - { - int32_t docID = docs->doc(); - if (docIDStart + docID >= limit) - break; - reader->deleteDocument(docID); - any = true; - } + try { + if (applyDeletes(reader, docStart)) { + any = true; } - } - catch (LuceneException& e) - { + docStart += reader->maxDoc(); + } catch (LuceneException& e) { finally = e; } - docs->close(); + writer->readerPool->release(reader); finally.throwException(); - - // Delete by docID - for (Collection::iterator docID = deletesFlushed->docIDs.begin(); docID != deletesFlushed->docIDs.end(); ++docID) - { - if (*docID >= docIDStart && *docID < docEnd) - { - reader->deleteDocument(*docID - docIDStart); - any = true; - } - } - - // Delete by query - IndexSearcherPtr searcher(newLucene(reader)); - for (MapQueryInt::iterator entry = deletesFlushed->queries.begin(); entry != deletesFlushed->queries.end(); ++entry) - { - WeightPtr weight(entry->first->weight(searcher)); - ScorerPtr scorer(weight->scorer(reader, true, false)); - if (scorer) - { - while (true) - { - int32_t doc = scorer->nextDoc(); - if ((int64_t)docIDStart + doc >= entry->second) - break; - reader->deleteDocument(doc); - any = true; - } - } - } - searcher->close(); - return any; - } - - void DocumentsWriter::addDeleteTerm(TermPtr term, int32_t docCount) - { - SyncLock syncLock(this); - NumPtr num(deletesInRAM->terms.get(term)); - int32_t docIDUpto = flushedDocCount + docCount; - if (!num) - deletesInRAM->terms.put(term, newLucene(docIDUpto)); - else - num->setNum(docIDUpto); - ++deletesInRAM->numTerms; - - deletesInRAM->addBytesUsed(BYTES_PER_DEL_TERM + term->_text.length() * CHAR_NUM_BYTE); } - - void DocumentsWriter::addDeleteDocID(int32_t 
docID) - { - SyncLock syncLock(this); - deletesInRAM->docIDs.add(flushedDocCount + docID); - deletesInRAM->addBytesUsed(BYTES_PER_DEL_DOCID); - } - - void DocumentsWriter::addDeleteQuery(QueryPtr query, int32_t docID) - { - SyncLock syncLock(this); - deletesInRAM->queries.put(query, flushedDocCount + docID); - deletesInRAM->addBytesUsed(BYTES_PER_DEL_QUERY); - } - - bool DocumentsWriter::doBalanceRAM() - { - SyncLock syncLock(this); - return (ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && !bufferIsFull && - (numBytesUsed + deletesInRAM->bytesUsed + deletesFlushed->bytesUsed >= ramBufferSize || - numBytesAlloc >= freeTrigger)); - } - - void DocumentsWriter::finishDocument(DocumentsWriterThreadStatePtr perThread, DocWriterPtr docWriter) - { - if (doBalanceRAM()) - { - // Must call this without holding synchronized(this) else we'll hit deadlock - balanceRAM(); - } - - { - SyncLock syncLock(this); - BOOST_ASSERT(!docWriter || docWriter->docID == perThread->docState->docID); - - if (aborting) - { - // We are currently aborting, and another thread is waiting for me to become idle. We - // just forcefully idle this threadState; it will be fully reset by abort() - if (docWriter) - { - try - { - docWriter->abort(); - } - catch (...) 
- { - } + + deletesFlushed->clear(); + + return any; +} + +bool DocumentsWriter::applyDeletes(const IndexReaderPtr& reader, int32_t docIDStart) { + SyncLock syncLock(this); + int32_t docEnd = docIDStart + reader->maxDoc(); + bool any = false; + + BOOST_ASSERT(checkDeleteTerm(TermPtr())); + + // Delete by term + TermDocsPtr docs(reader->termDocs()); + LuceneException finally; + try { + for (MapTermNum::iterator entry = deletesFlushed->terms.begin(); entry != deletesFlushed->terms.end(); ++entry) { + // we should be iterating a Map here, so terms better be in order + BOOST_ASSERT(checkDeleteTerm(entry->first)); + docs->seek(entry->first); + int32_t limit = entry->second->getNum(); + while (docs->next()) { + int32_t docID = docs->doc(); + if (docIDStart + docID >= limit) { + break; } - - perThread->isIdle = true; - notifyAll(); - return; - } - - bool doPause; - - if (docWriter) - doPause = waitQueue->add(docWriter); - else - { - skipDocWriter->docID = perThread->docState->docID; - doPause = waitQueue->add(skipDocWriter); - } - - if (doPause) - waitForWaitQueue(); - - if (bufferIsFull && !flushPending) - { - flushPending = true; - perThread->doFlushAfter = true; + reader->deleteDocument(docID); + any = true; } - - perThread->isIdle = true; - notifyAll(); } + } catch (LuceneException& e) { + finally = e; } - - void DocumentsWriter::waitForWaitQueue() - { - SyncLock syncLock(this); - do - { - wait(1000); + docs->close(); + finally.throwException(); + + // Delete by docID + for (Collection::iterator docID = deletesFlushed->docIDs.begin(); docID != deletesFlushed->docIDs.end(); ++docID) { + if (*docID >= docIDStart && *docID < docEnd) { + reader->deleteDocument(*docID - docIDStart); + any = true; } - while (!waitQueue->doResume()); - } - - int64_t DocumentsWriter::getRAMUsed() - { - return numBytesUsed + deletesInRAM->bytesUsed + deletesFlushed->bytesUsed; } - - IntArray DocumentsWriter::getIntBlock(bool trackAllocations) - { - SyncLock syncLock(this); - int32_t size = 
freeIntBlocks.size(); - IntArray b; - if (size == 0) - { - // Always record a block allocated, even if trackAllocations is false. This is necessary because - // this block will be shared between things that don't track allocations (term vectors) and things - // that do (freq/prox postings). - numBytesAlloc += INT_BLOCK_SIZE * INT_NUM_BYTE; - b = IntArray::newInstance(INT_BLOCK_SIZE); + + // Delete by query + IndexSearcherPtr searcher(newLucene(reader)); + for (MapQueryInt::iterator entry = deletesFlushed->queries.begin(); entry != deletesFlushed->queries.end(); ++entry) { + WeightPtr weight(entry->first->weight(searcher)); + ScorerPtr scorer(weight->scorer(reader, true, false)); + if (scorer) { + while (true) { + int32_t doc = scorer->nextDoc(); + if ((int64_t)docIDStart + doc >= entry->second) { + break; + } + reader->deleteDocument(doc); + any = true; + } } - else - b = freeIntBlocks.removeLast(); - if (trackAllocations) - numBytesUsed += INT_BLOCK_SIZE * INT_NUM_BYTE; - BOOST_ASSERT(numBytesUsed <= numBytesAlloc); - return b; } - - void DocumentsWriter::bytesAllocated(int64_t numBytes) - { - SyncLock syncLock(this); - numBytesAlloc += numBytes; - } - - void DocumentsWriter::bytesUsed(int64_t numBytes) - { - SyncLock syncLock(this); - numBytesUsed += numBytes; - BOOST_ASSERT(numBytesUsed <= numBytesAlloc); + searcher->close(); + return any; +} + +void DocumentsWriter::addDeleteTerm(const TermPtr& term, int32_t docCount) { + SyncLock syncLock(this); + NumPtr num(deletesInRAM->terms.get(term)); + int32_t docIDUpto = flushedDocCount + docCount; + if (!num) { + deletesInRAM->terms.put(term, newLucene(docIDUpto)); + } else { + num->setNum(docIDUpto); + } + ++deletesInRAM->numTerms; + + deletesInRAM->addBytesUsed(BYTES_PER_DEL_TERM + term->_text.length() * CHAR_NUM_BYTE); +} + +void DocumentsWriter::addDeleteDocID(int32_t docID) { + SyncLock syncLock(this); + deletesInRAM->docIDs.add(flushedDocCount + docID); + deletesInRAM->addBytesUsed(BYTES_PER_DEL_DOCID); +} + 
+void DocumentsWriter::addDeleteQuery(const QueryPtr& query, int32_t docID) { + SyncLock syncLock(this); + deletesInRAM->queries.put(query, flushedDocCount + docID); + deletesInRAM->addBytesUsed(BYTES_PER_DEL_QUERY); +} + +bool DocumentsWriter::doBalanceRAM() { + SyncLock syncLock(this); + return (ramBufferSize != IndexWriter::DISABLE_AUTO_FLUSH && !bufferIsFull && + (numBytesUsed + deletesInRAM->bytesUsed + deletesFlushed->bytesUsed >= ramBufferSize || + numBytesAlloc >= freeTrigger)); +} + +void DocumentsWriter::finishDocument(const DocumentsWriterThreadStatePtr& perThread, const DocWriterPtr& docWriter) { + if (doBalanceRAM()) { + // Must call this without holding synchronized(this) else we'll hit deadlock + balanceRAM(); } - - void DocumentsWriter::recycleIntBlocks(Collection blocks, int32_t start, int32_t end) + { SyncLock syncLock(this); - for (int32_t i = start; i < end; ++i) - { - freeIntBlocks.add(blocks[i]); - blocks[i].reset(); + BOOST_ASSERT(!docWriter || docWriter->docID == perThread->docState->docID); + + if (aborting) { + // We are currently aborting, and another thread is waiting for me to become idle. We + // just forcefully idle this threadState; it will be fully reset by abort() + if (docWriter) { + try { + docWriter->abort(); + } catch (...) { + } + } + + perThread->isIdle = true; + notifyAll(); + return; } - } - - CharArray DocumentsWriter::getCharBlock() - { - SyncLock syncLock(this); - int32_t size = freeCharBlocks.size(); - CharArray c; - if (size == 0) - { - numBytesAlloc += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; - c = CharArray::newInstance(CHAR_BLOCK_SIZE); + + bool doPause; + + if (docWriter) { + doPause = waitQueue->add(docWriter); + } else { + skipDocWriter->docID = perThread->docState->docID; + doPause = waitQueue->add(skipDocWriter); } - else - c = freeCharBlocks.removeLast(); - // We always track allocations of char blocks for now because nothing that skips allocation tracking - // (currently only term vectors) uses its own char blocks. 
- numBytesUsed += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; - BOOST_ASSERT(numBytesUsed <= numBytesAlloc); - return c; - } - - void DocumentsWriter::recycleCharBlocks(Collection blocks, int32_t numBlocks) - { - SyncLock syncLock(this); - for (int32_t i = 0; i < numBlocks; ++i) - { - freeCharBlocks.add(blocks[i]); - blocks[i].reset(); + + if (doPause) { + waitForWaitQueue(); + } + + if (bufferIsFull && !flushPending) { + flushPending = true; + perThread->doFlushAfter = true; } + + perThread->isIdle = true; + notifyAll(); } - - String DocumentsWriter::toMB(int64_t v) - { - return StringUtils::toString((double)v / 1024.0 / 1024.0); +} + +void DocumentsWriter::waitForWaitQueue() { + SyncLock syncLock(this); + do { + wait(1000); + } while (!waitQueue->doResume()); +} + +int64_t DocumentsWriter::getRAMUsed() { + return numBytesUsed + deletesInRAM->bytesUsed + deletesFlushed->bytesUsed; +} + +IntArray DocumentsWriter::getIntBlock(bool trackAllocations) { + SyncLock syncLock(this); + int32_t size = freeIntBlocks.size(); + IntArray b; + if (size == 0) { + // Always record a block allocated, even if trackAllocations is false. This is necessary because + // this block will be shared between things that don't track allocations (term vectors) and things + // that do (freq/prox postings). 
+ numBytesAlloc += INT_BLOCK_SIZE * INT_NUM_BYTE; + b = IntArray::newInstance(INT_BLOCK_SIZE); + } else { + b = freeIntBlocks.removeLast(); + } + if (trackAllocations) { + numBytesUsed += INT_BLOCK_SIZE * INT_NUM_BYTE; + } + BOOST_ASSERT(numBytesUsed <= numBytesAlloc); + return b; +} + +void DocumentsWriter::bytesAllocated(int64_t numBytes) { + SyncLock syncLock(this); + numBytesAlloc += numBytes; +} + +void DocumentsWriter::bytesUsed(int64_t numBytes) { + SyncLock syncLock(this); + numBytesUsed += numBytes; + BOOST_ASSERT(numBytesUsed <= numBytesAlloc); +} + +void DocumentsWriter::recycleIntBlocks(Collection blocks, int32_t start, int32_t end) { + SyncLock syncLock(this); + for (int32_t i = start; i < end; ++i) { + freeIntBlocks.add(blocks[i]); + blocks[i].reset(); } - - void DocumentsWriter::balanceRAM() - { - // We flush when we've used our target usage - int64_t flushTrigger = ramBufferSize; - - int64_t deletesRAMUsed = deletesInRAM->bytesUsed + deletesFlushed->bytesUsed; - - if (numBytesAlloc + deletesRAMUsed > freeTrigger) - { - if (infoStream) - { - message(L" RAM: now balance allocations: usedMB=" + toMB(numBytesUsed) + - L" vs trigger=" + toMB(flushTrigger) + - L" allocMB=" + toMB(numBytesAlloc) + - L" deletesMB=" + toMB(deletesRAMUsed) + - L" vs trigger=" + toMB(freeTrigger) + - L" byteBlockFree=" + toMB(byteBlockAllocator->freeByteBlocks.size() * BYTE_BLOCK_SIZE) + - L" perDocFree=" + toMB(perDocAllocator->freeByteBlocks.size() * PER_DOC_BLOCK_SIZE) + - L" charBlockFree=" + toMB(freeCharBlocks.size() * CHAR_BLOCK_SIZE * CHAR_NUM_BYTE)); - } - - int64_t startBytesAlloc = numBytesAlloc + deletesRAMUsed; - - int32_t iter = 0; - - // We free equally from each pool in 32 KB chunks until we are below our threshold (freeLevel) - - bool any = true; - - while (numBytesAlloc + deletesRAMUsed > freeLevel) +} + +CharArray DocumentsWriter::getCharBlock() { + SyncLock syncLock(this); + int32_t size = freeCharBlocks.size(); + CharArray c; + if (size == 0) { + 
numBytesAlloc += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; + c = CharArray::newInstance(CHAR_BLOCK_SIZE); + } else { + c = freeCharBlocks.removeLast(); + } + // We always track allocations of char blocks for now because nothing that skips allocation tracking + // (currently only term vectors) uses its own char blocks. + numBytesUsed += CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; + BOOST_ASSERT(numBytesUsed <= numBytesAlloc); + return c; +} + +void DocumentsWriter::recycleCharBlocks(Collection blocks, int32_t numBlocks) { + SyncLock syncLock(this); + for (int32_t i = 0; i < numBlocks; ++i) { + freeCharBlocks.add(blocks[i]); + blocks[i].reset(); + } +} + +String DocumentsWriter::toMB(int64_t v) { + return StringUtils::toString((double)v / 1024.0 / 1024.0); +} + +void DocumentsWriter::balanceRAM() { + // We flush when we've used our target usage + int64_t flushTrigger = ramBufferSize; + + int64_t deletesRAMUsed = deletesInRAM->bytesUsed + deletesFlushed->bytesUsed; + + if (numBytesAlloc + deletesRAMUsed > freeTrigger) { + if (infoStream) { + message(L" RAM: now balance allocations: usedMB=" + toMB(numBytesUsed) + + L" vs trigger=" + toMB(flushTrigger) + + L" allocMB=" + toMB(numBytesAlloc) + + L" deletesMB=" + toMB(deletesRAMUsed) + + L" vs trigger=" + toMB(freeTrigger) + + L" byteBlockFree=" + toMB(byteBlockAllocator->freeByteBlocks.size() * BYTE_BLOCK_SIZE) + + L" perDocFree=" + toMB(perDocAllocator->freeByteBlocks.size() * PER_DOC_BLOCK_SIZE) + + L" charBlockFree=" + toMB(freeCharBlocks.size() * CHAR_BLOCK_SIZE * CHAR_NUM_BYTE)); + } + + int64_t startBytesAlloc = numBytesAlloc + deletesRAMUsed; + + int32_t iter = 0; + + // We free equally from each pool in 32 KB chunks until we are below our threshold (freeLevel) + + bool any = true; + + while (numBytesAlloc + deletesRAMUsed > freeLevel) { { - { - SyncLock syncLock(this); - if (perDocAllocator->freeByteBlocks.empty() && byteBlockAllocator->freeByteBlocks.empty() && - freeCharBlocks.empty() && freeIntBlocks.empty() && !any) - { - // 
Nothing else to free -- must flush now. - bufferIsFull = (numBytesUsed + deletesRAMUsed > flushTrigger); - if (infoStream) - { - if (bufferIsFull) - message(L" nothing to free; now set bufferIsFull"); - else - message(L" nothing to free"); + SyncLock syncLock(this); + if (perDocAllocator->freeByteBlocks.empty() && byteBlockAllocator->freeByteBlocks.empty() && + freeCharBlocks.empty() && freeIntBlocks.empty() && !any) { + // Nothing else to free -- must flush now. + bufferIsFull = (numBytesUsed + deletesRAMUsed > flushTrigger); + if (infoStream) { + if (bufferIsFull) { + message(L" nothing to free; now set bufferIsFull"); + } else { + message(L" nothing to free"); } - BOOST_ASSERT(numBytesUsed <= numBytesAlloc); - break; - } - - if ((iter % 5) == 0 && !byteBlockAllocator->freeByteBlocks.empty()) - { - byteBlockAllocator->freeByteBlocks.removeLast(); - numBytesAlloc -= BYTE_BLOCK_SIZE; } - - if ((iter % 5) == 1 && !freeCharBlocks.empty()) - { - freeCharBlocks.removeLast(); - numBytesAlloc -= CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; - } - - if ((iter % 5) == 2 && !freeIntBlocks.empty()) - { - freeIntBlocks.removeLast(); - numBytesAlloc -= INT_BLOCK_SIZE * INT_NUM_BYTE; - } - - if ((iter % 5) == 3 && !perDocAllocator->freeByteBlocks.empty()) - { - // Remove upwards of 32 blocks (each block is 1K) - for (int32_t i = 0; i < 32; ++i) - { - perDocAllocator->freeByteBlocks.removeLast(); - numBytesAlloc -= PER_DOC_BLOCK_SIZE; - if (perDocAllocator->freeByteBlocks.empty()) - break; + BOOST_ASSERT(numBytesUsed <= numBytesAlloc); + break; + } + + if ((iter % 5) == 0 && !byteBlockAllocator->freeByteBlocks.empty()) { + byteBlockAllocator->freeByteBlocks.removeLast(); + numBytesAlloc -= BYTE_BLOCK_SIZE; + } + + if ((iter % 5) == 1 && !freeCharBlocks.empty()) { + freeCharBlocks.removeLast(); + numBytesAlloc -= CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; + } + + if ((iter % 5) == 2 && !freeIntBlocks.empty()) { + freeIntBlocks.removeLast(); + numBytesAlloc -= INT_BLOCK_SIZE * INT_NUM_BYTE; + } + + if 
((iter % 5) == 3 && !perDocAllocator->freeByteBlocks.empty()) { + // Remove upwards of 32 blocks (each block is 1K) + for (int32_t i = 0; i < 32; ++i) { + perDocAllocator->freeByteBlocks.removeLast(); + numBytesAlloc -= PER_DOC_BLOCK_SIZE; + if (perDocAllocator->freeByteBlocks.empty()) { + break; } } } - - if ((iter % 5) == 4 && any) - { - // Ask consumer to free any recycled state - any = consumer->freeRAM(); - } - - ++iter; } - - if (infoStream) - { - message(L" after free: freedMB=" + StringUtils::toString((double)(startBytesAlloc - numBytesAlloc - deletesRAMUsed) / 1024.0 / 1024.0) + - L" usedMB=" + StringUtils::toString((double)(numBytesUsed + deletesRAMUsed) / 1024.0 / 1024.0) + - L" allocMB=" + StringUtils::toString((double)numBytesAlloc / 1024.0 / 1024.0)); + + if ((iter % 5) == 4 && any) { + // Ask consumer to free any recycled state + any = consumer->freeRAM(); } + + ++iter; } - else - { - // If we have not crossed the 100% mark, but have crossed the 95% mark of RAM we are actually - // using, go ahead and flush. This prevents over-allocating and then freeing, with every flush. 
- SyncLock syncLock(this); - if (numBytesUsed + deletesRAMUsed > flushTrigger) - { - if (infoStream) - { - message(L" RAM: now flush @ usedMB=" + StringUtils::toString((double)numBytesUsed / 1024.0 / 1024.0) + - L" allocMB=" + StringUtils::toString((double)numBytesAlloc / 1024.0 / 1024.0) + - L" deletesMB=" + StringUtils::toString((double)deletesRAMUsed / 1024.0 / 1024.0) + - L" triggerMB=" + StringUtils::toString((double)flushTrigger / 1024.0 / 1024.0)); - } - bufferIsFull = true; + + if (infoStream) { + message(L" after free: freedMB=" + StringUtils::toString((double)(startBytesAlloc - numBytesAlloc - deletesRAMUsed) / 1024.0 / 1024.0) + + L" usedMB=" + StringUtils::toString((double)(numBytesUsed + deletesRAMUsed) / 1024.0 / 1024.0) + + L" allocMB=" + StringUtils::toString((double)numBytesAlloc / 1024.0 / 1024.0)); + } + } else { + // If we have not crossed the 100% mark, but have crossed the 95% mark of RAM we are actually + // using, go ahead and flush. This prevents over-allocating and then freeing, with every flush. 
+ SyncLock syncLock(this); + if (numBytesUsed + deletesRAMUsed > flushTrigger) { + if (infoStream) { + message(L" RAM: now flush @ usedMB=" + StringUtils::toString((double)numBytesUsed / 1024.0 / 1024.0) + + L" allocMB=" + StringUtils::toString((double)numBytesAlloc / 1024.0 / 1024.0) + + L" deletesMB=" + StringUtils::toString((double)deletesRAMUsed / 1024.0 / 1024.0) + + L" triggerMB=" + StringUtils::toString((double)flushTrigger / 1024.0 / 1024.0)); } + bufferIsFull = true; } } - - DocState::DocState() - { - maxFieldLength = 0; - docID = 0; - } - - DocState::~DocState() - { - } - - bool DocState::testPoint(const String& name) - { - return IndexWriterPtr(DocumentsWriterPtr(_docWriter)->_writer)->testPoint(name); - } - - void DocState::clear() - { - // don't hold onto doc nor analyzer, in case it is large - doc.reset(); - analyzer.reset(); - } - - PerDocBuffer::PerDocBuffer(DocumentsWriterPtr docWriter) - { - _docWriter = docWriter; - } - - PerDocBuffer::~PerDocBuffer() - { - } - - ByteArray PerDocBuffer::newBuffer(int32_t size) - { - BOOST_ASSERT(size == DocumentsWriter::PER_DOC_BLOCK_SIZE); - return DocumentsWriterPtr(_docWriter)->perDocAllocator->getByteBlock(false); +} + +DocState::DocState() { + maxFieldLength = 0; + docID = 0; +} + +DocState::~DocState() { +} + +bool DocState::testPoint(const String& name) { + return IndexWriterPtr(DocumentsWriterPtr(_docWriter)->_writer)->testPoint(name); +} + +void DocState::clear() { + // don't hold onto doc nor analyzer, in case it is large + doc.reset(); + analyzer.reset(); +} + +PerDocBuffer::PerDocBuffer(const DocumentsWriterPtr& docWriter) { + _docWriter = docWriter; +} + +PerDocBuffer::~PerDocBuffer() { +} + +ByteArray PerDocBuffer::newBuffer(int32_t size) { + BOOST_ASSERT(size == DocumentsWriter::PER_DOC_BLOCK_SIZE); + return DocumentsWriterPtr(_docWriter)->perDocAllocator->getByteBlock(false); +} + +void PerDocBuffer::recycle() { + SyncLock syncLock(this); + if (!buffers.empty()) { + setLength(0); + + // Recycle 
the blocks + DocumentsWriterPtr(_docWriter)->perDocAllocator->recycleByteBlocks(buffers); + buffers.clear(); + sizeInBytes = 0; + + BOOST_ASSERT(numBuffers() == 0); } - - void PerDocBuffer::recycle() - { - SyncLock syncLock(this); - if (!buffers.empty()) - { - setLength(0); +} + +DocWriter::DocWriter() { + docID = 0; +} + +DocWriter::~DocWriter() { +} + +void DocWriter::setNext(const DocWriterPtr& next) { + this->next = next; +} + +IndexingChain::~IndexingChain() { +} + +DefaultIndexingChain::~DefaultIndexingChain() { +} + +DocConsumerPtr DefaultIndexingChain::getChain(const DocumentsWriterPtr& documentsWriter) { + TermsHashConsumerPtr termVectorsWriter(newLucene(documentsWriter)); + TermsHashConsumerPtr freqProxWriter(newLucene()); + + InvertedDocConsumerPtr termsHash(newLucene(documentsWriter, true, freqProxWriter, + newLucene(documentsWriter, false, + termVectorsWriter, TermsHashPtr()))); + + DocInverterPtr docInverter(newLucene(termsHash, newLucene())); + return newLucene(documentsWriter, docInverter); +} - // Recycle the blocks - DocumentsWriterPtr(_docWriter)->perDocAllocator->recycleByteBlocks(buffers); - buffers.clear(); - sizeInBytes = 0; +SkipDocWriter::~SkipDocWriter() { +} + +void SkipDocWriter::finish() { +} + +void SkipDocWriter::abort() { +} + +int64_t SkipDocWriter::sizeInBytes() { + return 0; +} + +WaitQueue::WaitQueue(const DocumentsWriterPtr& docWriter) { + this->_docWriter = docWriter; + waiting = Collection::newInstance(10); + nextWriteDocID = 0; + nextWriteLoc = 0; + numWaiting = 0; + waitingBytes = 0; +} + +WaitQueue::~WaitQueue() { +} + +void WaitQueue::reset() { + SyncLock syncLock(this); + // NOTE: nextWriteLoc doesn't need to be reset + BOOST_ASSERT(numWaiting == 0); + BOOST_ASSERT(waitingBytes == 0); + nextWriteDocID = 0; +} + +bool WaitQueue::doResume() { + SyncLock syncLock(this); + return (waitingBytes <= DocumentsWriterPtr(_docWriter)->waitQueueResumeBytes); +} + +bool WaitQueue::doPause() { + SyncLock syncLock(this); + return 
(waitingBytes > DocumentsWriterPtr(_docWriter)->waitQueuePauseBytes); +} - BOOST_ASSERT(numBuffers() == 0); +void WaitQueue::abort() { + SyncLock syncLock(this); + int32_t count = 0; + for (Collection::iterator doc = waiting.begin(); doc != waiting.end(); ++doc) { + if (*doc) { + (*doc)->abort(); + doc->reset(); + ++count; } } - - DocWriter::DocWriter() - { - docID = 0; - } - - DocWriter::~DocWriter() - { - } - - void DocWriter::setNext(DocWriterPtr next) - { - this->next = next; - } - - IndexingChain::~IndexingChain() - { - } - - DefaultIndexingChain::~DefaultIndexingChain() - { - } - - DocConsumerPtr DefaultIndexingChain::getChain(DocumentsWriterPtr documentsWriter) - { - TermsHashConsumerPtr termVectorsWriter(newLucene(documentsWriter)); - TermsHashConsumerPtr freqProxWriter(newLucene()); - - InvertedDocConsumerPtr termsHash(newLucene(documentsWriter, true, freqProxWriter, - newLucene(documentsWriter, false, - termVectorsWriter, TermsHashPtr()))); - - DocInverterPtr docInverter(newLucene(termsHash, newLucene())); - return newLucene(documentsWriter, docInverter); - } - - SkipDocWriter::~SkipDocWriter() - { - } - - void SkipDocWriter::finish() - { - } - - void SkipDocWriter::abort() - { - } - - int64_t SkipDocWriter::sizeInBytes() - { - return 0; - } - - WaitQueue::WaitQueue(DocumentsWriterPtr docWriter) - { - this->_docWriter = docWriter; - waiting = Collection::newInstance(10); - nextWriteDocID = 0; - nextWriteLoc = 0; - numWaiting = 0; - waitingBytes = 0; - } - - WaitQueue::~WaitQueue() - { - } - - void WaitQueue::reset() - { - SyncLock syncLock(this); - // NOTE: nextWriteLoc doesn't need to be reset - BOOST_ASSERT(numWaiting == 0); - BOOST_ASSERT(waitingBytes == 0); - nextWriteDocID = 0; - } - - bool WaitQueue::doResume() - { - SyncLock syncLock(this); - return (waitingBytes <= DocumentsWriterPtr(_docWriter)->waitQueueResumeBytes); - } - - bool WaitQueue::doPause() - { - SyncLock syncLock(this); - return (waitingBytes > 
DocumentsWriterPtr(_docWriter)->waitQueuePauseBytes); - } - - void WaitQueue::abort() - { - SyncLock syncLock(this); - int32_t count = 0; - for (Collection::iterator doc = waiting.begin(); doc != waiting.end(); ++doc) - { - if (*doc) - { - (*doc)->abort(); - doc->reset(); - ++count; - } + waitingBytes = 0; + BOOST_ASSERT(count == numWaiting); + numWaiting = 0; +} + +void WaitQueue::writeDocument(const DocWriterPtr& doc) { + DocumentsWriterPtr docWriter(_docWriter); + BOOST_ASSERT(doc == DocumentsWriterPtr(docWriter)->skipDocWriter || nextWriteDocID == doc->docID); + bool success = false; + LuceneException finally; + try { + doc->finish(); + ++nextWriteDocID; + ++docWriter->numDocsInStore; + ++nextWriteLoc; + BOOST_ASSERT(nextWriteLoc <= waiting.size()); + if (nextWriteLoc == waiting.size()) { + nextWriteLoc = 0; } - waitingBytes = 0; - BOOST_ASSERT(count == numWaiting); - numWaiting = 0; + success = true; + } catch (LuceneException& e) { + finally = e; } - - void WaitQueue::writeDocument(DocWriterPtr doc) - { - DocumentsWriterPtr docWriter(_docWriter); - BOOST_ASSERT(doc == DocumentsWriterPtr(docWriter)->skipDocWriter || nextWriteDocID == doc->docID); - bool success = false; - LuceneException finally; - try - { - doc->finish(); - ++nextWriteDocID; - ++docWriter->numDocsInStore; - ++nextWriteLoc; - BOOST_ASSERT(nextWriteLoc <= waiting.size()); - if (nextWriteLoc == waiting.size()) - nextWriteLoc = 0; - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success) - docWriter->setAborting(); - finally.throwException(); + if (!success) { + docWriter->setAborting(); } - - bool WaitQueue::add(DocWriterPtr doc) - { - SyncLock syncLock(this); - BOOST_ASSERT(doc->docID >= nextWriteDocID); - if (doc->docID == nextWriteDocID) - { - writeDocument(doc); - while (true) - { - doc = waiting[nextWriteLoc]; - if (doc) - { - --numWaiting; - waiting[nextWriteLoc].reset(); - waitingBytes -= doc->sizeInBytes(); - writeDocument(doc); - } - else - break; + 
finally.throwException(); +} + +bool WaitQueue::add(const DocWriterPtr& doc) { + DocWriterPtr _doc(doc); + SyncLock syncLock(this); + BOOST_ASSERT(_doc->docID >= nextWriteDocID); + if (_doc->docID == nextWriteDocID) { + writeDocument(_doc); + while (true) { + _doc = waiting[nextWriteLoc]; + if (_doc) { + --numWaiting; + waiting[nextWriteLoc].reset(); + waitingBytes -= _doc->sizeInBytes(); + writeDocument(_doc); + } else { + break; } } - else - { - // I finished before documents that were added before me. This can easily happen when I am a small doc - // and the docs before me were large, or just due to luck in the thread scheduling. Just add myself to - // the queue and when that large doc finishes, it will flush me - int32_t gap = doc->docID - nextWriteDocID; - if (gap >= waiting.size()) - { - // Grow queue - Collection newArray(Collection::newInstance(MiscUtils::getNextSize(gap))); - BOOST_ASSERT(nextWriteLoc >= 0); - MiscUtils::arrayCopy(waiting.begin(), nextWriteLoc, newArray.begin(), 0, waiting.size() - nextWriteLoc); - MiscUtils::arrayCopy(waiting.begin(), 0, newArray.begin(), waiting.size() - nextWriteLoc, nextWriteLoc); - nextWriteLoc = 0; - waiting = newArray; - gap = doc->docID - nextWriteDocID; - } - - int32_t loc = nextWriteLoc + gap; - if (loc >= waiting.size()) - loc -= waiting.size(); - - // We should only wrap one time - BOOST_ASSERT(loc < waiting.size()); - - // Nobody should be in my spot! - BOOST_ASSERT(!waiting[loc]); - waiting[loc] = doc; - ++numWaiting; - waitingBytes += doc->sizeInBytes(); + } else { + // I finished before documents that were added before me. This can easily happen when I am a small doc + // and the docs before me were large, or just due to luck in the thread scheduling. 
Just add myself to + // the queue and when that large doc finishes, it will flush me + int32_t gap = _doc->docID - nextWriteDocID; + if (gap >= waiting.size()) { + // Grow queue + Collection newArray(Collection::newInstance(MiscUtils::getNextSize(gap))); + BOOST_ASSERT(nextWriteLoc >= 0); + MiscUtils::arrayCopy(waiting.begin(), nextWriteLoc, newArray.begin(), 0, waiting.size() - nextWriteLoc); + MiscUtils::arrayCopy(waiting.begin(), 0, newArray.begin(), waiting.size() - nextWriteLoc, nextWriteLoc); + nextWriteLoc = 0; + waiting = newArray; + gap = _doc->docID - nextWriteDocID; } - - return doPause(); - } - - ByteBlockAllocator::ByteBlockAllocator(DocumentsWriterPtr docWriter, int32_t blockSize) - { - this->blockSize = blockSize; - this->freeByteBlocks = Collection::newInstance(); - this->_docWriter = docWriter; - } - - ByteBlockAllocator::~ByteBlockAllocator() - { - } - - ByteArray ByteBlockAllocator::getByteBlock(bool trackAllocations) - { - DocumentsWriterPtr docWriter(_docWriter); - SyncLock syncLock(docWriter); - int32_t size = freeByteBlocks.size(); - ByteArray b; - if (size == 0) - { - // Always record a block allocated, even if trackAllocations is false. This is necessary because this block will - // be shared between things that don't track allocations (term vectors) and things that do (freq/prox postings). - docWriter->numBytesAlloc += blockSize; - b = ByteArray::newInstance(blockSize); - MiscUtils::arrayFill(b.get(), 0, b.size(), 0); + + int32_t loc = nextWriteLoc + gap; + if (loc >= waiting.size()) { + loc -= waiting.size(); } - else - b = freeByteBlocks.removeLast(); - if (trackAllocations) - docWriter->numBytesUsed += blockSize; - BOOST_ASSERT(docWriter->numBytesUsed <= docWriter->numBytesAlloc); - return b; + + // We should only wrap one time + BOOST_ASSERT(loc < waiting.size()); + + // Nobody should be in my spot! 
+ BOOST_ASSERT(!waiting[loc]); + waiting[loc] = _doc; + ++numWaiting; + waitingBytes += _doc->sizeInBytes(); } - - void ByteBlockAllocator::recycleByteBlocks(Collection blocks, int32_t start, int32_t end) - { - DocumentsWriterPtr docWriter(_docWriter); - SyncLock syncLock(docWriter); - for (int32_t i = start; i < end; ++i) - { - freeByteBlocks.add(blocks[i]); - blocks[i].reset(); - } + + return doPause(); +} + +ByteBlockAllocator::ByteBlockAllocator(const DocumentsWriterPtr& docWriter, int32_t blockSize) { + this->blockSize = blockSize; + this->freeByteBlocks = Collection::newInstance(); + this->_docWriter = docWriter; +} + +ByteBlockAllocator::~ByteBlockAllocator() { +} + +ByteArray ByteBlockAllocator::getByteBlock(bool trackAllocations) { + DocumentsWriterPtr docWriter(_docWriter); + SyncLock syncLock(docWriter); + int32_t size = freeByteBlocks.size(); + ByteArray b; + if (size == 0) { + // Always record a block allocated, even if trackAllocations is false. This is necessary because this block will + // be shared between things that don't track allocations (term vectors) and things that do (freq/prox postings). 
+ docWriter->numBytesAlloc += blockSize; + b = ByteArray::newInstance(blockSize); + MiscUtils::arrayFill(b.get(), 0, b.size(), 0); + } else { + b = freeByteBlocks.removeLast(); + } + if (trackAllocations) { + docWriter->numBytesUsed += blockSize; + } + BOOST_ASSERT(docWriter->numBytesUsed <= docWriter->numBytesAlloc); + return b; +} + +void ByteBlockAllocator::recycleByteBlocks(Collection blocks, int32_t start, int32_t end) { + DocumentsWriterPtr docWriter(_docWriter); + SyncLock syncLock(docWriter); + for (int32_t i = start; i < end; ++i) { + freeByteBlocks.add(blocks[i]); + blocks[i].reset(); } - - void ByteBlockAllocator::recycleByteBlocks(Collection blocks) - { - DocumentsWriterPtr docWriter(_docWriter); - SyncLock syncLock(docWriter); - int32_t size = blocks.size(); - for (int32_t i = 0; i < size; ++i) - freeByteBlocks.add(blocks[i]); +} + +void ByteBlockAllocator::recycleByteBlocks(Collection blocks) { + DocumentsWriterPtr docWriter(_docWriter); + SyncLock syncLock(docWriter); + int32_t size = blocks.size(); + for (int32_t i = 0; i < size; ++i) { + freeByteBlocks.add(blocks[i]); } } + +} diff --git a/src/core/index/DocumentsWriterThreadState.cpp b/src/core/index/DocumentsWriterThreadState.cpp index 731dfbe8..edb18df8 100644 --- a/src/core/index/DocumentsWriterThreadState.cpp +++ b/src/core/index/DocumentsWriterThreadState.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,34 +9,31 @@ #include "DocumentsWriter.h" #include "DocConsumer.h" -namespace Lucene -{ - DocumentsWriterThreadState::DocumentsWriterThreadState(DocumentsWriterPtr docWriter) - { - this->_docWriter = docWriter; - } - - DocumentsWriterThreadState::~DocumentsWriterThreadState() - { - } - - void DocumentsWriterThreadState::initialize() - { - isIdle = true; - doFlushAfter = false; - numThreads = 1; - DocumentsWriterPtr docWriter(_docWriter); - docState = newLucene(); - docState->maxFieldLength = docWriter->maxFieldLength; - docState->infoStream = docWriter->infoStream; - docState->similarity = docWriter->similarity; - docState->_docWriter = docWriter; - consumer = docWriter->consumer->addThread(shared_from_this()); - } - - void DocumentsWriterThreadState::doAfterFlush() - { - numThreads = 0; - doFlushAfter = false; - } +namespace Lucene { + +DocumentsWriterThreadState::DocumentsWriterThreadState(const DocumentsWriterPtr& docWriter) { + this->_docWriter = docWriter; +} + +DocumentsWriterThreadState::~DocumentsWriterThreadState() { +} + +void DocumentsWriterThreadState::initialize() { + isIdle = true; + doFlushAfter = false; + numThreads = 1; + DocumentsWriterPtr docWriter(_docWriter); + docState = newLucene(); + docState->maxFieldLength = docWriter->maxFieldLength; + docState->infoStream = docWriter->infoStream; + docState->similarity = docWriter->similarity; + docState->_docWriter = docWriter; + consumer = docWriter->consumer->addThread(shared_from_this()); +} + +void DocumentsWriterThreadState::doAfterFlush() { + numThreads = 0; + doFlushAfter = false; +} + } diff --git a/src/core/index/FieldInfo.cpp b/src/core/index/FieldInfo.cpp index e5a406c1..ccd0f2c3 100644 --- a/src/core/index/FieldInfo.cpp +++ b/src/core/index/FieldInfo.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,54 +7,57 @@ #include "LuceneInc.h" #include "FieldInfo.h" -namespace Lucene -{ - FieldInfo::FieldInfo(const String& name, bool isIndexed, int32_t number, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) - { - this->name = name; - this->isIndexed = isIndexed; - this->number = number; - - // for non-indexed fields, leave defaults - this->storeTermVector = isIndexed ? storeTermVector : false; - this->storeOffsetWithTermVector = isIndexed ? storeOffsetWithTermVector : false; - this->storePositionWithTermVector = isIndexed ? storePositionWithTermVector : false; - this->storePayloads = isIndexed ? storePayloads : false; - this->omitNorms = isIndexed ? omitNorms : true; - this->omitTermFreqAndPositions = isIndexed ? omitTermFreqAndPositions : false; - } - - FieldInfo::~FieldInfo() - { - } - - LuceneObjectPtr FieldInfo::clone(LuceneObjectPtr other) - { - return newLucene(name, isIndexed, number, storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); +namespace Lucene { + +FieldInfo::FieldInfo(const String& name, bool isIndexed, int32_t number, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { + this->name = name; + this->isIndexed = isIndexed; + this->number = number; + + // for non-indexed fields, leave defaults + this->storeTermVector = isIndexed ? storeTermVector : false; + this->storeOffsetWithTermVector = isIndexed ? storeOffsetWithTermVector : false; + this->storePositionWithTermVector = isIndexed ? 
storePositionWithTermVector : false; + this->storePayloads = isIndexed ? storePayloads : false; + this->omitNorms = isIndexed ? omitNorms : true; + this->omitTermFreqAndPositions = isIndexed ? omitTermFreqAndPositions : false; +} + +FieldInfo::~FieldInfo() { +} + +LuceneObjectPtr FieldInfo::clone(const LuceneObjectPtr& other) { + return newLucene(name, isIndexed, number, storeTermVector, storePositionWithTermVector, + storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); +} + +void FieldInfo::update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, + bool omitTermFreqAndPositions) { + if (this->isIndexed != isIndexed) { + this->isIndexed = true; // once indexed, always index } - - void FieldInfo::update(bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, - bool omitTermFreqAndPositions) - { - if (this->isIndexed != isIndexed) - this->isIndexed = true; // once indexed, always index - if (isIndexed) // if updated field data is not for indexing, leave the updates out - { - if (this->storeTermVector != storeTermVector) - this->storeTermVector = true; // once vector, always vector - if (this->storePositionWithTermVector != storePositionWithTermVector) - this->storePositionWithTermVector = true; // once vector, always vector - if (this->storeOffsetWithTermVector != storeOffsetWithTermVector) - this->storeOffsetWithTermVector = true; // once vector, always vector - if (this->storePayloads != storePayloads) - this->storePayloads = true; - if (this->omitNorms != omitNorms) - this->omitNorms = false; // once norms are stored, always store - if (this->omitTermFreqAndPositions != omitTermFreqAndPositions) - this->omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life + if (isIndexed) { // if updated field data is 
not for indexing, leave the updates out + if (this->storeTermVector != storeTermVector) { + this->storeTermVector = true; // once vector, always vector + } + if (this->storePositionWithTermVector != storePositionWithTermVector) { + this->storePositionWithTermVector = true; // once vector, always vector + } + if (this->storeOffsetWithTermVector != storeOffsetWithTermVector) { + this->storeOffsetWithTermVector = true; // once vector, always vector + } + if (this->storePayloads != storePayloads) { + this->storePayloads = true; + } + if (this->omitNorms != omitNorms) { + this->omitNorms = false; // once norms are stored, always store + } + if (this->omitTermFreqAndPositions != omitTermFreqAndPositions) { + this->omitTermFreqAndPositions = true; // if one require omitTermFreqAndPositions at least once, it remains off for life } } } + +} diff --git a/src/core/index/FieldInfos.cpp b/src/core/index/FieldInfos.cpp index c58f032d..05d7499d 100644 --- a/src/core/index/FieldInfos.cpp +++ b/src/core/index/FieldInfos.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -14,275 +14,246 @@ #include "Fieldable.h" #include "StringUtils.h" -namespace Lucene -{ - // Used internally (ie not written to *.fnm files) for pre-2.9 files - const int32_t FieldInfos::FORMAT_PRE = -1; - - // First used in 2.9; prior to 2.9 there was no format header - const int32_t FieldInfos::FORMAT_START = -2; - - const int32_t FieldInfos::CURRENT_FORMAT = FieldInfos::FORMAT_START; - - const uint8_t FieldInfos::IS_INDEXED = 0x1; - const uint8_t FieldInfos::STORE_TERMVECTOR = 0x2; - const uint8_t FieldInfos::STORE_POSITIONS_WITH_TERMVECTOR = 0x4; - const uint8_t FieldInfos::STORE_OFFSET_WITH_TERMVECTOR = 0x8; - const uint8_t FieldInfos::OMIT_NORMS = 0x10; - const uint8_t FieldInfos::STORE_PAYLOADS = 0x20; - const uint8_t FieldInfos::OMIT_TERM_FREQ_AND_POSITIONS = 0x40; - - FieldInfos::FieldInfos() - { - format = 0; - byNumber = Collection::newInstance(); - byName = MapStringFieldInfo::newInstance(); - } - - FieldInfos::FieldInfos(DirectoryPtr d, const String& name) - { - format = 0; - byNumber = Collection::newInstance(); - byName = MapStringFieldInfo::newInstance(); - IndexInputPtr input(d->openInput(name)); - LuceneException finally; - try - { - try - { - read(input, name); - } - catch (IOException& e) - { - if (format == FORMAT_PRE) - { - input->seek(0); - input->setModifiedUTF8StringsMode(); - byNumber.clear(); - byName.clear(); - try - { - read(input, name); - } - catch (...) 
- { - // Ignore any new exception & throw original IOE - finally = e; - } - } - else +namespace Lucene { + +// Used internally (ie not written to *.fnm files) for pre-2.9 files +const int32_t FieldInfos::FORMAT_PRE = -1; + +// First used in 2.9; prior to 2.9 there was no format header +const int32_t FieldInfos::FORMAT_START = -2; + +const int32_t FieldInfos::CURRENT_FORMAT = FieldInfos::FORMAT_START; + +const uint8_t FieldInfos::IS_INDEXED = 0x1; +const uint8_t FieldInfos::STORE_TERMVECTOR = 0x2; +const uint8_t FieldInfos::STORE_POSITIONS_WITH_TERMVECTOR = 0x4; +const uint8_t FieldInfos::STORE_OFFSET_WITH_TERMVECTOR = 0x8; +const uint8_t FieldInfos::OMIT_NORMS = 0x10; +const uint8_t FieldInfos::STORE_PAYLOADS = 0x20; +const uint8_t FieldInfos::OMIT_TERM_FREQ_AND_POSITIONS = 0x40; + +FieldInfos::FieldInfos() { + format = 0; + byNumber = Collection::newInstance(); + byName = MapStringFieldInfo::newInstance(); +} + +FieldInfos::FieldInfos(const DirectoryPtr& d, const String& name) { + format = 0; + byNumber = Collection::newInstance(); + byName = MapStringFieldInfo::newInstance(); + IndexInputPtr input(d->openInput(name)); + LuceneException finally; + try { + try { + read(input, name); + } catch (IOException& e) { + if (format == FORMAT_PRE) { + input->seek(0); + input->setModifiedUTF8StringsMode(); + byNumber.clear(); + byName.clear(); + try { + read(input, name); + } catch (...) 
{ + // Ignore any new exception & throw original IOE finally = e; + } + } else { + finally = e; } } - catch (LuceneException& e) - { - finally = e; - } - input->close(); - finally.throwException(); - } - - FieldInfos::~FieldInfos() - { + } catch (LuceneException& e) { + finally = e; } - - LuceneObjectPtr FieldInfos::clone(LuceneObjectPtr other) - { - SyncLock syncLock(this); - FieldInfosPtr fis(newLucene()); - for (Collection::iterator field = byNumber.begin(); field != byNumber.end(); ++field) - { - FieldInfoPtr fi(boost::dynamic_pointer_cast((*field)->clone())); - fis->byNumber.add(fi); - fis->byName.put(fi->name, fi); - } - return fis; + input->close(); + finally.throwException(); +} + +FieldInfos::~FieldInfos() { +} + +LuceneObjectPtr FieldInfos::clone(const LuceneObjectPtr& other) { + SyncLock syncLock(this); + FieldInfosPtr fis(newLucene()); + for (Collection::iterator field = byNumber.begin(); field != byNumber.end(); ++field) { + FieldInfoPtr fi(boost::dynamic_pointer_cast((*field)->clone())); + fis->byNumber.add(fi); + fis->byName.put(fi->name, fi); } - - void FieldInfos::add(DocumentPtr doc) - { - SyncLock syncLock(this); - Collection fields(doc->getFields()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - add((*field)->name(), (*field)->isIndexed(), (*field)->isTermVectorStored(), - (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), - (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions()); - } + return fis; +} + +void FieldInfos::add(const DocumentPtr& doc) { + SyncLock syncLock(this); + Collection fields(doc->getFields()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + add((*field)->name(), (*field)->isIndexed(), (*field)->isTermVectorStored(), + (*field)->isStorePositionWithTermVector(), (*field)->isStoreOffsetWithTermVector(), + (*field)->getOmitNorms(), false, (*field)->getOmitTermFreqAndPositions()); } - - bool 
FieldInfos::hasProx() - { - for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) - { - if ((*fi)->isIndexed && !(*fi)->omitTermFreqAndPositions) - return true; +} + +bool FieldInfos::hasProx() { + for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { + if ((*fi)->isIndexed && !(*fi)->omitTermFreqAndPositions) { + return true; } - return false; - } - - void FieldInfos::addIndexed(HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector) - { - SyncLock syncLock(this); - for (HashSet::iterator name = names.begin(); name != names.end(); ++name) - add(*name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector); - } - - void FieldInfos::add(HashSet names, bool isIndexed) - { - SyncLock syncLock(this); - for (HashSet::iterator name = names.begin(); name != names.end(); ++name) - add(*name, isIndexed); - } - - void FieldInfos::add(const String& name, bool isIndexed) - { - add(name, isIndexed, false, false, false, false); - } - - void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector) - { - add(name, isIndexed, storeTermVector, false, false, false); - } - - void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector) - { - add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false); - } - - void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms) - { - add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false); - } - - FieldInfoPtr FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) - { - 
SyncLock syncLock(this); - FieldInfoPtr fi(fieldInfo(name)); - if (!fi) - return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); - else - fi->update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); - return fi; } - - FieldInfoPtr FieldInfos::addInternal(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) - { - FieldInfoPtr fi(newLucene(name, isIndexed, byNumber.size(), storeTermVector, - storePositionWithTermVector, storeOffsetWithTermVector, - omitNorms, storePayloads, omitTermFreqAndPositions)); - byNumber.add(fi); - byName.put(name, fi); - return fi; - } - - int32_t FieldInfos::fieldNumber(const String& fieldName) - { - FieldInfoPtr fi(fieldInfo(fieldName)); - return fi ? fi->number : -1; + return false; +} + +void FieldInfos::addIndexed(HashSet names, bool storeTermVectors, bool storePositionWithTermVector, bool storeOffsetWithTermVector) { + SyncLock syncLock(this); + for (HashSet::iterator name = names.begin(); name != names.end(); ++name) { + add(*name, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector); } - - FieldInfoPtr FieldInfos::fieldInfo(const String& fieldName) - { - return byName.get(fieldName); +} + +void FieldInfos::add(HashSet names, bool isIndexed) { + SyncLock syncLock(this); + for (HashSet::iterator name = names.begin(); name != names.end(); ++name) { + add(*name, isIndexed); } - - String FieldInfos::fieldName(int32_t fieldNumber) - { - FieldInfoPtr fi(fieldInfo(fieldNumber)); - return fi ? 
fi->name : L""; +} + +void FieldInfos::add(const String& name, bool isIndexed) { + add(name, isIndexed, false, false, false, false); +} + +void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector) { + add(name, isIndexed, storeTermVector, false, false, false); +} + +void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, bool storeOffsetWithTermVector) { + add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, false); +} + +void FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms) { + add(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, false, false); +} + +FieldInfoPtr FieldInfos::add(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { + SyncLock syncLock(this); + FieldInfoPtr fi(fieldInfo(name)); + if (!fi) { + return addInternal(name, isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); + } else { + fi->update(isIndexed, storeTermVector, storePositionWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTermFreqAndPositions); } - - FieldInfoPtr FieldInfos::fieldInfo(int32_t fieldNumber) - { - return (fieldNumber >= 0 && fieldNumber < byNumber.size()) ? 
byNumber[fieldNumber] : FieldInfoPtr(); + return fi; +} + +FieldInfoPtr FieldInfos::addInternal(const String& name, bool isIndexed, bool storeTermVector, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool omitNorms, bool storePayloads, bool omitTermFreqAndPositions) { + FieldInfoPtr fi(newLucene(name, isIndexed, byNumber.size(), storeTermVector, + storePositionWithTermVector, storeOffsetWithTermVector, + omitNorms, storePayloads, omitTermFreqAndPositions)); + byNumber.add(fi); + byName.put(name, fi); + return fi; +} + +int32_t FieldInfos::fieldNumber(const String& fieldName) { + FieldInfoPtr fi(fieldInfo(fieldName)); + return fi ? fi->number : -1; +} + +FieldInfoPtr FieldInfos::fieldInfo(const String& fieldName) { + return byName.get(fieldName); +} + +String FieldInfos::fieldName(int32_t fieldNumber) { + FieldInfoPtr fi(fieldInfo(fieldNumber)); + return fi ? fi->name : L""; +} + +FieldInfoPtr FieldInfos::fieldInfo(int32_t fieldNumber) { + return (fieldNumber >= 0 && fieldNumber < byNumber.size()) ? 
byNumber[fieldNumber] : FieldInfoPtr(); +} + +int32_t FieldInfos::size() { + return byNumber.size(); +} + +bool FieldInfos::hasVectors() { + for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { + if ((*fi)->storeTermVector) { + return true; + } } - - int32_t FieldInfos::size() - { - return byNumber.size(); + return false; +} + +void FieldInfos::write(const DirectoryPtr& d, const String& name) { + IndexOutputPtr output(d->createOutput(name)); + LuceneException finally; + try { + write(output); + } catch (LuceneException& e) { + finally = e; } - - bool FieldInfos::hasVectors() - { - for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) - { - if ((*fi)->storeTermVector) - return true; + output->close(); + finally.throwException(); +} + +void FieldInfos::write(const IndexOutputPtr& output) { + output->writeVInt(CURRENT_FORMAT); + output->writeVInt(size()); + for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) { + uint8_t bits = 0x0; + if ((*fi)->isIndexed) { + bits |= IS_INDEXED; } - return false; - } - - void FieldInfos::write(DirectoryPtr d, const String& name) - { - IndexOutputPtr output(d->createOutput(name)); - LuceneException finally; - try - { - write(output); + if ((*fi)->storeTermVector) { + bits |= STORE_TERMVECTOR; } - catch (LuceneException& e) - { - finally = e; + if ((*fi)->storePositionWithTermVector) { + bits |= STORE_POSITIONS_WITH_TERMVECTOR; } - output->close(); - finally.throwException(); - } - - void FieldInfos::write(IndexOutputPtr output) - { - output->writeVInt(CURRENT_FORMAT); - output->writeVInt(size()); - for (Collection::iterator fi = byNumber.begin(); fi != byNumber.end(); ++fi) - { - uint8_t bits = 0x0; - if ((*fi)->isIndexed) - bits |= IS_INDEXED; - if ((*fi)->storeTermVector) - bits |= STORE_TERMVECTOR; - if ((*fi)->storePositionWithTermVector) - bits |= STORE_POSITIONS_WITH_TERMVECTOR; - if ((*fi)->storeOffsetWithTermVector) - bits |= STORE_OFFSET_WITH_TERMVECTOR; 
- if ((*fi)->omitNorms) - bits |= OMIT_NORMS; - if ((*fi)->storePayloads) - bits |= STORE_PAYLOADS; - if ((*fi)->omitTermFreqAndPositions) - bits |= OMIT_TERM_FREQ_AND_POSITIONS; - - output->writeString((*fi)->name); - output->writeByte(bits); + if ((*fi)->storeOffsetWithTermVector) { + bits |= STORE_OFFSET_WITH_TERMVECTOR; } - } - - void FieldInfos::read(IndexInputPtr input, const String& fileName) - { - int32_t firstInt = input->readVInt(); - format = firstInt < 0 ? firstInt : FORMAT_PRE; // This is a real format? - - if (format != FORMAT_PRE && format != FORMAT_START) - boost::throw_exception(CorruptIndexException(L"unrecognized format " + StringUtils::toString(format) + L" in file \"" + fileName + L"\"")); - - int32_t size = format == FORMAT_PRE ? firstInt : input->readVInt(); // read in the size if required - for (int32_t i = 0; i < size; ++i) - { - String name(input->readString()); - uint8_t bits = input->readByte(); - - addInternal(name, (bits & IS_INDEXED) != 0, (bits & STORE_TERMVECTOR) != 0, (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0, - (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0, (bits & OMIT_NORMS) != 0, (bits & STORE_PAYLOADS) != 0, - (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0); + if ((*fi)->omitNorms) { + bits |= OMIT_NORMS; + } + if ((*fi)->storePayloads) { + bits |= STORE_PAYLOADS; } - - if (input->getFilePointer() != input->length()) - { - boost::throw_exception(CorruptIndexException(L"did not read all bytes from file \"" + fileName + L"\": read " + - StringUtils::toString(input->getFilePointer()) + L" vs size " + - StringUtils::toString(input->length()))); + if ((*fi)->omitTermFreqAndPositions) { + bits |= OMIT_TERM_FREQ_AND_POSITIONS; } + + output->writeString((*fi)->name); + output->writeByte(bits); + } +} + +void FieldInfos::read(const IndexInputPtr& input, const String& fileName) { + int32_t firstInt = input->readVInt(); + format = firstInt < 0 ? firstInt : FORMAT_PRE; // This is a real format? 
+ + if (format != FORMAT_PRE && format != FORMAT_START) { + boost::throw_exception(CorruptIndexException(L"unrecognized format " + StringUtils::toString(format) + L" in file \"" + fileName + L"\"")); + } + + int32_t size = format == FORMAT_PRE ? firstInt : input->readVInt(); // read in the size if required + for (int32_t i = 0; i < size; ++i) { + String name(input->readString()); + uint8_t bits = input->readByte(); + + addInternal(name, (bits & IS_INDEXED) != 0, (bits & STORE_TERMVECTOR) != 0, (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0, + (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0, (bits & OMIT_NORMS) != 0, (bits & STORE_PAYLOADS) != 0, + (bits & OMIT_TERM_FREQ_AND_POSITIONS) != 0); + } + + if (input->getFilePointer() != input->length()) { + boost::throw_exception(CorruptIndexException(L"did not read all bytes from file \"" + fileName + L"\": read " + + StringUtils::toString(input->getFilePointer()) + L" vs size " + + StringUtils::toString(input->length()))); } } + +} diff --git a/src/core/index/FieldInvertState.cpp b/src/core/index/FieldInvertState.cpp index 539798d5..554363ee 100644 --- a/src/core/index/FieldInvertState.cpp +++ b/src/core/index/FieldInvertState.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,58 +7,50 @@ #include "LuceneInc.h" #include "FieldInvertState.h" -namespace Lucene -{ - FieldInvertState::FieldInvertState(int32_t position, int32_t length, int32_t numOverlap, int32_t offset, double boost) - { - this->position = position; - this->length = length; - this->numOverlap = numOverlap; - this->offset = offset; - this->boost = boost; - } - - FieldInvertState::~FieldInvertState() - { - } - - void FieldInvertState::reset(double docBoost) - { - position = 0; - length = 0; - numOverlap = 0; - offset = 0; - boost = docBoost; - attributeSource.reset(); - } - - int32_t FieldInvertState::getPosition() - { - return position; - } - - int32_t FieldInvertState::getLength() - { - return length; - } - - int32_t FieldInvertState::getNumOverlap() - { - return numOverlap; - } - - int32_t FieldInvertState::getOffset() - { - return offset; - } - - double FieldInvertState::getBoost() - { - return boost; - } - - AttributeSourcePtr FieldInvertState::getAttributeSource() - { - return attributeSource; - } +namespace Lucene { + +FieldInvertState::FieldInvertState(int32_t position, int32_t length, int32_t numOverlap, int32_t offset, double boost) { + this->position = position; + this->length = length; + this->numOverlap = numOverlap; + this->offset = offset; + this->boost = boost; +} + +FieldInvertState::~FieldInvertState() { +} + +void FieldInvertState::reset(double docBoost) { + position = 0; + length = 0; + numOverlap = 0; + offset = 0; + boost = docBoost; + attributeSource.reset(); +} + +int32_t FieldInvertState::getPosition() { + return position; +} + +int32_t FieldInvertState::getLength() { + return length; +} + +int32_t FieldInvertState::getNumOverlap() { + return numOverlap; +} + +int32_t FieldInvertState::getOffset() { + return offset; +} + +double FieldInvertState::getBoost() { + return boost; +} + +AttributeSourcePtr FieldInvertState::getAttributeSource() { + return attributeSource; +} + 
} diff --git a/src/core/index/FieldSortedTermVectorMapper.cpp b/src/core/index/FieldSortedTermVectorMapper.cpp index 0d864a72..cf095d9c 100644 --- a/src/core/index/FieldSortedTermVectorMapper.cpp +++ b/src/core/index/FieldSortedTermVectorMapper.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,47 +8,42 @@ #include "FieldSortedTermVectorMapper.h" #include "TermVectorEntry.h" -namespace Lucene -{ - FieldSortedTermVectorMapper::FieldSortedTermVectorMapper(TermVectorEntryComparator comparator) - : TermVectorMapper(false, false) - { - this->fieldToTerms = MapStringCollectionTermVectorEntry::newInstance(); - this->comparator = comparator; - } - - FieldSortedTermVectorMapper::FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator) - : TermVectorMapper(ignoringPositions, ignoringOffsets) - { - this->fieldToTerms = MapStringCollectionTermVectorEntry::newInstance(); - this->comparator = comparator; - } - - FieldSortedTermVectorMapper::~FieldSortedTermVectorMapper() - { - } - - void FieldSortedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) - { - TermVectorEntryPtr entry(newLucene(currentField, term, frequency, offsets, positions)); - if (!currentSet.contains_if(luceneEqualTo(entry))) - currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry); - } - - void FieldSortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) - { - currentSet = Collection::newInstance(); - currentField = field; - 
fieldToTerms.put(field, currentSet); - } - - MapStringCollectionTermVectorEntry FieldSortedTermVectorMapper::getFieldToTerms() - { - return fieldToTerms; - } - - TermVectorEntryComparator FieldSortedTermVectorMapper::getComparator() - { - return comparator; +namespace Lucene { + +FieldSortedTermVectorMapper::FieldSortedTermVectorMapper(TermVectorEntryComparator comparator) + : TermVectorMapper(false, false) { + this->fieldToTerms = MapStringCollectionTermVectorEntry::newInstance(); + this->comparator = comparator; +} + +FieldSortedTermVectorMapper::FieldSortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator) + : TermVectorMapper(ignoringPositions, ignoringOffsets) { + this->fieldToTerms = MapStringCollectionTermVectorEntry::newInstance(); + this->comparator = comparator; +} + +FieldSortedTermVectorMapper::~FieldSortedTermVectorMapper() { +} + +void FieldSortedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { + TermVectorEntryPtr entry(newLucene(currentField, term, frequency, offsets, positions)); + if (!currentSet.contains_if(luceneEqualTo(entry))) { + currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry); } } + +void FieldSortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { + currentSet = Collection::newInstance(); + currentField = field; + fieldToTerms.put(field, currentSet); +} + +MapStringCollectionTermVectorEntry FieldSortedTermVectorMapper::getFieldToTerms() { + return fieldToTerms; +} + +TermVectorEntryComparator FieldSortedTermVectorMapper::getComparator() { + return comparator; +} + +} diff --git a/src/core/index/FieldsReader.cpp b/src/core/index/FieldsReader.cpp index 57180db2..0f0653d7 100644 --- a/src/core/index/FieldsReader.cpp +++ b/src/core/index/FieldsReader.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -20,530 +20,462 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - FieldsReader::FieldsReader(FieldInfosPtr fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, - int32_t formatSize, int32_t docStoreOffset, IndexInputPtr cloneableFieldsStream, - IndexInputPtr cloneableIndexStream) - { - closed = false; - isOriginal = false; - this->fieldInfos = fieldInfos; - this->numTotalDocs = numTotalDocs; - this->_size = size; - this->format = format; - this->formatSize = formatSize; - this->docStoreOffset = docStoreOffset; - this->cloneableFieldsStream = cloneableFieldsStream; - this->cloneableIndexStream = cloneableIndexStream; +namespace Lucene { + +FieldsReader::FieldsReader(const FieldInfosPtr& fieldInfos, int32_t numTotalDocs, int32_t size, int32_t format, + int32_t formatSize, int32_t docStoreOffset, const IndexInputPtr& cloneableFieldsStream, + const IndexInputPtr& cloneableIndexStream) { + closed = false; + isOriginal = false; + this->fieldInfos = fieldInfos; + this->numTotalDocs = numTotalDocs; + this->_size = size; + this->format = format; + this->formatSize = formatSize; + this->docStoreOffset = docStoreOffset; + this->cloneableFieldsStream = cloneableFieldsStream; + this->cloneableIndexStream = cloneableIndexStream; + fieldsStream = boost::dynamic_pointer_cast(cloneableFieldsStream->clone()); + indexStream = boost::dynamic_pointer_cast(cloneableIndexStream->clone()); +} + +FieldsReader::FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn) { + ConstructReader(d, segment, fn, 
BufferedIndexInput::BUFFER_SIZE, -1, 0); +} + +FieldsReader::FieldsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { + ConstructReader(d, segment, fn, readBufferSize, docStoreOffset, size); +} + +FieldsReader::~FieldsReader() { +} + +void FieldsReader::ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { + bool success = false; + isOriginal = true; + numTotalDocs = 0; + _size = 0; + closed = false; + format = 0; + formatSize = 0; + docStoreOffset = docStoreOffset; + LuceneException finally; + try { + fieldInfos = fn; + + cloneableFieldsStream = d->openInput(segment + L"." + IndexFileNames::FIELDS_EXTENSION(), readBufferSize); + cloneableIndexStream = d->openInput(segment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION(), readBufferSize); + + // First version of fdx did not include a format header, but, the first int will always be 0 in that case + format = cloneableIndexStream->readInt(); + + if (format > FieldsWriter::FORMAT_CURRENT) { + boost::throw_exception(CorruptIndexException(L"Incompatible format version: " + StringUtils::toString(format) + + L" expected " + StringUtils::toString(FieldsWriter::FORMAT_CURRENT) + + L" or lower")); + } + + formatSize = format > FieldsWriter::FORMAT ? 
4 : 0; + + if (format < FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { + cloneableFieldsStream->setModifiedUTF8StringsMode(); + } + fieldsStream = boost::dynamic_pointer_cast(cloneableFieldsStream->clone()); + + int64_t indexSize = cloneableIndexStream->length() - formatSize; + + if (docStoreOffset != -1) { + // We read only a slice out of this shared fields file + this->docStoreOffset = docStoreOffset; + this->_size = size; + + // Verify the file is long enough to hold all of our docs + BOOST_ASSERT(((int32_t)((double)indexSize / 8.0)) >= _size + this->docStoreOffset); + } else { + this->docStoreOffset = 0; + this->_size = (int32_t)(indexSize >> 3); + } + indexStream = boost::dynamic_pointer_cast(cloneableIndexStream->clone()); + numTotalDocs = (int32_t)(indexSize >> 3); + success = true; + } catch (LuceneException& e) { + finally = e; } - - FieldsReader::FieldsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn) - { - ConstructReader(d, segment, fn, BufferedIndexInput::BUFFER_SIZE, -1, 0); - } - - FieldsReader::FieldsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) - { - ConstructReader(d, segment, fn, readBufferSize, docStoreOffset, size); + // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. 
+ // In this case, we want to explicitly close any subset of things that were opened + if (!success) { + close(); } - - FieldsReader::~FieldsReader() - { + finally.throwException(); +} + +LuceneObjectPtr FieldsReader::clone(const LuceneObjectPtr& other) { + ensureOpen(); + return newLucene(fieldInfos, numTotalDocs, _size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream); +} + +void FieldsReader::ensureOpen() { + if (closed) { + boost::throw_exception(AlreadyClosedException(L"this FieldsReader is closed")); } - - void FieldsReader::ConstructReader(DirectoryPtr d, const String& segment, FieldInfosPtr fn, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) - { - bool success = false; - isOriginal = true; - numTotalDocs = 0; - _size = 0; - closed = false; - format = 0; - formatSize = 0; - docStoreOffset = docStoreOffset; - LuceneException finally; - try - { - fieldInfos = fn; - - cloneableFieldsStream = d->openInput(segment + L"." + IndexFileNames::FIELDS_EXTENSION(), readBufferSize); - cloneableIndexStream = d->openInput(segment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION(), readBufferSize); - - // First version of fdx did not include a format header, but, the first int will always be 0 in that case - format = cloneableIndexStream->readInt(); - - if (format > FieldsWriter::FORMAT_CURRENT) - { - boost::throw_exception(CorruptIndexException(L"Incompatible format version: " + StringUtils::toString(format) + - L" expected " + StringUtils::toString(FieldsWriter::FORMAT_CURRENT) + - L" or lower")); - } - - formatSize = format > FieldsWriter::FORMAT ? 
4 : 0; - - if (format < FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) - cloneableFieldsStream->setModifiedUTF8StringsMode(); - - fieldsStream = boost::dynamic_pointer_cast(cloneableFieldsStream->clone()); - - int64_t indexSize = cloneableIndexStream->length() - formatSize; - - if (docStoreOffset != -1) - { - // We read only a slice out of this shared fields file - this->docStoreOffset = docStoreOffset; - this->_size = size; - - // Verify the file is long enough to hold all of our docs - BOOST_ASSERT(((int32_t)((double)indexSize / 8.0)) >= _size + this->docStoreOffset); +} + +void FieldsReader::close() { + if (!closed) { + if (fieldsStream) { + fieldsStream->close(); + } + if (isOriginal) { + if (cloneableFieldsStream) { + cloneableFieldsStream->close(); } - else - { - this->docStoreOffset = 0; - this->_size = (int32_t)(indexSize >> 3); + if (cloneableIndexStream) { + cloneableIndexStream->close(); } - - indexStream = boost::dynamic_pointer_cast(cloneableIndexStream->clone()); - numTotalDocs = (int32_t)(indexSize >> 3); - success = true; - } - catch (LuceneException& e) - { - finally = e; } - // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. 
- // In this case, we want to explicitly close any subset of things that were opened - if (!success) - close(); - finally.throwException(); - } - - LuceneObjectPtr FieldsReader::clone(LuceneObjectPtr other) - { - ensureOpen(); - return newLucene(fieldInfos, numTotalDocs, _size, format, formatSize, docStoreOffset, cloneableFieldsStream, cloneableIndexStream); - } - - void FieldsReader::ensureOpen() - { - if (closed) - boost::throw_exception(AlreadyClosedException(L"this FieldsReader is closed")); - } - - void FieldsReader::close() - { - if (!closed) - { - if (fieldsStream) - fieldsStream->close(); - if (isOriginal) - { - if (cloneableFieldsStream) - cloneableFieldsStream->close(); - if (cloneableIndexStream) - cloneableIndexStream->close(); - } - if (indexStream) - indexStream->close(); - fieldsStreamTL.close(); - closed = true; + if (indexStream) { + indexStream->close(); } + fieldsStreamTL.close(); + closed = true; } - - int32_t FieldsReader::size() - { - return _size; - } - - void FieldsReader::seekIndex(int32_t docID) - { - indexStream->seek(formatSize + (docID + docStoreOffset) * 8); - } - - bool FieldsReader::canReadRawDocs() - { - // Disable reading raw docs in 2.x format, because of the removal of compressed fields in 3.0. - // We don't want rawDocs() to decode field bits to figure out if a field was compressed, hence - // we enforce ordinary (non-raw) stored field merges for <3.0 indexes. - return (format >= FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS); - } - - DocumentPtr FieldsReader::doc(int32_t n, FieldSelectorPtr fieldSelector) - { - seekIndex(n); - int64_t position = indexStream->readLong(); - fieldsStream->seek(position); - - DocumentPtr doc(newLucene()); - int32_t numFields = fieldsStream->readVInt(); - for (int32_t i = 0; i < numFields; ++i) - { - int32_t fieldNumber = fieldsStream->readVInt(); - FieldInfoPtr fi = fieldInfos->fieldInfo(fieldNumber); - FieldSelector::FieldSelectorResult acceptField = fieldSelector ? 
fieldSelector->accept(fi->name) : FieldSelector::SELECTOR_LOAD; - - uint8_t bits = fieldsStream->readByte(); - BOOST_ASSERT(bits <= FieldsWriter::FIELD_IS_COMPRESSED + FieldsWriter::FIELD_IS_TOKENIZED + FieldsWriter::FIELD_IS_BINARY); - - bool compressed = ((bits & FieldsWriter::FIELD_IS_COMPRESSED) != 0); - - // compressed fields are only allowed in indexes of version <= 2.9 - BOOST_ASSERT(compressed ? (format < FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true); - - bool tokenize = ((bits & FieldsWriter::FIELD_IS_TOKENIZED) != 0); - bool binary = ((bits & FieldsWriter::FIELD_IS_BINARY) != 0); - - if (acceptField == FieldSelector::SELECTOR_LOAD) - addField(doc, fi, binary, compressed, tokenize); - else if (acceptField == FieldSelector::SELECTOR_LOAD_AND_BREAK) - { - addField(doc, fi, binary, compressed, tokenize); - break; // Get out of this loop - } - else if (acceptField == FieldSelector::SELECTOR_LAZY_LOAD) - addFieldLazy(doc, fi, binary, compressed, tokenize); - else if (acceptField == FieldSelector::SELECTOR_SIZE) - skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed)); - else if (acceptField == FieldSelector::SELECTOR_SIZE_AND_BREAK) - { - addFieldSize(doc, fi, binary, compressed); - break; - } - else - skipField(binary, compressed); - } +} - return doc; - } - - IndexInputPtr FieldsReader::rawDocs(Collection lengths, int32_t startDocID, int32_t numDocs) - { - seekIndex(startDocID); - int64_t startOffset = indexStream->readLong(); - int64_t lastOffset = startOffset; - int32_t count = 0; - while (count < numDocs) - { - int32_t docID = docStoreOffset + startDocID + count + 1; - BOOST_ASSERT(docID <= numTotalDocs); - int64_t offset = docID < numTotalDocs ? 
indexStream->readLong() : fieldsStream->length(); - lengths[count++] = (int32_t)(offset - lastOffset); - lastOffset = offset; +int32_t FieldsReader::size() { + return _size; +} + +void FieldsReader::seekIndex(int32_t docID) { + indexStream->seek(formatSize + (docID + docStoreOffset) * 8); +} + +bool FieldsReader::canReadRawDocs() { + // Disable reading raw docs in 2.x format, because of the removal of compressed fields in 3.0. + // We don't want rawDocs() to decode field bits to figure out if a field was compressed, hence + // we enforce ordinary (non-raw) stored field merges for <3.0 indexes. + return (format >= FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS); +} + +DocumentPtr FieldsReader::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { + seekIndex(n); + int64_t position = indexStream->readLong(); + fieldsStream->seek(position); + + DocumentPtr doc(newLucene()); + int32_t numFields = fieldsStream->readVInt(); + for (int32_t i = 0; i < numFields; ++i) { + int32_t fieldNumber = fieldsStream->readVInt(); + FieldInfoPtr fi = fieldInfos->fieldInfo(fieldNumber); + FieldSelector::FieldSelectorResult acceptField = fieldSelector ? fieldSelector->accept(fi->name) : FieldSelector::SELECTOR_LOAD; + + uint8_t bits = fieldsStream->readByte(); + BOOST_ASSERT(bits <= FieldsWriter::FIELD_IS_COMPRESSED + FieldsWriter::FIELD_IS_TOKENIZED + FieldsWriter::FIELD_IS_BINARY); + + bool compressed = ((bits & FieldsWriter::FIELD_IS_COMPRESSED) != 0); + + // compressed fields are only allowed in indexes of version <= 2.9 + BOOST_ASSERT(compressed ? 
(format < FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS) : true); + + bool tokenize = ((bits & FieldsWriter::FIELD_IS_TOKENIZED) != 0); + bool binary = ((bits & FieldsWriter::FIELD_IS_BINARY) != 0); + + if (acceptField == FieldSelector::SELECTOR_LOAD) { + addField(doc, fi, binary, compressed, tokenize); + } else if (acceptField == FieldSelector::SELECTOR_LOAD_AND_BREAK) { + addField(doc, fi, binary, compressed, tokenize); + break; // Get out of this loop + } else if (acceptField == FieldSelector::SELECTOR_LAZY_LOAD) { + addFieldLazy(doc, fi, binary, compressed, tokenize); + } else if (acceptField == FieldSelector::SELECTOR_SIZE) { + skipField(binary, compressed, addFieldSize(doc, fi, binary, compressed)); + } else if (acceptField == FieldSelector::SELECTOR_SIZE_AND_BREAK) { + addFieldSize(doc, fi, binary, compressed); + break; + } else { + skipField(binary, compressed); } - - fieldsStream->seek(startOffset); - - return fieldsStream; } - - void FieldsReader::skipField(bool binary, bool compressed) - { - skipField(binary, compressed, fieldsStream->readVInt()); + + return doc; +} + +IndexInputPtr FieldsReader::rawDocs(Collection lengths, int32_t startDocID, int32_t numDocs) { + seekIndex(startDocID); + int64_t startOffset = indexStream->readLong(); + int64_t lastOffset = startOffset; + int32_t count = 0; + while (count < numDocs) { + int32_t docID = docStoreOffset + startDocID + count + 1; + BOOST_ASSERT(docID <= numTotalDocs); + int64_t offset = docID < numTotalDocs ? indexStream->readLong() : fieldsStream->length(); + lengths[count++] = (int32_t)(offset - lastOffset); + lastOffset = offset; } - - void FieldsReader::skipField(bool binary, bool compressed, int32_t toRead) - { - if (format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) - fieldsStream->seek(fieldsStream->getFilePointer() + toRead); - else - { - // We need to skip chars. 
This will slow us down, but still better - fieldsStream->skipChars(toRead); - } + + fieldsStream->seek(startOffset); + + return fieldsStream; +} + +void FieldsReader::skipField(bool binary, bool compressed) { + skipField(binary, compressed, fieldsStream->readVInt()); +} + +void FieldsReader::skipField(bool binary, bool compressed, int32_t toRead) { + if (format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES || binary || compressed) { + fieldsStream->seek(fieldsStream->getFilePointer() + toRead); + } else { + // We need to skip chars. This will slow us down, but still better + fieldsStream->skipChars(toRead); } - - void FieldsReader::addFieldLazy(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed, bool tokenize) - { - if (binary) - { +} + +void FieldsReader::addFieldLazy(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize) { + if (binary) { + int32_t toRead = fieldsStream->readVInt(); + int64_t pointer = fieldsStream->getFilePointer(); + doc->add(newLucene(shared_from_this(), fi->name, Field::STORE_YES, toRead, pointer, binary, compressed)); + fieldsStream->seek(pointer + toRead); + } else { + Field::Store store = Field::STORE_YES; + Field::Index index = Field::toIndex(fi->isIndexed, tokenize); + Field::TermVector termVector = Field::toTermVector(fi->storeTermVector, fi->storeOffsetWithTermVector, fi->storePositionWithTermVector); + + AbstractFieldPtr f; + if (compressed) { int32_t toRead = fieldsStream->readVInt(); int64_t pointer = fieldsStream->getFilePointer(); - doc->add(newLucene(shared_from_this(), fi->name, Field::STORE_YES, toRead, pointer, binary, compressed)); + f = newLucene(shared_from_this(), fi->name, store, toRead, pointer, binary, compressed); + // skip over the part that we aren't loading fieldsStream->seek(pointer + toRead); - } - else - { - Field::Store store = Field::STORE_YES; - Field::Index index = Field::toIndex(fi->isIndexed, tokenize); - Field::TermVector termVector = 
Field::toTermVector(fi->storeTermVector, fi->storeOffsetWithTermVector, fi->storePositionWithTermVector); - - AbstractFieldPtr f; - if (compressed) - { - int32_t toRead = fieldsStream->readVInt(); - int64_t pointer = fieldsStream->getFilePointer(); - f = newLucene(shared_from_this(), fi->name, store, toRead, pointer, binary, compressed); - // skip over the part that we aren't loading - fieldsStream->seek(pointer + toRead); - f->setOmitNorms(fi->omitNorms); - f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); - } - else - { - int32_t length = fieldsStream->readVInt(); - int64_t pointer = fieldsStream->getFilePointer(); - // skip ahead of where we are by the length of what is stored - if (format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) - fieldsStream->seek(pointer + length); - else - fieldsStream->skipChars(length); - f = newLucene(shared_from_this(), fi->name, store, index, termVector, length, pointer, binary, compressed); - f->setOmitNorms(fi->omitNorms); - f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); + f->setOmitNorms(fi->omitNorms); + f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); + } else { + int32_t length = fieldsStream->readVInt(); + int64_t pointer = fieldsStream->getFilePointer(); + // skip ahead of where we are by the length of what is stored + if (format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { + fieldsStream->seek(pointer + length); + } else { + fieldsStream->skipChars(length); } - - doc->add(f); + f = newLucene(shared_from_this(), fi->name, store, index, termVector, length, pointer, binary, compressed); + f->setOmitNorms(fi->omitNorms); + f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); } + + doc->add(f); } - - void FieldsReader::addField(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed, bool tokenize) - { - // we have a binary stored field, and it may be compressed - if (binary) - { +} + +void FieldsReader::addField(const DocumentPtr& doc, const 
FieldInfoPtr& fi, bool binary, bool compressed, bool tokenize) { + // we have a binary stored field, and it may be compressed + if (binary) { + int32_t toRead = fieldsStream->readVInt(); + ByteArray b(ByteArray::newInstance(toRead)); + fieldsStream->readBytes(b.get(), 0, b.size()); + if (compressed) { + doc->add(newLucene(fi->name, uncompress(b), Field::STORE_YES)); + } else { + doc->add(newLucene(fi->name, b, Field::STORE_YES)); + } + } else { + Field::Store store = Field::STORE_YES; + Field::Index index = Field::toIndex(fi->isIndexed, tokenize); + Field::TermVector termVector = Field::toTermVector(fi->storeTermVector, fi->storeOffsetWithTermVector, fi->storePositionWithTermVector); + + AbstractFieldPtr f; + if (compressed) { int32_t toRead = fieldsStream->readVInt(); + ByteArray b(ByteArray::newInstance(toRead)); fieldsStream->readBytes(b.get(), 0, b.size()); - if (compressed) - doc->add(newLucene(fi->name, uncompress(b), Field::STORE_YES)); - else - doc->add(newLucene(fi->name, b, Field::STORE_YES)); - } - else - { - Field::Store store = Field::STORE_YES; - Field::Index index = Field::toIndex(fi->isIndexed, tokenize); - Field::TermVector termVector = Field::toTermVector(fi->storeTermVector, fi->storeOffsetWithTermVector, fi->storePositionWithTermVector); - - AbstractFieldPtr f; - if (compressed) - { - int32_t toRead = fieldsStream->readVInt(); - - ByteArray b(ByteArray::newInstance(toRead)); - fieldsStream->readBytes(b.get(), 0, b.size()); - f = newLucene(fi->name, uncompressString(b), store, index, termVector); - f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); - f->setOmitNorms(fi->omitNorms); - } - else - { - f = newLucene(fi->name, fieldsStream->readString(), store, index, termVector); - f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); - f->setOmitNorms(fi->omitNorms); - } - - doc->add(f); + f = newLucene(fi->name, uncompressString(b), store, index, termVector); + f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); + 
f->setOmitNorms(fi->omitNorms); + } else { + f = newLucene(fi->name, fieldsStream->readString(), store, index, termVector); + f->setOmitTermFreqAndPositions(fi->omitTermFreqAndPositions); + f->setOmitNorms(fi->omitNorms); } + + doc->add(f); } - - int32_t FieldsReader::addFieldSize(DocumentPtr doc, FieldInfoPtr fi, bool binary, bool compressed) - { - int32_t size = fieldsStream->readVInt(); - int32_t bytesize = (binary || compressed) ? size : 2 * size; - ByteArray sizebytes(ByteArray::newInstance(4)); - sizebytes[0] = (uint8_t)MiscUtils::unsignedShift(bytesize, 24); - sizebytes[1] = (uint8_t)MiscUtils::unsignedShift(bytesize, 16); - sizebytes[2] = (uint8_t)MiscUtils::unsignedShift(bytesize, 8); - sizebytes[3] = (uint8_t)(bytesize); - doc->add(newLucene(fi->name, sizebytes, Field::STORE_YES)); - return size; +} + +int32_t FieldsReader::addFieldSize(const DocumentPtr& doc, const FieldInfoPtr& fi, bool binary, bool compressed) { + int32_t size = fieldsStream->readVInt(); + int32_t bytesize = (binary || compressed) ? 
size : 2 * size; + ByteArray sizebytes(ByteArray::newInstance(4)); + sizebytes[0] = (uint8_t)MiscUtils::unsignedShift(bytesize, 24); + sizebytes[1] = (uint8_t)MiscUtils::unsignedShift(bytesize, 16); + sizebytes[2] = (uint8_t)MiscUtils::unsignedShift(bytesize, 8); + sizebytes[3] = (uint8_t)(bytesize); + doc->add(newLucene(fi->name, sizebytes, Field::STORE_YES)); + return size; +} + +ByteArray FieldsReader::uncompress(ByteArray b) { + try { + return CompressionTools::decompress(b); + } catch (LuceneException& e) { + boost::throw_exception(CorruptIndexException(L"field data are in wrong format [" + e.getError() + L"]")); } - - ByteArray FieldsReader::uncompress(ByteArray b) - { - try - { - return CompressionTools::decompress(b); - } - catch (LuceneException& e) - { - boost::throw_exception(CorruptIndexException(L"field data are in wrong format [" + e.getError() + L"]")); - } - return ByteArray(); + return ByteArray(); +} + +String FieldsReader::uncompressString(ByteArray b) { + try { + return CompressionTools::decompressString(b); + } catch (LuceneException& e) { + boost::throw_exception(CorruptIndexException(L"field data are in wrong format [" + e.getError() + L"]")); } - - String FieldsReader::uncompressString(ByteArray b) - { - try - { - return CompressionTools::decompressString(b); - } - catch (LuceneException& e) - { - boost::throw_exception(CorruptIndexException(L"field data are in wrong format [" + e.getError() + L"]")); - } + return L""; +} + +LazyField::LazyField(const FieldsReaderPtr& reader, const String& name, Field::Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed) : + AbstractField(name, store, Field::INDEX_NO, Field::TERM_VECTOR_NO) { + this->_reader = reader; + this->toRead = toRead; + this->pointer = pointer; + this->_isBinary = isBinary; + if (isBinary) { + binaryLength = toRead; + } + lazy = true; + this->isCompressed = isCompressed; +} + +LazyField::LazyField(const FieldsReaderPtr& reader, const String& name, 
Field::Store store, Field::Index index, Field::TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed) : + AbstractField(name, store, index, termVector) { + this->_reader = reader; + this->toRead = toRead; + this->pointer = pointer; + this->_isBinary = isBinary; + if (isBinary) { + binaryLength = toRead; + } + lazy = true; + this->isCompressed = isCompressed; +} + +LazyField::~LazyField() { +} + +IndexInputPtr LazyField::getFieldStream() { + FieldsReaderPtr reader(_reader); + IndexInputPtr localFieldsStream = reader->fieldsStreamTL.get(); + if (!localFieldsStream) { + localFieldsStream = boost::static_pointer_cast(reader->cloneableFieldsStream->clone()); + reader->fieldsStreamTL.set(localFieldsStream); + } + return localFieldsStream; +} + +ReaderPtr LazyField::readerValue() { + FieldsReaderPtr(_reader)->ensureOpen(); + return ReaderPtr(); +} + +TokenStreamPtr LazyField::tokenStreamValue() { + FieldsReaderPtr(_reader)->ensureOpen(); + return TokenStreamPtr(); +} + +String LazyField::stringValue() { + FieldsReaderPtr reader(_reader); + reader->ensureOpen(); + if (_isBinary) { return L""; - } - - LazyField::LazyField(FieldsReaderPtr reader, const String& name, Field::Store store, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed) : - AbstractField(name, store, Field::INDEX_NO, Field::TERM_VECTOR_NO) - { - this->_reader = reader; - this->toRead = toRead; - this->pointer = pointer; - this->_isBinary = isBinary; - if (isBinary) - binaryLength = toRead; - lazy = true; - this->isCompressed = isCompressed; - } - - LazyField::LazyField(FieldsReaderPtr reader, const String& name, Field::Store store, Field::Index index, Field::TermVector termVector, int32_t toRead, int64_t pointer, bool isBinary, bool isCompressed) : - AbstractField(name, store, index, termVector) - { - this->_reader = reader; - this->toRead = toRead; - this->pointer = pointer; - this->_isBinary = isBinary; - if (isBinary) - binaryLength = toRead; - lazy = 
true; - this->isCompressed = isCompressed; - } - - LazyField::~LazyField() - { - } - - IndexInputPtr LazyField::getFieldStream() - { - FieldsReaderPtr reader(_reader); - IndexInputPtr localFieldsStream = reader->fieldsStreamTL.get(); - if (!localFieldsStream) - { - localFieldsStream = boost::static_pointer_cast(reader->cloneableFieldsStream->clone()); - reader->fieldsStreamTL.set(localFieldsStream); - } - return localFieldsStream; - } - - ReaderPtr LazyField::readerValue() - { - FieldsReaderPtr(_reader)->ensureOpen(); - return ReaderPtr(); - } - - TokenStreamPtr LazyField::tokenStreamValue() - { - FieldsReaderPtr(_reader)->ensureOpen(); - return TokenStreamPtr(); - } - - String LazyField::stringValue() - { - FieldsReaderPtr reader(_reader); - reader->ensureOpen(); - if (_isBinary) - return L""; - else - { - if (VariantUtils::isNull(fieldsData)) - { - IndexInputPtr localFieldsStream(getFieldStream()); - try - { - localFieldsStream->seek(pointer); - if (isCompressed) - { - ByteArray b(ByteArray::newInstance(toRead)); - localFieldsStream->readBytes(b.get(), 0, b.size()); - fieldsData = reader->uncompressString(b); + } else { + if (VariantUtils::isNull(fieldsData)) { + IndexInputPtr localFieldsStream(getFieldStream()); + try { + localFieldsStream->seek(pointer); + if (isCompressed) { + ByteArray b(ByteArray::newInstance(toRead)); + localFieldsStream->readBytes(b.get(), 0, b.size()); + fieldsData = reader->uncompressString(b); + } else { + if (reader->format >= FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { + ByteArray bytes(ByteArray::newInstance(toRead)); + localFieldsStream->readBytes(bytes.get(), 0, toRead); + fieldsData = StringUtils::toUnicode(bytes.get(), toRead); + } else { + // read in chars because we already know the length we need to read + CharArray chars(CharArray::newInstance(toRead)); + int32_t length = localFieldsStream->readChars(chars.get(), 0, toRead); + fieldsData = String(chars.get(), length); } - else - { - if (reader->format >= 
FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) - { - ByteArray bytes(ByteArray::newInstance(toRead)); - localFieldsStream->readBytes(bytes.get(), 0, toRead); - fieldsData = StringUtils::toUnicode(bytes.get(), toRead); - } - else - { - // read in chars because we already know the length we need to read - CharArray chars(CharArray::newInstance(toRead)); - int32_t length = localFieldsStream->readChars(chars.get(), 0, toRead); - fieldsData = String(chars.get(), length); - } - } - } - catch (IOException& e) - { - boost::throw_exception(FieldReaderException(e.getError())); } + } catch (IOException& e) { + boost::throw_exception(FieldReaderException(e.getError())); } - return VariantUtils::get(fieldsData); } + return VariantUtils::get(fieldsData); } - - int64_t LazyField::getPointer() - { - FieldsReaderPtr(_reader)->ensureOpen(); - return pointer; - } - - void LazyField::setPointer(int64_t pointer) - { - FieldsReaderPtr(_reader)->ensureOpen(); - this->pointer = pointer; - } - - int32_t LazyField::getToRead() - { - FieldsReaderPtr(_reader)->ensureOpen(); - return toRead; - } - - void LazyField::setToRead(int32_t toRead) - { - FieldsReaderPtr(_reader)->ensureOpen(); - this->toRead = toRead; - } - - ByteArray LazyField::getBinaryValue(ByteArray result) - { - FieldsReaderPtr reader(_reader); - reader->ensureOpen(); - - if (_isBinary) - { - if (VariantUtils::isNull(fieldsData)) - { - ByteArray b; - - // Allocate new buffer if result is null or too small - if (!result || result.size() < toRead) - b = ByteArray::newInstance(toRead); - else - b = result; - - IndexInputPtr localFieldsStream(getFieldStream()); - - // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a - // change for people since they are already handling this exception when getting the document. 
- try - { - localFieldsStream->seek(pointer); - localFieldsStream->readBytes(b.get(), 0, toRead); - if (isCompressed) - fieldsData = reader->uncompress(b); - else - fieldsData = b; - } - catch (IOException& e) - { - boost::throw_exception(FieldReaderException(e.getError())); +} + +int64_t LazyField::getPointer() { + FieldsReaderPtr(_reader)->ensureOpen(); + return pointer; +} + +void LazyField::setPointer(int64_t pointer) { + FieldsReaderPtr(_reader)->ensureOpen(); + this->pointer = pointer; +} + +int32_t LazyField::getToRead() { + FieldsReaderPtr(_reader)->ensureOpen(); + return toRead; +} + +void LazyField::setToRead(int32_t toRead) { + FieldsReaderPtr(_reader)->ensureOpen(); + this->toRead = toRead; +} + +ByteArray LazyField::getBinaryValue(ByteArray result) { + FieldsReaderPtr reader(_reader); + reader->ensureOpen(); + + if (_isBinary) { + if (VariantUtils::isNull(fieldsData)) { + ByteArray b; + + // Allocate new buffer if result is null or too small + if (!result || result.size() < toRead) { + b = ByteArray::newInstance(toRead); + } else { + b = result; + } + + IndexInputPtr localFieldsStream(getFieldStream()); + + // Throw this IOException since IndexReader.document does so anyway, so probably not that big of a + // change for people since they are already handling this exception when getting the document. 
+ try { + localFieldsStream->seek(pointer); + localFieldsStream->readBytes(b.get(), 0, toRead); + if (isCompressed) { + fieldsData = reader->uncompress(b); + } else { + fieldsData = b; } - - binaryOffset = 0; - binaryLength = toRead; + } catch (IOException& e) { + boost::throw_exception(FieldReaderException(e.getError())); } - return VariantUtils::get(fieldsData); + + binaryOffset = 0; + binaryLength = toRead; } - else - return ByteArray(); + return VariantUtils::get(fieldsData); + } else { + return ByteArray(); } } + +} diff --git a/src/core/index/FieldsWriter.cpp b/src/core/index/FieldsWriter.cpp index cb4839ab..85c651d7 100644 --- a/src/core/index/FieldsWriter.cpp +++ b/src/core/index/FieldsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -16,206 +16,174 @@ #include "Document.h" #include "TestPoint.h" -namespace Lucene -{ - const uint8_t FieldsWriter::FIELD_IS_TOKENIZED = 0x1; - const uint8_t FieldsWriter::FIELD_IS_BINARY = 0x2; - const uint8_t FieldsWriter::FIELD_IS_COMPRESSED = 0x4; - - const int32_t FieldsWriter::FORMAT = 0; // Original format - const int32_t FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1; // Changed strings to UTF8 - const int32_t FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; // Lucene 3.0: Removal of compressed fields - - // NOTE: if you introduce a new format, make it 1 higher than the current one, and always change this if you - // switch to a new format! 
- const int32_t FieldsWriter::FORMAT_CURRENT = FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; - - FieldsWriter::FieldsWriter(DirectoryPtr d, const String& segment, FieldInfosPtr fn) - { - fieldInfos = fn; - - bool success = false; - String fieldsName(segment + L"." + IndexFileNames::FIELDS_EXTENSION()); - LuceneException finally; - try - { - fieldsStream = d->createOutput(fieldsName); - fieldsStream->writeInt(FORMAT_CURRENT); - success = true; +namespace Lucene { + +const uint8_t FieldsWriter::FIELD_IS_TOKENIZED = 0x1; +const uint8_t FieldsWriter::FIELD_IS_BINARY = 0x2; +const uint8_t FieldsWriter::FIELD_IS_COMPRESSED = 0x4; + +const int32_t FieldsWriter::FORMAT = 0; // Original format +const int32_t FieldsWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = 1; // Changed strings to UTF8 +const int32_t FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS = 2; // Lucene 3.0: Removal of compressed fields + +// NOTE: if you introduce a new format, make it 1 higher than the current one, and always change this if you +// switch to a new format! +const int32_t FieldsWriter::FORMAT_CURRENT = FieldsWriter::FORMAT_LUCENE_3_0_NO_COMPRESSED_FIELDS; + +FieldsWriter::FieldsWriter(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fn) { + fieldInfos = fn; + + bool success = false; + String fieldsName(segment + L"." + IndexFileNames::FIELDS_EXTENSION()); + LuceneException finally; + try { + fieldsStream = d->createOutput(fieldsName); + fieldsStream->writeInt(FORMAT_CURRENT); + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + try { + close(); + d->deleteFile(fieldsName); + } catch (...) { + // Suppress so we keep throwing the original exception } - catch (LuceneException& e) - { - finally = e; + } + finally.throwException(); + + success = false; + String indexName(segment + L"." 
+ IndexFileNames::FIELDS_INDEX_EXTENSION()); + try { + indexStream = d->createOutput(indexName); + indexStream->writeInt(FORMAT_CURRENT); + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + try { + close(); + d->deleteFile(fieldsName); + d->deleteFile(indexName); + } catch (...) { + // Suppress so we keep throwing the original exception } - if (!success) - { - try - { - close(); - d->deleteFile(fieldsName); - } - catch (...) - { - // Suppress so we keep throwing the original exception + } + finally.throwException(); + + doClose = true; +} + +FieldsWriter::FieldsWriter(const IndexOutputPtr& fdx, const IndexOutputPtr& fdt, const FieldInfosPtr& fn) { + fieldInfos = fn; + fieldsStream = fdt; + indexStream = fdx; + doClose = false; +} + +FieldsWriter::~FieldsWriter() { +} + +void FieldsWriter::setFieldsStream(const IndexOutputPtr& stream) { + this->fieldsStream = stream; +} + +void FieldsWriter::flushDocument(int32_t numStoredFields, const RAMOutputStreamPtr& buffer) { + TestScope testScope(L"FieldsWriter", L"flushDocument"); + indexStream->writeLong(fieldsStream->getFilePointer()); + fieldsStream->writeVInt(numStoredFields); + buffer->writeTo(fieldsStream); +} + +void FieldsWriter::skipDocument() { + indexStream->writeLong(fieldsStream->getFilePointer()); + fieldsStream->writeVInt(0); +} + +void FieldsWriter::flush() { + indexStream->flush(); + fieldsStream->flush(); +} + +void FieldsWriter::close() { + if (doClose) { + LuceneException finally; + if (fieldsStream) { + try { + fieldsStream->close(); + } catch (LuceneException& e) { + finally = e; } + fieldsStream.reset(); } - finally.throwException(); - - success = false; - String indexName(segment + L"." 
+ IndexFileNames::FIELDS_INDEX_EXTENSION()); - try - { - indexStream = d->createOutput(indexName); - indexStream->writeInt(FORMAT_CURRENT); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success) - { - try - { - close(); - d->deleteFile(fieldsName); - d->deleteFile(indexName); - } - catch (...) - { - // Suppress so we keep throwing the original exception + if (indexStream) { + try { + indexStream->close(); + } catch (LuceneException& e) { + if (finally.isNull()) { // throw first exception hit + finally = e; + } } + indexStream.reset(); } finally.throwException(); - - doClose = true; - } - - FieldsWriter::FieldsWriter(IndexOutputPtr fdx, IndexOutputPtr fdt, FieldInfosPtr fn) - { - fieldInfos = fn; - fieldsStream = fdt; - indexStream = fdx; - doClose = false; - } - - FieldsWriter::~FieldsWriter() - { - } - - void FieldsWriter::setFieldsStream(IndexOutputPtr stream) - { - this->fieldsStream = stream; - } - - void FieldsWriter::flushDocument(int32_t numStoredFields, RAMOutputStreamPtr buffer) - { - TestScope testScope(L"FieldsWriter", L"flushDocument"); - indexStream->writeLong(fieldsStream->getFilePointer()); - fieldsStream->writeVInt(numStoredFields); - buffer->writeTo(fieldsStream); } +} - void FieldsWriter::skipDocument() - { - indexStream->writeLong(fieldsStream->getFilePointer()); - fieldsStream->writeVInt(0); +void FieldsWriter::writeField(const FieldInfoPtr& fi, const FieldablePtr& field) { + fieldsStream->writeVInt(fi->number); + uint8_t bits = 0; + if (field->isTokenized()) { + bits |= FIELD_IS_TOKENIZED; } - - void FieldsWriter::flush() - { - indexStream->flush(); - fieldsStream->flush(); + if (field->isBinary()) { + bits |= FIELD_IS_BINARY; } - void FieldsWriter::close() - { - if (doClose) - { - LuceneException finally; - if (fieldsStream) - { - try - { - fieldsStream->close(); - } - catch (LuceneException& e) - { - finally = e; - } - fieldsStream.reset(); - } - if (indexStream) - { - try - { - indexStream->close(); - } - 
catch (LuceneException& e) - { - if (finally.isNull()) // throw first exception hit - finally = e; - } - indexStream.reset(); - } - finally.throwException(); - } + fieldsStream->writeByte(bits); + + if (field->isBinary()) { + ByteArray data(field->getBinaryValue()); + int32_t len = field->getBinaryLength(); + int32_t offset = field->getBinaryOffset(); + + fieldsStream->writeVInt(len); + fieldsStream->writeBytes(data.get(), offset, len); + } else { + fieldsStream->writeString(field->stringValue()); } - - void FieldsWriter::writeField(FieldInfoPtr fi, FieldablePtr field) - { - fieldsStream->writeVInt(fi->number); - uint8_t bits = 0; - if (field->isTokenized()) - bits |= FIELD_IS_TOKENIZED; - if (field->isBinary()) - bits |= FIELD_IS_BINARY; - - fieldsStream->writeByte(bits); - - if (field->isBinary()) - { - ByteArray data(field->getBinaryValue()); - int32_t len = field->getBinaryLength(); - int32_t offset = field->getBinaryOffset(); - - fieldsStream->writeVInt(len); - fieldsStream->writeBytes(data.get(), offset, len); - } - else - fieldsStream->writeString(field->stringValue()); +} + +void FieldsWriter::addRawDocuments(const IndexInputPtr& stream, Collection lengths, int32_t numDocs) { + int64_t position = fieldsStream->getFilePointer(); + int64_t start = position; + for (int32_t i = 0; i < numDocs; ++i) { + indexStream->writeLong(position); + position += lengths[i]; } - - void FieldsWriter::addRawDocuments(IndexInputPtr stream, Collection lengths, int32_t numDocs) - { - int64_t position = fieldsStream->getFilePointer(); - int64_t start = position; - for (int32_t i = 0; i < numDocs; ++i) - { - indexStream->writeLong(position); - position += lengths[i]; + fieldsStream->copyBytes(stream, position - start); + BOOST_ASSERT(fieldsStream->getFilePointer() == position); +} + +void FieldsWriter::addDocument(const DocumentPtr& doc) { + indexStream->writeLong(fieldsStream->getFilePointer()); + + int32_t storedCount = 0; + Collection fields(doc->getFields()); + for 
(Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if ((*field)->isStored()) { + ++storedCount; } - fieldsStream->copyBytes(stream, position - start); - BOOST_ASSERT(fieldsStream->getFilePointer() == position); } - - void FieldsWriter::addDocument(DocumentPtr doc) - { - indexStream->writeLong(fieldsStream->getFilePointer()); - - int32_t storedCount = 0; - Collection fields(doc->getFields()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if ((*field)->isStored()) - ++storedCount; - } - fieldsStream->writeVInt(storedCount); - - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if ((*field)->isStored()) - writeField(fieldInfos->fieldInfo((*field)->name()), *field); + fieldsStream->writeVInt(storedCount); + + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if ((*field)->isStored()) { + writeField(fieldInfos->fieldInfo((*field)->name()), *field); } } } + +} diff --git a/src/core/index/FilterIndexReader.cpp b/src/core/index/FilterIndexReader.cpp index c8154bdd..81bdfa42 100644 --- a/src/core/index/FilterIndexReader.cpp +++ b/src/core/index/FilterIndexReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,301 +8,247 @@ #include "FilterIndexReader.h" #include "FieldCache.h" -namespace Lucene -{ - FilterIndexReader::FilterIndexReader(IndexReaderPtr in) - { - this->in = in; - } - - FilterIndexReader::~FilterIndexReader() - { - } - - DirectoryPtr FilterIndexReader::directory() - { - return in->directory(); - } - - Collection FilterIndexReader::getTermFreqVectors(int32_t docNumber) - { - ensureOpen(); - return in->getTermFreqVectors(docNumber); - } - - TermFreqVectorPtr FilterIndexReader::getTermFreqVector(int32_t docNumber, const String& field) - { - ensureOpen(); - return in->getTermFreqVector(docNumber, field); - } - - void FilterIndexReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) - { - ensureOpen(); - in->getTermFreqVector(docNumber, field, mapper); - } - - void FilterIndexReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) - { - ensureOpen(); - in->getTermFreqVector(docNumber, mapper); - } - - int32_t FilterIndexReader::numDocs() - { - // Don't call ensureOpen() here (it could affect performance) - return in->numDocs(); - } - - int32_t FilterIndexReader::maxDoc() - { - // Don't call ensureOpen() here (it could affect performance) - return in->maxDoc(); - } - - DocumentPtr FilterIndexReader::document(int32_t n, FieldSelectorPtr fieldSelector) - { - ensureOpen(); - return in->document(n, fieldSelector); - } - - bool FilterIndexReader::isDeleted(int32_t n) - { - // Don't call ensureOpen() here (it could affect performance) - return in->isDeleted(n); - } - - bool FilterIndexReader::hasDeletions() - { - // Don't call ensureOpen() here (it could affect performance) - return in->hasDeletions(); - } - - void FilterIndexReader::doUndeleteAll() - { - in->undeleteAll(); - } - - bool FilterIndexReader::hasNorms(const String& field) - { - ensureOpen(); - return in->hasNorms(field); - } - - ByteArray FilterIndexReader::norms(const 
String& field) - { - ensureOpen(); - return in->norms(field); - } - - void FilterIndexReader::norms(const String& field, ByteArray norms, int32_t offset) - { - ensureOpen(); - in->norms(field, norms, offset); - } - - void FilterIndexReader::doSetNorm(int32_t doc, const String& field, uint8_t value) - { - in->setNorm(doc, field, value); - } - - TermEnumPtr FilterIndexReader::terms() - { - ensureOpen(); - return in->terms(); - } - - TermEnumPtr FilterIndexReader::terms(TermPtr t) - { - ensureOpen(); - return in->terms(t); - } - - int32_t FilterIndexReader::docFreq(TermPtr t) - { - ensureOpen(); - return in->docFreq(t); - } - - TermDocsPtr FilterIndexReader::termDocs() - { - ensureOpen(); - return in->termDocs(); - } - - TermDocsPtr FilterIndexReader::termDocs(TermPtr term) - { - ensureOpen(); - return in->termDocs(term); - } - - TermPositionsPtr FilterIndexReader::termPositions() - { - ensureOpen(); - return in->termPositions(); - } - - void FilterIndexReader::doDelete(int32_t docNum) - { - in->deleteDocument(docNum); - } - - void FilterIndexReader::doCommit(MapStringString commitUserData) - { - in->commit(commitUserData); - } - - void FilterIndexReader::doClose() - { - in->close(); - - // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is - // generally not a good idea) - FieldCache::DEFAULT()->purge(shared_from_this()); - } - - HashSet FilterIndexReader::getFieldNames(FieldOption fieldOption) - { - ensureOpen(); - return in->getFieldNames(fieldOption); - } - - int64_t FilterIndexReader::getVersion() - { - ensureOpen(); - return in->getVersion(); - } - - bool FilterIndexReader::isCurrent() - { - ensureOpen(); - return in->isCurrent(); - } - - bool FilterIndexReader::isOptimized() - { - ensureOpen(); - return in->isOptimized(); - } - - Collection FilterIndexReader::getSequentialSubReaders() - { - return in->getSequentialSubReaders(); - } - - LuceneObjectPtr FilterIndexReader::getFieldCacheKey() - { - return 
in->getFieldCacheKey(); - } - - LuceneObjectPtr FilterIndexReader::getDeletesCacheKey() - { - return in->getDeletesCacheKey(); - } - - FilterTermDocs::FilterTermDocs(TermDocsPtr in) - { - this->in = in; - } - - FilterTermDocs::~FilterTermDocs() - { - } - - void FilterTermDocs::seek(TermPtr term) - { - in->seek(term); - } - - void FilterTermDocs::seek(TermEnumPtr termEnum) - { - in->seek(termEnum); - } - - int32_t FilterTermDocs::doc() - { - return in->doc(); - } - - int32_t FilterTermDocs::freq() - { - return in->freq(); - } - - bool FilterTermDocs::next() - { - return in->next(); - } - - int32_t FilterTermDocs::read(Collection docs, Collection freqs) - { - return in->read(docs, freqs); - } - - bool FilterTermDocs::skipTo(int32_t target) - { - return in->skipTo(target); - } - - void FilterTermDocs::close() - { - in->close(); - } - - FilterTermPositions::FilterTermPositions(TermPositionsPtr in) : FilterTermDocs(in) - { - } - - FilterTermPositions::~FilterTermPositions() - { - } - - int32_t FilterTermPositions::nextPosition() - { - return boost::static_pointer_cast(in)->nextPosition(); - } - - int32_t FilterTermPositions::getPayloadLength() - { - return boost::static_pointer_cast(in)->getPayloadLength(); - } - - ByteArray FilterTermPositions::getPayload(ByteArray data, int32_t offset) - { - return boost::static_pointer_cast(in)->getPayload(data, offset); - } - - bool FilterTermPositions::isPayloadAvailable() - { - return boost::static_pointer_cast(in)->isPayloadAvailable(); - } - - FilterTermEnum::FilterTermEnum(TermEnumPtr in) - { - this->in = in; - } - - FilterTermEnum::~FilterTermEnum() - { - } - - bool FilterTermEnum::next() - { - return in->next(); - } - - TermPtr FilterTermEnum::term() - { - return in->term(); - } - - int32_t FilterTermEnum::docFreq() - { - return in->docFreq(); - } - - void FilterTermEnum::close() - { - in->close(); - } +namespace Lucene { + +FilterIndexReader::FilterIndexReader(const IndexReaderPtr& in) { + this->in = in; +} + 
+FilterIndexReader::~FilterIndexReader() { +} + +DirectoryPtr FilterIndexReader::directory() { + return in->directory(); +} + +Collection FilterIndexReader::getTermFreqVectors(int32_t docNumber) { + ensureOpen(); + return in->getTermFreqVectors(docNumber); +} + +TermFreqVectorPtr FilterIndexReader::getTermFreqVector(int32_t docNumber, const String& field) { + ensureOpen(); + return in->getTermFreqVector(docNumber, field); +} + +void FilterIndexReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { + ensureOpen(); + in->getTermFreqVector(docNumber, field, mapper); +} + +void FilterIndexReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { + ensureOpen(); + in->getTermFreqVector(docNumber, mapper); +} + +int32_t FilterIndexReader::numDocs() { + // Don't call ensureOpen() here (it could affect performance) + return in->numDocs(); +} + +int32_t FilterIndexReader::maxDoc() { + // Don't call ensureOpen() here (it could affect performance) + return in->maxDoc(); +} + +DocumentPtr FilterIndexReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { + ensureOpen(); + return in->document(n, fieldSelector); +} + +bool FilterIndexReader::isDeleted(int32_t n) { + // Don't call ensureOpen() here (it could affect performance) + return in->isDeleted(n); +} + +bool FilterIndexReader::hasDeletions() { + // Don't call ensureOpen() here (it could affect performance) + return in->hasDeletions(); +} + +void FilterIndexReader::doUndeleteAll() { + in->undeleteAll(); +} + +bool FilterIndexReader::hasNorms(const String& field) { + ensureOpen(); + return in->hasNorms(field); +} + +ByteArray FilterIndexReader::norms(const String& field) { + ensureOpen(); + return in->norms(field); +} + +void FilterIndexReader::norms(const String& field, ByteArray norms, int32_t offset) { + ensureOpen(); + in->norms(field, norms, offset); +} + +void FilterIndexReader::doSetNorm(int32_t doc, const String& field, uint8_t 
value) { + in->setNorm(doc, field, value); +} + +TermEnumPtr FilterIndexReader::terms() { + ensureOpen(); + return in->terms(); +} + +TermEnumPtr FilterIndexReader::terms(const TermPtr& t) { + ensureOpen(); + return in->terms(t); +} + +int32_t FilterIndexReader::docFreq(const TermPtr& t) { + ensureOpen(); + return in->docFreq(t); +} + +TermDocsPtr FilterIndexReader::termDocs() { + ensureOpen(); + return in->termDocs(); +} + +TermDocsPtr FilterIndexReader::termDocs(const TermPtr& term) { + ensureOpen(); + return in->termDocs(term); +} + +TermPositionsPtr FilterIndexReader::termPositions() { + ensureOpen(); + return in->termPositions(); +} + +void FilterIndexReader::doDelete(int32_t docNum) { + in->deleteDocument(docNum); +} + +void FilterIndexReader::doCommit(MapStringString commitUserData) { + in->commit(commitUserData); +} + +void FilterIndexReader::doClose() { + in->close(); + + // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is + // generally not a good idea) + FieldCache::DEFAULT()->purge(shared_from_this()); +} + +HashSet FilterIndexReader::getFieldNames(FieldOption fieldOption) { + ensureOpen(); + return in->getFieldNames(fieldOption); +} + +int64_t FilterIndexReader::getVersion() { + ensureOpen(); + return in->getVersion(); +} + +bool FilterIndexReader::isCurrent() { + ensureOpen(); + return in->isCurrent(); +} + +bool FilterIndexReader::isOptimized() { + ensureOpen(); + return in->isOptimized(); +} + +Collection FilterIndexReader::getSequentialSubReaders() { + return in->getSequentialSubReaders(); +} + +LuceneObjectPtr FilterIndexReader::getFieldCacheKey() { + return in->getFieldCacheKey(); +} + +LuceneObjectPtr FilterIndexReader::getDeletesCacheKey() { + return in->getDeletesCacheKey(); +} + +FilterTermDocs::FilterTermDocs(const TermDocsPtr& in) { + this->in = in; +} + +FilterTermDocs::~FilterTermDocs() { +} + +void FilterTermDocs::seek(const TermPtr& term) { + in->seek(term); +} + +void FilterTermDocs::seek(const 
TermEnumPtr& termEnum) { + in->seek(termEnum); +} + +int32_t FilterTermDocs::doc() { + return in->doc(); +} + +int32_t FilterTermDocs::freq() { + return in->freq(); +} + +bool FilterTermDocs::next() { + return in->next(); +} + +int32_t FilterTermDocs::read(Collection& docs, Collection& freqs) { + return in->read(docs, freqs); +} + +bool FilterTermDocs::skipTo(int32_t target) { + return in->skipTo(target); +} + +void FilterTermDocs::close() { + in->close(); +} + +FilterTermPositions::FilterTermPositions(const TermPositionsPtr& in) : FilterTermDocs(in) { +} + +FilterTermPositions::~FilterTermPositions() { +} + +int32_t FilterTermPositions::nextPosition() { + return boost::static_pointer_cast(in)->nextPosition(); +} + +int32_t FilterTermPositions::getPayloadLength() { + return boost::static_pointer_cast(in)->getPayloadLength(); +} + +ByteArray FilterTermPositions::getPayload(ByteArray data, int32_t offset) { + return boost::static_pointer_cast(in)->getPayload(data, offset); +} + +bool FilterTermPositions::isPayloadAvailable() { + return boost::static_pointer_cast(in)->isPayloadAvailable(); +} + +FilterTermEnum::FilterTermEnum(const TermEnumPtr& in) { + this->in = in; +} + +FilterTermEnum::~FilterTermEnum() { +} + +bool FilterTermEnum::next() { + return in->next(); +} + +TermPtr FilterTermEnum::term() { + return in->term(); +} + +int32_t FilterTermEnum::docFreq() { + return in->docFreq(); +} + +void FilterTermEnum::close() { + in->close(); +} + } diff --git a/src/core/index/FormatPostingsDocsConsumer.cpp b/src/core/index/FormatPostingsDocsConsumer.cpp index 43dceb2b..6f711f52 100644 --- a/src/core/index/FormatPostingsDocsConsumer.cpp +++ b/src/core/index/FormatPostingsDocsConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "FormatPostingsDocsConsumer.h" -namespace Lucene -{ - FormatPostingsDocsConsumer::~FormatPostingsDocsConsumer() - { - } +namespace Lucene { + +FormatPostingsDocsConsumer::~FormatPostingsDocsConsumer() { +} + } diff --git a/src/core/index/FormatPostingsDocsWriter.cpp b/src/core/index/FormatPostingsDocsWriter.cpp index 1e83d1da..8e576dfa 100644 --- a/src/core/index/FormatPostingsDocsWriter.cpp +++ b/src/core/index/FormatPostingsDocsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -21,96 +21,90 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - FormatPostingsDocsWriter::FormatPostingsDocsWriter(SegmentWriteStatePtr state, FormatPostingsTermsWriterPtr parent) - { - this->lastDocID = 0; - this->df = 0; - this->omitTermFreqAndPositions = false; - this->storePayloads = false; - this->freqStart = 0; - - FormatPostingsFieldsWriterPtr parentPostings(parent->_parent); - this->_parent = parent; - this->state = state; - String fileName(IndexFileNames::segmentFileName(parentPostings->segment, IndexFileNames::FREQ_EXTENSION())); - state->flushedFiles.add(fileName); - out = parentPostings->dir->createOutput(fileName); - totalNumDocs = parentPostings->totalNumDocs; - - skipInterval = parentPostings->termsOut->skipInterval; - skipListWriter = parentPostings->skipListWriter; - skipListWriter->setFreqOutput(out); - - termInfo = newLucene(); - utf8 = newLucene(); - } - - FormatPostingsDocsWriter::~FormatPostingsDocsWriter() - { - } - - void FormatPostingsDocsWriter::initialize() - { - posWriter = newLucene(state, shared_from_this()); - } - - void FormatPostingsDocsWriter::setField(FieldInfoPtr fieldInfo) - { - this->fieldInfo = fieldInfo; - omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; - storePayloads = fieldInfo->storePayloads; - posWriter->setField(fieldInfo); +namespace Lucene { + +FormatPostingsDocsWriter::FormatPostingsDocsWriter(const SegmentWriteStatePtr& state, const FormatPostingsTermsWriterPtr& parent) { + this->lastDocID = 0; + this->df = 0; + this->omitTermFreqAndPositions = false; + this->storePayloads = false; + this->freqStart = 0; + + FormatPostingsFieldsWriterPtr parentPostings(parent->_parent); + this->_parent = parent; + this->state = state; + String fileName(IndexFileNames::segmentFileName(parentPostings->segment, IndexFileNames::FREQ_EXTENSION())); + state->flushedFiles.add(fileName); + out = 
parentPostings->dir->createOutput(fileName); + totalNumDocs = parentPostings->totalNumDocs; + + skipInterval = parentPostings->termsOut->skipInterval; + skipListWriter = parentPostings->skipListWriter; + skipListWriter->setFreqOutput(out); + + termInfo = newLucene(); + utf8 = newLucene(); +} + +FormatPostingsDocsWriter::~FormatPostingsDocsWriter() { +} + +void FormatPostingsDocsWriter::initialize() { + posWriter = newLucene(state, shared_from_this()); +} + +void FormatPostingsDocsWriter::setField(const FieldInfoPtr& fieldInfo) { + this->fieldInfo = fieldInfo; + omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; + storePayloads = fieldInfo->storePayloads; + posWriter->setField(fieldInfo); +} + +FormatPostingsPositionsConsumerPtr FormatPostingsDocsWriter::addDoc(int32_t docID, int32_t termDocFreq) { + int32_t delta = docID - lastDocID; + + if (docID < 0 || (df > 0 && delta <= 0)) { + boost::throw_exception(CorruptIndexException(L"docs out of order (" + StringUtils::toString(docID) + L" <= " + StringUtils::toString(lastDocID) + L" )")); } - - FormatPostingsPositionsConsumerPtr FormatPostingsDocsWriter::addDoc(int32_t docID, int32_t termDocFreq) - { - int32_t delta = docID - lastDocID; - - if (docID < 0 || (df > 0 && delta <= 0)) - boost::throw_exception(CorruptIndexException(L"docs out of order (" + StringUtils::toString(docID) + L" <= " + StringUtils::toString(lastDocID) + L" )")); - - if ((++df % skipInterval) == 0) - { - skipListWriter->setSkipData(lastDocID, storePayloads, posWriter->lastPayloadLength); - skipListWriter->bufferSkip(df); - } - - BOOST_ASSERT(docID < totalNumDocs); - - lastDocID = docID; - if (omitTermFreqAndPositions) - out->writeVInt(delta); - else if (termDocFreq == 1) - out->writeVInt((delta << 1) | 1); - else - { - out->writeVInt(delta << 1); - out->writeVInt(termDocFreq); - } - - return posWriter; + + if ((++df % skipInterval) == 0) { + skipListWriter->setSkipData(lastDocID, storePayloads, posWriter->lastPayloadLength); + 
skipListWriter->bufferSkip(df); } - - void FormatPostingsDocsWriter::finish() - { - int64_t skipPointer = skipListWriter->writeSkip(out); - FormatPostingsTermsWriterPtr parent(_parent); - termInfo->set(df, parent->freqStart, parent->proxStart, (int32_t)(skipPointer - parent->freqStart)); - - StringUtils::toUTF8(parent->currentTerm.get() + parent->currentTermStart, parent->currentTerm.size(), utf8); - - if (df > 0) - parent->termsOut->add(fieldInfo->number, utf8->result, utf8->length, termInfo); - - lastDocID = 0; - df = 0; + + BOOST_ASSERT(docID < totalNumDocs); + + lastDocID = docID; + if (omitTermFreqAndPositions) { + out->writeVInt(delta); + } else if (termDocFreq == 1) { + out->writeVInt((delta << 1) | 1); + } else { + out->writeVInt(delta << 1); + out->writeVInt(termDocFreq); } - - void FormatPostingsDocsWriter::close() - { - out->close(); - posWriter->close(); + + return posWriter; +} + +void FormatPostingsDocsWriter::finish() { + int64_t skipPointer = skipListWriter->writeSkip(out); + FormatPostingsTermsWriterPtr parent(_parent); + termInfo->set(df, parent->freqStart, parent->proxStart, (int32_t)(skipPointer - parent->freqStart)); + + StringUtils::toUTF8(parent->currentTerm.get() + parent->currentTermStart, parent->currentTerm.size(), utf8); + + if (df > 0) { + parent->termsOut->add(fieldInfo->number, utf8->result, utf8->length, termInfo); } + + lastDocID = 0; + df = 0; +} + +void FormatPostingsDocsWriter::close() { + out->close(); + posWriter->close(); +} + } diff --git a/src/core/index/FormatPostingsFieldsConsumer.cpp b/src/core/index/FormatPostingsFieldsConsumer.cpp index e5bedd3b..1063e211 100644 --- a/src/core/index/FormatPostingsFieldsConsumer.cpp +++ b/src/core/index/FormatPostingsFieldsConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "FormatPostingsFieldsConsumer.h" -namespace Lucene -{ - FormatPostingsFieldsConsumer::~FormatPostingsFieldsConsumer() - { - } +namespace Lucene { + +FormatPostingsFieldsConsumer::~FormatPostingsFieldsConsumer() { +} + } diff --git a/src/core/index/FormatPostingsFieldsWriter.cpp b/src/core/index/FormatPostingsFieldsWriter.cpp index 701118d0..e0a8cc58 100644 --- a/src/core/index/FormatPostingsFieldsWriter.cpp +++ b/src/core/index/FormatPostingsFieldsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,41 +12,37 @@ #include "IndexFileNames.h" #include "DefaultSkipListWriter.h" -namespace Lucene -{ - FormatPostingsFieldsWriter::FormatPostingsFieldsWriter(SegmentWriteStatePtr state, FieldInfosPtr fieldInfos) - { - dir = state->directory; - segment = state->segmentName; - totalNumDocs = state->numDocs; - this->state = state; - this->fieldInfos = fieldInfos; - termsOut = newLucene(dir, segment, fieldInfos, state->termIndexInterval); - - skipListWriter = newLucene(termsOut->skipInterval, termsOut->maxSkipLevels, totalNumDocs, IndexOutputPtr(), IndexOutputPtr()); - - state->flushedFiles.add(state->segmentFileName(IndexFileNames::TERMS_EXTENSION())); - state->flushedFiles.add(state->segmentFileName(IndexFileNames::TERMS_INDEX_EXTENSION())); - } - - FormatPostingsFieldsWriter::~FormatPostingsFieldsWriter() - { - } - - void FormatPostingsFieldsWriter::initialize() - { - termsWriter = newLucene(state, shared_from_this()); - } - - FormatPostingsTermsConsumerPtr FormatPostingsFieldsWriter::addField(FieldInfoPtr field) - { - termsWriter->setField(field); - return termsWriter; - } - - void FormatPostingsFieldsWriter::finish() - { - termsOut->close(); - termsWriter->close(); - } +namespace Lucene { + +FormatPostingsFieldsWriter::FormatPostingsFieldsWriter(const SegmentWriteStatePtr& state, const FieldInfosPtr& fieldInfos) { + dir = state->directory; + segment = state->segmentName; + totalNumDocs = state->numDocs; + this->state = state; + this->fieldInfos = fieldInfos; + termsOut = newLucene(dir, segment, fieldInfos, state->termIndexInterval); + + skipListWriter = newLucene(termsOut->skipInterval, termsOut->maxSkipLevels, totalNumDocs, IndexOutputPtr(), IndexOutputPtr()); + + state->flushedFiles.add(state->segmentFileName(IndexFileNames::TERMS_EXTENSION())); + state->flushedFiles.add(state->segmentFileName(IndexFileNames::TERMS_INDEX_EXTENSION())); +} + 
+FormatPostingsFieldsWriter::~FormatPostingsFieldsWriter() { +} + +void FormatPostingsFieldsWriter::initialize() { + termsWriter = newLucene(state, shared_from_this()); +} + +FormatPostingsTermsConsumerPtr FormatPostingsFieldsWriter::addField(const FieldInfoPtr& field) { + termsWriter->setField(field); + return termsWriter; +} + +void FormatPostingsFieldsWriter::finish() { + termsOut->close(); + termsWriter->close(); +} + } diff --git a/src/core/index/FormatPostingsPositionsConsumer.cpp b/src/core/index/FormatPostingsPositionsConsumer.cpp index 45b2d343..a239aa9a 100644 --- a/src/core/index/FormatPostingsPositionsConsumer.cpp +++ b/src/core/index/FormatPostingsPositionsConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "FormatPostingsPositionsConsumer.h" -namespace Lucene -{ - FormatPostingsPositionsConsumer::~FormatPostingsPositionsConsumer() - { - } +namespace Lucene { + +FormatPostingsPositionsConsumer::~FormatPostingsPositionsConsumer() { +} + } diff --git a/src/core/index/FormatPostingsPositionsWriter.cpp b/src/core/index/FormatPostingsPositionsWriter.cpp index cfa86e5c..b3f80833 100644 --- a/src/core/index/FormatPostingsPositionsWriter.cpp +++ b/src/core/index/FormatPostingsPositionsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -17,77 +17,69 @@ #include "DefaultSkipListWriter.h" #include "IndexOutput.h" -namespace Lucene -{ - FormatPostingsPositionsWriter::FormatPostingsPositionsWriter(SegmentWriteStatePtr state, FormatPostingsDocsWriterPtr parent) - { - lastPosition = 0; - storePayloads = false; - lastPayloadLength = -1; - - this->_parent = parent; - FormatPostingsFieldsWriterPtr parentFieldsWriter(FormatPostingsTermsWriterPtr(parent->_parent)->_parent); - - omitTermFreqAndPositions = parent->omitTermFreqAndPositions; - - if (parentFieldsWriter->fieldInfos->hasProx()) - { - // At least one field does not omit TF, so create the prox file - String fileName(IndexFileNames::segmentFileName(parentFieldsWriter->segment, IndexFileNames::PROX_EXTENSION())); - state->flushedFiles.add(fileName); - out = parentFieldsWriter->dir->createOutput(fileName); - parent->skipListWriter->setProxOutput(out); - } - else - { - // Every field omits TF so we will write no prox file - } - } - - FormatPostingsPositionsWriter::~FormatPostingsPositionsWriter() - { +namespace Lucene { + +FormatPostingsPositionsWriter::FormatPostingsPositionsWriter(const SegmentWriteStatePtr& state, const FormatPostingsDocsWriterPtr& parent) { + lastPosition = 0; + storePayloads = false; + lastPayloadLength = -1; + + this->_parent = parent; + FormatPostingsFieldsWriterPtr parentFieldsWriter(FormatPostingsTermsWriterPtr(parent->_parent)->_parent); + + omitTermFreqAndPositions = parent->omitTermFreqAndPositions; + + if (parentFieldsWriter->fieldInfos->hasProx()) { + // At least one field does not omit TF, so create the prox file + String fileName(IndexFileNames::segmentFileName(parentFieldsWriter->segment, IndexFileNames::PROX_EXTENSION())); + state->flushedFiles.add(fileName); + out = parentFieldsWriter->dir->createOutput(fileName); + parent->skipListWriter->setProxOutput(out); + } else { + // Every field omits TF so we will write no prox file } - - void 
FormatPostingsPositionsWriter::addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength) - { - BOOST_ASSERT(!omitTermFreqAndPositions); - BOOST_ASSERT(out); - - int32_t delta = position - lastPosition; - lastPosition = position; - - if (storePayloads) - { - if (payloadLength != lastPayloadLength) - { - lastPayloadLength = payloadLength; - out->writeVInt((delta << 1) | 1); - out->writeVInt(payloadLength); - } - else - out->writeVInt(delta << 1); - if (payloadLength > 0) - out->writeBytes(payload.get(), payloadLength); +} + +FormatPostingsPositionsWriter::~FormatPostingsPositionsWriter() { +} + +void FormatPostingsPositionsWriter::addPosition(int32_t position, ByteArray payload, int32_t payloadOffset, int32_t payloadLength) { + BOOST_ASSERT(!omitTermFreqAndPositions); + BOOST_ASSERT(out); + + int32_t delta = position - lastPosition; + lastPosition = position; + + if (storePayloads) { + if (payloadLength != lastPayloadLength) { + lastPayloadLength = payloadLength; + out->writeVInt((delta << 1) | 1); + out->writeVInt(payloadLength); + } else { + out->writeVInt(delta << 1); } - else - out->writeVInt(delta); - } - - void FormatPostingsPositionsWriter::setField(FieldInfoPtr fieldInfo) - { - omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; - storePayloads = omitTermFreqAndPositions ? false : fieldInfo->storePayloads; - } - - void FormatPostingsPositionsWriter::finish() - { - lastPosition = 0; - lastPayloadLength = -1; + if (payloadLength > 0) { + out->writeBytes(payload.get(), payloadLength); + } + } else { + out->writeVInt(delta); } - - void FormatPostingsPositionsWriter::close() - { - if (out) - out->close(); +} + +void FormatPostingsPositionsWriter::setField(const FieldInfoPtr& fieldInfo) { + omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; + storePayloads = omitTermFreqAndPositions ? 
false : fieldInfo->storePayloads; +} + +void FormatPostingsPositionsWriter::finish() { + lastPosition = 0; + lastPayloadLength = -1; +} + +void FormatPostingsPositionsWriter::close() { + if (out) { + out->close(); } } + +} diff --git a/src/core/index/FormatPostingsTermsConsumer.cpp b/src/core/index/FormatPostingsTermsConsumer.cpp index 0f045efb..4894527a 100644 --- a/src/core/index/FormatPostingsTermsConsumer.cpp +++ b/src/core/index/FormatPostingsTermsConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,21 +9,22 @@ #include "UTF8Stream.h" #include "MiscUtils.h" -namespace Lucene -{ - FormatPostingsTermsConsumer::~FormatPostingsTermsConsumer() - { +namespace Lucene { + +FormatPostingsTermsConsumer::~FormatPostingsTermsConsumer() { +} + +FormatPostingsDocsConsumerPtr FormatPostingsTermsConsumer::addTerm(const String& text) { + int32_t len = text.length(); + if (!termBuffer) { + termBuffer = CharArray::newInstance(MiscUtils::getNextSize(len + 1)); } - - FormatPostingsDocsConsumerPtr FormatPostingsTermsConsumer::addTerm(const String& text) - { - int32_t len = text.length(); - if (!termBuffer) - termBuffer = CharArray::newInstance(MiscUtils::getNextSize(len + 1)); - if (termBuffer.size() < len + 1) - termBuffer.resize(MiscUtils::getNextSize(len + 1)); - MiscUtils::arrayCopy(text.begin(), 0, termBuffer.get(), 0, len); - termBuffer[len] = UTF8Base::UNICODE_TERMINATOR; - return addTerm(termBuffer, 0); + if (termBuffer.size() < len + 1) { + termBuffer.resize(MiscUtils::getNextSize(len + 1)); } + MiscUtils::arrayCopy(text.begin(), 0, termBuffer.get(), 0, len); + termBuffer[len] = 
UTF8Base::UNICODE_TERMINATOR; + return addTerm(termBuffer, 0); +} + } diff --git a/src/core/index/FormatPostingsTermsWriter.cpp b/src/core/index/FormatPostingsTermsWriter.cpp index e7902103..48ce1563 100644 --- a/src/core/index/FormatPostingsTermsWriter.cpp +++ b/src/core/index/FormatPostingsTermsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,54 +12,49 @@ #include "IndexOutput.h" #include "DefaultSkipListWriter.h" -namespace Lucene -{ - FormatPostingsTermsWriter::FormatPostingsTermsWriter(SegmentWriteStatePtr state, FormatPostingsFieldsWriterPtr parent) - { - currentTermStart = 0; - freqStart = 0; - proxStart = 0; - - this->_parent = parent; - this->state = state; - termsOut = parent->termsOut; - } - - FormatPostingsTermsWriter::~FormatPostingsTermsWriter() - { - } - - void FormatPostingsTermsWriter::initialize() - { - docsWriter = newLucene(state, shared_from_this()); - } - - void FormatPostingsTermsWriter::setField(FieldInfoPtr fieldInfo) - { - this->fieldInfo = fieldInfo; - docsWriter->setField(fieldInfo); - } - - FormatPostingsDocsConsumerPtr FormatPostingsTermsWriter::addTerm(CharArray text, int32_t start) - { - currentTerm = text; - currentTermStart = start; - - freqStart = docsWriter->out->getFilePointer(); - if (docsWriter->posWriter->out) - proxStart = docsWriter->posWriter->out->getFilePointer(); - - FormatPostingsFieldsWriterPtr(_parent)->skipListWriter->resetSkip(); - - return docsWriter; - } - - void FormatPostingsTermsWriter::finish() - { - } - - void FormatPostingsTermsWriter::close() - { - docsWriter->close(); +namespace Lucene { + 
+FormatPostingsTermsWriter::FormatPostingsTermsWriter(const SegmentWriteStatePtr& state, const FormatPostingsFieldsWriterPtr& parent) { + currentTermStart = 0; + freqStart = 0; + proxStart = 0; + + this->_parent = parent; + this->state = state; + termsOut = parent->termsOut; +} + +FormatPostingsTermsWriter::~FormatPostingsTermsWriter() { +} + +void FormatPostingsTermsWriter::initialize() { + docsWriter = newLucene(state, shared_from_this()); +} + +void FormatPostingsTermsWriter::setField(const FieldInfoPtr& fieldInfo) { + this->fieldInfo = fieldInfo; + docsWriter->setField(fieldInfo); +} + +FormatPostingsDocsConsumerPtr FormatPostingsTermsWriter::addTerm(CharArray text, int32_t start) { + currentTerm = text; + currentTermStart = start; + + freqStart = docsWriter->out->getFilePointer(); + if (docsWriter->posWriter->out) { + proxStart = docsWriter->posWriter->out->getFilePointer(); } + + FormatPostingsFieldsWriterPtr(_parent)->skipListWriter->resetSkip(); + + return docsWriter; +} + +void FormatPostingsTermsWriter::finish() { +} + +void FormatPostingsTermsWriter::close() { + docsWriter->close(); +} + } diff --git a/src/core/index/FreqProxFieldMergeState.cpp b/src/core/index/FreqProxFieldMergeState.cpp index 49286f56..838d5d32 100644 --- a/src/core/index/FreqProxFieldMergeState.cpp +++ b/src/core/index/FreqProxFieldMergeState.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -17,88 +17,84 @@ #include "FieldInfo.h" #include "MiscUtils.h" -namespace Lucene -{ - FreqProxFieldMergeState::FreqProxFieldMergeState(FreqProxTermsWriterPerFieldPtr field) - { - this->numPostings = 0; - this->textOffset = 0; - this->docID = 0; - this->termFreq = 0; - this->postingUpto = -1; - this->freq = newLucene(); - this->prox = newLucene(); - - this->field = field; - this->charPool = TermsHashPerThreadPtr(FreqProxTermsWriterPerThreadPtr(field->_perThread)->_termsHashPerThread)->charPool; - - TermsHashPerFieldPtr termsHashPerField(field->_termsHashPerField); - this->numPostings = termsHashPerField->numPostings; - this->postings = termsHashPerField->sortPostings(); - } - - FreqProxFieldMergeState::~FreqProxFieldMergeState() - { +namespace Lucene { + +FreqProxFieldMergeState::FreqProxFieldMergeState(const FreqProxTermsWriterPerFieldPtr& field) { + this->numPostings = 0; + this->textOffset = 0; + this->docID = 0; + this->termFreq = 0; + this->postingUpto = -1; + this->freq = newLucene(); + this->prox = newLucene(); + + this->field = field; + this->charPool = TermsHashPerThreadPtr(FreqProxTermsWriterPerThreadPtr(field->_perThread)->_termsHashPerThread)->charPool; + + TermsHashPerFieldPtr termsHashPerField(field->_termsHashPerField); + this->numPostings = termsHashPerField->numPostings; + this->postings = termsHashPerField->sortPostings(); +} + +FreqProxFieldMergeState::~FreqProxFieldMergeState() { +} + +bool FreqProxFieldMergeState::nextTerm() { + ++postingUpto; + if (postingUpto == numPostings) { + return false; } - - bool FreqProxFieldMergeState::nextTerm() - { - ++postingUpto; - if (postingUpto == numPostings) - return false; - - p = boost::static_pointer_cast(postings[postingUpto]); - docID = 0; - - text = charPool->buffers[p->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]; - textOffset = (p->textStart & DocumentsWriter::CHAR_BLOCK_MASK); - - TermsHashPerFieldPtr 
termsHashPerField(field->_termsHashPerField); - termsHashPerField->initReader(freq, p, 0); - if (!field->fieldInfo->omitTermFreqAndPositions) - termsHashPerField->initReader(prox, p, 1); - - // Should always be true - bool result = nextDoc(); - BOOST_ASSERT(result); - - return true; + + p = boost::static_pointer_cast(postings[postingUpto]); + docID = 0; + + text = charPool->buffers[p->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]; + textOffset = (p->textStart & DocumentsWriter::CHAR_BLOCK_MASK); + + TermsHashPerFieldPtr termsHashPerField(field->_termsHashPerField); + termsHashPerField->initReader(freq, p, 0); + if (!field->fieldInfo->omitTermFreqAndPositions) { + termsHashPerField->initReader(prox, p, 1); } - - bool FreqProxFieldMergeState::nextDoc() - { - if (freq->eof()) - { - if (p->lastDocCode != -1) - { - // Return last doc - docID = p->lastDocID; - if (!field->omitTermFreqAndPositions) - termFreq = p->docFreq; - p->lastDocCode = -1; - return true; - } - else - { - // EOF - return false; + + // Should always be true + bool result = nextDoc(); + BOOST_ASSERT(result); + + return true; +} + +bool FreqProxFieldMergeState::nextDoc() { + if (freq->eof()) { + if (p->lastDocCode != -1) { + // Return last doc + docID = p->lastDocID; + if (!field->omitTermFreqAndPositions) { + termFreq = p->docFreq; } + p->lastDocCode = -1; + return true; + } else { + // EOF + return false; } - - int32_t code = freq->readVInt(); - if (field->omitTermFreqAndPositions) - docID += code; - else - { - docID += MiscUtils::unsignedShift(code, 1); - if ((code & 1) != 0) - termFreq = 1; - else - termFreq = freq->readVInt(); + } + + int32_t code = freq->readVInt(); + if (field->omitTermFreqAndPositions) { + docID += code; + } else { + docID += MiscUtils::unsignedShift(code, 1); + if ((code & 1) != 0) { + termFreq = 1; + } else { + termFreq = freq->readVInt(); } - - BOOST_ASSERT(docID != p->lastDocID); - - return true; } + + BOOST_ASSERT(docID != p->lastDocID); + + return true; +} + } diff --git 
a/src/core/index/FreqProxTermsWriter.cpp b/src/core/index/FreqProxTermsWriter.cpp index 04c94b88..c1a4a1cd 100644 --- a/src/core/index/FreqProxTermsWriter.cpp +++ b/src/core/index/FreqProxTermsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -25,271 +25,252 @@ #include "UTF8Stream.h" #include "TestPoint.h" -namespace Lucene -{ - FreqProxTermsWriter::~FreqProxTermsWriter() - { - } - - TermsHashConsumerPerThreadPtr FreqProxTermsWriter::addThread(TermsHashPerThreadPtr perThread) - { - return newLucene(perThread); - } - - void FreqProxTermsWriter::createPostings(Collection postings, int32_t start, int32_t count) - { - int32_t end = start + count; - for (int32_t i = start; i < end; ++i) - postings[i] = newLucene(); +namespace Lucene { + +FreqProxTermsWriter::~FreqProxTermsWriter() { +} + +TermsHashConsumerPerThreadPtr FreqProxTermsWriter::addThread(const TermsHashPerThreadPtr& perThread) { + return newLucene(perThread); +} + +void FreqProxTermsWriter::createPostings(Collection postings, int32_t start, int32_t count) { + int32_t end = start + count; + for (int32_t i = start; i < end; ++i) { + postings[i] = newLucene(); } - - int32_t FreqProxTermsWriter::compareText(const wchar_t* text1, int32_t pos1, const wchar_t* text2, int32_t pos2) - { - while (true) - { - wchar_t c1 = text1[pos1++]; - wchar_t c2 = text2[pos2++]; - if (c1 != c2) - { - if (c2 == UTF8Base::UNICODE_TERMINATOR) - return 1; - else if (c1 == UTF8Base::UNICODE_TERMINATOR) - return -1; - else - return (c1 - c2); +} + +int32_t FreqProxTermsWriter::compareText(const wchar_t* text1, int32_t pos1, const wchar_t* text2, int32_t pos2) { + 
while (true) { + wchar_t c1 = text1[pos1++]; + wchar_t c2 = text2[pos2++]; + if (c1 != c2) { + if (c2 == UTF8Base::UNICODE_TERMINATOR) { + return 1; + } else if (c1 == UTF8Base::UNICODE_TERMINATOR) { + return -1; + } else { + return (c1 - c2); } - else if (c1 == UTF8Base::UNICODE_TERMINATOR) - return 0; + } else if (c1 == UTF8Base::UNICODE_TERMINATOR) { + return 0; } } - - void FreqProxTermsWriter::closeDocStore(SegmentWriteStatePtr state) - { - } - - void FreqProxTermsWriter::abort() - { - } - - void FreqProxTermsWriter::flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state) - { - // Gather all FieldData's that have postings, across all ThreadStates - Collection allFields(Collection::newInstance()); - - for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) - { - for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) - { - FreqProxTermsWriterPerFieldPtr freqProxPerField(boost::static_pointer_cast(*perField)); - if (TermsHashPerFieldPtr(freqProxPerField->_termsHashPerField)->numPostings > 0) - allFields.add(freqProxPerField); +} + +void FreqProxTermsWriter::closeDocStore(const SegmentWriteStatePtr& state) { +} + +void FreqProxTermsWriter::abort() { +} + +void FreqProxTermsWriter::flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { + // Gather all FieldData's that have postings, across all ThreadStates + Collection allFields(Collection::newInstance()); + + for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { + for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { + FreqProxTermsWriterPerFieldPtr 
freqProxPerField(boost::static_pointer_cast(*perField)); + if (TermsHashPerFieldPtr(freqProxPerField->_termsHashPerField)->numPostings > 0) { + allFields.add(freqProxPerField); } } - - // Sort by field name - std::sort(allFields.begin(), allFields.end(), luceneCompare()); - - int32_t numAllFields = allFields.size(); - - FormatPostingsFieldsConsumerPtr consumer(newLucene(state, fieldInfos)); - - // Current writer chain: - // FormatPostingsFieldsConsumer - // -> IMPL: FormatPostingsFieldsWriter - // -> FormatPostingsTermsConsumer - // -> IMPL: FormatPostingsTermsWriter - // -> FormatPostingsDocConsumer - // -> IMPL: FormatPostingsDocWriter - // -> FormatPostingsPositionsConsumer - // -> IMPL: FormatPostingsPositionsWriter - - int32_t start = 0; - while (start < numAllFields) - { - FieldInfoPtr fieldInfo(allFields[start]->fieldInfo); - String fieldName(fieldInfo->name); - - int32_t end = start + 1; - while (end < numAllFields && allFields[end]->fieldInfo->name == fieldName) - ++end; - - Collection fields(Collection::newInstance(end - start)); - for (int32_t i = start; i < end; ++i) - { - fields[i - start] = allFields[i]; - - // Aggregate the storePayload as seen by the same field across multiple threads - if (fields[i - start]->hasPayloads) - fieldInfo->storePayloads = true; - } - - // If this field has postings then add them to the segment - appendPostings(fields, consumer); - - for (int32_t i = 0; i < fields.size(); ++i) - { - TermsHashPerFieldPtr perField(fields[i]->_termsHashPerField); - int32_t numPostings = perField->numPostings; - perField->reset(); - perField->shrinkHash(numPostings); - fields[i]->reset(); + } + + // Sort by field name + std::sort(allFields.begin(), allFields.end(), luceneCompare()); + + int32_t numAllFields = allFields.size(); + + FormatPostingsFieldsConsumerPtr consumer(newLucene(state, fieldInfos)); + + // Current writer chain: + // FormatPostingsFieldsConsumer + // -> IMPL: FormatPostingsFieldsWriter + // -> FormatPostingsTermsConsumer + 
// -> IMPL: FormatPostingsTermsWriter + // -> FormatPostingsDocConsumer + // -> IMPL: FormatPostingsDocWriter + // -> FormatPostingsPositionsConsumer + // -> IMPL: FormatPostingsPositionsWriter + + int32_t start = 0; + while (start < numAllFields) { + FieldInfoPtr fieldInfo(allFields[start]->fieldInfo); + String fieldName(fieldInfo->name); + + int32_t end = start + 1; + while (end < numAllFields && allFields[end]->fieldInfo->name == fieldName) { + ++end; + } + + Collection fields(Collection::newInstance(end - start)); + for (int32_t i = start; i < end; ++i) { + fields[i - start] = allFields[i]; + + // Aggregate the storePayload as seen by the same field across multiple threads + if (fields[i - start]->hasPayloads) { + fieldInfo->storePayloads = true; } - - start = end; } - - for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) - TermsHashPerThreadPtr(boost::static_pointer_cast(entry->first)->_termsHashPerThread)->reset(true); - - consumer->finish(); + + // If this field has postings then add them to the segment + appendPostings(fields, consumer); + + for (int32_t i = 0; i < fields.size(); ++i) { + TermsHashPerFieldPtr perField(fields[i]->_termsHashPerField); + int32_t numPostings = perField->numPostings; + perField->reset(); + perField->shrinkHash(numPostings); + fields[i]->reset(); + } + + start = end; } - - void FreqProxTermsWriter::appendPostings(Collection fields, FormatPostingsFieldsConsumerPtr consumer) - { - TestScope testScope(L"FreqProxTermsWriter", L"appendPostings"); - int32_t numFields = fields.size(); - - Collection mergeStates(Collection::newInstance(numFields)); - - for (int32_t i = 0; i < numFields; ++i) - { - FreqProxFieldMergeStatePtr fms(newLucene(fields[i])); - mergeStates[i] = fms; - - BOOST_ASSERT(fms->field->fieldInfo == fields[0]->fieldInfo); - - // Should always be true - bool result = fms->nextTerm(); - BOOST_ASSERT(result); + + for 
(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { + TermsHashPerThreadPtr(boost::static_pointer_cast(entry->first)->_termsHashPerThread)->reset(true); + } + + consumer->finish(); +} + +void FreqProxTermsWriter::appendPostings(Collection fields, const FormatPostingsFieldsConsumerPtr& consumer) { + TestScope testScope(L"FreqProxTermsWriter", L"appendPostings"); + int32_t numFields = fields.size(); + + Collection mergeStates(Collection::newInstance(numFields)); + + for (int32_t i = 0; i < numFields; ++i) { + FreqProxFieldMergeStatePtr fms(newLucene(fields[i])); + mergeStates[i] = fms; + + BOOST_ASSERT(fms->field->fieldInfo == fields[0]->fieldInfo); + + // Should always be true + bool result = fms->nextTerm(); + BOOST_ASSERT(result); + } + + FormatPostingsTermsConsumerPtr termsConsumer(consumer->addField(fields[0]->fieldInfo)); + + Collection termStates(Collection::newInstance(numFields)); + + bool currentFieldOmitTermFreqAndPositions = fields[0]->fieldInfo->omitTermFreqAndPositions; + + while (numFields > 0) { + // Get the next term to merge + termStates[0] = mergeStates[0]; + int32_t numToMerge = 1; + + for (int32_t i = 1; i < numFields; ++i) { + CharArray text = mergeStates[i]->text; + int32_t textOffset = mergeStates[i]->textOffset; + int32_t cmp = compareText(text.get(), textOffset, termStates[0]->text.get(), termStates[0]->textOffset); + + if (cmp < 0) { + termStates[0] = mergeStates[i]; + numToMerge = 1; + } else if (cmp == 0) { + termStates[numToMerge++] = mergeStates[i]; + } } - - FormatPostingsTermsConsumerPtr termsConsumer(consumer->addField(fields[0]->fieldInfo)); - - Collection termStates(Collection::newInstance(numFields)); - - bool currentFieldOmitTermFreqAndPositions = fields[0]->fieldInfo->omitTermFreqAndPositions; - - while (numFields > 0) - { - // Get the next term to merge - termStates[0] = mergeStates[0]; - int32_t numToMerge = 1; - - for 
(int32_t i = 1; i < numFields; ++i) - { - CharArray text = mergeStates[i]->text; - int32_t textOffset = mergeStates[i]->textOffset; - int32_t cmp = compareText(text.get(), textOffset, termStates[0]->text.get(), termStates[0]->textOffset); - - if (cmp < 0) - { - termStates[0] = mergeStates[i]; - numToMerge = 1; + + FormatPostingsDocsConsumerPtr docConsumer(termsConsumer->addTerm(termStates[0]->text, termStates[0]->textOffset)); + + // Now termStates has numToMerge FieldMergeStates which all share the same term. Now we must + // interleave the docID streams. + while (numToMerge > 0) { + FreqProxFieldMergeStatePtr minState(termStates[0]); + for (int32_t i = 1; i < numToMerge; ++i) { + if (termStates[i]->docID < minState->docID) { + minState = termStates[i]; } - else if (cmp == 0) - termStates[numToMerge++] = mergeStates[i]; } - - FormatPostingsDocsConsumerPtr docConsumer(termsConsumer->addTerm(termStates[0]->text, termStates[0]->textOffset)); - - // Now termStates has numToMerge FieldMergeStates which all share the same term. Now we must - // interleave the docID streams. - while (numToMerge > 0) - { - FreqProxFieldMergeStatePtr minState(termStates[0]); - for (int32_t i = 1; i < numToMerge; ++i) - { - if (termStates[i]->docID < minState->docID) - minState = termStates[i]; - } - - int32_t termDocFreq = minState->termFreq; - - FormatPostingsPositionsConsumerPtr posConsumer(docConsumer->addDoc(minState->docID, termDocFreq)); - - ByteSliceReaderPtr prox(minState->prox); - - // Carefully copy over the prox + payload info, changing the format to match Lucene's segment format. 
- if (!currentFieldOmitTermFreqAndPositions) - { - // omitTermFreqAndPositions == false so we do write positions & payload - int32_t position = 0; - for (int32_t j = 0; j < termDocFreq; ++j) - { - int32_t code = prox->readVInt(); - position += (code >> 1); - - int32_t payloadLength; - if ((code & 1) != 0) - { - // This position has a payload - payloadLength = prox->readVInt(); - - if (!payloadBuffer) - payloadBuffer = ByteArray::newInstance(payloadLength); - if (payloadBuffer.size() < payloadLength) - payloadBuffer.resize(payloadLength); - - prox->readBytes(payloadBuffer.get(), 0, payloadLength); + + int32_t termDocFreq = minState->termFreq; + + FormatPostingsPositionsConsumerPtr posConsumer(docConsumer->addDoc(minState->docID, termDocFreq)); + + ByteSliceReaderPtr prox(minState->prox); + + // Carefully copy over the prox + payload info, changing the format to match Lucene's segment format. + if (!currentFieldOmitTermFreqAndPositions) { + // omitTermFreqAndPositions == false so we do write positions & payload + int32_t position = 0; + for (int32_t j = 0; j < termDocFreq; ++j) { + int32_t code = prox->readVInt(); + position += (code >> 1); + + int32_t payloadLength; + if ((code & 1) != 0) { + // This position has a payload + payloadLength = prox->readVInt(); + + if (!payloadBuffer) { + payloadBuffer = ByteArray::newInstance(payloadLength); } - else - payloadLength = 0; - - posConsumer->addPosition(position, payloadBuffer, 0, payloadLength); + if (payloadBuffer.size() < payloadLength) { + payloadBuffer.resize(payloadLength); + } + + prox->readBytes(payloadBuffer.get(), 0, payloadLength); + } else { + payloadLength = 0; } - - posConsumer->finish(); + + posConsumer->addPosition(position, payloadBuffer, 0, payloadLength); } - - if (!minState->nextDoc()) - { - // Remove from termStates - int32_t upto = 0; - for (int32_t i = 0; i < numToMerge; ++i) - { - if (termStates[i] != minState) - termStates[upto++] = termStates[i]; + + posConsumer->finish(); + } + + if 
(!minState->nextDoc()) { + // Remove from termStates + int32_t upto = 0; + for (int32_t i = 0; i < numToMerge; ++i) { + if (termStates[i] != minState) { + termStates[upto++] = termStates[i]; } - --numToMerge; - BOOST_ASSERT(upto == numToMerge); - - // Advance this state to the next term - - if (!minState->nextTerm()) - { - // OK, no more terms, so remove from mergeStates as well - upto = 0; - for (int32_t i = 0; i < numFields; ++i) - { - if (mergeStates[i] != minState) - mergeStates[upto++] = mergeStates[i]; + } + --numToMerge; + BOOST_ASSERT(upto == numToMerge); + + // Advance this state to the next term + + if (!minState->nextTerm()) { + // OK, no more terms, so remove from mergeStates as well + upto = 0; + for (int32_t i = 0; i < numFields; ++i) { + if (mergeStates[i] != minState) { + mergeStates[upto++] = mergeStates[i]; } - --numFields; - BOOST_ASSERT(upto == numFields); } + --numFields; + BOOST_ASSERT(upto == numFields); } } - - docConsumer->finish(); } - - termsConsumer->finish(); - } - - int32_t FreqProxTermsWriter::bytesPerPosting() - { - return RawPostingList::BYTES_SIZE + 4 * DocumentsWriter::INT_NUM_BYTE; - } - - FreqProxTermsWriterPostingList::FreqProxTermsWriterPostingList() - { - docFreq = 0; - lastDocID = 0; - lastDocCode = 0; - lastPosition = 0; - } - - FreqProxTermsWriterPostingList::~FreqProxTermsWriterPostingList() - { + + docConsumer->finish(); } + + termsConsumer->finish(); +} + +int32_t FreqProxTermsWriter::bytesPerPosting() { + return RawPostingList::BYTES_SIZE + 4 * DocumentsWriter::INT_NUM_BYTE; +} + +FreqProxTermsWriterPostingList::FreqProxTermsWriterPostingList() { + docFreq = 0; + lastDocID = 0; + lastDocCode = 0; + lastPosition = 0; +} + +FreqProxTermsWriterPostingList::~FreqProxTermsWriterPostingList() { +} + } diff --git a/src/core/index/FreqProxTermsWriterPerField.cpp b/src/core/index/FreqProxTermsWriterPerField.cpp index 8f1437a2..6404dc89 100644 --- a/src/core/index/FreqProxTermsWriterPerField.cpp +++ 
b/src/core/index/FreqProxTermsWriterPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -17,151 +17,131 @@ #include "DocumentsWriter.h" #include "RawPostingList.h" -namespace Lucene -{ - FreqProxTermsWriterPerField::FreqProxTermsWriterPerField(TermsHashPerFieldPtr termsHashPerField, FreqProxTermsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo) - { - this->hasPayloads = false; - this->_termsHashPerField = termsHashPerField; - this->_perThread = perThread; - this->fieldInfo = fieldInfo; - docState = termsHashPerField->docState; - fieldState = termsHashPerField->fieldState; - omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; - } - - FreqProxTermsWriterPerField::~FreqProxTermsWriterPerField() - { - } - - int32_t FreqProxTermsWriterPerField::getStreamCount() - { - return fieldInfo->omitTermFreqAndPositions ? 
1 : 2; - } - - void FreqProxTermsWriterPerField::finish() - { - } - - void FreqProxTermsWriterPerField::skippingLongTerm() - { - } - - int32_t FreqProxTermsWriterPerField::compareTo(LuceneObjectPtr other) - { - return fieldInfo->name.compare(boost::static_pointer_cast(other)->fieldInfo->name); +namespace Lucene { + +FreqProxTermsWriterPerField::FreqProxTermsWriterPerField(const TermsHashPerFieldPtr& termsHashPerField, const FreqProxTermsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { + this->hasPayloads = false; + this->_termsHashPerField = termsHashPerField; + this->_perThread = perThread; + this->fieldInfo = fieldInfo; + docState = termsHashPerField->docState; + fieldState = termsHashPerField->fieldState; + omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; +} + +FreqProxTermsWriterPerField::~FreqProxTermsWriterPerField() { +} + +int32_t FreqProxTermsWriterPerField::getStreamCount() { + return fieldInfo->omitTermFreqAndPositions ? 1 : 2; +} + +void FreqProxTermsWriterPerField::finish() { +} + +void FreqProxTermsWriterPerField::skippingLongTerm() { +} + +int32_t FreqProxTermsWriterPerField::compareTo(const LuceneObjectPtr& other) { + return fieldInfo->name.compare(boost::static_pointer_cast(other)->fieldInfo->name); +} + +void FreqProxTermsWriterPerField::reset() { + // Record, up front, whether our in-RAM format will be with or without term freqs + omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; + payloadAttribute.reset(); +} + +bool FreqProxTermsWriterPerField::start(Collection fields, int32_t count) { + for (int32_t i = 0; i < count; ++i) { + if (fields[i]->isIndexed()) { + return true; + } } - - void FreqProxTermsWriterPerField::reset() - { - // Record, up front, whether our in-RAM format will be with or without term freqs - omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; + return false; +} + +void FreqProxTermsWriterPerField::start(const FieldablePtr& field) { + if 
(fieldState->attributeSource->hasAttribute()) { + payloadAttribute = fieldState->attributeSource->getAttribute(); + } else { payloadAttribute.reset(); } - - bool FreqProxTermsWriterPerField::start(Collection fields, int32_t count) - { - for (int32_t i = 0; i < count; ++i) - { - if (fields[i]->isIndexed()) - return true; - } - return false; +} + +void FreqProxTermsWriterPerField::writeProx(const FreqProxTermsWriterPostingListPtr& p, int32_t proxCode) { + PayloadPtr payload; + if (payloadAttribute) { + payload = payloadAttribute->getPayload(); } - - void FreqProxTermsWriterPerField::start(FieldablePtr field) - { - if (fieldState->attributeSource->hasAttribute()) - payloadAttribute = fieldState->attributeSource->getAttribute(); - else - payloadAttribute.reset(); + + TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); + + if (payload && payload->length() > 0) { + termsHashPerField->writeVInt(1, (proxCode << 1) | 1); + termsHashPerField->writeVInt(1, payload->length()); + termsHashPerField->writeBytes(1, payload->getData().get(), payload->getOffset(), payload->length()); + hasPayloads = true; + } else { + termsHashPerField->writeVInt(1, proxCode << 1); } - - void FreqProxTermsWriterPerField::writeProx(FreqProxTermsWriterPostingListPtr p, int32_t proxCode) - { - PayloadPtr payload; - if (payloadAttribute) - payload = payloadAttribute->getPayload(); - - TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); - - if (payload && payload->length() > 0) - { - termsHashPerField->writeVInt(1, (proxCode << 1) | 1); - termsHashPerField->writeVInt(1, payload->length()); - termsHashPerField->writeBytes(1, payload->getData().get(), payload->getOffset(), payload->length()); - hasPayloads = true; - } - else - termsHashPerField->writeVInt(1, proxCode << 1); - p->lastPosition = fieldState->position; + p->lastPosition = fieldState->position; +} + +void FreqProxTermsWriterPerField::newTerm(const RawPostingListPtr& p) { + // First time we're seeing this term since the last 
flush + BOOST_ASSERT(docState->testPoint(L"FreqProxTermsWriterPerField.newTerm start")); + FreqProxTermsWriterPostingListPtr newPostingList(boost::static_pointer_cast(p)); + newPostingList->lastDocID = docState->docID; + if (omitTermFreqAndPositions) { + newPostingList->lastDocCode = docState->docID; + } else { + newPostingList->lastDocCode = docState->docID << 1; + newPostingList->docFreq = 1; + writeProx(newPostingList, fieldState->position); } - - void FreqProxTermsWriterPerField::newTerm(RawPostingListPtr p) - { - // First time we're seeing this term since the last flush - BOOST_ASSERT(docState->testPoint(L"FreqProxTermsWriterPerField.newTerm start")); - FreqProxTermsWriterPostingListPtr newPostingList(boost::static_pointer_cast(p)); - newPostingList->lastDocID = docState->docID; - if (omitTermFreqAndPositions) - newPostingList->lastDocCode = docState->docID; - else - { - newPostingList->lastDocCode = docState->docID << 1; - newPostingList->docFreq = 1; - writeProx(newPostingList, fieldState->position); +} + +void FreqProxTermsWriterPerField::addTerm(const RawPostingListPtr& p) { + BOOST_ASSERT(docState->testPoint(L"FreqProxTermsWriterPerField.addTerm start")); + + FreqProxTermsWriterPostingListPtr addPostingList(boost::static_pointer_cast(p)); + + BOOST_ASSERT(omitTermFreqAndPositions || addPostingList->docFreq > 0); + TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); + + if (omitTermFreqAndPositions) { + if (docState->docID != addPostingList->lastDocID) { + BOOST_ASSERT(docState->docID > addPostingList->lastDocID); + termsHashPerField->writeVInt(0, addPostingList->lastDocCode); + addPostingList->lastDocCode = docState->docID - addPostingList->lastDocID; + addPostingList->lastDocID = docState->docID; } - } - - void FreqProxTermsWriterPerField::addTerm(RawPostingListPtr p) - { - BOOST_ASSERT(docState->testPoint(L"FreqProxTermsWriterPerField.addTerm start")); - - FreqProxTermsWriterPostingListPtr addPostingList(boost::static_pointer_cast(p)); - - 
BOOST_ASSERT(omitTermFreqAndPositions || addPostingList->docFreq > 0); - TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); - - if (omitTermFreqAndPositions) - { - if (docState->docID != addPostingList->lastDocID) - { - BOOST_ASSERT(docState->docID > addPostingList->lastDocID); + } else { + if (docState->docID != addPostingList->lastDocID) { + BOOST_ASSERT(docState->docID > addPostingList->lastDocID); + // Term not yet seen in the current doc but previously seen in other doc(s) since + // the last flush + + // Now that we know doc freq for previous doc, write it & lastDocCode + if (addPostingList->docFreq == 1) { + termsHashPerField->writeVInt(0, addPostingList->lastDocCode | 1); + } else { termsHashPerField->writeVInt(0, addPostingList->lastDocCode); - addPostingList->lastDocCode = docState->docID - addPostingList->lastDocID; - addPostingList->lastDocID = docState->docID; - } - } - else - { - if (docState->docID != addPostingList->lastDocID) - { - BOOST_ASSERT(docState->docID > addPostingList->lastDocID); - // Term not yet seen in the current doc but previously seen in other doc(s) since - // the last flush - - // Now that we know doc freq for previous doc, write it & lastDocCode - if (addPostingList->docFreq == 1) - termsHashPerField->writeVInt(0, addPostingList->lastDocCode | 1); - else - { - termsHashPerField->writeVInt(0, addPostingList->lastDocCode); - termsHashPerField->writeVInt(0, addPostingList->docFreq); - } - addPostingList->docFreq = 1; - addPostingList->lastDocCode = (docState->docID - addPostingList->lastDocID) << 1; - addPostingList->lastDocID = docState->docID; - writeProx(addPostingList, fieldState->position); - } - else - { - ++addPostingList->docFreq; - writeProx(addPostingList, fieldState->position - addPostingList->lastPosition); + termsHashPerField->writeVInt(0, addPostingList->docFreq); } + addPostingList->docFreq = 1; + addPostingList->lastDocCode = (docState->docID - addPostingList->lastDocID) << 1; + addPostingList->lastDocID = 
docState->docID; + writeProx(addPostingList, fieldState->position); + } else { + ++addPostingList->docFreq; + writeProx(addPostingList, fieldState->position - addPostingList->lastPosition); } } - - void FreqProxTermsWriterPerField::abort() - { - } +} + +void FreqProxTermsWriterPerField::abort() { +} + } diff --git a/src/core/index/FreqProxTermsWriterPerThread.cpp b/src/core/index/FreqProxTermsWriterPerThread.cpp index b39012c8..8c3a1626 100644 --- a/src/core/index/FreqProxTermsWriterPerThread.cpp +++ b/src/core/index/FreqProxTermsWriterPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,33 +9,28 @@ #include "FreqProxTermsWriterPerField.h" #include "TermsHashPerThread.h" -namespace Lucene -{ - FreqProxTermsWriterPerThread::FreqProxTermsWriterPerThread(TermsHashPerThreadPtr perThread) - { - docState = perThread->docState; - _termsHashPerThread = perThread; - } - - FreqProxTermsWriterPerThread::~FreqProxTermsWriterPerThread() - { - } - - TermsHashConsumerPerFieldPtr FreqProxTermsWriterPerThread::addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo) - { - return newLucene(termsHashPerField, shared_from_this(), fieldInfo); - } - - void FreqProxTermsWriterPerThread::startDocument() - { - } - - DocWriterPtr FreqProxTermsWriterPerThread::finishDocument() - { - return DocWriterPtr(); - } - - void FreqProxTermsWriterPerThread::abort() - { - } +namespace Lucene { + +FreqProxTermsWriterPerThread::FreqProxTermsWriterPerThread(const TermsHashPerThreadPtr& perThread) { + docState = perThread->docState; + _termsHashPerThread = perThread; +} + 
+FreqProxTermsWriterPerThread::~FreqProxTermsWriterPerThread() { +} + +TermsHashConsumerPerFieldPtr FreqProxTermsWriterPerThread::addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo) { + return newLucene(termsHashPerField, shared_from_this(), fieldInfo); +} + +void FreqProxTermsWriterPerThread::startDocument() { +} + +DocWriterPtr FreqProxTermsWriterPerThread::finishDocument() { + return DocWriterPtr(); +} + +void FreqProxTermsWriterPerThread::abort() { +} + } diff --git a/src/core/index/IndexCommit.cpp b/src/core/index/IndexCommit.cpp index 7b9d9c19..466d425b 100644 --- a/src/core/index/IndexCommit.cpp +++ b/src/core/index/IndexCommit.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,29 +8,28 @@ #include "IndexCommit.h" #include "Directory.h" -namespace Lucene -{ - IndexCommit::~IndexCommit() - { - } - - bool IndexCommit::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - IndexCommitPtr otherCommit(boost::dynamic_pointer_cast(other)); - if (!otherCommit) - return false; - return (otherCommit->getDirectory()->equals(getDirectory()) && otherCommit->getVersion() == getVersion()); - } - - int32_t IndexCommit::hashCode() - { - return (getDirectory()->hashCode() + (int32_t)getVersion()); +namespace Lucene { + +IndexCommit::~IndexCommit() { +} + +bool IndexCommit::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int64_t IndexCommit::getTimestamp() - { - return getDirectory()->fileModified(getSegmentsFileName()); + IndexCommitPtr otherCommit(boost::dynamic_pointer_cast(other)); + if (!otherCommit) { + return false; } + return (otherCommit->getDirectory()->equals(getDirectory()) && otherCommit->getVersion() == getVersion()); +} + +int32_t IndexCommit::hashCode() { + return (getDirectory()->hashCode() + (int32_t)getVersion()); +} + +int64_t IndexCommit::getTimestamp() { + return getDirectory()->fileModified(getSegmentsFileName()); +} + } diff --git a/src/core/index/IndexDeletionPolicy.cpp b/src/core/index/IndexDeletionPolicy.cpp index 0221e43c..ece07e52 100644 --- a/src/core/index/IndexDeletionPolicy.cpp +++ b/src/core/index/IndexDeletionPolicy.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,13 +7,12 @@ #include "LuceneInc.h" #include "IndexDeletionPolicy.h" -namespace Lucene -{ - IndexDeletionPolicy::IndexDeletionPolicy() - { - } +namespace Lucene { + +IndexDeletionPolicy::IndexDeletionPolicy() { +} + +IndexDeletionPolicy::~IndexDeletionPolicy() { +} - IndexDeletionPolicy::~IndexDeletionPolicy() - { - } } diff --git a/src/core/index/IndexFileDeleter.cpp b/src/core/index/IndexFileDeleter.cpp index dd1f51dc..cb30e2f4 100644 --- a/src/core/index/IndexFileDeleter.cpp +++ b/src/core/index/IndexFileDeleter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -20,514 +20,464 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// Change to true to see details of reference counts when infoStream != null - bool IndexFileDeleter::VERBOSE_REF_COUNTS = false; - - IndexFileDeleter::IndexFileDeleter(DirectoryPtr directory, IndexDeletionPolicyPtr policy, SegmentInfosPtr segmentInfos, InfoStreamPtr infoStream, DocumentsWriterPtr docWriter, HashSet synced) - { - this->lastFiles = Collection< HashSet >::newInstance(); - this->commits = Collection::newInstance(); - this->commitsToDelete = Collection::newInstance(); - this->refCounts = MapStringRefCount::newInstance(); - this->docWriter = docWriter; - this->infoStream = infoStream; - this->synced = synced; - - if (infoStream) - message(L"init: current segments file is \"" + segmentInfos->getCurrentSegmentFileName()); - - this->policy = policy; - this->directory = directory; - - // First pass: walk the files and initialize our ref counts - int64_t 
currentGen = segmentInfos->getGeneration(); - IndexFileNameFilterPtr filter(IndexFileNameFilter::getFilter()); - - HashSet files(directory->listAll()); - CommitPointPtr currentCommitPoint; - - for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) - { - if (filter->accept(L"", *fileName) && *fileName != IndexFileNames::SEGMENTS_GEN()) - { - // Add this file to refCounts with initial count 0 - getRefCount(*fileName); - - if (boost::starts_with(*fileName, IndexFileNames::SEGMENTS())) - { - // This is a commit (segments or segments_N), and it's valid (<= the max gen). - // Load it, then incref all files it refers to - if (infoStream) - message(L"init: load commit \"" + *fileName + L"\""); - SegmentInfosPtr sis(newLucene()); - try - { - sis->read(directory, *fileName); +namespace Lucene { + +/// Change to true to see details of reference counts when infoStream != null +bool IndexFileDeleter::VERBOSE_REF_COUNTS = false; + +IndexFileDeleter::IndexFileDeleter(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& policy, const SegmentInfosPtr& segmentInfos, const InfoStreamPtr& infoStream, const DocumentsWriterPtr& docWriter, HashSet synced) { + this->lastFiles = Collection< HashSet >::newInstance(); + this->commits = Collection::newInstance(); + this->commitsToDelete = Collection::newInstance(); + this->refCounts = MapStringRefCount::newInstance(); + this->docWriter = docWriter; + this->infoStream = infoStream; + this->synced = synced; + + if (infoStream) { + message(L"init: current segments file is \"" + segmentInfos->getCurrentSegmentFileName()); + } + + this->policy = policy; + this->directory = directory; + + // First pass: walk the files and initialize our ref counts + int64_t currentGen = segmentInfos->getGeneration(); + IndexFileNameFilterPtr filter(IndexFileNameFilter::getFilter()); + + HashSet files(directory->listAll()); + CommitPointPtr currentCommitPoint; + + for (HashSet::iterator fileName = files.begin(); fileName != 
files.end(); ++fileName) { + if (filter->accept(L"", *fileName) && *fileName != IndexFileNames::SEGMENTS_GEN()) { + // Add this file to refCounts with initial count 0 + getRefCount(*fileName); + + if (boost::starts_with(*fileName, IndexFileNames::SEGMENTS())) { + // This is a commit (segments or segments_N), and it's valid (<= the max gen). + // Load it, then incref all files it refers to + if (infoStream) { + message(L"init: load commit \"" + *fileName + L"\""); + } + SegmentInfosPtr sis(newLucene()); + try { + sis->read(directory, *fileName); + } catch (IOException& e) { + if (SegmentInfos::generationFromSegmentsFileName(*fileName) <= currentGen) { + boost::throw_exception(e); + } else { + // Most likely we are opening an index that has an aborted "future" commit, + // so suppress exc in this case + sis.reset(); } - catch (IOException& e) - { - if (SegmentInfos::generationFromSegmentsFileName(*fileName) <= currentGen) - boost::throw_exception(e); - else - { - // Most likely we are opening an index that has an aborted "future" commit, - // so suppress exc in this case - sis.reset(); - } + } catch (...) { + if (infoStream) { + message(L"init: hit exception when loading commit \"" + *fileName + L"\"; skipping this commit point"); } - catch (...) 
- { - if (infoStream) - message(L"init: hit exception when loading commit \"" + *fileName + L"\"; skipping this commit point"); - sis.reset(); + sis.reset(); + } + if (sis) { + CommitPointPtr commitPoint(newLucene(commitsToDelete, directory, sis)); + if (sis->getGeneration() == segmentInfos->getGeneration()) { + currentCommitPoint = commitPoint; } - if (sis) - { - CommitPointPtr commitPoint(newLucene(commitsToDelete, directory, sis)); - if (sis->getGeneration() == segmentInfos->getGeneration()) - currentCommitPoint = commitPoint; - commits.add(commitPoint); - incRef(sis, true); - - if (!lastSegmentInfos || sis->getGeneration() > lastSegmentInfos->getGeneration()) - lastSegmentInfos = sis; + commits.add(commitPoint); + incRef(sis, true); + + if (!lastSegmentInfos || sis->getGeneration() > lastSegmentInfos->getGeneration()) { + lastSegmentInfos = sis; } } } } - - if (!currentCommitPoint) - { - // We did not in fact see the segments_N file corresponding to the segmentInfos that was passed - // in. Yet, it must exist, because our caller holds the write lock. This can happen when the - // directory listing was stale (eg when index accessed via NFS client with stale directory listing - // cache). So we try now to explicitly open this commit point. - SegmentInfosPtr sis(newLucene()); - try - { - sis->read(directory, segmentInfos->getCurrentSegmentFileName()); - } - catch (LuceneException&) - { - boost::throw_exception(CorruptIndexException(L"failed to locate current segments_N file")); - } - if (infoStream) - message(L"forced open of current segments file " + segmentInfos->getCurrentSegmentFileName()); - currentCommitPoint = newLucene(commitsToDelete, directory, sis); - commits.add(currentCommitPoint); - incRef(sis, true); + } + + if (!currentCommitPoint) { + // We did not in fact see the segments_N file corresponding to the segmentInfos that was passed + // in. Yet, it must exist, because our caller holds the write lock. 
This can happen when the + // directory listing was stale (eg when index accessed via NFS client with stale directory listing + // cache). So we try now to explicitly open this commit point. + SegmentInfosPtr sis(newLucene()); + try { + sis->read(directory, segmentInfos->getCurrentSegmentFileName()); + } catch (LuceneException&) { + boost::throw_exception(CorruptIndexException(L"failed to locate current segments_N file")); } - - // We keep commits list in sorted order (oldest to newest) - std::sort(commits.begin(), commits.end(), luceneCompare()); - - // Now delete anything with ref count at 0. These are presumably abandoned files eg due to crash of IndexWriter. - for (MapStringRefCount::iterator entry = refCounts.begin(); entry != refCounts.end(); ++entry) - { - if (entry->second->count == 0) - { - if (infoStream) - message(L"init: removing unreferenced file \"" + entry->first + L"\""); - deleteFile(entry->first); - } + if (infoStream) { + message(L"forced open of current segments file " + segmentInfos->getCurrentSegmentFileName()); } - - // Finally, give policy a chance to remove things on startup - policy->onInit(commits); - - // Always protect the incoming segmentInfos since sometime it may not be the most recent commit - checkpoint(segmentInfos, false); - - startingCommitDeleted = currentCommitPoint->isDeleted(); - - deleteCommits(); - } - - IndexFileDeleter::~IndexFileDeleter() - { + currentCommitPoint = newLucene(commitsToDelete, directory, sis); + commits.add(currentCommitPoint); + incRef(sis, true); } - - void IndexFileDeleter::setInfoStream(InfoStreamPtr infoStream) - { - this->infoStream = infoStream; - } - - void IndexFileDeleter::message(const String& message) - { - if (infoStream) - { - *infoStream << L"IFD [" << DateTools::timeToString(MiscUtils::currentTimeMillis(), DateTools::RESOLUTION_SECOND); - *infoStream << L"; " << StringUtils::toString(LuceneThread::currentId()) << L"]: " << message << L"\n"; + + // We keep commits list in sorted order 
(oldest to newest) + std::sort(commits.begin(), commits.end(), luceneCompare()); + + // Now delete anything with ref count at 0. These are presumably abandoned files eg due to crash of IndexWriter. + for (MapStringRefCount::iterator entry = refCounts.begin(); entry != refCounts.end(); ++entry) { + if (entry->second->count == 0) { + if (infoStream) { + message(L"init: removing unreferenced file \"" + entry->first + L"\""); + } + deleteFile(entry->first); } } - - SegmentInfosPtr IndexFileDeleter::getLastSegmentInfos() - { - return lastSegmentInfos; + + // Finally, give policy a chance to remove things on startup + policy->onInit(commits); + + // Always protect the incoming segmentInfos since sometime it may not be the most recent commit + checkpoint(segmentInfos, false); + + startingCommitDeleted = currentCommitPoint->isDeleted(); + + deleteCommits(); +} + +IndexFileDeleter::~IndexFileDeleter() { +} + +void IndexFileDeleter::setInfoStream(const InfoStreamPtr& infoStream) { + this->infoStream = infoStream; +} + +void IndexFileDeleter::message(const String& message) { + if (infoStream) { + *infoStream << L"IFD [" << DateTools::timeToString(MiscUtils::currentTimeMillis(), DateTools::RESOLUTION_SECOND); + *infoStream << L"; " << StringUtils::toString(LuceneThread::currentId()) << L"]: " << message << L"\n"; } - - void IndexFileDeleter::deleteCommits() - { - if (!commitsToDelete.empty()) - { - // First decref all files that had been referred to by the now-deleted commits - for (Collection::iterator commit = commitsToDelete.begin(); commit != commitsToDelete.end(); ++commit) - { - if (infoStream) - message(L"deleteCommits: now decRef commit \"" + (*commit)->getSegmentsFileName() + L"\""); - for (HashSet::iterator file = (*commit)->files.begin(); file != (*commit)->files.end(); ++file) - decRef(*file); +} + +SegmentInfosPtr IndexFileDeleter::getLastSegmentInfos() { + return lastSegmentInfos; +} + +void IndexFileDeleter::deleteCommits() { + if (!commitsToDelete.empty()) { + 
// First decref all files that had been referred to by the now-deleted commits + for (Collection::iterator commit = commitsToDelete.begin(); commit != commitsToDelete.end(); ++commit) { + if (infoStream) { + message(L"deleteCommits: now decRef commit \"" + (*commit)->getSegmentsFileName() + L"\""); } - commitsToDelete.clear(); - - // Now compact commits to remove deleted ones (preserving the sort) - int32_t size = commits.size(); - int32_t readFrom = 0; - int32_t writeTo = 0; - while (readFrom < size) - { - CommitPointPtr commit(boost::dynamic_pointer_cast(commits[readFrom])); - if (!commit->deleted) - { - if (writeTo != readFrom) - commits[writeTo] = commits[readFrom]; - ++writeTo; - } - ++readFrom; + for (HashSet::iterator file = (*commit)->files.begin(); file != (*commit)->files.end(); ++file) { + decRef(*file); } - - while (size > writeTo) - { - commits.removeLast(); - --size; + } + commitsToDelete.clear(); + + // Now compact commits to remove deleted ones (preserving the sort) + int32_t size = commits.size(); + int32_t readFrom = 0; + int32_t writeTo = 0; + while (readFrom < size) { + CommitPointPtr commit(boost::dynamic_pointer_cast(commits[readFrom])); + if (!commit->deleted) { + if (writeTo != readFrom) { + commits[writeTo] = commits[readFrom]; + } + ++writeTo; } + ++readFrom; + } + + while (size > writeTo) { + commits.removeLast(); + --size; } } - - void IndexFileDeleter::refresh(const String& segmentName) - { - HashSet files(directory->listAll()); - IndexFileNameFilterPtr filter(IndexFileNameFilter::getFilter()); - String segmentPrefix1(segmentName + L"."); - String segmentPrefix2(segmentName + L"_"); - - for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) - { - if (filter->accept(L"", *fileName) && +} + +void IndexFileDeleter::refresh(const String& segmentName) { + HashSet files(directory->listAll()); + IndexFileNameFilterPtr filter(IndexFileNameFilter::getFilter()); + String segmentPrefix1(segmentName + L"."); + String 
segmentPrefix2(segmentName + L"_"); + + for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { + if (filter->accept(L"", *fileName) && (segmentName.empty() || boost::starts_with(*fileName, segmentPrefix1) || boost::starts_with(*fileName, segmentPrefix2)) && - !refCounts.contains(*fileName) && *fileName != IndexFileNames::SEGMENTS_GEN()) - { - // Unreferenced file, so remove it - if (infoStream) - message(L"refresh [prefix=" + segmentName + L"]: removing newly created unreferenced file \"" + *fileName + L"\""); - deleteFile(*fileName); + !refCounts.contains(*fileName) && *fileName != IndexFileNames::SEGMENTS_GEN()) { + // Unreferenced file, so remove it + if (infoStream) { + message(L"refresh [prefix=" + segmentName + L"]: removing newly created unreferenced file \"" + *fileName + L"\""); } + deleteFile(*fileName); } } - - void IndexFileDeleter::refresh() - { - refresh(L""); - } - - void IndexFileDeleter::close() - { - // DecRef old files from the last checkpoint, if any - for (Collection< HashSet >::iterator file = lastFiles.begin(); file != lastFiles.end(); ++file) - decRef(*file); - lastFiles.clear(); - deletePendingFiles(); +} + +void IndexFileDeleter::refresh() { + refresh(L""); +} + +void IndexFileDeleter::close() { + // DecRef old files from the last checkpoint, if any + for (Collection< HashSet >::iterator file = lastFiles.begin(); file != lastFiles.end(); ++file) { + decRef(*file); } - - void IndexFileDeleter::deletePendingFiles() - { - if (deletable) - { - HashSet oldDeletable(deletable); - deletable.reset(); - for (HashSet::iterator fileName = oldDeletable.begin(); fileName != oldDeletable.end(); ++fileName) - { - if (infoStream) - message(L"delete pending file " + *fileName); - deleteFile(*fileName); + lastFiles.clear(); + deletePendingFiles(); +} + +void IndexFileDeleter::deletePendingFiles() { + if (deletable) { + HashSet oldDeletable(deletable); + deletable.reset(); + for (HashSet::iterator fileName = 
oldDeletable.begin(); fileName != oldDeletable.end(); ++fileName) { + if (infoStream) { + message(L"delete pending file " + *fileName); } + deleteFile(*fileName); } } - - void IndexFileDeleter::checkpoint(SegmentInfosPtr segmentInfos, bool isCommit) - { - if (infoStream) - message(L"now checkpoint \"" + segmentInfos->getCurrentSegmentFileName() + L"\" [" + StringUtils::toString(segmentInfos->size()) + L" segments; isCommit = " + StringUtils::toString(isCommit) + L"]"); - - // Try again now to delete any previously un-deletable files (because they were in use, on Windows) - deletePendingFiles(); - - // Incref the files - incRef(segmentInfos, isCommit); - - if (isCommit) - { - // Append to our commits list - commits.add(newLucene(commitsToDelete, directory, segmentInfos)); - - // Tell policy so it can remove commits - policy->onCommit(commits); - - // Decref files for commits that were deleted by the policy - deleteCommits(); - } - else - { - HashSet docWriterFiles; - if (docWriter) - { - docWriterFiles = docWriter->openFiles(); - if (docWriterFiles) - { - // We must incRef these files before decRef'ing last files to make sure we - // don't accidentally delete them - incRef(docWriterFiles); - } +} + +void IndexFileDeleter::checkpoint(const SegmentInfosPtr& segmentInfos, bool isCommit) { + if (infoStream) { + message(L"now checkpoint \"" + segmentInfos->getCurrentSegmentFileName() + L"\" [" + StringUtils::toString(segmentInfos->size()) + L" segments; isCommit = " + StringUtils::toString(isCommit) + L"]"); + } + + // Try again now to delete any previously un-deletable files (because they were in use, on Windows) + deletePendingFiles(); + + // Incref the files + incRef(segmentInfos, isCommit); + + if (isCommit) { + // Append to our commits list + commits.add(newLucene(commitsToDelete, directory, segmentInfos)); + + // Tell policy so it can remove commits + policy->onCommit(commits); + + // Decref files for commits that were deleted by the policy + deleteCommits(); + } 
else { + HashSet docWriterFiles; + if (docWriter) { + docWriterFiles = docWriter->openFiles(); + if (docWriterFiles) { + // We must incRef these files before decRef'ing last files to make sure we + // don't accidentally delete them + incRef(docWriterFiles); } - - // DecRef old files from the last checkpoint, if any - for (Collection< HashSet >::iterator file = lastFiles.begin(); file != lastFiles.end(); ++file) - decRef(*file); - lastFiles.clear(); - - // Save files so we can decr on next checkpoint/commit - lastFiles.add(segmentInfos->files(directory, false)); - - if (docWriterFiles) - lastFiles.add(docWriterFiles); } - } - - void IndexFileDeleter::incRef(SegmentInfosPtr segmentInfos, bool isCommit) - { - // If this is a commit point, also incRef the segments_N file - HashSet files(segmentInfos->files(directory, isCommit)); - for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) - incRef(*fileName); - } - - void IndexFileDeleter::incRef(HashSet files) - { - for (HashSet::iterator file = files.begin(); file != files.end(); ++file) - incRef(*file); - } - - void IndexFileDeleter::incRef(const String& fileName) - { - RefCountPtr rc(getRefCount(fileName)); - if (infoStream && VERBOSE_REF_COUNTS) - message(L" IncRef \"" + fileName + L"\": pre-incr count is " + StringUtils::toString(rc->count)); - rc->IncRef(); - } - - void IndexFileDeleter::decRef(HashSet files) - { - for (HashSet::iterator file = files.begin(); file != files.end(); ++file) + + // DecRef old files from the last checkpoint, if any + for (Collection< HashSet >::iterator file = lastFiles.begin(); file != lastFiles.end(); ++file) { decRef(*file); - } - - void IndexFileDeleter::decRef(const String& fileName) - { - RefCountPtr rc(getRefCount(fileName)); - if (infoStream && VERBOSE_REF_COUNTS) - message(L" DecRef \"" + fileName + L"\": pre-decr count is " + StringUtils::toString(rc->count)); - if (rc->DecRef() == 0) - { - // This file is no longer referenced by any past commit 
points nor by the in-memory SegmentInfos - deleteFile(fileName); - refCounts.remove(fileName); - - if (synced) - { - SyncLock syncLock(&synced); - synced.remove(fileName); - } } + lastFiles.clear(); + + // Save files so we can decr on next checkpoint/commit + lastFiles.add(segmentInfos->files(directory, false)); + + if (docWriterFiles) { + lastFiles.add(docWriterFiles); + } + } +} + +void IndexFileDeleter::incRef(const SegmentInfosPtr& segmentInfos, bool isCommit) { + // If this is a commit point, also incRef the segments_N file + HashSet files(segmentInfos->files(directory, isCommit)); + for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { + incRef(*fileName); } - - void IndexFileDeleter::decRef(SegmentInfosPtr segmentInfos) - { - decRef(segmentInfos->files(directory, false)); +} + +void IndexFileDeleter::incRef(HashSet files) { + for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { + incRef(*file); + } +} + +void IndexFileDeleter::incRef(const String& fileName) { + RefCountPtr rc(getRefCount(fileName)); + if (infoStream && VERBOSE_REF_COUNTS) { + message(L" IncRef \"" + fileName + L"\": pre-incr count is " + StringUtils::toString(rc->count)); } - - bool IndexFileDeleter::exists(const String& fileName) - { - return refCounts.contains(fileName) ? 
getRefCount(fileName)->count > 0 : false; + rc->IncRef(); +} + +void IndexFileDeleter::decRef(HashSet files) { + for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { + decRef(*file); } - - RefCountPtr IndexFileDeleter::getRefCount(const String& fileName) - { - RefCountPtr rc; - MapStringRefCount::iterator ref = refCounts.find(fileName); - if (ref == refCounts.end()) - { - rc = newLucene(fileName); - refCounts.put(fileName, rc); +} + +void IndexFileDeleter::decRef(const String& fileName) { + RefCountPtr rc(getRefCount(fileName)); + if (infoStream && VERBOSE_REF_COUNTS) { + message(L" DecRef \"" + fileName + L"\": pre-decr count is " + StringUtils::toString(rc->count)); + } + if (rc->DecRef() == 0) { + // This file is no longer referenced by any past commit points nor by the in-memory SegmentInfos + deleteFile(fileName); + refCounts.remove(fileName); + + if (synced) { + SyncLock syncLock(&synced); + synced.remove(fileName); } - else - rc = ref->second; - return rc; } - - void IndexFileDeleter::deleteFiles(HashSet files) - { - for (HashSet::iterator file = files.begin(); file != files.end(); ++file) - deleteFile(*file); +} + +void IndexFileDeleter::decRef(const SegmentInfosPtr& segmentInfos) { + decRef(segmentInfos->files(directory, false)); +} + +bool IndexFileDeleter::exists(const String& fileName) { + return refCounts.contains(fileName) ? 
getRefCount(fileName)->count > 0 : false; +} + +RefCountPtr IndexFileDeleter::getRefCount(const String& fileName) { + RefCountPtr rc; + MapStringRefCount::iterator ref = refCounts.find(fileName); + if (ref == refCounts.end()) { + rc = newLucene(fileName); + refCounts.put(fileName, rc); + } else { + rc = ref->second; + } + return rc; +} + +void IndexFileDeleter::deleteFiles(HashSet files) { + for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { + deleteFile(*file); } - - void IndexFileDeleter::deleteNewFiles(HashSet files) - { - for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) - { - if (!refCounts.contains(*fileName)) - { - if (infoStream) - message(L"delete new file \"" + *fileName + L"\""); - deleteFile(*fileName); +} + +void IndexFileDeleter::deleteNewFiles(HashSet files) { + for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { + if (!refCounts.contains(*fileName)) { + if (infoStream) { + message(L"delete new file \"" + *fileName + L"\""); } + deleteFile(*fileName); } } - - void IndexFileDeleter::deleteFile(const String& fileName) - { - try - { - if (infoStream) - message(L"delete \"" + fileName + L"\""); - directory->deleteFile(fileName); +} + +void IndexFileDeleter::deleteFile(const String& fileName) { + try { + if (infoStream) { + message(L"delete \"" + fileName + L"\""); } - catch (IOException& e) // if delete fails - { - if (directory->fileExists(fileName)) // if delete fails - { - // Some operating systems (eg. Windows) don't permit a file to be deleted while it is opened - // for read (eg. by another process or thread). So we assume that when a delete fails it is - // because the file is open in another process, and queue the file for subsequent deletion. 
- if (infoStream) - message(L"IndexFileDeleter: unable to remove file \"" + fileName + L"\": " + e.getError() + L"; Will re-try later."); - if (!deletable) - deletable = HashSet::newInstance(); - deletable.add(fileName); // add to deletable + directory->deleteFile(fileName); + } catch (IOException& e) { // if delete fails + if (directory->fileExists(fileName)) { // if delete fails + // Some operating systems (eg. Windows) don't permit a file to be deleted while it is opened + // for read (eg. by another process or thread). So we assume that when a delete fails it is + // because the file is open in another process, and queue the file for subsequent deletion. + if (infoStream) { + message(L"IndexFileDeleter: unable to remove file \"" + fileName + L"\": " + e.getError() + L"; Will re-try later."); + } + if (!deletable) { + deletable = HashSet::newInstance(); } + deletable.add(fileName); // add to deletable } } - - RefCount::RefCount(const String& fileName) - { - initDone = false; - count = 0; - this->fileName = fileName; - } - - RefCount::~RefCount() - { - } - - int32_t RefCount::IncRef() - { - if (!initDone) - initDone = true; - else - BOOST_ASSERT(count > 0); - return ++count; - } - - int32_t RefCount::DecRef() - { +} + +RefCount::RefCount(const String& fileName) { + initDone = false; + count = 0; + this->fileName = fileName; +} + +RefCount::~RefCount() { +} + +int32_t RefCount::IncRef() { + if (!initDone) { + initDone = true; + } else { BOOST_ASSERT(count > 0); - return --count; - } - - CommitPoint::CommitPoint(Collection commitsToDelete, DirectoryPtr directory, SegmentInfosPtr segmentInfos) - { - deleted = false; - - this->directory = directory; - this->commitsToDelete = commitsToDelete; - userData = segmentInfos->getUserData(); - segmentsFileName = segmentInfos->getCurrentSegmentFileName(); - version = segmentInfos->getVersion(); - generation = segmentInfos->getGeneration(); - HashSet files(segmentInfos->files(directory, true)); - this->files = 
HashSet::newInstance(files.begin(), files.end()); - gen = segmentInfos->getGeneration(); - _isOptimized = (segmentInfos->size() == 1 && !segmentInfos->info(0)->hasDeletions()); - - BOOST_ASSERT(!segmentInfos->hasExternalSegments(directory)); - } - - CommitPoint::~CommitPoint() - { - } - - String CommitPoint::toString() - { - return L"IndexFileDeleter::CommitPoint(" + segmentsFileName + L")"; } - - bool CommitPoint::isOptimized() - { - return _isOptimized; - } - - String CommitPoint::getSegmentsFileName() - { - return segmentsFileName; - } - - HashSet CommitPoint::getFileNames() - { - return files; - } - - DirectoryPtr CommitPoint::getDirectory() - { - return directory; - } - - int64_t CommitPoint::getVersion() - { - return version; - } - - int64_t CommitPoint::getGeneration() - { - return generation; - } - - MapStringString CommitPoint::getUserData() - { - return userData; - } - - void CommitPoint::deleteCommit() - { - if (!deleted) - { - deleted = true; - commitsToDelete.add(shared_from_this()); - } + return ++count; +} + +int32_t RefCount::DecRef() { + BOOST_ASSERT(count > 0); + return --count; +} + +CommitPoint::CommitPoint(Collection commitsToDelete, const DirectoryPtr& directory, const SegmentInfosPtr& segmentInfos) { + deleted = false; + + this->directory = directory; + this->commitsToDelete = commitsToDelete; + userData = segmentInfos->getUserData(); + segmentsFileName = segmentInfos->getCurrentSegmentFileName(); + version = segmentInfos->getVersion(); + generation = segmentInfos->getGeneration(); + HashSet files(segmentInfos->files(directory, true)); + this->files = HashSet::newInstance(files.begin(), files.end()); + gen = segmentInfos->getGeneration(); + _isOptimized = (segmentInfos->size() == 1 && !segmentInfos->info(0)->hasDeletions()); + + BOOST_ASSERT(!segmentInfos->hasExternalSegments(directory)); +} + +CommitPoint::~CommitPoint() { +} + +String CommitPoint::toString() { + return L"IndexFileDeleter::CommitPoint(" + segmentsFileName + L")"; +} + +bool 
CommitPoint::isOptimized() { + return _isOptimized; +} + +String CommitPoint::getSegmentsFileName() { + return segmentsFileName; +} + +HashSet CommitPoint::getFileNames() { + return files; +} + +DirectoryPtr CommitPoint::getDirectory() { + return directory; +} + +int64_t CommitPoint::getVersion() { + return version; +} + +int64_t CommitPoint::getGeneration() { + return generation; +} + +MapStringString CommitPoint::getUserData() { + return userData; +} + +void CommitPoint::deleteCommit() { + if (!deleted) { + deleted = true; + commitsToDelete.add(shared_from_this()); } - - bool CommitPoint::isDeleted() - { - return deleted; +} + +bool CommitPoint::isDeleted() { + return deleted; +} + +int32_t CommitPoint::compareTo(const LuceneObjectPtr& other) { + CommitPointPtr otherCommit(boost::static_pointer_cast(other)); + if (gen < otherCommit->gen) { + return -1; } - - int32_t CommitPoint::compareTo(LuceneObjectPtr other) - { - CommitPointPtr otherCommit(boost::static_pointer_cast(other)); - if (gen < otherCommit->gen) - return -1; - if (gen > otherCommit->gen) - return 1; - return 0; + if (gen > otherCommit->gen) { + return 1; } + return 0; +} + } diff --git a/src/core/index/IndexFileNameFilter.cpp b/src/core/index/IndexFileNameFilter.cpp index 35ae238d..64b42b27 100644 --- a/src/core/index/IndexFileNameFilter.cpp +++ b/src/core/index/IndexFileNameFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,56 +10,53 @@ #include "IndexFileNameFilter.h" #include "IndexFileNames.h" -namespace Lucene -{ - bool IndexFileNameFilter::accept(const String& directory, const String& name) - { - String::size_type i = name.find_last_of(L'.'); - if (i != String::npos) - { - String extension(name.substr(i+1)); - if (IndexFileNames::INDEX_EXTENSIONS().contains(extension)) +namespace Lucene { + +bool IndexFileNameFilter::accept(const String& directory, const String& name) { + String::size_type i = name.find_last_of(L'.'); + if (i != String::npos) { + String extension(name.substr(i+1)); + if (IndexFileNames::INDEX_EXTENSIONS().contains(extension)) { + return true; + } else if (!extension.empty()) { + if (extension[0] == L'f' && boost::regex_search(extension, boost::wregex(L"f\\d+"))) { return true; - else if (!extension.empty()) - { - if (extension[0] == L'f' && boost::regex_search(extension, boost::wregex(L"f\\d+"))) - return true; - if (extension[0] == L's' && boost::regex_search(extension, boost::wregex(L"s\\d+"))) - return true; } - } - else - { - if (name == IndexFileNames::DELETABLE()) - return true; - if (boost::starts_with(name, IndexFileNames::SEGMENTS())) + if (extension[0] == L's' && boost::regex_search(extension, boost::wregex(L"s\\d+"))) { return true; + } } - return false; - } - - bool IndexFileNameFilter::isCFSFile(const String& name) - { - String::size_type i = name.find_last_of(L'.'); - if (i != String::npos) - { - String extension(name.substr(i+1)); - if (IndexFileNames::INDEX_EXTENSIONS_IN_COMPOUND_FILE().contains(extension)) - return true; - else if (!extension.empty() && extension[0] == L'f' && boost::regex_search(extension, boost::wregex(L"f\\d+"))) - return true; + } else { + if (name == IndexFileNames::DELETABLE()) { + return true; + } + if (boost::starts_with(name, IndexFileNames::SEGMENTS())) { + return true; } - return false; } - - IndexFileNameFilterPtr 
IndexFileNameFilter::getFilter() - { - static IndexFileNameFilterPtr singleton; - if (!singleton) - { - singleton = newLucene(); - CycleCheck::addStatic(singleton); + return false; +} + +bool IndexFileNameFilter::isCFSFile(const String& name) { + String::size_type i = name.find_last_of(L'.'); + if (i != String::npos) { + String extension(name.substr(i+1)); + if (IndexFileNames::INDEX_EXTENSIONS_IN_COMPOUND_FILE().contains(extension)) { + return true; + } else if (!extension.empty() && extension[0] == L'f' && boost::regex_search(extension, boost::wregex(L"f\\d+"))) { + return true; } - return singleton; } + return false; +} + +IndexFileNameFilterPtr IndexFileNameFilter::getFilter() { + static IndexFileNameFilterPtr singleton; + LUCENE_RUN_ONCE( + singleton = newLucene(); + CycleCheck::addStatic(singleton); + ); + return singleton; +} + } diff --git a/src/core/index/IndexFileNames.cpp b/src/core/index/IndexFileNames.cpp index 52ea1534..efcb3267 100644 --- a/src/core/index/IndexFileNames.cpp +++ b/src/core/index/IndexFileNames.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,263 +10,230 @@ #include "SegmentInfo.h" #include "StringUtils.h" -namespace Lucene -{ - IndexFileNames::~IndexFileNames() - { - } - - const String& IndexFileNames::SEGMENTS() - { - static String _SEGMENTS(L"segments"); - return _SEGMENTS; - } - - const String& IndexFileNames::SEGMENTS_GEN() - { - static String _SEGMENTS_GEN(L"segments.gen"); - return _SEGMENTS_GEN; - } - - const String& IndexFileNames::DELETABLE() - { - static String _DELETABLE(L"deletable"); - return _DELETABLE; - } - - const String& IndexFileNames::NORMS_EXTENSION() - { - static String _NORMS_EXTENSION(L"nrm"); - return _NORMS_EXTENSION; - } - - const String& IndexFileNames::FREQ_EXTENSION() - { - static String _FREQ_EXTENSION(L"frq"); - return _FREQ_EXTENSION; - } - - const String& IndexFileNames::PROX_EXTENSION() - { - static String _PROX_EXTENSION(L"prx"); - return _PROX_EXTENSION; - } - - const String& IndexFileNames::TERMS_EXTENSION() - { - static String _TERMS_EXTENSION(L"tis"); - return _TERMS_EXTENSION; - } - - const String& IndexFileNames::TERMS_INDEX_EXTENSION() - { - static String _TERMS_INDEX_EXTENSION(L"tii"); - return _TERMS_INDEX_EXTENSION; - } - - const String& IndexFileNames::FIELDS_INDEX_EXTENSION() - { - static String _FIELDS_INDEX_EXTENSION(L"fdx"); - return _FIELDS_INDEX_EXTENSION; - } - - const String& IndexFileNames::FIELDS_EXTENSION() - { - static String _FIELDS_EXTENSION(L"fdt"); - return _FIELDS_EXTENSION; - } - - const String& IndexFileNames::VECTORS_FIELDS_EXTENSION() - { - static String _VECTORS_FIELDS_EXTENSION(L"tvf"); - return _VECTORS_FIELDS_EXTENSION; - } - - const String& IndexFileNames::VECTORS_DOCUMENTS_EXTENSION() - { - static String _VECTORS_DOCUMENTS_EXTENSION(L"tvd"); - return _VECTORS_DOCUMENTS_EXTENSION; - } - - const String& IndexFileNames::VECTORS_INDEX_EXTENSION() - { - static String _VECTORS_INDEX_EXTENSION(L"tvx"); - return _VECTORS_INDEX_EXTENSION; - } - - const 
String& IndexFileNames::COMPOUND_FILE_EXTENSION() - { - static String _COMPOUND_FILE_EXTENSION(L"cfs"); - return _COMPOUND_FILE_EXTENSION; - } - - const String& IndexFileNames::COMPOUND_FILE_STORE_EXTENSION() - { - static String _COMPOUND_FILE_STORE_EXTENSION(L"cfx"); - return _COMPOUND_FILE_STORE_EXTENSION; - } - - const String& IndexFileNames::DELETES_EXTENSION() - { - static String _DELETES_EXTENSION(L"del"); - return _DELETES_EXTENSION; - } - - const String& IndexFileNames::FIELD_INFOS_EXTENSION() - { - static String _FIELD_INFOS_EXTENSION(L"fnm"); - return _FIELD_INFOS_EXTENSION; - } - - const String& IndexFileNames::PLAIN_NORMS_EXTENSION() - { - static String _PLAIN_NORMS_EXTENSION(L"f"); - return _PLAIN_NORMS_EXTENSION; - } - - const String& IndexFileNames::SEPARATE_NORMS_EXTENSION() - { - static String _SEPARATE_NORMS_EXTENSION(L"s"); - return _SEPARATE_NORMS_EXTENSION; - } - - const String& IndexFileNames::GEN_EXTENSION() - { - static String _GEN_EXTENSION(L"gen"); - return _GEN_EXTENSION; +namespace Lucene { + +IndexFileNames::~IndexFileNames() { +} + +const String& IndexFileNames::SEGMENTS() { + static String _SEGMENTS(L"segments"); + return _SEGMENTS; +} + +const String& IndexFileNames::SEGMENTS_GEN() { + static String _SEGMENTS_GEN(L"segments.gen"); + return _SEGMENTS_GEN; +} + +const String& IndexFileNames::DELETABLE() { + static String _DELETABLE(L"deletable"); + return _DELETABLE; +} + +const String& IndexFileNames::NORMS_EXTENSION() { + static String _NORMS_EXTENSION(L"nrm"); + return _NORMS_EXTENSION; +} + +const String& IndexFileNames::FREQ_EXTENSION() { + static String _FREQ_EXTENSION(L"frq"); + return _FREQ_EXTENSION; +} + +const String& IndexFileNames::PROX_EXTENSION() { + static String _PROX_EXTENSION(L"prx"); + return _PROX_EXTENSION; +} + +const String& IndexFileNames::TERMS_EXTENSION() { + static String _TERMS_EXTENSION(L"tis"); + return _TERMS_EXTENSION; +} + +const String& IndexFileNames::TERMS_INDEX_EXTENSION() { + static String 
_TERMS_INDEX_EXTENSION(L"tii"); + return _TERMS_INDEX_EXTENSION; +} + +const String& IndexFileNames::FIELDS_INDEX_EXTENSION() { + static String _FIELDS_INDEX_EXTENSION(L"fdx"); + return _FIELDS_INDEX_EXTENSION; +} + +const String& IndexFileNames::FIELDS_EXTENSION() { + static String _FIELDS_EXTENSION(L"fdt"); + return _FIELDS_EXTENSION; +} + +const String& IndexFileNames::VECTORS_FIELDS_EXTENSION() { + static String _VECTORS_FIELDS_EXTENSION(L"tvf"); + return _VECTORS_FIELDS_EXTENSION; +} + +const String& IndexFileNames::VECTORS_DOCUMENTS_EXTENSION() { + static String _VECTORS_DOCUMENTS_EXTENSION(L"tvd"); + return _VECTORS_DOCUMENTS_EXTENSION; +} + +const String& IndexFileNames::VECTORS_INDEX_EXTENSION() { + static String _VECTORS_INDEX_EXTENSION(L"tvx"); + return _VECTORS_INDEX_EXTENSION; +} + +const String& IndexFileNames::COMPOUND_FILE_EXTENSION() { + static String _COMPOUND_FILE_EXTENSION(L"cfs"); + return _COMPOUND_FILE_EXTENSION; +} + +const String& IndexFileNames::COMPOUND_FILE_STORE_EXTENSION() { + static String _COMPOUND_FILE_STORE_EXTENSION(L"cfx"); + return _COMPOUND_FILE_STORE_EXTENSION; +} + +const String& IndexFileNames::DELETES_EXTENSION() { + static String _DELETES_EXTENSION(L"del"); + return _DELETES_EXTENSION; +} + +const String& IndexFileNames::FIELD_INFOS_EXTENSION() { + static String _FIELD_INFOS_EXTENSION(L"fnm"); + return _FIELD_INFOS_EXTENSION; +} + +const String& IndexFileNames::PLAIN_NORMS_EXTENSION() { + static String _PLAIN_NORMS_EXTENSION(L"f"); + return _PLAIN_NORMS_EXTENSION; +} + +const String& IndexFileNames::SEPARATE_NORMS_EXTENSION() { + static String _SEPARATE_NORMS_EXTENSION(L"s"); + return _SEPARATE_NORMS_EXTENSION; +} + +const String& IndexFileNames::GEN_EXTENSION() { + static String _GEN_EXTENSION(L"gen"); + return _GEN_EXTENSION; +} + +const HashSet IndexFileNames::INDEX_EXTENSIONS() { + static HashSet _INDEX_EXTENSIONS; + LUCENE_RUN_ONCE( + _INDEX_EXTENSIONS = HashSet::newInstance(); + 
_INDEX_EXTENSIONS.add(COMPOUND_FILE_EXTENSION()); + _INDEX_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); + _INDEX_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); + _INDEX_EXTENSIONS.add(FIELDS_EXTENSION()); + _INDEX_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); + _INDEX_EXTENSIONS.add(TERMS_EXTENSION()); + _INDEX_EXTENSIONS.add(FREQ_EXTENSION()); + _INDEX_EXTENSIONS.add(PROX_EXTENSION()); + _INDEX_EXTENSIONS.add(DELETES_EXTENSION()); + _INDEX_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); + _INDEX_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); + _INDEX_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); + _INDEX_EXTENSIONS.add(GEN_EXTENSION()); + _INDEX_EXTENSIONS.add(NORMS_EXTENSION()); + _INDEX_EXTENSIONS.add(COMPOUND_FILE_STORE_EXTENSION()); + ); + return _INDEX_EXTENSIONS; +}; + +const HashSet IndexFileNames::INDEX_EXTENSIONS_IN_COMPOUND_FILE() { + static HashSet _INDEX_EXTENSIONS_IN_COMPOUND_FILE; + LUCENE_RUN_ONCE( + _INDEX_EXTENSIONS_IN_COMPOUND_FILE = HashSet::newInstance(); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELD_INFOS_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELDS_INDEX_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELDS_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(TERMS_INDEX_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(TERMS_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FREQ_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(PROX_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_INDEX_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_DOCUMENTS_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_FIELDS_EXTENSION()); + _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(NORMS_EXTENSION()); + ); + return _INDEX_EXTENSIONS_IN_COMPOUND_FILE; +}; + +const HashSet IndexFileNames::STORE_INDEX_EXTENSIONS() { + static HashSet _STORE_INDEX_EXTENSIONS; + LUCENE_RUN_ONCE( + _STORE_INDEX_EXTENSIONS = HashSet::newInstance(); + _STORE_INDEX_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); + 
_STORE_INDEX_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); + _STORE_INDEX_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); + _STORE_INDEX_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); + _STORE_INDEX_EXTENSIONS.add(FIELDS_EXTENSION()); + ); + return _STORE_INDEX_EXTENSIONS; +}; + +const HashSet IndexFileNames::NON_STORE_INDEX_EXTENSIONS() { + static HashSet _NON_STORE_INDEX_EXTENSIONS; + LUCENE_RUN_ONCE( + _NON_STORE_INDEX_EXTENSIONS = HashSet::newInstance(); + _NON_STORE_INDEX_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); + _NON_STORE_INDEX_EXTENSIONS.add(FREQ_EXTENSION()); + _NON_STORE_INDEX_EXTENSIONS.add(PROX_EXTENSION()); + _NON_STORE_INDEX_EXTENSIONS.add(TERMS_EXTENSION()); + _NON_STORE_INDEX_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); + _NON_STORE_INDEX_EXTENSIONS.add(NORMS_EXTENSION()); + ); + return _NON_STORE_INDEX_EXTENSIONS; +}; + +const HashSet IndexFileNames::COMPOUND_EXTENSIONS() { + static HashSet _COMPOUND_EXTENSIONS; + LUCENE_RUN_ONCE( + _COMPOUND_EXTENSIONS = HashSet::newInstance(); + _COMPOUND_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); + _COMPOUND_EXTENSIONS.add(FREQ_EXTENSION()); + _COMPOUND_EXTENSIONS.add(PROX_EXTENSION()); + _COMPOUND_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); + _COMPOUND_EXTENSIONS.add(FIELDS_EXTENSION()); + _COMPOUND_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); + _COMPOUND_EXTENSIONS.add(TERMS_EXTENSION()); + ); + return _COMPOUND_EXTENSIONS; +}; + +const HashSet IndexFileNames::VECTOR_EXTENSIONS() { + static HashSet _VECTOR_EXTENSIONS; + LUCENE_RUN_ONCE( + _VECTOR_EXTENSIONS = HashSet::newInstance(); + _VECTOR_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); + _VECTOR_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); + _VECTOR_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); + ); + return _VECTOR_EXTENSIONS; +}; + +String IndexFileNames::fileNameFromGeneration(const String& base, const String& extension, int64_t gen) { + if (gen == SegmentInfo::NO) { + return L""; + } else if (gen == SegmentInfo::WITHOUT_GEN) { + return base + extension; + } else { + return 
base + L"_" + StringUtils::toString(gen, StringUtils::CHARACTER_MAX_RADIX) + extension; } - - const HashSet IndexFileNames::INDEX_EXTENSIONS() - { - static HashSet _INDEX_EXTENSIONS; - if (!_INDEX_EXTENSIONS) - { - _INDEX_EXTENSIONS = HashSet::newInstance(); - _INDEX_EXTENSIONS.add(COMPOUND_FILE_EXTENSION()); - _INDEX_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); - _INDEX_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); - _INDEX_EXTENSIONS.add(FIELDS_EXTENSION()); - _INDEX_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); - _INDEX_EXTENSIONS.add(TERMS_EXTENSION()); - _INDEX_EXTENSIONS.add(FREQ_EXTENSION()); - _INDEX_EXTENSIONS.add(PROX_EXTENSION()); - _INDEX_EXTENSIONS.add(DELETES_EXTENSION()); - _INDEX_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); - _INDEX_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); - _INDEX_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); - _INDEX_EXTENSIONS.add(GEN_EXTENSION()); - _INDEX_EXTENSIONS.add(NORMS_EXTENSION()); - _INDEX_EXTENSIONS.add(COMPOUND_FILE_STORE_EXTENSION()); - } - return _INDEX_EXTENSIONS; - }; - - const HashSet IndexFileNames::INDEX_EXTENSIONS_IN_COMPOUND_FILE() - { - static HashSet _INDEX_EXTENSIONS_IN_COMPOUND_FILE; - if (!_INDEX_EXTENSIONS_IN_COMPOUND_FILE) - { - _INDEX_EXTENSIONS_IN_COMPOUND_FILE = HashSet::newInstance(); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELD_INFOS_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELDS_INDEX_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FIELDS_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(TERMS_INDEX_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(TERMS_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(FREQ_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(PROX_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_INDEX_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_DOCUMENTS_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(VECTORS_FIELDS_EXTENSION()); - _INDEX_EXTENSIONS_IN_COMPOUND_FILE.add(NORMS_EXTENSION()); - } - return 
_INDEX_EXTENSIONS_IN_COMPOUND_FILE; - }; - - const HashSet IndexFileNames::STORE_INDEX_EXTENSIONS() - { - static HashSet _STORE_INDEX_EXTENSIONS; - if (!_STORE_INDEX_EXTENSIONS) - { - _STORE_INDEX_EXTENSIONS = HashSet::newInstance(); - _STORE_INDEX_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); - _STORE_INDEX_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); - _STORE_INDEX_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); - _STORE_INDEX_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); - _STORE_INDEX_EXTENSIONS.add(FIELDS_EXTENSION()); - } - return _STORE_INDEX_EXTENSIONS; - }; - - const HashSet IndexFileNames::NON_STORE_INDEX_EXTENSIONS() - { - static HashSet _NON_STORE_INDEX_EXTENSIONS; - if (!_NON_STORE_INDEX_EXTENSIONS) - { - _NON_STORE_INDEX_EXTENSIONS = HashSet::newInstance(); - _NON_STORE_INDEX_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); - _NON_STORE_INDEX_EXTENSIONS.add(FREQ_EXTENSION()); - _NON_STORE_INDEX_EXTENSIONS.add(PROX_EXTENSION()); - _NON_STORE_INDEX_EXTENSIONS.add(TERMS_EXTENSION()); - _NON_STORE_INDEX_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); - _NON_STORE_INDEX_EXTENSIONS.add(NORMS_EXTENSION()); - } - return _NON_STORE_INDEX_EXTENSIONS; - }; - - const HashSet IndexFileNames::COMPOUND_EXTENSIONS() - { - static HashSet _COMPOUND_EXTENSIONS; - if (!_COMPOUND_EXTENSIONS) - { - _COMPOUND_EXTENSIONS = HashSet::newInstance(); - _COMPOUND_EXTENSIONS.add(FIELD_INFOS_EXTENSION()); - _COMPOUND_EXTENSIONS.add(FREQ_EXTENSION()); - _COMPOUND_EXTENSIONS.add(PROX_EXTENSION()); - _COMPOUND_EXTENSIONS.add(FIELDS_INDEX_EXTENSION()); - _COMPOUND_EXTENSIONS.add(FIELDS_EXTENSION()); - _COMPOUND_EXTENSIONS.add(TERMS_INDEX_EXTENSION()); - _COMPOUND_EXTENSIONS.add(TERMS_EXTENSION()); - } - return _COMPOUND_EXTENSIONS; - }; - - const HashSet IndexFileNames::VECTOR_EXTENSIONS() - { - static HashSet _VECTOR_EXTENSIONS; - if (!_VECTOR_EXTENSIONS) - { - _VECTOR_EXTENSIONS = HashSet::newInstance(); - _VECTOR_EXTENSIONS.add(VECTORS_INDEX_EXTENSION()); - 
_VECTOR_EXTENSIONS.add(VECTORS_DOCUMENTS_EXTENSION()); - _VECTOR_EXTENSIONS.add(VECTORS_FIELDS_EXTENSION()); - } - return _VECTOR_EXTENSIONS; - }; - - String IndexFileNames::fileNameFromGeneration(const String& base, const String& extension, int64_t gen) - { - if (gen == SegmentInfo::NO) - return L""; - else if (gen == SegmentInfo::WITHOUT_GEN) - return base + extension; - else - return base + L"_" + StringUtils::toString(gen, StringUtils::CHARACTER_MAX_RADIX) + extension; +} + +bool IndexFileNames::isDocStoreFile(const String& fileName) { + if (boost::ends_with(fileName, COMPOUND_FILE_STORE_EXTENSION())) { + return true; } - - bool IndexFileNames::isDocStoreFile(const String& fileName) - { - if (boost::ends_with(fileName, COMPOUND_FILE_STORE_EXTENSION())) + for (HashSet::iterator index = STORE_INDEX_EXTENSIONS().begin(); index != STORE_INDEX_EXTENSIONS().end(); ++index) { + if (boost::ends_with(fileName, *index)) { return true; - for (HashSet::iterator index = STORE_INDEX_EXTENSIONS().begin(); index != STORE_INDEX_EXTENSIONS().end(); ++index) - { - if (boost::ends_with(fileName, *index)) - return true; } - return false; - } - - String IndexFileNames::segmentFileName(const String& segmentName, const String& ext) - { - return segmentName + L"." + ext; } + return false; +} + +String IndexFileNames::segmentFileName(const String& segmentName, const String& ext) { + return segmentName + L"." + ext; +} + } diff --git a/src/core/index/IndexReader.cpp b/src/core/index/IndexReader.cpp index 2a70c518..a6cd3372 100644 --- a/src/core/index/IndexReader.cpp +++ b/src/core/index/IndexReader.cpp @@ -1,11 +1,11 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" -#include +#include #include #include "IndexReader.h" #include "_IndexReader.h" @@ -18,447 +18,386 @@ #include "FileUtils.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t IndexReader::DEFAULT_TERMS_INDEX_DIVISOR = 1; - - IndexReader::IndexReader() - { - refCount = 1; - closed = false; - _hasChanges = false; - } - - IndexReader::~IndexReader() - { - } - - int32_t IndexReader::getRefCount() - { - SyncLock syncLock(this); - return refCount; - } - - void IndexReader::incRef() - { - SyncLock syncLock(this); - BOOST_ASSERT(refCount > 0); - ensureOpen(); - ++refCount; - } - - void IndexReader::decRef() - { - SyncLock syncLock(this); - BOOST_ASSERT(refCount > 0); - ensureOpen(); - if (refCount == 1) - { - commit(); - doClose(); - } - --refCount; - } - - void IndexReader::ensureOpen() - { - if (refCount <= 0) - boost::throw_exception(AlreadyClosedException(L"this IndexReader is closed")); - } - - IndexReaderPtr IndexReader::open(DirectoryPtr directory) - { - return open(directory, IndexDeletionPolicyPtr(), IndexCommitPtr(), true, DEFAULT_TERMS_INDEX_DIVISOR); - } - - IndexReaderPtr IndexReader::open(DirectoryPtr directory, bool readOnly) - { - return open(directory, IndexDeletionPolicyPtr(), IndexCommitPtr(), readOnly, DEFAULT_TERMS_INDEX_DIVISOR); - } - - IndexReaderPtr IndexReader::open(IndexCommitPtr commit, bool readOnly) - { - return open(commit->getDirectory(), IndexDeletionPolicyPtr(), commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); - } - - IndexReaderPtr IndexReader::open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, bool readOnly) - { - return open(directory, deletionPolicy, IndexCommitPtr(), readOnly, DEFAULT_TERMS_INDEX_DIVISOR); - } - - IndexReaderPtr IndexReader::open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) - { - return open(directory, deletionPolicy, 
IndexCommitPtr(), readOnly, termInfosIndexDivisor); - } - - IndexReaderPtr IndexReader::open(IndexCommitPtr commit, IndexDeletionPolicyPtr deletionPolicy, bool readOnly) - { - return open(commit->getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); - } - - IndexReaderPtr IndexReader::open(IndexCommitPtr commit, IndexDeletionPolicyPtr deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) - { - return open(commit->getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); - } - - IndexReaderPtr IndexReader::open(DirectoryPtr directory, IndexDeletionPolicyPtr deletionPolicy, IndexCommitPtr commit, bool readOnly, int32_t termInfosIndexDivisor) - { - return DirectoryReader::open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); - } - - IndexReaderPtr IndexReader::reopen() - { - SyncLock syncLock(this); - boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen().")); - return IndexReaderPtr(); - } - - IndexReaderPtr IndexReader::reopen(bool openReadOnly) - { - SyncLock syncLock(this); - boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen().")); - return IndexReaderPtr(); - } - - IndexReaderPtr IndexReader::reopen(IndexCommitPtr commit) - { - SyncLock syncLock(this); - boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen(IndexCommit).")); - return IndexReaderPtr(); - } +namespace Lucene { - LuceneObjectPtr IndexReader::clone(LuceneObjectPtr other) - { - SyncLock syncLock(this); - if (!other) - boost::throw_exception(UnsupportedOperationException(L"This reader does not implement clone().")); - return other; - } - - LuceneObjectPtr IndexReader::clone(bool openReadOnly, LuceneObjectPtr other) - { - SyncLock syncLock(this); - if (!other) - boost::throw_exception(UnsupportedOperationException(L"This reader does not implement clone(bool).")); - return other; - } - - DirectoryPtr 
IndexReader::directory() - { - ensureOpen(); - boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); - return DirectoryPtr(); - } - - int64_t IndexReader::lastModified(DirectoryPtr directory2) - { - return newLucene(newLucene(), directory2)->run(); - } - - int64_t IndexReader::getCurrentVersion(DirectoryPtr directory) - { - return SegmentInfos::readCurrentVersion(directory); - } - - MapStringString IndexReader::getCommitUserData(DirectoryPtr directory) - { - return SegmentInfos::readCurrentUserData(directory); - } - - int64_t IndexReader::getVersion() - { - boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); - return 0; - } - - MapStringString IndexReader::getCommitUserData() - { - boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); - return MapStringString(); - } - - bool IndexReader::isCurrent() - { - boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); - return false; - } - - bool IndexReader::isOptimized() - { - boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); - return false; - } - - bool IndexReader::indexExists(DirectoryPtr directory) - { - return (SegmentInfos::getCurrentSegmentGeneration(directory) != -1); - } - - int32_t IndexReader::numDeletedDocs() - { - return (maxDoc() - numDocs()); - } - - DocumentPtr IndexReader::document(int32_t n) - { - ensureOpen(); - return document(n, FieldSelectorPtr()); - } - - bool IndexReader::hasChanges() - { - return _hasChanges; - } - - bool IndexReader::hasNorms(const String& field) - { - // backward compatible implementation. - // SegmentReader has an efficient implementation. 
- ensureOpen(); - return norms(field); - } - - void IndexReader::setNorm(int32_t doc, const String& field, uint8_t value) - { - SyncLock syncLock(this); - ensureOpen(); - acquireWriteLock(); - _hasChanges = true; - doSetNorm(doc, field, value); +const int32_t IndexReader::DEFAULT_TERMS_INDEX_DIVISOR = 1; + +IndexReader::IndexReader() { + refCount = 1; + closed = false; + _hasChanges = false; +} + +IndexReader::~IndexReader() { +} + +int32_t IndexReader::getRefCount() { + SyncLock syncLock(this); + return refCount; +} + +void IndexReader::incRef() { + SyncLock syncLock(this); + BOOST_ASSERT(refCount > 0); + ensureOpen(); + ++refCount; +} + +void IndexReader::decRef() { + SyncLock syncLock(this); + BOOST_ASSERT(refCount > 0); + ensureOpen(); + if (refCount == 1) { + commit(); + doClose(); } - - void IndexReader::setNorm(int32_t doc, const String& field, double value) - { - ensureOpen(); - setNorm(doc, field, Similarity::encodeNorm(value)); + --refCount; +} + +void IndexReader::ensureOpen() { + if (refCount <= 0) { + boost::throw_exception(AlreadyClosedException(L"this IndexReader is closed")); } +} + +IndexReaderPtr IndexReader::open(const DirectoryPtr& directory) { + return open(directory, IndexDeletionPolicyPtr(), IndexCommitPtr(), true, DEFAULT_TERMS_INDEX_DIVISOR); +} + +IndexReaderPtr IndexReader::open(const DirectoryPtr& directory, bool readOnly) { + return open(directory, IndexDeletionPolicyPtr(), IndexCommitPtr(), readOnly, DEFAULT_TERMS_INDEX_DIVISOR); +} + +IndexReaderPtr IndexReader::open(const IndexCommitPtr& commit, bool readOnly) { + return open(commit->getDirectory(), IndexDeletionPolicyPtr(), commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); +} + +IndexReaderPtr IndexReader::open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly) { + return open(directory, deletionPolicy, IndexCommitPtr(), readOnly, DEFAULT_TERMS_INDEX_DIVISOR); +} + +IndexReaderPtr IndexReader::open(const DirectoryPtr& directory, const 
IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { + return open(directory, deletionPolicy, IndexCommitPtr(), readOnly, termInfosIndexDivisor); +} - TermDocsPtr IndexReader::termDocs(TermPtr term) - { - ensureOpen(); - TermDocsPtr _termDocs(termDocs()); - _termDocs->seek(term); - return _termDocs; +IndexReaderPtr IndexReader::open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly) { + return open(commit->getDirectory(), deletionPolicy, commit, readOnly, DEFAULT_TERMS_INDEX_DIVISOR); +} + +IndexReaderPtr IndexReader::open(const IndexCommitPtr& commit, const IndexDeletionPolicyPtr& deletionPolicy, bool readOnly, int32_t termInfosIndexDivisor) { + return open(commit->getDirectory(), deletionPolicy, commit, readOnly, termInfosIndexDivisor); +} + +IndexReaderPtr IndexReader::open(const DirectoryPtr& directory, const IndexDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& commit, bool readOnly, int32_t termInfosIndexDivisor) { + return DirectoryReader::open(directory, deletionPolicy, commit, readOnly, termInfosIndexDivisor); +} + +IndexReaderPtr IndexReader::reopen() { + SyncLock syncLock(this); + boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen().")); + return IndexReaderPtr(); +} + +IndexReaderPtr IndexReader::reopen(bool openReadOnly) { + SyncLock syncLock(this); + boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen().")); + return IndexReaderPtr(); +} + +IndexReaderPtr IndexReader::reopen(const IndexCommitPtr& commit) { + SyncLock syncLock(this); + boost::throw_exception(UnsupportedOperationException(L"This reader does not support reopen(IndexCommit).")); + return IndexReaderPtr(); +} + +LuceneObjectPtr IndexReader::clone(const LuceneObjectPtr& other) { + SyncLock syncLock(this); + if (!other) { + boost::throw_exception(UnsupportedOperationException(L"This reader does not implement clone().")); } - - 
TermPositionsPtr IndexReader::termPositions(TermPtr term) - { - ensureOpen(); - TermPositionsPtr _termPositions(termPositions()); - _termPositions->seek(term); - return _termPositions; + return other; +} + +LuceneObjectPtr IndexReader::clone(bool openReadOnly, const LuceneObjectPtr& other) { + SyncLock syncLock(this); + if (!other) { + boost::throw_exception(UnsupportedOperationException(L"This reader does not implement clone(bool).")); } - - void IndexReader::deleteDocument(int32_t docNum) - { - SyncLock syncLock(this); - ensureOpen(); - acquireWriteLock(); - _hasChanges = true; - doDelete(docNum); + return other; +} + +DirectoryPtr IndexReader::directory() { + ensureOpen(); + boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); + return DirectoryPtr(); +} + +int64_t IndexReader::lastModified(const DirectoryPtr& directory2) { + return newLucene(newLucene(), directory2)->run(); +} + +int64_t IndexReader::getCurrentVersion(const DirectoryPtr& directory) { + return SegmentInfos::readCurrentVersion(directory); +} + +MapStringString IndexReader::getCommitUserData(const DirectoryPtr& directory) { + return SegmentInfos::readCurrentUserData(directory); +} + +int64_t IndexReader::getVersion() { + boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); + return 0; +} + +MapStringString IndexReader::getCommitUserData() { + boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); + return MapStringString(); +} + +bool IndexReader::isCurrent() { + boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); + return false; +} + +bool IndexReader::isOptimized() { + boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); + return false; +} + +bool IndexReader::indexExists(const DirectoryPtr& directory) { + return 
(SegmentInfos::getCurrentSegmentGeneration(directory) != -1); +} + +int32_t IndexReader::numDeletedDocs() { + return (maxDoc() - numDocs()); +} + +DocumentPtr IndexReader::document(int32_t n) { + ensureOpen(); + return document(n, FieldSelectorPtr()); +} + +bool IndexReader::hasChanges() { + return _hasChanges; +} + +bool IndexReader::hasNorms(const String& field) { + // backward compatible implementation. + // SegmentReader has an efficient implementation. + ensureOpen(); + return norms(field); +} + +void IndexReader::setNorm(int32_t doc, const String& field, uint8_t value) { + SyncLock syncLock(this); + ensureOpen(); + acquireWriteLock(); + _hasChanges = true; + doSetNorm(doc, field, value); +} + +void IndexReader::setNorm(int32_t doc, const String& field, double value) { + ensureOpen(); + setNorm(doc, field, Similarity::encodeNorm(value)); +} + +TermDocsPtr IndexReader::termDocs(const TermPtr& term) { + ensureOpen(); + TermDocsPtr _termDocs(termDocs()); + _termDocs->seek(term); + return _termDocs; +} + +TermPositionsPtr IndexReader::termPositions(const TermPtr& term) { + ensureOpen(); + TermPositionsPtr _termPositions(termPositions()); + _termPositions->seek(term); + return _termPositions; +} + +void IndexReader::deleteDocument(int32_t docNum) { + SyncLock syncLock(this); + ensureOpen(); + acquireWriteLock(); + _hasChanges = true; + doDelete(docNum); +} + +int32_t IndexReader::deleteDocuments(const TermPtr& term) { + ensureOpen(); + TermDocsPtr docs(termDocs(term)); + if (!docs) { + return 0; } - - int32_t IndexReader::deleteDocuments(TermPtr term) - { - ensureOpen(); - TermDocsPtr docs(termDocs(term)); - if (!docs) - return 0; - int32_t n = 0; - LuceneException finally; - try - { - while (docs->next()) - { - deleteDocument(docs->doc()); - ++n; - } + int32_t n = 0; + LuceneException finally; + try { + while (docs->next()) { + deleteDocument(docs->doc()); + ++n; } - catch (LuceneException& e) - { - finally = e; - } - docs->close(); - finally.throwException(); - 
return n; - } - - void IndexReader::undeleteAll() - { - SyncLock syncLock(this); - ensureOpen(); - acquireWriteLock(); - _hasChanges = true; - doUndeleteAll(); + } catch (LuceneException& e) { + finally = e; } - - void IndexReader::acquireWriteLock() - { - SyncLock syncLock(this); - // NOOP - } - - void IndexReader::flush() - { - SyncLock syncLock(this); - ensureOpen(); - commit(); - } - - void IndexReader::flush(MapStringString commitUserData) - { - SyncLock syncLock(this); - ensureOpen(); - commit(commitUserData); - } - - void IndexReader::commit() - { - commit(MapStringString()); + docs->close(); + finally.throwException(); + return n; +} + +void IndexReader::undeleteAll() { + SyncLock syncLock(this); + ensureOpen(); + acquireWriteLock(); + _hasChanges = true; + doUndeleteAll(); +} + +void IndexReader::acquireWriteLock() { + SyncLock syncLock(this); + // NOOP +} + +void IndexReader::flush() { + SyncLock syncLock(this); + ensureOpen(); + commit(); +} + +void IndexReader::flush(MapStringString commitUserData) { + SyncLock syncLock(this); + ensureOpen(); + commit(commitUserData); +} + +void IndexReader::commit() { + commit(MapStringString()); +} + +void IndexReader::commit(MapStringString commitUserData) { + SyncLock syncLock(this); + if (_hasChanges) { + doCommit(commitUserData); } - - void IndexReader::commit(MapStringString commitUserData) - { - SyncLock syncLock(this); - if (_hasChanges) - doCommit(commitUserData); - _hasChanges = false; + _hasChanges = false; +} + +void IndexReader::close() { + SyncLock syncLock(this); + if (!closed) { + decRef(); + closed = true; } - - void IndexReader::close() - { - SyncLock syncLock(this); - if (!closed) - { - decRef(); - closed = true; +} + +IndexCommitPtr IndexReader::getIndexCommit() { + boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); + return IndexCommitPtr(); +} + +void IndexReader::main(Collection args) { + String filename; + bool extract = false; + + for 
(Collection::iterator arg = args.begin(); arg != args.end(); ++arg) { + if (*arg == L"-extract") { + extract = true; + } else if (filename.empty()) { + filename = *arg; } } - - IndexCommitPtr IndexReader::getIndexCommit() - { - boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); - return IndexCommitPtr(); + + if (filename.empty()) { + std::wcout << L"Usage: IndexReader [-extract] "; + return; } - - void IndexReader::main(Collection args) - { - String filename; - bool extract = false; - - for (Collection::iterator arg = args.begin(); arg != args.end(); ++arg) - { - if (*arg == L"-extract") - extract = true; - else if (filename.empty()) - filename = *arg; - } - - if (filename.empty()) - { - std::wcout << L"Usage: IndexReader [-extract] "; - return; - } - - DirectoryPtr dir; - CompoundFileReaderPtr cfr; - - LuceneException finally; - try - { - String dirname(FileUtils::extractPath(filename)); - filename = FileUtils::extractPath(filename); - dir = FSDirectory::open(dirname); - cfr = newLucene(dir, filename); - - HashSet _files(cfr->listAll()); - Collection files(Collection::newInstance(_files.begin(), _files.end())); - std::sort(files.begin(), files.end()); // sort the array of filename so that the output is more readable - - for (Collection::iterator file = files.begin(); file != files.end(); ++file) - { - int64_t len = cfr->fileLength(*file); - - if (extract) - { - std::wcout << L"extract " << *file << L" with " << len << L" bytes to local directory..."; - IndexInputPtr ii(cfr->openInput(*file)); - - std::ofstream f(StringUtils::toUTF8(*file).c_str(), std::ios::binary | std::ios::out); - - // read and write with a small buffer, which is more effective than reading byte by byte - ByteArray buffer(ByteArray::newInstance(1024)); - - int32_t chunk = buffer.size(); - while (len > 0) - { - int32_t bufLen = std::min(chunk, (int32_t)len); - ii->readBytes(buffer.get(), 0, bufLen); - f.write((char*)buffer.get(), bufLen); - len 
-= bufLen; - } - ii->close(); + + DirectoryPtr dir; + CompoundFileReaderPtr cfr; + + LuceneException finally; + try { + String dirname(FileUtils::extractPath(filename)); + filename = FileUtils::extractPath(filename); + dir = FSDirectory::open(dirname); + cfr = newLucene(dir, filename); + + HashSet _files(cfr->listAll()); + Collection files(Collection::newInstance(_files.begin(), _files.end())); + std::sort(files.begin(), files.end()); // sort the array of filename so that the output is more readable + + for (Collection::iterator file = files.begin(); file != files.end(); ++file) { + int64_t len = cfr->fileLength(*file); + + if (extract) { + std::wcout << L"extract " << *file << L" with " << len << L" bytes to local directory..."; + IndexInputPtr ii(cfr->openInput(*file)); + + boost::filesystem::ofstream f(*file, std::ios::binary | std::ios::out); + + // read and write with a small buffer, which is more effective than reading byte by byte + ByteArray buffer(ByteArray::newInstance(1024)); + + int32_t chunk = buffer.size(); + while (len > 0) { + int32_t bufLen = std::min(chunk, (int32_t)len); + ii->readBytes(buffer.get(), 0, bufLen); + f.write((char*)buffer.get(), bufLen); + len -= bufLen; } - else - std::wcout << *file << L": " << len << " bytes\n"; + ii->close(); + } else { + std::wcout << *file << L": " << len << " bytes\n"; } } - catch (LuceneException& e) - { - finally = e; - } - - if (dir) - dir->close(); - if (cfr) - cfr->close(); - - finally.throwException(); - } - - Collection IndexReader::listCommits(DirectoryPtr dir) - { - return DirectoryReader::listCommits(dir); - } - - Collection IndexReader::getSequentialSubReaders() - { - return Collection(); // override - } - - LuceneObjectPtr IndexReader::getFieldCacheKey() - { - return shared_from_this(); - } - - LuceneObjectPtr IndexReader::getDeletesCacheKey() - { - return shared_from_this(); - } - - int64_t IndexReader::getUniqueTermCount() - { - boost::throw_exception(UnsupportedOperationException(L"This reader 
does not implement getUniqueTermCount()")); - return 0; - } - - int32_t IndexReader::getTermInfosIndexDivisor() - { - boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); - return 0; - } - - FindSegmentsModified::FindSegmentsModified(SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFileT(infos, directory) - { - result = 0; + } catch (LuceneException& e) { + finally = e; } - - FindSegmentsModified::~FindSegmentsModified() - { + + if (dir) { + dir->close(); } - - uint64_t FindSegmentsModified::doBody(const String& segmentFileName) - { - return directory->fileModified(segmentFileName); + if (cfr) { + cfr->close(); } + + finally.throwException(); +} + +Collection IndexReader::listCommits(const DirectoryPtr& dir) { + return DirectoryReader::listCommits(dir); +} + +Collection IndexReader::getSequentialSubReaders() { + return Collection(); // override +} + +LuceneObjectPtr IndexReader::getFieldCacheKey() { + return shared_from_this(); +} + +LuceneObjectPtr IndexReader::getDeletesCacheKey() { + return shared_from_this(); +} + +int64_t IndexReader::getUniqueTermCount() { + boost::throw_exception(UnsupportedOperationException(L"This reader does not implement getUniqueTermCount()")); + return 0; +} + +int32_t IndexReader::getTermInfosIndexDivisor() { + boost::throw_exception(UnsupportedOperationException(L"This reader does not support this method.")); + return 0; +} + +FindSegmentsModified::FindSegmentsModified(const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFileT(infos, directory) { + result = 0; +} + +FindSegmentsModified::~FindSegmentsModified() { +} + +uint64_t FindSegmentsModified::doBody(const String& segmentFileName) { + return directory->fileModified(segmentFileName); +} + } diff --git a/src/core/index/IndexWriter.cpp b/src/core/index/IndexWriter.cpp index aabb6e4d..86749361 100644 --- a/src/core/index/IndexWriter.cpp +++ b/src/core/index/IndexWriter.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -30,3828 +30,3521 @@ #include "TestPoint.h" #include "StringUtils.h" -namespace Lucene -{ - /// The normal read buffer size defaults to 1024, but increasing this during merging seems to - /// yield performance gains. However we don't want to increase it too much because there are - /// quite a few BufferedIndexInputs created during merging. - const int32_t IndexWriter::MERGE_READ_BUFFER_SIZE = 4096; - - int32_t IndexWriter::MESSAGE_ID = 0; - InfoStreamPtr IndexWriter::defaultInfoStream; - - /// Default value for the write lock timeout (1,000). - int64_t IndexWriter::WRITE_LOCK_TIMEOUT = 1000; - - const String IndexWriter::WRITE_LOCK_NAME = L"write.lock"; - - /// Value to denote a flush trigger is disabled. - const int32_t IndexWriter::DISABLE_AUTO_FLUSH = -1; - - /// Disabled by default (because IndexWriter flushes by RAM usage by default). - const int32_t IndexWriter::DEFAULT_MAX_BUFFERED_DOCS = IndexWriter::DISABLE_AUTO_FLUSH; - - /// Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). - const double IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB = 16.0; - - /// Disabled by default (because IndexWriter flushes by RAM usage by default). - const int32_t IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriter::DISABLE_AUTO_FLUSH; - - /// Default value is 10000. - const int32_t IndexWriter::DEFAULT_MAX_FIELD_LENGTH = 10000; - - /// Default value is 128. 
- const int32_t IndexWriter::DEFAULT_TERM_INDEX_INTERVAL = 128; - - /// Sets the maximum field length to INT_MAX - const int32_t IndexWriter::MaxFieldLengthUNLIMITED = INT_MAX; - - /// Sets the maximum field length to {@link #DEFAULT_MAX_FIELD_LENGTH} - const int32_t IndexWriter::MaxFieldLengthLIMITED = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; - - IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, int32_t mfl) - { - this->directory = d; - this->analyzer = a; - this->create = create; - this->maxFieldLength = mfl; +namespace Lucene { + +/// The normal read buffer size defaults to 1024, but increasing this during merging seems to +/// yield performance gains. However we don't want to increase it too much because there are +/// quite a few BufferedIndexInputs created during merging. +const int32_t IndexWriter::MERGE_READ_BUFFER_SIZE = 4096; + +int32_t IndexWriter::MESSAGE_ID = 0; +InfoStreamPtr IndexWriter::defaultInfoStream; + +/// Default value for the write lock timeout (1,000). +int64_t IndexWriter::WRITE_LOCK_TIMEOUT = 1000; + +const String IndexWriter::WRITE_LOCK_NAME = L"write.lock"; + +/// Value to denote a flush trigger is disabled. +const int32_t IndexWriter::DISABLE_AUTO_FLUSH = -1; + +/// Disabled by default (because IndexWriter flushes by RAM usage by default). +const int32_t IndexWriter::DEFAULT_MAX_BUFFERED_DOCS = IndexWriter::DISABLE_AUTO_FLUSH; + +/// Default value is 16 MB (which means flush when buffered docs consume 16 MB RAM). +const double IndexWriter::DEFAULT_RAM_BUFFER_SIZE_MB = 16.0; + +/// Disabled by default (because IndexWriter flushes by RAM usage by default). +const int32_t IndexWriter::DEFAULT_MAX_BUFFERED_DELETE_TERMS = IndexWriter::DISABLE_AUTO_FLUSH; + +/// Default value is 10000. +const int32_t IndexWriter::DEFAULT_MAX_FIELD_LENGTH = 10000; + +/// Default value is 128. 
+const int32_t IndexWriter::DEFAULT_TERM_INDEX_INTERVAL = 128; + +/// Sets the maximum field length to INT_MAX +const int32_t IndexWriter::MaxFieldLengthUNLIMITED = INT_MAX; + +/// Sets the maximum field length to {@link #DEFAULT_MAX_FIELD_LENGTH} +const int32_t IndexWriter::MaxFieldLengthLIMITED = IndexWriter::DEFAULT_MAX_FIELD_LENGTH; + +IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, int32_t mfl) { + this->directory = d; + this->analyzer = a; + this->create = create; + this->maxFieldLength = mfl; +} + +IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, int32_t mfl) { + this->directory = d; + this->analyzer = a; + this->create = !IndexReader::indexExists(d); + this->maxFieldLength = mfl; +} + +IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl) { + this->directory = d; + this->analyzer = a; + this->deletionPolicy = deletionPolicy; + this->create = !IndexReader::indexExists(d); + this->maxFieldLength = mfl; +} + +IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl) { + this->directory = d; + this->analyzer = a; + this->create = create; + this->deletionPolicy = deletionPolicy; + this->maxFieldLength = mfl; +} + +IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, bool create, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl, const IndexingChainPtr& indexingChain, const IndexCommitPtr& commit) { + this->directory = d; + this->analyzer = a; + this->create = create; + this->deletionPolicy = deletionPolicy; + this->maxFieldLength = mfl; + this->indexingChain = indexingChain; + this->indexCommit = commit; +} + +IndexWriter::IndexWriter(const DirectoryPtr& d, const AnalyzerPtr& a, const IndexDeletionPolicyPtr& deletionPolicy, int32_t mfl, const IndexCommitPtr& commit) { + this->directory = d; + this->analyzer = a; + this->create 
= false; + this->deletionPolicy = deletionPolicy; + this->maxFieldLength = mfl; + this->indexCommit = commit; +} + +IndexWriter::~IndexWriter() { +} + +void IndexWriter::initialize() { + messageID = -1; + messageIDLock = newInstance(); + setMessageID(defaultInfoStream); + this->writeLockTimeout = WRITE_LOCK_TIMEOUT; + this->segmentInfos = newLucene(); + pendingMerges = Collection::newInstance(); + mergeExceptions = Collection::newInstance(); + segmentsToOptimize = SetSegmentInfo::newInstance(); + optimizeMaxNumSegments = 0; + mergingSegments = SetSegmentInfo::newInstance(); + runningMerges = SetOneMerge::newInstance(); + synced = HashSet::newInstance(); + syncing = HashSet::newInstance(); + changeCount = 0; + lastCommitChangeCount = 0; + poolReaders = false; + readCount = 0; + writeThread = 0; + upgradeCount = 0; + readerTermsIndexDivisor = IndexReader::DEFAULT_TERMS_INDEX_DIVISOR; + readerPool = newLucene(shared_from_this()); + closed = false; + closing = false; + hitOOM = false; + stopMerges = false; + mergeGen = 0; + flushCount = 0; + flushDeletesCount = 0; + localFlushedDocCount = 0; + pendingCommitChangeCount = 0; + mergePolicy = newLucene(shared_from_this()); + mergeScheduler = newLucene(); + similarity = Similarity::getDefault(); + termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; + commitLock = newInstance(); + + if (!indexingChain) { + indexingChain = DocumentsWriter::getDefaultIndexingChain(); } - - IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, int32_t mfl) - { - this->directory = d; - this->analyzer = a; - this->create = !IndexReader::indexExists(d); - this->maxFieldLength = mfl; + + if (create) { + directory->clearLock(WRITE_LOCK_NAME); // clear the write lock in case it's leftover } - - IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl) - { - this->directory = d; - this->analyzer = a; - this->deletionPolicy = deletionPolicy; - this->create = !IndexReader::indexExists(d); - 
this->maxFieldLength = mfl; - } - - IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl) - { - this->directory = d; - this->analyzer = a; - this->create = create; - this->deletionPolicy = deletionPolicy; - this->maxFieldLength = mfl; - } - - IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, bool create, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl, IndexingChainPtr indexingChain, IndexCommitPtr commit) - { - this->directory = d; - this->analyzer = a; - this->create = create; - this->deletionPolicy = deletionPolicy; - this->maxFieldLength = mfl; - this->indexingChain = indexingChain; - this->indexCommit = commit; - } - - IndexWriter::IndexWriter(DirectoryPtr d, AnalyzerPtr a, IndexDeletionPolicyPtr deletionPolicy, int32_t mfl, IndexCommitPtr commit) - { - this->directory = d; - this->analyzer = a; - this->create = false; - this->deletionPolicy = deletionPolicy; - this->maxFieldLength = mfl; - this->indexCommit = commit; - } - - IndexWriter::~IndexWriter() - { + + LockPtr writeLock(directory->makeLock(WRITE_LOCK_NAME)); + + if (!writeLock->obtain((int32_t)writeLockTimeout)) { // obtain write lock + boost::throw_exception(LockObtainFailedException(L"Index locked for write: " + writeLock->toString())); } - - void IndexWriter::initialize() - { - messageID = -1; - messageIDLock = newInstance(); - setMessageID(defaultInfoStream); - this->writeLockTimeout = WRITE_LOCK_TIMEOUT; - this->segmentInfos = newLucene(); - pendingMerges = Collection::newInstance(); - mergeExceptions = Collection::newInstance(); - segmentsToOptimize = SetSegmentInfo::newInstance(); - optimizeMaxNumSegments = 0; - mergingSegments = SetSegmentInfo::newInstance(); - runningMerges = SetOneMerge::newInstance(); - synced = HashSet::newInstance(); - syncing = HashSet::newInstance(); - changeCount = 0; - lastCommitChangeCount = 0; - poolReaders = false; - readCount = 0; - writeThread = 0; - upgradeCount = 0; - 
readerTermsIndexDivisor = IndexReader::DEFAULT_TERMS_INDEX_DIVISOR; - readerPool = newLucene(shared_from_this()); - closed = false; - closing = false; - hitOOM = false; - stopMerges = false; - mergeGen = 0; - flushCount = 0; - flushDeletesCount = 0; - localFlushedDocCount = 0; - pendingCommitChangeCount = 0; - mergePolicy = newLucene(shared_from_this()); - mergeScheduler = newLucene(); - similarity = Similarity::getDefault(); - termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; - commitLock = newInstance(); - - if (!indexingChain) - indexingChain = DocumentsWriter::getDefaultIndexingChain(); - - if (create) - directory->clearLock(WRITE_LOCK_NAME); // clear the write lock in case it's leftover - - LockPtr writeLock(directory->makeLock(WRITE_LOCK_NAME)); - - if (!writeLock->obtain((int32_t)writeLockTimeout)) // obtain write lock - boost::throw_exception(LockObtainFailedException(L"Index locked for write: " + writeLock->toString())); - this->writeLock = writeLock; - - bool success = false; - LuceneException finally; - - try - { - if (create) - { - // Try to read first. This is to allow create against an index that's currently open for - // searching. In this case we write the next segments_N file with no segments - bool doCommit; - try - { - segmentInfos->read(directory); - segmentInfos->clear(); - doCommit = false; - } - catch (LuceneException&) - { - // Likely this means it's a fresh directory - doCommit = true; - } + this->writeLock = writeLock; - if (doCommit) - { - // Only commit if there is no segments file in this dir already. - segmentInfos->commit(directory); - HashSet files(segmentInfos->files(directory, true)); - synced.addAll(files.begin(), files.end()); - } - else - { - // Record that we have a change (zero out all segments) pending - ++changeCount; - } - } - else - { + bool success = false; + LuceneException finally; + + try { + if (create) { + // Try to read first. This is to allow create against an index that's currently open for + // searching. 
In this case we write the next segments_N file with no segments + bool doCommit; + try { segmentInfos->read(directory); - - if (indexCommit) - { - // Swap out all segments, but, keep metadata in SegmentInfos, like version & generation, to - // preserve write-once. This is important if readers are open against the future commit points. - if (indexCommit->getDirectory() != directory) - boost::throw_exception(IllegalArgumentException(L"IndexCommit's directory doesn't match my directory")); - SegmentInfosPtr oldInfos(newLucene()); - oldInfos->read(directory, indexCommit->getSegmentsFileName()); - segmentInfos->replace(oldInfos); - ++changeCount; - if (infoStream) - message(L"init: loaded commit \"" + indexCommit->getSegmentsFileName() + L"\""); - } - - // We assume that this segments_N was previously properly sync'd + segmentInfos->clear(); + doCommit = false; + } catch (LuceneException&) { + // Likely this means it's a fresh directory + doCommit = true; + } + + if (doCommit) { + // Only commit if there is no segments file in this dir already. + segmentInfos->commit(directory); HashSet files(segmentInfos->files(directory, true)); synced.addAll(files.begin(), files.end()); + } else { + // Record that we have a change (zero out all segments) pending + ++changeCount; } - - setRollbackSegmentInfos(segmentInfos); - - docWriter = newLucene(directory, shared_from_this(), indexingChain); - docWriter->setInfoStream(infoStream); - docWriter->setMaxFieldLength(maxFieldLength); - - // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter - deleter = newLucene(directory, deletionPolicy ? deletionPolicy : newLucene(), segmentInfos, infoStream, docWriter, synced); - - if (deleter->startingCommitDeleted) - { - // Deletion policy deleted the "head" commit point. We have to mark ourself as changed so that if we - // are closed without any further changes we write a new segments_N file. 
+ } else { + segmentInfos->read(directory); + + if (indexCommit) { + // Swap out all segments, but, keep metadata in SegmentInfos, like version & generation, to + // preserve write-once. This is important if readers are open against the future commit points. + if (indexCommit->getDirectory() != directory) { + boost::throw_exception(IllegalArgumentException(L"IndexCommit's directory doesn't match my directory")); + } + SegmentInfosPtr oldInfos(newLucene()); + oldInfos->read(directory, indexCommit->getSegmentsFileName()); + segmentInfos->replace(oldInfos); ++changeCount; + if (infoStream) { + message(L"init: loaded commit \"" + indexCommit->getSegmentsFileName() + L"\""); + } } - - pushMaxBufferedDocs(); - - if (infoStream) - message(L"init: create=" + StringUtils::toString(create)); - messageState(); - - success = true; - } - catch (LuceneException& e) - { - finally = e; + + // We assume that this segments_N was previously properly sync'd + HashSet files(segmentInfos->files(directory, true)); + synced.addAll(files.begin(), files.end()); } - - if (!success) - { - if (infoStream) - message(L"init: hit exception on init; releasing write lock"); - try - { - this->writeLock->release(); - } - catch (...) - { - // don't mask the original exception - } - this->writeLock.reset(); + + setRollbackSegmentInfos(segmentInfos); + + docWriter = newLucene(directory, shared_from_this(), indexingChain); + docWriter->setInfoStream(infoStream); + docWriter->setMaxFieldLength(maxFieldLength); + + // Default deleter (for backwards compatibility) is KeepOnlyLastCommitDeleter + deleter = newLucene(directory, deletionPolicy ? deletionPolicy : newLucene(), segmentInfos, infoStream, docWriter, synced); + + if (deleter->startingCommitDeleted) { + // Deletion policy deleted the "head" commit point. We have to mark ourself as changed so that if we + // are closed without any further changes we write a new segments_N file. 
+ ++changeCount; } - - finally.throwException(); - } - - int32_t IndexWriter::MAX_TERM_LENGTH() - { - static int32_t _MAX_TERM_LENGTH = 0; - if (_MAX_TERM_LENGTH == 0) - _MAX_TERM_LENGTH = DocumentsWriter::MAX_TERM_LENGTH; - return _MAX_TERM_LENGTH; - } - - IndexReaderPtr IndexWriter::getReader() - { - return getReader(readerTermsIndexDivisor); - } - - IndexReaderPtr IndexWriter::getReader(int32_t termInfosIndexDivisor) - { - ensureOpen(); - - if (infoStream) - message(L"flush at getReader"); - - // Do this up front before flushing so that the readers obtained during this flush are pooled, the first time - // this method is called - poolReaders = true; - - // Prevent segmentInfos from changing while opening the reader; in theory we could do similar retry logic, - // just like we do when loading segments_N - IndexReaderPtr r; - { - SyncLock syncLock(this); - flush(false, true, true); - r = newLucene(shared_from_this(), segmentInfos, termInfosIndexDivisor); + + pushMaxBufferedDocs(); + + if (infoStream) { + message(L"init: create=" + StringUtils::toString(create)); } - maybeMerge(); - return r; + messageState(); + + success = true; + } catch (LuceneException& e) { + finally = e; } - - int32_t IndexWriter::numDeletedDocs(SegmentInfoPtr info) - { - SegmentReaderPtr reader(readerPool->getIfExists(info)); - int32_t deletedDocs = 0; - LuceneException finally; - try - { - deletedDocs = reader ? reader->numDeletedDocs() : info->getDelCount(); + + if (!success) { + if (infoStream) { + message(L"init: hit exception on init; releasing write lock"); } - catch (LuceneException& e) - { - finally = e; + try { + this->writeLock->release(); + } catch (...) 
{ + // don't mask the original exception } - if (reader) - readerPool->release(reader); - finally.throwException(); - return deletedDocs; - } - - void IndexWriter::acquireWrite() - { - SyncLock syncLock(this); - BOOST_ASSERT(writeThread != LuceneThread::currentId()); - while (writeThread != 0 || readCount > 0) - doWait(); - - // we could have been closed while we were waiting - ensureOpen(); - - writeThread = LuceneThread::currentId(); - } - - void IndexWriter::releaseWrite() - { - SyncLock syncLock(this); - BOOST_ASSERT(writeThread == LuceneThread::currentId()); - writeThread = 0; - notifyAll(); - } - - void IndexWriter::acquireRead() - { - SyncLock syncLock(this); - int64_t current = LuceneThread::currentId(); - while (writeThread != 0 && writeThread != current) - doWait(); - ++readCount; - } - - void IndexWriter::upgradeReadToWrite() - { - SyncLock syncLock(this); - BOOST_ASSERT(readCount > 0); - ++upgradeCount; - while (readCount > upgradeCount || writeThread != 0) - doWait(); - writeThread = LuceneThread::currentId(); - --readCount; - --upgradeCount; + this->writeLock.reset(); } - - void IndexWriter::releaseRead() - { - SyncLock syncLock(this); - --readCount; - BOOST_ASSERT(readCount >= 0); - notifyAll(); + + finally.throwException(); +} + +int32_t IndexWriter::MAX_TERM_LENGTH() { + static int32_t _MAX_TERM_LENGTH = 0; + LUCENE_RUN_ONCE( + _MAX_TERM_LENGTH = DocumentsWriter::MAX_TERM_LENGTH; + ); + return _MAX_TERM_LENGTH; +} + +IndexReaderPtr IndexWriter::getReader() { + return getReader(readerTermsIndexDivisor); +} + +IndexReaderPtr IndexWriter::getReader(int32_t termInfosIndexDivisor) { + ensureOpen(); + + if (infoStream) { + message(L"flush at getReader"); } - - bool IndexWriter::isOpen(bool includePendingClose) + + // Do this up front before flushing so that the readers obtained during this flush are pooled, the first time + // this method is called + poolReaders = true; + + // Prevent segmentInfos from changing while opening the reader; in theory we 
could do similar retry logic, + // just like we do when loading segments_N + IndexReaderPtr r; { SyncLock syncLock(this); - return !(closed || (includePendingClose && closing)); + flush(false, true, true); + r = newLucene(shared_from_this(), segmentInfos, termInfosIndexDivisor); } + maybeMerge(); + return r; +} - void IndexWriter::ensureOpen(bool includePendingClose) - { - SyncLock syncLock(this); - if (!isOpen(includePendingClose)) - boost::throw_exception(AlreadyClosedException(L"This IndexWriter is closed")); +int32_t IndexWriter::numDeletedDocs(const SegmentInfoPtr& info) { + SegmentReaderPtr reader(readerPool->getIfExists(info)); + int32_t deletedDocs = 0; + LuceneException finally; + try { + deletedDocs = reader ? reader->numDeletedDocs() : info->getDelCount(); + } catch (LuceneException& e) { + finally = e; + } + if (reader) { + readerPool->release(reader); + } + finally.throwException(); + return deletedDocs; +} + +void IndexWriter::acquireWrite() { + SyncLock syncLock(this); + BOOST_ASSERT(writeThread != LuceneThread::currentId()); + while (writeThread != 0 || readCount > 0) { + doWait(); } - - void IndexWriter::ensureOpen() - { - ensureOpen(true); + + // we could have been closed while we were waiting + ensureOpen(); + + writeThread = LuceneThread::currentId(); +} + +void IndexWriter::releaseWrite() { + SyncLock syncLock(this); + BOOST_ASSERT(writeThread == LuceneThread::currentId()); + writeThread = 0; + notifyAll(); +} + +void IndexWriter::acquireRead() { + SyncLock syncLock(this); + int64_t current = LuceneThread::currentId(); + while (writeThread != 0 && writeThread != current) { + doWait(); } - - void IndexWriter::message(const String& message) - { - if (infoStream) - { - *infoStream << L"IW " << StringUtils::toString(messageID); - *infoStream << L" [" << DateTools::timeToString(MiscUtils::currentTimeMillis(), DateTools::RESOLUTION_SECOND); - *infoStream << L"; " << StringUtils::toString(LuceneThread::currentId()) << L"]: " << message << L"\n"; - } + 
++readCount; +} + +void IndexWriter::upgradeReadToWrite() { + SyncLock syncLock(this); + BOOST_ASSERT(readCount > 0); + ++upgradeCount; + while (readCount > upgradeCount || writeThread != 0) { + doWait(); + } + writeThread = LuceneThread::currentId(); + --readCount; + --upgradeCount; +} + +void IndexWriter::releaseRead() { + SyncLock syncLock(this); + --readCount; + BOOST_ASSERT(readCount >= 0); + notifyAll(); +} + +bool IndexWriter::isOpen(bool includePendingClose) { + SyncLock syncLock(this); + return !(closed || (includePendingClose && closing)); +} + +void IndexWriter::ensureOpen(bool includePendingClose) { + SyncLock syncLock(this); + if (!isOpen(includePendingClose)) { + boost::throw_exception(AlreadyClosedException(L"This IndexWriter is closed")); } - - void IndexWriter::setMessageID(InfoStreamPtr infoStream) - { - SyncLock syncLock(this); - if (infoStream && messageID == -1) - { - SyncLock messageLock(messageIDLock); - messageID = MESSAGE_ID++; - } - this->infoStream = infoStream; +} + +void IndexWriter::ensureOpen() { + ensureOpen(true); +} + +void IndexWriter::message(const String& message) { + if (infoStream) { + *infoStream << L"IW " << StringUtils::toString(messageID); + *infoStream << L" [" << DateTools::timeToString(MiscUtils::currentTimeMillis(), DateTools::RESOLUTION_SECOND); + *infoStream << L"; " << StringUtils::toString(LuceneThread::currentId()) << L"]: " << message << L"\n"; } - - LogMergePolicyPtr IndexWriter::getLogMergePolicy() - { - LogMergePolicyPtr logMergePolicy(boost::dynamic_pointer_cast(mergePolicy)); - if (logMergePolicy) - return logMergePolicy; - boost::throw_exception(IllegalArgumentException(L"This method can only be called when the merge policy is the default LogMergePolicy")); - return LogMergePolicyPtr(); - } - - bool IndexWriter::getUseCompoundFile() - { - return getLogMergePolicy()->getUseCompoundFile(); +} + +void IndexWriter::setMessageID(const InfoStreamPtr& infoStream) { + SyncLock syncLock(this); + if (infoStream && 
messageID == -1) { + SyncLock messageLock(messageIDLock); + messageID = MESSAGE_ID++; } - - void IndexWriter::setUseCompoundFile(bool value) - { - getLogMergePolicy()->setUseCompoundFile(value); - getLogMergePolicy()->setUseCompoundDocStore(value); + this->infoStream = infoStream; +} + +LogMergePolicyPtr IndexWriter::getLogMergePolicy() { + LogMergePolicyPtr logMergePolicy(boost::dynamic_pointer_cast(mergePolicy)); + if (logMergePolicy) { + return logMergePolicy; } - - void IndexWriter::setSimilarity(SimilarityPtr similarity) - { - ensureOpen(); - this->similarity = similarity; - docWriter->setSimilarity(similarity); + boost::throw_exception(IllegalArgumentException(L"This method can only be called when the merge policy is the default LogMergePolicy")); + return LogMergePolicyPtr(); +} + +bool IndexWriter::getUseCompoundFile() { + return getLogMergePolicy()->getUseCompoundFile(); +} + +void IndexWriter::setUseCompoundFile(bool value) { + getLogMergePolicy()->setUseCompoundFile(value); + getLogMergePolicy()->setUseCompoundDocStore(value); +} + +void IndexWriter::setSimilarity(const SimilarityPtr& similarity) { + ensureOpen(); + this->similarity = similarity; + docWriter->setSimilarity(similarity); +} + +SimilarityPtr IndexWriter::getSimilarity() { + ensureOpen(); + return this->similarity; +} + +void IndexWriter::setTermIndexInterval(int32_t interval) { + ensureOpen(); + this->termIndexInterval = interval; +} + +int32_t IndexWriter::getTermIndexInterval() { + // We pass false because this method is called by SegmentMerger while we are in the process of closing + ensureOpen(false); + return termIndexInterval; +} + +void IndexWriter::setRollbackSegmentInfos(const SegmentInfosPtr& infos) { + SyncLock syncLock(this); + rollbackSegmentInfos = boost::dynamic_pointer_cast(infos->clone()); + BOOST_ASSERT(!rollbackSegmentInfos->hasExternalSegments(directory)); + rollbackSegments = MapSegmentInfoInt::newInstance(); + int32_t size = rollbackSegmentInfos->size(); + for (int32_t 
i = 0; i < size; ++i) { + rollbackSegments.put(rollbackSegmentInfos->info(i), i); } +} - SimilarityPtr IndexWriter::getSimilarity() - { - ensureOpen(); - return this->similarity; +void IndexWriter::setMergePolicy(const MergePolicyPtr& mp) { + ensureOpen(); + if (!mp) { + boost::throw_exception(NullPointerException(L"MergePolicy must be non-null")); } - - void IndexWriter::setTermIndexInterval(int32_t interval) - { - ensureOpen(); - this->termIndexInterval = interval; + + if (mergePolicy != mp) { + mergePolicy->close(); } - - int32_t IndexWriter::getTermIndexInterval() - { - // We pass false because this method is called by SegmentMerger while we are in the process of closing - ensureOpen(false); - return termIndexInterval; + mergePolicy = mp; + pushMaxBufferedDocs(); + if (infoStream) { + message(L"setMergePolicy"); } +} - void IndexWriter::setRollbackSegmentInfos(SegmentInfosPtr infos) - { - SyncLock syncLock(this); - rollbackSegmentInfos = boost::dynamic_pointer_cast(infos->clone()); - BOOST_ASSERT(!rollbackSegmentInfos->hasExternalSegments(directory)); - rollbackSegments = MapSegmentInfoInt::newInstance(); - int32_t size = rollbackSegmentInfos->size(); - for (int32_t i = 0; i < size; ++i) - rollbackSegments.put(rollbackSegmentInfos->info(i), i); - } - - void IndexWriter::setMergePolicy(MergePolicyPtr mp) - { - ensureOpen(); - if (!mp) - boost::throw_exception(NullPointerException(L"MergePolicy must be non-null")); - - if (mergePolicy != mp) - mergePolicy->close(); - mergePolicy = mp; - pushMaxBufferedDocs(); - if (infoStream) - message(L"setMergePolicy"); - } - - MergePolicyPtr IndexWriter::getMergePolicy() - { - ensureOpen(); - return mergePolicy; +MergePolicyPtr IndexWriter::getMergePolicy() { + ensureOpen(); + return mergePolicy; +} + +void IndexWriter::setMergeScheduler(const MergeSchedulerPtr& mergeScheduler) { + SyncLock syncLock(this); + ensureOpen(); + if (!mergeScheduler) { + boost::throw_exception(NullPointerException(L"MergeScheduler must be 
non-null")); } - - void IndexWriter::setMergeScheduler(MergeSchedulerPtr mergeScheduler) - { - SyncLock syncLock(this); - ensureOpen(); - if (!mergeScheduler) - boost::throw_exception(NullPointerException(L"MergeScheduler must be non-null")); - if (this->mergeScheduler != mergeScheduler) - { - finishMerges(true); - this->mergeScheduler->close(); - } - this->mergeScheduler = mergeScheduler; - if (infoStream) - message(L"setMergeScheduler"); + if (this->mergeScheduler != mergeScheduler) { + finishMerges(true); + this->mergeScheduler->close(); } - - MergeSchedulerPtr IndexWriter::getMergeScheduler() - { - ensureOpen(); - return mergeScheduler; + this->mergeScheduler = mergeScheduler; + if (infoStream) { + message(L"setMergeScheduler"); } - - void IndexWriter::setMaxMergeDocs(int32_t maxMergeDocs) - { - getLogMergePolicy()->setMaxMergeDocs(maxMergeDocs); +} + +MergeSchedulerPtr IndexWriter::getMergeScheduler() { + ensureOpen(); + return mergeScheduler; +} + +void IndexWriter::setMaxMergeDocs(int32_t maxMergeDocs) { + getLogMergePolicy()->setMaxMergeDocs(maxMergeDocs); +} + +int32_t IndexWriter::getMaxMergeDocs() { + return getLogMergePolicy()->getMaxMergeDocs(); +} + +void IndexWriter::setMaxFieldLength(int32_t maxFieldLength) { + ensureOpen(); + this->maxFieldLength = maxFieldLength; + docWriter->setMaxFieldLength(maxFieldLength); + if (infoStream) { + message(L"setMaxFieldLength " + StringUtils::toString(maxFieldLength)); } - - int32_t IndexWriter::getMaxMergeDocs() - { - return getLogMergePolicy()->getMaxMergeDocs(); +} + +int32_t IndexWriter::getMaxFieldLength() { + ensureOpen(); + return maxFieldLength; +} + +void IndexWriter::setReaderTermsIndexDivisor(int32_t divisor) { + ensureOpen(); + if (divisor <= 0) { + boost::throw_exception(IllegalArgumentException(L"divisor must be >= 1 (got " + StringUtils::toString(divisor) + L")")); } - - void IndexWriter::setMaxFieldLength(int32_t maxFieldLength) - { - ensureOpen(); - this->maxFieldLength = maxFieldLength; - 
docWriter->setMaxFieldLength(maxFieldLength); - if (infoStream) - message(L"setMaxFieldLength " + StringUtils::toString(maxFieldLength)); + readerTermsIndexDivisor = divisor; + if (infoStream) { + message(L"setReaderTermsIndexDivisor " + StringUtils::toString(readerTermsIndexDivisor)); } - - int32_t IndexWriter::getMaxFieldLength() - { - ensureOpen(); - return maxFieldLength; +} + +int32_t IndexWriter::getReaderTermsIndexDivisor() { + ensureOpen(); + return readerTermsIndexDivisor; +} + +void IndexWriter::setMaxBufferedDocs(int32_t maxBufferedDocs) { + ensureOpen(); + if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) { + boost::throw_exception(IllegalArgumentException(L"maxBufferedDocs must at least be 2 when enabled")); } - - void IndexWriter::setReaderTermsIndexDivisor(int32_t divisor) - { - ensureOpen(); - if (divisor <= 0) - boost::throw_exception(IllegalArgumentException(L"divisor must be >= 1 (got " + StringUtils::toString(divisor) + L")")); - readerTermsIndexDivisor = divisor; - if (infoStream) - message(L"setReaderTermsIndexDivisor " + StringUtils::toString(readerTermsIndexDivisor)); - } - - int32_t IndexWriter::getReaderTermsIndexDivisor() - { - ensureOpen(); - return readerTermsIndexDivisor; + if (maxBufferedDocs == DISABLE_AUTO_FLUSH && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH) { + boost::throw_exception(IllegalArgumentException(L"at least one of ramBufferSize and maxBufferedDocs must be enabled")); } - - void IndexWriter::setMaxBufferedDocs(int32_t maxBufferedDocs) - { - ensureOpen(); - if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) - boost::throw_exception(IllegalArgumentException(L"maxBufferedDocs must at least be 2 when enabled")); - if (maxBufferedDocs == DISABLE_AUTO_FLUSH && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH) - boost::throw_exception(IllegalArgumentException(L"at least one of ramBufferSize and maxBufferedDocs must be enabled")); - docWriter->setMaxBufferedDocs(maxBufferedDocs); - pushMaxBufferedDocs(); 
- if (infoStream) - message(L"setMaxBufferedDocs " + StringUtils::toString(maxBufferedDocs)); + docWriter->setMaxBufferedDocs(maxBufferedDocs); + pushMaxBufferedDocs(); + if (infoStream) { + message(L"setMaxBufferedDocs " + StringUtils::toString(maxBufferedDocs)); } - - void IndexWriter::pushMaxBufferedDocs() - { - if (docWriter->getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) - { - LogDocMergePolicyPtr lmp(boost::dynamic_pointer_cast(mergePolicy)); - if (lmp) - { - int32_t maxBufferedDocs = docWriter->getMaxBufferedDocs(); - if (lmp->getMinMergeDocs() != maxBufferedDocs) - { - if (infoStream) - message(L"now push maxBufferedDocs " + StringUtils::toString(maxBufferedDocs) + L" to LogDocMergePolicy"); - lmp->setMinMergeDocs(maxBufferedDocs); +} + +void IndexWriter::pushMaxBufferedDocs() { + if (docWriter->getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) { + LogDocMergePolicyPtr lmp(boost::dynamic_pointer_cast(mergePolicy)); + if (lmp) { + int32_t maxBufferedDocs = docWriter->getMaxBufferedDocs(); + if (lmp->getMinMergeDocs() != maxBufferedDocs) { + if (infoStream) { + message(L"now push maxBufferedDocs " + StringUtils::toString(maxBufferedDocs) + L" to LogDocMergePolicy"); } + lmp->setMinMergeDocs(maxBufferedDocs); } } } - - int32_t IndexWriter::getMaxBufferedDocs() - { - ensureOpen(); - return docWriter->getMaxBufferedDocs(); +} + +int32_t IndexWriter::getMaxBufferedDocs() { + ensureOpen(); + return docWriter->getMaxBufferedDocs(); +} + +void IndexWriter::setRAMBufferSizeMB(double mb) { + if (mb > 2048.0) { + boost::throw_exception(IllegalArgumentException(L"ramBufferSize " + StringUtils::toString(mb) + L" is too large; should be comfortably less than 2048")); } - - void IndexWriter::setRAMBufferSizeMB(double mb) - { - if (mb > 2048.0) - boost::throw_exception(IllegalArgumentException(L"ramBufferSize " + StringUtils::toString(mb) + L" is too large; should be comfortably less than 2048")); - if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0) - 
boost::throw_exception(IllegalArgumentException(L"ramBufferSize should be > 0.0 MB when enabled")); - if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH) - boost::throw_exception(IllegalArgumentException(L"at least one of ramBufferSize and maxBufferedDocs must be enabled")); - docWriter->setRAMBufferSizeMB(mb); - if (infoStream) - message(L"setRAMBufferSizeMB " + StringUtils::toString(mb)); - } - - double IndexWriter::getRAMBufferSizeMB() - { - return docWriter->getRAMBufferSizeMB(); - } - - void IndexWriter::setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms) - { - ensureOpen(); - if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) - boost::throw_exception(IllegalArgumentException(L"maxBufferedDeleteTerms must at least be 1 when enabled")); - docWriter->setMaxBufferedDeleteTerms(maxBufferedDeleteTerms); - if (infoStream) - message(L"setMaxBufferedDeleteTerms " + StringUtils::toString(maxBufferedDeleteTerms)); - } - - int32_t IndexWriter::getMaxBufferedDeleteTerms() - { - ensureOpen(); - return docWriter->getMaxBufferedDeleteTerms(); + if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0) { + boost::throw_exception(IllegalArgumentException(L"ramBufferSize should be > 0.0 MB when enabled")); } - - void IndexWriter::setMergeFactor(int32_t mergeFactor) - { - getLogMergePolicy()->setMergeFactor(mergeFactor); + if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH) { + boost::throw_exception(IllegalArgumentException(L"at least one of ramBufferSize and maxBufferedDocs must be enabled")); } - - int32_t IndexWriter::getMergeFactor() - { - return getLogMergePolicy()->getMergeFactor(); + docWriter->setRAMBufferSizeMB(mb); + if (infoStream) { + message(L"setRAMBufferSizeMB " + StringUtils::toString(mb)); } - - void IndexWriter::setDefaultInfoStream(InfoStreamPtr infoStream) - { - IndexWriter::defaultInfoStream = infoStream; +} + +double IndexWriter::getRAMBufferSizeMB() { + return 
docWriter->getRAMBufferSizeMB(); +} + +void IndexWriter::setMaxBufferedDeleteTerms(int32_t maxBufferedDeleteTerms) { + ensureOpen(); + if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH && maxBufferedDeleteTerms < 1) { + boost::throw_exception(IllegalArgumentException(L"maxBufferedDeleteTerms must at least be 1 when enabled")); } - - InfoStreamPtr IndexWriter::getDefaultInfoStream() - { - return IndexWriter::defaultInfoStream; + docWriter->setMaxBufferedDeleteTerms(maxBufferedDeleteTerms); + if (infoStream) { + message(L"setMaxBufferedDeleteTerms " + StringUtils::toString(maxBufferedDeleteTerms)); } - - void IndexWriter::setInfoStream(InfoStreamPtr infoStream) - { - ensureOpen(); - setMessageID(infoStream); - docWriter->setInfoStream(infoStream); - deleter->setInfoStream(infoStream); - messageState(); +} + +int32_t IndexWriter::getMaxBufferedDeleteTerms() { + ensureOpen(); + return docWriter->getMaxBufferedDeleteTerms(); +} + +void IndexWriter::setMergeFactor(int32_t mergeFactor) { + getLogMergePolicy()->setMergeFactor(mergeFactor); +} + +int32_t IndexWriter::getMergeFactor() { + return getLogMergePolicy()->getMergeFactor(); +} + +void IndexWriter::setDefaultInfoStream(const InfoStreamPtr& infoStream) { + IndexWriter::defaultInfoStream = infoStream; +} + +InfoStreamPtr IndexWriter::getDefaultInfoStream() { + return IndexWriter::defaultInfoStream; +} + +void IndexWriter::setInfoStream(const InfoStreamPtr& infoStream) { + ensureOpen(); + setMessageID(infoStream); + docWriter->setInfoStream(infoStream); + deleter->setInfoStream(infoStream); + messageState(); +} + +void IndexWriter::messageState() { + if (infoStream) { + message(L"ramBufferSizeMB=" + StringUtils::toString(docWriter->getRAMBufferSizeMB()) + + L" maxBufferedDocs=" + StringUtils::toString(docWriter->getMaxBufferedDocs()) + + L" maxBuffereDeleteTerms=" + StringUtils::toString(docWriter->getMaxBufferedDeleteTerms()) + + L" maxFieldLength=" + StringUtils::toString(maxFieldLength) + + L" index=" + segString()); 
} - - void IndexWriter::messageState() - { - if (infoStream) - { - message(L"ramBufferSizeMB=" + StringUtils::toString(docWriter->getRAMBufferSizeMB()) + - L" maxBufferedDocs=" + StringUtils::toString(docWriter->getMaxBufferedDocs()) + - L" maxBuffereDeleteTerms=" + StringUtils::toString(docWriter->getMaxBufferedDeleteTerms()) + - L" maxFieldLength=" + StringUtils::toString(maxFieldLength) + - L" index=" + segString()); +} + +InfoStreamPtr IndexWriter::getInfoStream() { + ensureOpen(); + return infoStream; +} + +bool IndexWriter::verbose() { + return infoStream.get() != NULL; +} + +void IndexWriter::setWriteLockTimeout(int64_t writeLockTimeout) { + ensureOpen(); + this->writeLockTimeout = writeLockTimeout; +} + +int64_t IndexWriter::getWriteLockTimeout() { + ensureOpen(); + return writeLockTimeout; +} + +void IndexWriter::setDefaultWriteLockTimeout(int64_t writeLockTimeout) { + IndexWriter::WRITE_LOCK_TIMEOUT = writeLockTimeout; +} + +int64_t IndexWriter::getDefaultWriteLockTimeout() { + return IndexWriter::WRITE_LOCK_TIMEOUT; +} + +void IndexWriter::close() { + close(true); +} + +void IndexWriter::close(bool waitForMerges) { + // Ensure that only one thread actually gets to do the closing + if (shouldClose()) { + // If any methods have hit std::bad_alloc, then abort on close, in case the internal state of IndexWriter + // or DocumentsWriter is corrupt + if (hitOOM) { + rollbackInternal(); + } else { + closeInternal(waitForMerges); } } - - InfoStreamPtr IndexWriter::getInfoStream() - { - ensureOpen(); - return infoStream; - } - - bool IndexWriter::verbose() - { - return infoStream; - } - - void IndexWriter::setWriteLockTimeout(int64_t writeLockTimeout) - { - ensureOpen(); - this->writeLockTimeout = writeLockTimeout; - } - - int64_t IndexWriter::getWriteLockTimeout() - { - ensureOpen(); - return writeLockTimeout; - } - - void IndexWriter::setDefaultWriteLockTimeout(int64_t writeLockTimeout) - { - IndexWriter::WRITE_LOCK_TIMEOUT = writeLockTimeout; - } - - int64_t 
IndexWriter::getDefaultWriteLockTimeout() - { - return IndexWriter::WRITE_LOCK_TIMEOUT; - } - - void IndexWriter::close() - { - close(true); +} + +bool IndexWriter::shouldClose() { + SyncLock syncLock(this); + while (true) { + if (!closed) { + if (!closing) { + closing = true; + return true; + } else { + // Another thread is presently trying to close; wait until it finishes one way (closes + // successfully) or another (fails to close) + doWait(); + } + } else { + return false; + } } - - void IndexWriter::close(bool waitForMerges) - { - // Ensure that only one thread actually gets to do the closing - if (shouldClose()) +} + +void IndexWriter::closeInternal(bool waitForMerges) { + docWriter->pauseAllThreads(); + + LuceneException finally; + try { + if (infoStream) { + message(L"now flush at close"); + } + + docWriter->close(); + + // Only allow a new merge to be triggered if we are going to wait for merges + if (!hitOOM) { + flush(waitForMerges, true, true); + } + + // Give merge scheduler last chance to run, in case any pending merges are waiting + if (waitForMerges) { + mergeScheduler->merge(shared_from_this()); + } + + mergePolicy->close(); + + finishMerges(waitForMerges); + stopMerges = true; + + mergeScheduler->close(); + + if (infoStream) { + message(L"now call final commit()"); + } + + if (!hitOOM) { + commit(0); + } + + if (infoStream) { + message(L"at close: " + segString()); + } + { - // If any methods have hit std::bad_alloc, then abort on close, in case the internal state of IndexWriter - // or DocumentsWriter is corrupt - if (hitOOM) - rollbackInternal(); - else - closeInternal(waitForMerges); + SyncLock syncLock(this); + readerPool->close(); + docWriter.reset(); + deleter->close(); + } + + if (writeLock) { + writeLock->release(); // release write lock + writeLock.reset(); } - } - bool IndexWriter::shouldClose() - { - SyncLock syncLock(this); - while (true) { - if (!closed) - { - if (!closing) - { - closing = true; - return true; - } - else - { - // 
Another thread is presently trying to close; wait until it finishes one way (closes - // successfully) or another (fails to close) - doWait(); - } - } - else - return false; + SyncLock syncLock(this); + closed = true; } + } catch (std::bad_alloc& oom) { + finally = handleOOM(oom, L"closeInternal"); + } catch (LuceneException& e) { + finally = e; } - - void IndexWriter::closeInternal(bool waitForMerges) { - docWriter->pauseAllThreads(); - - LuceneException finally; - try - { - if (infoStream) - message(L"now flush at close"); - - docWriter->close(); - - // Only allow a new merge to be triggered if we are going to wait for merges - if (!hitOOM) - flush(waitForMerges, true, true); - - // Give merge scheduler last chance to run, in case any pending merges are waiting - if (waitForMerges) - mergeScheduler->merge(shared_from_this()); - - mergePolicy->close(); - - finishMerges(waitForMerges); - stopMerges = true; - - mergeScheduler->close(); - - if (infoStream) - message(L"now call final commit()"); - - if (!hitOOM) - commit(0); - - if (infoStream) - message(L"at close: " + segString()); - - { - SyncLock syncLock(this); - readerPool->close(); - docWriter.reset(); - deleter->close(); - } - - if (writeLock) - { - writeLock->release(); // release write lock - writeLock.reset(); + SyncLock syncLock(this); + closing = false; + notifyAll(); + if (!closed) { + if (docWriter) { + docWriter->resumeAllThreads(); } - - { - SyncLock syncLock(this); - closed = true; + if (infoStream) { + message(L"hit exception while closing"); } } - catch (std::bad_alloc& oom) - { - finally = handleOOM(oom, L"closeInternal"); + } + finally.throwException(); +} + +bool IndexWriter::flushDocStores() { + SyncLock syncLock(this); + + if (infoStream) { + message(L"flushDocStores segment=" + docWriter->getDocStoreSegment()); + } + + bool useCompoundDocStore = false; + + if (infoStream) { + message(L"closeDocStores segment=" + docWriter->getDocStoreSegment()); + } + + String docStoreSegment; + + bool 
success = false; + LuceneException finally; + try { + docStoreSegment = docWriter->closeDocStore(); + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success && infoStream) { + message(L"hit exception closing doc store segment"); + } + finally.throwException(); + + if (infoStream) { + message(L"flushDocStores files=" + StringUtils::toString(docWriter->closedFiles())); + } + + useCompoundDocStore = mergePolicy->useCompoundDocStore(segmentInfos); + HashSet closedFiles(docWriter->closedFiles()); + + if (useCompoundDocStore && !docStoreSegment.empty() && !closedFiles.empty()) { + // Now build compound doc store file + if (infoStream) { + message(L"create compound file " + docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); } - catch (LuceneException& e) - { + + success = false; + + int32_t numSegments = segmentInfos->size(); + String compoundFileName(docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); + + try { + CompoundFileWriterPtr cfsWriter(newLucene(directory, compoundFileName)); + for (HashSet::iterator file = closedFiles.begin(); file != closedFiles.end(); ++file) { + cfsWriter->addFile(*file); + } + + // Perform the merge + cfsWriter->close(); + success = true; + } catch (LuceneException& e) { finally = e; } - { - SyncLock syncLock(this); - closing = false; - notifyAll(); - if (!closed) - { - if (docWriter) - docWriter->resumeAllThreads(); - if (infoStream) - message(L"hit exception while closing"); + + if (!success) { + if (infoStream) { + message(L"hit exception building compound file doc store for segment " + docStoreSegment); } + deleter->deleteFile(compoundFileName); + docWriter->abort(); } finally.throwException(); + + for (int32_t i = 0; i < numSegments; ++i) { + SegmentInfoPtr si(segmentInfos->info(i)); + if (si->getDocStoreOffset() != -1 && si->getDocStoreSegment() == docStoreSegment) { + si->setDocStoreIsCompoundFile(true); + } + } + + checkpoint(); + + // In case the files we 
just merged into a CFS were not previously checkpointed + deleter->deleteNewFiles(docWriter->closedFiles()); } - - bool IndexWriter::flushDocStores() - { - SyncLock syncLock(this); - - if (infoStream) - message(L"flushDocStores segment=" + docWriter->getDocStoreSegment()); - - bool useCompoundDocStore = false; - - if (infoStream) - message(L"closeDocStores segment=" + docWriter->getDocStoreSegment()); - - String docStoreSegment; - - bool success = false; + + return useCompoundDocStore; +} + +DirectoryPtr IndexWriter::getDirectory() { + ensureOpen(false); // Pass false because the flush during closing calls getDirectory + return directory; +} + +AnalyzerPtr IndexWriter::getAnalyzer() { + ensureOpen(); + return analyzer; +} + +int32_t IndexWriter::maxDoc() { + SyncLock syncLock(this); + int32_t count = docWriter ? docWriter->getNumDocsInRAM() : 0; + for (int32_t i = 0; i < segmentInfos->size(); ++i) { + count += segmentInfos->info(i)->docCount; + } + return count; +} + +int32_t IndexWriter::numDocs() { + SyncLock syncLock(this); + int32_t count = docWriter ? 
docWriter->getNumDocsInRAM() : 0; + for (int32_t i = 0; i < segmentInfos->size(); ++i) { + SegmentInfoPtr info(segmentInfos->info(i)); + count += info->docCount - info->getDelCount(); + } + return count; +} + +bool IndexWriter::hasDeletions() { + SyncLock syncLock(this); + ensureOpen(); + if (docWriter->hasDeletes()) { + return true; + } + for (int32_t i = 0; i < segmentInfos->size(); ++i) { + if (segmentInfos->info(i)->hasDeletions()) { + return true; + } + } + return false; +} + +void IndexWriter::addDocument(const DocumentPtr& doc) { + addDocument(doc, analyzer); +} + +void IndexWriter::addDocument(const DocumentPtr& doc, const AnalyzerPtr& analyzer) { + ensureOpen(); + bool doFlush = false; + bool success = false; + try { LuceneException finally; - try - { - docStoreSegment = docWriter->closeDocStore(); + try { + doFlush = docWriter->addDocument(doc, analyzer); success = true; - } - catch (LuceneException& e) - { + } catch (LuceneException& e) { finally = e; } - if (!success && infoStream) - message(L"hit exception closing doc store segment"); - finally.throwException(); - - if (infoStream) - message(L"flushDocStores files=" + docWriter->closedFiles()); - - useCompoundDocStore = mergePolicy->useCompoundDocStore(segmentInfos); - HashSet closedFiles(docWriter->closedFiles()); - - if (useCompoundDocStore && !docStoreSegment.empty() && !closedFiles.empty()) - { - // Now build compound doc store file - if (infoStream) - message(L"create compound file " + docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); - - success = false; - - int32_t numSegments = segmentInfos->size(); - String compoundFileName(docStoreSegment + L"." 
+ IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); - - try - { - CompoundFileWriterPtr cfsWriter(newLucene(directory, compoundFileName)); - for (HashSet::iterator file = closedFiles.begin(); file != closedFiles.end(); ++file) - cfsWriter->addFile(*file); - - // Perform the merge - cfsWriter->close(); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - - if (!success) - { - if (infoStream) - message(L"hit exception building compound file doc store for segment " + docStoreSegment); - deleter->deleteFile(compoundFileName); - docWriter->abort(); + if (!success) { + if (infoStream) { + message(L"hit exception adding document"); } - finally.throwException(); - - for (int32_t i = 0; i < numSegments; ++i) { - SegmentInfoPtr si(segmentInfos->info(i)); - if (si->getDocStoreOffset() != -1 && si->getDocStoreSegment() == docStoreSegment) - si->setDocStoreIsCompoundFile(true); + SyncLock syncLock(this); + // If docWriter has some aborted files that were never incref'd, then we clean them up here + if (docWriter) { + HashSet files(docWriter->abortedFiles()); + if (files) { + deleter->deleteNewFiles(files); + } + } } - - checkpoint(); - - // In case the files we just merged into a CFS were not previously checkpointed - deleter->deleteNewFiles(docWriter->closedFiles()); } - - return useCompoundDocStore; - } - - DirectoryPtr IndexWriter::getDirectory() - { - ensureOpen(false); // Pass false because the flush during closing calls getDirectory - return directory; - } - - AnalyzerPtr IndexWriter::getAnalyzer() - { - ensureOpen(); - return analyzer; - } - - int32_t IndexWriter::maxDoc() - { - SyncLock syncLock(this); - int32_t count = docWriter ? 
docWriter->getNumDocsInRAM() : 0; - for (int32_t i = 0; i < segmentInfos->size(); ++i) - count += segmentInfos->info(i)->docCount; - return count; + finally.throwException(); + if (doFlush) { + flush(true, false, false); + } + } catch (std::bad_alloc& oom) { + boost::throw_exception(handleOOM(oom, L"addDocument")); } - - int32_t IndexWriter::numDocs() - { - SyncLock syncLock(this); - int32_t count = docWriter ? docWriter->getNumDocsInRAM() : 0; - for (int32_t i = 0; i < segmentInfos->size(); ++i) - { - SegmentInfoPtr info(segmentInfos->info(i)); - count += info->docCount - info->getDelCount(); +} + +void IndexWriter::deleteDocuments(const TermPtr& term) { + ensureOpen(); + try { + bool doFlush = docWriter->bufferDeleteTerm(term); + if (doFlush) { + flush(true, false, false); } - return count; + } catch (std::bad_alloc& oom) { + boost::throw_exception(handleOOM(oom, L"deleteDocuments(Term)")); } - - bool IndexWriter::hasDeletions() - { - SyncLock syncLock(this); - ensureOpen(); - if (docWriter->hasDeletes()) - return true; - for (int32_t i = 0; i < segmentInfos->size(); ++i) - { - if (segmentInfos->info(i)->hasDeletions()) - return true; +} + +void IndexWriter::deleteDocuments(Collection terms) { + ensureOpen(); + try { + bool doFlush = docWriter->bufferDeleteTerms(terms); + if (doFlush) { + flush(true, false, false); } - return false; + } catch (std::bad_alloc& oom) { + boost::throw_exception(handleOOM(oom, L"deleteDocuments(VectorTerm)")); } - - void IndexWriter::addDocument(DocumentPtr doc) - { - addDocument(doc, analyzer); +} + +void IndexWriter::deleteDocuments(const QueryPtr& query) { + ensureOpen(); + bool doFlush = docWriter->bufferDeleteQuery(query); + if (doFlush) { + flush(true, false, false); } - - void IndexWriter::addDocument(DocumentPtr doc, AnalyzerPtr analyzer) - { - ensureOpen(); +} + +void IndexWriter::deleteDocuments(Collection queries) { + ensureOpen(); + bool doFlush = docWriter->bufferDeleteQueries(queries); + if (doFlush) { + flush(true, 
false, false); + } +} + +void IndexWriter::updateDocument(const TermPtr& term, const DocumentPtr& doc) { + ensureOpen(); + updateDocument(term, doc, getAnalyzer()); +} + +void IndexWriter::updateDocument(const TermPtr& term, const DocumentPtr& doc, const AnalyzerPtr& analyzer) { + ensureOpen(); + try { bool doFlush = false; bool success = false; - try - { - LuceneException finally; - try - { - doFlush = docWriter->addDocument(doc, analyzer); - success = true; - } - catch (LuceneException& e) - { - finally = e; + LuceneException finally; + try { + doFlush = docWriter->updateDocument(term, doc, analyzer); + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + if (infoStream) { + message(L"hit exception updating document"); } - if (!success) + { - if (infoStream) - message(L"hit exception adding document"); - { - SyncLock syncLock(this); - // If docWriter has some aborted files that were never incref'd, then we clean them up here - if (docWriter) - { - HashSet files(docWriter->abortedFiles()); - if (files) - deleter->deleteNewFiles(files); + SyncLock syncLock(this); + // If docWriter has some aborted files that were never incref'd, then we clean them up here + if (docWriter) { + HashSet files(docWriter->abortedFiles()); + if (files) { + deleter->deleteNewFiles(files); } } } - finally.throwException(); - if (doFlush) - flush(true, false, false); - } - catch (std::bad_alloc& oom) - { - boost::throw_exception(handleOOM(oom, L"addDocument")); - } - } - - void IndexWriter::deleteDocuments(TermPtr term) - { - ensureOpen(); - try - { - bool doFlush = docWriter->bufferDeleteTerm(term); - if (doFlush) - flush(true, false, false); - } - catch (std::bad_alloc& oom) - { - boost::throw_exception(handleOOM(oom, L"deleteDocuments(Term)")); - } - } - - void IndexWriter::deleteDocuments(Collection terms) - { - ensureOpen(); - try - { - bool doFlush = docWriter->bufferDeleteTerms(terms); - if (doFlush) - flush(true, false, false); - } - catch 
(std::bad_alloc& oom) - { - boost::throw_exception(handleOOM(oom, L"deleteDocuments(VectorTerm)")); } - } - - void IndexWriter::deleteDocuments(QueryPtr query) - { - ensureOpen(); - bool doFlush = docWriter->bufferDeleteQuery(query); - if (doFlush) - flush(true, false, false); - } - - void IndexWriter::deleteDocuments(Collection queries) - { - ensureOpen(); - bool doFlush = docWriter->bufferDeleteQueries(queries); - if (doFlush) + finally.throwException(); + if (doFlush) { flush(true, false, false); - } - - void IndexWriter::updateDocument(TermPtr term, DocumentPtr doc) - { - ensureOpen(); - updateDocument(term, doc, getAnalyzer()); - } - - void IndexWriter::updateDocument(TermPtr term, DocumentPtr doc, AnalyzerPtr analyzer) - { - ensureOpen(); - try - { - bool doFlush = false; - bool success = false; - LuceneException finally; - try - { - doFlush = docWriter->updateDocument(term, doc, analyzer); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success) - { - if (infoStream) - message(L"hit exception updating document"); - - { - SyncLock syncLock(this); - // If docWriter has some aborted files that were never incref'd, then we clean them up here - if (docWriter) - { - HashSet files(docWriter->abortedFiles()); - if (files) - deleter->deleteNewFiles(files); - } - } - } - finally.throwException(); - if (doFlush) - flush(true, false, false); - } - catch (std::bad_alloc& oom) - { - boost::throw_exception(handleOOM(oom, L"updateDocument")); } + } catch (std::bad_alloc& oom) { + boost::throw_exception(handleOOM(oom, L"updateDocument")); } - - int32_t IndexWriter::getSegmentCount() - { - SyncLock syncLock(this); - return segmentInfos->size(); - } - - int32_t IndexWriter::getNumBufferedDocuments() - { - SyncLock syncLock(this); - return docWriter->getNumDocsInRAM(); - } - - int32_t IndexWriter::getDocCount(int32_t i) - { - SyncLock syncLock(this); - return (i >= 0 && i < segmentInfos->size()) ? 
segmentInfos->info(i)->docCount : -1; +} + +int32_t IndexWriter::getSegmentCount() { + SyncLock syncLock(this); + return segmentInfos->size(); +} + +int32_t IndexWriter::getNumBufferedDocuments() { + SyncLock syncLock(this); + return docWriter->getNumDocsInRAM(); +} + +int32_t IndexWriter::getDocCount(int32_t i) { + SyncLock syncLock(this); + return (i >= 0 && i < segmentInfos->size()) ? segmentInfos->info(i)->docCount : -1; +} + +int32_t IndexWriter::getFlushCount() { + SyncLock syncLock(this); + return flushCount; +} + +int32_t IndexWriter::getFlushDeletesCount() { + SyncLock syncLock(this); + return flushDeletesCount; +} + +String IndexWriter::newSegmentName() { + // Cannot synchronize on IndexWriter because that causes deadlock + SyncLock segmentLock(segmentInfos); + + // Important to increment changeCount so that the segmentInfos is written on close. + // Otherwise we could close, re-open and re-return the same segment name that was + // previously returned which can cause problems at least with ConcurrentMergeScheduler. 
+ ++changeCount; + return L"_" + StringUtils::toString(segmentInfos->counter++, StringUtils::CHARACTER_MAX_RADIX); +} + +void IndexWriter::optimize() { + optimize(true); +} + +void IndexWriter::optimize(int32_t maxNumSegments) { + optimize(maxNumSegments, true); +} + +void IndexWriter::optimize(bool doWait) { + optimize(1, doWait); +} + +void IndexWriter::optimize(int32_t maxNumSegments, bool doWait) { + ensureOpen(); + + if (maxNumSegments < 1) { + boost::throw_exception(IllegalArgumentException(L"maxNumSegments must be >= 1; got " + StringUtils::toString(maxNumSegments))); } - - int32_t IndexWriter::getFlushCount() - { - SyncLock syncLock(this); - return flushCount; + + if (infoStream) { + message(L"optimize: index now " + segString()); } - - int32_t IndexWriter::getFlushDeletesCount() + + flush(true, false, true); + { SyncLock syncLock(this); - return flushDeletesCount; - } - - String IndexWriter::newSegmentName() - { - // Cannot synchronize on IndexWriter because that causes deadlock - SyncLock segmentLock(segmentInfos); - - // Important to increment changeCount so that the segmentInfos is written on close. - // Otherwise we could close, re-open and re-return the same segment name that was - // previously returned which can cause problems at least with ConcurrentMergeScheduler. 
- ++changeCount; - return L"_" + StringUtils::toString(segmentInfos->counter++, StringUtils::CHARACTER_MAX_RADIX); - } - - void IndexWriter::optimize() - { - optimize(true); - } - - void IndexWriter::optimize(int32_t maxNumSegments) - { - optimize(maxNumSegments, true); - } - - void IndexWriter::optimize(bool doWait) - { - optimize(1, doWait); + + resetMergeExceptions(); + segmentsToOptimize.clear(); + optimizeMaxNumSegments = maxNumSegments; + int32_t numSegments = segmentInfos->size(); + for (int32_t i = 0; i < numSegments; ++i) { + segmentsToOptimize.add(segmentInfos->info(i)); + } + + // Now mark all pending & running merges as optimize merge + for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) { + (*merge)->optimize = true; + (*merge)->maxNumSegmentsOptimize = maxNumSegments; + } + + for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) { + (*merge)->optimize = true; + (*merge)->maxNumSegmentsOptimize = maxNumSegments; + } } - - void IndexWriter::optimize(int32_t maxNumSegments, bool doWait) - { - ensureOpen(); - - if (maxNumSegments < 1) - boost::throw_exception(IllegalArgumentException(L"maxNumSegments must be >= 1; got " + StringUtils::toString(maxNumSegments))); - - if (infoStream) - message(L"optimize: index now " + segString()); - - flush(true, false, true); - + + maybeMerge(maxNumSegments, true); + + if (doWait) { { SyncLock syncLock(this); - - resetMergeExceptions(); - segmentsToOptimize.clear(); - optimizeMaxNumSegments = maxNumSegments; - int32_t numSegments = segmentInfos->size(); - for (int32_t i = 0; i < numSegments; ++i) - segmentsToOptimize.add(segmentInfos->info(i)); - - // Now mark all pending & running merges as optimize merge - for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) - { - (*merge)->optimize = true; - (*merge)->maxNumSegmentsOptimize = maxNumSegments; - } - - for (SetOneMerge::iterator merge = 
runningMerges.begin(); merge != runningMerges.end(); ++merge) - { - (*merge)->optimize = true; - (*merge)->maxNumSegmentsOptimize = maxNumSegments; - } - } - - maybeMerge(maxNumSegments, true); - - if (doWait) - { - { - SyncLock syncLock(this); - while (true) - { - if (hitOOM) - boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete optimize")); - - if (!mergeExceptions.empty()) - { - // Forward any exceptions in background merge threads to the current thread - for (Collection::iterator merge = mergeExceptions.begin(); merge != mergeExceptions.end(); ++merge) - { - if ((*merge)->optimize) - { - LuceneException err = (*merge)->getException(); - if (!err.isNull()) - boost::throw_exception(IOException(L"background merge hit exception: " + (*merge)->segString(directory))); + while (true) { + if (hitOOM) { + boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete optimize")); + } + + if (!mergeExceptions.empty()) { + // Forward any exceptions in background merge threads to the current thread + for (Collection::iterator merge = mergeExceptions.begin(); merge != mergeExceptions.end(); ++merge) { + if ((*merge)->optimize) { + LuceneException err = (*merge)->getException(); + if (!err.isNull()) { + boost::throw_exception(IOException(L"background merge hit exception: " + (*merge)->segString(directory))); } } } - - if (optimizeMergesPending()) - IndexWriter::doWait(); - else - break; + } + + if (optimizeMergesPending()) { + IndexWriter::doWait(); + } else { + break; } } - - // If close is called while we are still running, throw an exception so the calling thread will know the - // optimize did not complete - ensureOpen(); } - - // NOTE: in the ConcurrentMergeScheduler case, when doWait is false, we can return immediately while background - // threads accomplish the optimization + + // If close is called while we are still running, throw an exception so the calling thread will know the + 
// optimize did not complete + ensureOpen(); } - - bool IndexWriter::optimizeMergesPending() - { - SyncLock syncLock(this); - - for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) - { - if ((*merge)->optimize) - return true; + + // NOTE: in the ConcurrentMergeScheduler case, when doWait is false, we can return immediately while background + // threads accomplish the optimization +} + +bool IndexWriter::optimizeMergesPending() { + SyncLock syncLock(this); + + for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) { + if ((*merge)->optimize) { + return true; } - - for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) - { - if ((*merge)->optimize) - return true; + } + + for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) { + if ((*merge)->optimize) { + return true; } - - return false; } - - void IndexWriter::expungeDeletes(bool doWait) + + return false; +} + +void IndexWriter::expungeDeletes(bool doWait) { + ensureOpen(); + + if (infoStream) { + message(L"expungeDeletes: index now " + segString()); + } + + MergeSpecificationPtr spec; + { - ensureOpen(); - - if (infoStream) - message(L"expungeDeletes: index now " + segString()); - - MergeSpecificationPtr spec; - - { - SyncLock syncLock(this); - spec = mergePolicy->findMergesToExpungeDeletes(segmentInfos); - for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) - registerMerge(*merge); + SyncLock syncLock(this); + spec = mergePolicy->findMergesToExpungeDeletes(segmentInfos); + for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) { + registerMerge(*merge); } - - mergeScheduler->merge(shared_from_this()); - - if (doWait) + } + + mergeScheduler->merge(shared_from_this()); + + if (doWait) { { - { - SyncLock syncLock(this); - bool running = true; - while (running) - { - if 
(hitOOM) - boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete expungeDeletes")); - - // Check each merge that MergePolicy asked us to do, to see if any of them are still running and - // if any of them have hit an exception. - running = false; - for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) - { - if (pendingMerges.contains(*merge) || runningMerges.contains(*merge)) - running = true; - LuceneException err = (*merge)->getException(); - if (!err.isNull()) - boost::throw_exception(IOException(L"background merge hit exception: " + (*merge)->segString(directory))); + SyncLock syncLock(this); + bool running = true; + while (running) { + if (hitOOM) { + boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete expungeDeletes")); + } + + // Check each merge that MergePolicy asked us to do, to see if any of them are still running and + // if any of them have hit an exception. 
+ running = false; + for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) { + if (pendingMerges.contains(*merge) || runningMerges.contains(*merge)) { + running = true; } - - // If any of our merges are still running, wait - if (running) - IndexWriter::doWait(); + LuceneException err = (*merge)->getException(); + if (!err.isNull()) { + boost::throw_exception(IOException(L"background merge hit exception: " + (*merge)->segString(directory))); + } + } + + // If any of our merges are still running, wait + if (running) { + IndexWriter::doWait(); } } } - - // NOTE: in the ConcurrentMergeScheduler case, when doWait is false, we can return immediately while background - // threads accomplish the optimization - } - - void IndexWriter::expungeDeletes() - { - expungeDeletes(true); - } - - void IndexWriter::maybeMerge() - { - maybeMerge(false); } - - void IndexWriter::maybeMerge(bool optimize) - { - maybeMerge(1, optimize); + + // NOTE: in the ConcurrentMergeScheduler case, when doWait is false, we can return immediately while background + // threads accomplish the optimization +} + +void IndexWriter::expungeDeletes() { + expungeDeletes(true); +} + +void IndexWriter::maybeMerge() { + maybeMerge(false); +} + +void IndexWriter::maybeMerge(bool optimize) { + maybeMerge(1, optimize); +} + +void IndexWriter::maybeMerge(int32_t maxNumSegmentsOptimize, bool optimize) { + updatePendingMerges(maxNumSegmentsOptimize, optimize); + mergeScheduler->merge(shared_from_this()); +} + +void IndexWriter::updatePendingMerges(int32_t maxNumSegmentsOptimize, bool optimize) { + SyncLock syncLock(this); + BOOST_ASSERT(!optimize || maxNumSegmentsOptimize > 0); + + if (stopMerges) { + return; } - - void IndexWriter::maybeMerge(int32_t maxNumSegmentsOptimize, bool optimize) - { - updatePendingMerges(maxNumSegmentsOptimize, optimize); - mergeScheduler->merge(shared_from_this()); + + // Do not start new merges if we've hit std::bad_alloc + if (hitOOM) { + return; } 
- - void IndexWriter::updatePendingMerges(int32_t maxNumSegmentsOptimize, bool optimize) - { - SyncLock syncLock(this); - BOOST_ASSERT(!optimize || maxNumSegmentsOptimize > 0); - - if (stopMerges) - return; - - // Do not start new merges if we've hit std::bad_alloc - if (hitOOM) - return; - - MergeSpecificationPtr spec; - - if (optimize) - { - spec = mergePolicy->findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize); - - if (spec) - { - for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) - { - (*merge)->optimize = true; - (*merge)->maxNumSegmentsOptimize = maxNumSegmentsOptimize; - } + + MergeSpecificationPtr spec; + + if (optimize) { + spec = mergePolicy->findMergesForOptimize(segmentInfos, maxNumSegmentsOptimize, segmentsToOptimize); + + if (spec) { + for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) { + (*merge)->optimize = true; + (*merge)->maxNumSegmentsOptimize = maxNumSegmentsOptimize; } } - else - spec = mergePolicy->findMerges(segmentInfos); - - if (spec) - { - for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) - registerMerge(*merge); - } + } else { + spec = mergePolicy->findMerges(segmentInfos); } - - OneMergePtr IndexWriter::getNextMerge() - { - SyncLock syncLock(this); - if (pendingMerges.empty()) - return OneMergePtr(); - else - { - // Advance the merge from pending to running - OneMergePtr merge(pendingMerges.removeFirst()); - runningMerges.add(merge); - return merge; + + if (spec) { + for (Collection::iterator merge = spec->merges.begin(); merge != spec->merges.end(); ++merge) { + registerMerge(*merge); } } - - OneMergePtr IndexWriter::getNextExternalMerge() - { - SyncLock syncLock(this); - if (pendingMerges.empty()) - return OneMergePtr(); - else - { - for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) - { - if ((*merge)->isExternal) - { - // Advance 
the merge from pending to running - OneMergePtr running(*merge); - runningMerges.add(*merge); - pendingMerges.remove(merge); - return running; - } - } - } - - // All existing merges do not involve external segments +} + +OneMergePtr IndexWriter::getNextMerge() { + SyncLock syncLock(this); + if (pendingMerges.empty()) { return OneMergePtr(); + } else { + // Advance the merge from pending to running + OneMergePtr merge(pendingMerges.removeFirst()); + runningMerges.add(merge); + return merge; } - - void IndexWriter::startTransaction(bool haveReadLock) - { - SyncLock syncLock(this); - bool success = false; - LuceneException finally; - try - { - if (infoStream) - message(L"now start transaction"); - - BOOST_ASSERT(docWriter->getNumBufferedDeleteTerms() == 0); // calling startTransaction with buffered delete terms not supported - BOOST_ASSERT(docWriter->getNumDocsInRAM() == 0); // calling startTransaction with buffered documents not supported - - ensureOpen(); - - // If a transaction is trying to roll back (because addIndexes hit an exception) then wait here until that's done - while (stopMerges) - doWait(); - - success = true; - } - catch (LuceneException& e) - { - finally = e; +} + +OneMergePtr IndexWriter::getNextExternalMerge() { + SyncLock syncLock(this); + if (pendingMerges.empty()) { + return OneMergePtr(); + } else { + for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) { + if ((*merge)->isExternal) { + // Advance the merge from pending to running + OneMergePtr running(*merge); + runningMerges.add(*merge); + pendingMerges.remove(merge); + return running; + } } - - // Release the write lock if our caller held it, on hitting an exception - if (!success && haveReadLock) - releaseRead(); - finally.throwException(); - - if (haveReadLock) - upgradeReadToWrite(); - else - acquireWrite(); - - success = false; - - try - { - localRollbackSegmentInfos = boost::dynamic_pointer_cast(segmentInfos->clone()); - - 
BOOST_ASSERT(!hasExternalSegments()); - - localFlushedDocCount = docWriter->getFlushedDocCount(); - - // We must "protect" our files at this point from deletion in case we need to rollback - deleter->incRef(segmentInfos, false); - - success = true; + } + + // All existing merges do not involve external segments + return OneMergePtr(); +} + +void IndexWriter::startTransaction(bool haveReadLock) { + SyncLock syncLock(this); + bool success = false; + LuceneException finally; + try { + if (infoStream) { + message(L"now start transaction"); } - catch (LuceneException& e) - { - finally = e; + + BOOST_ASSERT(docWriter->getNumBufferedDeleteTerms() == 0); // calling startTransaction with buffered delete terms not supported + BOOST_ASSERT(docWriter->getNumDocsInRAM() == 0); // calling startTransaction with buffered documents not supported + + ensureOpen(); + + // If a transaction is trying to roll back (because addIndexes hit an exception) then wait here until that's done + while (stopMerges) { + doWait(); } - - if (!success) - finishAddIndexes(); - finally.throwException(); + + success = true; + } catch (LuceneException& e) { + finally = e; } - - void IndexWriter::rollbackTransaction() - { - SyncLock syncLock(this); - - if (infoStream) - message(L"now rollback transaction"); - - if (docWriter) - docWriter->setFlushedDocCount(localFlushedDocCount); - - // Must finish merges before rolling back segmentInfos so merges don't hit exceptions on trying to commit - // themselves, don't get files deleted out from under them, etc. - finishMerges(false); - - // Keep the same segmentInfos instance but replace all of its SegmentInfo instances. This is so the next - // attempt to commit using this instance of IndexWriter will always write to a new generation ("write once"). 
- segmentInfos->clear(); - segmentInfos->addAll(localRollbackSegmentInfos); - localRollbackSegmentInfos.reset(); - - // This must come after we rollback segmentInfos, so that if a commit() kicks off it does not see the - // segmentInfos with external segments. - finishAddIndexes(); - - // Ask deleter to locate unreferenced files we had created & remove them - deleter->checkpoint(segmentInfos, false); - - // Remove the incRef we did in startTransaction - deleter->decRef(segmentInfos); - - // Also ask deleter to remove any newly created files that were never incref'd; this "garbage" is created - // when a merge kicks off but aborts part way through before it had a chance to incRef the files it had - // partially created - deleter->refresh(); - - notifyAll(); - - BOOST_ASSERT(!hasExternalSegments()); + + // Release the write lock if our caller held it, on hitting an exception + if (!success && haveReadLock) { + releaseRead(); } - - void IndexWriter::commitTransaction() - { - SyncLock syncLock(this); - - if (infoStream) - message(L"now commit transaction"); - - // Give deleter a chance to remove files now - checkpoint(); - - // Remove the incRef we did in startTransaction. 
- deleter->decRef(localRollbackSegmentInfos); - - localRollbackSegmentInfos.reset(); - + finally.throwException(); + + if (haveReadLock) { + upgradeReadToWrite(); + } else { + acquireWrite(); + } + + success = false; + + try { + localRollbackSegmentInfos = boost::dynamic_pointer_cast(segmentInfos->clone()); + BOOST_ASSERT(!hasExternalSegments()); - + + localFlushedDocCount = docWriter->getFlushedDocCount(); + + // We must "protect" our files at this point from deletion in case we need to rollback + deleter->incRef(segmentInfos, false); + + success = true; + } catch (LuceneException& e) { + finally = e; + } + + if (!success) { finishAddIndexes(); } - - void IndexWriter::rollback() - { - ensureOpen(); - - // Ensure that only one thread actually gets to do the closing - if (shouldClose()) - rollbackInternal(); + finally.throwException(); +} + +void IndexWriter::rollbackTransaction() { + SyncLock syncLock(this); + + if (infoStream) { + message(L"now rollback transaction"); } - - void IndexWriter::rollbackInternal() - { - bool success = false; - - if (infoStream) - message(L"rollback"); - - docWriter->pauseAllThreads(); - LuceneException finally; - try - { - finishMerges(false); - - // Must pre-close these two, in case they increment changeCount so that we can then set it to false before - // calling closeInternal - mergePolicy->close(); - mergeScheduler->close(); - { - SyncLock syncLock(this); - - if (pendingCommit) - { - pendingCommit->rollbackCommit(directory); - deleter->decRef(pendingCommit); - pendingCommit.reset(); - notifyAll(); - } + if (docWriter) { + docWriter->setFlushedDocCount(localFlushedDocCount); + } + + // Must finish merges before rolling back segmentInfos so merges don't hit exceptions on trying to commit + // themselves, don't get files deleted out from under them, etc. + finishMerges(false); + + // Keep the same segmentInfos instance but replace all of its SegmentInfo instances. 
This is so the next + // attempt to commit using this instance of IndexWriter will always write to a new generation ("write once"). + segmentInfos->clear(); + segmentInfos->addAll(localRollbackSegmentInfos); + localRollbackSegmentInfos.reset(); + + // This must come after we rollback segmentInfos, so that if a commit() kicks off it does not see the + // segmentInfos with external segments. + finishAddIndexes(); + + // Ask deleter to locate unreferenced files we had created & remove them + deleter->checkpoint(segmentInfos, false); + + // Remove the incRef we did in startTransaction + deleter->decRef(segmentInfos); + + // Also ask deleter to remove any newly created files that were never incref'd; this "garbage" is created + // when a merge kicks off but aborts part way through before it had a chance to incRef the files it had + // partially created + deleter->refresh(); + + notifyAll(); + + BOOST_ASSERT(!hasExternalSegments()); +} + +void IndexWriter::commitTransaction() { + SyncLock syncLock(this); + + if (infoStream) { + message(L"now commit transaction"); + } + + // Give deleter a chance to remove files now + checkpoint(); + + // Remove the incRef we did in startTransaction. 
+ deleter->decRef(localRollbackSegmentInfos); + + localRollbackSegmentInfos.reset(); + + BOOST_ASSERT(!hasExternalSegments()); + + finishAddIndexes(); +} + +void IndexWriter::rollback() { + ensureOpen(); + + // Ensure that only one thread actually gets to do the closing + if (shouldClose()) { + rollbackInternal(); + } +} + +void IndexWriter::rollbackInternal() { + bool success = false; + + if (infoStream) { + message(L"rollback"); + } + + docWriter->pauseAllThreads(); + LuceneException finally; + try { + finishMerges(false); + + // Must pre-close these two, in case they increment changeCount so that we can then set it to false before + // calling closeInternal + mergePolicy->close(); + mergeScheduler->close(); - // Keep the same segmentInfos instance but replace all of its SegmentInfo instances. This is so the next - // attempt to commit using this instance of IndexWriter will always write to a new generation ("write once"). - segmentInfos->clear(); - segmentInfos->addAll(rollbackSegmentInfos); - - BOOST_ASSERT(!hasExternalSegments()); - - docWriter->abort(); - - bool test = testPoint(L"rollback before checkpoint"); - BOOST_ASSERT(test); - - // Ask deleter to locate unreferenced files & remove them - deleter->checkpoint(segmentInfos, false); - deleter->refresh(); - } - - // Don't bother saving any changes in our segmentInfos - readerPool->clear(SegmentInfosPtr()); - - lastCommitChangeCount = changeCount; - - success = true; - } - catch (std::bad_alloc& oom) - { - finally = handleOOM(oom, L"rollbackInternal"); - } - catch (LuceneException& e) - { - finally = e; - } { SyncLock syncLock(this); - - if (!success) - { - docWriter->resumeAllThreads(); - closing = false; + + if (pendingCommit) { + pendingCommit->rollbackCommit(directory); + deleter->decRef(pendingCommit); + pendingCommit.reset(); notifyAll(); - if (infoStream) - message(L"hit exception during rollback"); } - } - finally.throwException(); - - closeInternal(false); - } - - void IndexWriter::deleteAll() - { - 
SyncLock syncLock(this); - bool success = false; - docWriter->pauseAllThreads(); - LuceneException finally; - try - { - // Abort any running merges - finishMerges(false); - - // Remove any buffered docs - docWriter->abort(); - docWriter->setFlushedDocCount(0); - - // Remove all segments + + // Keep the same segmentInfos instance but replace all of its SegmentInfo instances. This is so the next + // attempt to commit using this instance of IndexWriter will always write to a new generation ("write once"). segmentInfos->clear(); - + segmentInfos->addAll(rollbackSegmentInfos); + + BOOST_ASSERT(!hasExternalSegments()); + + docWriter->abort(); + + bool test = testPoint(L"rollback before checkpoint"); + BOOST_ASSERT(test); + // Ask deleter to locate unreferenced files & remove them deleter->checkpoint(segmentInfos, false); deleter->refresh(); - - // Don't bother saving any changes in our segmentInfos - readerPool->clear(SegmentInfosPtr()); - - // Mark that the index has changed - ++changeCount; - - success = true; - } - catch (std::bad_alloc& oom) - { - finally = handleOOM(oom, L"deleteAll"); - } - catch (LuceneException& e) - { - finally = e; } - - docWriter->resumeAllThreads(); - if (!success && infoStream) - message(L"hit exception during deleteAll"); - - finally.throwException(); + + // Don't bother saving any changes in our segmentInfos + readerPool->clear(SegmentInfosPtr()); + + lastCommitChangeCount = changeCount; + + success = true; + } catch (std::bad_alloc& oom) { + finally = handleOOM(oom, L"rollbackInternal"); + } catch (LuceneException& e) { + finally = e; } - - void IndexWriter::finishMerges(bool waitForMerges) { SyncLock syncLock(this); - if (!waitForMerges) - { - stopMerges = true; - - // Abort all pending and running merges - for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) - { - if (infoStream) - message(L"now abort pending merge " + (*merge)->segString(directory)); - (*merge)->abort(); - 
mergeFinish(*merge); - } - pendingMerges.clear(); - - for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) - { - if (infoStream) - message(L"now abort running merge " + (*merge)->segString(directory)); - (*merge)->abort(); - } - - // Ensure any running addIndexes finishes. It's fine if a new one attempts to start because its merges - // will quickly see the stopMerges == true and abort. - acquireRead(); - releaseRead(); - - // These merges periodically check whether they have been aborted, and stop if so. We wait here to make - // sure they all stop. It should not take very long because the merge threads periodically check if they - // are aborted. - while (!runningMerges.empty()) - { - if (infoStream) - message(L"now wait for " + StringUtils::toString(runningMerges.size()) + L" running merge to abort"); - doWait(); - } - - stopMerges = false; + + if (!success) { + docWriter->resumeAllThreads(); + closing = false; notifyAll(); - - BOOST_ASSERT(mergingSegments.empty()); - - if (infoStream) - message(L"all running merges have aborted"); - } - else - { - // waitForMerges() will ensure any running addIndexes finishes. It's fine if a new one attempts to start - // because from our caller above the call will see that we are in the process of closing, and will throw - // an AlreadyClosed exception. - IndexWriter::waitForMerges(); + if (infoStream) { + message(L"hit exception during rollback"); + } } } - - void IndexWriter::waitForMerges() - { - SyncLock syncLock(this); - // Ensure any running addIndexes finishes. 
- acquireRead(); - releaseRead(); - - while (!pendingMerges.empty() || !runningMerges.empty()) - doWait(); - - // sanity check - BOOST_ASSERT(mergingSegments.empty()); - } - - void IndexWriter::checkpoint() - { - SyncLock syncLock(this); - ++changeCount; + finally.throwException(); + + closeInternal(false); +} + +void IndexWriter::deleteAll() { + SyncLock syncLock(this); + bool success = false; + docWriter->pauseAllThreads(); + LuceneException finally; + try { + // Abort any running merges + finishMerges(false); + + // Remove any buffered docs + docWriter->abort(); + docWriter->setFlushedDocCount(0); + + // Remove all segments + segmentInfos->clear(); + + // Ask deleter to locate unreferenced files & remove them deleter->checkpoint(segmentInfos, false); + deleter->refresh(); + + // Don't bother saving any changes in our segmentInfos + readerPool->clear(SegmentInfosPtr()); + + // Mark that the index has changed + ++changeCount; + + success = true; + } catch (std::bad_alloc& oom) { + finally = handleOOM(oom, L"deleteAll"); + } catch (LuceneException& e) { + finally = e; } - - void IndexWriter::finishAddIndexes() - { - releaseWrite(); + + docWriter->resumeAllThreads(); + if (!success && infoStream) { + message(L"hit exception during deleteAll"); } - - void IndexWriter::blockAddIndexes(bool includePendingClose) - { + + finally.throwException(); +} + +void IndexWriter::finishMerges(bool waitForMerges) { + SyncLock syncLock(this); + if (!waitForMerges) { + stopMerges = true; + + // Abort all pending and running merges + for (Collection::iterator merge = pendingMerges.begin(); merge != pendingMerges.end(); ++merge) { + if (infoStream) { + message(L"now abort pending merge " + (*merge)->segString(directory)); + } + (*merge)->abort(); + mergeFinish(*merge); + } + pendingMerges.clear(); + + for (SetOneMerge::iterator merge = runningMerges.begin(); merge != runningMerges.end(); ++merge) { + if (infoStream) { + message(L"now abort running merge " + 
(*merge)->segString(directory)); + } + (*merge)->abort(); + } + + // Ensure any running addIndexes finishes. It's fine if a new one attempts to start because its merges + // will quickly see the stopMerges == true and abort. acquireRead(); - - bool success = false; - LuceneException finally; - try - { - // Make sure we are still open since we could have waited quite a while for last addIndexes to finish - ensureOpen(includePendingClose); - success = true; + releaseRead(); + + // These merges periodically check whether they have been aborted, and stop if so. We wait here to make + // sure they all stop. It should not take very long because the merge threads periodically check if they + // are aborted. + while (!runningMerges.empty()) { + if (infoStream) { + message(L"now wait for " + StringUtils::toString(runningMerges.size()) + L" running merge to abort"); + } + doWait(); } - catch (LuceneException& e) - { - finally = e; + + stopMerges = false; + notifyAll(); + + BOOST_ASSERT(mergingSegments.empty()); + + if (infoStream) { + message(L"all running merges have aborted"); } - - if (!success) - releaseRead(); - finally.throwException(); + } else { + // waitForMerges() will ensure any running addIndexes finishes. It's fine if a new one attempts to start + // because from our caller above the call will see that we are in the process of closing, and will throw + // an AlreadyClosed exception. + IndexWriter::waitForMerges(); } - - void IndexWriter::resumeAddIndexes() - { - releaseRead(); +} + +void IndexWriter::waitForMerges() { + SyncLock syncLock(this); + // Ensure any running addIndexes finishes. 
+ acquireRead(); + releaseRead(); + + while (!pendingMerges.empty() || !runningMerges.empty()) { + doWait(); } - - void IndexWriter::resetMergeExceptions() - { - SyncLock syncLock(this); - mergeExceptions.clear(); - ++mergeGen; + + // sanity check + BOOST_ASSERT(mergingSegments.empty()); +} + +void IndexWriter::checkpoint() { + SyncLock syncLock(this); + ++changeCount; + deleter->checkpoint(segmentInfos, false); +} + +void IndexWriter::finishAddIndexes() { + releaseWrite(); +} + +void IndexWriter::blockAddIndexes(bool includePendingClose) { + acquireRead(); + + bool success = false; + LuceneException finally; + try { + // Make sure we are still open since we could have waited quite a while for last addIndexes to finish + ensureOpen(includePendingClose); + success = true; + } catch (LuceneException& e) { + finally = e; } - - void IndexWriter::noDupDirs(Collection dirs) - { - Collection dups(Collection::newInstance()); - - for (Collection::iterator dir = dirs.begin(); dir != dirs.end(); ++dir) - { - for (Collection::iterator dup = dups.begin(); dup != dups.end(); ++dup) - { - if (*dup == *dir) - boost::throw_exception(IllegalArgumentException(L"Directory " + (*dir)->getLockID() + L" appears more than once")); - } - if (*dir == directory) - boost::throw_exception(IllegalArgumentException(L"Cannot add directory to itself")); - dups.add(*dir); - } + + if (!success) { + releaseRead(); } - - void IndexWriter::addIndexesNoOptimize(Collection dirs) - { - ensureOpen(); - - noDupDirs(dirs); - - // Do not allow add docs or deletes while we are running - docWriter->pauseAllThreads(); - - LuceneException finally; - try - { - if (infoStream) - message(L"flush at addIndexesNoOptimize"); - flush(true, false, true); - - bool success = false; - - startTransaction(false); - - try - { - int32_t docCount = 0; - - { - SyncLock syncLock(this); - ensureOpen(); - - for (Collection::iterator dir = dirs.begin(); dir != dirs.end(); ++dir) - { - if (directory == *dir) - { - // cannot add this 
index: segments may be deleted in merge before added - boost::throw_exception(IllegalArgumentException(L"Cannot add this index to itself")); - } - - SegmentInfosPtr sis(newLucene()); // read infos from dir - sis->read(*dir); - - for (int32_t j = 0; j < sis->size(); ++j) - { - SegmentInfoPtr info(sis->info(j)); - BOOST_ASSERT(!segmentInfos->contains(info)); - docCount += info->docCount; - segmentInfos->add(info); // add each info - } - } - } - - // Notify DocumentsWriter that the flushed count just increased - docWriter->updateFlushedDocCount(docCount); - - maybeMerge(); - - ensureOpen(); - - // If after merging there remain segments in the index that are in a different directory, just copy these - // over into our index. This is necessary (before finishing the transaction) to avoid leaving the index - // in an unusable (inconsistent) state. - resolveExternalSegments(); + finally.throwException(); +} - ensureOpen(); +void IndexWriter::resumeAddIndexes() { + releaseRead(); +} - success = true; - } - catch (LuceneException& e) - { - finally = e; +void IndexWriter::resetMergeExceptions() { + SyncLock syncLock(this); + mergeExceptions.clear(); + ++mergeGen; +} + +void IndexWriter::noDupDirs(Collection dirs) { + Collection dups(Collection::newInstance()); + + for (Collection::iterator dir = dirs.begin(); dir != dirs.end(); ++dir) { + for (Collection::iterator dup = dups.begin(); dup != dups.end(); ++dup) { + if (*dup == *dir) { + boost::throw_exception(IllegalArgumentException(L"Directory " + (*dir)->getLockID() + L" appears more than once")); } - - if (success) - commitTransaction(); - else - rollbackTransaction(); - } - catch (std::bad_alloc& oom) - { - finally = handleOOM(oom, L"addIndexesNoOptimize"); } - catch (LuceneException& e) - { - finally = e; + if (*dir == directory) { + boost::throw_exception(IllegalArgumentException(L"Cannot add directory to itself")); } - if (docWriter) - docWriter->resumeAllThreads(); - finally.throwException(); - } - - bool 
IndexWriter::hasExternalSegments() - { - return segmentInfos->hasExternalSegments(directory); + dups.add(*dir); } - - void IndexWriter::resolveExternalSegments() - { - bool any = false; - bool done = false; - - while (!done) - { - SegmentInfoPtr info; - OneMergePtr merge; - +} + +void IndexWriter::addIndexesNoOptimize(Collection dirs) { + ensureOpen(); + + noDupDirs(dirs); + + // Do not allow add docs or deletes while we are running + docWriter->pauseAllThreads(); + + LuceneException finally; + try { + if (infoStream) { + message(L"flush at addIndexesNoOptimize"); + } + flush(true, false, true); + + bool success = false; + + startTransaction(false); + + try { + int32_t docCount = 0; + { SyncLock syncLock(this); - if (stopMerges) - boost::throw_exception(MergeAbortedException(L"rollback() was called or addIndexes* hit an unhandled exception")); - - int32_t numSegments = segmentInfos->size(); - - done = true; - for (int32_t i = 0; i < numSegments; ++i) - { - info = segmentInfos->info(i); - if (info->dir != directory) - { - done = false; - OneMergePtr newMerge(newLucene(segmentInfos->range(i, i + 1), boost::dynamic_pointer_cast(mergePolicy) && getUseCompoundFile())); - - // Returns true if no running merge conflicts with this one (and, records this merge as - // pending), ie, this segment is not currently being merged - if (registerMerge(newMerge)) - { - merge = newMerge; - - // If this segment is not currently being merged, then advance it to running & run - // the merge ourself (below) - pendingMerges.remove(merge); - runningMerges.add(merge); - break; - } + ensureOpen(); + + for (Collection::iterator dir = dirs.begin(); dir != dirs.end(); ++dir) { + if (directory == *dir) { + // cannot add this index: segments may be deleted in merge before added + boost::throw_exception(IllegalArgumentException(L"Cannot add this index to itself")); + } + + SegmentInfosPtr sis(newLucene()); // read infos from dir + sis->read(*dir); + + for (int32_t j = 0; j < sis->size(); ++j) { + 
SegmentInfoPtr info(sis->info(j)); + BOOST_ASSERT(!segmentInfos->contains(info)); + docCount += info->docCount; + segmentInfos->add(info); // add each info } - } - - if (!done && !merge) - { - // We are not yet done (external segments still exist in segmentInfos), yet, all such segments - // are currently "covered" by a pending or running merge. We now try to grab any pending merge - // that involves external segments - merge = getNextExternalMerge(); - } - - if (!done && !merge) - { - // We are not yet done, and, all external segments fall under merges that the merge scheduler is - // currently running. So, we now wait and check back to see if the merge has completed. - doWait(); } } - - if (merge) - { - any = true; - IndexWriter::merge(merge); - } - } - - if (any) - { - // Sometimes, on copying an external segment over, more merges may become necessary - mergeScheduler->merge(shared_from_this()); - } - } - - void IndexWriter::addIndexes(Collection readers) - { - ensureOpen(); - - // Do not allow add docs or deletes while we are running - docWriter->pauseAllThreads(); - - // We must pre-acquire a read lock here (and upgrade to write lock in startTransaction below) so that no - // other addIndexes is allowed to start up after we have flushed & optimized but before we then start our - // transaction. This is because the merging below requires that only one segment is present in the index - acquireRead(); - - LuceneException finally; - try + + // Notify DocumentsWriter that the flushed count just increased + docWriter->updateFlushedDocCount(docCount); + + maybeMerge(); + + ensureOpen(); + + // If after merging there remain segments in the index that are in a different directory, just copy these + // over into our index. This is necessary (before finishing the transaction) to avoid leaving the index + // in an unusable (inconsistent) state. 
+ resolveExternalSegments(); + + ensureOpen(); + + success = true; + } catch (LuceneException& e) { + finally = e; + } + + if (success) { + commitTransaction(); + } else { + rollbackTransaction(); + } + } catch (std::bad_alloc& oom) { + finally = handleOOM(oom, L"addIndexesNoOptimize"); + } catch (LuceneException& e) { + finally = e; + } + if (docWriter) { + docWriter->resumeAllThreads(); + } + finally.throwException(); +} + +bool IndexWriter::hasExternalSegments() { + return segmentInfos->hasExternalSegments(directory); +} + +void IndexWriter::resolveExternalSegments() { + bool any = false; + bool done = false; + + while (!done) { + SegmentInfoPtr info; + OneMergePtr merge; + { - SegmentInfoPtr info; - String mergedName; - SegmentMergerPtr merger; - - bool success = false; - - try - { - flush(true, false, true); - optimize(); // start with zero or 1 seg - success = true; + SyncLock syncLock(this); + if (stopMerges) { + boost::throw_exception(MergeAbortedException(L"rollback() was called or addIndexes* hit an unhandled exception")); } - catch (LuceneException& e) - { - finally = e; + + int32_t numSegments = segmentInfos->size(); + + done = true; + for (int32_t i = 0; i < numSegments; ++i) { + info = segmentInfos->info(i); + if (info->dir != directory) { + done = false; + OneMergePtr newMerge(newLucene(segmentInfos->range(i, i + 1), boost::dynamic_pointer_cast(mergePolicy) && getUseCompoundFile())); + + // Returns true if no running merge conflicts with this one (and, records this merge as + // pending), ie, this segment is not currently being merged + if (registerMerge(newMerge)) { + merge = newMerge; + + // If this segment is not currently being merged, then advance it to running & run + // the merge ourself (below) + pendingMerges.remove(merge); + runningMerges.add(merge); + break; + } + } } - - // Take care to release the read lock if we hit an exception before starting the transaction - if (!success) - releaseRead(); - finally.throwException(); - - // true 
means we already have a read lock; if this call hits an exception it will release the write lock - startTransaction(true); - - try + + if (!done && !merge) { + // We are not yet done (external segments still exist in segmentInfos), yet, all such segments + // are currently "covered" by a pending or running merge. We now try to grab any pending merge + // that involves external segments + merge = getNextExternalMerge(); + } + + if (!done && !merge) { + // We are not yet done, and, all external segments fall under merges that the merge scheduler is + // currently running. So, we now wait and check back to see if the merge has completed. + doWait(); + } + } + + if (merge) { + any = true; + IndexWriter::merge(merge); + } + } + + if (any) { + // Sometimes, on copying an external segment over, more merges may become necessary + mergeScheduler->merge(shared_from_this()); + } +} + +void IndexWriter::addIndexes(Collection readers) { + ensureOpen(); + + // Do not allow add docs or deletes while we are running + docWriter->pauseAllThreads(); + + // We must pre-acquire a read lock here (and upgrade to write lock in startTransaction below) so that no + // other addIndexes is allowed to start up after we have flushed & optimized but before we then start our + // transaction. 
This is because the merging below requires that only one segment is present in the index + acquireRead(); + + LuceneException finally; + try { + SegmentInfoPtr info; + String mergedName; + SegmentMergerPtr merger; + + bool success = false; + + try { + flush(true, false, true); + optimize(); // start with zero or 1 seg + success = true; + } catch (LuceneException& e) { + finally = e; + } + + // Take care to release the read lock if we hit an exception before starting the transaction + if (!success) { + releaseRead(); + } + finally.throwException(); + + // true means we already have a read lock; if this call hits an exception it will release the write lock + startTransaction(true); + + try { + mergedName = newSegmentName(); + merger = newLucene(shared_from_this(), mergedName, OneMergePtr()); + + SegmentReaderPtr sReader; + { - mergedName = newSegmentName(); - merger = newLucene(shared_from_this(), mergedName, OneMergePtr()); - - SegmentReaderPtr sReader; - + SyncLock syncLock(this); + if (segmentInfos->size() == 1) { // add existing index, if any + sReader = readerPool->get(segmentInfos->info(0), true, BufferedIndexInput::BUFFER_SIZE, -1); + } + } + + success = false; + + try { + if (sReader) { + merger->add(sReader); + } + + for (Collection::iterator i = readers.begin(); i != readers.end(); ++i) { + merger->add(*i); + } + + int32_t docCount = merger->merge(); // merge 'em + { SyncLock syncLock(this); - if (segmentInfos->size() == 1) // add existing index, if any - sReader = readerPool->get(segmentInfos->info(0), true, BufferedIndexInput::BUFFER_SIZE, -1); + segmentInfos->clear(); // pop old infos & add new + info = newLucene(mergedName, docCount, directory, false, true, -1, L"", false, merger->hasProx()); + setDiagnostics(info, L"addIndexes(Collection)"); + segmentInfos->add(info); } - + + // Notify DocumentsWriter that the flushed count just increased + docWriter->updateFlushedDocCount(docCount); + + success = true; + } catch (LuceneException& e) { + finally = e; + 
} + + if (sReader) { + readerPool->release(sReader); + } + } catch (LuceneException& e) { + finally = e; + } + + if (!success) { + if (infoStream) { + message(L"hit exception in addIndexes during merge"); + } + rollbackTransaction(); + } else { + commitTransaction(); + } + + finally.throwException(); + + if (boost::dynamic_pointer_cast(mergePolicy) && getUseCompoundFile()) { + HashSet files; + + { + SyncLock syncLock(this); + // Must incRef our files so that if another thread is running merge/optimize, it doesn't delete our + // segment's files before we have a change to finish making the compound file. + if (segmentInfos->contains(info)) { + files = info->files(); + deleter->incRef(files); + } + } + + if (files) { success = false; - - try - { - if (sReader) - merger->add(sReader); - - for (Collection::iterator i = readers.begin(); i != readers.end(); ++i) - merger->add(*i); - - int32_t docCount = merger->merge(); // merge 'em - + + startTransaction(false); + + try { + merger->createCompoundFile(mergedName + L".cfs"); + { SyncLock syncLock(this); - segmentInfos->clear(); // pop old infos & add new - info = newLucene(mergedName, docCount, directory, false, true, -1, L"", false, merger->hasProx()); - setDiagnostics(info, L"addIndexes(Collection)"); - segmentInfos->add(info); + info->setUseCompoundFile(true); } - - // Notify DocumentsWriter that the flushed count just increased - docWriter->updateFlushedDocCount(docCount); - + success = true; - } - catch (LuceneException& e) - { + } catch (LuceneException& e) { finally = e; } - - if (sReader) - readerPool->release(sReader); - } - catch (LuceneException& e) - { - finally = e; - } - - if (!success) - { - if (infoStream) - message(L"hit exception in addIndexes during merge"); - rollbackTransaction(); - } - else - commitTransaction(); - - finally.throwException(); - - if (boost::dynamic_pointer_cast(mergePolicy) && getUseCompoundFile()) - { - HashSet files; - + { SyncLock syncLock(this); - // Must incRef our files so that 
if another thread is running merge/optimize, it doesn't delete our - // segment's files before we have a change to finish making the compound file. - if (segmentInfos->contains(info)) - { - files = info->files(); - deleter->incRef(files); - } + deleter->decRef(files); } - - if (files) - { - success = false; - - startTransaction(false); - - try - { - merger->createCompoundFile(mergedName + L".cfs"); - - { - SyncLock syncLock(this); - info->setUseCompoundFile(true); - } - - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - - { - SyncLock syncLock(this); - deleter->decRef(files); - } - - if (!success) - { - if (infoStream) - message(L"hit exception building compound file in addIndexes during merge"); - rollbackTransaction(); + + if (!success) { + if (infoStream) { + message(L"hit exception building compound file in addIndexes during merge"); } - else - commitTransaction(); + rollbackTransaction(); + } else { + commitTransaction(); } } } - catch (std::bad_alloc& oom) - { - finally = handleOOM(oom, L"addIndexes(Collection)"); + } catch (std::bad_alloc& oom) { + finally = handleOOM(oom, L"addIndexes(Collection)"); + } catch (LuceneException& e) { + finally = e; + } + if (docWriter) { + docWriter->resumeAllThreads(); + } + finally.throwException(); +} + +void IndexWriter::doAfterFlush() { + // override +} + +void IndexWriter::doBeforeFlush() { + // override +} + +void IndexWriter::prepareCommit() { + ensureOpen(); + prepareCommit(MapStringString()); +} + +void IndexWriter::prepareCommit(MapStringString commitUserData) { + if (hitOOM) { + boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot commit")); + } + + if (pendingCommit) { + boost::throw_exception(IllegalStateException(L"prepareCommit was already called with no corresponding call to commit")); + } + + if (infoStream) { + message(L"prepareCommit: flush"); + } + + flush(true, true, true); + + startCommit(0, commitUserData); +} + +void 
IndexWriter::commit(int64_t sizeInBytes) { + SyncLock messageLock(commitLock); + startCommit(sizeInBytes, MapStringString()); + finishCommit(); +} + +void IndexWriter::commit() { + commit(MapStringString()); +} + +void IndexWriter::commit(MapStringString commitUserData) { + ensureOpen(); + + if (infoStream) { + message(L"commit: start"); + } + + { + SyncLock messageLock(commitLock); + + if (infoStream) { + message(L"commit: enter lock"); } - catch (LuceneException& e) - { + + if (!pendingCommit) { + if (infoStream) { + message(L"commit: now prepare"); + } + prepareCommit(commitUserData); + } else if (infoStream) { + message(L"commit: already prepared"); + } + + finishCommit(); + } +} + +void IndexWriter::finishCommit() { + SyncLock syncLock(this); + if (pendingCommit) { + LuceneException finally; + try { + if (infoStream) { + message(L"commit: pendingCommit != null"); + } + pendingCommit->finishCommit(directory); + if (infoStream) { + message(L"commit: wrote segments file \"" + pendingCommit->getCurrentSegmentFileName() + L"\""); + } + lastCommitChangeCount = pendingCommitChangeCount; + segmentInfos->updateGeneration(pendingCommit); + segmentInfos->setUserData(pendingCommit->getUserData()); + setRollbackSegmentInfos(pendingCommit); + deleter->checkpoint(pendingCommit, true); + } catch (LuceneException& e) { finally = e; } - if (docWriter) - docWriter->resumeAllThreads(); + + deleter->decRef(pendingCommit); + pendingCommit.reset(); + notifyAll(); finally.throwException(); + } else if (infoStream) { + message(L"commit: pendingCommit == null; skip"); } - - void IndexWriter::doAfterFlush() - { - // override + + if (infoStream) { + message(L"commit: done"); } - - void IndexWriter::doBeforeFlush() - { - // override +} + +void IndexWriter::flush(bool triggerMerge, bool flushDocStores, bool flushDeletes) { + // We can be called during close, when closing = true, so we must pass false to ensureOpen + ensureOpen(false); + if (doFlush(flushDocStores, flushDeletes) && 
triggerMerge) { + maybeMerge(); } - - void IndexWriter::prepareCommit() - { - ensureOpen(); - prepareCommit(MapStringString()); +} + +bool IndexWriter::doFlush(bool flushDocStores, bool flushDeletes) { + TestScope testScope(L"IndexWriter", L"doFlush"); + SyncLock syncLock(this); + bool success = false; + LuceneException finally; + try { + try { + success = doFlushInternal(flushDocStores, flushDeletes); + } catch (LuceneException& e) { + finally = e; + } + if (docWriter->doBalanceRAM()) { + docWriter->balanceRAM(); + } + finally.throwException(); + } catch (LuceneException& e) { + finally = e; } - - void IndexWriter::prepareCommit(MapStringString commitUserData) - { - if (hitOOM) - boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot commit")); - - if (pendingCommit) - boost::throw_exception(IllegalStateException(L"prepareCommit was already called with no corresponding call to commit")); - - if (infoStream) - message(L"prepareCommit: flush"); - - flush(true, true, true); - - startCommit(0, commitUserData); - } - - void IndexWriter::commit(int64_t sizeInBytes) - { - SyncLock messageLock(commitLock); - startCommit(sizeInBytes, MapStringString()); - finishCommit(); + docWriter->clearFlushPending(); + finally.throwException(); + return success; +} + +bool IndexWriter::doFlushInternal(bool flushDocStores, bool flushDeletes) { + SyncLock syncLock(this); + if (hitOOM) { + boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot flush")); } - - void IndexWriter::commit() - { - commit(MapStringString()); + + ensureOpen(false); + + BOOST_ASSERT(testPoint(L"startDoFlush")); + + doBeforeFlush(); + + ++flushCount; + + // If we are flushing because too many deletes accumulated, then we should apply the deletes to free RAM + if (docWriter->doApplyDeletes()) { + flushDeletes = true; } - - void IndexWriter::commit(MapStringString commitUserData) - { - ensureOpen(); - - if (infoStream) - message(L"commit: 
start"); - - { - SyncLock messageLock(commitLock); - - if (infoStream) - message(L"commit: enter lock"); - - if (!pendingCommit) - { - if (infoStream) - message(L"commit: now prepare"); - prepareCommit(commitUserData); + + // Make sure no threads are actively adding a document. Returns true if docWriter is currently aborting, in + // which case we skip flushing this segment + if (infoStream) { + message(L"flush: now pause all indexing threads"); + } + if (docWriter->pauseAllThreads()) { + docWriter->resumeAllThreads(); + return false; + } + + bool flushDocs = false; + + LuceneException finally; + try { + SegmentInfoPtr newSegment; + + int32_t numDocs = docWriter->getNumDocsInRAM(); + + // Always flush docs if there are any + flushDocs = (numDocs > 0); + + String docStoreSegment(docWriter->getDocStoreSegment()); + + BOOST_ASSERT(!docStoreSegment.empty() || numDocs == 0); + + if (docStoreSegment.empty()) { + flushDocStores = false; + } + + int32_t docStoreOffset = docWriter->getDocStoreOffset(); + + bool docStoreIsCompoundFile = false; + + if (infoStream) { + message(L" flush: segment=" + docWriter->getSegment() + + L" docStoreSegment=" + StringUtils::toString(docWriter->getDocStoreSegment()) + + L" docStoreOffset=" + StringUtils::toString(docStoreOffset) + + L" flushDocs=" + StringUtils::toString(flushDocs) + + L" flushDeletes=" + StringUtils::toString(flushDeletes) + + L" flushDocStores=" + StringUtils::toString(flushDocStores) + + L" numDocs=" + StringUtils::toString(numDocs) + + L" numBufDelTerms=" + StringUtils::toString(docWriter->getNumBufferedDeleteTerms())); + message(L" index before flush " + segString()); + } + + // Check if the doc stores must be separately flushed because other segments, besides the one we are + // about to flush, reference it + if (flushDocStores && (!flushDocs || docWriter->getSegment() != docWriter->getDocStoreSegment())) { + // We must separately flush the doc store + if (infoStream) { + message(L" flush shared docStore segment " + 
docStoreSegment); } - else if (infoStream) - message(L"commit: already prepared"); - - finishCommit(); + + docStoreIsCompoundFile = IndexWriter::flushDocStores(); + flushDocStores = false; } - } - - void IndexWriter::finishCommit() - { - SyncLock syncLock(this); - if (pendingCommit) - { - LuceneException finally; - try - { - if (infoStream) - message(L"commit: pendingCommit != null"); - pendingCommit->finishCommit(directory); - if (infoStream) - message(L"commit: wrote segments file \"" + pendingCommit->getCurrentSegmentFileName() + L"\""); - lastCommitChangeCount = pendingCommitChangeCount; - segmentInfos->updateGeneration(pendingCommit); - segmentInfos->setUserData(pendingCommit->getUserData()); - setRollbackSegmentInfos(pendingCommit); - deleter->checkpoint(pendingCommit, true); - } - catch (LuceneException& e) - { + + String segment(docWriter->getSegment()); + + // If we are flushing docs, segment must not be null + BOOST_ASSERT(!segment.empty() || !flushDocs); + + if (flushDocs) { + bool success = false; + int32_t flushedDocCount; + + try { + flushedDocCount = docWriter->flush(flushDocStores); + if (infoStream) { + message(L"flushedFiles=" + StringUtils::toString(docWriter->getFlushedFiles())); + } + success = true; + } catch (LuceneException& e) { finally = e; } - - deleter->decRef(pendingCommit); - pendingCommit.reset(); - notifyAll(); + + if (!success) { + if (infoStream) { + message(L"hit exception flushing segment " + segment); + } + deleter->refresh(segment); + } + finally.throwException(); - } - else if (infoStream) - message(L"commit: pendingCommit == null; skip"); - - if (infoStream) - message(L"commit: done"); - } - - void IndexWriter::flush(bool triggerMerge, bool flushDocStores, bool flushDeletes) - { - // We can be called during close, when closing = true, so we must pass false to ensureOpen - ensureOpen(false); - if (doFlush(flushDocStores, flushDeletes) && triggerMerge) - maybeMerge(); - } - - bool IndexWriter::doFlush(bool flushDocStores, bool 
flushDeletes) - { - TestScope testScope(L"IndexWriter", L"doFlush"); - SyncLock syncLock(this); - bool success = false; - LuceneException finally; - try - { - try - { - success = doFlushInternal(flushDocStores, flushDeletes); + + if (docStoreOffset == 0 && flushDocStores) { + // This means we are flushing private doc stores with this segment, so it will not be shared + // with other segments + BOOST_ASSERT(!docStoreSegment.empty()); + BOOST_ASSERT(docStoreSegment == segment); + docStoreOffset = -1; + docStoreIsCompoundFile = false; + docStoreSegment.clear(); } - catch (LuceneException& e) - { + + // Create new SegmentInfo, but do not add to our segmentInfos until deletes are flushed successfully. + newSegment = newLucene(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter->hasProx()); + setDiagnostics(newSegment, L"flush"); + } + + docWriter->pushDeletes(); + + if (flushDocs) { + segmentInfos->add(newSegment); + checkpoint(); + } + + if (flushDocs && mergePolicy->useCompoundFile(segmentInfos, newSegment)) { + // Now build compound file + bool success = false; + try { + docWriter->createCompoundFile(segment); + success = true; + } catch (LuceneException& e) { finally = e; } - if (docWriter->doBalanceRAM()) - docWriter->balanceRAM(); + + if (!success) { + if (infoStream) { + message(L"hit exception creating compound file for newly flushed segment " + segment); + } + deleter->deleteFile(segment + L"." 
+ IndexFileNames::COMPOUND_FILE_EXTENSION()); + } + finally.throwException(); + + newSegment->setUseCompoundFile(true); + checkpoint(); } - catch (LuceneException& e) - { - finally = e; + + if (flushDeletes) { + applyDeletes(); } - docWriter->clearFlushPending(); - finally.throwException(); - return success; + + if (flushDocs) { + checkpoint(); + } + + doAfterFlush(); + } catch (std::bad_alloc& oom) { + finally = handleOOM(oom, L"doFlush"); + flushDocs = false; + } catch (LuceneException& e) { + finally = e; + } + docWriter->resumeAllThreads(); + finally.throwException(); + + return flushDocs; +} + +int64_t IndexWriter::ramSizeInBytes() { + ensureOpen(); + return docWriter->getRAMUsed(); +} + +int32_t IndexWriter::numRamDocs() { + SyncLock syncLock(this); + ensureOpen(); + return docWriter->getNumDocsInRAM(); +} + +int32_t IndexWriter::ensureContiguousMerge(const OneMergePtr& merge) { + int32_t first = segmentInfos->find(merge->segments->info(0)); + if (first == -1) { + boost::throw_exception(MergeException(L"Could not find segment " + merge->segments->info(0)->name + L" in current index " + segString())); } - - bool IndexWriter::doFlushInternal(bool flushDocStores, bool flushDeletes) - { - SyncLock syncLock(this); - if (hitOOM) - boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot flush")); - - ensureOpen(false); - - BOOST_ASSERT(testPoint(L"startDoFlush")); - - doBeforeFlush(); - - ++flushCount; - - // If we are flushing because too many deletes accumulated, then we should apply the deletes to free RAM - if (docWriter->doApplyDeletes()) - flushDeletes = true; - - // Make sure no threads are actively adding a document. 
Returns true if docWriter is currently aborting, in - // which case we skip flushing this segment - if (infoStream) - message(L"flush: now pause all indexing threads"); - if (docWriter->pauseAllThreads()) - { - docWriter->resumeAllThreads(); - return false; + + int32_t numSegments = segmentInfos->size(); + int32_t numSegmentsToMerge = merge->segments->size(); + + for (int32_t i = 0; i < numSegmentsToMerge; ++i) { + SegmentInfoPtr info(merge->segments->info(i)); + + if (first + i >= numSegments || !segmentInfos->info(first + i)->equals(info)) { + if (!segmentInfos->contains(info)) { + boost::throw_exception(MergeException(L"MergePolicy selected a segment (" + info->name + L") that is not in the current index " + segString())); + } else { + boost::throw_exception(MergeException(L"MergePolicy selected non-contiguous segments to merge (" + merge->segString(directory) + L" vs " + segString() + L"), which IndexWriter (currently) cannot handle")); + } } - - bool flushDocs = false; - - LuceneException finally; - try - { - SegmentInfoPtr newSegment; - - int32_t numDocs = docWriter->getNumDocsInRAM(); - - // Always flush docs if there are any - flushDocs = (numDocs > 0); - - String docStoreSegment(docWriter->getDocStoreSegment()); - - BOOST_ASSERT(!docStoreSegment.empty() || numDocs == 0); - - if (docStoreSegment.empty()) - flushDocStores = false; - - int32_t docStoreOffset = docWriter->getDocStoreOffset(); - - bool docStoreIsCompoundFile = false; - - if (infoStream) - { - message(L" flush: segment=" + docWriter->getSegment() + - L" docStoreSegment=" + StringUtils::toString(docWriter->getDocStoreSegment()) + - L" docStoreOffset=" + StringUtils::toString(docStoreOffset) + - L" flushDocs=" + StringUtils::toString(flushDocs) + - L" flushDeletes=" + StringUtils::toString(flushDeletes) + - L" flushDocStores=" + StringUtils::toString(flushDocStores) + - L" numDocs=" + StringUtils::toString(numDocs) + - L" numBufDelTerms=" + 
StringUtils::toString(docWriter->getNumBufferedDeleteTerms())); - message(L" index before flush " + segString()); - } - - // Check if the doc stores must be separately flushed because other segments, besides the one we are - // about to flush, reference it - if (flushDocStores && (!flushDocs || docWriter->getSegment() != docWriter->getDocStoreSegment())) - { - // We must separately flush the doc store - if (infoStream) - message(L" flush shared docStore segment " + docStoreSegment); - - docStoreIsCompoundFile = IndexWriter::flushDocStores(); - flushDocStores = false; - } - - String segment(docWriter->getSegment()); - - // If we are flushing docs, segment must not be null - BOOST_ASSERT(!segment.empty() || !flushDocs); - - if (flushDocs) - { - bool success = false; - int32_t flushedDocCount; - - try - { - flushedDocCount = docWriter->flush(flushDocStores); - if (infoStream) - message(L"flushedFiles=" + docWriter->getFlushedFiles()); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - - if (!success) - { - if (infoStream) - message(L"hit exception flushing segment " + segment); - deleter->refresh(segment); + } + + return first; +} + +void IndexWriter::commitMergedDeletes(const OneMergePtr& merge, const SegmentReaderPtr& mergeReader) { + SyncLock syncLock(this); + BOOST_ASSERT(testPoint(L"startCommitMergeDeletes")); + + SegmentInfosPtr sourceSegments(merge->segments); + + if (infoStream) { + message(L"commitMergeDeletes " + merge->segString(directory)); + } + + // Carefully merge deletes that occurred after we started merging + int32_t docUpto = 0; + int32_t delCount = 0; + + for (int32_t i = 0; i < sourceSegments->size(); ++i) { + SegmentInfoPtr info(sourceSegments->info(i)); + int32_t docCount = info->docCount; + SegmentReaderPtr previousReader(merge->readersClone[i]); + SegmentReaderPtr currentReader(merge->readers[i]); + if (previousReader->hasDeletions()) { + // There were deletes on this segment when the merge started. 
The merge has collapsed away those deletes, + // but if new deletes were flushed since the merge started, we must now carefully keep any newly flushed + // deletes but mapping them to the new docIDs. + + if (currentReader->numDeletedDocs() > previousReader->numDeletedDocs()) { + // This means this segment has had new deletes committed since we started the merge, so we must merge them + for (int32_t j = 0; j < docCount; ++j) { + if (previousReader->isDeleted(j)) { + BOOST_ASSERT(currentReader->isDeleted(j)); + } else { + if (currentReader->isDeleted(j)) { + mergeReader->doDelete(docUpto); + ++delCount; + } + ++docUpto; + } } - - finally.throwException(); - - if (docStoreOffset == 0 && flushDocStores) - { - // This means we are flushing private doc stores with this segment, so it will not be shared - // with other segments - BOOST_ASSERT(!docStoreSegment.empty()); - BOOST_ASSERT(docStoreSegment == segment); - docStoreOffset = -1; - docStoreIsCompoundFile = false; - docStoreSegment.clear(); + } else { + docUpto += docCount - previousReader->numDeletedDocs(); + } + } else if (currentReader->hasDeletions()) { + // This segment had no deletes before but now it does + for (int32_t j = 0; j < docCount; ++j) { + if (currentReader->isDeleted(j)) { + mergeReader->doDelete(docUpto); + ++delCount; } - - // Create new SegmentInfo, but do not add to our segmentInfos until deletes are flushed successfully. 
- newSegment = newLucene(segment, flushedDocCount, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, docWriter->hasProx()); - setDiagnostics(newSegment, L"flush"); - } - - docWriter->pushDeletes(); - - if (flushDocs) - { - segmentInfos->add(newSegment); - checkpoint(); + ++docUpto; } - - if (flushDocs && mergePolicy->useCompoundFile(segmentInfos, newSegment)) - { - // Now build compound file - bool success = false; - try - { - docWriter->createCompoundFile(segment); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - - if (!success) - { - if (infoStream) - message(L"hit exception creating compound file for newly flushed segment " + segment); - deleter->deleteFile(segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION()); - } - - finally.throwException(); - - newSegment->setUseCompoundFile(true); - checkpoint(); - } - - if (flushDeletes) - applyDeletes(); - - if (flushDocs) - checkpoint(); - - doAfterFlush(); - } - catch (std::bad_alloc& oom) - { - finally = handleOOM(oom, L"doFlush"); - flushDocs = false; - } - catch (LuceneException& e) - { - finally = e; + } else { + // No deletes before or after + docUpto += info->docCount; } - docWriter->resumeAllThreads(); - finally.throwException(); - - return flushDocs; } - - int64_t IndexWriter::ramSizeInBytes() - { - ensureOpen(); - return docWriter->getRAMUsed(); + + BOOST_ASSERT(mergeReader->numDeletedDocs() == delCount); + + mergeReader->_hasChanges = (delCount > 0); +} + +bool IndexWriter::commitMerge(const OneMergePtr& merge, const SegmentMergerPtr& merger, int32_t mergedDocCount, const SegmentReaderPtr& mergedReader) { + SyncLock syncLock(this); + BOOST_ASSERT(testPoint(L"startCommitMerge")); + + if (hitOOM) { + boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete merge")); } - - int32_t IndexWriter::numRamDocs() - { - SyncLock syncLock(this); - ensureOpen(); - return docWriter->getNumDocsInRAM(); + + if 
(infoStream) { + message(L"commitMerge: " + merge->segString(directory) + L" index=" + segString()); } - - int32_t IndexWriter::ensureContiguousMerge(OneMergePtr merge) - { - int32_t first = segmentInfos->find(merge->segments->info(0)); - if (first == -1) - boost::throw_exception(MergeException(L"Could not find segment " + merge->segments->info(0)->name + L" in current index " + segString())); - - int32_t numSegments = segmentInfos->size(); - int32_t numSegmentsToMerge = merge->segments->size(); - - for (int32_t i = 0; i < numSegmentsToMerge; ++i) - { - SegmentInfoPtr info(merge->segments->info(i)); - - if (first + i >= numSegments || !segmentInfos->info(first + i)->equals(info)) - { - if (!segmentInfos->contains(info)) - boost::throw_exception(MergeException(L"MergePolicy selected a segment (" + info->name + L") that is not in the current index " + segString())); - else - boost::throw_exception(MergeException(L"MergePolicy selected non-contiguous segments to merge (" + merge->segString(directory) + L" vs " + segString() + L"), which IndexWriter (currently) cannot handle")); - } + + BOOST_ASSERT(merge->registerDone); + + // If merge was explicitly aborted, or, if rollback() or rollbackTransaction() had been called since our merge + // started (which results in an unqualified deleter.refresh() call that will remove any index file that current + // segments does not reference), we abort this merge + if (merge->isAborted()) { + if (infoStream) { + message(L"commitMerge: skipping merge " + merge->segString(directory) + L": it was aborted"); } - - return first; + return false; } - - void IndexWriter::commitMergedDeletes(OneMergePtr merge, SegmentReaderPtr mergeReader) - { - SyncLock syncLock(this); - BOOST_ASSERT(testPoint(L"startCommitMergeDeletes")); - - SegmentInfosPtr sourceSegments(merge->segments); - - if (infoStream) - message(L"commitMergeDeletes " + merge->segString(directory)); - - // Carefully merge deletes that occurred after we started merging - int32_t 
docUpto = 0; - int32_t delCount = 0; - - for (int32_t i = 0; i < sourceSegments->size(); ++i) - { - SegmentInfoPtr info(sourceSegments->info(i)); - int32_t docCount = info->docCount; - SegmentReaderPtr previousReader(merge->readersClone[i]); - SegmentReaderPtr currentReader(merge->readers[i]); - if (previousReader->hasDeletions()) - { - // There were deletes on this segment when the merge started. The merge has collapsed away those deletes, - // but if new deletes were flushed since the merge started, we must now carefully keep any newly flushed - // deletes but mapping them to the new docIDs. - - if (currentReader->numDeletedDocs() > previousReader->numDeletedDocs()) - { - // This means this segment has had new deletes committed since we started the merge, so we must merge them - for (int32_t j = 0; j < docCount; ++j) - { - if (previousReader->isDeleted(j)) - BOOST_ASSERT(currentReader->isDeleted(j)); - else - { - if (currentReader->isDeleted(j)) - { - mergeReader->doDelete(docUpto); - ++delCount; - } - ++docUpto; - } - } + + int32_t start = ensureContiguousMerge(merge); + + commitMergedDeletes(merge, mergedReader); + docWriter->remapDeletes(segmentInfos, merger->getDocMaps(), merger->getDelCounts(), merge, mergedDocCount); + + // If the doc store we are using has been closed and is in now compound format (but wasn't when we started), + // then we will switch to the compound format as well + setMergeDocStoreIsCompoundFile(merge); + + merge->info->setHasProx(merger->hasProx()); + + segmentInfos->remove(start, start + merge->segments->size()); + BOOST_ASSERT(!segmentInfos->contains(merge->info)); + segmentInfos->add(start, merge->info); + + closeMergeReaders(merge, false); + + // Must note the change to segmentInfos so any commits in-flight don't lose it + checkpoint(); + + // If the merged segments had pending changes, clear them so that they don't bother writing + // them to disk, updating SegmentInfo, etc. 
+ readerPool->clear(merge->segments); + + if (merge->optimize) { + // cascade the optimize + segmentsToOptimize.add(merge->info); + } + return true; +} + +LuceneException IndexWriter::handleMergeException(const LuceneException& exc, const OneMergePtr& merge) { + if (infoStream) { + message(L"handleMergeException: merge=" + merge->segString(directory) + L" exc=" + exc.getError()); + } + + // Set the exception on the merge, so if optimize() is waiting on us it sees the root cause exception + merge->setException(exc); + addMergeException(merge); + + switch (exc.getType()) { + case LuceneException::MergeAborted: + // We can ignore this exception (it happens when close(false) or rollback is called), unless the + // merge involves segments from external directories, in which case we must throw it so, for + // example, the rollbackTransaction code in addIndexes* is executed. + if (merge->isExternal) { + return exc; + } + break; + case LuceneException::IO: + case LuceneException::Runtime: + return exc; + default: + return RuntimeException(); // Should not get here + } + return LuceneException(); +} + +void IndexWriter::merge(const OneMergePtr& merge) { + bool success = false; + + try { + LuceneException finally; + try { + try { + mergeInit(merge); + if (infoStream) { + message(L"now merge\n merge=" + merge->segString(directory) + L"\n index=" + segString()); } - else - docUpto += docCount - previousReader->numDeletedDocs(); + + mergeMiddle(merge); + mergeSuccess(merge); + success = true; + } catch (LuceneException& e) { + finally = handleMergeException(e, merge); } - else if (currentReader->hasDeletions()) + { - // This segment had no deletes before but now it does - for (int32_t j = 0; j < docCount; ++j) - { - if (currentReader->isDeleted(j)) - { - mergeReader->doDelete(docUpto); - ++delCount; + SyncLock syncLock(this); + mergeFinish(merge); + + if (!success) { + if (infoStream) { + message(L"hit exception during merge"); + } + + if (merge->info && 
!segmentInfos->contains(merge->info)) { + deleter->refresh(merge->info->name); } - ++docUpto; + } + + // This merge (and, generally, any change to the segments) may now enable + // new merges, so we call merge policy & update pending merges. + if (success && !merge->isAborted() && !closed && !closing) { + updatePendingMerges(merge->maxNumSegmentsOptimize, merge->optimize); } } - else - { - // No deletes before or after - docUpto += info->docCount; - } + } catch (LuceneException& e) { + finally = e; } - - BOOST_ASSERT(mergeReader->numDeletedDocs() == delCount); - - mergeReader->_hasChanges = (delCount > 0); + finally.throwException(); + } catch (std::bad_alloc& oom) { + boost::throw_exception(handleOOM(oom, L"merge")); } - - bool IndexWriter::commitMerge(OneMergePtr merge, SegmentMergerPtr merger, int32_t mergedDocCount, SegmentReaderPtr mergedReader) - { - SyncLock syncLock(this); - BOOST_ASSERT(testPoint(L"startCommitMerge")); - - if (hitOOM) - boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot complete merge")); - - if (infoStream) - message(L"commitMerge: " + merge->segString(directory) + L" index=" + segString()); - - BOOST_ASSERT(merge->registerDone); - - // If merge was explicitly aborted, or, if rollback() or rollbackTransaction() had been called since our merge - // started (which results in an unqualified deleter.refresh() call that will remove any index file that current - // segments does not reference), we abort this merge - if (merge->isAborted()) - { - if (infoStream) - message(L"commitMerge: skipping merge " + merge->segString(directory) + L": it was aborted"); +} + +void IndexWriter::mergeSuccess(const OneMergePtr& merge) { + // override +} + +bool IndexWriter::registerMerge(const OneMergePtr& merge) { + SyncLock syncLock(this); + + if (merge->registerDone) { + return true; + } + + if (stopMerges) { + merge->abort(); + boost::throw_exception(MergeAbortedException(L"merge is aborted: " + 
merge->segString(directory))); + } + + int32_t count = merge->segments->size(); + bool isExternal = false; + for (int32_t i = 0; i < count; ++i) { + SegmentInfoPtr info(merge->segments->info(i)); + if (mergingSegments.contains(info)) { return false; } - - int32_t start = ensureContiguousMerge(merge); - - commitMergedDeletes(merge, mergedReader); - docWriter->remapDeletes(segmentInfos, merger->getDocMaps(), merger->getDelCounts(), merge, mergedDocCount); - - // If the doc store we are using has been closed and is in now compound format (but wasn't when we started), - // then we will switch to the compound format as well - setMergeDocStoreIsCompoundFile(merge); - - merge->info->setHasProx(merger->hasProx()); - - segmentInfos->remove(start, start + merge->segments->size()); - BOOST_ASSERT(!segmentInfos->contains(merge->info)); - segmentInfos->add(start, merge->info); - - closeMergeReaders(merge, false); - - // Must note the change to segmentInfos so any commits in-flight don't lose it - checkpoint(); - - // If the merged segments had pending changes, clear them so that they don't bother writing - // them to disk, updating SegmentInfo, etc. 
- readerPool->clear(merge->segments); - - if (merge->optimize) - { - // cascade the optimize - segmentsToOptimize.add(merge->info); + if (!segmentInfos->contains(info)) { + return false; } - return true; - } - - LuceneException IndexWriter::handleMergeException(const LuceneException& exc, OneMergePtr merge) - { - if (infoStream) - message(L"handleMergeException: merge=" + merge->segString(directory) + L" exc=" + exc.getError()); - - // Set the exception on the merge, so if optimize() is waiting on us it sees the root cause exception - merge->setException(exc); - addMergeException(merge); - - switch (exc.getType()) - { - case LuceneException::MergeAborted: - // We can ignore this exception (it happens when close(false) or rollback is called), unless the - // merge involves segments from external directories, in which case we must throw it so, for - // example, the rollbackTransaction code in addIndexes* is executed. - if (merge->isExternal) - return exc; - break; - case LuceneException::IO: - case LuceneException::Runtime: - return exc; - default: - return RuntimeException(); // Should not get here + if (info->dir != directory) { + isExternal = true; + } + if (segmentsToOptimize.contains(info)) { + merge->optimize = true; + merge->maxNumSegmentsOptimize = optimizeMaxNumSegments; } - return LuceneException(); } - - void IndexWriter::merge(OneMergePtr merge) - { - bool success = false; - - try - { - LuceneException finally; - try - { - try - { - mergeInit(merge); - if (infoStream) - message(L"now merge\n merge=" + merge->segString(directory) + L"\n index=" + segString()); - - mergeMiddle(merge); - mergeSuccess(merge); - success = true; - } - catch (LuceneException& e) - { - finally = handleMergeException(e, merge); - } - - { - SyncLock syncLock(this); - mergeFinish(merge); - - if (!success) - { - if (infoStream) - message(L"hit exception during merge"); - - if (merge->info && !segmentInfos->contains(merge->info)) - deleter->refresh(merge->info->name); - } - // This 
merge (and, generally, any change to the segments) may now enable - // new merges, so we call merge policy & update pending merges. - if (success && !merge->isAborted() && !closed && !closing) - updatePendingMerges(merge->maxNumSegmentsOptimize, merge->optimize); - } - } - catch (LuceneException& e) - { - finally = e; - } - finally.throwException(); + ensureContiguousMerge(merge); + + pendingMerges.add(merge); + + if (infoStream) { + message(L"add merge to pendingMerges: " + merge->segString(directory) + L" [total " + StringUtils::toString(pendingMerges.size()) + L" pending]"); + } + + merge->mergeGen = mergeGen; + merge->isExternal = isExternal; + + // OK it does not conflict; now record that this merge is running (while synchronized) + // to avoid race condition where two conflicting merges from different threads, start + for (int32_t i = 0; i < count; ++i) { + mergingSegments.add(merge->segments->info(i)); + } + + // Merge is now registered + merge->registerDone = true; + return true; +} + +void IndexWriter::mergeInit(const OneMergePtr& merge) { + SyncLock syncLock(this); + bool success = false; + LuceneException finally; + try { + _mergeInit(merge); + success = true; + } catch (LuceneException& e) { + finally = e; + } + + if (!success) { + mergeFinish(merge); + } + finally.throwException(); +} + +void IndexWriter::_mergeInit(const OneMergePtr& merge) { + SyncLock syncLock(this); + bool test = testPoint(L"startMergeInit"); + BOOST_ASSERT(test); + + BOOST_ASSERT(merge->registerDone); + BOOST_ASSERT(!merge->optimize || merge->maxNumSegmentsOptimize > 0); + + if (hitOOM) { + boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot merge")); + } + + if (merge->info) { + // mergeInit already done + return; + } + + if (merge->isAborted()) { + return; + } + + applyDeletes(); + + SegmentInfosPtr sourceSegments(merge->segments); + int32_t end = sourceSegments->size(); + + // Check whether this merge will allow us to skip merging the doc 
stores (stored field & vectors). + // This is a very substantial optimization (saves tons of IO). + DirectoryPtr lastDir(directory); + String lastDocStoreSegment; + int32_t next = -1; + + bool mergeDocStores = false; + bool doFlushDocStore = false; + String currentDocStoreSegment(docWriter->getDocStoreSegment()); + + // Test each segment to be merged: check if we need to flush/merge doc stores + for (int32_t i = 0; i < end; ++i) { + SegmentInfoPtr si(sourceSegments->info(i)); + + // If it has deletions we must merge the doc stores + if (si->hasDeletions()) { + mergeDocStores = true; + } + + // If it has its own (private) doc stores we must merge the doc stores + if (si->getDocStoreOffset() == -1) { + mergeDocStores = true; + } + + // If it has a different doc store segment than previous segments, we must merge the doc stores + String docStoreSegment(si->getDocStoreSegment()); + if (docStoreSegment.empty()) { + mergeDocStores = true; + } else if (lastDocStoreSegment.empty()) { + lastDocStoreSegment = docStoreSegment; + } else if (lastDocStoreSegment != docStoreSegment) { + mergeDocStores = true; + } + + // Segments' docScoreOffsets must be in-order, contiguous. 
For the default merge policy now + // this will always be the case but for an arbitrary merge policy this may not be the case + if (next == -1) { + next = si->getDocStoreOffset() + si->docCount; + } else if (next != si->getDocStoreOffset()) { + mergeDocStores = true; + } else { + next = si->getDocStoreOffset() + si->docCount; + } + + // If the segment comes from a different directory we must merge + if (lastDir != si->dir) { + mergeDocStores = true; } - catch (std::bad_alloc& oom) - { - boost::throw_exception(handleOOM(oom, L"merge")); + + // If the segment is referencing the current "live" doc store outputs then we must merge + if (si->getDocStoreOffset() != -1 && !currentDocStoreSegment.empty() && si->getDocStoreSegment() == currentDocStoreSegment) { + doFlushDocStore = true; } } - - void IndexWriter::mergeSuccess(OneMergePtr merge) - { - // override + + // if a mergedSegmentWarmer is installed, we must merge the doc stores because we will open a full + // SegmentReader on the merged segment + if (!mergeDocStores && mergedSegmentWarmer && !currentDocStoreSegment.empty() && !lastDocStoreSegment.empty() && lastDocStoreSegment == currentDocStoreSegment) { + mergeDocStores = true; } - - bool IndexWriter::registerMerge(OneMergePtr merge) - { - SyncLock syncLock(this); - - if (merge->registerDone) - return true; - - if (stopMerges) - { - merge->abort(); - boost::throw_exception(MergeAbortedException(L"merge is aborted: " + merge->segString(directory))); - } - - int32_t count = merge->segments->size(); - bool isExternal = false; - for (int32_t i = 0; i < count; ++i) - { - SegmentInfoPtr info(merge->segments->info(i)); - if (mergingSegments.contains(info)) - return false; - if (!segmentInfos->contains(info)) - return false; - if (info->dir != directory) - isExternal = true; - if (segmentsToOptimize.contains(info)) - { - merge->optimize = true; - merge->maxNumSegmentsOptimize = optimizeMaxNumSegments; - } - } - - ensureContiguousMerge(merge); - - pendingMerges.add(merge); 
- - if (infoStream) - message(L"add merge to pendingMerges: " + merge->segString(directory) + L" [total " + StringUtils::toString(pendingMerges.size()) + L" pending]"); - - merge->mergeGen = mergeGen; - merge->isExternal = isExternal; - - // OK it does not conflict; now record that this merge is running (while synchronized) - // to avoid race condition where two conflicting merges from different threads, start - for (int32_t i = 0; i < count; ++i) - mergingSegments.add(merge->segments->info(i)); - - // Merge is now registered - merge->registerDone = true; - return true; + + int32_t docStoreOffset; + String docStoreSegment; + bool docStoreIsCompoundFile; + + if (mergeDocStores) { + docStoreOffset = -1; + docStoreSegment.clear(); + docStoreIsCompoundFile = false; + } else { + SegmentInfoPtr si(sourceSegments->info(0)); + docStoreOffset = si->getDocStoreOffset(); + docStoreSegment = si->getDocStoreSegment(); + docStoreIsCompoundFile = si->getDocStoreIsCompoundFile(); } - - void IndexWriter::mergeInit(OneMergePtr merge) - { - SyncLock syncLock(this); - bool success = false; - LuceneException finally; - try - { - _mergeInit(merge); - success = true; - } - catch (LuceneException& e) - { - finally = e; + + if (mergeDocStores && doFlushDocStore) { + // SegmentMerger intends to merge the doc stores (stored fields, vectors), and at + // least one of the segments to be merged refers to the currently live doc stores. 
+ if (infoStream) { + message(L"now flush at merge"); } - - if (!success) - mergeFinish(merge); - finally.throwException(); + doFlush(true, false); } - - void IndexWriter::_mergeInit(OneMergePtr merge) - { - SyncLock syncLock(this); - bool test = testPoint(L"startMergeInit"); - BOOST_ASSERT(test); - - BOOST_ASSERT(merge->registerDone); - BOOST_ASSERT(!merge->optimize || merge->maxNumSegmentsOptimize > 0); - - if (hitOOM) - boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot merge")); - - if (merge->info) - { - // mergeInit already done - return; - } - - if (merge->isAborted()) - return; - - applyDeletes(); - + + merge->mergeDocStores = mergeDocStores; + + // Bind a new segment name here so even with ConcurrentMergePolicy we keep deterministic segment names. + merge->info = newLucene(newSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, false); + + MapStringString details(MapStringString::newInstance()); + details.put(L"optimize", StringUtils::toString(merge->optimize)); + details.put(L"mergeFactor", StringUtils::toString(end)); + details.put(L"mergeDocStores", StringUtils::toString(mergeDocStores)); + setDiagnostics(merge->info, L"merge", details); + + // Also enroll the merged segment into mergingSegments; this prevents it from getting + // selected for a merge after our merge is done but while we are building the CFS + mergingSegments.add(merge->info); +} + +void IndexWriter::setDiagnostics(const SegmentInfoPtr& info, const String& source) { + setDiagnostics(info, source, MapStringString()); +} + +void IndexWriter::setDiagnostics(const SegmentInfoPtr& info, const String& source, MapStringString details) { + MapStringString diagnostics(MapStringString::newInstance()); + diagnostics.put(L"source", source); + diagnostics.put(L"lucene.version", Constants::LUCENE_VERSION); + diagnostics.put(L"os", Constants::OS_NAME); + if (details) { + diagnostics.putAll(details.begin(), 
details.end()); + } + info->setDiagnostics(diagnostics); +} + +void IndexWriter::mergeFinish(const OneMergePtr& merge) { + SyncLock syncLock(this); + // Optimize, addIndexes or finishMerges may be waiting on merges to finish. + notifyAll(); + + // It's possible we are called twice, eg if there was an exception inside mergeInit + if (merge->registerDone) { SegmentInfosPtr sourceSegments(merge->segments); int32_t end = sourceSegments->size(); - - // Check whether this merge will allow us to skip merging the doc stores (stored field & vectors). - // This is a very substantial optimization (saves tons of IO). - DirectoryPtr lastDir(directory); - String lastDocStoreSegment; - int32_t next = -1; - - bool mergeDocStores = false; - bool doFlushDocStore = false; - String currentDocStoreSegment(docWriter->getDocStoreSegment()); - - // Test each segment to be merged: check if we need to flush/merge doc stores - for (int32_t i = 0; i < end; ++i) - { - SegmentInfoPtr si(sourceSegments->info(i)); - - // If it has deletions we must merge the doc stores - if (si->hasDeletions()) - mergeDocStores = true; - - // If it has its own (private) doc stores we must merge the doc stores - if (si->getDocStoreOffset() == -1) - mergeDocStores = true; - - // If it has a different doc store segment than previous segments, we must merge the doc stores - String docStoreSegment(si->getDocStoreSegment()); - if (docStoreSegment.empty()) - mergeDocStores = true; - else if (lastDocStoreSegment.empty()) - lastDocStoreSegment = docStoreSegment; - else if (lastDocStoreSegment != docStoreSegment) - mergeDocStores = true; - - // Segments' docScoreOffsets must be in-order, contiguous. 
For the default merge policy now - // this will always be the case but for an arbitrary merge policy this may not be the case - if (next == -1) - next = si->getDocStoreOffset() + si->docCount; - else if (next != si->getDocStoreOffset()) - mergeDocStores = true; - else - next = si->getDocStoreOffset() + si->docCount; - - // If the segment comes from a different directory we must merge - if (lastDir != si->dir) - mergeDocStores = true; - - // If the segment is referencing the current "live" doc store outputs then we must merge - if (si->getDocStoreOffset() != -1 && !currentDocStoreSegment.empty() && si->getDocStoreSegment() == currentDocStoreSegment) - doFlushDocStore = true; - } - - // if a mergedSegmentWarmer is installed, we must merge the doc stores because we will open a full - // SegmentReader on the merged segment - if (!mergeDocStores && mergedSegmentWarmer && !currentDocStoreSegment.empty() && !lastDocStoreSegment.empty() && lastDocStoreSegment == currentDocStoreSegment) - mergeDocStores = true; - - int32_t docStoreOffset; - String docStoreSegment; - bool docStoreIsCompoundFile; - - if (mergeDocStores) - { - docStoreOffset = -1; - docStoreSegment.clear(); - docStoreIsCompoundFile = false; - } - else - { - SegmentInfoPtr si(sourceSegments->info(0)); - docStoreOffset = si->getDocStoreOffset(); - docStoreSegment = si->getDocStoreSegment(); - docStoreIsCompoundFile = si->getDocStoreIsCompoundFile(); - } - - if (mergeDocStores && doFlushDocStore) - { - // SegmentMerger intends to merge the doc stores (stored fields, vectors), and at - // least one of the segments to be merged refers to the currently live doc stores. - if (infoStream) - message(L"now flush at merge"); - doFlush(true, false); - } - - merge->mergeDocStores = mergeDocStores; - - // Bind a new segment name here so even with ConcurrentMergePolicy we keep deterministic segment names. 
- merge->info = newLucene(newSegmentName(), 0, directory, false, true, docStoreOffset, docStoreSegment, docStoreIsCompoundFile, false); - - MapStringString details(MapStringString::newInstance()); - details.put(L"optimize", StringUtils::toString(merge->optimize)); - details.put(L"mergeFactor", StringUtils::toString(end)); - details.put(L"mergeDocStores", StringUtils::toString(mergeDocStores)); - setDiagnostics(merge->info, L"merge", details); - - // Also enroll the merged segment into mergingSegments; this prevents it from getting - // selected for a merge after our merge is done but while we are building the CFS - mergingSegments.add(merge->info); - } - - void IndexWriter::setDiagnostics(SegmentInfoPtr info, const String& source) - { - setDiagnostics(info, source, MapStringString()); - } - - void IndexWriter::setDiagnostics(SegmentInfoPtr info, const String& source, MapStringString details) - { - MapStringString diagnostics(MapStringString::newInstance()); - diagnostics.put(L"source", source); - diagnostics.put(L"lucene.version", Constants::LUCENE_VERSION); - diagnostics.put(L"os", Constants::OS_NAME); - if (details) - diagnostics.putAll(details.begin(), details.end()); - info->setDiagnostics(diagnostics); - } - - void IndexWriter::mergeFinish(OneMergePtr merge) - { - SyncLock syncLock(this); - // Optimize, addIndexes or finishMerges may be waiting on merges to finish. 
- notifyAll(); - - // It's possible we are called twice, eg if there was an exception inside mergeInit - if (merge->registerDone) - { - SegmentInfosPtr sourceSegments(merge->segments); - int32_t end = sourceSegments->size(); - for (int32_t i = 0; i < end; ++i) - mergingSegments.remove(sourceSegments->info(i)); - - mergingSegments.remove(merge->info); - merge->registerDone = false; + for (int32_t i = 0; i < end; ++i) { + mergingSegments.remove(sourceSegments->info(i)); } - runningMerges.remove(merge); + mergingSegments.remove(merge->info); + merge->registerDone = false; } - - void IndexWriter::setMergeDocStoreIsCompoundFile(OneMergePtr merge) - { - SyncLock syncLock(this); - - String mergeDocStoreSegment(merge->info->getDocStoreSegment()); - if (!mergeDocStoreSegment.empty() && !merge->info->getDocStoreIsCompoundFile()) - { - int32_t size = segmentInfos->size(); - for (int32_t i = 0; i < size; ++i) - { - SegmentInfoPtr info(segmentInfos->info(i)); - String docStoreSegment(info->getDocStoreSegment()); - if (!docStoreSegment.empty() && docStoreSegment == mergeDocStoreSegment && info->getDocStoreIsCompoundFile()) - { - merge->info->setDocStoreIsCompoundFile(true); - break; - } + + runningMerges.remove(merge); +} + +void IndexWriter::setMergeDocStoreIsCompoundFile(const OneMergePtr& merge) { + SyncLock syncLock(this); + + String mergeDocStoreSegment(merge->info->getDocStoreSegment()); + if (!mergeDocStoreSegment.empty() && !merge->info->getDocStoreIsCompoundFile()) { + int32_t size = segmentInfos->size(); + for (int32_t i = 0; i < size; ++i) { + SegmentInfoPtr info(segmentInfos->info(i)); + String docStoreSegment(info->getDocStoreSegment()); + if (!docStoreSegment.empty() && docStoreSegment == mergeDocStoreSegment && info->getDocStoreIsCompoundFile()) { + merge->info->setDocStoreIsCompoundFile(true); + break; } } } - - void IndexWriter::closeMergeReaders(OneMergePtr merge, bool suppressExceptions) - { - SyncLock syncLock(this); - - int32_t numSegments = 
merge->segments->size(); - if (suppressExceptions) - { - // Suppress any new exceptions so we throw the original cause - for (int32_t i = 0; i < numSegments; ++i) - { - if (merge->readers[i]) - { - try - { - readerPool->release(merge->readers[i], false); - } - catch (...) - { - } - merge->readers[i].reset(); - } - - if (merge->readersClone[i]) - { - try - { - merge->readersClone[i]->close(); - } - catch (...) - { - } - // This was a private clone and we had the only reference - BOOST_ASSERT(merge->readersClone[i]->getRefCount() == 0); - merge->readersClone[i].reset(); +} + +void IndexWriter::closeMergeReaders(const OneMergePtr& merge, bool suppressExceptions) { + SyncLock syncLock(this); + + int32_t numSegments = merge->segments->size(); + if (suppressExceptions) { + // Suppress any new exceptions so we throw the original cause + for (int32_t i = 0; i < numSegments; ++i) { + if (merge->readers[i]) { + try { + readerPool->release(merge->readers[i], false); + } catch (...) { } + merge->readers[i].reset(); } - } - else - { - for (int32_t i = 0; i < numSegments; ++i) - { - if (merge->readers[i]) - { - readerPool->release(merge->readers[i], true); - merge->readers[i].reset(); - } - - if (merge->readersClone[i]) - { + + if (merge->readersClone[i]) { + try { merge->readersClone[i]->close(); - // This was a private clone and we had the only reference - BOOST_ASSERT(merge->readersClone[i]->getRefCount() == 0); - merge->readersClone[i].reset(); + } catch (...) 
{ } + // This was a private clone and we had the only reference + BOOST_ASSERT(merge->readersClone[i]->getRefCount() == 0); + merge->readersClone[i].reset(); + } + } + } else { + for (int32_t i = 0; i < numSegments; ++i) { + if (merge->readers[i]) { + readerPool->release(merge->readers[i], true); + merge->readers[i].reset(); + } + + if (merge->readersClone[i]) { + merge->readersClone[i]->close(); + // This was a private clone and we had the only reference + BOOST_ASSERT(merge->readersClone[i]->getRefCount() == 0); + merge->readersClone[i].reset(); } } } - - int32_t IndexWriter::mergeMiddle(OneMergePtr merge) +} + +int32_t IndexWriter::mergeMiddle(const OneMergePtr& merge) { + merge->checkAborted(directory); + + String mergedName(merge->info->name); + int32_t mergedDocCount = 0; + + SegmentInfosPtr sourceSegments(merge->segments); + int32_t numSegments = sourceSegments->size(); + + if (infoStream) { + message(L"merging " + merge->segString(directory)); + } + + SegmentMergerPtr merger(newLucene(shared_from_this(), mergedName, merge)); + + merge->readers = Collection::newInstance(numSegments); + merge->readersClone = Collection::newInstance(numSegments); + + bool mergeDocStores = false; + + String currentDocStoreSegment; { - merge->checkAborted(directory); - - String mergedName(merge->info->name); - int32_t mergedDocCount = 0; - - SegmentInfosPtr sourceSegments(merge->segments); - int32_t numSegments = sourceSegments->size(); - - if (infoStream) - message(L"merging " + merge->segString(directory)); - - SegmentMergerPtr merger(newLucene(shared_from_this(), mergedName, merge)); - - merge->readers = Collection::newInstance(numSegments); - merge->readersClone = Collection::newInstance(numSegments); - - bool mergeDocStores = false; - - String currentDocStoreSegment; - { - SyncLock syncLock(this); - currentDocStoreSegment = docWriter->getDocStoreSegment(); + SyncLock syncLock(this); + currentDocStoreSegment = docWriter->getDocStoreSegment(); + } + + bool currentDSSMerged = 
false; + + LuceneException finally; + // This is try/finally to make sure merger's readers are closed + bool success = false; + try { + int32_t totDocCount = 0; + for (int32_t i = 0; i < numSegments; ++i) { + SegmentInfoPtr info(sourceSegments->info(i)); + + // Hold onto the "live" reader; we will use this to commit merged deletes + merge->readers[i] = readerPool->get(info, merge->mergeDocStores, MERGE_READ_BUFFER_SIZE, -1); + SegmentReaderPtr reader(merge->readers[i]); + + // We clone the segment readers because other deletes may come in while we're merging so we need readers that will not change + merge->readersClone[i] = boost::dynamic_pointer_cast(reader->clone(true)); + SegmentReaderPtr clone(merge->readersClone[i]); + merger->add(clone); + + if (clone->hasDeletions()) { + mergeDocStores = true; + } + + if (info->getDocStoreOffset() != -1 && !currentDocStoreSegment.empty()) { + currentDSSMerged = currentDSSMerged || (currentDocStoreSegment == info->getDocStoreSegment()); + } + + totDocCount += clone->numDocs(); } - - bool currentDSSMerged = false; - - LuceneException finally; - // This is try/finally to make sure merger's readers are closed - bool success = false; - try - { - int32_t totDocCount = 0; - for (int32_t i = 0; i < numSegments; ++i) - { - SegmentInfoPtr info(sourceSegments->info(i)); - - // Hold onto the "live" reader; we will use this to commit merged deletes - merge->readers[i] = readerPool->get(info, merge->mergeDocStores, MERGE_READ_BUFFER_SIZE, -1); - SegmentReaderPtr reader(merge->readers[i]); - - // We clone the segment readers because other deletes may come in while we're merging so we need readers that will not change - merge->readersClone[i] = boost::dynamic_pointer_cast(reader->clone(true)); - SegmentReaderPtr clone(merge->readersClone[i]); - merger->add(clone); - - if (clone->hasDeletions()) - mergeDocStores = true; - - if (info->getDocStoreOffset() != -1 && !currentDocStoreSegment.empty()) - currentDSSMerged = currentDSSMerged || 
(currentDocStoreSegment == info->getDocStoreSegment()); - - totDocCount += clone->numDocs(); - } - - if (infoStream) - message(L"merge: total " + StringUtils::toString(totDocCount) + L" docs"); - - merge->checkAborted(directory); - - // If deletions have arrived and it has now become necessary to merge doc stores, go and open them - if (mergeDocStores && !merge->mergeDocStores) + + if (infoStream) { + message(L"merge: total " + StringUtils::toString(totDocCount) + L" docs"); + } + + merge->checkAborted(directory); + + // If deletions have arrived and it has now become necessary to merge doc stores, go and open them + if (mergeDocStores && !merge->mergeDocStores) { + merge->mergeDocStores = true; + { - merge->mergeDocStores = true; - - { - SyncLock syncLock(this); - if (currentDSSMerged) - { - if (infoStream) - message(L"now flush at mergeMiddle"); - doFlush(true, false); + SyncLock syncLock(this); + if (currentDSSMerged) { + if (infoStream) { + message(L"now flush at mergeMiddle"); } + doFlush(true, false); } - - for (Collection::iterator reader = merge->readersClone.begin(); reader != merge->readersClone.end(); ++reader) - (*reader)->openDocStores(); - - // Clear DSS - merge->info->setDocStore(-1, L"", false); - } - - // This is where all the work happens - merge->info->docCount = merger->merge(merge->mergeDocStores); - mergedDocCount = merge->info->docCount; - - BOOST_ASSERT(mergedDocCount == totDocCount); - - if (merge->useCompoundFile) - { - success = false; - - String compoundFileName(IndexFileNames::segmentFileName(mergedName, IndexFileNames::COMPOUND_FILE_EXTENSION())); - - try - { - if (infoStream) - message(L"create compound file " + compoundFileName); - merger->createCompoundFile(compoundFileName); - success = true; - } - catch (IOException& ioe) - { - SyncLock syncLock(this); - if (merge->isAborted()) - { - // This can happen if rollback or close(false) is called - fall through to logic - // below to remove the partially created CFS - } - else - finally 
= handleMergeException(ioe, merge); + } + + for (Collection::iterator reader = merge->readersClone.begin(); reader != merge->readersClone.end(); ++reader) { + (*reader)->openDocStores(); + } + + // Clear DSS + merge->info->setDocStore(-1, L"", false); + } + + // This is where all the work happens + merge->info->docCount = merger->merge(merge->mergeDocStores); + mergedDocCount = merge->info->docCount; + + BOOST_ASSERT(mergedDocCount == totDocCount); + + if (merge->useCompoundFile) { + success = false; + + String compoundFileName(IndexFileNames::segmentFileName(mergedName, IndexFileNames::COMPOUND_FILE_EXTENSION())); + + try { + if (infoStream) { + message(L"create compound file " + compoundFileName); } - catch (LuceneException& e) - { - finally = handleMergeException(e, merge); + merger->createCompoundFile(compoundFileName); + success = true; + } catch (IOException& ioe) { + SyncLock syncLock(this); + if (merge->isAborted()) { + // This can happen if rollback or close(false) is called - fall through to logic + // below to remove the partially created CFS + } else { + finally = handleMergeException(ioe, merge); } - - if (!success) - { - if (infoStream) - message(L"hit exception creating compound file during merge"); - { - SyncLock syncLock(this); - deleter->deleteFile(compoundFileName); - deleter->deleteNewFiles(merger->getMergedFiles()); - } + } catch (LuceneException& e) { + finally = handleMergeException(e, merge); + } + + if (!success) { + if (infoStream) { + message(L"hit exception creating compound file during merge"); } - - finally.throwException(); - - success = false; - { SyncLock syncLock(this); - - // delete new non cfs files directly: they were never registered with IFD + deleter->deleteFile(compoundFileName); deleter->deleteNewFiles(merger->getMergedFiles()); - - if (merge->isAborted()) - { - if (infoStream) - message(L"abort merge after building CFS"); - deleter->deleteFile(compoundFileName); - boost::throw_exception(TemporaryException()); - } } - - 
merge->info->setUseCompoundFile(true); - } - - int32_t termsIndexDivisor = -1; - bool loadDocStores = false; - - // if the merged segment warmer was not installed when this merge was started, causing us - // to not force the docStores to close, we can't warm it now - bool canWarm = (merge->info->getDocStoreSegment().empty() || currentDocStoreSegment.empty() || merge->info->getDocStoreSegment() == currentDocStoreSegment); - - if (poolReaders && mergedSegmentWarmer && canWarm) - { - // Load terms index & doc stores so the segment warmer can run searches, load documents/term vectors - termsIndexDivisor = readerTermsIndexDivisor; - loadDocStores = true; - } - - SegmentReaderPtr mergedReader(readerPool->get(merge->info, loadDocStores, BufferedIndexInput::BUFFER_SIZE, termsIndexDivisor)); - - try + } + + finally.throwException(); + + success = false; + { - if (poolReaders && mergedSegmentWarmer) - mergedSegmentWarmer->warm(mergedReader); - if (!commitMerge(merge, merger, mergedDocCount, mergedReader)) - { - // commitMerge will return false if this merge was aborted + SyncLock syncLock(this); + + // delete new non cfs files directly: they were never registered with IFD + deleter->deleteNewFiles(merger->getMergedFiles()); + + if (merge->isAborted()) { + if (infoStream) { + message(L"abort merge after building CFS"); + } + deleter->deleteFile(compoundFileName); boost::throw_exception(TemporaryException()); } } - catch (LuceneException& e) - { - finally = e; + + merge->info->setUseCompoundFile(true); + } + + int32_t termsIndexDivisor = -1; + bool loadDocStores = false; + + // if the merged segment warmer was not installed when this merge was started, causing us + // to not force the docStores to close, we can't warm it now + bool canWarm = (merge->info->getDocStoreSegment().empty() || currentDocStoreSegment.empty() || merge->info->getDocStoreSegment() == currentDocStoreSegment); + + if (poolReaders && mergedSegmentWarmer && canWarm) { + // Load terms index & doc stores so 
the segment warmer can run searches, load documents/term vectors + termsIndexDivisor = readerTermsIndexDivisor; + loadDocStores = true; + } + + SegmentReaderPtr mergedReader(readerPool->get(merge->info, loadDocStores, BufferedIndexInput::BUFFER_SIZE, termsIndexDivisor)); + + try { + if (poolReaders && mergedSegmentWarmer) { + mergedSegmentWarmer->warm(mergedReader); } - - { - SyncLock syncLock(this); - readerPool->release(mergedReader); + if (!commitMerge(merge, merger, mergedDocCount, mergedReader)) { + // commitMerge will return false if this merge was aborted + boost::throw_exception(TemporaryException()); } - - finally.throwException(); - - success = true; + } catch (LuceneException& e) { + finally = e; } - catch (LuceneException& e) + { - finally = e; + SyncLock syncLock(this); + readerPool->release(mergedReader); } - // Readers are already closed in commitMerge if we didn't hit an exc - if (!success) - closeMergeReaders(merge, true); - - // has this merge been aborted? - if (finally.getType() == LuceneException::Temporary) - return 0; - finally.throwException(); - - return mergedDocCount; + + success = true; + } catch (LuceneException& e) { + finally = e; } - - void IndexWriter::addMergeException(OneMergePtr merge) - { - SyncLock syncLock(this); - BOOST_ASSERT(!merge->getException().isNull()); - if (!mergeExceptions.contains(merge) && mergeGen == merge->mergeGen) - mergeExceptions.add(merge); + + // Readers are already closed in commitMerge if we didn't hit an exc + if (!success) { + closeMergeReaders(merge, true); } - - bool IndexWriter::applyDeletes() - { - TestScope testScope(L"IndexWriter", L"applyDeletes"); - SyncLock syncLock(this); - BOOST_ASSERT(testPoint(L"startApplyDeletes")); - ++flushDeletesCount; - bool success = false; - bool changed = false; - - LuceneException finally; - try - { - changed = docWriter->applyDeletes(segmentInfos); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - - if (!success && infoStream) - 
message(L"hit exception flushing deletes"); - - finally.throwException(); - - if (changed) - checkpoint(); - return changed; + + // has this merge been aborted? + if (finally.getType() == LuceneException::Temporary) { + return 0; } - - int32_t IndexWriter::getBufferedDeleteTermsSize() - { - SyncLock syncLock(this); - return docWriter->getBufferedDeleteTerms().size(); + + finally.throwException(); + + return mergedDocCount; +} + +void IndexWriter::addMergeException(const OneMergePtr& merge) { + SyncLock syncLock(this); + BOOST_ASSERT(!merge->getException().isNull()); + if (!mergeExceptions.contains(merge) && mergeGen == merge->mergeGen) { + mergeExceptions.add(merge); } - - int32_t IndexWriter::getNumBufferedDeleteTerms() - { - SyncLock syncLock(this); - return docWriter->getNumBufferedDeleteTerms(); +} + +bool IndexWriter::applyDeletes() { + TestScope testScope(L"IndexWriter", L"applyDeletes"); + SyncLock syncLock(this); + BOOST_ASSERT(testPoint(L"startApplyDeletes")); + ++flushDeletesCount; + bool success = false; + bool changed = false; + + LuceneException finally; + try { + changed = docWriter->applyDeletes(segmentInfos); + success = true; + } catch (LuceneException& e) { + finally = e; } - - SegmentInfoPtr IndexWriter::newestSegment() - { - return !segmentInfos->empty() ? segmentInfos->info(segmentInfos->size() - 1) : SegmentInfoPtr(); + + if (!success && infoStream) { + message(L"hit exception flushing deletes"); + } + + finally.throwException(); + + if (changed) { + checkpoint(); + } + return changed; +} + +int32_t IndexWriter::getBufferedDeleteTermsSize() { + SyncLock syncLock(this); + return docWriter->getBufferedDeleteTerms().size(); +} + +int32_t IndexWriter::getNumBufferedDeleteTerms() { + SyncLock syncLock(this); + return docWriter->getNumBufferedDeleteTerms(); +} + +SegmentInfoPtr IndexWriter::newestSegment() { + return !segmentInfos->empty() ? 
segmentInfos->info(segmentInfos->size() - 1) : SegmentInfoPtr(); +} + +String IndexWriter::segString() { + return segString(segmentInfos); +} + +String IndexWriter::segString(const SegmentInfosPtr& infos) { + SyncLock syncLock(this); + StringStream buffer; + int32_t count = infos->size(); + for (int32_t i = 0; i < count; ++i) { + if (i > 0) { + buffer << L" "; + } + SegmentInfoPtr info(infos->info(i)); + buffer << info->segString(directory); + if (info->dir != directory) { + buffer << L"**"; + } } + return buffer.str(); +} - String IndexWriter::segString() - { - return segString(segmentInfos); - } - - String IndexWriter::segString(SegmentInfosPtr infos) - { - SyncLock syncLock(this); - StringStream buffer; - int32_t count = infos->size(); - for (int32_t i = 0; i < count; ++i) - { - if (i > 0) - buffer << L" "; - SegmentInfoPtr info(infos->info(i)); - buffer << info->segString(directory); - if (info->dir != directory) - buffer << L"**"; +bool IndexWriter::startSync(const String& fileName, HashSet pending) { + SyncLock syncedLock(&synced); + if (!synced.contains(fileName)) { + if (!syncing.contains(fileName)) { + syncing.add(fileName); + return true; + } else { + pending.add(fileName); + return false; } - return buffer.str(); + } else { + return false; } - - bool IndexWriter::startSync(const String& fileName, HashSet pending) - { - SyncLock syncedLock(&synced); - if (!synced.contains(fileName)) - { - if (!syncing.contains(fileName)) - { - syncing.add(fileName); - return true; - } - else - { - pending.add(fileName); +} + +void IndexWriter::finishSync(const String& fileName, bool success) { + SyncLock syncedLock(&synced); + BOOST_ASSERT(syncing.contains(fileName)); + syncing.remove(fileName); + if (success) { + synced.add(fileName); + } + synced.notifyAll(); +} + +bool IndexWriter::waitForAllSynced(HashSet syncing) { + SyncLock syncedLock(&synced); + for (HashSet::iterator fileName = syncing.begin(); fileName != syncing.end(); ++fileName) { + while 
(!synced.contains(*fileName)) { + if (!syncing.contains(*fileName)) { + // There was an error because a file that was previously syncing failed to appear in synced return false; + } else { + synced.wait(); } } - else - return false; } + return true; +} - void IndexWriter::finishSync(const String& fileName, bool success) - { - SyncLock syncedLock(&synced); - BOOST_ASSERT(syncing.contains(fileName)); - syncing.remove(fileName); - if (success) - synced.add(fileName); - synced.notifyAll(); - } - - bool IndexWriter::waitForAllSynced(HashSet syncing) - { - SyncLock syncedLock(&synced); - for (HashSet::iterator fileName = syncing.begin(); fileName != syncing.end(); ++fileName) - { - while (!synced.contains(*fileName)) - { - if (!syncing.contains(*fileName)) - { - // There was an error because a file that was previously syncing failed to appear in synced - return false; - } - else - synced.wait(); - } - } - return true; - } +void IndexWriter::doWait() { + SyncLock syncLock(this); + // NOTE: the callers of this method should in theory be able to do simply wait(), but, as a defense against + // thread timing hazards where notifyAll() fails to be called, we wait for at most 1 second and then return + // so caller can check if wait conditions are satisfied + wait(1000); +} - void IndexWriter::doWait() - { - SyncLock syncLock(this); - // NOTE: the callers of this method should in theory be able to do simply wait(), but, as a defense against - // thread timing hazards where notifyAll() fails to be called, we wait for at most 1 second and then return - // so caller can check if wait conditions are satisfied - wait(1000); +void IndexWriter::startCommit(int64_t sizeInBytes, MapStringString commitUserData) { + BOOST_ASSERT(testPoint(L"startStartCommit")); + + if (hitOOM) { + boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot commit")); } - void IndexWriter::startCommit(int64_t sizeInBytes, MapStringString commitUserData) - { - 
BOOST_ASSERT(testPoint(L"startStartCommit")); - - if (hitOOM) - boost::throw_exception(IllegalStateException(L"this writer hit an OutOfMemoryError; cannot commit")); - - try + try { + if (infoStream) { + message(L"startCommit(): start sizeInBytes=" + StringUtils::toString(sizeInBytes)); + } + + SegmentInfosPtr toSync; + int64_t myChangeCount = 0; + LuceneException finally; + { - if (infoStream) - message(L"startCommit(): start sizeInBytes=" + StringUtils::toString(sizeInBytes)); - - SegmentInfosPtr toSync; - int64_t myChangeCount = 0; - LuceneException finally; - - { - SyncLock syncLock(this); - - // Wait for any running addIndexes to complete first, then block any from running - // until we've copied the segmentInfos we intend to sync - blockAddIndexes(false); - - // On commit the segmentInfos must never reference a segment in another directory - BOOST_ASSERT(!hasExternalSegments()); - - try - { - BOOST_ASSERT(lastCommitChangeCount <= changeCount); - myChangeCount = changeCount; - - if (changeCount == lastCommitChangeCount) - { - if (infoStream) - message(L" skip startCommit(): no changes pending"); - boost::throw_exception(TemporaryException()); + SyncLock syncLock(this); + + // Wait for any running addIndexes to complete first, then block any from running + // until we've copied the segmentInfos we intend to sync + blockAddIndexes(false); + + // On commit the segmentInfos must never reference a segment in another directory + BOOST_ASSERT(!hasExternalSegments()); + + try { + BOOST_ASSERT(lastCommitChangeCount <= changeCount); + myChangeCount = changeCount; + + if (changeCount == lastCommitChangeCount) { + if (infoStream) { + message(L" skip startCommit(): no changes pending"); } + boost::throw_exception(TemporaryException()); + } - // First, we clone & incref the segmentInfos we intend to sync, then, without locking, we sync() each - // file referenced by toSync, in the background. 
Multiple threads can be doing this at once, if say - // a large merge and a small merge finish at the same time - - if (infoStream) - message(L"startCommit index=" + segString(segmentInfos) + L" changeCount=" + StringUtils::toString(changeCount)); - - readerPool->commit(); - - // It's possible another flush (that did not close the open do stores) snook in after the flush we - // just did, so we remove any tail segments referencing the open doc store from the SegmentInfos - // we are about to sync (the main SegmentInfos will keep them) - toSync = boost::dynamic_pointer_cast(segmentInfos->clone()); - - String dss(docWriter->getDocStoreSegment()); - if (!dss.empty()) - { - while(true) - { - String dss2(toSync->info(toSync->size() - 1)->getDocStoreSegment()); - if (dss2.empty() || dss2 != dss) - break; - toSync->remove(toSync->size() - 1); - ++changeCount; + // First, we clone & incref the segmentInfos we intend to sync, then, without locking, we sync() each + // file referenced by toSync, in the background. 
Multiple threads can be doing this at once, if say + // a large merge and a small merge finish at the same time + + if (infoStream) { + message(L"startCommit index=" + segString(segmentInfos) + L" changeCount=" + StringUtils::toString(changeCount)); + } + + readerPool->commit(); + + // It's possible another flush (that did not close the open do stores) snook in after the flush we + // just did, so we remove any tail segments referencing the open doc store from the SegmentInfos + // we are about to sync (the main SegmentInfos will keep them) + toSync = boost::dynamic_pointer_cast(segmentInfos->clone()); + + String dss(docWriter->getDocStoreSegment()); + if (!dss.empty()) { + while (true) { + String dss2(toSync->info(toSync->size() - 1)->getDocStoreSegment()); + if (dss2.empty() || dss2 != dss) { + break; } - } - - if (commitUserData) - toSync->setUserData(commitUserData); - - deleter->incRef(toSync, false); - - HashSet files(toSync->files(directory, false)); - for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) - { - BOOST_ASSERT(directory->fileExists(*fileName)); - - // If this trips it means we are missing a call to .checkpoint somewhere, because by the - // time we are called, deleter should know about every file referenced by the current head - // segmentInfos - BOOST_ASSERT(deleter->exists(*fileName)); + toSync->remove(toSync->size() - 1); + ++changeCount; } } - catch (LuceneException& e) - { - finally = e; + + if (commitUserData) { + toSync->setUserData(commitUserData); } - resumeAddIndexes(); - - // no changes pending? 
- if (finally.getType() == LuceneException::Temporary) - return; - - finally.throwException(); - } - - BOOST_ASSERT(testPoint(L"midStartCommit")); - - bool setPending = false; - - try - { - // Loop until all files toSync references are sync'd - while (true) - { - HashSet pending(HashSet::newInstance()); - HashSet files(toSync->files(directory, false)); - for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) - { - if (startSync(*fileName, pending)) - { - bool success = false; - try - { - // Because we incRef'd this commit point above, the file had better exist - BOOST_ASSERT(directory->fileExists(*fileName)); - - if (infoStream) - message(L"now sync " + *fileName); - directory->sync(*fileName); - success = true; - } - catch (LuceneException& e) - { - finally = e; + + deleter->incRef(toSync, false); + + HashSet files(toSync->files(directory, false)); + for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { + BOOST_ASSERT(directory->fileExists(*fileName)); + + // If this trips it means we are missing a call to .checkpoint somewhere, because by the + // time we are called, deleter should know about every file referenced by the current head + // segmentInfos + BOOST_ASSERT(deleter->exists(*fileName)); + } + } catch (LuceneException& e) { + finally = e; + } + resumeAddIndexes(); + + // no changes pending? 
+ if (finally.getType() == LuceneException::Temporary) { + return; + } + + finally.throwException(); + } + + BOOST_ASSERT(testPoint(L"midStartCommit")); + + bool setPending = false; + + try { + // Loop until all files toSync references are sync'd + while (true) { + HashSet pending(HashSet::newInstance()); + HashSet files(toSync->files(directory, false)); + for (HashSet::iterator fileName = files.begin(); fileName != files.end(); ++fileName) { + if (startSync(*fileName, pending)) { + bool success = false; + try { + // Because we incRef'd this commit point above, the file had better exist + BOOST_ASSERT(directory->fileExists(*fileName)); + + if (infoStream) { + message(L"now sync " + *fileName); } - finishSync(*fileName, success); - finally.throwException(); + directory->sync(*fileName); + success = true; + } catch (LuceneException& e) { + finally = e; } + finishSync(*fileName, success); + finally.throwException(); } - - // All files that I require are either synced or being synced by other threads. If they are being - // synced, we must at this point block until they are done. If this returns false, that means an - // error in another thread resulted in failing to actually sync one of our files, so we repeat - if (waitForAllSynced(pending)) - break; } - - BOOST_ASSERT(testPoint(L"midStartCommit2")); - - { - SyncLock syncLock(this); - - // If someone saved a newer version of segments file since I first started syncing - // my version, I can safely skip saving myself since I've been superseded - - while (true) - { - if (myChangeCount <= lastCommitChangeCount) - { - if (infoStream) - message(L"sync superseded by newer infos"); - break; + + // All files that I require are either synced or being synced by other threads. If they are being + // synced, we must at this point block until they are done. 
If this returns false, that means an + // error in another thread resulted in failing to actually sync one of our files, so we repeat + if (waitForAllSynced(pending)) { + break; + } + } + + BOOST_ASSERT(testPoint(L"midStartCommit2")); + + { + SyncLock syncLock(this); + + // If someone saved a newer version of segments file since I first started syncing + // my version, I can safely skip saving myself since I've been superseded + + while (true) { + if (myChangeCount <= lastCommitChangeCount) { + if (infoStream) { + message(L"sync superseded by newer infos"); } - else if (!pendingCommit) - { - // My turn to commit - if (segmentInfos->getGeneration() > toSync->getGeneration()) - toSync->updateGeneration(segmentInfos); - - bool success = false; - try - { - // Exception here means nothing is prepared (this method unwinds - // everything it did on an exception) - try - { - toSync->prepareCommit(directory); - } - catch (LuceneException& e) - { - finally = e; - } - - // Have our master segmentInfos record the generations we just prepared. We do this on - // error or success so we don't double-write a segments_N file. - segmentInfos->updateGeneration(toSync); - finally.throwException(); - - BOOST_ASSERT(!pendingCommit); - setPending = true; - pendingCommit = toSync; - pendingCommitChangeCount = myChangeCount; - success = true; - } - catch (LuceneException& e) - { + break; + } else if (!pendingCommit) { + // My turn to commit + if (segmentInfos->getGeneration() > toSync->getGeneration()) { + toSync->updateGeneration(segmentInfos); + } + + bool success = false; + try { + // Exception here means nothing is prepared (this method unwinds + // everything it did on an exception) + try { + toSync->prepareCommit(directory); + } catch (LuceneException& e) { finally = e; } - - if (!success && infoStream) - message(L"hit exception committing segments file"); + + // Have our master segmentInfos record the generations we just prepared. 
We do this on + // error or success so we don't double-write a segments_N file. + segmentInfos->updateGeneration(toSync); finally.throwException(); - break; + + BOOST_ASSERT(!pendingCommit); + setPending = true; + pendingCommit = toSync; + pendingCommitChangeCount = myChangeCount; + success = true; + } catch (LuceneException& e) { + finally = e; } - else - { - // Must wait for other commit to complete - doWait(); + + if (!success && infoStream) { + message(L"hit exception committing segments file"); } + finally.throwException(); + break; + } else { + // Must wait for other commit to complete + doWait(); } } - - if (infoStream) - message(L"done all syncs"); - BOOST_ASSERT(testPoint(L"midStartCommitSuccess")); - } - catch (LuceneException& e) - { - finally = e; } - - { - SyncLock syncLock(this); - if (!setPending) - deleter->decRef(toSync); + + if (infoStream) { + message(L"done all syncs"); } - finally.throwException(); - } - catch (std::bad_alloc& oom) - { - boost::throw_exception(handleOOM(oom, L"startCommit")); + BOOST_ASSERT(testPoint(L"midStartCommitSuccess")); + } catch (LuceneException& e) { + finally = e; } - BOOST_ASSERT(testPoint(L"finishStartCommit")); - } - - bool IndexWriter::isLocked(DirectoryPtr directory) - { - return directory->makeLock(WRITE_LOCK_NAME)->isLocked(); - } - - void IndexWriter::unlock(DirectoryPtr directory) - { - directory->makeLock(IndexWriter::WRITE_LOCK_NAME)->release(); - } - void IndexWriter::setMergedSegmentWarmer(IndexReaderWarmerPtr warmer) - { - mergedSegmentWarmer = warmer; - } - - IndexReaderWarmerPtr IndexWriter::getMergedSegmentWarmer() - { - return mergedSegmentWarmer; - } - - LuceneException IndexWriter::handleOOM(const std::bad_alloc& oom, const String& location) - { - if (infoStream) - message(L"hit OutOfMemoryError inside " + location); - hitOOM = true; - return OutOfMemoryError(); - } - - bool IndexWriter::testPoint(const String& name) - { - return true; - } - - bool IndexWriter::nrtIsCurrent(SegmentInfosPtr infos) 
- { - SyncLock syncLock(this); - if (!infos->equals(segmentInfos)) - { - // if any structural changes (new segments), we are stale - return false; - } - else if (infos->getGeneration() != segmentInfos->getGeneration()) { - // if any commit took place since we were opened, we are stale - return false; + SyncLock syncLock(this); + if (!setPending) { + deleter->decRef(toSync); + } } - else - return !docWriter->anyChanges(); - } - - bool IndexWriter::isClosed() - { - SyncLock syncLock(this); - return closed; + finally.throwException(); + } catch (std::bad_alloc& oom) { + boost::throw_exception(handleOOM(oom, L"startCommit")); } - - ReaderPool::ReaderPool(IndexWriterPtr writer) - { - readerMap = MapSegmentInfoSegmentReader::newInstance(); - _indexWriter = writer; + BOOST_ASSERT(testPoint(L"finishStartCommit")); +} + +bool IndexWriter::isLocked(const DirectoryPtr& directory) { + return directory->makeLock(WRITE_LOCK_NAME)->isLocked(); +} + +void IndexWriter::unlock(const DirectoryPtr& directory) { + directory->makeLock(IndexWriter::WRITE_LOCK_NAME)->release(); +} + +void IndexWriter::setMergedSegmentWarmer(const IndexReaderWarmerPtr& warmer) { + mergedSegmentWarmer = warmer; +} + +IndexReaderWarmerPtr IndexWriter::getMergedSegmentWarmer() { + return mergedSegmentWarmer; +} + +LuceneException IndexWriter::handleOOM(const std::bad_alloc& oom, const String& location) { + if (infoStream) { + message(L"hit OutOfMemoryError inside " + location); } + hitOOM = true; + return OutOfMemoryError(); +} - ReaderPool::~ReaderPool() - { +bool IndexWriter::testPoint(const String& name) { + return true; +} + +bool IndexWriter::nrtIsCurrent(const SegmentInfosPtr& infos) { + SyncLock syncLock(this); + if (!infos->equals(segmentInfos)) { + // if any structural changes (new segments), we are stale + return false; + } else if (infos->getGeneration() != segmentInfos->getGeneration()) { + // if any commit took place since we were opened, we are stale + return false; + } else { + return 
!docWriter->anyChanges(); } - - void ReaderPool::clear(SegmentInfosPtr infos) - { - SyncLock syncLock(this); - if (!infos) - { - for (MapSegmentInfoSegmentReader::iterator ent = readerMap.begin(); ent != readerMap.end(); ++ent) +} + +bool IndexWriter::isClosed() { + SyncLock syncLock(this); + return closed; +} + +ReaderPool::ReaderPool(const IndexWriterPtr& writer) { + readerMap = MapSegmentInfoSegmentReader::newInstance(); + _indexWriter = writer; +} + +ReaderPool::~ReaderPool() { +} + +void ReaderPool::clear(const SegmentInfosPtr& infos) { + SyncLock syncLock(this); + if (!infos) { + for (MapSegmentInfoSegmentReader::iterator ent = readerMap.begin(); ent != readerMap.end(); ++ent) { + ent->second->_hasChanges = false; + } + } else { + for (int32_t i = 0; i < infos->size(); ++i) { + MapSegmentInfoSegmentReader::iterator ent = readerMap.find(infos->info(i)); + if (ent != readerMap.end()) { ent->second->_hasChanges = false; - } - else - { - for (int32_t i = 0; i < infos->size(); ++i) - { - MapSegmentInfoSegmentReader::iterator ent = readerMap.find(infos->info(i)); - if (ent != readerMap.end()) - ent->second->_hasChanges = false; } } } - - bool ReaderPool::infoIsLive(SegmentInfoPtr info) - { - SyncLock syncLock(this); - IndexWriterPtr indexWriter(_indexWriter); - int32_t idx = indexWriter->segmentInfos->find(info); - BOOST_ASSERT(idx != -1); - BOOST_ASSERT(indexWriter->segmentInfos->info(idx) == info); - return true; - } - - SegmentInfoPtr ReaderPool::mapToLive(SegmentInfoPtr info) - { - SyncLock syncLock(this); - IndexWriterPtr indexWriter(_indexWriter); - int32_t idx = indexWriter->segmentInfos->find(info); - if (idx != -1) - info = indexWriter->segmentInfos->info(idx); - return info; - } - - void ReaderPool::release(SegmentReaderPtr sr) - { - release(sr, false); - } - - void ReaderPool::release(SegmentReaderPtr sr, bool drop) - { - SyncLock syncLock(this); - IndexWriterPtr indexWriter(_indexWriter); - - bool pooled = readerMap.contains(sr->getSegmentInfo()); - - 
BOOST_ASSERT(!pooled || readerMap.get(sr->getSegmentInfo()) == sr); - - // Drop caller's ref; for an external reader (not pooled), this decRef will close it - sr->decRef(); - - if (pooled && (drop || (!indexWriter->poolReaders && sr->getRefCount() == 1))) - { - // We invoke deleter.checkpoint below, so we must be sync'd on IW if there are changes - BOOST_ASSERT(!sr->_hasChanges || holdsLock()); +} - // Discard (don't save) changes when we are dropping the reader; this is used only on the - // sub-readers after a successful merge. - sr->_hasChanges = sr->_hasChanges && !drop; +bool ReaderPool::infoIsLive(const SegmentInfoPtr& info) { + SyncLock syncLock(this); + IndexWriterPtr indexWriter(_indexWriter); + int32_t idx = indexWriter->segmentInfos->find(info); + BOOST_ASSERT(idx != -1); + BOOST_ASSERT(indexWriter->segmentInfos->info(idx) == info); + return true; +} - bool hasChanges = sr->_hasChanges; +SegmentInfoPtr ReaderPool::mapToLive(const SegmentInfoPtr& info) { + SyncLock syncLock(this); + IndexWriterPtr indexWriter(_indexWriter); + int32_t idx = indexWriter->segmentInfos->find(info); + SegmentInfoPtr _info(info); + if (idx != -1) { + _info = indexWriter->segmentInfos->info(idx); + } + return _info; +} - // Drop our ref - this will commit any pending changes to the dir - sr->close(); +void ReaderPool::release(const SegmentReaderPtr& sr) { + release(sr, false); +} - // We are the last ref to this reader; since we're not pooling readers, we release it - readerMap.remove(sr->getSegmentInfo()); +void ReaderPool::release(const SegmentReaderPtr& sr, bool drop) { + SyncLock syncLock(this); + IndexWriterPtr indexWriter(_indexWriter); - if (hasChanges) - { - // Must checkpoint with deleter, because this segment reader will have created new - // _X_N.del file. 
- indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); - } + bool pooled = readerMap.contains(sr->getSegmentInfo()); + + BOOST_ASSERT(!pooled || readerMap.get(sr->getSegmentInfo()) == sr); + + // Drop caller's ref; for an external reader (not pooled), this decRef will close it + sr->decRef(); + + if (pooled && (drop || (!indexWriter->poolReaders && sr->getRefCount() == 1))) { + // We invoke deleter.checkpoint below, so we must be sync'd on IW if there are changes + BOOST_ASSERT(!sr->_hasChanges || holdsLock()); + + // Discard (don't save) changes when we are dropping the reader; this is used only on the + // sub-readers after a successful merge. + sr->_hasChanges = sr->_hasChanges && !drop; + + bool hasChanges = sr->_hasChanges; + + // Drop our ref - this will commit any pending changes to the dir + sr->close(); + + // We are the last ref to this reader; since we're not pooling readers, we release it + readerMap.remove(sr->getSegmentInfo()); + + if (hasChanges) { + // Must checkpoint with deleter, because this segment reader will have created new + // _X_N.del file. + indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } } - - void ReaderPool::close() - { - SyncLock syncLock(this); - IndexWriterPtr indexWriter(_indexWriter); - - // We invoke deleter.checkpoint below, so we must be sync'd on IW - BOOST_ASSERT(holdsLock()); - - for (MapSegmentInfoSegmentReader::iterator iter = readerMap.begin(); iter != readerMap.end(); ++iter) - { - if (iter->second->_hasChanges) - { - BOOST_ASSERT(infoIsLive(iter->second->getSegmentInfo())); - iter->second->doCommit(MapStringString()); - - // Must checkpoint with deleter, because this segment reader will have created - // new _X_N.del file. 
- indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); - } - - // NOTE: it is allowed that this decRef does not actually close the SR; this can happen when a - // near real-time reader is kept open after the IndexWriter instance is closed - iter->second->decRef(); - } - readerMap.clear(); - } - - void ReaderPool::commit() - { - SyncLock syncLock(this); - IndexWriterPtr indexWriter(_indexWriter); - - // We invoke deleter.checkpoint below, so we must be sync'd on IW - BOOST_ASSERT(holdsLock()); - - for (MapSegmentInfoSegmentReader::iterator ent = readerMap.begin(); ent != readerMap.end(); ++ent) - { - if (ent->second->_hasChanges) - { - BOOST_ASSERT(infoIsLive(ent->second->getSegmentInfo())); - ent->second->doCommit(MapStringString()); - - // Must checkpoint with deleter, because this segment reader will have created - // new _X_N.del file. - indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); - } +} + +void ReaderPool::close() { + SyncLock syncLock(this); + IndexWriterPtr indexWriter(_indexWriter); + + // We invoke deleter.checkpoint below, so we must be sync'd on IW + BOOST_ASSERT(holdsLock()); + + for (MapSegmentInfoSegmentReader::iterator iter = readerMap.begin(); iter != readerMap.end(); ++iter) { + if (iter->second->_hasChanges) { + BOOST_ASSERT(infoIsLive(iter->second->getSegmentInfo())); + iter->second->doCommit(MapStringString()); + + // Must checkpoint with deleter, because this segment reader will have created + // new _X_N.del file. 
+ indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } + + // NOTE: it is allowed that this decRef does not actually close the SR; this can happen when a + // near real-time reader is kept open after the IndexWriter instance is closed + iter->second->decRef(); } - - IndexReaderPtr ReaderPool::getReadOnlyClone(SegmentInfoPtr info, bool doOpenStores, int32_t termInfosIndexDivisor) - { - SyncLock syncLock(this); - SegmentReaderPtr sr(get(info, doOpenStores, BufferedIndexInput::BUFFER_SIZE, termInfosIndexDivisor)); - IndexReaderPtr clone; - LuceneException finally; - try - { - clone = boost::dynamic_pointer_cast(sr->clone(true)); - } - catch (LuceneException& e) - { - finally = e; + readerMap.clear(); +} + +void ReaderPool::commit() { + SyncLock syncLock(this); + IndexWriterPtr indexWriter(_indexWriter); + + // We invoke deleter.checkpoint below, so we must be sync'd on IW + BOOST_ASSERT(holdsLock()); + + for (MapSegmentInfoSegmentReader::iterator ent = readerMap.begin(); ent != readerMap.end(); ++ent) { + if (ent->second->_hasChanges) { + BOOST_ASSERT(infoIsLive(ent->second->getSegmentInfo())); + ent->second->doCommit(MapStringString()); + + // Must checkpoint with deleter, because this segment reader will have created + // new _X_N.del file. 
+ indexWriter->deleter->checkpoint(indexWriter->segmentInfos, false); } - sr->decRef(); - finally.throwException(); - return clone; } - - SegmentReaderPtr ReaderPool::get(SegmentInfoPtr info, bool doOpenStores) - { - return get(info, doOpenStores, BufferedIndexInput::BUFFER_SIZE, IndexWriterPtr(_indexWriter)->readerTermsIndexDivisor); +} + +IndexReaderPtr ReaderPool::getReadOnlyClone(const SegmentInfoPtr& info, bool doOpenStores, int32_t termInfosIndexDivisor) { + SyncLock syncLock(this); + SegmentReaderPtr sr(get(info, doOpenStores, BufferedIndexInput::BUFFER_SIZE, termInfosIndexDivisor)); + IndexReaderPtr clone; + LuceneException finally; + try { + clone = boost::dynamic_pointer_cast(sr->clone(true)); + } catch (LuceneException& e) { + finally = e; + } + sr->decRef(); + finally.throwException(); + return clone; +} + +SegmentReaderPtr ReaderPool::get(const SegmentInfoPtr& info, bool doOpenStores) { + return get(info, doOpenStores, BufferedIndexInput::BUFFER_SIZE, IndexWriterPtr(_indexWriter)->readerTermsIndexDivisor); +} + +SegmentReaderPtr ReaderPool::get(const SegmentInfoPtr& info, bool doOpenStores, int32_t readBufferSize, int32_t termsIndexDivisor) { + SyncLock syncLock(this); + IndexWriterPtr indexWriter(_indexWriter); + if (indexWriter->poolReaders) { + readBufferSize = BufferedIndexInput::BUFFER_SIZE; } - - SegmentReaderPtr ReaderPool::get(SegmentInfoPtr info, bool doOpenStores, int32_t readBufferSize, int32_t termsIndexDivisor) - { - SyncLock syncLock(this); - IndexWriterPtr indexWriter(_indexWriter); - if (indexWriter->poolReaders) - readBufferSize = BufferedIndexInput::BUFFER_SIZE; - - SegmentReaderPtr sr(readerMap.get(info)); - if (!sr) - { - // Returns a ref, which we xfer to readerMap - sr = SegmentReader::get(false, info->dir, info, readBufferSize, doOpenStores, termsIndexDivisor); - if (info->dir == indexWriter->directory) - { - // Only pool if reader is not external - readerMap.put(info, sr); - } + + SegmentReaderPtr sr(readerMap.get(info)); + if 
(!sr) { + // Returns a ref, which we xfer to readerMap + sr = SegmentReader::get(false, info->dir, info, readBufferSize, doOpenStores, termsIndexDivisor); + if (info->dir == indexWriter->directory) { + // Only pool if reader is not external + readerMap.put(info, sr); } - else - { - if (doOpenStores) - sr->openDocStores(); - if (termsIndexDivisor != -1 && !sr->termsIndexLoaded()) - { - // If this reader was originally opened because we needed to merge it, we didn't load the terms - // index. But now, if the caller wants the terms index (eg because it's doing deletes, or an NRT - // reader is being opened) we ask the reader to load its terms index. - sr->loadTermsIndex(termsIndexDivisor); - } + } else { + if (doOpenStores) { + sr->openDocStores(); } - - // Return a ref to our caller - if (info->dir == indexWriter->directory) - { - // Only incRef if we pooled (reader is not external) - sr->incRef(); + if (termsIndexDivisor != -1 && !sr->termsIndexLoaded()) { + // If this reader was originally opened because we needed to merge it, we didn't load the terms + // index. But now, if the caller wants the terms index (eg because it's doing deletes, or an NRT + // reader is being opened) we ask the reader to load its terms index. 
+ sr->loadTermsIndex(termsIndexDivisor); } - return sr; } - - SegmentReaderPtr ReaderPool::getIfExists(SegmentInfoPtr info) - { - SyncLock syncLock(this); - SegmentReaderPtr sr(readerMap.get(info)); - if (sr) - sr->incRef(); - return sr; + + // Return a ref to our caller + if (info->dir == indexWriter->directory) { + // Only incRef if we pooled (reader is not external) + sr->incRef(); } - - IndexReaderWarmer::~IndexReaderWarmer() - { + return sr; +} + +SegmentReaderPtr ReaderPool::getIfExists(const SegmentInfoPtr& info) { + SyncLock syncLock(this); + SegmentReaderPtr sr(readerMap.get(info)); + if (sr) { + sr->incRef(); } + return sr; +} + +IndexReaderWarmer::~IndexReaderWarmer() { +} + } diff --git a/src/core/index/IntBlockPool.cpp b/src/core/index/IntBlockPool.cpp index 48d73e4f..898c56f1 100644 --- a/src/core/index/IntBlockPool.cpp +++ b/src/core/index/IntBlockPool.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,49 +8,45 @@ #include "IntBlockPool.h" #include "DocumentsWriter.h" -namespace Lucene -{ - IntBlockPool::IntBlockPool(DocumentsWriterPtr docWriter, bool trackAllocations) - { - this->buffers = Collection::newInstance(10); - this->bufferUpto = -1; - this->intUpto = DocumentsWriter::INT_BLOCK_SIZE; - this->intOffset = -DocumentsWriter::INT_BLOCK_SIZE; - this->_docWriter = docWriter; - this->trackAllocations = trackAllocations; - } - - IntBlockPool::~IntBlockPool() - { - } - - void IntBlockPool::reset() - { - if (bufferUpto != -1) - { - if (bufferUpto > 0) - { - // Recycle all but the first buffer - DocumentsWriterPtr(_docWriter)->recycleIntBlocks(buffers, 1, 1 + bufferUpto); - } - - // Reuse first buffer - bufferUpto = 0; - intUpto = 0; - intOffset = 0; - buffer = buffers[0]; +namespace Lucene { + +IntBlockPool::IntBlockPool(const DocumentsWriterPtr& docWriter, bool trackAllocations) { + this->buffers = Collection::newInstance(10); + this->bufferUpto = -1; + this->intUpto = DocumentsWriter::INT_BLOCK_SIZE; + this->intOffset = -DocumentsWriter::INT_BLOCK_SIZE; + this->_docWriter = docWriter; + this->trackAllocations = trackAllocations; +} + +IntBlockPool::~IntBlockPool() { +} + +void IntBlockPool::reset() { + if (bufferUpto != -1) { + if (bufferUpto > 0) { + // Recycle all but the first buffer + DocumentsWriterPtr(_docWriter)->recycleIntBlocks(buffers, 1, 1 + bufferUpto); } - } - - void IntBlockPool::nextBuffer() - { - if (bufferUpto + 1 == buffers.size()) - buffers.resize((int32_t)((double)buffers.size() * 1.5)); - buffer = DocumentsWriterPtr(_docWriter)->getIntBlock(trackAllocations); - buffers[1 + bufferUpto] = buffer; - ++bufferUpto; - + + // Reuse first buffer + bufferUpto = 0; intUpto = 0; - intOffset += DocumentsWriter::INT_BLOCK_SIZE; + intOffset = 0; + buffer = buffers[0]; + } +} + +void IntBlockPool::nextBuffer() { + if (bufferUpto + 1 == buffers.size()) { + 
buffers.resize((int32_t)((double)buffers.size() * 1.5)); } + buffer = DocumentsWriterPtr(_docWriter)->getIntBlock(trackAllocations); + buffers[1 + bufferUpto] = buffer; + ++bufferUpto; + + intUpto = 0; + intOffset += DocumentsWriter::INT_BLOCK_SIZE; +} + } diff --git a/src/core/index/InvertedDocConsumer.cpp b/src/core/index/InvertedDocConsumer.cpp index a8b17de2..4dbb102b 100644 --- a/src/core/index/InvertedDocConsumer.cpp +++ b/src/core/index/InvertedDocConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,14 +7,13 @@ #include "LuceneInc.h" #include "InvertedDocConsumer.h" -namespace Lucene -{ - InvertedDocConsumer::~InvertedDocConsumer() - { - } - - void InvertedDocConsumer::setFieldInfos(FieldInfosPtr fieldInfos) - { - this->fieldInfos = fieldInfos; - } +namespace Lucene { + +InvertedDocConsumer::~InvertedDocConsumer() { +} + +void InvertedDocConsumer::setFieldInfos(const FieldInfosPtr& fieldInfos) { + this->fieldInfos = fieldInfos; +} + } diff --git a/src/core/index/InvertedDocConsumerPerField.cpp b/src/core/index/InvertedDocConsumerPerField.cpp index ec7a7082..501eb8b8 100644 --- a/src/core/index/InvertedDocConsumerPerField.cpp +++ b/src/core/index/InvertedDocConsumerPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "InvertedDocConsumerPerField.h" -namespace Lucene -{ - InvertedDocConsumerPerField::~InvertedDocConsumerPerField() - { - } +namespace Lucene { + +InvertedDocConsumerPerField::~InvertedDocConsumerPerField() { +} + } diff --git a/src/core/index/InvertedDocConsumerPerThread.cpp b/src/core/index/InvertedDocConsumerPerThread.cpp index b82c213b..50db8771 100644 --- a/src/core/index/InvertedDocConsumerPerThread.cpp +++ b/src/core/index/InvertedDocConsumerPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "InvertedDocConsumerPerThread.h" -namespace Lucene -{ - InvertedDocConsumerPerThread::~InvertedDocConsumerPerThread() - { - } +namespace Lucene { + +InvertedDocConsumerPerThread::~InvertedDocConsumerPerThread() { +} + } diff --git a/src/core/index/InvertedDocEndConsumer.cpp b/src/core/index/InvertedDocEndConsumer.cpp index d574aa43..e921e4c8 100644 --- a/src/core/index/InvertedDocEndConsumer.cpp +++ b/src/core/index/InvertedDocEndConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "InvertedDocEndConsumer.h" -namespace Lucene -{ - InvertedDocEndConsumer::~InvertedDocEndConsumer() - { - } +namespace Lucene { + +InvertedDocEndConsumer::~InvertedDocEndConsumer() { +} + } diff --git a/src/core/index/InvertedDocEndConsumerPerField.cpp b/src/core/index/InvertedDocEndConsumerPerField.cpp index 616821a9..a3e785a0 100644 --- a/src/core/index/InvertedDocEndConsumerPerField.cpp +++ b/src/core/index/InvertedDocEndConsumerPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "InvertedDocEndConsumerPerField.h" -namespace Lucene -{ - InvertedDocEndConsumerPerField::~InvertedDocEndConsumerPerField() - { - } +namespace Lucene { + +InvertedDocEndConsumerPerField::~InvertedDocEndConsumerPerField() { +} + } diff --git a/src/core/index/InvertedDocEndConsumerPerThread.cpp b/src/core/index/InvertedDocEndConsumerPerThread.cpp index 349d66b9..5deb4103 100644 --- a/src/core/index/InvertedDocEndConsumerPerThread.cpp +++ b/src/core/index/InvertedDocEndConsumerPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "InvertedDocEndConsumerPerThread.h" -namespace Lucene -{ - InvertedDocEndConsumerPerThread::~InvertedDocEndConsumerPerThread() - { - } +namespace Lucene { + +InvertedDocEndConsumerPerThread::~InvertedDocEndConsumerPerThread() { +} + } diff --git a/src/core/index/KeepOnlyLastCommitDeletionPolicy.cpp b/src/core/index/KeepOnlyLastCommitDeletionPolicy.cpp index 0244abcb..8d1ca782 100644 --- a/src/core/index/KeepOnlyLastCommitDeletionPolicy.cpp +++ b/src/core/index/KeepOnlyLastCommitDeletionPolicy.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,23 +8,22 @@ #include "KeepOnlyLastCommitDeletionPolicy.h" #include "IndexCommit.h" -namespace Lucene -{ - KeepOnlyLastCommitDeletionPolicy::~KeepOnlyLastCommitDeletionPolicy() - { - } - - void KeepOnlyLastCommitDeletionPolicy::onInit(Collection commits) - { - // Note that commits.size() should normally be 1 - onCommit(commits); - } - - void KeepOnlyLastCommitDeletionPolicy::onCommit(Collection commits) - { - // Note that commits.size() should normally be 2 (if not called by onInit above) - int32_t size = commits.size(); - for (int32_t i = 0; i < size - 1; ++i) - commits[i]->deleteCommit(); +namespace Lucene { + +KeepOnlyLastCommitDeletionPolicy::~KeepOnlyLastCommitDeletionPolicy() { +} + +void KeepOnlyLastCommitDeletionPolicy::onInit(Collection commits) { + // Note that commits.size() should normally be 1 + onCommit(commits); +} + +void KeepOnlyLastCommitDeletionPolicy::onCommit(Collection commits) { + // Note that commits.size() 
should normally be 2 (if not called by onInit above) + int32_t size = commits.size(); + for (int32_t i = 0; i < size - 1; ++i) { + commits[i]->deleteCommit(); } } + +} diff --git a/src/core/index/LogByteSizeMergePolicy.cpp b/src/core/index/LogByteSizeMergePolicy.cpp index cfa0ede4..8127f80e 100644 --- a/src/core/index/LogByteSizeMergePolicy.cpp +++ b/src/core/index/LogByteSizeMergePolicy.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,46 +8,40 @@ #include #include "LogByteSizeMergePolicy.h" -namespace Lucene -{ - /// Default minimum segment size. - const double LogByteSizeMergePolicy::DEFAULT_MIN_MERGE_MB = 1.6; - - /// Default maximum segment size. A segment of this size or larger will never be merged. - const double LogByteSizeMergePolicy::DEFAULT_MAX_MERGE_MB = DBL_MAX; - - LogByteSizeMergePolicy::LogByteSizeMergePolicy(IndexWriterPtr writer) : LogMergePolicy(writer) - { - minMergeSize = (int64_t)(DEFAULT_MIN_MERGE_MB * 1024 * 1024); - maxMergeSize = DEFAULT_MAX_MERGE_MB == DBL_MAX ? 
LLONG_MAX : (int64_t)(DEFAULT_MAX_MERGE_MB * 1024 * 1024); - } - - LogByteSizeMergePolicy::~LogByteSizeMergePolicy() - { - } - - int64_t LogByteSizeMergePolicy::size(SegmentInfoPtr info) - { - return sizeBytes(info); - } - - void LogByteSizeMergePolicy::setMaxMergeMB(double mb) - { - maxMergeSize = (int64_t)(mb * 1024 * 1024); - } - - double LogByteSizeMergePolicy::getMaxMergeMB() - { - return ((double)maxMergeSize) / 1024 / 1024; - } - - void LogByteSizeMergePolicy::setMinMergeMB(double mb) - { - minMergeSize = (int64_t)(mb * 1024 * 1024); - } - - double LogByteSizeMergePolicy::getMinMergeMB() - { - return ((double)minMergeSize) / 1024 / 1024; - } +namespace Lucene { + +/// Default minimum segment size. +const double LogByteSizeMergePolicy::DEFAULT_MIN_MERGE_MB = 1.6; + +/// Default maximum segment size. A segment of this size or larger will never be merged. +const double LogByteSizeMergePolicy::DEFAULT_MAX_MERGE_MB = DBL_MAX; + +LogByteSizeMergePolicy::LogByteSizeMergePolicy(const IndexWriterPtr& writer) : LogMergePolicy(writer) { + minMergeSize = (int64_t)(DEFAULT_MIN_MERGE_MB * 1024 * 1024); + maxMergeSize = DEFAULT_MAX_MERGE_MB == DBL_MAX ? 
std::numeric_limits::max() : (int64_t)(DEFAULT_MAX_MERGE_MB * 1024 * 1024); +} + +LogByteSizeMergePolicy::~LogByteSizeMergePolicy() { +} + +int64_t LogByteSizeMergePolicy::size(const SegmentInfoPtr& info) { + return sizeBytes(info); +} + +void LogByteSizeMergePolicy::setMaxMergeMB(double mb) { + maxMergeSize = (int64_t)(mb * 1024 * 1024); +} + +double LogByteSizeMergePolicy::getMaxMergeMB() { + return ((double)maxMergeSize) / 1024 / 1024; +} + +void LogByteSizeMergePolicy::setMinMergeMB(double mb) { + minMergeSize = (int64_t)(mb * 1024 * 1024); +} + +double LogByteSizeMergePolicy::getMinMergeMB() { + return ((double)minMergeSize) / 1024 / 1024; +} + } diff --git a/src/core/index/LogDocMergePolicy.cpp b/src/core/index/LogDocMergePolicy.cpp index c2c90b0d..309da70a 100644 --- a/src/core/index/LogDocMergePolicy.cpp +++ b/src/core/index/LogDocMergePolicy.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,35 +7,31 @@ #include "LuceneInc.h" #include "LogDocMergePolicy.h" -namespace Lucene -{ - /// Default minimum segment size. 
@see setMinMergeDocs - const int32_t LogDocMergePolicy::DEFAULT_MIN_MERGE_DOCS = 1000; - - LogDocMergePolicy::LogDocMergePolicy(IndexWriterPtr writer) : LogMergePolicy(writer) - { - minMergeSize = DEFAULT_MIN_MERGE_DOCS; - - // maxMergeSize is never used by LogDocMergePolicy; set it to LLONG_MAX to disable it - maxMergeSize = LLONG_MAX; - } - - LogDocMergePolicy::~LogDocMergePolicy() - { - } - - int64_t LogDocMergePolicy::size(SegmentInfoPtr info) - { - return sizeDocs(info); - } - - void LogDocMergePolicy::setMinMergeDocs(int32_t minMergeDocs) - { - minMergeSize = minMergeDocs; - } - - int32_t LogDocMergePolicy::getMinMergeDocs() - { - return (int32_t)minMergeSize; - } +namespace Lucene { + +/// Default minimum segment size. @see setMinMergeDocs +const int32_t LogDocMergePolicy::DEFAULT_MIN_MERGE_DOCS = 1000; + +LogDocMergePolicy::LogDocMergePolicy(const IndexWriterPtr& writer) : LogMergePolicy(writer) { + minMergeSize = DEFAULT_MIN_MERGE_DOCS; + + // maxMergeSize is never used by LogDocMergePolicy; set it to LLONG_MAX to disable it + maxMergeSize = std::numeric_limits::max(); +} + +LogDocMergePolicy::~LogDocMergePolicy() { +} + +int64_t LogDocMergePolicy::size(const SegmentInfoPtr& info) { + return sizeDocs(info); +} + +void LogDocMergePolicy::setMinMergeDocs(int32_t minMergeDocs) { + minMergeSize = minMergeDocs; +} + +int32_t LogDocMergePolicy::getMinMergeDocs() { + return (int32_t)minMergeSize; +} + } diff --git a/src/core/index/LogMergePolicy.cpp b/src/core/index/LogMergePolicy.cpp index de532c84..ed835b43 100644 --- a/src/core/index/LogMergePolicy.cpp +++ b/src/core/index/LogMergePolicy.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,414 +10,365 @@ #include "SegmentInfo.h" #include "StringUtils.h" -namespace Lucene -{ - /// Defines the allowed range of log(size) for each level. A level is computed by taking the max segment - /// log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range. - const double LogMergePolicy::LEVEL_LOG_SPAN = 0.75; - - /// Default merge factor, which is how many segments are merged at a time. - const int32_t LogMergePolicy::DEFAULT_MERGE_FACTOR = 10; - - /// Default maximum segment size. A segment of this size or larger will never be merged. - const int32_t LogMergePolicy::DEFAULT_MAX_MERGE_DOCS = INT_MAX; - - /// Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it. - const double LogMergePolicy::DEFAULT_NO_CFS_RATIO = 0.1; - - LogMergePolicy::LogMergePolicy(IndexWriterPtr writer) : MergePolicy(writer) - { - mergeFactor = DEFAULT_MERGE_FACTOR; - noCFSRatio = DEFAULT_NO_CFS_RATIO; - minMergeSize = 0; - maxMergeSize = 0; - maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; - calibrateSizeByDeletes = false; - _useCompoundFile = true; - _useCompoundDocStore = true; - } - - LogMergePolicy::~LogMergePolicy() - { - } - - double LogMergePolicy::getNoCFSRatio() - { - return noCFSRatio; - } - - void LogMergePolicy::setNoCFSRatio(double noCFSRatio) - { - if (noCFSRatio < 0.0 || noCFSRatio > 1.0) - boost::throw_exception(IllegalArgumentException(L"noCFSRatio must be 0.0 to 1.0 inclusive; got " + StringUtils::toString(noCFSRatio))); - this->noCFSRatio = noCFSRatio; - } - - bool LogMergePolicy::verbose() - { - return (!_writer.expired() && IndexWriterPtr(_writer)->verbose()); - } - - void LogMergePolicy::message(const String& message) - { - if (verbose()) - IndexWriterPtr(_writer)->message(L"LMP: " + message); - } - - int32_t LogMergePolicy::getMergeFactor() - { - return mergeFactor; - } - - void LogMergePolicy::setMergeFactor(int32_t 
mergeFactor) - { - if (mergeFactor < 2) - boost::throw_exception(IllegalArgumentException(L"mergeFactor cannot be less than 2")); - this->mergeFactor = mergeFactor; - } - - bool LogMergePolicy::getUseCompoundFile() - { - return _useCompoundFile; - } - - void LogMergePolicy::setUseCompoundFile(bool useCompoundFile) - { - _useCompoundFile = useCompoundFile; - } - - bool LogMergePolicy::useCompoundFile(SegmentInfosPtr segments, SegmentInfoPtr newSegment) - { - return _useCompoundFile; - } - - bool LogMergePolicy::useCompoundDocStore(SegmentInfosPtr segments) - { - return _useCompoundDocStore; - } - - void LogMergePolicy::setUseCompoundDocStore(bool useCompoundDocStore) - { - _useCompoundDocStore = useCompoundDocStore; - } - - bool LogMergePolicy::getUseCompoundDocStore() - { - return _useCompoundDocStore; +namespace Lucene { + +/// Defines the allowed range of log(size) for each level. A level is computed by taking the max segment +/// log size, minus LEVEL_LOG_SPAN, and finding all segments falling within that range. +const double LogMergePolicy::LEVEL_LOG_SPAN = 0.75; + +/// Default merge factor, which is how many segments are merged at a time. +const int32_t LogMergePolicy::DEFAULT_MERGE_FACTOR = 10; + +/// Default maximum segment size. A segment of this size or larger will never be merged. +const int32_t LogMergePolicy::DEFAULT_MAX_MERGE_DOCS = INT_MAX; + +/// Default noCFSRatio. If a merge's size is >= 10% of the index, then we disable compound file for it. 
+const double LogMergePolicy::DEFAULT_NO_CFS_RATIO = 0.1; + +LogMergePolicy::LogMergePolicy(const IndexWriterPtr& writer) : MergePolicy(writer) { + mergeFactor = DEFAULT_MERGE_FACTOR; + noCFSRatio = DEFAULT_NO_CFS_RATIO; + minMergeSize = 0; + maxMergeSize = 0; + maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + calibrateSizeByDeletes = false; + _useCompoundFile = true; + _useCompoundDocStore = true; +} + +LogMergePolicy::~LogMergePolicy() { +} + +double LogMergePolicy::getNoCFSRatio() { + return noCFSRatio; +} + +void LogMergePolicy::setNoCFSRatio(double noCFSRatio) { + if (noCFSRatio < 0.0 || noCFSRatio > 1.0) { + boost::throw_exception(IllegalArgumentException(L"noCFSRatio must be 0.0 to 1.0 inclusive; got " + StringUtils::toString(noCFSRatio))); } - - void LogMergePolicy::setCalibrateSizeByDeletes(bool calibrateSizeByDeletes) - { - this->calibrateSizeByDeletes = calibrateSizeByDeletes; + this->noCFSRatio = noCFSRatio; +} + +bool LogMergePolicy::verbose() { + return (!_writer.expired() && IndexWriterPtr(_writer)->verbose()); +} + +void LogMergePolicy::message(const String& message) { + if (verbose()) { + IndexWriterPtr(_writer)->message(L"LMP: " + message); } - - bool LogMergePolicy::getCalibrateSizeByDeletes() - { - return calibrateSizeByDeletes; +} + +int32_t LogMergePolicy::getMergeFactor() { + return mergeFactor; +} + +void LogMergePolicy::setMergeFactor(int32_t mergeFactor) { + if (mergeFactor < 2) { + boost::throw_exception(IllegalArgumentException(L"mergeFactor cannot be less than 2")); } - - void LogMergePolicy::close() - { + this->mergeFactor = mergeFactor; +} + +bool LogMergePolicy::getUseCompoundFile() { + return _useCompoundFile; +} + +void LogMergePolicy::setUseCompoundFile(bool useCompoundFile) { + _useCompoundFile = useCompoundFile; +} + +bool LogMergePolicy::useCompoundFile(const SegmentInfosPtr& segments, const SegmentInfoPtr& newSegment) { + return _useCompoundFile; +} + +bool LogMergePolicy::useCompoundDocStore(const SegmentInfosPtr& segments) { + 
return _useCompoundDocStore; +} + +void LogMergePolicy::setUseCompoundDocStore(bool useCompoundDocStore) { + _useCompoundDocStore = useCompoundDocStore; +} + +bool LogMergePolicy::getUseCompoundDocStore() { + return _useCompoundDocStore; +} + +void LogMergePolicy::setCalibrateSizeByDeletes(bool calibrateSizeByDeletes) { + this->calibrateSizeByDeletes = calibrateSizeByDeletes; +} + +bool LogMergePolicy::getCalibrateSizeByDeletes() { + return calibrateSizeByDeletes; +} + +void LogMergePolicy::close() { +} + +int64_t LogMergePolicy::sizeDocs(const SegmentInfoPtr& info) { + if (calibrateSizeByDeletes) { + int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); + return (info->docCount - (int64_t)delCount); + } else { + return info->docCount; } +} - int64_t LogMergePolicy::sizeDocs(SegmentInfoPtr info) - { - if (calibrateSizeByDeletes) - { - int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); - return (info->docCount - (int64_t)delCount); - } - else - return info->docCount; +int64_t LogMergePolicy::sizeBytes(const SegmentInfoPtr& info) { + int64_t byteSize = info->sizeInBytes(); + if (calibrateSizeByDeletes) { + int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); + double delRatio = info->docCount <= 0 ? 0.0 : ((double)delCount / (double)info->docCount); + return info->docCount <= 0 ? byteSize : (int64_t)(byteSize * (1.0 - delRatio)); + } else { + return byteSize; } - - int64_t LogMergePolicy::sizeBytes(SegmentInfoPtr info) - { - int64_t byteSize = info->sizeInBytes(); - if (calibrateSizeByDeletes) - { - int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); - double delRatio = info->docCount <= 0 ? 0.0 : ((double)delCount / (double)info->docCount); - return info->docCount <= 0 ? 
byteSize : (int64_t)(byteSize * (1.0 - delRatio)); +} + +bool LogMergePolicy::isOptimized(const SegmentInfosPtr& infos, int32_t maxNumSegments, SetSegmentInfo segmentsToOptimize) { + int32_t numSegments = infos->size(); + int32_t numToOptimize = 0; + SegmentInfoPtr optimizeInfo; + for (int32_t i = 0; i < numSegments && numToOptimize <= maxNumSegments; ++i) { + SegmentInfoPtr info(infos->info(i)); + if (segmentsToOptimize.contains(info)) { + ++numToOptimize; + optimizeInfo = info; } - else - return byteSize; } - - bool LogMergePolicy::isOptimized(SegmentInfosPtr infos, int32_t maxNumSegments, SetSegmentInfo segmentsToOptimize) - { - int32_t numSegments = infos->size(); - int32_t numToOptimize = 0; - SegmentInfoPtr optimizeInfo; - for (int32_t i = 0; i < numSegments && numToOptimize <= maxNumSegments; ++i) - { - SegmentInfoPtr info(infos->info(i)); - if (segmentsToOptimize.contains(info)) - { - ++numToOptimize; - optimizeInfo = info; + return (numToOptimize <= maxNumSegments && (numToOptimize != 1 || isOptimized(optimizeInfo))); +} + +bool LogMergePolicy::isOptimized(const SegmentInfoPtr& info) { + IndexWriterPtr writer(_writer); + bool hasDeletions = (writer->numDeletedDocs(info) > 0); + return (!hasDeletions && !info->hasSeparateNorms() && info->dir == writer->getDirectory() && (info->getUseCompoundFile() == _useCompoundFile || noCFSRatio < 1.0)); +} + +MergeSpecificationPtr LogMergePolicy::findMergesForOptimize(const SegmentInfosPtr& segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize) { + MergeSpecificationPtr spec; + + BOOST_ASSERT(maxSegmentCount > 0); + + if (!isOptimized(segmentInfos, maxSegmentCount, segmentsToOptimize)) { + // Find the newest (rightmost) segment that needs to be optimized (other segments may have been + // flushed since optimize started) + int32_t last = segmentInfos->size(); + while (last > 0) { + if (segmentsToOptimize.contains(segmentInfos->info(--last))) { + ++last; + break; } } - return (numToOptimize <= 
maxNumSegments && (numToOptimize != 1 || isOptimized(optimizeInfo))); - } - - bool LogMergePolicy::isOptimized(SegmentInfoPtr info) - { - IndexWriterPtr writer(_writer); - bool hasDeletions = (writer->numDeletedDocs(info) > 0); - return (!hasDeletions && !info->hasSeparateNorms() && info->dir == writer->getDirectory() && (info->getUseCompoundFile() == _useCompoundFile || noCFSRatio < 1.0)); - } - - MergeSpecificationPtr LogMergePolicy::findMergesForOptimize(SegmentInfosPtr segmentInfos, int32_t maxSegmentCount, SetSegmentInfo segmentsToOptimize) - { - MergeSpecificationPtr spec; - - BOOST_ASSERT(maxSegmentCount > 0); - - if (!isOptimized(segmentInfos, maxSegmentCount, segmentsToOptimize)) - { - // Find the newest (rightmost) segment that needs to be optimized (other segments may have been - // flushed since optimize started) - int32_t last = segmentInfos->size(); - while (last > 0) - { - if (segmentsToOptimize.contains(segmentInfos->info(--last))) - { - ++last; - break; - } + + if (last > 0) { + spec = newLucene(); + + // First, enroll all "full" merges (size mergeFactor) to potentially be run concurrently + while (last - maxSegmentCount + 1 >= mergeFactor) { + spec->add(makeOneMerge(segmentInfos, segmentInfos->range(last - mergeFactor, last))); + last -= mergeFactor; } - - if (last > 0) - { - spec = newLucene(); - - // First, enroll all "full" merges (size mergeFactor) to potentially be run concurrently - while (last - maxSegmentCount + 1 >= mergeFactor) - { - spec->add(makeOneMerge(segmentInfos, segmentInfos->range(last - mergeFactor, last))); - last -= mergeFactor; - } - - // Only if there are no full merges pending do we add a final partial (< mergeFactor segments) merge - if (spec->merges.empty()) - { - if (maxSegmentCount == 1) - { - // Since we must optimize down to 1 segment, the choice is simple - if (last > 1 || !isOptimized(segmentInfos->info(0))) - spec->add(makeOneMerge(segmentInfos, segmentInfos->range(0, last))); + + // Only if there are no full 
merges pending do we add a final partial (< mergeFactor segments) merge + if (spec->merges.empty()) { + if (maxSegmentCount == 1) { + // Since we must optimize down to 1 segment, the choice is simple + if (last > 1 || !isOptimized(segmentInfos->info(0))) { + spec->add(makeOneMerge(segmentInfos, segmentInfos->range(0, last))); } - else if (last > maxSegmentCount) - { - // Take care to pick a partial merge that is least cost, but does not make the index too - // lopsided. If we always just picked the partial tail then we could produce a highly - // lopsided index over time - - // We must merge this many segments to leave maxNumSegments in the index (from when - // optimize was first kicked off) - int32_t finalMergeSize = last - maxSegmentCount + 1; - - // Consider all possible starting points - int64_t bestSize = 0; - int32_t bestStart = 0; - - for (int32_t i = 0; i < last - finalMergeSize + 1; ++i) - { - int64_t sumSize = 0; - for (int32_t j = 0; j < finalMergeSize; ++j) - sumSize += size(segmentInfos->info(j + i)); - if (i == 0 || (sumSize < 2 * size(segmentInfos->info(i - 1)) && sumSize < bestSize)) - { - bestStart = i; - bestSize = sumSize; - } + } else if (last > maxSegmentCount) { + // Take care to pick a partial merge that is least cost, but does not make the index too + // lopsided. 
If we always just picked the partial tail then we could produce a highly + // lopsided index over time + + // We must merge this many segments to leave maxNumSegments in the index (from when + // optimize was first kicked off) + int32_t finalMergeSize = last - maxSegmentCount + 1; + + // Consider all possible starting points + int64_t bestSize = 0; + int32_t bestStart = 0; + + for (int32_t i = 0; i < last - finalMergeSize + 1; ++i) { + int64_t sumSize = 0; + for (int32_t j = 0; j < finalMergeSize; ++j) { + sumSize += size(segmentInfos->info(j + i)); + } + if (i == 0 || (sumSize < 2 * size(segmentInfos->info(i - 1)) && sumSize < bestSize)) { + bestStart = i; + bestSize = sumSize; } - - spec->add(makeOneMerge(segmentInfos, segmentInfos->range(bestStart, bestStart + finalMergeSize))); } + + spec->add(makeOneMerge(segmentInfos, segmentInfos->range(bestStart, bestStart + finalMergeSize))); } } - else - spec.reset(); - } - else + } else { spec.reset(); - - return spec; + } + } else { + spec.reset(); } - - MergeSpecificationPtr LogMergePolicy::findMergesToExpungeDeletes(SegmentInfosPtr segmentInfos) - { - int32_t numSegments = segmentInfos->size(); - - message(L"findMergesToExpungeDeletes: " + StringUtils::toString(numSegments) + L" segments"); - - MergeSpecificationPtr spec(newLucene()); - int32_t firstSegmentWithDeletions = -1; - for (int32_t i = 0; i < numSegments; ++i) - { - SegmentInfoPtr info(segmentInfos->info(i)); - int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); - if (delCount > 0) - { - message(L" segment " + info->name + L" has deletions"); - if (firstSegmentWithDeletions == -1) - firstSegmentWithDeletions = i; - else if (i - firstSegmentWithDeletions == mergeFactor) - { - // We've seen mergeFactor segments in a row with deletions, so force a merge now - message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(i - 1) + L" inclusive"); - spec->add(makeOneMerge(segmentInfos, 
segmentInfos->range(firstSegmentWithDeletions, i))); - firstSegmentWithDeletions = i; - } - } - else if (firstSegmentWithDeletions != -1) - { - // End of a sequence of segments with deletions, so merge those past segments even if - // it's fewer than mergeFactor segments + + return spec; +} + +MergeSpecificationPtr LogMergePolicy::findMergesToExpungeDeletes(const SegmentInfosPtr& segmentInfos) { + int32_t numSegments = segmentInfos->size(); + + message(L"findMergesToExpungeDeletes: " + StringUtils::toString(numSegments) + L" segments"); + + MergeSpecificationPtr spec(newLucene()); + int32_t firstSegmentWithDeletions = -1; + for (int32_t i = 0; i < numSegments; ++i) { + SegmentInfoPtr info(segmentInfos->info(i)); + int32_t delCount = IndexWriterPtr(_writer)->numDeletedDocs(info); + if (delCount > 0) { + message(L" segment " + info->name + L" has deletions"); + if (firstSegmentWithDeletions == -1) { + firstSegmentWithDeletions = i; + } else if (i - firstSegmentWithDeletions == mergeFactor) { + // We've seen mergeFactor segments in a row with deletions, so force a merge now message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(i - 1) + L" inclusive"); spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, i))); - firstSegmentWithDeletions = -1; + firstSegmentWithDeletions = i; } + } else if (firstSegmentWithDeletions != -1) { + // End of a sequence of segments with deletions, so merge those past segments even if + // it's fewer than mergeFactor segments + message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(i - 1) + L" inclusive"); + spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, i))); + firstSegmentWithDeletions = -1; } - - if (firstSegmentWithDeletions != -1) - { - message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(numSegments - 1) + L" 
inclusive"); - spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, numSegments))); - } - - return spec; } - - MergeSpecificationPtr LogMergePolicy::findMerges(SegmentInfosPtr segmentInfos) - { - int32_t numSegments = segmentInfos->size(); - message(L"findMerges: " + StringUtils::toString(numSegments) + L" segments"); - - // Compute levels, which is just log (base mergeFactor) of the size of each segment - Collection levels(Collection::newInstance(numSegments)); - double norm = std::log((double)mergeFactor); - - for (int32_t i = 0; i < numSegments; ++i) - { - SegmentInfoPtr info(segmentInfos->info(i)); - int64_t _size = size(info); - - // Floor tiny segments - _size = std::max(_size, (int64_t)1); - levels[i] = std::log((double)_size) / norm; + + if (firstSegmentWithDeletions != -1) { + message(L" add merge " + StringUtils::toString(firstSegmentWithDeletions) + L" to " + StringUtils::toString(numSegments - 1) + L" inclusive"); + spec->add(makeOneMerge(segmentInfos, segmentInfos->range(firstSegmentWithDeletions, numSegments))); + } + + return spec; +} + +MergeSpecificationPtr LogMergePolicy::findMerges(const SegmentInfosPtr& segmentInfos) { + int32_t numSegments = segmentInfos->size(); + message(L"findMerges: " + StringUtils::toString(numSegments) + L" segments"); + + // Compute levels, which is just log (base mergeFactor) of the size of each segment + Collection levels(Collection::newInstance(numSegments)); + double norm = std::log((double)mergeFactor); + + for (int32_t i = 0; i < numSegments; ++i) { + SegmentInfoPtr info(segmentInfos->info(i)); + int64_t _size = size(info); + + // Floor tiny segments + _size = std::max(_size, (int64_t)1); + levels[i] = std::log((double)_size) / norm; + } + + double levelFloor = minMergeSize <= 0 ? 0 : (std::log((double)minMergeSize) / norm); + + // Now, we quantize the log values into levels. 
The first level is any segment whose log + // size is within LEVEL_LOG_SPAN of the max size, or, who has such as segment "to the right". + // Then, we find the max of all other segments and use that to define the next level segment, etc. + + MergeSpecificationPtr spec; + + int32_t start = 0; + while (start < numSegments) { + // Find max level of all segments not already quantized + double maxLevel = levels[start]; + for (int32_t i = 1 + start; i < numSegments; ++i) { + maxLevel = std::max(maxLevel, levels[i]); } - - double levelFloor = minMergeSize <= 0 ? 0 : (std::log((double)minMergeSize) / norm); - - // Now, we quantize the log values into levels. The first level is any segment whose log - // size is within LEVEL_LOG_SPAN of the max size, or, who has such as segment "to the right". - // Then, we find the max of all other segments and use that to define the next level segment, etc. - - MergeSpecificationPtr spec; - - int32_t start = 0; - while (start < numSegments) - { - // Find max level of all segments not already quantized - double maxLevel = levels[start]; - for (int32_t i = 1 + start; i < numSegments; ++i) - maxLevel = std::max(maxLevel, levels[i]); - - // Now search backwards for the rightmost segment that falls into this level - double levelBottom; - if (maxLevel < levelFloor) - levelBottom = -1.0; - else - { - levelBottom = (double)(maxLevel - LEVEL_LOG_SPAN); - - // Force a boundary at the level floor - if (levelBottom < levelFloor && maxLevel >= levelFloor) - levelBottom = levelFloor; + + // Now search backwards for the rightmost segment that falls into this level + double levelBottom; + if (maxLevel < levelFloor) { + levelBottom = -1.0; + } else { + levelBottom = (double)(maxLevel - LEVEL_LOG_SPAN); + + // Force a boundary at the level floor + if (levelBottom < levelFloor && maxLevel >= levelFloor) { + levelBottom = levelFloor; } - - int32_t upto = numSegments - 1; - while (upto >= start) - { - if (levels[upto] >= levelBottom) - break; - --upto; + } + 
+ int32_t upto = numSegments - 1; + while (upto >= start) { + if (levels[upto] >= levelBottom) { + break; } - message(L" level " + StringUtils::toString(levelBottom) + L" to " + StringUtils::toString(maxLevel) + L": " + StringUtils::toString(1 + upto - start) + L" segments"); - - // Finally, record all merges that are viable at this level - int32_t end = start + mergeFactor; - while (end <= 1 + upto) - { - bool anyTooLarge = false; - for (int32_t i = start; i < end; ++i) - { - SegmentInfoPtr info(segmentInfos->info(i)); - if (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs) - { - anyTooLarge = true; - break; - } + --upto; + } + message(L" level " + StringUtils::toString(levelBottom) + L" to " + StringUtils::toString(maxLevel) + L": " + StringUtils::toString(1 + upto - start) + L" segments"); + + // Finally, record all merges that are viable at this level + int32_t end = start + mergeFactor; + while (end <= 1 + upto) { + bool anyTooLarge = false; + for (int32_t i = start; i < end; ++i) { + SegmentInfoPtr info(segmentInfos->info(i)); + if (size(info) >= maxMergeSize || sizeDocs(info) >= maxMergeDocs) { + anyTooLarge = true; + break; } - - if (!anyTooLarge) - { - if (!spec) - spec = newLucene(); - message(L" " + StringUtils::toString(start) + L" to " + StringUtils::toString(end) + L": add this merge"); - spec->add(makeOneMerge(segmentInfos, segmentInfos->range(start, end))); + } + + if (!anyTooLarge) { + if (!spec) { + spec = newLucene(); } - else - message(L" " + StringUtils::toString(start) + L" to " + StringUtils::toString(end) + L": contains segment over maxMergeSize or maxMergeDocs; skipping"); - - start = end; - end = start + mergeFactor; + message(L" " + StringUtils::toString(start) + L" to " + StringUtils::toString(end) + L": add this merge"); + spec->add(makeOneMerge(segmentInfos, segmentInfos->range(start, end))); + } else { + message(L" " + StringUtils::toString(start) + L" to " + StringUtils::toString(end) + L": contains segment over 
maxMergeSize or maxMergeDocs; skipping"); } - - start = 1 + upto; + + start = end; + end = start + mergeFactor; } - - return spec; + + start = 1 + upto; } - - OneMergePtr LogMergePolicy::makeOneMerge(SegmentInfosPtr infos, SegmentInfosPtr infosToMerge) - { - bool doCFS; - if (!_useCompoundFile) - doCFS = false; - else if (noCFSRatio == 1.0) - doCFS = true; - else - { - int64_t totSize = 0; - int32_t numInfos = infos->size(); - for (int32_t i = 0; i < numInfos; ++i) - { - SegmentInfoPtr info(infos->info(i)); - totSize += size(info); - } - int64_t mergeSize = 0; - int32_t numMerges = infosToMerge->size(); - for (int32_t i = 0; i < numMerges; ++i) - { - SegmentInfoPtr info(infosToMerge->info(i)); - mergeSize += size(info); - } - doCFS = mergeSize <= noCFSRatio * totSize; + + return spec; +} + +OneMergePtr LogMergePolicy::makeOneMerge(const SegmentInfosPtr& infos, const SegmentInfosPtr& infosToMerge) { + bool doCFS; + if (!_useCompoundFile) { + doCFS = false; + } else if (noCFSRatio == 1.0) { + doCFS = true; + } else { + int64_t totSize = 0; + int32_t numInfos = infos->size(); + for (int32_t i = 0; i < numInfos; ++i) { + SegmentInfoPtr info(infos->info(i)); + totSize += size(info); } - return newLucene(infosToMerge, doCFS); - } - - void LogMergePolicy::setMaxMergeDocs(int32_t maxMergeDocs) - { - this->maxMergeDocs = maxMergeDocs; - } - - int32_t LogMergePolicy::getMaxMergeDocs() - { - return maxMergeDocs; + int64_t mergeSize = 0; + int32_t numMerges = infosToMerge->size(); + for (int32_t i = 0; i < numMerges; ++i) { + SegmentInfoPtr info(infosToMerge->info(i)); + mergeSize += size(info); + } + doCFS = mergeSize <= noCFSRatio * totSize; } + return newLucene(infosToMerge, doCFS); +} + +void LogMergePolicy::setMaxMergeDocs(int32_t maxMergeDocs) { + this->maxMergeDocs = maxMergeDocs; +} + +int32_t LogMergePolicy::getMaxMergeDocs() { + return maxMergeDocs; +} + } diff --git a/src/core/index/MergeDocIDRemapper.cpp b/src/core/index/MergeDocIDRemapper.cpp index 
41808ebe..4ed090ec 100644 --- a/src/core/index/MergeDocIDRemapper.cpp +++ b/src/core/index/MergeDocIDRemapper.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,78 +10,70 @@ #include "MergePolicy.h" #include "SegmentInfo.h" -namespace Lucene -{ - MergeDocIDRemapper::MergeDocIDRemapper(SegmentInfosPtr infos, Collection< Collection > docMaps, Collection delCounts, OneMergePtr merge, int32_t mergedDocCount) - { - this->docMaps = docMaps; - SegmentInfoPtr firstSegment(merge->segments->info(0)); - int32_t i = 0; - this->minDocID = 0; - while (true) - { - SegmentInfoPtr info(infos->info(i)); - if (info->equals(firstSegment)) - break; - minDocID += info->docCount; - ++i; - } - - int32_t numDocs = 0; - for (int32_t j = 0; j < docMaps.size(); ++i, ++j) - { - numDocs += infos->info(i)->docCount; - BOOST_ASSERT(infos->info(i)->equals(merge->segments->info(j))); - } - this->maxDocID = minDocID + numDocs; - - starts = Collection::newInstance(docMaps.size()); - newStarts = Collection::newInstance(docMaps.size()); - - starts[0] = minDocID; - newStarts[0] = minDocID; - for (i = 1; i < docMaps.size(); ++i) - { - int32_t lastDocCount = merge->segments->info(i - 1)->docCount; - starts[i] = starts[i - 1] + lastDocCount; - newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; +namespace Lucene { + +MergeDocIDRemapper::MergeDocIDRemapper(const SegmentInfosPtr& infos, Collection< Collection > docMaps, Collection delCounts, const OneMergePtr& merge, int32_t mergedDocCount) { + this->docMaps = docMaps; + SegmentInfoPtr firstSegment(merge->segments->info(0)); + int32_t i = 0; + this->minDocID = 0; + while 
(true) { + SegmentInfoPtr info(infos->info(i)); + if (info->equals(firstSegment)) { + break; } - this->docShift = numDocs - mergedDocCount; - - // There are rare cases when docShift is 0. It happens if you try to delete a docID that's - // out of bounds, because the SegmentReader still allocates deletedDocs and pretends it has - // deletions ... so we can't make this assert here: BOOST_ASSERT(docShift > 0); - - // Make sure it all adds up - BOOST_ASSERT(docShift == maxDocID - (newStarts[docMaps.size() - 1] + merge->segments->info(docMaps.size() - 1)->docCount - delCounts[docMaps.size() - 1])); + minDocID += info->docCount; + ++i; } - - MergeDocIDRemapper::~MergeDocIDRemapper() - { + + int32_t numDocs = 0; + for (int32_t j = 0; j < docMaps.size(); ++i, ++j) { + numDocs += infos->info(i)->docCount; + BOOST_ASSERT(infos->info(i)->equals(merge->segments->info(j))); } - - int32_t MergeDocIDRemapper::remap(int32_t oldDocID) - { - if (oldDocID < minDocID) - { - // Unaffected by merge - return oldDocID; - } - else if (oldDocID >= maxDocID) - { - // This doc was "after" the merge, so simple shift - return oldDocID - docShift; - } - else - { - // Binary search to locate this document & find its new docID - Collection::iterator doc = std::upper_bound(starts.begin(), starts.begin() + docMaps.size(), oldDocID); - int32_t docMap = std::distance(starts.begin(), doc) - 1; - - if (docMaps[docMap]) - return newStarts[docMap] + docMaps[docMap][oldDocID - starts[docMap]]; - else - return newStarts[docMap] + oldDocID - starts[docMap]; + this->maxDocID = minDocID + numDocs; + + starts = Collection::newInstance(docMaps.size()); + newStarts = Collection::newInstance(docMaps.size()); + + starts[0] = minDocID; + newStarts[0] = minDocID; + for (i = 1; i < docMaps.size(); ++i) { + int32_t lastDocCount = merge->segments->info(i - 1)->docCount; + starts[i] = starts[i - 1] + lastDocCount; + newStarts[i] = newStarts[i - 1] + lastDocCount - delCounts[i - 1]; + } + this->docShift = numDocs - 
mergedDocCount; + + // There are rare cases when docShift is 0. It happens if you try to delete a docID that's + // out of bounds, because the SegmentReader still allocates deletedDocs and pretends it has + // deletions ... so we can't make this assert here: BOOST_ASSERT(docShift > 0); + + // Make sure it all adds up + BOOST_ASSERT(docShift == maxDocID - (newStarts[docMaps.size() - 1] + merge->segments->info(docMaps.size() - 1)->docCount - delCounts[docMaps.size() - 1])); +} + +MergeDocIDRemapper::~MergeDocIDRemapper() { +} + +int32_t MergeDocIDRemapper::remap(int32_t oldDocID) { + if (oldDocID < minDocID) { + // Unaffected by merge + return oldDocID; + } else if (oldDocID >= maxDocID) { + // This doc was "after" the merge, so simple shift + return oldDocID - docShift; + } else { + // Binary search to locate this document & find its new docID + Collection::iterator doc = std::upper_bound(starts.begin(), starts.begin() + docMaps.size(), oldDocID); + int32_t docMap = std::distance(starts.begin(), doc) - 1; + + if (docMaps[docMap]) { + return newStarts[docMap] + docMaps[docMap][oldDocID - starts[docMap]]; + } else { + return newStarts[docMap] + oldDocID - starts[docMap]; } } } + +} diff --git a/src/core/index/MergePolicy.cpp b/src/core/index/MergePolicy.cpp index 023b0985..77bd83f5 100644 --- a/src/core/index/MergePolicy.cpp +++ b/src/core/index/MergePolicy.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,107 +10,100 @@ #include "SegmentInfo.h" #include "StringUtils.h" -namespace Lucene -{ - MergePolicy::MergePolicy(IndexWriterPtr writer) - { - this->_writer = writer; - } - - MergePolicy::~MergePolicy() - { - } - - OneMerge::OneMerge(SegmentInfosPtr segments, bool useCompoundFile) - { - mergeDocStores = false; - optimize = false; - registerDone = false; - mergeGen = 0; - isExternal = false; - maxNumSegmentsOptimize = 0; - aborted = false; - - if (segments->empty()) - boost::throw_exception(RuntimeException(L"segments must include at least one segment")); - this->segments = segments; - this->useCompoundFile = useCompoundFile; - } - - OneMerge::~OneMerge() - { - } - - void OneMerge::setException(const LuceneException& error) - { - SyncLock syncLock(this); - this->error = error; - } - - LuceneException OneMerge::getException() - { - SyncLock syncLock(this); - return error; - } - - void OneMerge::abort() - { - SyncLock syncLock(this); - aborted = true; - } - - bool OneMerge::isAborted() - { - SyncLock syncLock(this); - return aborted; +namespace Lucene { + +MergePolicy::MergePolicy(const IndexWriterPtr& writer) { + this->_writer = writer; +} + +MergePolicy::~MergePolicy() { +} + +OneMerge::OneMerge(const SegmentInfosPtr& segments, bool useCompoundFile) { + mergeDocStores = false; + optimize = false; + registerDone = false; + mergeGen = 0; + isExternal = false; + maxNumSegmentsOptimize = 0; + aborted = false; + + if (segments->empty()) { + boost::throw_exception(RuntimeException(L"segments must include at least one segment")); } - - void OneMerge::checkAborted(DirectoryPtr dir) - { - SyncLock syncLock(this); - if (aborted) - boost::throw_exception(MergeAbortedException(L"merge is aborted: " + segString(dir))); + this->segments = segments; + this->useCompoundFile = useCompoundFile; +} + +OneMerge::~OneMerge() { +} + +void OneMerge::setException(const LuceneException& error) { + SyncLock 
syncLock(this); + this->error = error; +} + +LuceneException OneMerge::getException() { + SyncLock syncLock(this); + return error; +} + +void OneMerge::abort() { + SyncLock syncLock(this); + aborted = true; +} + +bool OneMerge::isAborted() { + SyncLock syncLock(this); + return aborted; +} + +void OneMerge::checkAborted(const DirectoryPtr& dir) { + SyncLock syncLock(this); + if (aborted) { + boost::throw_exception(MergeAbortedException(L"merge is aborted: " + segString(dir))); } - - String OneMerge::segString(DirectoryPtr dir) - { - StringStream buffer; - int32_t numSegments = segments->size(); - for (int32_t i = 0; i < numSegments; ++i) - { - if (i > 0) - buffer << L" "; - buffer << segments->info(i)->segString(dir); +} + +String OneMerge::segString(const DirectoryPtr& dir) { + StringStream buffer; + int32_t numSegments = segments->size(); + for (int32_t i = 0; i < numSegments; ++i) { + if (i > 0) { + buffer << L" "; } - if (info) - buffer << L" into " + info->name; - if (optimize) - buffer << L" [optimize]"; - if (mergeDocStores) - buffer << L" [mergeDocStores]"; - return buffer.str(); + buffer << segments->info(i)->segString(dir); } - - MergeSpecification::MergeSpecification() - { - merges = Collection::newInstance(); + if (info) { + buffer << L" into " + info->name; } - - MergeSpecification::~MergeSpecification() - { + if (optimize) { + buffer << L" [optimize]"; } - - void MergeSpecification::add(OneMergePtr merge) - { - merges.add(merge); + if (mergeDocStores) { + buffer << L" [mergeDocStores]"; } - - String MergeSpecification::segString(DirectoryPtr dir) - { - String seg(L"MergeSpec:\n"); - int32_t i = 1; - for (Collection::iterator merge = merges.begin(); merge != merges.end(); ++merge) - seg += L" " + StringUtils::toString(i++) + L": " + (*merge)->segString(dir); - return seg; + return buffer.str(); +} + +MergeSpecification::MergeSpecification() { + merges = Collection::newInstance(); +} + +MergeSpecification::~MergeSpecification() { +} + +void 
MergeSpecification::add(const OneMergePtr& merge) { + merges.add(merge); +} + +String MergeSpecification::segString(const DirectoryPtr& dir) { + String seg(L"MergeSpec:\n"); + int32_t i = 1; + for (Collection::iterator merge = merges.begin(); merge != merges.end(); ++merge) { + seg += L" " + StringUtils::toString(i++) + L": " + (*merge)->segString(dir); } + return seg; +} + } diff --git a/src/core/index/MergeScheduler.cpp b/src/core/index/MergeScheduler.cpp index eafa734f..785f628e 100644 --- a/src/core/index/MergeScheduler.cpp +++ b/src/core/index/MergeScheduler.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "MergeScheduler.h" -namespace Lucene -{ - MergeScheduler::~MergeScheduler() - { - } +namespace Lucene { + +MergeScheduler::~MergeScheduler() { +} + } diff --git a/src/core/index/MultiLevelSkipListReader.cpp b/src/core/index/MultiLevelSkipListReader.cpp index 3567ea4b..c31dd767 100644 --- a/src/core/index/MultiLevelSkipListReader.cpp +++ b/src/core/index/MultiLevelSkipListReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,227 +9,206 @@ #include "BufferedIndexInput.h" #include "MiscUtils.h" -namespace Lucene -{ - MultiLevelSkipListReader::MultiLevelSkipListReader(IndexInputPtr skipStream, int32_t maxSkipLevels, int32_t skipInterval) - { - this->numberOfLevelsToBuffer = 1; - this->numberOfSkipLevels = 0; - this->docCount = 0; - this->haveSkipped = false; - this->lastDoc = 0; - this->lastChildPointer = 0; - - this->skipStream = Collection::newInstance(maxSkipLevels); - this->skipPointer = Collection::newInstance(maxSkipLevels); - this->childPointer = Collection::newInstance(maxSkipLevels); - this->numSkipped = Collection::newInstance(maxSkipLevels); - this->maxNumberOfSkipLevels = maxSkipLevels; - this->skipInterval = Collection::newInstance(maxSkipLevels); - this->skipStream[0] = skipStream; - this->inputIsBuffered = boost::dynamic_pointer_cast(skipStream); - this->skipInterval[0] = skipInterval; - this->skipDoc = Collection::newInstance(maxSkipLevels); - - MiscUtils::arrayFill(this->skipPointer.begin(), 0, this->skipPointer.size(), 0); - MiscUtils::arrayFill(this->childPointer.begin(), 0, this->childPointer.size(), 0); - MiscUtils::arrayFill(this->numSkipped.begin(), 0, this->numSkipped.size(), 0); - MiscUtils::arrayFill(this->skipDoc.begin(), 0, this->skipDoc.size(), 0); - - for (int32_t i = 1; i < maxSkipLevels; ++i) - { - // cache skip intervals - this->skipInterval[i] = this->skipInterval[i - 1] * skipInterval; - } +namespace Lucene { + +MultiLevelSkipListReader::MultiLevelSkipListReader(const IndexInputPtr& skipStream, int32_t maxSkipLevels, int32_t skipInterval) { + this->numberOfLevelsToBuffer = 1; + this->numberOfSkipLevels = 0; + this->docCount = 0; + this->haveSkipped = false; + this->lastDoc = 0; + this->lastChildPointer = 0; + + this->skipStream = Collection::newInstance(maxSkipLevels); + this->skipPointer = Collection::newInstance(maxSkipLevels); + this->childPointer = 
Collection::newInstance(maxSkipLevels); + this->numSkipped = Collection::newInstance(maxSkipLevels); + this->maxNumberOfSkipLevels = maxSkipLevels; + this->skipInterval = Collection::newInstance(maxSkipLevels); + this->skipStream[0] = skipStream; + this->inputIsBuffered = boost::dynamic_pointer_cast(skipStream).get() != NULL; + this->skipInterval[0] = skipInterval; + this->skipDoc = Collection::newInstance(maxSkipLevels); + + MiscUtils::arrayFill(this->skipPointer.begin(), 0, this->skipPointer.size(), 0); + MiscUtils::arrayFill(this->childPointer.begin(), 0, this->childPointer.size(), 0); + MiscUtils::arrayFill(this->numSkipped.begin(), 0, this->numSkipped.size(), 0); + MiscUtils::arrayFill(this->skipDoc.begin(), 0, this->skipDoc.size(), 0); + + for (int32_t i = 1; i < maxSkipLevels; ++i) { + // cache skip intervals + this->skipInterval[i] = this->skipInterval[i - 1] * skipInterval; } - - MultiLevelSkipListReader::~MultiLevelSkipListReader() - { +} + +MultiLevelSkipListReader::~MultiLevelSkipListReader() { +} + +int32_t MultiLevelSkipListReader::getDoc() { + return lastDoc; +} + +int32_t MultiLevelSkipListReader::skipTo(int32_t target) { + if (!haveSkipped) { + // first time, load skip levels + loadSkipLevels(); + haveSkipped = true; } - - int32_t MultiLevelSkipListReader::getDoc() - { - return lastDoc; + + // walk up the levels until highest level is found that has a skip for this target + int32_t level = 0; + while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) { + ++level; } - - int32_t MultiLevelSkipListReader::skipTo(int32_t target) - { - if (!haveSkipped) - { - // first time, load skip levels - loadSkipLevels(); - haveSkipped = true; - } - - // walk up the levels until highest level is found that has a skip for this target - int32_t level = 0; - while (level < numberOfSkipLevels - 1 && target > skipDoc[level + 1]) - ++level; - - while (level >= 0) - { - if (target > skipDoc[level]) - { - if (!loadNextSkip(level)) - continue; + + while (level 
>= 0) { + if (target > skipDoc[level]) { + if (!loadNextSkip(level)) { + continue; } - else - { - // no more skips on this level, go down one level - if (level > 0 && lastChildPointer > (int32_t)skipStream[level - 1]->getFilePointer()) - seekChild(level - 1); - --level; + } else { + // no more skips on this level, go down one level + if (level > 0 && lastChildPointer > skipStream[level - 1]->getFilePointer()) { + seekChild(level - 1); } + --level; } - - return numSkipped[0] - skipInterval[0] - 1; } - - bool MultiLevelSkipListReader::loadNextSkip(int32_t level) - { - // we have to skip, the target document is greater than the current skip list entry - setLastSkipData(level); - - numSkipped[level] += skipInterval[level]; - - if (numSkipped[level] > docCount) - { - // this skip list is exhausted - skipDoc[level] = INT_MAX; - if (numberOfSkipLevels > level) - numberOfSkipLevels = level; - return false; - } - - // read next skip entry - skipDoc[level] += readSkipData(level, skipStream[level]); - - if (level != 0) - { - // read the child pointer if we are not on the leaf level - childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; + + return numSkipped[0] - skipInterval[0] - 1; +} + +bool MultiLevelSkipListReader::loadNextSkip(int32_t level) { + // we have to skip, the target document is greater than the current skip list entry + setLastSkipData(level); + + numSkipped[level] += skipInterval[level]; + + if (numSkipped[level] > docCount) { + // this skip list is exhausted + skipDoc[level] = INT_MAX; + if (numberOfSkipLevels > level) { + numberOfSkipLevels = level; } - - return true; - } - - void MultiLevelSkipListReader::seekChild(int32_t level) - { - skipStream[level]->seek(lastChildPointer); - numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1]; - skipDoc[level] = lastDoc; - if (level > 0) - childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; + return false; } - - void MultiLevelSkipListReader::close() 
- { - for (Collection::iterator skip = skipStream.begin(); skip != skipStream.end(); ++skip) - { - if (*skip) - (*skip)->close(); - } + + // read next skip entry + skipDoc[level] += readSkipData(level, skipStream[level]); + + if (level != 0) { + // read the child pointer if we are not on the leaf level + childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; } - - void MultiLevelSkipListReader::init(int64_t skipPointer, int32_t df) - { - this->skipPointer[0] = skipPointer; - this->docCount = df; - MiscUtils::arrayFill(skipDoc.begin(), 0, skipDoc.size(), 0); - MiscUtils::arrayFill(numSkipped.begin(), 0, numSkipped.size(), 0); - MiscUtils::arrayFill(childPointer.begin(), 0, childPointer.size(), 0); - - haveSkipped = false; - for (int32_t i = 1; i < numberOfSkipLevels; ++i) - skipStream[i].reset(); + + return true; +} + +void MultiLevelSkipListReader::seekChild(int32_t level) { + skipStream[level]->seek(lastChildPointer); + numSkipped[level] = numSkipped[level + 1] - skipInterval[level + 1]; + skipDoc[level] = lastDoc; + if (level > 0) { + childPointer[level] = skipStream[level]->readVLong() + skipPointer[level - 1]; } - - void MultiLevelSkipListReader::loadSkipLevels() - { - numberOfSkipLevels = docCount == 0 ? 
0 : (int32_t)std::floor(std::log((double)docCount) / std::log((double)skipInterval[0])); - if (numberOfSkipLevels > maxNumberOfSkipLevels) - numberOfSkipLevels = maxNumberOfSkipLevels; - - skipStream[0]->seek(skipPointer[0]); - - int32_t toBuffer = numberOfLevelsToBuffer; - - for (int32_t i = numberOfSkipLevels - 1; i > 0; --i) - { - // the length of the current level - int64_t length = skipStream[0]->readVLong(); - - // the start pointer of the current level - skipPointer[i] = skipStream[0]->getFilePointer(); - - if (toBuffer > 0) - { - // buffer this level - skipStream[i] = newLucene(skipStream[0], (int32_t)length); - --toBuffer; - } - else - { - // clone this stream, it is already at the start of the current level - skipStream[i] = boost::dynamic_pointer_cast(skipStream[0]->clone()); - if (inputIsBuffered && length < BufferedIndexInput::BUFFER_SIZE) - boost::dynamic_pointer_cast(skipStream[i])->setBufferSize((int32_t)length); - - // move base stream beyond the current level - skipStream[0]->seek(skipStream[0]->getFilePointer() + length); - } +} + +void MultiLevelSkipListReader::close() { + for (int32_t i = 1; i < skipStream.size(); ++i) { + if (skipStream[i]) { + skipStream[i]->close(); } - - // use base stream for the lowest level - skipPointer[0] = skipStream[0]->getFilePointer(); - } - - void MultiLevelSkipListReader::setLastSkipData(int32_t level) - { - lastDoc = skipDoc[level]; - lastChildPointer = childPointer[level]; - } - - SkipBuffer::SkipBuffer(IndexInputPtr input, int32_t length) - { - pos = 0; - data = ByteArray::newInstance(length); - pointer = input->getFilePointer(); - input->readBytes(data.get(), 0, length); - } - - SkipBuffer::~SkipBuffer() - { - } - - void SkipBuffer::close() - { - data.reset(); - } - - int64_t SkipBuffer::getFilePointer() - { - return (pointer + pos); } - - int64_t SkipBuffer::length() - { - return data.size(); - } - - uint8_t SkipBuffer::readByte() - { - return data[pos++]; +} + +void MultiLevelSkipListReader::init(int64_t 
skipPointer, int32_t df) { + this->skipPointer[0] = skipPointer; + this->docCount = df; + MiscUtils::arrayFill(skipDoc.begin(), 0, skipDoc.size(), 0); + MiscUtils::arrayFill(numSkipped.begin(), 0, numSkipped.size(), 0); + MiscUtils::arrayFill(childPointer.begin(), 0, childPointer.size(), 0); + + haveSkipped = false; + for (int32_t i = 1; i < numberOfSkipLevels; ++i) { + skipStream[i].reset(); } - - void SkipBuffer::readBytes(uint8_t* b, int32_t offset, int32_t length) - { - MiscUtils::arrayCopy(data.get(), pos, b, offset, length); - pos += length; +} + +void MultiLevelSkipListReader::loadSkipLevels() { + numberOfSkipLevels = docCount == 0 ? 0 : (int32_t)std::floor(std::log((double)docCount) / std::log((double)skipInterval[0])); + if (numberOfSkipLevels > maxNumberOfSkipLevels) { + numberOfSkipLevels = maxNumberOfSkipLevels; } - - void SkipBuffer::seek(int64_t pos) - { - this->pos = (int32_t)(pos - pointer); + + skipStream[0]->seek(skipPointer[0]); + + int32_t toBuffer = numberOfLevelsToBuffer; + + for (int32_t i = numberOfSkipLevels - 1; i > 0; --i) { + // the length of the current level + int64_t length = skipStream[0]->readVLong(); + + // the start pointer of the current level + skipPointer[i] = skipStream[0]->getFilePointer(); + + if (toBuffer > 0) { + // buffer this level + skipStream[i] = newLucene(skipStream[0], (int32_t)length); + --toBuffer; + } else { + // clone this stream, it is already at the start of the current level + skipStream[i] = boost::dynamic_pointer_cast(skipStream[0]->clone()); + if (inputIsBuffered && length < BufferedIndexInput::BUFFER_SIZE) { + boost::dynamic_pointer_cast(skipStream[i])->setBufferSize((int32_t)length); + } + + // move base stream beyond the current level + skipStream[0]->seek(skipStream[0]->getFilePointer() + length); + } } + + // use base stream for the lowest level + skipPointer[0] = skipStream[0]->getFilePointer(); +} + +void MultiLevelSkipListReader::setLastSkipData(int32_t level) { + lastDoc = skipDoc[level]; + 
lastChildPointer = childPointer[level]; +} + +SkipBuffer::SkipBuffer(const IndexInputPtr& input, int32_t length) { + pos = 0; + data = ByteArray::newInstance(length); + pointer = input->getFilePointer(); + input->readBytes(data.get(), 0, length); +} + +SkipBuffer::~SkipBuffer() { +} + +void SkipBuffer::close() { + data.reset(); +} + +int64_t SkipBuffer::getFilePointer() { + return (pointer + pos); +} + +int64_t SkipBuffer::length() { + return data.size(); +} + +uint8_t SkipBuffer::readByte() { + return data[pos++]; +} + +void SkipBuffer::readBytes(uint8_t* b, int32_t offset, int32_t length) { + MiscUtils::arrayCopy(data.get(), pos, b, offset, length); + pos += length; +} + +void SkipBuffer::seek(int64_t pos) { + this->pos = (int32_t)(pos - pointer); +} + } diff --git a/src/core/index/MultiLevelSkipListWriter.cpp b/src/core/index/MultiLevelSkipListWriter.cpp index 13b475d9..6400000d 100644 --- a/src/core/index/MultiLevelSkipListWriter.cpp +++ b/src/core/index/MultiLevelSkipListWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,87 +8,80 @@ #include "MultiLevelSkipListWriter.h" #include "RAMOutputStream.h" -namespace Lucene -{ - MultiLevelSkipListWriter::MultiLevelSkipListWriter(int32_t skipInterval, int32_t maxSkipLevels, int32_t df) - { - this->skipInterval = skipInterval; - - // calculate the maximum number of skip levels for this document frequency - numberOfSkipLevels = df == 0 ? 
0 : (int32_t)std::floor(std::log((double)df) / std::log((double)skipInterval)); - - // make sure it does not exceed maxSkipLevels - numberOfSkipLevels = std::max(numberOfSkipLevels, maxSkipLevels); +namespace Lucene { + +MultiLevelSkipListWriter::MultiLevelSkipListWriter(int32_t skipInterval, int32_t maxSkipLevels, int32_t df) { + this->skipInterval = skipInterval; + + // calculate the maximum number of skip levels for this document frequency + numberOfSkipLevels = df == 0 ? 0 : (int32_t)std::floor(std::log((double)df) / std::log((double)skipInterval)); + + // make sure it does not exceed maxSkipLevels + numberOfSkipLevels = std::min(numberOfSkipLevels, maxSkipLevels); +} + +MultiLevelSkipListWriter::~MultiLevelSkipListWriter() { +} + +void MultiLevelSkipListWriter::init() { + skipBuffer = Collection::newInstance(numberOfSkipLevels); + for (int32_t i = 0; i < numberOfSkipLevels; ++i) { + skipBuffer[i] = newLucene(); } - - MultiLevelSkipListWriter::~MultiLevelSkipListWriter() - { +} + +void MultiLevelSkipListWriter::resetSkip() { + // creates new buffers or empties the existing ones + if (!skipBuffer) { + init(); + } else { + for (Collection::iterator buffer = skipBuffer.begin(); buffer != skipBuffer.end(); ++buffer) { + (*buffer)->reset(); + } } - - void MultiLevelSkipListWriter::init() - { - skipBuffer = Collection::newInstance(numberOfSkipLevels); - for (int32_t i = 0; i < numberOfSkipLevels; ++i) - skipBuffer[i] = newLucene(); +} + +void MultiLevelSkipListWriter::bufferSkip(int32_t df) { + int32_t numLevels = 0; + + // determine max level + for (; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) { + ++numLevels; } - - void MultiLevelSkipListWriter::resetSkip() - { - // creates new buffers or empties the existing ones - if (!skipBuffer) - init(); - else - { - for (Collection::iterator buffer = skipBuffer.begin(); buffer != skipBuffer.end(); ++buffer) - (*buffer)->reset(); + + int64_t childPointer = 0; + + for (int32_t level = 0;
level < numLevels; ++level) { + writeSkipData(level, skipBuffer[level]); + + int64_t newChildPointer = skipBuffer[level]->getFilePointer(); + + if (level != 0) { + // store child pointers for all levels except the lowest + skipBuffer[level]->writeVLong(childPointer); } + + // remember the childPointer for the next level + childPointer = newChildPointer; } - - void MultiLevelSkipListWriter::bufferSkip(int32_t df) - { - int32_t numLevels = 0; - - // determine max level - for (; (df % skipInterval) == 0 && numLevels < numberOfSkipLevels; df /= skipInterval) - ++numLevels; - - int64_t childPointer = 0; - - for (int32_t level = 0; level < numLevels; ++level) - { - writeSkipData(level, skipBuffer[level]); - - int64_t newChildPointer = skipBuffer[level]->getFilePointer(); - - if (level != 0) - { - // store child pointers for all levels except the lowest - skipBuffer[level]->writeVLong(childPointer); - } - - // remember the childPointer for the next level - childPointer = newChildPointer; - } +} + +int64_t MultiLevelSkipListWriter::writeSkip(const IndexOutputPtr& output) { + int64_t skipPointer = output->getFilePointer(); + if (!skipBuffer || skipBuffer.empty()) { + return skipPointer; } - - int64_t MultiLevelSkipListWriter::writeSkip(IndexOutputPtr output) - { - int64_t skipPointer = output->getFilePointer(); - if (!skipBuffer || skipBuffer.empty()) - return skipPointer; - - for (int32_t level = numberOfSkipLevels - 1; level > 0; --level) - { - int64_t length = skipBuffer[level]->getFilePointer(); - if (length > 0) - { - output->writeVLong(length); - skipBuffer[level]->writeTo(output); - } + + for (int32_t level = numberOfSkipLevels - 1; level > 0; --level) { + int64_t length = skipBuffer[level]->getFilePointer(); + if (length > 0) { + output->writeVLong(length); + skipBuffer[level]->writeTo(output); } - skipBuffer[0]->writeTo(output); - return skipPointer; } - - + skipBuffer[0]->writeTo(output); + return skipPointer; +} + + } diff --git a/src/core/index/MultiReader.cpp 
b/src/core/index/MultiReader.cpp index 755500a1..8b012aa1 100644 --- a/src/core/index/MultiReader.cpp +++ b/src/core/index/MultiReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,360 +11,321 @@ #include "FieldCache.h" #include "MiscUtils.h" -namespace Lucene -{ - MultiReader::MultiReader(Collection subReaders, bool closeSubReaders) - { - this->normsCache = MapStringByteArray::newInstance(); - this->_maxDoc = 0; - this->_numDocs = -1; - this->_hasDeletions = false; - this->subReaders = subReaders; - starts = Collection::newInstance(subReaders.size() + 1); // build starts array - decrefOnClose = Collection::newInstance(subReaders.size()); - for (int32_t i = 0; i < subReaders.size(); ++i) - { - starts[i] = _maxDoc; - _maxDoc += subReaders[i]->maxDoc(); // compute maxDocs - - if (!closeSubReaders) - { - subReaders[i]->incRef(); - decrefOnClose[i] = true; - } - else - decrefOnClose[i] = false; - - if (subReaders[i]->hasDeletions()) - _hasDeletions = true; - } - starts[subReaders.size()] = _maxDoc; - } - - MultiReader::~MultiReader() - { - } - - IndexReaderPtr MultiReader::reopen() - { - SyncLock syncLock(this); - return doReopen(false); - } - - LuceneObjectPtr MultiReader::clone(LuceneObjectPtr other) - { - SyncLock syncLock(this); - try - { - return doReopen(true); +namespace Lucene { + +MultiReader::MultiReader(Collection subReaders, bool closeSubReaders) { + this->normsCache = MapStringByteArray::newInstance(); + this->_maxDoc = 0; + this->_numDocs = -1; + this->_hasDeletions = false; + this->subReaders = subReaders; + starts = Collection::newInstance(subReaders.size() + 1); // build starts 
array + decrefOnClose = Collection::newInstance(subReaders.size()); + for (int32_t i = 0; i < subReaders.size(); ++i) { + starts[i] = _maxDoc; + _maxDoc += subReaders[i]->maxDoc(); // compute maxDocs + + if (!closeSubReaders) { + subReaders[i]->incRef(); + decrefOnClose[i] = true; + } else { + decrefOnClose[i] = false; } - catch (LuceneException& e) - { - boost::throw_exception(RuntimeException(e.getError())); + + if (subReaders[i]->hasDeletions()) { + _hasDeletions = true; } - return LuceneObjectPtr(); } - - IndexReaderPtr MultiReader::doReopen(bool doClone) - { - ensureOpen(); - - bool reopened = false; - Collection newSubReaders(Collection::newInstance(subReaders.size())); - - bool success = false; - LuceneException finally; - try - { - for (int32_t i = 0; i < subReaders.size(); ++i) - { - if (doClone) - newSubReaders[i] = boost::dynamic_pointer_cast(subReaders[i]->clone()); - else - newSubReaders[i] = subReaders[i]->reopen(); - // if at least one of the subreaders was updated we remember that and return a new MultiReader - if (newSubReaders[i] != subReaders[i]) - reopened = true; + starts[subReaders.size()] = _maxDoc; +} + +MultiReader::~MultiReader() { +} + +IndexReaderPtr MultiReader::reopen() { + SyncLock syncLock(this); + return doReopen(false); +} + +LuceneObjectPtr MultiReader::clone(const LuceneObjectPtr& other) { + SyncLock syncLock(this); + try { + return doReopen(true); + } catch (LuceneException& e) { + boost::throw_exception(RuntimeException(e.getError())); + } + return LuceneObjectPtr(); +} + +IndexReaderPtr MultiReader::doReopen(bool doClone) { + ensureOpen(); + + bool reopened = false; + Collection newSubReaders(Collection::newInstance(subReaders.size())); + + bool success = false; + LuceneException finally; + try { + for (int32_t i = 0; i < subReaders.size(); ++i) { + if (doClone) { + newSubReaders[i] = boost::dynamic_pointer_cast(subReaders[i]->clone()); + } else { + newSubReaders[i] = subReaders[i]->reopen(); + } + // if at least one of the 
subreaders was updated we remember that and return a new MultiReader + if (newSubReaders[i] != subReaders[i]) { + reopened = true; } - success = true; - } - catch (LuceneException& e) - { - finally = e; } - if (!success && reopened) - { - for (int32_t i = 0; i < newSubReaders.size(); ++i) - { - if (newSubReaders[i] != subReaders[i]) - { - try - { - if (newSubReaders[i]) - newSubReaders[i]->close(); - } - catch (...) - { - // keep going - we want to clean up as much as possible + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success && reopened) { + for (int32_t i = 0; i < newSubReaders.size(); ++i) { + if (newSubReaders[i] != subReaders[i]) { + try { + if (newSubReaders[i]) { + newSubReaders[i]->close(); } + } catch (...) { + // keep going - we want to clean up as much as possible } } } - finally.throwException(); - - if (reopened) - { - Collection newDecrefOnClose(Collection::newInstance(subReaders.size())); - for (int32_t i = 0; i < subReaders.size(); ++i) - { - if (newSubReaders[i] == subReaders[i]) - { - newSubReaders[i]->incRef(); - newDecrefOnClose[i] = true; - } + } + finally.throwException(); + + if (reopened) { + Collection newDecrefOnClose(Collection::newInstance(subReaders.size())); + for (int32_t i = 0; i < subReaders.size(); ++i) { + if (newSubReaders[i] == subReaders[i]) { + newSubReaders[i]->incRef(); + newDecrefOnClose[i] = true; } - - MultiReaderPtr mr(newLucene(newSubReaders)); - mr->decrefOnClose = newDecrefOnClose; - return mr; } - else - return shared_from_this(); - } - - Collection MultiReader::getTermFreqVectors(int32_t docNumber) - { - ensureOpen(); - int32_t i = readerIndex(docNumber); // find segment num - return subReaders[i]->getTermFreqVectors(docNumber - starts[i]); // dispatch to segment - } - - TermFreqVectorPtr MultiReader::getTermFreqVector(int32_t docNumber, const String& field) - { - ensureOpen(); - int32_t i = readerIndex(docNumber); // find segment num - return 
subReaders[i]->getTermFreqVector(docNumber - starts[i], field); - } - - void MultiReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) - { - ensureOpen(); - int32_t i = readerIndex(docNumber); // find segment num - subReaders[i]->getTermFreqVector(docNumber - starts[i], field, mapper); - } - - void MultiReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) - { - ensureOpen(); - int32_t i = readerIndex(docNumber); // find segment num - subReaders[i]->getTermFreqVector(docNumber - starts[i], mapper); - } - - bool MultiReader::isOptimized() - { - return false; + + MultiReaderPtr mr(newLucene(newSubReaders)); + mr->decrefOnClose = newDecrefOnClose; + return mr; + } else { + return shared_from_this(); } - - int32_t MultiReader::numDocs() - { - // Don't call ensureOpen() here (it could affect performance) - - // NOTE: multiple threads may wind up init'ing numDocs... but that's harmless - if (_numDocs == -1) - { - // check cache - int32_t n = 0; // cache miss - recompute - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - n += (*reader)->numDocs(); // sum from readers - _numDocs = n; +} + +Collection MultiReader::getTermFreqVectors(int32_t docNumber) { + ensureOpen(); + int32_t i = readerIndex(docNumber); // find segment num + return subReaders[i]->getTermFreqVectors(docNumber - starts[i]); // dispatch to segment +} + +TermFreqVectorPtr MultiReader::getTermFreqVector(int32_t docNumber, const String& field) { + ensureOpen(); + int32_t i = readerIndex(docNumber); // find segment num + return subReaders[i]->getTermFreqVector(docNumber - starts[i], field); +} + +void MultiReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { + ensureOpen(); + int32_t i = readerIndex(docNumber); // find segment num + subReaders[i]->getTermFreqVector(docNumber - starts[i], field, mapper); +} + +void MultiReader::getTermFreqVector(int32_t 
docNumber, const TermVectorMapperPtr& mapper) { + ensureOpen(); + int32_t i = readerIndex(docNumber); // find segment num + subReaders[i]->getTermFreqVector(docNumber - starts[i], mapper); +} + +bool MultiReader::isOptimized() { + return false; +} + +int32_t MultiReader::numDocs() { + // Don't call ensureOpen() here (it could affect performance) + + // NOTE: multiple threads may wind up init'ing numDocs... but that's harmless + if (_numDocs == -1) { + // check cache + int32_t n = 0; // cache miss - recompute + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + n += (*reader)->numDocs(); // sum from readers } - return _numDocs; + _numDocs = n; } - - int32_t MultiReader::maxDoc() - { - // Don't call ensureOpen() here (it could affect performance) - return _maxDoc; - } - - DocumentPtr MultiReader::document(int32_t n, FieldSelectorPtr fieldSelector) - { - ensureOpen(); - int32_t i = readerIndex(n); // find segment num - return subReaders[i]->document(n - starts[i], fieldSelector); // dispatch to segment reader - } - - bool MultiReader::isDeleted(int32_t n) - { - // Don't call ensureOpen() here (it could affect performance) - int32_t i = readerIndex(n); // find segment num - return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader - } - - bool MultiReader::hasDeletions() - { - // Don't call ensureOpen() here (it could affect performance) - return _hasDeletions; + return _numDocs; +} + +int32_t MultiReader::maxDoc() { + // Don't call ensureOpen() here (it could affect performance) + return _maxDoc; +} + +DocumentPtr MultiReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { + ensureOpen(); + int32_t i = readerIndex(n); // find segment num + return subReaders[i]->document(n - starts[i], fieldSelector); // dispatch to segment reader +} + +bool MultiReader::isDeleted(int32_t n) { + // Don't call ensureOpen() here (it could affect performance) + int32_t i = readerIndex(n); // find segment num + 
return subReaders[i]->isDeleted(n - starts[i]); // dispatch to segment reader +} + +bool MultiReader::hasDeletions() { + // Don't call ensureOpen() here (it could affect performance) + return _hasDeletions; +} + +void MultiReader::doDelete(int32_t docNum) { + _numDocs = -1; // invalidate cache + int32_t i = readerIndex(docNum); // find segment num + subReaders[i]->deleteDocument(docNum - starts[i]); // dispatch to segment reader + _hasDeletions = true; +} + +void MultiReader::doUndeleteAll() { + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + (*reader)->undeleteAll(); } - - void MultiReader::doDelete(int32_t docNum) - { - _numDocs = -1; // invalidate cache - int32_t i = readerIndex(docNum); // find segment num - subReaders[i]->deleteDocument(docNum - starts[i]); // dispatch to segment reader - _hasDeletions = true; + _hasDeletions = false; + _numDocs = -1; // invalidate cache +} + +int32_t MultiReader::readerIndex(int32_t n) { + return DirectoryReader::readerIndex(n, this->starts, this->subReaders.size()); +} + +bool MultiReader::hasNorms(const String& field) { + ensureOpen(); + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + if ((*reader)->hasNorms(field)) { + return true; + } } - - void MultiReader::doUndeleteAll() - { - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - (*reader)->undeleteAll(); - _hasDeletions = false; - _numDocs = -1; // invalidate cache + return false; +} + +ByteArray MultiReader::norms(const String& field) { + SyncLock syncLock(this); + ensureOpen(); + ByteArray bytes(normsCache.get(field)); + if (bytes) { + return bytes; // cache hit } - - int32_t MultiReader::readerIndex(int32_t n) - { - return DirectoryReader::readerIndex(n, this->starts, this->subReaders.size()); + if (!hasNorms(field)) { + return ByteArray(); } - - bool MultiReader::hasNorms(const String& field) - { - ensureOpen(); - for 
(Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - { - if ((*reader)->hasNorms(field)) - return true; - } - return false; + + bytes = ByteArray::newInstance(maxDoc()); + for (int32_t i = 0; i < subReaders.size(); ++i) { + subReaders[i]->norms(field, bytes, starts[i]); } - - ByteArray MultiReader::norms(const String& field) - { - SyncLock syncLock(this); - ensureOpen(); - ByteArray bytes(normsCache.get(field)); - if (bytes) - return bytes; // cache hit - if (!hasNorms(field)) - return ByteArray(); - - bytes = ByteArray::newInstance(maxDoc()); - for (int32_t i = 0; i < subReaders.size(); ++i) - subReaders[i]->norms(field, bytes, starts[i]); - normsCache.put(field, bytes); // update cache - return bytes; + normsCache.put(field, bytes); // update cache + return bytes; +} + +void MultiReader::norms(const String& field, ByteArray norms, int32_t offset) { + SyncLock syncLock(this); + ensureOpen(); + ByteArray bytes(normsCache.get(field)); + for (int32_t i = 0; i < subReaders.size(); ++i) { // read from segments + subReaders[i]->norms(field, norms, offset + starts[i]); } - - void MultiReader::norms(const String& field, ByteArray norms, int32_t offset) - { - SyncLock syncLock(this); - ensureOpen(); - ByteArray bytes(normsCache.get(field)); - for (int32_t i = 0; i < subReaders.size(); ++i) // read from segments + + if (!bytes && !hasNorms(field)) { + MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); + } else if (bytes) { // cache hit + MiscUtils::arrayCopy(bytes.get(), 0, norms.get(), offset, maxDoc()); + } else { + for (int32_t i = 0; i < subReaders.size(); ++i) { subReaders[i]->norms(field, norms, offset + starts[i]); - - if (!bytes && !hasNorms(field)) - MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); - else if (bytes) // cache hit - MiscUtils::arrayCopy(bytes.get(), 0, norms.get(), offset, maxDoc()); - else - { - for (int32_t i = 0; i < 
subReaders.size(); ++i) - subReaders[i]->norms(field, norms, offset + starts[i]); } } - - void MultiReader::doSetNorm(int32_t doc, const String& field, uint8_t value) - { - { - SyncLock normsLock(&normsCache); - normsCache.remove(field); // clear cache - } - int32_t i = readerIndex(doc); // find segment num - subReaders[i]->setNorm(doc - starts[i], field, value); // dispatch - } - - TermEnumPtr MultiReader::terms() - { - ensureOpen(); - return newLucene(shared_from_this(), subReaders, starts, TermPtr()); - } - - TermEnumPtr MultiReader::terms(TermPtr t) - { - ensureOpen(); - return newLucene(shared_from_this(), subReaders, starts, t); - } - - int32_t MultiReader::docFreq(TermPtr t) - { - ensureOpen(); - int32_t total = 0; // sum freqs in segments - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - total += (*reader)->docFreq(t); - return total; - } - - TermDocsPtr MultiReader::termDocs() +} + +void MultiReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { { - ensureOpen(); - return newLucene(shared_from_this(), subReaders, starts); + SyncLock normsLock(&normsCache); + normsCache.remove(field); // clear cache } - - TermPositionsPtr MultiReader::termPositions() - { - ensureOpen(); - return newLucene(shared_from_this(), subReaders, starts); + int32_t i = readerIndex(doc); // find segment num + subReaders[i]->setNorm(doc - starts[i], field, value); // dispatch +} + +TermEnumPtr MultiReader::terms() { + ensureOpen(); + return newLucene(shared_from_this(), subReaders, starts, TermPtr()); +} + +TermEnumPtr MultiReader::terms(const TermPtr& t) { + ensureOpen(); + return newLucene(shared_from_this(), subReaders, starts, t); +} + +int32_t MultiReader::docFreq(const TermPtr& t) { + ensureOpen(); + int32_t total = 0; // sum freqs in segments + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + total += (*reader)->docFreq(t); } - - void 
MultiReader::doCommit(MapStringString commitUserData) - { - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - (*reader)->commit(commitUserData); + return total; +} + +TermDocsPtr MultiReader::termDocs() { + ensureOpen(); + return newLucene(shared_from_this(), subReaders, starts); +} + +TermPositionsPtr MultiReader::termPositions() { + ensureOpen(); + return newLucene(shared_from_this(), subReaders, starts); +} + +void MultiReader::doCommit(MapStringString commitUserData) { + for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + (*reader)->commit(commitUserData); } - - void MultiReader::doClose() - { - SyncLock syncLock(this); - for (int32_t i = 0; i < subReaders.size(); ++i) - { - if (decrefOnClose[i]) - subReaders[i]->decRef(); - else - subReaders[i]->close(); +} + +void MultiReader::doClose() { + SyncLock syncLock(this); + for (int32_t i = 0; i < subReaders.size(); ++i) { + if (decrefOnClose[i]) { + subReaders[i]->decRef(); + } else { + subReaders[i]->close(); } - - // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is - // generally not a good idea) - FieldCache::DEFAULT()->purge(shared_from_this()); - } - - HashSet MultiReader::getFieldNames(FieldOption fieldOption) - { - ensureOpen(); - return DirectoryReader::getFieldNames(fieldOption, this->subReaders); } - - bool MultiReader::isCurrent() - { - for (Collection::iterator reader = subReaders.begin(); reader != subReaders.end(); ++reader) - { - if (!(*reader)->isCurrent()) - return false; + + // NOTE: only needed in case someone had asked for FieldCache for top-level reader (which is + // generally not a good idea) + FieldCache::DEFAULT()->purge(shared_from_this()); +} + +HashSet MultiReader::getFieldNames(FieldOption fieldOption) { + ensureOpen(); + return DirectoryReader::getFieldNames(fieldOption, this->subReaders); +} + +bool MultiReader::isCurrent() { + for (Collection::iterator 
reader = subReaders.begin(); reader != subReaders.end(); ++reader) { + if (!(*reader)->isCurrent()) { + return false; } - // all subreaders are up to date - return true; - } - - int64_t MultiReader::getVersion() - { - boost::throw_exception(UnsupportedOperationException()); - return 0; - } - - Collection MultiReader::getSequentialSubReaders() - { - return subReaders; } + // all subreaders are up to date + return true; +} + +int64_t MultiReader::getVersion() { + boost::throw_exception(UnsupportedOperationException()); + return 0; +} + +Collection MultiReader::getSequentialSubReaders() { + return subReaders; +} + } diff --git a/src/core/index/MultipleTermPositions.cpp b/src/core/index/MultipleTermPositions.cpp index a6e3adeb..afed29dd 100644 --- a/src/core/index/MultipleTermPositions.cpp +++ b/src/core/index/MultipleTermPositions.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,185 +10,165 @@ #include "IndexReader.h" #include "Term.h" -namespace Lucene -{ - MultipleTermPositions::MultipleTermPositions(IndexReaderPtr indexReader, Collection terms) - { - Collection termPositions(Collection::newInstance()); - - for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) - termPositions.add(indexReader->termPositions(*term)); - - termPositionsQueue = newLucene(termPositions); - posList = newLucene(); - _doc = 0; - _freq = 0; +namespace Lucene { + +MultipleTermPositions::MultipleTermPositions(const IndexReaderPtr& indexReader, Collection terms) { + Collection termPositions(Collection::newInstance()); + + for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { + termPositions.add(indexReader->termPositions(*term)); } - - MultipleTermPositions::~MultipleTermPositions() - { + + termPositionsQueue = newLucene(termPositions); + posList = newLucene(); + _doc = 0; + _freq = 0; +} + +MultipleTermPositions::~MultipleTermPositions() { +} + +bool MultipleTermPositions::next() { + if (termPositionsQueue->empty()) { + return false; } - - bool MultipleTermPositions::next() - { - if (termPositionsQueue->empty()) - return false; - - posList->clear(); - _doc = termPositionsQueue->top()->doc(); - - TermPositionsPtr tp; - do - { - tp = termPositionsQueue->top(); - - for (int32_t i = 0; i < tp->freq(); ++i) - posList->add(tp->nextPosition()); - - if (tp->next()) - termPositionsQueue->updateTop(); - else - termPositionsQueue->pop(); - tp->close(); + + posList->clear(); + _doc = termPositionsQueue->top()->doc(); + + TermPositionsPtr tp; + do { + tp = termPositionsQueue->top(); + + for (int32_t i = 0; i < tp->freq(); ++i) { + posList->add(tp->nextPosition()); } - while (!termPositionsQueue->empty() && termPositionsQueue->top()->doc() == _doc); - - posList->sort(); - _freq = posList->size(); - - return true; - } - - int32_t 
MultipleTermPositions::nextPosition() - { - return posList->next(); - } - - bool MultipleTermPositions::skipTo(int32_t target) - { - while (termPositionsQueue->top() && target > termPositionsQueue->top()->doc()) - { - TermPositionsPtr tp(termPositionsQueue->top()); + + if (tp->next()) { + termPositionsQueue->updateTop(); + } else { termPositionsQueue->pop(); - - if (tp->skipTo(target)) - termPositionsQueue->add(tp); - else - tp->close(); + tp->close(); } - return next(); - } - - int32_t MultipleTermPositions::doc() - { - return _doc; - } - - int32_t MultipleTermPositions::freq() - { - return _freq; - } - - void MultipleTermPositions::close() - { - while (!termPositionsQueue->empty()) - termPositionsQueue->pop()->close(); - } - - void MultipleTermPositions::seek(TermPtr term) - { - boost::throw_exception(UnsupportedOperationException()); - } - - void MultipleTermPositions::seek(TermEnumPtr termEnum) - { - boost::throw_exception(UnsupportedOperationException()); - } - - int32_t MultipleTermPositions::read(Collection docs, Collection freqs) - { - boost::throw_exception(UnsupportedOperationException()); - return 0; - } - - ByteArray MultipleTermPositions::getPayload(ByteArray data, int32_t offset) - { - boost::throw_exception(UnsupportedOperationException()); - return ByteArray(); - } - - bool MultipleTermPositions::isPayloadAvailable() - { - return false; - } - - TermPositionsQueue::TermPositionsQueue(Collection termPositions) : PriorityQueue(termPositions.size()) - { - this->termPositions = termPositions; - } - - TermPositionsQueue::~TermPositionsQueue() - { - } - - void TermPositionsQueue::initialize() - { - PriorityQueue::initialize(); - for (Collection::iterator tp = termPositions.begin(); tp != termPositions.end(); ++tp) - { - if ((*tp)->next()) - add(*tp); + } while (!termPositionsQueue->empty() && termPositionsQueue->top()->doc() == _doc); + + posList->sort(); + _freq = posList->size(); + + return true; +} + +int32_t MultipleTermPositions::nextPosition() { + 
return posList->next(); +} + +bool MultipleTermPositions::skipTo(int32_t target) { + while (termPositionsQueue->top() && target > termPositionsQueue->top()->doc()) { + TermPositionsPtr tp(termPositionsQueue->top()); + termPositionsQueue->pop(); + + if (tp->skipTo(target)) { + termPositionsQueue->add(tp); + } else { + tp->close(); } } - - bool TermPositionsQueue::lessThan(const TermPositionsPtr& first, const TermPositionsPtr& second) - { - return (first->doc() < second->doc()); - } - - IntQueue::IntQueue() - { - arraySize = 16; - index = 0; - lastIndex = 0; - array = Collection::newInstance(arraySize); - } - - IntQueue::~IntQueue() - { - } - - void IntQueue::add(int32_t i) - { - if (lastIndex == arraySize) - growArray(); - array[lastIndex++] = i; - } - - int32_t IntQueue::next() - { - return array[index++]; - } - - void IntQueue::sort() - { - std::sort(array.begin() + index, array.begin() + lastIndex); - } - - void IntQueue::clear() - { - index = 0; - lastIndex = 0; + return next(); +} + +int32_t MultipleTermPositions::doc() { + return _doc; +} + +int32_t MultipleTermPositions::freq() { + return _freq; +} + +void MultipleTermPositions::close() { + while (!termPositionsQueue->empty()) { + termPositionsQueue->pop()->close(); } - - int32_t IntQueue::size() - { - return (lastIndex - index); +} + +void MultipleTermPositions::seek(const TermPtr& term) { + boost::throw_exception(UnsupportedOperationException()); +} + +void MultipleTermPositions::seek(const TermEnumPtr& termEnum) { + boost::throw_exception(UnsupportedOperationException()); +} + +int32_t MultipleTermPositions::read(Collection& docs, Collection& freqs) { + boost::throw_exception(UnsupportedOperationException()); + return 0; +} + +ByteArray MultipleTermPositions::getPayload(ByteArray data, int32_t offset) { + boost::throw_exception(UnsupportedOperationException()); + return ByteArray(); +} + +bool MultipleTermPositions::isPayloadAvailable() { + return false; +} + 
+TermPositionsQueue::TermPositionsQueue(Collection termPositions) : PriorityQueue(termPositions.size()) { + this->termPositions = termPositions; +} + +TermPositionsQueue::~TermPositionsQueue() { +} + +void TermPositionsQueue::initialize() { + PriorityQueue::initialize(); + for (Collection::iterator tp = termPositions.begin(); tp != termPositions.end(); ++tp) { + if ((*tp)->next()) { + add(*tp); + } } - - void IntQueue::growArray() - { - array.resize(arraySize * 2); - arraySize *= 2; +} + +bool TermPositionsQueue::lessThan(const TermPositionsPtr& first, const TermPositionsPtr& second) { + return (first->doc() < second->doc()); +} + +IntQueue::IntQueue() { + arraySize = 16; + index = 0; + lastIndex = 0; + array = Collection::newInstance(arraySize); +} + +IntQueue::~IntQueue() { +} + +void IntQueue::add(int32_t i) { + if (lastIndex == arraySize) { + growArray(); } + array[lastIndex++] = i; +} + +int32_t IntQueue::next() { + return array[index++]; +} + +void IntQueue::sort() { + std::sort(array.begin() + index, array.begin() + lastIndex); +} + +void IntQueue::clear() { + index = 0; + lastIndex = 0; +} + +int32_t IntQueue::size() { + return (lastIndex - index); +} + +void IntQueue::growArray() { + array.resize(arraySize * 2); + arraySize *= 2; +} + } diff --git a/src/core/index/NormsWriter.cpp b/src/core/index/NormsWriter.cpp index ecbc5e87..002a3d76 100644 --- a/src/core/index/NormsWriter.cpp +++ b/src/core/index/NormsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -18,172 +18,151 @@ #include "FieldInfo.h" #include "Directory.h" -namespace Lucene -{ - NormsWriter::NormsWriter() - { - } - - NormsWriter::~NormsWriter() - { - } - - uint8_t NormsWriter::getDefaultNorm() - { - static uint8_t defaultNorm = 0; - if (defaultNorm == 0) - defaultNorm = Similarity::encodeNorm(1.0); - return defaultNorm; - } - - InvertedDocEndConsumerPerThreadPtr NormsWriter::addThread(DocInverterPerThreadPtr docInverterPerThread) - { - return newLucene(docInverterPerThread, shared_from_this()); - } - - void NormsWriter::abort() - { - } - - void NormsWriter::files(HashSet files) - { - } - - void NormsWriter::setFieldInfos(FieldInfosPtr fieldInfos) - { - this->fieldInfos = fieldInfos; - } - - void NormsWriter::flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, SegmentWriteStatePtr state) - { - MapFieldInfoCollectionNormsWriterPerField byField(MapFieldInfoCollectionNormsWriterPerField::newInstance()); - - // Typically, each thread will have encountered the same field. 
So first we collate by field, ie all - // per-thread field instances that correspond to the same FieldInfo - for (MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) - { - for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end();) - { - NormsWriterPerFieldPtr normsPerField(boost::static_pointer_cast(*perField)); - if (normsPerField->upto > 0) - { - // It has some norms - Collection l = byField.get(normsPerField->fieldInfo); - if (!l) - { - l = Collection::newInstance(); - byField.put(normsPerField->fieldInfo, l); - } - l.add(normsPerField); - ++perField; - } - else - { - // Remove this field since we haven't seen it since the previous flush - perField = entry->second.remove(perField); +namespace Lucene { + +NormsWriter::NormsWriter() { +} + +NormsWriter::~NormsWriter() { +} + +uint8_t NormsWriter::getDefaultNorm() { + static uint8_t defaultNorm = 0; + LUCENE_RUN_ONCE( + defaultNorm = Similarity::encodeNorm(1.0); + ); + return defaultNorm; +} + +InvertedDocEndConsumerPerThreadPtr NormsWriter::addThread(const DocInverterPerThreadPtr& docInverterPerThread) { + return newLucene(docInverterPerThread, shared_from_this()); +} + +void NormsWriter::abort() { +} + +void NormsWriter::files(HashSet files) { +} + +void NormsWriter::setFieldInfos(const FieldInfosPtr& fieldInfos) { + this->fieldInfos = fieldInfos; +} + +void NormsWriter::flush(MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { + MapFieldInfoCollectionNormsWriterPerField byField(MapFieldInfoCollectionNormsWriterPerField::newInstance()); + + // Typically, each thread will have encountered the same field. 
So first we collate by field, ie all + // per-thread field instances that correspond to the same FieldInfo + for (MapInvertedDocEndConsumerPerThreadCollectionInvertedDocEndConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { + for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end();) { + NormsWriterPerFieldPtr normsPerField(boost::static_pointer_cast(*perField)); + if (normsPerField->upto > 0) { + // It has some norms + Collection l = byField.get(normsPerField->fieldInfo); + if (!l) { + l = Collection::newInstance(); + byField.put(normsPerField->fieldInfo, l); } + l.add(normsPerField); + ++perField; + } else { + // Remove this field since we haven't seen it since the previous flush + perField = entry->second.remove(perField); } } - - String normsFileName(state->segmentName + L"." + IndexFileNames::NORMS_EXTENSION()); - state->flushedFiles.add(normsFileName); - IndexOutputPtr normsOut(state->directory->createOutput(normsFileName)); - - LuceneException finally; - try - { - normsOut->writeBytes(SegmentMerger::NORMS_HEADER, 0, SegmentMerger::NORMS_HEADER_LENGTH); - - int32_t numField = fieldInfos->size(); - - int32_t normCount = 0; - - for (int32_t fieldNumber = 0; fieldNumber < numField; ++fieldNumber) - { - FieldInfoPtr fieldInfo(fieldInfos->fieldInfo(fieldNumber)); - - Collection toMerge = byField.get(fieldInfo); - int32_t upto = 0; - - if (toMerge) - { - int32_t numFields = toMerge.size(); - - ++normCount; - - Collection fields(Collection::newInstance(numFields)); - Collection uptos(Collection::newInstance(numFields)); - - for (int32_t j = 0; j < numFields; ++j) - fields[j] = toMerge[j]; - - int32_t numLeft = numFields; - - while (numLeft > 0) - { - BOOST_ASSERT(uptos[0] < fields[0]->docIDs.size()); - - int32_t minLoc = 0; - int32_t minDocID = fields[0]->docIDs[uptos[0]]; - - for (int32_t j = 1; j < numLeft; ++j) - { - int32_t docID = fields[j]->docIDs[uptos[j]]; - if (docID < 
minDocID) - { - minDocID = docID; - minLoc = j; - } - } - - BOOST_ASSERT(minDocID < state->numDocs); - - // Fill hole - for (;upto < minDocID; ++upto) - normsOut->writeByte(getDefaultNorm()); - - normsOut->writeByte(fields[minLoc]->norms[uptos[minLoc]]); - ++(uptos[minLoc]); - ++upto; - - if (uptos[minLoc] == fields[minLoc]->upto) - { - fields[minLoc]->reset(); - if (minLoc != numLeft - 1) - { - fields[minLoc] = fields[numLeft - 1]; - uptos[minLoc] = uptos[numLeft - 1]; - } - --numLeft; + } + + String normsFileName(state->segmentName + L"." + IndexFileNames::NORMS_EXTENSION()); + state->flushedFiles.add(normsFileName); + IndexOutputPtr normsOut(state->directory->createOutput(normsFileName)); + + LuceneException finally; + try { + normsOut->writeBytes(SegmentMerger::NORMS_HEADER, 0, SegmentMerger::NORMS_HEADER_LENGTH); + + int32_t numField = fieldInfos->size(); + + int32_t normCount = 0; + + for (int32_t fieldNumber = 0; fieldNumber < numField; ++fieldNumber) { + FieldInfoPtr fieldInfo(fieldInfos->fieldInfo(fieldNumber)); + + Collection toMerge = byField.get(fieldInfo); + int32_t upto = 0; + + if (toMerge) { + int32_t numFields = toMerge.size(); + + ++normCount; + + Collection fields(Collection::newInstance(numFields)); + Collection uptos(Collection::newInstance(numFields)); + + for (int32_t j = 0; j < numFields; ++j) { + fields[j] = toMerge[j]; + } + + int32_t numLeft = numFields; + + while (numLeft > 0) { + BOOST_ASSERT(uptos[0] < fields[0]->docIDs.size()); + + int32_t minLoc = 0; + int32_t minDocID = fields[0]->docIDs[uptos[0]]; + + for (int32_t j = 1; j < numLeft; ++j) { + int32_t docID = fields[j]->docIDs[uptos[j]]; + if (docID < minDocID) { + minDocID = docID; + minLoc = j; } } - - // Fill final hole with defaultNorm - for (;upto < state->numDocs; ++upto) + + BOOST_ASSERT(minDocID < state->numDocs); + + // Fill hole + for (; upto < minDocID; ++upto) { normsOut->writeByte(getDefaultNorm()); + } + + normsOut->writeByte(fields[minLoc]->norms[uptos[minLoc]]); + 
++(uptos[minLoc]); + ++upto; + + if (uptos[minLoc] == fields[minLoc]->upto) { + fields[minLoc]->reset(); + if (minLoc != numLeft - 1) { + fields[minLoc] = fields[numLeft - 1]; + uptos[minLoc] = uptos[numLeft - 1]; + } + --numLeft; + } } - else if (fieldInfo->isIndexed && !fieldInfo->omitNorms) - { - ++normCount; - // Fill entire field with default norm - for (;upto < state->numDocs; ++upto) - normsOut->writeByte(getDefaultNorm()); + + // Fill final hole with defaultNorm + for (; upto < state->numDocs; ++upto) { + normsOut->writeByte(getDefaultNorm()); + } + } else if (fieldInfo->isIndexed && !fieldInfo->omitNorms) { + ++normCount; + // Fill entire field with default norm + for (; upto < state->numDocs; ++upto) { + normsOut->writeByte(getDefaultNorm()); } - - BOOST_ASSERT(4 + normCount * state->numDocs == normsOut->getFilePointer()); // .nrm file size mismatch? } + + BOOST_ASSERT(4 + normCount * state->numDocs == normsOut->getFilePointer()); // .nrm file size mismatch? } - catch (LuceneException& e) - { - finally = e; - } - - normsOut->close(); - - finally.throwException(); - } - - void NormsWriter::closeDocStore(SegmentWriteStatePtr state) - { + } catch (LuceneException& e) { + finally = e; } + + normsOut->close(); + + finally.throwException(); +} + +void NormsWriter::closeDocStore(const SegmentWriteStatePtr& state) { +} + } diff --git a/src/core/index/NormsWriterPerField.cpp b/src/core/index/NormsWriterPerField.cpp index 951ee866..11377235 100644 --- a/src/core/index/NormsWriterPerField.cpp +++ b/src/core/index/NormsWriterPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,57 +13,50 @@ #include "FieldInfo.h" #include "MiscUtils.h" -namespace Lucene -{ - NormsWriterPerField::NormsWriterPerField(DocInverterPerFieldPtr docInverterPerField, NormsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo) - { - docIDs = Collection::newInstance(1); - norms = ByteArray::newInstance(1); - upto = 0; - - this->_perThread = perThread; - this->fieldInfo = fieldInfo; - docState = perThread->docState; - fieldState = docInverterPerField->fieldState; - } - - NormsWriterPerField::~NormsWriterPerField() - { - } - - void NormsWriterPerField::reset() - { - // Shrink back if we are over allocated now - docIDs.resize(MiscUtils::getShrinkSize(docIDs.size(), upto)); - norms.resize(MiscUtils::getShrinkSize(norms.size(), upto)); - upto = 0; - } - - void NormsWriterPerField::abort() - { - upto = 0; - } - - int32_t NormsWriterPerField::compareTo(LuceneObjectPtr other) - { - return fieldInfo->name.compare(boost::static_pointer_cast(other)->fieldInfo->name); - } - - void NormsWriterPerField::finish() - { - BOOST_ASSERT(docIDs.size() == norms.size()); - if (fieldInfo->isIndexed && !fieldInfo->omitNorms) - { - if (docIDs.size() <= upto) - { - BOOST_ASSERT(docIDs.size() == upto); - docIDs.resize(MiscUtils::getNextSize(1 + upto)); - norms.resize(MiscUtils::getNextSize(1 + upto)); - } - double norm = docState->similarity->computeNorm(fieldInfo->name, fieldState); - norms[upto] = Similarity::encodeNorm(norm); - docIDs[upto] = docState->docID; - ++upto; +namespace Lucene { + +NormsWriterPerField::NormsWriterPerField(const DocInverterPerFieldPtr& docInverterPerField, const NormsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { + docIDs = Collection::newInstance(1); + norms = ByteArray::newInstance(1); + upto = 0; + + this->_perThread = perThread; + this->fieldInfo = fieldInfo; + docState = perThread->docState; + fieldState = docInverterPerField->fieldState; +} + 
+NormsWriterPerField::~NormsWriterPerField() { +} + +void NormsWriterPerField::reset() { + // Shrink back if we are over allocated now + docIDs.resize(MiscUtils::getShrinkSize(docIDs.size(), upto)); + norms.resize(MiscUtils::getShrinkSize(norms.size(), upto)); + upto = 0; +} + +void NormsWriterPerField::abort() { + upto = 0; +} + +int32_t NormsWriterPerField::compareTo(const LuceneObjectPtr& other) { + return fieldInfo->name.compare(boost::static_pointer_cast(other)->fieldInfo->name); +} + +void NormsWriterPerField::finish() { + BOOST_ASSERT(docIDs.size() == norms.size()); + if (fieldInfo->isIndexed && !fieldInfo->omitNorms) { + if (docIDs.size() <= upto) { + BOOST_ASSERT(docIDs.size() == upto); + docIDs.resize(MiscUtils::getNextSize(1 + upto)); + norms.resize(MiscUtils::getNextSize(1 + upto)); } + double norm = docState->similarity->computeNorm(fieldInfo->name, fieldState); + norms[upto] = Similarity::encodeNorm(norm); + docIDs[upto] = docState->docID; + ++upto; } } + +} diff --git a/src/core/index/NormsWriterPerThread.cpp b/src/core/index/NormsWriterPerThread.cpp index 68f5dc51..75b8fea7 100644 --- a/src/core/index/NormsWriterPerThread.cpp +++ b/src/core/index/NormsWriterPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,37 +9,31 @@ #include "NormsWriterPerField.h" #include "DocInverterPerThread.h" -namespace Lucene -{ - NormsWriterPerThread::NormsWriterPerThread(DocInverterPerThreadPtr docInverterPerThread, NormsWriterPtr normsWriter) - { - this->_normsWriter = normsWriter; - docState = docInverterPerThread->docState; - } - - NormsWriterPerThread::~NormsWriterPerThread() - { - } - - InvertedDocEndConsumerPerFieldPtr NormsWriterPerThread::addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo) - { - return newLucene(docInverterPerField, shared_from_this(), fieldInfo); - } - - void NormsWriterPerThread::abort() - { - } - - void NormsWriterPerThread::startDocument() - { - } - - void NormsWriterPerThread::finishDocument() - { - } - - bool NormsWriterPerThread::freeRAM() - { - return false; - } +namespace Lucene { + +NormsWriterPerThread::NormsWriterPerThread(const DocInverterPerThreadPtr& docInverterPerThread, const NormsWriterPtr& normsWriter) { + this->_normsWriter = normsWriter; + docState = docInverterPerThread->docState; +} + +NormsWriterPerThread::~NormsWriterPerThread() { +} + +InvertedDocEndConsumerPerFieldPtr NormsWriterPerThread::addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo) { + return newLucene(docInverterPerField, shared_from_this(), fieldInfo); +} + +void NormsWriterPerThread::abort() { +} + +void NormsWriterPerThread::startDocument() { +} + +void NormsWriterPerThread::finishDocument() { +} + +bool NormsWriterPerThread::freeRAM() { + return false; +} + } diff --git a/src/core/index/ParallelReader.cpp b/src/core/index/ParallelReader.cpp index 671c58b4..80368b66 100644 --- a/src/core/index/ParallelReader.cpp +++ b/src/core/index/ParallelReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,599 +13,529 @@ #include "FieldCache.h" #include "StringUtils.h" -namespace Lucene -{ - ParallelReader::ParallelReader(bool closeSubReaders) - { - this->readers = Collection::newInstance(); - this->decrefOnClose = Collection::newInstance(); - this->fieldToReader = MapStringIndexReader::newInstance(); - this->readerToFields = MapIndexReaderSetString::newInstance(); - this->storedFieldReaders = Collection::newInstance(); - this->_maxDoc = 0; - this->_numDocs = 0; - this->_hasDeletions = false; - - this->incRefReaders = !closeSubReaders; - } - - ParallelReader::~ParallelReader() - { - } - - void ParallelReader::add(IndexReaderPtr reader) - { - ensureOpen(); - add(reader, false); - } - - void ParallelReader::add(IndexReaderPtr reader, bool ignoreStoredFields) - { - ensureOpen(); - if (readers.empty()) - { - this->_maxDoc = reader->maxDoc(); - this->_numDocs = reader->numDocs(); - this->_hasDeletions = reader->hasDeletions(); - } - - if (reader->maxDoc() != _maxDoc) // check compatibility - { - boost::throw_exception(IllegalArgumentException(L"All readers must have same maxDoc: " + StringUtils::toString(_maxDoc) + - L" != " + StringUtils::toString(reader->maxDoc()))); - } - if (reader->numDocs() != _numDocs) - { - boost::throw_exception(IllegalArgumentException(L"All readers must have same numDocs: " + StringUtils::toString(_numDocs) + - L" != " + StringUtils::toString(reader->numDocs()))); - } - - HashSet fields(reader->getFieldNames(IndexReader::FIELD_OPTION_ALL)); - readerToFields.put(reader, fields); - for (HashSet::iterator field = fields.begin(); field != fields.end(); ++field) // update fieldToReader map - { - if (!fieldToReader.contains(*field)) - fieldToReader.put(*field, reader); - } - - if 
(!ignoreStoredFields) - storedFieldReaders.add(reader); // add to storedFieldReaders - readers.add(reader); - - if (incRefReaders) - reader->incRef(); - - decrefOnClose.add(incRefReaders); - } - - LuceneObjectPtr ParallelReader::clone(LuceneObjectPtr other) - { - SyncLock syncLock(this); - try - { - return doReopen(true); - } - catch (LuceneException& e) - { - boost::throw_exception(RuntimeException(e.getError())); +namespace Lucene { + +ParallelReader::ParallelReader(bool closeSubReaders) { + this->readers = Collection::newInstance(); + this->decrefOnClose = Collection::newInstance(); + this->fieldToReader = MapStringIndexReader::newInstance(); + this->readerToFields = MapIndexReaderSetString::newInstance(); + this->storedFieldReaders = Collection::newInstance(); + this->_maxDoc = 0; + this->_numDocs = 0; + this->_hasDeletions = false; + + this->incRefReaders = !closeSubReaders; +} + +ParallelReader::~ParallelReader() { +} + +void ParallelReader::add(const IndexReaderPtr& reader) { + ensureOpen(); + add(reader, false); +} + +void ParallelReader::add(const IndexReaderPtr& reader, bool ignoreStoredFields) { + ensureOpen(); + if (readers.empty()) { + this->_maxDoc = reader->maxDoc(); + this->_numDocs = reader->numDocs(); + this->_hasDeletions = reader->hasDeletions(); + } + + if (reader->maxDoc() != _maxDoc) { // check compatibility + boost::throw_exception(IllegalArgumentException(L"All readers must have same maxDoc: " + StringUtils::toString(_maxDoc) + + L" != " + StringUtils::toString(reader->maxDoc()))); + } + if (reader->numDocs() != _numDocs) { + boost::throw_exception(IllegalArgumentException(L"All readers must have same numDocs: " + StringUtils::toString(_numDocs) + + L" != " + StringUtils::toString(reader->numDocs()))); + } + + HashSet fields(reader->getFieldNames(IndexReader::FIELD_OPTION_ALL)); + readerToFields.put(reader, fields); + for (HashSet::iterator field = fields.begin(); field != fields.end(); ++field) { // update fieldToReader map + if 
(!fieldToReader.contains(*field)) { + fieldToReader.put(*field, reader); } - return LuceneObjectPtr(); - } - - IndexReaderPtr ParallelReader::reopen() - { - SyncLock syncLock(this); - return doReopen(false); - } - - IndexReaderPtr ParallelReader::doReopen(bool doClone) - { - ensureOpen(); - - bool reopened = false; - Collection newReaders(Collection::newInstance()); - - bool success = false; - LuceneException finally; - try - { - for (Collection::iterator oldReader = readers.begin(); oldReader != readers.end(); ++oldReader) - { - IndexReaderPtr newReader; - if (doClone) - newReader = boost::dynamic_pointer_cast((*oldReader)->clone()); - else - newReader = (*oldReader)->reopen(); - newReaders.add(newReader); - // if at least one of the subreaders was updated we remember that and return a new ParallelReader - if (newReader != *oldReader) - reopened = true; + } + + if (!ignoreStoredFields) { + storedFieldReaders.add(reader); // add to storedFieldReaders + } + readers.add(reader); + + if (incRefReaders) { + reader->incRef(); + } + + decrefOnClose.add(incRefReaders); +} + +LuceneObjectPtr ParallelReader::clone(const LuceneObjectPtr& other) { + SyncLock syncLock(this); + try { + return doReopen(true); + } catch (LuceneException& e) { + boost::throw_exception(RuntimeException(e.getError())); + } + return LuceneObjectPtr(); +} + +IndexReaderPtr ParallelReader::reopen() { + SyncLock syncLock(this); + return doReopen(false); +} + +IndexReaderPtr ParallelReader::doReopen(bool doClone) { + ensureOpen(); + + bool reopened = false; + Collection newReaders(Collection::newInstance()); + + bool success = false; + LuceneException finally; + try { + for (Collection::iterator oldReader = readers.begin(); oldReader != readers.end(); ++oldReader) { + IndexReaderPtr newReader; + if (doClone) { + newReader = boost::dynamic_pointer_cast((*oldReader)->clone()); + } else { + newReader = (*oldReader)->reopen(); + } + newReaders.add(newReader); + // if at least one of the subreaders was 
updated we remember that and return a new ParallelReader + if (newReader != *oldReader) { + reopened = true; } - success = true; - } - catch (LuceneException& e) - { - finally = e; } - if (!success && reopened) - { - for (int32_t i = 0; i < newReaders.size(); ++i) - { - if (newReaders[i] != readers[i]) - { - try - { - if (newReaders[i]) - newReaders[i]->close(); - } - catch (...) - { - // keep going - we want to clean up as much as possible + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success && reopened) { + for (int32_t i = 0; i < newReaders.size(); ++i) { + if (newReaders[i] != readers[i]) { + try { + if (newReaders[i]) { + newReaders[i]->close(); } + } catch (...) { + // keep going - we want to clean up as much as possible } } } - finally.throwException(); - - if (reopened) - { - Collection newDecrefOnClose(Collection::newInstance()); - ParallelReaderPtr pr(newLucene()); - for (int32_t i = 0; i < readers.size(); ++i) - { - IndexReaderPtr oldReader(readers[i]); - IndexReaderPtr newReader(newReaders[i]); - if (newReader == oldReader) - { - newDecrefOnClose.add(true); - newReader->incRef(); - } - else - { - // this is a new subreader instance, so on close() we don't decRef but close it - newDecrefOnClose.add(false); - } - pr->add(newReader, !storedFieldReaders.contains(oldReader)); + } + finally.throwException(); + + if (reopened) { + Collection newDecrefOnClose(Collection::newInstance()); + ParallelReaderPtr pr(newLucene()); + for (int32_t i = 0; i < readers.size(); ++i) { + IndexReaderPtr oldReader(readers[i]); + IndexReaderPtr newReader(newReaders[i]); + if (newReader == oldReader) { + newDecrefOnClose.add(true); + newReader->incRef(); + } else { + // this is a new subreader instance, so on close() we don't decRef but close it + newDecrefOnClose.add(false); } - pr->decrefOnClose = newDecrefOnClose; - pr->incRefReaders = incRefReaders; - return pr; - } - else - { - // No subreader was refreshed - return shared_from_this(); + 
pr->add(newReader, !storedFieldReaders.contains(oldReader)); } + pr->decrefOnClose = newDecrefOnClose; + pr->incRefReaders = incRefReaders; + return pr; + } else { + // No subreader was refreshed + return shared_from_this(); } - - int32_t ParallelReader::numDocs() - { - // Don't call ensureOpen() here (it could affect performance) - return _numDocs; - } - - int32_t ParallelReader::maxDoc() - { - // Don't call ensureOpen() here (it could affect performance) - return _maxDoc; - } - - bool ParallelReader::hasDeletions() - { - // Don't call ensureOpen() here (it could affect performance) - return _hasDeletions; - } - - bool ParallelReader::isDeleted(int32_t n) - { - // Don't call ensureOpen() here (it could affect performance) - return !readers.empty() ? readers[0]->isDeleted(n) : false; // check first reader - } - - void ParallelReader::doDelete(int32_t docNum) - { - // delete in all readers - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - (*reader)->deleteDocument(docNum); - _hasDeletions = true; - } - - void ParallelReader::doUndeleteAll() - { - // undeleteAll in all readers - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - (*reader)->undeleteAll(); - _hasDeletions = false; - } - - DocumentPtr ParallelReader::document(int32_t n, FieldSelectorPtr fieldSelector) - { - ensureOpen(); - DocumentPtr result(newLucene()); - - // append fields from storedFieldReaders - for (Collection::iterator reader = storedFieldReaders.begin(); reader != storedFieldReaders.end(); ++reader) - { - bool include = !fieldSelector; - if (!include) - { - HashSet fields = readerToFields.get(*reader); - for (HashSet::iterator field = fields.begin(); field != fields.end(); ++field) - { - if (fieldSelector->accept(*field) != FieldSelector::SELECTOR_NO_LOAD) - { - include = true; - break; - } +} + +int32_t ParallelReader::numDocs() { + // Don't call ensureOpen() here (it could affect performance) + return _numDocs; +} 
+ +int32_t ParallelReader::maxDoc() { + // Don't call ensureOpen() here (it could affect performance) + return _maxDoc; +} + +bool ParallelReader::hasDeletions() { + // Don't call ensureOpen() here (it could affect performance) + return _hasDeletions; +} + +bool ParallelReader::isDeleted(int32_t n) { + // Don't call ensureOpen() here (it could affect performance) + return !readers.empty() ? readers[0]->isDeleted(n) : false; // check first reader +} + +void ParallelReader::doDelete(int32_t docNum) { + // delete in all readers + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + (*reader)->deleteDocument(docNum); + } + _hasDeletions = true; +} + +void ParallelReader::doUndeleteAll() { + // undeleteAll in all readers + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + (*reader)->undeleteAll(); + } + _hasDeletions = false; +} + +DocumentPtr ParallelReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { + ensureOpen(); + DocumentPtr result(newLucene()); + + // append fields from storedFieldReaders + for (Collection::iterator reader = storedFieldReaders.begin(); reader != storedFieldReaders.end(); ++reader) { + bool include = !fieldSelector; + if (!include) { + HashSet fields = readerToFields.get(*reader); + for (HashSet::iterator field = fields.begin(); field != fields.end(); ++field) { + if (fieldSelector->accept(*field) != FieldSelector::SELECTOR_NO_LOAD) { + include = true; + break; } } - if (include) - { - Collection fields((*reader)->document(n, fieldSelector)->getFields()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - result->add(*field); - } - } - return result; - } - - Collection ParallelReader::getTermFreqVectors(int32_t docNumber) - { - ensureOpen(); - - Collection results(Collection::newInstance()); - - // get all vectors - for (MapStringIndexReader::iterator entry = fieldToReader.begin(); entry != fieldToReader.end(); 
++entry) - { - TermFreqVectorPtr vector(entry->second->getTermFreqVector(docNumber, entry->first)); - if (vector) - results.add(vector); } - - return results; - } - - TermFreqVectorPtr ParallelReader::getTermFreqVector(int32_t docNumber, const String& field) - { - ensureOpen(); - MapStringIndexReader::iterator reader = fieldToReader.find(field); - return reader == fieldToReader.end() ? TermFreqVectorPtr() : reader->second->getTermFreqVector(docNumber, field); - } - - void ParallelReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) - { - ensureOpen(); - MapStringIndexReader::iterator reader = fieldToReader.find(field); - if (reader != fieldToReader.end()) - reader->second->getTermFreqVector(docNumber, field, mapper); - } - - void ParallelReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) - { - ensureOpen(); - for (MapStringIndexReader::iterator entry = fieldToReader.begin(); entry != fieldToReader.end(); ++entry) - entry->second->getTermFreqVector(docNumber, entry->first, mapper); - } - - bool ParallelReader::hasNorms(const String& field) - { - ensureOpen(); - MapStringIndexReader::iterator reader = fieldToReader.find(field); - return reader == fieldToReader.end() ? false : reader->second->hasNorms(field); - } - - ByteArray ParallelReader::norms(const String& field) - { - ensureOpen(); - MapStringIndexReader::iterator reader = fieldToReader.find(field); - return reader == fieldToReader.end() ? 
ByteArray() : reader->second->norms(field); - } - - void ParallelReader::norms(const String& field, ByteArray norms, int32_t offset) - { - ensureOpen(); - MapStringIndexReader::iterator reader = fieldToReader.find(field); - if (reader != fieldToReader.end()) - reader->second->norms(field, norms, offset); - } - - void ParallelReader::doSetNorm(int32_t doc, const String& field, uint8_t value) - { - ensureOpen(); - MapStringIndexReader::iterator reader = fieldToReader.find(field); - if (reader != fieldToReader.end()) - reader->second->doSetNorm(doc, field, value); - } - - TermEnumPtr ParallelReader::terms() - { - ensureOpen(); - return newLucene(shared_from_this()); - } - - TermEnumPtr ParallelReader::terms(TermPtr t) - { - ensureOpen(); - return newLucene(shared_from_this(), t); - } - - int32_t ParallelReader::docFreq(TermPtr t) - { - ensureOpen(); - MapStringIndexReader::iterator reader = fieldToReader.find(t->field()); - return reader == fieldToReader.end() ? 0 : reader->second->docFreq(t); - } - - TermDocsPtr ParallelReader::termDocs(TermPtr term) - { - ensureOpen(); - return newLucene(shared_from_this(), term); - } - - TermDocsPtr ParallelReader::termDocs() - { - ensureOpen(); - return newLucene(shared_from_this()); - } - - TermPositionsPtr ParallelReader::termPositions(TermPtr term) - { - ensureOpen(); - return newLucene(shared_from_this(), term); - } - - TermPositionsPtr ParallelReader::termPositions() - { - ensureOpen(); - return newLucene(shared_from_this()); - } - - bool ParallelReader::isCurrent() - { - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - { - if (!(*reader)->isCurrent()) - return false; - } - - // all subreaders are up to date - return true; - } - - bool ParallelReader::isOptimized() - { - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - { - if (!(*reader)->isOptimized()) - return false; + if (include) { + Collection fields((*reader)->document(n, 
fieldSelector)->getFields()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + result->add(*field); + } } - - // all subindexes are optimized - return true; - } - - int64_t ParallelReader::getVersion() - { - boost::throw_exception(UnsupportedOperationException(L"ParallelReader does not support this method.")); - return 0; - } - - Collection ParallelReader::getSubReaders() - { - return readers; - } - - void ParallelReader::doCommit(MapStringString commitUserData) - { - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - (*reader)->commit(commitUserData); - } - - void ParallelReader::doClose() - { - SyncLock syncLock(this); - for (int32_t i = 0; i < readers.size(); ++i) - { - if (decrefOnClose[i]) - readers[i]->decRef(); - else - readers[i]->close(); + } + return result; +} + +Collection ParallelReader::getTermFreqVectors(int32_t docNumber) { + ensureOpen(); + + Collection results(Collection::newInstance()); + + // get all vectors + for (MapStringIndexReader::iterator entry = fieldToReader.begin(); entry != fieldToReader.end(); ++entry) { + TermFreqVectorPtr vector(entry->second->getTermFreqVector(docNumber, entry->first)); + if (vector) { + results.add(vector); } - - FieldCache::DEFAULT()->purge(shared_from_this()); - } - - HashSet ParallelReader::getFieldNames(FieldOption fieldOption) - { - ensureOpen(); - HashSet fieldSet(HashSet::newInstance()); - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - { - HashSet names((*reader)->getFieldNames(fieldOption)); - fieldSet.addAll(names.begin(), names.end()); + } + + return results; +} + +TermFreqVectorPtr ParallelReader::getTermFreqVector(int32_t docNumber, const String& field) { + ensureOpen(); + MapStringIndexReader::iterator reader = fieldToReader.find(field); + return reader == fieldToReader.end() ? 
TermFreqVectorPtr() : reader->second->getTermFreqVector(docNumber, field); +} + +void ParallelReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { + ensureOpen(); + MapStringIndexReader::iterator reader = fieldToReader.find(field); + if (reader != fieldToReader.end()) { + reader->second->getTermFreqVector(docNumber, field, mapper); + } +} + +void ParallelReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { + ensureOpen(); + for (MapStringIndexReader::iterator entry = fieldToReader.begin(); entry != fieldToReader.end(); ++entry) { + entry->second->getTermFreqVector(docNumber, entry->first, mapper); + } +} + +bool ParallelReader::hasNorms(const String& field) { + ensureOpen(); + MapStringIndexReader::iterator reader = fieldToReader.find(field); + return reader == fieldToReader.end() ? false : reader->second->hasNorms(field); +} + +ByteArray ParallelReader::norms(const String& field) { + ensureOpen(); + MapStringIndexReader::iterator reader = fieldToReader.find(field); + return reader == fieldToReader.end() ? 
ByteArray() : reader->second->norms(field); +} + +void ParallelReader::norms(const String& field, ByteArray norms, int32_t offset) { + ensureOpen(); + MapStringIndexReader::iterator reader = fieldToReader.find(field); + if (reader != fieldToReader.end()) { + reader->second->norms(field, norms, offset); + } +} + +void ParallelReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { + ensureOpen(); + MapStringIndexReader::iterator reader = fieldToReader.find(field); + if (reader != fieldToReader.end()) { + reader->second->doSetNorm(doc, field, value); + } +} + +TermEnumPtr ParallelReader::terms() { + ensureOpen(); + return newLucene(shared_from_this()); +} + +TermEnumPtr ParallelReader::terms(const TermPtr& t) { + ensureOpen(); + return newLucene(shared_from_this(), t); +} + +int32_t ParallelReader::docFreq(const TermPtr& t) { + ensureOpen(); + MapStringIndexReader::iterator reader = fieldToReader.find(t->field()); + return reader == fieldToReader.end() ? 0 : reader->second->docFreq(t); +} + +TermDocsPtr ParallelReader::termDocs(const TermPtr& term) { + ensureOpen(); + return newLucene(shared_from_this(), term); +} + +TermDocsPtr ParallelReader::termDocs() { + ensureOpen(); + return newLucene(shared_from_this()); +} + +TermPositionsPtr ParallelReader::termPositions(const TermPtr& term) { + ensureOpen(); + return newLucene(shared_from_this(), term); +} + +TermPositionsPtr ParallelReader::termPositions() { + ensureOpen(); + return newLucene(shared_from_this()); +} + +bool ParallelReader::isCurrent() { + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + if (!(*reader)->isCurrent()) { + return false; } - return fieldSet; - } - - ParallelTermEnum::ParallelTermEnum(ParallelReaderPtr reader) - { - this->setIterator = false; - this->_reader = reader; - MapStringIndexReader::iterator indexReader = reader->fieldToReader.begin(); - if (indexReader != reader->fieldToReader.end()) - this->field = indexReader->first; - if 
(!field.empty()) - this->termEnum = reader->fieldToReader[field]->terms(); - } - - ParallelTermEnum::ParallelTermEnum(ParallelReaderPtr reader, TermPtr term) - { - this->setIterator = false; - this->_reader = reader; - this->field = term->field(); - MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(field); - if (indexReader != reader->fieldToReader.end()) - this->termEnum = indexReader->second->terms(term); - } - - ParallelTermEnum::~ParallelTermEnum() - { - } - - bool ParallelTermEnum::next() - { - if (!termEnum) + } + + // all subreaders are up to date + return true; +} + +bool ParallelReader::isOptimized() { + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + if (!(*reader)->isOptimized()) { return false; - - // another term in this field? - if (termEnum->next() && termEnum->term()->field() == field) - return true; // yes, keep going - - termEnum->close(); // close old termEnum - ParallelReaderPtr reader(_reader); - - // find the next field with terms, if any - if (!setIterator) - { - fieldIterator = reader->fieldToReader.find(field); - ++fieldIterator; // Skip field to get next one - setIterator = false; } - - while (fieldIterator != reader->fieldToReader.end()) - { - field = fieldIterator->first; - termEnum = fieldIterator->second->terms(newLucene(field)); - ++fieldIterator; - TermPtr term(termEnum->term()); - if (term && term->field() == field) - return true; - else - termEnum->close(); + } + + // all subindexes are optimized + return true; +} + +int64_t ParallelReader::getVersion() { + boost::throw_exception(UnsupportedOperationException(L"ParallelReader does not support this method.")); + return 0; +} + +Collection ParallelReader::getSubReaders() { + return readers; +} + +void ParallelReader::doCommit(MapStringString commitUserData) { + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + (*reader)->commit(commitUserData); + } +} + +void 
ParallelReader::doClose() { + SyncLock syncLock(this); + for (int32_t i = 0; i < readers.size(); ++i) { + if (decrefOnClose[i]) { + readers[i]->decRef(); + } else { + readers[i]->close(); } - - return false; // no more fields - } - - TermPtr ParallelTermEnum::term() - { - return termEnum ? termEnum->term() : TermPtr(); - } - - int32_t ParallelTermEnum::docFreq() - { - return termEnum ? termEnum->docFreq() : 0; - } - - void ParallelTermEnum::close() - { - if (termEnum) + } + + FieldCache::DEFAULT()->purge(shared_from_this()); +} + +HashSet ParallelReader::getFieldNames(FieldOption fieldOption) { + ensureOpen(); + HashSet fieldSet(HashSet::newInstance()); + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + HashSet names((*reader)->getFieldNames(fieldOption)); + fieldSet.addAll(names.begin(), names.end()); + } + return fieldSet; +} + +ParallelTermEnum::ParallelTermEnum(const ParallelReaderPtr& reader) { + this->setIterator = false; + this->_reader = reader; + MapStringIndexReader::iterator indexReader = reader->fieldToReader.begin(); + if (indexReader != reader->fieldToReader.end()) { + this->field = indexReader->first; + } + if (!field.empty()) { + this->termEnum = reader->fieldToReader[field]->terms(); + } +} + +ParallelTermEnum::ParallelTermEnum(const ParallelReaderPtr& reader, const TermPtr& term) { + this->setIterator = false; + this->_reader = reader; + this->field = term->field(); + MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(field); + if (indexReader != reader->fieldToReader.end()) { + this->termEnum = indexReader->second->terms(term); + } +} + +ParallelTermEnum::~ParallelTermEnum() { +} + +bool ParallelTermEnum::next() { + if (!termEnum) { + return false; + } + + // another term in this field? 
+ if (termEnum->next() && termEnum->term()->field() == field) { + return true; // yes, keep going + } + + termEnum->close(); // close old termEnum + ParallelReaderPtr reader(_reader); + + // find the next field with terms, if any + if (!setIterator) { + fieldIterator = reader->fieldToReader.find(field); + ++fieldIterator; // Skip field to get next one + setIterator = false; + } + + while (fieldIterator != reader->fieldToReader.end()) { + field = fieldIterator->first; + termEnum = fieldIterator->second->terms(newLucene(field)); + ++fieldIterator; + TermPtr term(termEnum->term()); + if (term && term->field() == field) { + return true; + } else { termEnum->close(); + } } - - ParallelTermDocs::ParallelTermDocs(ParallelReaderPtr reader) - { - this->_reader = reader; - } - - ParallelTermDocs::ParallelTermDocs(ParallelReaderPtr reader, TermPtr term) - { - this->_reader = reader; - if (!term) - termDocs = reader->readers.empty() ? TermDocsPtr() : reader->readers[0]->termDocs(TermPtr()); - else - seek(term); - } - - ParallelTermDocs::~ParallelTermDocs() - { - } - - int32_t ParallelTermDocs::doc() - { - return termDocs->doc(); - } - - int32_t ParallelTermDocs::freq() - { - return termDocs->freq(); - } - - void ParallelTermDocs::seek(TermPtr term) - { - ParallelReaderPtr reader(_reader); - MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(term->field()); - termDocs = indexReader != reader->fieldToReader.end() ? indexReader->second->termDocs(term) : TermDocsPtr(); - } - - void ParallelTermDocs::seek(TermEnumPtr termEnum) - { - seek(termEnum->term()); - } - - bool ParallelTermDocs::next() - { - return termDocs ? termDocs->next() : false; - } - - int32_t ParallelTermDocs::read(Collection docs, Collection freqs) - { - return termDocs ? termDocs->read(docs, freqs) : 0; - } - - bool ParallelTermDocs::skipTo(int32_t target) - { - return termDocs ? 
termDocs->skipTo(target) : false; - } - - void ParallelTermDocs::close() - { - if (termDocs) - termDocs->close(); - } - - ParallelTermPositions::ParallelTermPositions(ParallelReaderPtr reader) : ParallelTermDocs(reader) - { - } - - ParallelTermPositions::ParallelTermPositions(ParallelReaderPtr reader, TermPtr term) : ParallelTermDocs(reader) - { + + return false; // no more fields +} + +TermPtr ParallelTermEnum::term() { + return termEnum ? termEnum->term() : TermPtr(); +} + +int32_t ParallelTermEnum::docFreq() { + return termEnum ? termEnum->docFreq() : 0; +} + +void ParallelTermEnum::close() { + if (termEnum) { + termEnum->close(); + } +} + +ParallelTermDocs::ParallelTermDocs(const ParallelReaderPtr& reader) { + this->_reader = reader; +} + +ParallelTermDocs::ParallelTermDocs(const ParallelReaderPtr& reader, const TermPtr& term) { + this->_reader = reader; + if (!term) { + termDocs = reader->readers.empty() ? TermDocsPtr() : reader->readers[0]->termDocs(TermPtr()); + } else { seek(term); } - - ParallelTermPositions::~ParallelTermPositions() - { - } - - void ParallelTermPositions::seek(TermPtr term) - { - ParallelReaderPtr reader(_reader); - MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(term->field()); - termDocs = indexReader != reader->fieldToReader.end() ? indexReader->second->termPositions(term) : TermDocsPtr(); - } - - int32_t ParallelTermPositions::nextPosition() - { - // It is an error to call this if there is no next position, eg. if termDocs==null - return boost::static_pointer_cast(termDocs)->nextPosition(); - } - - int32_t ParallelTermPositions::getPayloadLength() - { - // It is an error to call this if there is no next position, eg. if termDocs==null - return boost::static_pointer_cast(termDocs)->getPayloadLength(); - } - - ByteArray ParallelTermPositions::getPayload(ByteArray data, int32_t offset) - { - // It is an error to call this if there is no next position, eg. 
if termDocs==null - return boost::static_pointer_cast(termDocs)->getPayload(data, offset); - } - - bool ParallelTermPositions::isPayloadAvailable() - { - // It is an error to call this if there is no next position, eg. if termDocs==null - return boost::static_pointer_cast(termDocs)->isPayloadAvailable(); +} + +ParallelTermDocs::~ParallelTermDocs() { +} + +int32_t ParallelTermDocs::doc() { + return termDocs->doc(); +} + +int32_t ParallelTermDocs::freq() { + return termDocs->freq(); +} + +void ParallelTermDocs::seek(const TermPtr& term) { + ParallelReaderPtr reader(_reader); + MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(term->field()); + termDocs = indexReader != reader->fieldToReader.end() ? indexReader->second->termDocs(term) : TermDocsPtr(); +} + +void ParallelTermDocs::seek(const TermEnumPtr& termEnum) { + seek(termEnum->term()); +} + +bool ParallelTermDocs::next() { + return termDocs ? termDocs->next() : false; +} + +int32_t ParallelTermDocs::read(Collection& docs, Collection& freqs) { + return termDocs ? termDocs->read(docs, freqs) : 0; +} + +bool ParallelTermDocs::skipTo(int32_t target) { + return termDocs ? termDocs->skipTo(target) : false; +} + +void ParallelTermDocs::close() { + if (termDocs) { + termDocs->close(); } } + +ParallelTermPositions::ParallelTermPositions(const ParallelReaderPtr& reader) : ParallelTermDocs(reader) { +} + +ParallelTermPositions::ParallelTermPositions(const ParallelReaderPtr& reader, const TermPtr& term) : ParallelTermDocs(reader) { + seek(term); +} + +ParallelTermPositions::~ParallelTermPositions() { +} + +void ParallelTermPositions::seek(const TermPtr& term) { + ParallelReaderPtr reader(_reader); + MapStringIndexReader::iterator indexReader = reader->fieldToReader.find(term->field()); + termDocs = indexReader != reader->fieldToReader.end() ? 
indexReader->second->termPositions(term) : TermDocsPtr(); +} + +int32_t ParallelTermPositions::nextPosition() { + // It is an error to call this if there is no next position, eg. if termDocs==null + return boost::static_pointer_cast(termDocs)->nextPosition(); +} + +int32_t ParallelTermPositions::getPayloadLength() { + // It is an error to call this if there is no next position, eg. if termDocs==null + return boost::static_pointer_cast(termDocs)->getPayloadLength(); +} + +ByteArray ParallelTermPositions::getPayload(ByteArray data, int32_t offset) { + // It is an error to call this if there is no next position, eg. if termDocs==null + return boost::static_pointer_cast(termDocs)->getPayload(data, offset); +} + +bool ParallelTermPositions::isPayloadAvailable() { + // It is an error to call this if there is no next position, eg. if termDocs==null + return boost::static_pointer_cast(termDocs)->isPayloadAvailable(); +} + +} diff --git a/src/core/index/Payload.cpp b/src/core/index/Payload.cpp index 9c28c51b..1b01e978 100644 --- a/src/core/index/Payload.cpp +++ b/src/core/index/Payload.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,126 +8,113 @@ #include "Payload.h" #include "MiscUtils.h" -namespace Lucene -{ - Payload::Payload() - { - this->offset = 0; - this->_length = 0; - } - - Payload::Payload(ByteArray data) - { - this->data = data; - this->offset = 0; - this->_length = data.size(); - } - - Payload::Payload(ByteArray data, int32_t offset, int32_t length) - { - if (offset < 0 || offset + length > data.size()) - boost::throw_exception(IllegalArgumentException()); - this->data = data; - this->offset = offset; - this->_length = length; - } - - Payload::~Payload() - { - } - - void Payload::setData(ByteArray data) - { - setData(data, 0, data.size()); - } - - void Payload::setData(ByteArray data, int32_t offset, int32_t length) - { - this->data = data; - this->offset = offset; - this->_length = length; - } - - ByteArray Payload::getData() - { - return this->data; - } - - int32_t Payload::getOffset() - { - return this->offset; +namespace Lucene { + +Payload::Payload() { + this->offset = 0; + this->_length = 0; +} + +Payload::Payload(ByteArray data) { + this->data = data; + this->offset = 0; + this->_length = data.size(); +} + +Payload::Payload(ByteArray data, int32_t offset, int32_t length) { + if (offset < 0 || offset + length > data.size()) { + boost::throw_exception(IllegalArgumentException()); } - - int32_t Payload::length() - { - return this->_length; + this->data = data; + this->offset = offset; + this->_length = length; +} + +Payload::~Payload() { +} + +void Payload::setData(ByteArray data) { + setData(data, 0, data.size()); +} + +void Payload::setData(ByteArray data, int32_t offset, int32_t length) { + this->data = data; + this->offset = offset; + this->_length = length; +} + +ByteArray Payload::getData() { + return this->data; +} + +int32_t Payload::getOffset() { + return this->offset; +} + +int32_t Payload::length() { + return this->_length; +} + +uint8_t Payload::byteAt(int32_t index) { + if (0 <= 
index && index < this->_length) { + return this->data[this->offset + index]; } - - uint8_t Payload::byteAt(int32_t index) - { - if (0 <= index && index < this->_length) - return this->data[this->offset + index]; + boost::throw_exception(IndexOutOfBoundsException()); + return 0; +} + +ByteArray Payload::toByteArray() { + ByteArray retArray(ByteArray::newInstance(this->_length)); + MiscUtils::arrayCopy(this->data.get(), this->offset, retArray.get(), 0, this->_length); + return retArray; +} + +void Payload::copyTo(ByteArray target, int32_t targetOffset) { + if (this->_length > target.size() + targetOffset) { boost::throw_exception(IndexOutOfBoundsException()); - return 0; - } - - ByteArray Payload::toByteArray() - { - ByteArray retArray(ByteArray::newInstance(this->_length)); - MiscUtils::arrayCopy(this->data.get(), this->offset, retArray.get(), 0, this->_length); - return retArray; } - - void Payload::copyTo(ByteArray target, int32_t targetOffset) - { - if (this->_length > target.size() + targetOffset) - boost::throw_exception(IndexOutOfBoundsException()); - MiscUtils::arrayCopy(this->data.get(), this->offset, target.get(), targetOffset, this->_length); + MiscUtils::arrayCopy(this->data.get(), this->offset, target.get(), targetOffset, this->_length); +} + +LuceneObjectPtr Payload::clone(const LuceneObjectPtr& other) { + // Start with a shallow copy of data + LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene()); + PayloadPtr clonePayload(boost::dynamic_pointer_cast(clone)); + clonePayload->offset = offset; + clonePayload->_length = _length; + + // Only copy the part of data that belongs to this Payload + if (offset == 0 && _length == data.size()) { + // It is the whole thing, so just clone it. 
+ clonePayload->data = ByteArray::newInstance(data.size()); + MiscUtils::arrayCopy(data.get(), 0, clonePayload->data.get(), 0, data.size()); + } else { + // Just get the part + clonePayload->data = toByteArray(); + clonePayload->offset = 0; } - - LuceneObjectPtr Payload::clone(LuceneObjectPtr other) - { - // Start with a shallow copy of data - LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene()); - PayloadPtr clonePayload(boost::dynamic_pointer_cast(clone)); - clonePayload->offset = offset; - clonePayload->_length = _length; - - // Only copy the part of data that belongs to this Payload - if (offset == 0 && _length == data.size()) - { - // It is the whole thing, so just clone it. - clonePayload->data = ByteArray::newInstance(data.size()); - MiscUtils::arrayCopy(data.get(), 0, clonePayload->data.get(), 0, data.size()); - } - else - { - // Just get the part - clonePayload->data = toByteArray(); - clonePayload->offset = 0; - } - return clonePayload; + return clonePayload; +} + +bool Payload::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - bool Payload::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - PayloadPtr otherPayload(boost::dynamic_pointer_cast(other)); - if (otherPayload) - { - if (_length == otherPayload->_length) - return (std::memcmp(data.get(), otherPayload->data.get(), _length) == 0); - else - return false; + + PayloadPtr otherPayload(boost::dynamic_pointer_cast(other)); + if (otherPayload) { + if (_length == otherPayload->_length) { + return (std::memcmp(data.get(), otherPayload->data.get(), _length) == 0); + } else { + return false; } - - return false; - } - - int32_t Payload::hashCode() - { - return MiscUtils::hashCode(data.get(), offset, offset + _length); } + + return false; +} + +int32_t Payload::hashCode() { + return MiscUtils::hashCode(data.get(), offset, offset + _length); +} + } diff --git a/src/core/index/PositionBasedTermVectorMapper.cpp 
b/src/core/index/PositionBasedTermVectorMapper.cpp index 44216b95..3bb8ddeb 100644 --- a/src/core/index/PositionBasedTermVectorMapper.cpp +++ b/src/core/index/PositionBasedTermVectorMapper.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,87 +7,76 @@ #include "LuceneInc.h" #include "PositionBasedTermVectorMapper.h" -namespace Lucene -{ - PositionBasedTermVectorMapper::PositionBasedTermVectorMapper(bool ignoringOffsets) : TermVectorMapper(false, ignoringOffsets) - { - storeOffsets = false; - } - - PositionBasedTermVectorMapper::~PositionBasedTermVectorMapper() - { - } - - bool PositionBasedTermVectorMapper::isIgnoringPositions() - { - return false; - } - - void PositionBasedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) - { - for (int32_t i = 0; i < positions.size(); ++i) - { - TermVectorsPositionInfoPtr pos(currentPositions.get(positions[i])); - if (!pos) - { - pos = newLucene(positions[i], storeOffsets); - currentPositions.put(positions[i], pos); - } - pos->addTerm(term, offsets ? 
offsets[i] : TermVectorOffsetInfoPtr()); - } - } - - void PositionBasedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) - { - if (storePositions == false) - boost::throw_exception(RuntimeException(L"You must store positions in order to use this Mapper")); - if (storeOffsets == true) - { - // ignoring offsets +namespace Lucene { + +PositionBasedTermVectorMapper::PositionBasedTermVectorMapper(bool ignoringOffsets) : TermVectorMapper(false, ignoringOffsets) { + storeOffsets = false; +} + +PositionBasedTermVectorMapper::~PositionBasedTermVectorMapper() { +} + +bool PositionBasedTermVectorMapper::isIgnoringPositions() { + return false; +} + +void PositionBasedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { + for (int32_t i = 0; i < positions.size(); ++i) { + TermVectorsPositionInfoPtr pos(currentPositions.get(positions[i])); + if (!pos) { + pos = newLucene(positions[i], storeOffsets); + currentPositions.put(positions[i], pos); } - this->fieldToTerms = MapStringMapIntTermVectorsPositionInfo::newInstance(); - this->storeOffsets = storeOffsets; - currentField = field; - this->currentPositions = MapIntTermVectorsPositionInfo::newInstance(); - fieldToTerms.put(currentField, currentPositions); - } - - MapStringMapIntTermVectorsPositionInfo PositionBasedTermVectorMapper::getFieldToTerms() - { - return fieldToTerms; + pos->addTerm(term, offsets ? 
offsets[i] : TermVectorOffsetInfoPtr()); } - - TermVectorsPositionInfo::TermVectorsPositionInfo(int32_t position, bool storeOffsets) - { - this->position = position; - this->terms = Collection::newInstance(); - if (storeOffsets) - offsets = Collection::newInstance(); - } - - TermVectorsPositionInfo::~TermVectorsPositionInfo() - { - } - - void TermVectorsPositionInfo::addTerm(const String& term, TermVectorOffsetInfoPtr info) - { - terms.add(term); - if (offsets) - offsets.add(info); +} + +void PositionBasedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { + if (storePositions == false) { + boost::throw_exception(RuntimeException(L"You must store positions in order to use this Mapper")); } - - int32_t TermVectorsPositionInfo::getPosition() - { - return position; + if (storeOffsets == true) { + // ignoring offsets } - - Collection TermVectorsPositionInfo::getTerms() - { - return terms; + this->fieldToTerms = MapStringMapIntTermVectorsPositionInfo::newInstance(); + this->storeOffsets = storeOffsets; + currentField = field; + this->currentPositions = MapIntTermVectorsPositionInfo::newInstance(); + fieldToTerms.put(currentField, currentPositions); +} + +MapStringMapIntTermVectorsPositionInfo PositionBasedTermVectorMapper::getFieldToTerms() { + return fieldToTerms; +} + +TermVectorsPositionInfo::TermVectorsPositionInfo(int32_t position, bool storeOffsets) { + this->position = position; + this->terms = Collection::newInstance(); + if (storeOffsets) { + offsets = Collection::newInstance(); } - - Collection TermVectorsPositionInfo::getOffsets() - { - return offsets; +} + +TermVectorsPositionInfo::~TermVectorsPositionInfo() { +} + +void TermVectorsPositionInfo::addTerm(const String& term, const TermVectorOffsetInfoPtr& info) { + terms.add(term); + if (offsets) { + offsets.add(info); } } + +int32_t TermVectorsPositionInfo::getPosition() { + return position; +} + +Collection TermVectorsPositionInfo::getTerms() { + 
return terms; +} + +Collection TermVectorsPositionInfo::getOffsets() { + return offsets; +} + +} diff --git a/src/core/index/RawPostingList.cpp b/src/core/index/RawPostingList.cpp index 900a8b60..251f1096 100644 --- a/src/core/index/RawPostingList.cpp +++ b/src/core/index/RawPostingList.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,18 +8,17 @@ #include "RawPostingList.h" #include "DocumentsWriter.h" -namespace Lucene -{ - const int32_t RawPostingList::BYTES_SIZE = DocumentsWriter::OBJECT_HEADER_BYTES + 3 * DocumentsWriter::INT_NUM_BYTE; - - RawPostingList::RawPostingList() - { - textStart = 0; - intStart = 0; - byteStart = 0; - } - - RawPostingList::~RawPostingList() - { - } +namespace Lucene { + +const int32_t RawPostingList::BYTES_SIZE = DocumentsWriter::OBJECT_HEADER_BYTES + 3 * DocumentsWriter::INT_NUM_BYTE; + +RawPostingList::RawPostingList() { + textStart = 0; + intStart = 0; + byteStart = 0; +} + +RawPostingList::~RawPostingList() { +} + } diff --git a/src/core/index/ReadOnlyDirectoryReader.cpp b/src/core/index/ReadOnlyDirectoryReader.cpp index e3baedaa..3c7da5e7 100644 --- a/src/core/index/ReadOnlyDirectoryReader.cpp +++ b/src/core/index/ReadOnlyDirectoryReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,33 +8,29 @@ #include "ReadOnlyDirectoryReader.h" #include "ReadOnlySegmentReader.h" -namespace Lucene -{ - ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(DirectoryPtr directory, SegmentInfosPtr sis, - IndexDeletionPolicyPtr deletionPolicy, int32_t termInfosIndexDivisor) : - DirectoryReader(directory, sis, deletionPolicy, true, termInfosIndexDivisor) - { - } - - ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(DirectoryPtr directory, SegmentInfosPtr infos, - Collection oldReaders, Collection oldStarts, - MapStringByteArray oldNormsCache, bool doClone, - int32_t termInfosIndexDivisor) : - DirectoryReader(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor) - { - } - - ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(IndexWriterPtr writer, SegmentInfosPtr infos, int32_t termInfosIndexDivisor) : - DirectoryReader(writer, infos, termInfosIndexDivisor) - { - } - - ReadOnlyDirectoryReader::~ReadOnlyDirectoryReader() - { - } - - void ReadOnlyDirectoryReader::acquireWriteLock() - { - ReadOnlySegmentReader::noWrite(); - } +namespace Lucene { + +ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& sis, + const IndexDeletionPolicyPtr& deletionPolicy, int32_t termInfosIndexDivisor) : + DirectoryReader(directory, sis, deletionPolicy, true, termInfosIndexDivisor) { +} + +ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(const DirectoryPtr& directory, const SegmentInfosPtr& infos, + Collection oldReaders, Collection oldStarts, + MapStringByteArray oldNormsCache, bool doClone, + int32_t termInfosIndexDivisor) : + DirectoryReader(directory, infos, oldReaders, oldStarts, oldNormsCache, true, doClone, termInfosIndexDivisor) { +} + +ReadOnlyDirectoryReader::ReadOnlyDirectoryReader(const IndexWriterPtr& writer, const SegmentInfosPtr& infos, int32_t termInfosIndexDivisor) : + DirectoryReader(writer, infos, 
termInfosIndexDivisor) { +} + +ReadOnlyDirectoryReader::~ReadOnlyDirectoryReader() { +} + +void ReadOnlyDirectoryReader::acquireWriteLock() { + ReadOnlySegmentReader::noWrite(); +} + } diff --git a/src/core/index/ReadOnlySegmentReader.cpp b/src/core/index/ReadOnlySegmentReader.cpp index 994a5035..46ac0813 100644 --- a/src/core/index/ReadOnlySegmentReader.cpp +++ b/src/core/index/ReadOnlySegmentReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,24 +8,21 @@ #include "ReadOnlySegmentReader.h" #include "BitVector.h" -namespace Lucene -{ - ReadOnlySegmentReader::~ReadOnlySegmentReader() - { - } - - void ReadOnlySegmentReader::noWrite() - { - boost::throw_exception(UnsupportedOperationException(L"This IndexReader cannot make any changes to the index (it was opened with readOnly = true)")); - } - - void ReadOnlySegmentReader::acquireWriteLock() - { - noWrite(); - } - - bool ReadOnlySegmentReader::isDeleted(int32_t n) - { - return (deletedDocs && deletedDocs->get(n)); - } +namespace Lucene { + +ReadOnlySegmentReader::~ReadOnlySegmentReader() { +} + +void ReadOnlySegmentReader::noWrite() { + boost::throw_exception(UnsupportedOperationException(L"This IndexReader cannot make any changes to the index (it was opened with readOnly = true)")); +} + +void ReadOnlySegmentReader::acquireWriteLock() { + noWrite(); +} + +bool ReadOnlySegmentReader::isDeleted(int32_t n) { + return (deletedDocs && deletedDocs->get(n)); +} + } diff --git a/src/core/index/ReusableStringReader.cpp b/src/core/index/ReusableStringReader.cpp index 50640428..1a6adf7e 100644 --- a/src/core/index/ReusableStringReader.cpp +++ 
b/src/core/index/ReusableStringReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,50 +8,41 @@ #include "ReusableStringReader.h" #include "MiscUtils.h" -namespace Lucene -{ - ReusableStringReader::ReusableStringReader() - { - upto = 0; +namespace Lucene { + +ReusableStringReader::ReusableStringReader() { + upto = 0; + left = 0; +} + +ReusableStringReader::~ReusableStringReader() { +} + +void ReusableStringReader::init(const String& s) { + this->s = s; + left = s.length(); + this->upto = 0; +} + +int32_t ReusableStringReader::read(wchar_t* buffer, int32_t offset, int32_t length) { + if (left > length) { + MiscUtils::arrayCopy(s.begin(), upto, buffer, offset, length); + upto += length; + left -= length; + return length; + } else if (left == 0) { + s.clear(); + return -1; + } else { + MiscUtils::arrayCopy(s.begin(), upto, buffer, offset, left); + int32_t r = left; left = 0; + upto = s.length(); + return r; } - - ReusableStringReader::~ReusableStringReader() - { - } - - void ReusableStringReader::init(const String& s) - { - this->s = s; - left = s.length(); - this->upto = 0; - } - - int32_t ReusableStringReader::read(wchar_t* buffer, int32_t offset, int32_t length) - { - if (left > length) - { - MiscUtils::arrayCopy(s.begin(), upto, buffer, offset, length); - upto += length; - left -= length; - return length; - } - else if (left == 0) - { - s.clear(); - return -1; - } - else - { - MiscUtils::arrayCopy(s.begin(), upto, buffer, offset, left); - int32_t r = left; - left = 0; - upto = s.length(); - return r; - } - } - - void ReusableStringReader::close() - { - } +} + +void 
ReusableStringReader::close() { +} + } diff --git a/src/core/index/SegmentInfo.cpp b/src/core/index/SegmentInfo.cpp index 3a3a5a2b..ed8deb3d 100644 --- a/src/core/index/SegmentInfo.cpp +++ b/src/core/index/SegmentInfo.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -18,595 +18,537 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t SegmentInfo::NO = -1; // no norms; no deletes; - const int32_t SegmentInfo::YES = 1; // have norms; have deletes; - const int32_t SegmentInfo::CHECK_DIR = 0; // must check dir to see if there are norms/deletions - const int32_t SegmentInfo::WITHOUT_GEN = 0; // a file name that has no GEN in it. - - SegmentInfo::SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir) - { - _sizeInBytes = -1; - this->name = name; - this->docCount = docCount; - this->dir = dir; - delGen = NO; - this->isCompoundFile = CHECK_DIR; - preLockless = true; - hasSingleNormFile = false; - docStoreOffset = -1; - docStoreSegment = name; - docStoreIsCompoundFile = false; - delCount = 0; - hasProx = true; - } - - SegmentInfo::SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir, bool isCompoundFile, bool hasSingleNormFile) - { - _sizeInBytes = -1; - this->name = name; - this->docCount = docCount; - this->dir = dir; - delGen = NO; - this->isCompoundFile = (uint8_t)(isCompoundFile ? 
YES : NO); - this->hasSingleNormFile = hasSingleNormFile; - preLockless = false; - docStoreOffset = -1; - docStoreIsCompoundFile = false; - delCount = 0; - hasProx = true; - } - - SegmentInfo::SegmentInfo(const String& name, int32_t docCount, DirectoryPtr dir, bool isCompoundFile, bool hasSingleNormFile, - int32_t docStoreOffset, const String& docStoreSegment, bool docStoreIsCompoundFile, bool hasProx) - { - _sizeInBytes = -1; - this->name = name; - this->docCount = docCount; - this->dir = dir; - delGen = NO; - this->isCompoundFile = (uint8_t)(isCompoundFile ? YES : NO); - this->hasSingleNormFile = hasSingleNormFile; - preLockless = false; - this->docStoreOffset = docStoreOffset; - this->docStoreSegment = docStoreSegment; - this->docStoreIsCompoundFile = docStoreIsCompoundFile; - delCount = 0; - this->hasProx = hasProx; - } - - SegmentInfo::SegmentInfo(DirectoryPtr dir, int32_t format, IndexInputPtr input) - { - _sizeInBytes = -1; - this->dir = dir; - name = input->readString(); - docCount = input->readInt(); - if (format <= SegmentInfos::FORMAT_LOCKLESS) - { - delGen = input->readLong(); - if (format <= SegmentInfos::FORMAT_SHARED_DOC_STORE) - { - docStoreOffset = input->readInt(); - if (docStoreOffset != -1) - { - docStoreSegment = input->readString(); - docStoreIsCompoundFile = (input->readByte() == 1); - } - else - { - docStoreSegment = name; - docStoreIsCompoundFile = false; - } - } - else - { - docStoreOffset = -1; +namespace Lucene { + +const int32_t SegmentInfo::NO = -1; // no norms; no deletes; +const int32_t SegmentInfo::YES = 1; // have norms; have deletes; +const int32_t SegmentInfo::CHECK_DIR = 0; // must check dir to see if there are norms/deletions +const int32_t SegmentInfo::WITHOUT_GEN = 0; // a file name that has no GEN in it. 
+ +SegmentInfo::SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir) { + _sizeInBytes = -1; + this->name = name; + this->docCount = docCount; + this->dir = dir; + delGen = NO; + this->isCompoundFile = CHECK_DIR; + preLockless = true; + hasSingleNormFile = false; + docStoreOffset = -1; + docStoreSegment = name; + docStoreIsCompoundFile = false; + delCount = 0; + hasProx = true; +} + +SegmentInfo::SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir, bool isCompoundFile, bool hasSingleNormFile) { + _sizeInBytes = -1; + this->name = name; + this->docCount = docCount; + this->dir = dir; + delGen = NO; + this->isCompoundFile = (uint8_t)(isCompoundFile ? YES : NO); + this->hasSingleNormFile = hasSingleNormFile; + preLockless = false; + docStoreOffset = -1; + docStoreIsCompoundFile = false; + delCount = 0; + hasProx = true; +} + +SegmentInfo::SegmentInfo(const String& name, int32_t docCount, const DirectoryPtr& dir, bool isCompoundFile, bool hasSingleNormFile, + int32_t docStoreOffset, const String& docStoreSegment, bool docStoreIsCompoundFile, bool hasProx) { + _sizeInBytes = -1; + this->name = name; + this->docCount = docCount; + this->dir = dir; + delGen = NO; + this->isCompoundFile = (uint8_t)(isCompoundFile ? 
YES : NO); + this->hasSingleNormFile = hasSingleNormFile; + preLockless = false; + this->docStoreOffset = docStoreOffset; + this->docStoreSegment = docStoreSegment; + this->docStoreIsCompoundFile = docStoreIsCompoundFile; + delCount = 0; + this->hasProx = hasProx; +} + +SegmentInfo::SegmentInfo(const DirectoryPtr& dir, int32_t format, const IndexInputPtr& input) { + _sizeInBytes = -1; + this->dir = dir; + name = input->readString(); + docCount = input->readInt(); + if (format <= SegmentInfos::FORMAT_LOCKLESS) { + delGen = input->readLong(); + if (format <= SegmentInfos::FORMAT_SHARED_DOC_STORE) { + docStoreOffset = input->readInt(); + if (docStoreOffset != -1) { + docStoreSegment = input->readString(); + docStoreIsCompoundFile = (input->readByte() == 1); + } else { docStoreSegment = name; docStoreIsCompoundFile = false; } - if (format <= SegmentInfos::FORMAT_SINGLE_NORM_FILE) - hasSingleNormFile = (input->readByte() == 1); - else - hasSingleNormFile = false; - int32_t numNormGen = input->readInt(); - if (numNormGen != NO) - { - normGen = Collection::newInstance(numNormGen); - for (int32_t j = 0; j < numNormGen; ++j) - normGen[j] = input->readLong(); - } - isCompoundFile = input->readByte(); - preLockless = (isCompoundFile == CHECK_DIR); - if (format <= SegmentInfos::FORMAT_DEL_COUNT) - { - delCount = input->readInt(); - BOOST_ASSERT(delCount <= docCount); - } - else - delCount = -1; - if (format <= SegmentInfos::FORMAT_HAS_PROX) - hasProx = (input->readByte() == 1); - else - hasProx = true; - - if (format <= SegmentInfos::FORMAT_DIAGNOSTICS) - diagnostics = input->readStringStringMap(); - else - diagnostics = MapStringString::newInstance(); - } - else - { - delGen = CHECK_DIR; - isCompoundFile = CHECK_DIR; - preLockless = true; - hasSingleNormFile = false; + } else { docStoreOffset = -1; + docStoreSegment = name; docStoreIsCompoundFile = false; + } + if (format <= SegmentInfos::FORMAT_SINGLE_NORM_FILE) { + hasSingleNormFile = (input->readByte() == 1); + } else { + 
hasSingleNormFile = false; + } + int32_t numNormGen = input->readInt(); + if (numNormGen != NO) { + normGen = Collection::newInstance(numNormGen); + for (int32_t j = 0; j < numNormGen; ++j) { + normGen[j] = input->readLong(); + } + } + isCompoundFile = input->readByte(); + preLockless = (isCompoundFile == CHECK_DIR); + if (format <= SegmentInfos::FORMAT_DEL_COUNT) { + delCount = input->readInt(); + BOOST_ASSERT(delCount <= docCount); + } else { delCount = -1; + } + if (format <= SegmentInfos::FORMAT_HAS_PROX) { + hasProx = (input->readByte() == 1); + } else { hasProx = true; + } + + if (format <= SegmentInfos::FORMAT_DIAGNOSTICS) { + diagnostics = input->readStringStringMap(); + } else { diagnostics = MapStringString::newInstance(); } + } else { + delGen = CHECK_DIR; + isCompoundFile = CHECK_DIR; + preLockless = true; + hasSingleNormFile = false; + docStoreOffset = -1; + docStoreIsCompoundFile = false; + delCount = -1; + hasProx = true; + diagnostics = MapStringString::newInstance(); } - - SegmentInfo::~SegmentInfo() - { - } - - void SegmentInfo::reset(SegmentInfoPtr src) - { - clearFiles(); - name = src->name; - docCount = src->docCount; - dir = src->dir; - preLockless = src->preLockless; - delGen = src->delGen; - docStoreOffset = src->docStoreOffset; - docStoreIsCompoundFile = src->docStoreIsCompoundFile; - if (!src->normGen) - normGen = src->normGen; - else - normGen = Collection::newInstance(src->normGen.begin(), src->normGen.end()); - isCompoundFile = src->isCompoundFile; - hasSingleNormFile = src->hasSingleNormFile; - delCount = src->delCount; - } - - void SegmentInfo::setDiagnostics(MapStringString diagnostics) - { - this->diagnostics = diagnostics; - } - - MapStringString SegmentInfo::getDiagnostics() - { - return diagnostics; - } - - void SegmentInfo::setNumFields(int32_t numFields) - { - if (!normGen) - { - // normGen is null if we loaded a pre-2.1 segment file, or, if this segments file hasn't had any - // norms set against it yet - normGen = 
Collection::newInstance(numFields); - - if (!preLockless) - { // Do nothing: thus leaving normGen[k] == CHECK_DIR (==0), so that later we know - } // we have to check filesystem for norm files, because this is prelockless. - else - { - // This is a FORMAT_LOCKLESS segment, which means there are no separate norms - for (int32_t i = 0; i < numFields; ++i) - normGen[i] = NO; +} + +SegmentInfo::~SegmentInfo() { +} + +void SegmentInfo::reset(const SegmentInfoPtr& src) { + clearFiles(); + name = src->name; + docCount = src->docCount; + dir = src->dir; + preLockless = src->preLockless; + delGen = src->delGen; + docStoreOffset = src->docStoreOffset; + docStoreIsCompoundFile = src->docStoreIsCompoundFile; + if (!src->normGen) { + normGen = src->normGen; + } else { + normGen = Collection::newInstance(src->normGen.begin(), src->normGen.end()); + } + isCompoundFile = src->isCompoundFile; + hasSingleNormFile = src->hasSingleNormFile; + delCount = src->delCount; +} + +void SegmentInfo::setDiagnostics(MapStringString diagnostics) { + this->diagnostics = diagnostics; +} + +MapStringString SegmentInfo::getDiagnostics() { + return diagnostics; +} + +void SegmentInfo::setNumFields(int32_t numFields) { + if (!normGen) { + // normGen is null if we loaded a pre-2.1 segment file, or, if this segments file hasn't had any + // norms set against it yet + normGen = Collection::newInstance(numFields); + + if (!preLockless) { + // Do nothing: thus leaving normGen[k] == CHECK_DIR (==0), so that later we know + } // we have to check filesystem for norm files, because this is prelockless. 
+ else { + // This is a FORMAT_LOCKLESS segment, which means there are no separate norms + for (int32_t i = 0; i < numFields; ++i) { + normGen[i] = NO; } } } - - int64_t SegmentInfo::sizeInBytes() - { - if (_sizeInBytes == -1) - { - HashSet _files(files()); - _sizeInBytes = 0; - for (HashSet::iterator fileName = _files.begin(); fileName != _files.end(); ++fileName) - { - // we don't count bytes used by a shared doc store against this segment - if (docStoreOffset == -1 || !IndexFileNames::isDocStoreFile(*fileName)) - _sizeInBytes += dir->fileLength(*fileName); +} + +int64_t SegmentInfo::sizeInBytes() { + if (_sizeInBytes == -1) { + HashSet _files(files()); + _sizeInBytes = 0; + for (HashSet::iterator fileName = _files.begin(); fileName != _files.end(); ++fileName) { + // we don't count bytes used by a shared doc store against this segment + if (docStoreOffset == -1 || !IndexFileNames::isDocStoreFile(*fileName)) { + _sizeInBytes += dir->fileLength(*fileName); } } - return _sizeInBytes; - } - - bool SegmentInfo::hasDeletions() - { - if (delGen == NO) - return false; - else if (delGen >= YES) - return true; - else - return dir->fileExists(getDelFileName()); - } - - void SegmentInfo::advanceDelGen() - { - // delGen 0 is reserved for pre-LOCKLESS format - if (delGen == NO) - delGen = YES; - else - delGen++; - clearFiles(); } - - void SegmentInfo::clearDelGen() - { - delGen = NO; - clearFiles(); + return _sizeInBytes; +} + +bool SegmentInfo::hasDeletions() { + if (delGen == NO) { + return false; + } else if (delGen >= YES) { + return true; + } else { + return dir->fileExists(getDelFileName()); } - - LuceneObjectPtr SegmentInfo::clone(LuceneObjectPtr other) - { - SegmentInfoPtr si(newLucene(name, docCount, dir)); - si->isCompoundFile = isCompoundFile; - si->delGen = delGen; - si->delCount = delCount; - si->hasProx = hasProx; - si->preLockless = preLockless; - si->hasSingleNormFile = hasSingleNormFile; - si->diagnostics = MapStringString::newInstance(); - 
si->diagnostics.putAll(diagnostics.begin(), diagnostics.end()); - if (normGen) - si->normGen = Collection::newInstance(normGen.begin(), normGen.end()); - si->docStoreOffset = docStoreOffset; - si->docStoreSegment = docStoreSegment; - si->docStoreIsCompoundFile = docStoreIsCompoundFile; - return si; +} + +void SegmentInfo::advanceDelGen() { + // delGen 0 is reserved for pre-LOCKLESS format + if (delGen == NO) { + delGen = YES; + } else { + delGen++; + } + clearFiles(); +} + +void SegmentInfo::clearDelGen() { + delGen = NO; + clearFiles(); +} + +LuceneObjectPtr SegmentInfo::clone(const LuceneObjectPtr& other) { + SegmentInfoPtr si(newLucene(name, docCount, dir)); + si->isCompoundFile = isCompoundFile; + si->delGen = delGen; + si->delCount = delCount; + si->hasProx = hasProx; + si->preLockless = preLockless; + si->hasSingleNormFile = hasSingleNormFile; + si->diagnostics = MapStringString::newInstance(); + si->diagnostics.putAll(diagnostics.begin(), diagnostics.end()); + if (normGen) { + si->normGen = Collection::newInstance(normGen.begin(), normGen.end()); + } + si->docStoreOffset = docStoreOffset; + si->docStoreSegment = docStoreSegment; + si->docStoreIsCompoundFile = docStoreIsCompoundFile; + return si; +} + +String SegmentInfo::getDelFileName() { + if (delGen == NO) { + // in this case we know there is no deletion filename against this segment + return L""; + } else { + // if delgen is check_dir, it's the pre-lockless-commit file format + return IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::DELETES_EXTENSION(), delGen); } - - String SegmentInfo::getDelFileName() - { - if (delGen == NO) - { - // in this case we know there is no deletion filename against this segment - return L""; - } - else - { - // if delgen is check_dir, it's the pre-lockless-commit file format - return IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::DELETES_EXTENSION(), delGen); - } +} + +bool SegmentInfo::hasSeparateNorms(int32_t 
fieldNumber) { + if ((!normGen && preLockless) || (normGen && normGen[fieldNumber] == CHECK_DIR)) { + // must fallback to directory file exists check + return dir->fileExists(name + L".s" + StringUtils::toString(fieldNumber)); + } else if (!normGen || normGen[fieldNumber] == NO) { + return false; + } else { + return true; } - - bool SegmentInfo::hasSeparateNorms(int32_t fieldNumber) - { - if ((!normGen && preLockless) || (normGen && normGen[fieldNumber] == CHECK_DIR)) - { - // must fallback to directory file exists check - return dir->fileExists(name + L".s" + StringUtils::toString(fieldNumber)); - } - else if (!normGen || normGen[fieldNumber] == NO) +} + +bool SegmentInfo::hasSeparateNorms() { + if (!normGen) { + if (!preLockless) { + // this means we were created with lockless code and no norms are written yet return false; - else - return true; - } - - bool SegmentInfo::hasSeparateNorms() - { - if (!normGen) - { - if (!preLockless) - { - // this means we were created with lockless code and no norms are written yet - return false; + } else { + HashSet result(dir->listAll()); + if (!result) { + boost::throw_exception(IOException(L"Cannot read directory " + dir->toString() + L": listAll() returned null")); } - else - { - HashSet result(dir->listAll()); - if (!result) - boost::throw_exception(IOException(L"Cannot read directory " + dir->toString() + L": listAll() returned null")); - String pattern(name + L".s"); - int32_t patternLength = pattern.length(); - for (HashSet::iterator fileName = result.begin(); fileName != result.end(); ++fileName) - { - if (IndexFileNameFilter::accept(L"", *fileName) && boost::starts_with(*fileName, pattern) && UnicodeUtil::isDigit((*fileName)[patternLength])) - return true; + String pattern(name + L".s"); + int32_t patternLength = pattern.length(); + for (HashSet::iterator fileName = result.begin(); fileName != result.end(); ++fileName) { + if (IndexFileNameFilter::accept(L"", *fileName) && boost::starts_with(*fileName, pattern) && 
UnicodeUtil::isDigit((*fileName)[patternLength])) { + return true; } - return false; } + return false; } - else - { - // This means this segment was saved with LOCKLESS code so we first check whether any normGen's are >= 1 - // (meaning they definitely have separate norms) - for (Collection::iterator gen = normGen.begin(); gen != normGen.end(); ++gen) - { - if (*gen >= YES) - return true; + } else { + // This means this segment was saved with LOCKLESS code so we first check whether any normGen's are >= 1 + // (meaning they definitely have separate norms) + for (Collection::iterator gen = normGen.begin(); gen != normGen.end(); ++gen) { + if (*gen >= YES) { + return true; } + } - // Next we look for any == 0. These cases were pre-LOCKLESS and must be checked in directory - for (int32_t gen = 0; gen < normGen.size(); ++gen) - { - if (normGen[gen] == CHECK_DIR && hasSeparateNorms(gen)) - return true; + // Next we look for any == 0. These cases were pre-LOCKLESS and must be checked in directory + for (int32_t gen = 0; gen < normGen.size(); ++gen) { + if (normGen[gen] == CHECK_DIR && hasSeparateNorms(gen)) { + return true; } } - return false; } - - void SegmentInfo::advanceNormGen(int32_t fieldIndex) - { - if (normGen[fieldIndex] == NO) - normGen[fieldIndex] = YES; - else - normGen[fieldIndex]++; - clearFiles(); - } - - String SegmentInfo::getNormFileName(int32_t number) - { - String prefix; - int64_t gen = !normGen ? 
CHECK_DIR : normGen[number]; - - if (hasSeparateNorms(number)) - { - // case 1: separate norm - prefix = L".s"; - return IndexFileNames::fileNameFromGeneration(name, prefix + StringUtils::toString(number), gen); - } - - if (hasSingleNormFile) - { - // case 2: lockless (or nrm file exists) - single file for all norms - prefix = String(L".") + IndexFileNames::NORMS_EXTENSION(); - return IndexFileNames::fileNameFromGeneration(name, prefix, WITHOUT_GEN); - } - - // case 3: norm file for each field - prefix = L".f"; - return IndexFileNames::fileNameFromGeneration(name, prefix + StringUtils::toString(number), WITHOUT_GEN); - } - - void SegmentInfo::setUseCompoundFile(bool isCompoundFile) - { - this->isCompoundFile = (uint8_t)(isCompoundFile ? YES : NO); - clearFiles(); - } - - bool SegmentInfo::getUseCompoundFile() - { - if (isCompoundFile == (uint8_t)NO) - return false; - else if (isCompoundFile == (uint8_t)YES) - return true; - else - return dir->fileExists(name + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION()); - } - - int32_t SegmentInfo::getDelCount() - { - if (delCount == -1) - delCount = hasDeletions() ? BitVector(dir, getDelFileName()).count() : 0; - BOOST_ASSERT(delCount <= docCount); - return delCount; - } - - void SegmentInfo::setDelCount(int32_t delCount) - { - this->delCount = delCount; - BOOST_ASSERT(delCount <= docCount); - } - - int32_t SegmentInfo::getDocStoreOffset() - { - return docStoreOffset; + return false; +} + +void SegmentInfo::advanceNormGen(int32_t fieldIndex) { + if (normGen[fieldIndex] == NO) { + normGen[fieldIndex] = YES; + } else { + normGen[fieldIndex]++; } - - bool SegmentInfo::getDocStoreIsCompoundFile() - { - return docStoreIsCompoundFile; + clearFiles(); +} + +String SegmentInfo::getNormFileName(int32_t number) { + String prefix; + int64_t gen = !normGen ? 
CHECK_DIR : normGen[number]; + + if (hasSeparateNorms(number)) { + // case 1: separate norm + prefix = L".s"; + return IndexFileNames::fileNameFromGeneration(name, prefix + StringUtils::toString(number), gen); } - - void SegmentInfo::setDocStoreIsCompoundFile(bool v) - { - docStoreIsCompoundFile = v; - clearFiles(); + + if (hasSingleNormFile) { + // case 2: lockless (or nrm file exists) - single file for all norms + prefix = String(L".") + IndexFileNames::NORMS_EXTENSION(); + return IndexFileNames::fileNameFromGeneration(name, prefix, WITHOUT_GEN); } - - String SegmentInfo::getDocStoreSegment() - { - return docStoreSegment; + + // case 3: norm file for each field + prefix = L".f"; + return IndexFileNames::fileNameFromGeneration(name, prefix + StringUtils::toString(number), WITHOUT_GEN); +} + +void SegmentInfo::setUseCompoundFile(bool isCompoundFile) { + this->isCompoundFile = (uint8_t)(isCompoundFile ? YES : NO); + clearFiles(); +} + +bool SegmentInfo::getUseCompoundFile() { + if (isCompoundFile == (uint8_t)NO) { + return false; + } else if (isCompoundFile == (uint8_t)YES) { + return true; + } else { + return dir->fileExists(name + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION()); } - - void SegmentInfo::setDocStoreOffset(int32_t offset) - { - docStoreOffset = offset; - clearFiles(); +} + +int32_t SegmentInfo::getDelCount() { + if (delCount == -1) { + delCount = hasDeletions() ? 
BitVector(dir, getDelFileName()).count() : 0; } - - void SegmentInfo::setDocStore(int32_t offset, const String& segment, bool isCompoundFile) - { - docStoreOffset = offset; - docStoreSegment = segment; - docStoreIsCompoundFile = isCompoundFile; + BOOST_ASSERT(delCount <= docCount); + return delCount; +} + +void SegmentInfo::setDelCount(int32_t delCount) { + this->delCount = delCount; + BOOST_ASSERT(delCount <= docCount); +} + +int32_t SegmentInfo::getDocStoreOffset() { + return docStoreOffset; +} + +bool SegmentInfo::getDocStoreIsCompoundFile() { + return docStoreIsCompoundFile; +} + +void SegmentInfo::setDocStoreIsCompoundFile(bool v) { + docStoreIsCompoundFile = v; + clearFiles(); +} + +String SegmentInfo::getDocStoreSegment() { + return docStoreSegment; +} + +void SegmentInfo::setDocStoreOffset(int32_t offset) { + docStoreOffset = offset; + clearFiles(); +} + +void SegmentInfo::setDocStore(int32_t offset, const String& segment, bool isCompoundFile) { + docStoreOffset = offset; + docStoreSegment = segment; + docStoreIsCompoundFile = isCompoundFile; +} + +void SegmentInfo::write(const IndexOutputPtr& output) { + output->writeString(name); + output->writeInt(docCount); + output->writeLong(delGen); + output->writeInt(docStoreOffset); + if (docStoreOffset != -1) { + output->writeString(docStoreSegment); + output->writeByte((uint8_t)(docStoreIsCompoundFile ? 1 : 0)); } - - void SegmentInfo::write(IndexOutputPtr output) - { - output->writeString(name); - output->writeInt(docCount); - output->writeLong(delGen); - output->writeInt(docStoreOffset); - if (docStoreOffset != -1) - { - output->writeString(docStoreSegment); - output->writeByte((uint8_t)(docStoreIsCompoundFile ? 1 : 0)); - } - - output->writeByte((uint8_t)(hasSingleNormFile ? 
1 : 0)); - if (!normGen) - output->writeInt(NO); - else - { - output->writeInt(normGen.size()); - for (Collection::iterator gen = normGen.begin(); gen != normGen.end(); ++gen) - output->writeLong(*gen); + + output->writeByte((uint8_t)(hasSingleNormFile ? 1 : 0)); + if (!normGen) { + output->writeInt(NO); + } else { + output->writeInt(normGen.size()); + for (Collection::iterator gen = normGen.begin(); gen != normGen.end(); ++gen) { + output->writeLong(*gen); } - output->writeByte(isCompoundFile); - output->writeInt(delCount); - output->writeByte((uint8_t)(hasProx ? 1 : 0)); - output->writeStringStringMap(diagnostics); } - - void SegmentInfo::setHasProx(bool hasProx) - { - this->hasProx = hasProx; - clearFiles(); - } - - bool SegmentInfo::getHasProx() - { - return hasProx; + output->writeByte(isCompoundFile); + output->writeInt(delCount); + output->writeByte((uint8_t)(hasProx ? 1 : 0)); + output->writeStringStringMap(diagnostics); +} + +void SegmentInfo::setHasProx(bool hasProx) { + this->hasProx = hasProx; + clearFiles(); +} + +bool SegmentInfo::getHasProx() { + return hasProx; +} + +void SegmentInfo::addIfExists(HashSet files, const String& fileName) { + if (dir->fileExists(fileName)) { + files.add(fileName); } - - void SegmentInfo::addIfExists(HashSet files, const String& fileName) - { - if (dir->fileExists(fileName)) - files.add(fileName); +} + +HashSet SegmentInfo::files() { + if (_files) { + // already cached + return _files; } - - HashSet SegmentInfo::files() - { - if (_files) - { - // already cached - return _files; - } - - _files = HashSet::newInstance(); - bool useCompoundFile = getUseCompoundFile(); - - if (useCompoundFile) - _files.add(name + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION()); - else - { - for (HashSet::iterator ext = IndexFileNames::NON_STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::NON_STORE_INDEX_EXTENSIONS().end(); ++ext) - addIfExists(_files, name + L"." 
+ *ext); + + _files = HashSet::newInstance(); + bool useCompoundFile = getUseCompoundFile(); + + if (useCompoundFile) { + _files.add(name + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION()); + } else { + for (HashSet::iterator ext = IndexFileNames::NON_STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::NON_STORE_INDEX_EXTENSIONS().end(); ++ext) { + addIfExists(_files, name + L"." + *ext); } - - if (docStoreOffset != -1) - { - // we are sharing doc stores (stored fields, term vectors) with other segments - BOOST_ASSERT(!docStoreSegment.empty()); - if (docStoreIsCompoundFile) - _files.add(docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); - else - { - for (HashSet::iterator ext = IndexFileNames::STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::STORE_INDEX_EXTENSIONS().end(); ++ext) - addIfExists(_files, docStoreSegment + L"." + *ext); + } + + if (docStoreOffset != -1) { + // we are sharing doc stores (stored fields, term vectors) with other segments + BOOST_ASSERT(!docStoreSegment.empty()); + if (docStoreIsCompoundFile) { + _files.add(docStoreSegment + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION()); + } else { + for (HashSet::iterator ext = IndexFileNames::STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::STORE_INDEX_EXTENSIONS().end(); ++ext) { + addIfExists(_files, docStoreSegment + L"." + *ext); } } - else if (!useCompoundFile) - { - // we are not sharing, and, these files were not included in the compound file - for (HashSet::iterator ext = IndexFileNames::STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::STORE_INDEX_EXTENSIONS().end(); ++ext) - addIfExists(_files, name + L"." + *ext); + } else if (!useCompoundFile) { + // we are not sharing, and, these files were not included in the compound file + for (HashSet::iterator ext = IndexFileNames::STORE_INDEX_EXTENSIONS().begin(); ext != IndexFileNames::STORE_INDEX_EXTENSIONS().end(); ++ext) { + addIfExists(_files, name + L"." 
+ *ext); } - - String delFileName(IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::DELETES_EXTENSION(), delGen)); - if (!delFileName.empty() && (delGen >= YES || dir->fileExists(delFileName))) - _files.add(delFileName); - - // careful logic for norms files - if (normGen) - { - for (int32_t gen = 0; gen < normGen.size(); ++gen) - { - if (normGen[gen] >= YES) - { - // definitely a separate norm file, with generation - _files.add(IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::SEPARATE_NORMS_EXTENSION() + StringUtils::toString(gen), normGen[gen])); - } - else if (normGen[gen] == NO) - { - // no separate norms but maybe plain norms in the non compound file case - if (!hasSingleNormFile && !useCompoundFile) - { - String fileName(name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION() + StringUtils::toString(gen)); - if (dir->fileExists(fileName)) - _files.add(fileName); + } + + String delFileName(IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::DELETES_EXTENSION(), delGen)); + if (!delFileName.empty() && (delGen >= YES || dir->fileExists(delFileName))) { + _files.add(delFileName); + } + + // careful logic for norms files + if (normGen) { + for (int32_t gen = 0; gen < normGen.size(); ++gen) { + if (normGen[gen] >= YES) { + // definitely a separate norm file, with generation + _files.add(IndexFileNames::fileNameFromGeneration(name, String(L".") + IndexFileNames::SEPARATE_NORMS_EXTENSION() + StringUtils::toString(gen), normGen[gen])); + } else if (normGen[gen] == NO) { + // no separate norms but maybe plain norms in the non compound file case + if (!hasSingleNormFile && !useCompoundFile) { + String fileName(name + L"." 
+ IndexFileNames::PLAIN_NORMS_EXTENSION() + StringUtils::toString(gen)); + if (dir->fileExists(fileName)) { + _files.add(fileName); } } - else if (normGen[gen] == CHECK_DIR) - { - // pre-2.1: we have to check file existence - String fileName; - if (useCompoundFile) - fileName = name + L"." + IndexFileNames::SEPARATE_NORMS_EXTENSION() + StringUtils::toString(gen); - else if (!hasSingleNormFile) - fileName = name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION() + StringUtils::toString(gen); - if (!fileName.empty() && dir->fileExists(fileName)) - _files.add(fileName); + } else if (normGen[gen] == CHECK_DIR) { + // pre-2.1: we have to check file existence + String fileName; + if (useCompoundFile) { + fileName = name + L"." + IndexFileNames::SEPARATE_NORMS_EXTENSION() + StringUtils::toString(gen); + } else if (!hasSingleNormFile) { + fileName = name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION() + StringUtils::toString(gen); + } + if (!fileName.empty() && dir->fileExists(fileName)) { + _files.add(fileName); } } } - else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) - { - // pre-2.1: we have to scan the dir to find all matching _x.sn/_x.fn files for our segment - String prefix; - if (useCompoundFile) - prefix = name + L"." + IndexFileNames::SEPARATE_NORMS_EXTENSION(); - else - prefix = name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION(); - int32_t prefixLength = prefix.length(); - HashSet allFiles(dir->listAll()); - for (HashSet::iterator fileName = allFiles.begin(); fileName != allFiles.end(); ++fileName) - { - if (IndexFileNameFilter::accept(L"", *fileName) && (int32_t)fileName->length() > prefixLength && - UnicodeUtil::isDigit((*fileName)[prefixLength]) && boost::starts_with(*fileName, prefix)) - _files.add(*fileName); + } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) { + // pre-2.1: we have to scan the dir to find all matching _x.sn/_x.fn files for our segment + String prefix; + if (useCompoundFile) { + prefix = name + L"." 
+ IndexFileNames::SEPARATE_NORMS_EXTENSION(); + } else { + prefix = name + L"." + IndexFileNames::PLAIN_NORMS_EXTENSION(); + } + int32_t prefixLength = prefix.length(); + HashSet allFiles(dir->listAll()); + for (HashSet::iterator fileName = allFiles.begin(); fileName != allFiles.end(); ++fileName) { + if (IndexFileNameFilter::accept(L"", *fileName) && (int32_t)fileName->length() > prefixLength && + UnicodeUtil::isDigit((*fileName)[prefixLength]) && boost::starts_with(*fileName, prefix)) { + _files.add(*fileName); } } - return _files; } - - void SegmentInfo::clearFiles() - { - _files.reset(); - _sizeInBytes = -1; + return _files; +} + +void SegmentInfo::clearFiles() { + _files.reset(); + _sizeInBytes = -1; +} + +String SegmentInfo::segString(const DirectoryPtr& dir) { + String cfs; + try { + cfs = getUseCompoundFile() ? L"c" : L"C"; + } catch (LuceneException&) { + cfs = L"?"; } - - String SegmentInfo::segString(DirectoryPtr dir) - { - String cfs; - try - { - cfs = getUseCompoundFile() ? L"c" : L"C"; - } - catch (LuceneException&) - { - cfs = L"?"; - } - - String docStore; - if (docStoreOffset != -1) - docStore = L"->" + docStoreSegment; - - return name + L":" + cfs + (this->dir == dir ? L"" : L"x") + StringUtils::toString(docCount) + docStore; + + String docStore; + if (docStoreOffset != -1) { + docStore = L"->" + docStoreSegment; } - - bool SegmentInfo::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - SegmentInfoPtr otherSegmentInfo(boost::dynamic_pointer_cast(other)); - if (!otherSegmentInfo) - return false; - return (otherSegmentInfo->dir == dir && otherSegmentInfo->name == name); + + return name + L":" + cfs + (this->dir == dir ? 
L"" : L"x") + StringUtils::toString(docCount) + docStore; +} + +bool SegmentInfo::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t SegmentInfo::hashCode() - { - return dir->hashCode() + StringUtils::hashCode(name); + SegmentInfoPtr otherSegmentInfo(boost::dynamic_pointer_cast(other)); + if (!otherSegmentInfo) { + return false; } + return (otherSegmentInfo->dir == dir && otherSegmentInfo->name == name); +} + +int32_t SegmentInfo::hashCode() { + return dir->hashCode() + StringUtils::hashCode(name); +} + } diff --git a/src/core/index/SegmentInfoCollection.cpp b/src/core/index/SegmentInfoCollection.cpp index b1325706..21273594 100644 --- a/src/core/index/SegmentInfoCollection.cpp +++ b/src/core/index/SegmentInfoCollection.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,81 +8,76 @@ #include "SegmentInfoCollection.h" #include "SegmentInfo.h" -namespace Lucene -{ - SegmentInfoCollection::SegmentInfoCollection() - { - segmentInfos = Collection::newInstance(); - } - - SegmentInfoCollection::~SegmentInfoCollection() - { - } - - int32_t SegmentInfoCollection::size() - { - return segmentInfos.size(); - } - - bool SegmentInfoCollection::empty() - { - return segmentInfos.empty(); - } - - void SegmentInfoCollection::clear() - { - segmentInfos.clear(); - } - - void SegmentInfoCollection::add(SegmentInfoPtr info) - { - segmentInfos.add(info); - } - - void SegmentInfoCollection::add(int32_t pos, SegmentInfoPtr info) - { - segmentInfos.add(pos, info); - } - - void SegmentInfoCollection::addAll(SegmentInfoCollectionPtr segmentInfos) - { - this->segmentInfos.addAll(segmentInfos->segmentInfos.begin(), segmentInfos->segmentInfos.end()); - } - - bool SegmentInfoCollection::equals(SegmentInfoCollectionPtr other) - { - if (LuceneObject::equals(other)) - return true; - return segmentInfos.equals(other->segmentInfos, luceneEquals()); - } - - int32_t SegmentInfoCollection::find(SegmentInfoPtr info) - { - Collection::iterator idx = segmentInfos.find_if(luceneEqualTo(info)); - return idx == segmentInfos.end() ? 
-1 : std::distance(segmentInfos.begin(), idx); - } - - bool SegmentInfoCollection::contains(SegmentInfoPtr info) - { - return segmentInfos.contains_if(luceneEqualTo(info)); - } - - void SegmentInfoCollection::remove(int32_t pos) - { - segmentInfos.remove(segmentInfos.begin() + pos); +namespace Lucene { + +SegmentInfoCollection::SegmentInfoCollection() { + segmentInfos = Collection::newInstance(); +} + +SegmentInfoCollection::~SegmentInfoCollection() { +} + +int32_t SegmentInfoCollection::size() { + return segmentInfos.size(); +} + +bool SegmentInfoCollection::empty() { + return segmentInfos.empty(); +} + +void SegmentInfoCollection::clear() { + segmentInfos.clear(); +} + +void SegmentInfoCollection::add(const SegmentInfoPtr& info) { + segmentInfos.add(info); +} + +void SegmentInfoCollection::add(int32_t pos, const SegmentInfoPtr& info) { + segmentInfos.add(pos, info); +} + +void SegmentInfoCollection::addAll(const SegmentInfoCollectionPtr& segmentInfos) { + this->segmentInfos.addAll(segmentInfos->segmentInfos.begin(), segmentInfos->segmentInfos.end()); +} + +bool SegmentInfoCollection::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - void SegmentInfoCollection::remove(int32_t start, int32_t end) - { - segmentInfos.remove(segmentInfos.begin() + start, segmentInfos.begin() + end); + + SegmentInfoCollectionPtr otherColl(boost::dynamic_pointer_cast(other)); + if (!otherColl) { + return false; } - - LuceneObjectPtr SegmentInfoCollection::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = LuceneObject::clone(other ? 
other : newLucene()); - SegmentInfoCollectionPtr cloneInfos(boost::dynamic_pointer_cast(clone)); - for (Collection::iterator info = segmentInfos.begin(); info != segmentInfos.end(); ++info) - cloneInfos->segmentInfos.add(*info); - return cloneInfos; + + return segmentInfos.equals(otherColl->segmentInfos, luceneEquals()); +} + +int32_t SegmentInfoCollection::find(const SegmentInfoPtr& info) { + Collection::iterator idx = segmentInfos.find_if(luceneEqualTo(info)); + return idx == segmentInfos.end() ? -1 : std::distance(segmentInfos.begin(), idx); +} + +bool SegmentInfoCollection::contains(const SegmentInfoPtr& info) { + return segmentInfos.contains_if(luceneEqualTo(info)); +} + +void SegmentInfoCollection::remove(int32_t pos) { + segmentInfos.remove(segmentInfos.begin() + pos); +} + +void SegmentInfoCollection::remove(int32_t start, int32_t end) { + segmentInfos.remove(segmentInfos.begin() + start, segmentInfos.begin() + end); +} + +LuceneObjectPtr SegmentInfoCollection::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene()); + SegmentInfoCollectionPtr cloneInfos(boost::dynamic_pointer_cast(clone)); + for (Collection::iterator info = segmentInfos.begin(); info != segmentInfos.end(); ++info) { + cloneInfos->segmentInfos.add(*info); } + return cloneInfos; +} + } diff --git a/src/core/index/SegmentInfos.cpp b/src/core/index/SegmentInfos.cpp index a29984a2..2593152e 100644 --- a/src/core/index/SegmentInfos.cpp +++ b/src/core/index/SegmentInfos.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -20,750 +20,656 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// The file format version, a negative number. Works since counter, the old 1st entry, is always >= 0 - const int32_t SegmentInfos::FORMAT = -1; - - /// This format adds details used for lockless commits. It differs slightly from the previous format in that file names - /// are never re-used (write once). Instead, each file is written to the next generation. For example, segments_1, - /// segments_2, etc. This allows us to not use a commit lock. - const int32_t SegmentInfos::FORMAT_LOCKLESS = -2; - - /// This format adds a "hasSingleNormFile" flag into each segment info. - const int32_t SegmentInfos::FORMAT_SINGLE_NORM_FILE = -3; - - /// This format allows multiple segments to share a single vectors and stored fields file. - const int32_t SegmentInfos::FORMAT_SHARED_DOC_STORE = -4; - - /// This format adds a checksum at the end of the file to ensure all bytes were successfully written. - const int32_t SegmentInfos::FORMAT_CHECKSUM = -5; - - /// This format adds the deletion count for each segment. This way IndexWriter can efficiently report numDocs(). - const int32_t SegmentInfos::FORMAT_DEL_COUNT = -6; - - /// This format adds the boolean hasProx to record if any fields in the segment store prox information (ie, have - /// omitTermFreqAndPositions == false) - const int32_t SegmentInfos::FORMAT_HAS_PROX = -7; - - /// This format adds optional commit userData storage. - const int32_t SegmentInfos::FORMAT_USER_DATA = -8; - - /// This format adds optional per-segment string diagnostics storage, and switches userData to Map - const int32_t SegmentInfos::FORMAT_DIAGNOSTICS = -9; - - /// This must always point to the most recent file format. - const int32_t SegmentInfos::CURRENT_FORMAT = SegmentInfos::FORMAT_DIAGNOSTICS; - - /// Advanced configuration of retry logic in loading segments_N file. 
- int32_t SegmentInfos::defaultGenFileRetryCount = 10; - int32_t SegmentInfos::defaultGenFileRetryPauseMsec = 50; - int32_t SegmentInfos::defaultGenLookaheadCount = 10; - - MapStringString SegmentInfos::singletonUserData; - InfoStreamPtr SegmentInfos::infoStream; - - SegmentInfos::SegmentInfos() - { - userData = MapStringString::newInstance(); - lastGeneration = 0; - generation = 0; - counter = 0; - version = MiscUtils::currentTimeMillis(); - } - - SegmentInfos::~SegmentInfos() - { - } - - SegmentInfoPtr SegmentInfos::info(int32_t i) - { - return segmentInfos[i]; - } - - int64_t SegmentInfos::getCurrentSegmentGeneration(HashSet files) - { - if (!files) - return -1; - int64_t max = -1; - for (HashSet::iterator file = files.begin(); file != files.end(); ++file) - { - if (boost::starts_with(*file, IndexFileNames::SEGMENTS()) && *file != IndexFileNames::SEGMENTS_GEN()) - max = std::max(generationFromSegmentsFileName(*file), max); - } - return max; - } - - int64_t SegmentInfos::getCurrentSegmentGeneration(DirectoryPtr directory) - { - try - { - return getCurrentSegmentGeneration(directory->listAll()); - } - catch (LuceneException&) - { - return -1; +namespace Lucene { + +/// The file format version, a negative number. Works since counter, the old 1st entry, is always >= 0 +const int32_t SegmentInfos::FORMAT = -1; + +/// This format adds details used for lockless commits. It differs slightly from the previous format in that file names +/// are never re-used (write once). Instead, each file is written to the next generation. For example, segments_1, +/// segments_2, etc. This allows us to not use a commit lock. +const int32_t SegmentInfos::FORMAT_LOCKLESS = -2; + +/// This format adds a "hasSingleNormFile" flag into each segment info. +const int32_t SegmentInfos::FORMAT_SINGLE_NORM_FILE = -3; + +/// This format allows multiple segments to share a single vectors and stored fields file. 
+const int32_t SegmentInfos::FORMAT_SHARED_DOC_STORE = -4; + +/// This format adds a checksum at the end of the file to ensure all bytes were successfully written. +const int32_t SegmentInfos::FORMAT_CHECKSUM = -5; + +/// This format adds the deletion count for each segment. This way IndexWriter can efficiently report numDocs(). +const int32_t SegmentInfos::FORMAT_DEL_COUNT = -6; + +/// This format adds the boolean hasProx to record if any fields in the segment store prox information (ie, have +/// omitTermFreqAndPositions == false) +const int32_t SegmentInfos::FORMAT_HAS_PROX = -7; + +/// This format adds optional commit userData storage. +const int32_t SegmentInfos::FORMAT_USER_DATA = -8; + +/// This format adds optional per-segment string diagnostics storage, and switches userData to Map +const int32_t SegmentInfos::FORMAT_DIAGNOSTICS = -9; + +/// This must always point to the most recent file format. +const int32_t SegmentInfos::CURRENT_FORMAT = SegmentInfos::FORMAT_DIAGNOSTICS; + +/// Advanced configuration of retry logic in loading segments_N file. 
+int32_t SegmentInfos::defaultGenFileRetryCount = 10; +int32_t SegmentInfos::defaultGenFileRetryPauseMsec = 50; +int32_t SegmentInfos::defaultGenLookaheadCount = 10; + +MapStringString SegmentInfos::singletonUserData; +InfoStreamPtr SegmentInfos::infoStream; + +SegmentInfos::SegmentInfos() { + userData = MapStringString::newInstance(); + lastGeneration = 0; + generation = 0; + counter = 0; + version = MiscUtils::currentTimeMillis(); +} + +SegmentInfos::~SegmentInfos() { +} + +SegmentInfoPtr SegmentInfos::info(int32_t i) { + return segmentInfos[i]; +} + +int64_t SegmentInfos::getCurrentSegmentGeneration(HashSet files) { + if (!files) { + return -1; + } + int64_t max = -1; + for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { + if (boost::starts_with(*file, IndexFileNames::SEGMENTS()) && *file != IndexFileNames::SEGMENTS_GEN()) { + max = std::max(generationFromSegmentsFileName(*file), max); } } - - String SegmentInfos::getCurrentSegmentFileName(HashSet files) - { - return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", getCurrentSegmentGeneration(files)); - } - - String SegmentInfos::getCurrentSegmentFileName(DirectoryPtr directory) - { - return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", getCurrentSegmentGeneration(directory)); - } - - String SegmentInfos::getCurrentSegmentFileName() - { - return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", lastGeneration); + return max; +} + +int64_t SegmentInfos::getCurrentSegmentGeneration(const DirectoryPtr& directory) { + try { + return getCurrentSegmentGeneration(directory->listAll()); + } catch (LuceneException&) { + return -1; } - - int64_t SegmentInfos::generationFromSegmentsFileName(const String& fileName) - { - if (fileName == IndexFileNames::SEGMENTS()) - return 0; - else if (boost::starts_with(fileName, IndexFileNames::SEGMENTS())) - return StringUtils::toLong(fileName.substr(wcslen(IndexFileNames::SEGMENTS().c_str()) 
+ 1), StringUtils::CHARACTER_MAX_RADIX); - else - boost::throw_exception(IllegalArgumentException(L"FileName '" + fileName + L"' is not a segments file")); +} + +String SegmentInfos::getCurrentSegmentFileName(HashSet files) { + return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", getCurrentSegmentGeneration(files)); +} + +String SegmentInfos::getCurrentSegmentFileName(const DirectoryPtr& directory) { + return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", getCurrentSegmentGeneration(directory)); +} + +String SegmentInfos::getCurrentSegmentFileName() { + return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", lastGeneration); +} + +int64_t SegmentInfos::generationFromSegmentsFileName(const String& fileName) { + if (fileName == IndexFileNames::SEGMENTS()) { return 0; + } else if (boost::starts_with(fileName, IndexFileNames::SEGMENTS())) { + return StringUtils::toLong(fileName.substr(wcslen(IndexFileNames::SEGMENTS().c_str()) + 1), StringUtils::CHARACTER_MAX_RADIX); + } else { + boost::throw_exception(IllegalArgumentException(L"FileName '" + fileName + L"' is not a segments file")); } - - String SegmentInfos::getNextSegmentFileName() - { - return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation == -1 ? 
1 : generation + 1); - } - - void SegmentInfos::read(DirectoryPtr directory, const String& segmentFileName) - { - bool success = false; + return 0; +} - // clear any previous segments - segmentInfos.clear(); - - ChecksumIndexInputPtr input(newLucene(directory->openInput(segmentFileName))); - - generation = generationFromSegmentsFileName(segmentFileName); - lastGeneration = generation; - LuceneException finally; - try - { - int32_t format = input->readInt(); - - if (format < 0) // file contains explicit format info - { - if (format < CURRENT_FORMAT) - boost::throw_exception(CorruptIndexException(L"Unknown format version: " + StringUtils::toString(format))); - version = input->readLong(); // read version - counter = input->readInt(); // read counter +String SegmentInfos::getNextSegmentFileName() { + return IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation == -1 ? 1 : generation + 1); +} + +void SegmentInfos::read(const DirectoryPtr& directory, const String& segmentFileName) { + bool success = false; + + // clear any previous segments + segmentInfos.clear(); + + ChecksumIndexInputPtr input(newLucene(directory->openInput(segmentFileName))); + + generation = generationFromSegmentsFileName(segmentFileName); + lastGeneration = generation; + LuceneException finally; + try { + int32_t format = input->readInt(); + + if (format < 0) { // file contains explicit format info + if (format < CURRENT_FORMAT) { + boost::throw_exception(CorruptIndexException(L"Unknown format version: " + StringUtils::toString(format))); } - else - counter = format; - - for (int32_t i = input->readInt(); i > 0; --i) // read segmentInfos - segmentInfos.add(newLucene(directory, format, input)); - - // in old format the version number may be at the end of the file - if (format >= 0) - { - if (input->getFilePointer() >= input->length()) - version = MiscUtils::currentTimeMillis(); // old file format without version number - else - input->readLong(); // read version + 
version = input->readLong(); // read version + counter = input->readInt(); // read counter + } else { + counter = format; + } + + for (int32_t i = input->readInt(); i > 0; --i) { // read segmentInfos + segmentInfos.add(newLucene(directory, format, input)); + } + + // in old format the version number may be at the end of the file + if (format >= 0) { + if (input->getFilePointer() >= input->length()) { + version = MiscUtils::currentTimeMillis(); // old file format without version number + } else { + input->readLong(); // read version } - - if (format <= FORMAT_USER_DATA) - { - if (format <= FORMAT_DIAGNOSTICS) - userData = input->readStringStringMap(); - else if (input->readByte() != 0) - { - if (!singletonUserData) - singletonUserData = MapStringString::newInstance(); - singletonUserData[String(L"userData")] = input->readString(); - userData = singletonUserData; + } + + if (format <= FORMAT_USER_DATA) { + if (format <= FORMAT_DIAGNOSTICS) { + userData = input->readStringStringMap(); + } else if (input->readByte() != 0) { + if (!singletonUserData) { + singletonUserData = MapStringString::newInstance(); } - else - userData.clear(); - } - else + singletonUserData[String(L"userData")] = input->readString(); + userData = singletonUserData; + } else { userData.clear(); - - if (format <= FORMAT_CHECKSUM) - { - int64_t checksumNow = input->getChecksum(); - int64_t checksumThen = input->readLong(); - if (checksumNow != checksumThen) - boost::throw_exception(CorruptIndexException(L"Checksum mismatch in segments file")); } - - success = true; + } else { + userData.clear(); } - catch (LuceneException& e) - { - finally = e; - } - - input->close(); - - // clear any segment infos we had loaded so we have a clean slate on retry - if (!success) - segmentInfos.clear(); - - finally.throwException(); - } - - void SegmentInfos::read(DirectoryPtr directory) - { - lastGeneration = -1; - generation = lastGeneration; - newLucene(shared_from_this(), directory)->run(); - } - - void 
SegmentInfos::write(DirectoryPtr directory) - { - String segmentFileName(getNextSegmentFileName()); - - // always advance the generation on write - if (generation == -1) - generation = 1; - else - ++generation; - - ChecksumIndexOutputPtr segnOutput(newLucene(directory->createOutput(segmentFileName))); - - bool success = false; - LuceneException finally; - try - { - segnOutput->writeInt(CURRENT_FORMAT); // write FORMAT - segnOutput->writeLong(++version); // every write changes the index - segnOutput->writeInt(counter); // write counter - segnOutput->writeInt(segmentInfos.size()); // write infos - for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) - (*seginfo)->write(segnOutput); - segnOutput->writeStringStringMap(userData); - segnOutput->prepareCommit(); - success = true; - pendingSegnOutput = segnOutput; - } - catch (LuceneException& e) - { - finally = e; - } - - if (!success) - { - // We hit an exception above; try to close the file but suppress any exception - try - { - segnOutput->close(); - } - catch (...) - { - // Suppress so we keep throwing the original exception - } - - try - { - // try not to leave a truncated segments_n file in the index - directory->deleteFile(segmentFileName); - } - catch (...) - { - // Suppress so we keep throwing the original exception + + if (format <= FORMAT_CHECKSUM) { + int64_t checksumNow = input->getChecksum(); + int64_t checksumThen = input->readLong(); + if (checksumNow != checksumThen) { + boost::throw_exception(CorruptIndexException(L"Checksum mismatch in segments file")); } } - - finally.throwException(); - } - - LuceneObjectPtr SegmentInfos::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = SegmentInfoCollection::clone(other ? 
other : newLucene()); - SegmentInfosPtr cloneInfos(boost::dynamic_pointer_cast(clone)); - cloneInfos->counter = counter; - cloneInfos->generation = generation; - cloneInfos->lastGeneration = lastGeneration; - cloneInfos->version = version; - cloneInfos->pendingSegnOutput = pendingSegnOutput; - for (int32_t i = 0; i < cloneInfos->size(); ++i) - cloneInfos->segmentInfos[i] = boost::dynamic_pointer_cast(cloneInfos->info(i)->clone()); - cloneInfos->userData = MapStringString::newInstance(); - cloneInfos->userData.putAll(userData.begin(), userData.end()); - return cloneInfos; - } - - int64_t SegmentInfos::getVersion() - { - return version; - } - - int64_t SegmentInfos::getGeneration() - { - return generation; - } - - int64_t SegmentInfos::getLastGeneration() - { - return lastGeneration; - } - - int64_t SegmentInfos::readCurrentVersion(DirectoryPtr directory) - { - // Fully read the segments file: this ensures that it's completely written so that if IndexWriter.prepareCommit has been called - // (but not yet commit), then the reader will still see itself as current. 
- SegmentInfosPtr sis(newLucene()); - sis->read(directory); - return sis->getVersion(); - } - - MapStringString SegmentInfos::readCurrentUserData(DirectoryPtr directory) - { - SegmentInfosPtr sis(newLucene()); - sis->read(directory); - return sis->getUserData(); - } - - void SegmentInfos::setInfoStream(InfoStreamPtr infoStream) - { - SegmentInfos::infoStream = infoStream; - } - - void SegmentInfos::setDefaultGenFileRetryCount(int32_t count) - { - defaultGenFileRetryCount = count; - } - - int32_t SegmentInfos::getDefaultGenFileRetryCount() - { - return defaultGenFileRetryCount; - } - - void SegmentInfos::setDefaultGenFileRetryPauseMsec(int32_t msec) - { - defaultGenFileRetryPauseMsec = msec; - } - - int32_t SegmentInfos::getDefaultGenFileRetryPauseMsec() - { - return defaultGenFileRetryPauseMsec; - } - - void SegmentInfos::setDefaultGenLookaheadCount(int32_t count) - { - defaultGenLookaheadCount = count; + + success = true; + } catch (LuceneException& e) { + finally = e; } - - int32_t SegmentInfos::getDefaultGenLookahedCount() - { - return defaultGenLookaheadCount; + + input->close(); + + // clear any segment infos we had loaded so we have a clean slate on retry + if (!success) { + segmentInfos.clear(); } - - InfoStreamPtr SegmentInfos::getInfoStream() - { - return infoStream; + + finally.throwException(); +} + +void SegmentInfos::read(const DirectoryPtr& directory) { + lastGeneration = -1; + generation = lastGeneration; + newLucene(shared_from_this(), directory)->run(); +} + +void SegmentInfos::write(const DirectoryPtr& directory) { + String segmentFileName(getNextSegmentFileName()); + + // always advance the generation on write + if (generation == -1) { + generation = 1; + } else { + ++generation; } - - void SegmentInfos::message(const String& message) - { - if (infoStream) - *infoStream << L"SIS [" << message << L"]\n"; + + ChecksumIndexOutputPtr segnOutput(newLucene(directory->createOutput(segmentFileName))); + + bool success = false; + LuceneException finally; 
+ try { + segnOutput->writeInt(CURRENT_FORMAT); // write FORMAT + segnOutput->writeLong(++version); // every write changes the index + segnOutput->writeInt(counter); // write counter + segnOutput->writeInt(segmentInfos.size()); // write infos + for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { + (*seginfo)->write(segnOutput); + } + segnOutput->writeStringStringMap(userData); + segnOutput->prepareCommit(); + success = true; + pendingSegnOutput = segnOutput; + } catch (LuceneException& e) { + finally = e; } - - FindSegmentsFile::FindSegmentsFile(SegmentInfosPtr infos, DirectoryPtr directory) - { - this->_segmentInfos = infos; - this->directory = directory; + + if (!success) { + // We hit an exception above; try to close the file but suppress any exception + try { + segnOutput->close(); + } catch (...) { + // Suppress so we keep throwing the original exception + } + + try { + // try not to leave a truncated segments_n file in the index + directory->deleteFile(segmentFileName); + } catch (...) { + // Suppress so we keep throwing the original exception + } } - - FindSegmentsFile::~FindSegmentsFile() - { + + finally.throwException(); +} + +LuceneObjectPtr SegmentInfos::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = SegmentInfoCollection::clone(other ? 
other : newLucene()); + SegmentInfosPtr cloneInfos(boost::dynamic_pointer_cast(clone)); + cloneInfos->counter = counter; + cloneInfos->generation = generation; + cloneInfos->lastGeneration = lastGeneration; + cloneInfos->version = version; + cloneInfos->pendingSegnOutput = pendingSegnOutput; + for (int32_t i = 0; i < cloneInfos->size(); ++i) { + cloneInfos->segmentInfos[i] = boost::dynamic_pointer_cast(cloneInfos->info(i)->clone()); + } + cloneInfos->userData = MapStringString::newInstance(); + cloneInfos->userData.putAll(userData.begin(), userData.end()); + return cloneInfos; +} + +int64_t SegmentInfos::getVersion() { + return version; +} + +int64_t SegmentInfos::getGeneration() { + return generation; +} + +int64_t SegmentInfos::getLastGeneration() { + return lastGeneration; +} + +int64_t SegmentInfos::readCurrentVersion(const DirectoryPtr& directory) { + // Fully read the segments file: this ensures that it's completely written so that if IndexWriter.prepareCommit has been called + // (but not yet commit), then the reader will still see itself as current. 
+ SegmentInfosPtr sis(newLucene()); + sis->read(directory); + return sis->getVersion(); +} + +MapStringString SegmentInfos::readCurrentUserData(const DirectoryPtr& directory) { + SegmentInfosPtr sis(newLucene()); + sis->read(directory); + return sis->getUserData(); +} + +void SegmentInfos::setInfoStream(const InfoStreamPtr& infoStream) { + SegmentInfos::infoStream = infoStream; +} + +void SegmentInfos::setDefaultGenFileRetryCount(int32_t count) { + defaultGenFileRetryCount = count; +} + +int32_t SegmentInfos::getDefaultGenFileRetryCount() { + return defaultGenFileRetryCount; +} + +void SegmentInfos::setDefaultGenFileRetryPauseMsec(int32_t msec) { + defaultGenFileRetryPauseMsec = msec; +} + +int32_t SegmentInfos::getDefaultGenFileRetryPauseMsec() { + return defaultGenFileRetryPauseMsec; +} + +void SegmentInfos::setDefaultGenLookaheadCount(int32_t count) { + defaultGenLookaheadCount = count; +} + +int32_t SegmentInfos::getDefaultGenLookahedCount() { + return defaultGenLookaheadCount; +} + +InfoStreamPtr SegmentInfos::getInfoStream() { + return infoStream; +} + +void SegmentInfos::message(const String& message) { + if (infoStream) { + *infoStream << L"SIS [" << message << L"]\n"; } - - void FindSegmentsFile::doRun(IndexCommitPtr commit) - { - if (commit) - { - if (directory != commit->getDirectory()) - boost::throw_exception(IOException(L"The specified commit does not match the specified Directory")); - runBody(commit->getSegmentsFileName()); - return; +} + +FindSegmentsFile::FindSegmentsFile(const SegmentInfosPtr& infos, const DirectoryPtr& directory) { + this->_segmentInfos = infos; + this->directory = directory; +} + +FindSegmentsFile::~FindSegmentsFile() { +} + +void FindSegmentsFile::doRun(const IndexCommitPtr& commit) { + if (commit) { + if (directory != commit->getDirectory()) { + boost::throw_exception(IOException(L"The specified commit does not match the specified Directory")); } - - String segmentFileName; - int64_t lastGen = -1; - int64_t gen = 0; - int32_t 
genLookaheadCount = 0; - bool retry = false; - LuceneException exc; - SegmentInfosPtr segmentInfos(_segmentInfos); - - int32_t method = 0; - - // Loop until we succeed in calling runBody() without hitting an IOException. An IOException most likely - // means a commit was in process and has finished, in the time it took us to load the now-old infos files - // (and segments files). It's also possible it's a true error (corrupt index). To distinguish these, - // on each retry we must see "forward progress" on which generation we are trying to load. If we don't, - // then the original error is real and we throw it. - - // We have three methods for determining the current generation. We try the first two in parallel, and - // fall back to the third when necessary. - - while (true) - { - if (method == 0) - { - // Method 1: list the directory and use the highest segments_N file. This method works well as long - // as there is no stale caching on the directory contents (NOTE: NFS clients often have such stale caching) - HashSet files(directory->listAll()); - int64_t genA = segmentInfos->getCurrentSegmentGeneration(files); - - segmentInfos->message(L"directory listing genA=" + genA); - - // Method 2: open segments.gen and read its contents. Then we take the larger of the two gens. This way, - // if either approach is hitting a stale cache (NFS) we have a better chance of getting the right generation. 
- int64_t genB = -1; - for (int32_t i = 0; i < SegmentInfos::defaultGenFileRetryCount; ++i) - { - IndexInputPtr genInput; - try - { - genInput = directory->openInput(IndexFileNames::SEGMENTS_GEN()); - } - catch (FileNotFoundException& e) - { - segmentInfos->message(L"Segments.gen open: FileNotFoundException " + e.getError()); - break; - } - catch (IOException& e) - { - segmentInfos->message(L"Segments.gen open: IOException " + e.getError()); - } - - if (genInput) - { - LuceneException finally; - bool fileConsistent = false; - try - { - int32_t version = genInput->readInt(); - if (version == SegmentInfos::FORMAT_LOCKLESS) - { - int64_t gen0 = genInput->readLong(); - int64_t gen1 = genInput->readLong(); - segmentInfos->message(L"fallback check: " + StringUtils::toString(gen0) + L"; " + StringUtils::toString(gen1)); - if (gen0 == gen1) - { - // the file is consistent - genB = gen0; - fileConsistent = true; - } + runBody(commit->getSegmentsFileName()); + return; + } + + String segmentFileName; + int64_t lastGen = -1; + int64_t gen = 0; + int32_t genLookaheadCount = 0; + bool retry = false; + LuceneException exc; + SegmentInfosPtr segmentInfos(_segmentInfos); + + int32_t method = 0; + + // Loop until we succeed in calling runBody() without hitting an IOException. An IOException most likely + // means a commit was in process and has finished, in the time it took us to load the now-old infos files + // (and segments files). It's also possible it's a true error (corrupt index). To distinguish these, + // on each retry we must see "forward progress" on which generation we are trying to load. If we don't, + // then the original error is real and we throw it. + + // We have three methods for determining the current generation. We try the first two in parallel, and + // fall back to the third when necessary. + + while (true) { + if (method == 0) { + // Method 1: list the directory and use the highest segments_N file. 
This method works well as long + // as there is no stale caching on the directory contents (NOTE: NFS clients often have such stale caching) + HashSet files(directory->listAll()); + int64_t genA = segmentInfos->getCurrentSegmentGeneration(files); + + segmentInfos->message(L"directory listing genA=" + StringUtils::toString(genA)); + + // Method 2: open segments.gen and read its contents. Then we take the larger of the two gens. This way, + // if either approach is hitting a stale cache (NFS) we have a better chance of getting the right generation. + int64_t genB = -1; + for (int32_t i = 0; i < SegmentInfos::defaultGenFileRetryCount; ++i) { + IndexInputPtr genInput; + try { + genInput = directory->openInput(IndexFileNames::SEGMENTS_GEN()); + } catch (FileNotFoundException& e) { + segmentInfos->message(L"Segments.gen open: FileNotFoundException " + e.getError()); + break; + } catch (IOException& e) { + segmentInfos->message(L"Segments.gen open: IOException " + e.getError()); + } + + if (genInput) { + LuceneException finally; + bool fileConsistent = false; + try { + int32_t version = genInput->readInt(); + if (version == SegmentInfos::FORMAT_LOCKLESS) { + int64_t gen0 = genInput->readLong(); + int64_t gen1 = genInput->readLong(); + segmentInfos->message(L"fallback check: " + StringUtils::toString(gen0) + L"; " + StringUtils::toString(gen1)); + if (gen0 == gen1) { + // the file is consistent + genB = gen0; + fileConsistent = true; } } - catch (IOException&) - { - // will retry - } - catch (LuceneException& e) - { - finally = e; - } - genInput->close(); - finally.throwException(); - if (fileConsistent) - break; + } catch (IOException&) { + // will retry + } catch (LuceneException& e) { + finally = e; + } + genInput->close(); + finally.throwException(); + if (fileConsistent) { + break; } - - LuceneThread::threadSleep(SegmentInfos::defaultGenFileRetryPauseMsec); } - - segmentInfos->message(String(IndexFileNames::SEGMENTS_GEN()) + L" check: genB=" + 
StringUtils::toString(genB)); - - // pick the larger of the two gen's - gen = std::max(genA, genB); - - // neither approach found a generation - if (gen == -1) - boost::throw_exception(FileNotFoundException(L"No segments* file found in directory")); + + LuceneThread::threadSleep(SegmentInfos::defaultGenFileRetryPauseMsec); } - - // Third method (fallback if first & second methods are not reliable): since both directory cache and - // file contents cache seem to be stale, just advance the generation. - if (method == 1 || (method == 0 && lastGen == gen && retry)) - { - method = 1; - - if (genLookaheadCount < SegmentInfos::defaultGenLookaheadCount) - { - ++gen; - ++genLookaheadCount; - segmentInfos->message(L"look ahead increment gen to " + StringUtils::toString(gen)); - } + + segmentInfos->message(String(IndexFileNames::SEGMENTS_GEN()) + L" check: genB=" + StringUtils::toString(genB)); + + // pick the larger of the two gen's + gen = std::max(genA, genB); + + // neither approach found a generation + if (gen == -1) { + boost::throw_exception(FileNotFoundException(L"No segments* file found in directory")); } - - if (lastGen == gen) - { - // This means we're about to try the same segments_N last tried. This is allowed, exactly once, because - // writer could have been in the process of writing segments_N last time. - - if (retry) - { - // OK, we've tried the same segments_N file twice in a row, so this must be a real error. - exc.throwException(); - } - else - retry = true; + } + + // Third method (fallback if first & second methods are not reliable): since both directory cache and + // file contents cache seem to be stale, just advance the generation. 
+ if (method == 1 || (method == 0 && lastGen == gen && retry)) { + method = 1; + + if (genLookaheadCount < SegmentInfos::defaultGenLookaheadCount) { + ++gen; + ++genLookaheadCount; + segmentInfos->message(L"look ahead increment gen to " + StringUtils::toString(gen)); } - else if (method == 0) - { - // Segment file has advanced since our last loop, so reset retry - retry = false; + } + + if (lastGen == gen) { + // This means we're about to try the same segments_N last tried. This is allowed, exactly once, because + // writer could have been in the process of writing segments_N last time. + + if (retry) { + // OK, we've tried the same segments_N file twice in a row, so this must be a real error. + exc.throwException(); + } else { + retry = true; } - - lastGen = gen; - - segmentFileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen); - - try - { - runBody(segmentFileName); - segmentInfos->message(L"success on " + segmentFileName); - return; + } else if (method == 0) { + // Segment file has advanced since our last loop, so reset retry + retry = false; + } + + lastGen = gen; + + segmentFileName = IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen); + + try { + runBody(segmentFileName); + segmentInfos->message(L"success on " + segmentFileName); + return; + } catch (LuceneException& err) { + // Save the original root cause + if (exc.isNull()) { + exc = err; } - catch (LuceneException& err) - { - // Save the original root cause - if (exc.isNull()) - exc = err; - - segmentInfos->message(L"primary Exception on '" + segmentFileName + L"': " + err.getError() + L"'; will retry: retry=" + StringUtils::toString(retry) + L"; gen = " + StringUtils::toString(gen)); - - if (!retry && gen > 1) - { - // This is our first time trying this segments file (because retry is false), and, there is possibly a - // segments_(N-1) (because gen > 1). So, check if the segments_(N-1) exists and try it if so. 
- String prevSegmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen - 1)); - - if (directory->fileExists(prevSegmentFileName)) - { - segmentInfos->message(L"fallback to prior segment file '" + prevSegmentFileName + L"'"); - - try - { - runBody(prevSegmentFileName); - if (!exc.isNull()) - segmentInfos->message(L"success on fallback " + prevSegmentFileName); - return; - } - catch (LuceneException& err2) - { - segmentInfos->message(L"secondary Exception on '" + prevSegmentFileName + L"': " + err2.getError() + L"'; will retry"); + + segmentInfos->message(L"primary Exception on '" + segmentFileName + L"': " + err.getError() + L"'; will retry: retry=" + StringUtils::toString(retry) + L"; gen = " + StringUtils::toString(gen)); + + if (!retry && gen > 1) { + // This is our first time trying this segments file (because retry is false), and, there is possibly a + // segments_(N-1) (because gen > 1). So, check if the segments_(N-1) exists and try it if so. + String prevSegmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", gen - 1)); + + if (directory->fileExists(prevSegmentFileName)) { + segmentInfos->message(L"fallback to prior segment file '" + prevSegmentFileName + L"'"); + + try { + runBody(prevSegmentFileName); + if (!exc.isNull()) { + segmentInfos->message(L"success on fallback " + prevSegmentFileName); } + return; + } catch (LuceneException& err2) { + segmentInfos->message(L"secondary Exception on '" + prevSegmentFileName + L"': " + err2.getError() + L"'; will retry"); } } } } } - - FindSegmentsRead::FindSegmentsRead(SegmentInfosPtr infos, DirectoryPtr directory) : FindSegmentsFileT(infos, directory) - { - result = 0; +} + +FindSegmentsRead::FindSegmentsRead(const SegmentInfosPtr& infos, const DirectoryPtr& directory) : FindSegmentsFileT(infos, directory) { + result = 0; +} + +FindSegmentsRead::~FindSegmentsRead() { +} + +int64_t FindSegmentsRead::doBody(const String& segmentFileName) { + 
SegmentInfosPtr(_segmentInfos)->read(directory, segmentFileName); + return 0; +} + +SegmentInfosPtr SegmentInfos::range(int32_t first, int32_t last) { + SegmentInfosPtr infos(newLucene()); + infos->segmentInfos.addAll(segmentInfos.begin() + first, segmentInfos.begin() + last); + return infos; +} + +void SegmentInfos::updateGeneration(const SegmentInfosPtr& other) { + lastGeneration = other->lastGeneration; + generation = other->generation; + version = other->version; +} + +void SegmentInfos::rollbackCommit(const DirectoryPtr& dir) { + if (pendingSegnOutput) { + try { + pendingSegnOutput->close(); + } catch (...) { + } + + // must carefully compute filename from "generation" since lastgeneration isn't incremented + try { + String segmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation)); + dir->deleteFile(segmentFileName); + } catch (...) { + } + pendingSegnOutput.reset(); } - - FindSegmentsRead::~FindSegmentsRead() - { +} + +void SegmentInfos::prepareCommit(const DirectoryPtr& dir) { + TestScope testScope(L"SegmentInfos", L"prepareCommit"); + if (pendingSegnOutput) { + boost::throw_exception(IllegalStateException(L"prepareCommit was already called")); } - - int64_t FindSegmentsRead::doBody(const String& segmentFileName) - { - SegmentInfosPtr(_segmentInfos)->read(directory, segmentFileName); - return 0; + write(dir); +} + +HashSet SegmentInfos::files(const DirectoryPtr& dir, bool includeSegmentsFile) { + HashSet files(HashSet::newInstance()); + if (includeSegmentsFile) { + files.add(getCurrentSegmentFileName()); + } + for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { + if ((*seginfo)->dir == dir) { + HashSet segFiles((*seginfo)->files()); + files.addAll(segFiles.begin(), segFiles.end()); + } } - - SegmentInfosPtr SegmentInfos::range(int32_t first, int32_t last) - { - SegmentInfosPtr infos(newLucene()); - infos->segmentInfos.addAll(segmentInfos.begin() + first, 
segmentInfos.begin() + last); - return infos; + return files; +} + +void SegmentInfos::finishCommit(const DirectoryPtr& dir) { + if (!pendingSegnOutput) { + boost::throw_exception(IllegalStateException(L"prepareCommit was not called")); } - - void SegmentInfos::updateGeneration(SegmentInfosPtr other) - { - lastGeneration = other->lastGeneration; - generation = other->generation; - version = other->version; + + bool success = false; + LuceneException finally; + try { + pendingSegnOutput->finishCommit(); + pendingSegnOutput->close(); + pendingSegnOutput.reset(); + success = true; + } catch (LuceneException& e) { + finally = e; } - - void SegmentInfos::rollbackCommit(DirectoryPtr dir) - { - if (pendingSegnOutput) - { - try - { - pendingSegnOutput->close(); - } - catch (...) - { - } - - // must carefully compute filename from "generation" since lastgeneration isn't incremented - try - { - String segmentFileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation)); - dir->deleteFile(segmentFileName); - } - catch (...) - { - } - pendingSegnOutput.reset(); - } + + if (!success) { + rollbackCommit(dir); } - - void SegmentInfos::prepareCommit(DirectoryPtr dir) - { - TestScope testScope(L"SegmentInfos", L"prepareCommit"); - if (pendingSegnOutput) - boost::throw_exception(IllegalStateException(L"prepareCommit was already called")); - write(dir); + finally.throwException(); + + // NOTE: if we crash here, we have left a segments_N file in the directory in a possibly corrupt state (if + // some bytes made it to stable storage and others didn't). But, the segments_N file includes checksum + // at the end, which should catch this case. So when a reader tries to read it, it will throw a + // CorruptIndexException, which should cause the retry logic in SegmentInfos to kick in and load the last + // good (previous) segments_N-1 file. 
+ + String fileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation)); + + success = false; + try { + dir->sync(fileName); + success = true; + } catch (...) { } - - HashSet SegmentInfos::files(DirectoryPtr dir, bool includeSegmentsFile) - { - HashSet files(HashSet::newInstance()); - if (includeSegmentsFile) - files.add(getCurrentSegmentFileName()); - for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) - { - if ((*seginfo)->dir == dir) - { - HashSet segFiles((*seginfo)->files()); - files.addAll(segFiles.begin(), segFiles.end()); - } - } - return files; + + if (!success) { + dir->deleteFile(fileName); } - - void SegmentInfos::finishCommit(DirectoryPtr dir) - { - if (!pendingSegnOutput) - boost::throw_exception(IllegalStateException(L"prepareCommit was not called")); - - bool success = false; - LuceneException finally; - try - { - pendingSegnOutput->finishCommit(); - pendingSegnOutput->close(); - pendingSegnOutput.reset(); - success = true; - } - catch (LuceneException& e) - { + + lastGeneration = generation; + IndexOutputPtr genOutput; + try { + genOutput = dir->createOutput(IndexFileNames::SEGMENTS_GEN()); + + try { + genOutput->writeInt(FORMAT_LOCKLESS); + genOutput->writeLong(generation); + genOutput->writeLong(generation); + } catch (LuceneException& e) { finally = e; } - - if (!success) - rollbackCommit(dir); + + genOutput->close(); finally.throwException(); - - // NOTE: if we crash here, we have left a segments_N file in the directory in a possibly corrupt state (if - // some bytes made it to stable storage and others didn't). But, the segments_N file includes checksum - // at the end, which should catch this case. So when a reader tries to read it, it will throw a - // CorruptIndexException, which should cause the retry logic in SegmentInfos to kick in and load the last - // good (previous) segments_N-1 file. 
- - String fileName(IndexFileNames::fileNameFromGeneration(IndexFileNames::SEGMENTS(), L"", generation)); - - success = false; - try - { - dir->sync(fileName); - success = true; - } - catch (...) - { - } - - if (!success) - dir->deleteFile(fileName); - - lastGeneration = generation; - IndexOutputPtr genOutput; - try - { - genOutput = dir->createOutput(IndexFileNames::SEGMENTS_GEN()); - - try - { - genOutput->writeInt(FORMAT_LOCKLESS); - genOutput->writeLong(generation); - genOutput->writeLong(generation); - } - catch (LuceneException& e) - { - finally = e; - } - - genOutput->close(); - finally.throwException(); - } - catch (...) - { - } + } catch (...) { } - - void SegmentInfos::commit(DirectoryPtr dir) - { - prepareCommit(dir); - finishCommit(dir); - } - - String SegmentInfos::segString(DirectoryPtr directory) - { - SyncLock syncLock(this); - String buffer; - for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) - { - if (seginfo != segmentInfos.begin()) - buffer += L' '; - buffer += (*seginfo)->segString(directory); - if ((*seginfo)->dir != directory) - buffer += L"**"; +} + +void SegmentInfos::commit(const DirectoryPtr& dir) { + prepareCommit(dir); + finishCommit(dir); +} + +String SegmentInfos::segString(const DirectoryPtr& directory) { + SyncLock syncLock(this); + String buffer; + for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { + if (seginfo != segmentInfos.begin()) { + buffer += L' '; + } + buffer += (*seginfo)->segString(directory); + if ((*seginfo)->dir != directory) { + buffer += L"**"; } - return buffer; - } - - MapStringString SegmentInfos::getUserData() - { - return userData; - } - - void SegmentInfos::setUserData(MapStringString data) - { - if (!data) - userData = MapStringString::newInstance(); - else - userData = data; } - - void SegmentInfos::replace(SegmentInfosPtr other) - { - segmentInfos.clear(); - segmentInfos.addAll(other->segmentInfos.begin(), 
other->segmentInfos.end()); - lastGeneration = other->lastGeneration; + return buffer; +} + +MapStringString SegmentInfos::getUserData() { + return userData; +} + +void SegmentInfos::setUserData(MapStringString data) { + if (!data) { + userData = MapStringString::newInstance(); + } else { + userData = data; } - - bool SegmentInfos::hasExternalSegments(DirectoryPtr dir) - { - for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) - { - if ((*seginfo)->dir != dir) - return true; +} + +void SegmentInfos::replace(const SegmentInfosPtr& other) { + segmentInfos.clear(); + segmentInfos.addAll(other->segmentInfos.begin(), other->segmentInfos.end()); + lastGeneration = other->lastGeneration; +} + +bool SegmentInfos::hasExternalSegments(const DirectoryPtr& dir) { + for (Collection::iterator seginfo = segmentInfos.begin(); seginfo != segmentInfos.end(); ++seginfo) { + if ((*seginfo)->dir != dir) { + return true; } - return false; } + return false; +} + } diff --git a/src/core/index/SegmentMergeInfo.cpp b/src/core/index/SegmentMergeInfo.cpp index 967e9b5c..0379e38e 100644 --- a/src/core/index/SegmentMergeInfo.cpp +++ b/src/core/index/SegmentMergeInfo.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,75 +10,65 @@ #include "TermEnum.h" #include "TermPositions.h" -namespace Lucene -{ - SegmentMergeInfo::SegmentMergeInfo(int32_t b, TermEnumPtr te, IndexReaderPtr r) - { - base = b; - _reader = r; - termEnum = te; - term = te->term(); - ord = 0; +namespace Lucene { + +SegmentMergeInfo::SegmentMergeInfo(int32_t b, const TermEnumPtr& te, const IndexReaderPtr& r) { + base = b; + _reader = r; + termEnum = te; + term = te->term(); + ord = 0; + delCount = 0; +} + +SegmentMergeInfo::~SegmentMergeInfo() { +} + +Collection SegmentMergeInfo::getDocMap() { + if (!docMap) { delCount = 0; - } - - SegmentMergeInfo::~SegmentMergeInfo() - { - } - - Collection SegmentMergeInfo::getDocMap() - { - if (!docMap) - { - delCount = 0; - IndexReaderPtr reader(_reader); - - // build array which maps document numbers around deletions - if (reader->hasDeletions()) - { - int32_t maxDoc = reader->maxDoc(); - docMap = Collection::newInstance(maxDoc); - int32_t j = 0; - for (int32_t i = 0; i < maxDoc; ++i) - { - if (reader->isDeleted(i)) - { - ++delCount; - docMap[i] = -1; - } - else - docMap[i] = j++; + IndexReaderPtr reader(_reader); + + // build array which maps document numbers around deletions + if (reader->hasDeletions()) { + int32_t maxDoc = reader->maxDoc(); + docMap = Collection::newInstance(maxDoc); + int32_t j = 0; + for (int32_t i = 0; i < maxDoc; ++i) { + if (reader->isDeleted(i)) { + ++delCount; + docMap[i] = -1; + } else { + docMap[i] = j++; } } } - return docMap; } - - TermPositionsPtr SegmentMergeInfo::getPositions() - { - if (!postings) - postings = IndexReaderPtr(_reader)->termPositions(); - return postings; + return docMap; +} + +TermPositionsPtr SegmentMergeInfo::getPositions() { + if (!postings) { + postings = IndexReaderPtr(_reader)->termPositions(); } - - bool SegmentMergeInfo::next() - { - if (termEnum->next()) - { - term = termEnum->term(); - return true; - } - else - { - term.reset(); - 
return false; - } + return postings; +} + +bool SegmentMergeInfo::next() { + if (termEnum->next()) { + term = termEnum->term(); + return true; + } else { + term.reset(); + return false; } - - void SegmentMergeInfo::close() - { - termEnum->close(); - if (postings) - postings->close(); +} + +void SegmentMergeInfo::close() { + termEnum->close(); + if (postings) { + postings->close(); } } + +} diff --git a/src/core/index/SegmentMergeQueue.cpp b/src/core/index/SegmentMergeQueue.cpp index 6d1933ec..1dfd7a24 100644 --- a/src/core/index/SegmentMergeQueue.cpp +++ b/src/core/index/SegmentMergeQueue.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,25 +8,23 @@ #include "SegmentMergeQueue.h" #include "SegmentMergeInfo.h" -namespace Lucene -{ - SegmentMergeQueue::SegmentMergeQueue(int32_t size) : PriorityQueue(size) - { - } - - SegmentMergeQueue::~SegmentMergeQueue() - { - } - - void SegmentMergeQueue::close() - { - while (top()) - pop()->close(); - } - - bool SegmentMergeQueue::lessThan(const SegmentMergeInfoPtr& first, const SegmentMergeInfoPtr& second) - { - int32_t comparison = first->term->compareTo(second->term); - return comparison == 0 ? (first->base < second->base) : (comparison < 0); +namespace Lucene { + +SegmentMergeQueue::SegmentMergeQueue(int32_t size) : PriorityQueue(size) { +} + +SegmentMergeQueue::~SegmentMergeQueue() { +} + +void SegmentMergeQueue::close() { + while (top()) { + pop()->close(); } } + +bool SegmentMergeQueue::lessThan(const SegmentMergeInfoPtr& first, const SegmentMergeInfoPtr& second) { + int32_t comparison = first->term->compareTo(second->term); + return comparison == 0 ? 
(first->base < second->base) : (comparison < 0); +} + +} diff --git a/src/core/index/SegmentMerger.cpp b/src/core/index/SegmentMerger.cpp index d5325019..630b0f88 100644 --- a/src/core/index/SegmentMerger.cpp +++ b/src/core/index/SegmentMerger.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -32,738 +32,650 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// Maximum number of contiguous documents to bulk-copy when merging stored fields - const int32_t SegmentMerger::MAX_RAW_MERGE_DOCS = 4192; - - /// norms header placeholder - const uint8_t SegmentMerger::NORMS_HEADER[] = {'N', 'R', 'M', -1}; - const int32_t SegmentMerger::NORMS_HEADER_LENGTH = 4; - - SegmentMerger::SegmentMerger(DirectoryPtr dir, const String& name) - { - readers = Collection::newInstance(); - termIndexInterval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL; - mergedDocs = 0; - mergeDocStores = false; - omitTermFreqAndPositions = false; - - directory = dir; - segment = name; +namespace Lucene { + +/// Maximum number of contiguous documents to bulk-copy when merging stored fields +const int32_t SegmentMerger::MAX_RAW_MERGE_DOCS = 4192; + +/// norms header placeholder +const uint8_t SegmentMerger::NORMS_HEADER[] = {'N', 'R', 'M', static_cast(-1) }; +const int32_t SegmentMerger::NORMS_HEADER_LENGTH = 4; + +SegmentMerger::SegmentMerger(const DirectoryPtr& dir, const String& name) { + readers = Collection::newInstance(); + termIndexInterval = IndexWriter::DEFAULT_TERM_INDEX_INTERVAL; + mergedDocs = 0; + mergeDocStores = false; + omitTermFreqAndPositions = false; + + directory = dir; + segment = name; + checkAbort 
= newLucene(); +} + +SegmentMerger::SegmentMerger(const IndexWriterPtr& writer, const String& name, const OneMergePtr& merge) { + readers = Collection::newInstance(); + mergedDocs = 0; + mergeDocStores = false; + omitTermFreqAndPositions = false; + + directory = writer->getDirectory(); + segment = name; + + if (merge) { + checkAbort = newLucene(merge, directory); + } else { checkAbort = newLucene(); } - - SegmentMerger::SegmentMerger(IndexWriterPtr writer, const String& name, OneMergePtr merge) - { - readers = Collection::newInstance(); - mergedDocs = 0; - mergeDocStores = false; - omitTermFreqAndPositions = false; - - directory = writer->getDirectory(); - segment = name; - - if (merge) - checkAbort = newLucene(merge, directory); - else - checkAbort = newLucene(); - termIndexInterval = writer->getTermIndexInterval(); - } - - SegmentMerger::~SegmentMerger() - { + termIndexInterval = writer->getTermIndexInterval(); +} + +SegmentMerger::~SegmentMerger() { +} + +bool SegmentMerger::hasProx() { + return fieldInfos->hasProx(); +} + +void SegmentMerger::add(const IndexReaderPtr& reader) { + readers.add(reader); +} + +IndexReaderPtr SegmentMerger::segmentReader(int32_t i) { + return readers[i]; +} + +int32_t SegmentMerger::merge() { + return merge(true); +} + +int32_t SegmentMerger::merge(bool mergeDocStores) { + this->mergeDocStores = mergeDocStores; + + // NOTE: it's important to add calls to checkAbort.work(...) if you make any changes to this method that will spend a lot of time. + // The frequency of this check impacts how long IndexWriter.close(false) takes to actually stop the threads. 
+ + mergedDocs = mergeFields(); + mergeTerms(); + mergeNorms(); + + if (mergeDocStores && fieldInfos->hasVectors()) { + mergeVectors(); } - bool SegmentMerger::hasProx() - { - return fieldInfos->hasProx(); + return mergedDocs; +} + +void SegmentMerger::closeReaders() { + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + (*reader)->close(); } - - void SegmentMerger::add(IndexReaderPtr reader) - { - readers.add(reader); +} + +HashSet SegmentMerger::getMergedFiles() { + HashSet fileSet(HashSet::newInstance()); + + // Basic files + for (HashSet::iterator ext = IndexFileNames::COMPOUND_EXTENSIONS().begin(); ext != IndexFileNames::COMPOUND_EXTENSIONS().end(); ++ext) { + if (*ext == IndexFileNames::PROX_EXTENSION() && !hasProx()) { + continue; + } + + if (mergeDocStores || (*ext != IndexFileNames::FIELDS_EXTENSION() && *ext != IndexFileNames::FIELDS_INDEX_EXTENSION())) { + fileSet.add(segment + L"." + *ext); + } } - - IndexReaderPtr SegmentMerger::segmentReader(int32_t i) - { - return readers[i]; + + // Fieldable norm files + for (int32_t i = 0; i < fieldInfos->size(); ++i) { + FieldInfoPtr fi(fieldInfos->fieldInfo(i)); + if (fi->isIndexed && !fi->omitNorms) { + fileSet.add(segment + L"." + IndexFileNames::NORMS_EXTENSION()); + break; + } } - - int32_t SegmentMerger::merge() - { - return merge(true); + + // Vector files + if (fieldInfos->hasVectors() && mergeDocStores) { + for (HashSet::iterator ext = IndexFileNames::VECTOR_EXTENSIONS().begin(); ext != IndexFileNames::VECTOR_EXTENSIONS().end(); ++ext) { + fileSet.add(segment + L"." + *ext); + } } - - int32_t SegmentMerger::merge(bool mergeDocStores) - { - this->mergeDocStores = mergeDocStores; - - // NOTE: it's important to add calls to checkAbort.work(...) if you make any changes to this method that will spend a lot of time. - // The frequency of this check impacts how long IndexWriter.close(false) takes to actually stop the threads. 
- - mergedDocs = mergeFields(); - mergeTerms(); - mergeNorms(); - - if (mergeDocStores && fieldInfos->hasVectors()) - mergeVectors(); - - return mergedDocs; + + return fileSet; +} + +HashSet SegmentMerger::createCompoundFile(const String& fileName) { + HashSet files(getMergedFiles()); + CompoundFileWriterPtr cfsWriter(newLucene(directory, fileName, checkAbort)); + + // Now merge all added files + for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { + cfsWriter->addFile(*file); } - - void SegmentMerger::closeReaders() - { - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - (*reader)->close(); + + // Perform the merge + cfsWriter->close(); + + return files; +} + +void SegmentMerger::addIndexed(const IndexReaderPtr& reader, const FieldInfosPtr& fInfos, HashSet names, + bool storeTermVectors, bool storePositionWithTermVector, + bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions) { + for (HashSet::iterator field = names.begin(); field != names.end(); ++field) { + fInfos->add(*field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, + !reader->hasNorms(*field), storePayloads, omitTFAndPositions); } - - HashSet SegmentMerger::getMergedFiles() - { - HashSet fileSet(HashSet::newInstance()); - - // Basic files - for (HashSet::iterator ext = IndexFileNames::COMPOUND_EXTENSIONS().begin(); ext != IndexFileNames::COMPOUND_EXTENSIONS().end(); ++ext) - { - if (*ext == IndexFileNames::PROX_EXTENSION() && !hasProx()) - continue; - - if (mergeDocStores || (*ext != IndexFileNames::FIELDS_EXTENSION() && *ext != IndexFileNames::FIELDS_INDEX_EXTENSION())) - fileSet.add(segment + L"." + *ext); - } - - // Fieldable norm files - for (int32_t i = 0; i < fieldInfos->size(); ++i) - { - FieldInfoPtr fi(fieldInfos->fieldInfo(i)); - if (fi->isIndexed && !fi->omitNorms) - { - fileSet.add(segment + L"." 
+ IndexFileNames::NORMS_EXTENSION()); - break; +} + +void SegmentMerger::setMatchingSegmentReaders() { + // If the i'th reader is a SegmentReader and has identical fieldName -> number mapping, then + // this array will be non-null at position i + int32_t numReaders = readers.size(); + matchingSegmentReaders = Collection::newInstance(numReaders); + + // If this reader is a SegmentReader, and all of its field name -> number mappings match the + // "merged" FieldInfos, then we can do a bulk copy of the stored fields + for (int32_t i = 0; i < numReaders; ++i) { + IndexReaderPtr reader(readers[i]); + SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(reader)); + if (segmentReader) { + bool same = true; + FieldInfosPtr segmentFieldInfos(segmentReader->fieldInfos()); + int32_t numFieldInfos = segmentFieldInfos->size(); + for (int32_t j = 0; same && j < numFieldInfos; ++j) { + same = (fieldInfos->fieldName(j) == segmentFieldInfos->fieldName(j)); + } + if (same) { + matchingSegmentReaders[i] = segmentReader; } } - - // Vector files - if (fieldInfos->hasVectors() && mergeDocStores) - { - for (HashSet::iterator ext = IndexFileNames::VECTOR_EXTENSIONS().begin(); ext != IndexFileNames::VECTOR_EXTENSIONS().end(); ++ext) - fileSet.add(segment + L"." 
+ *ext); - } - - return fileSet; } - - HashSet SegmentMerger::createCompoundFile(const String& fileName) - { - HashSet files(getMergedFiles()); - CompoundFileWriterPtr cfsWriter(newLucene(directory, fileName, checkAbort)); - - // Now merge all added files - for (HashSet::iterator file = files.begin(); file != files.end(); ++file) - cfsWriter->addFile(*file); - - // Perform the merge - cfsWriter->close(); - - return files; - } - - void SegmentMerger::addIndexed(IndexReaderPtr reader, FieldInfosPtr fInfos, HashSet names, - bool storeTermVectors, bool storePositionWithTermVector, - bool storeOffsetWithTermVector, bool storePayloads, bool omitTFAndPositions) - { - for (HashSet::iterator field = names.begin(); field != names.end(); ++field) - { - fInfos->add(*field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, - !reader->hasNorms(*field), storePayloads, omitTFAndPositions); - } + + // Used for bulk-reading raw bytes for stored fields + rawDocLengths = Collection::newInstance(MAX_RAW_MERGE_DOCS); + rawDocLengths2 = Collection::newInstance(MAX_RAW_MERGE_DOCS); +} + +int32_t SegmentMerger::mergeFields() { + if (!mergeDocStores) { + // When we are not merging by doc stores, their field name -> number mapping are the same. 
+ // So, we start with the fieldInfos of the last segment in this case, to keep that numbering + fieldInfos = boost::dynamic_pointer_cast(boost::dynamic_pointer_cast(readers[readers.size() - 1])->core->fieldInfos->clone()); + } else { + fieldInfos = newLucene(); // merge field names } - void SegmentMerger::setMatchingSegmentReaders() - { - // If the i'th reader is a SegmentReader and has identical fieldName -> number mapping, then - // this array will be non-null at position i - int32_t numReaders = readers.size(); - matchingSegmentReaders = Collection::newInstance(numReaders); - - // If this reader is a SegmentReader, and all of its field name -> number mappings match the - // "merged" FieldInfos, then we can do a bulk copy of the stored fields - for (int32_t i = 0; i < numReaders; ++i) - { - IndexReaderPtr reader(readers[i]); - SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(reader)); - if (segmentReader) - { - bool same = true; - FieldInfosPtr segmentFieldInfos(segmentReader->fieldInfos()); - int32_t numFieldInfos = segmentFieldInfos->size(); - for (int32_t j = 0; same && j < numFieldInfos; ++j) - same = (fieldInfos->fieldName(j) == segmentFieldInfos->fieldName(j)); - if (same) - matchingSegmentReaders[i] = segmentReader; + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(*reader)); + if (segmentReader) { + FieldInfosPtr readerFieldInfos(segmentReader->fieldInfos()); + int32_t numReaderFieldInfos = readerFieldInfos->size(); + for (int32_t j = 0; j < numReaderFieldInfos; ++j) { + FieldInfoPtr fi(readerFieldInfos->fieldInfo(j)); + fieldInfos->add(fi->name, fi->isIndexed, fi->storeTermVector, fi->storePositionWithTermVector, + fi->storeOffsetWithTermVector, !(*reader)->hasNorms(fi->name), fi->storePayloads, + fi->omitTermFreqAndPositions); } + } else { + addIndexed(*reader, fieldInfos, 
(*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); + addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION), true, true, false, false, false); + addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_OFFSET), true, false, true, false, false); + addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR), true, false, false, false, false); + addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); + addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_STORES_PAYLOADS), false, false, false, true, false); + addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_INDEXED), false, false, false, false, false); + fieldInfos->add((*reader)->getFieldNames(IndexReader::FIELD_OPTION_UNINDEXED), false); } - - // Used for bulk-reading raw bytes for stored fields - rawDocLengths = Collection::newInstance(MAX_RAW_MERGE_DOCS); - rawDocLengths2 = Collection::newInstance(MAX_RAW_MERGE_DOCS); } + fieldInfos->write(directory, segment + L".fnm"); - int32_t SegmentMerger::mergeFields() - { - if (!mergeDocStores) - { - // When we are not merging by doc stores, their field name -> number mapping are the same. 
- // So, we start with the fieldInfos of the last segment in this case, to keep that numbering - fieldInfos = boost::dynamic_pointer_cast(boost::dynamic_pointer_cast(readers[readers.size() - 1])->core->fieldInfos->clone()); - } - else - fieldInfos = newLucene(); // merge field names - - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - { - SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(*reader)); - if (segmentReader) - { - FieldInfosPtr readerFieldInfos(segmentReader->fieldInfos()); - int32_t numReaderFieldInfos = readerFieldInfos->size(); - for (int32_t j = 0; j < numReaderFieldInfos; ++j) - { - FieldInfoPtr fi(readerFieldInfos->fieldInfo(j)); - fieldInfos->add(fi->name, fi->isIndexed, fi->storeTermVector, fi->storePositionWithTermVector, - fi->storeOffsetWithTermVector, !(*reader)->hasNorms(fi->name), fi->storePayloads, - fi->omitTermFreqAndPositions); - } - } - else - { - addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); - addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_POSITION), true, true, false, false, false); - addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR_WITH_OFFSET), true, false, true, false, false); - addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_TERMVECTOR), true, false, false, false, false); - addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS), false, false, false, false, true); - addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_STORES_PAYLOADS), false, false, false, true, false); - addIndexed(*reader, fieldInfos, (*reader)->getFieldNames(IndexReader::FIELD_OPTION_INDEXED), false, false, false, false, false); - 
fieldInfos->add((*reader)->getFieldNames(IndexReader::FIELD_OPTION_UNINDEXED), false); - } - } - fieldInfos->write(directory, segment + L".fnm"); - - int32_t docCount = 0; - - setMatchingSegmentReaders(); - - if (mergeDocStores) - { - // merge field values - FieldsWriterPtr fieldsWriter(newLucene(directory, segment, fieldInfos)); - - LuceneException finally; - try - { - int32_t idx = 0; - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - { - SegmentReaderPtr matchingSegmentReader(matchingSegmentReaders[idx++]); - FieldsReaderPtr matchingFieldsReader; - if (matchingSegmentReader) - { - FieldsReaderPtr fieldsReader(matchingSegmentReader->getFieldsReader()); - if (fieldsReader && fieldsReader->canReadRawDocs()) - matchingFieldsReader = fieldsReader; + int32_t docCount = 0; + + setMatchingSegmentReaders(); + + if (mergeDocStores) { + // merge field values + FieldsWriterPtr fieldsWriter(newLucene(directory, segment, fieldInfos)); + + LuceneException finally; + try { + int32_t idx = 0; + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + SegmentReaderPtr matchingSegmentReader(matchingSegmentReaders[idx++]); + FieldsReaderPtr matchingFieldsReader; + if (matchingSegmentReader) { + FieldsReaderPtr fieldsReader(matchingSegmentReader->getFieldsReader()); + if (fieldsReader && fieldsReader->canReadRawDocs()) { + matchingFieldsReader = fieldsReader; } - if ((*reader)->hasDeletions()) - docCount += copyFieldsWithDeletions(fieldsWriter, *reader, matchingFieldsReader); - else - docCount += copyFieldsNoDeletions(fieldsWriter, *reader, matchingFieldsReader); + } + if ((*reader)->hasDeletions()) { + docCount += copyFieldsWithDeletions(fieldsWriter, *reader, matchingFieldsReader); + } else { + docCount += copyFieldsNoDeletions(fieldsWriter, *reader, matchingFieldsReader); } } - catch (LuceneException& e) - { - finally = e; - } - fieldsWriter->close(); - finally.throwException(); - - String fileName(segment 
+ L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); - int64_t fdxFileLength = directory->fileLength(fileName); - - if (4 + ((int64_t)docCount) * 8 != fdxFileLength) - { - boost::throw_exception(RuntimeException(L"mergeFields produced an invalid result: docCount is " + - StringUtils::toString(docCount) + L" but fdx file size is " + - StringUtils::toString(fdxFileLength) + L" file=" + fileName + - L" file exists?=" + StringUtils::toString(directory->fileExists(fileName)) + - L"; now aborting this merge to prevent index corruption")); - } + } catch (LuceneException& e) { + finally = e; } - else - { - // If we are skipping the doc stores, that means there are no deletions in any of these segments, - // so we just sum numDocs() of each segment to get total docCount - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - docCount += (*reader)->numDocs(); + fieldsWriter->close(); + finally.throwException(); + + String fileName(segment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); + int64_t fdxFileLength = directory->fileLength(fileName); + + if (4 + ((int64_t)docCount) * 8 != fdxFileLength) { + boost::throw_exception(RuntimeException(L"mergeFields produced an invalid result: docCount is " + + StringUtils::toString(docCount) + L" but fdx file size is " + + StringUtils::toString(fdxFileLength) + L" file=" + fileName + + L" file exists?=" + StringUtils::toString(directory->fileExists(fileName)) + + L"; now aborting this merge to prevent index corruption")); + } + } else { + // If we are skipping the doc stores, that means there are no deletions in any of these segments, + // so we just sum numDocs() of each segment to get total docCount + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + docCount += (*reader)->numDocs(); } - - return docCount; } - int32_t SegmentMerger::copyFieldsWithDeletions(FieldsWriterPtr fieldsWriter, IndexReaderPtr reader, FieldsReaderPtr matchingFieldsReader) - { - 
int32_t docCount = 0; - int32_t maxDoc = reader->maxDoc(); - if (matchingFieldsReader) - { - // We can bulk-copy because the fieldInfos are "congruent" - for (int32_t j = 0; j < maxDoc;) - { - if (reader->isDeleted(j)) - { - // skip deleted docs - ++j; - continue; + return docCount; +} + +int32_t SegmentMerger::copyFieldsWithDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader) { + int32_t docCount = 0; + int32_t maxDoc = reader->maxDoc(); + if (matchingFieldsReader) { + // We can bulk-copy because the fieldInfos are "congruent" + for (int32_t j = 0; j < maxDoc;) { + if (reader->isDeleted(j)) { + // skip deleted docs + ++j; + continue; + } + // We can optimize this case (doing a bulk byte copy) since the field numbers are identical + int32_t start = j; + int32_t numDocs = 0; + do { + ++j; + ++numDocs; + if (j >= maxDoc) { + break; } - // We can optimize this case (doing a bulk byte copy) since the field numbers are identical - int32_t start = j; - int32_t numDocs = 0; - do - { + if (reader->isDeleted(j)) { ++j; - ++numDocs; - if (j >= maxDoc) - break; - if (reader->isDeleted(j)) - { - ++j; - break; - } + break; } - while (numDocs < MAX_RAW_MERGE_DOCS); - - IndexInputPtr stream(matchingFieldsReader->rawDocs(rawDocLengths, start, numDocs)); - fieldsWriter->addRawDocuments(stream, rawDocLengths, numDocs); - docCount += numDocs; - checkAbort->work(300 * numDocs); - } + } while (numDocs < MAX_RAW_MERGE_DOCS); + + IndexInputPtr stream(matchingFieldsReader->rawDocs(rawDocLengths, start, numDocs)); + fieldsWriter->addRawDocuments(stream, rawDocLengths, numDocs); + docCount += numDocs; + checkAbort->work(300 * numDocs); } - else - { - for (int32_t j = 0; j < maxDoc; ++j) - { - if (reader->isDeleted(j)) - { - // skip deleted docs - continue; - } - - // NOTE: it's very important to first assign to doc then pass it to termVectorsWriter.addAllDocVectors - fieldsWriter->addDocument(reader->document(j)); - 
++docCount; - checkAbort->work(300); + } else { + for (int32_t j = 0; j < maxDoc; ++j) { + if (reader->isDeleted(j)) { + // skip deleted docs + continue; } + + // NOTE: it's very important to first assign to doc then pass it to termVectorsWriter.addAllDocVectors + fieldsWriter->addDocument(reader->document(j)); + ++docCount; + checkAbort->work(300); } - return docCount; } - - int32_t SegmentMerger::copyFieldsNoDeletions(FieldsWriterPtr fieldsWriter, IndexReaderPtr reader, FieldsReaderPtr matchingFieldsReader) - { - int32_t docCount = 0; - int32_t maxDoc = reader->maxDoc(); - if (matchingFieldsReader) - { - // We can bulk-copy because the fieldInfos are "congruent" - while (docCount < maxDoc) - { - int32_t len = std::min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); - IndexInputPtr stream(matchingFieldsReader->rawDocs(rawDocLengths, docCount, len)); - fieldsWriter->addRawDocuments(stream, rawDocLengths, len); - docCount += len; - checkAbort->work(300 * len); - } + return docCount; +} + +int32_t SegmentMerger::copyFieldsNoDeletions(const FieldsWriterPtr& fieldsWriter, const IndexReaderPtr& reader, const FieldsReaderPtr& matchingFieldsReader) { + int32_t docCount = 0; + int32_t maxDoc = reader->maxDoc(); + if (matchingFieldsReader) { + // We can bulk-copy because the fieldInfos are "congruent" + while (docCount < maxDoc) { + int32_t len = std::min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); + IndexInputPtr stream(matchingFieldsReader->rawDocs(rawDocLengths, docCount, len)); + fieldsWriter->addRawDocuments(stream, rawDocLengths, len); + docCount += len; + checkAbort->work(300 * len); } - else - { - for (; docCount < maxDoc; ++docCount) - { - // NOTE: it's very important to first assign to doc then pass it to termVectorsWriter.addAllDocVectors - fieldsWriter->addDocument(reader->document(docCount)); - checkAbort->work(300); - } + } else { + for (; docCount < maxDoc; ++docCount) { + // NOTE: it's very important to first assign to doc then pass it to 
termVectorsWriter.addAllDocVectors + fieldsWriter->addDocument(reader->document(docCount)); + checkAbort->work(300); } - return docCount; } + return docCount; +} - void SegmentMerger::mergeVectors() - { - TermVectorsWriterPtr termVectorsWriter(newLucene(directory, segment, fieldInfos)); - - LuceneException finally; - try - { - int32_t idx = 0; - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - { - SegmentReaderPtr matchingSegmentReader(matchingSegmentReaders[idx++]); - TermVectorsReaderPtr matchingVectorsReader; - if (matchingSegmentReader) - { - TermVectorsReaderPtr vectorsReader(matchingSegmentReader->getTermVectorsReaderOrig()); - - // If the TV* files are an older format then they cannot read raw docs - if (vectorsReader && vectorsReader->canReadRawDocs()) - matchingVectorsReader = vectorsReader; +void SegmentMerger::mergeVectors() { + TermVectorsWriterPtr termVectorsWriter(newLucene(directory, segment, fieldInfos)); + + LuceneException finally; + try { + int32_t idx = 0; + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + SegmentReaderPtr matchingSegmentReader(matchingSegmentReaders[idx++]); + TermVectorsReaderPtr matchingVectorsReader; + if (matchingSegmentReader) { + TermVectorsReaderPtr vectorsReader(matchingSegmentReader->getTermVectorsReaderOrig()); + + // If the TV* files are an older format then they cannot read raw docs + if (vectorsReader && vectorsReader->canReadRawDocs()) { + matchingVectorsReader = vectorsReader; } - if ((*reader)->hasDeletions()) - copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, *reader); - else - copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, *reader); + } + if ((*reader)->hasDeletions()) { + copyVectorsWithDeletions(termVectorsWriter, matchingVectorsReader, *reader); + } else { + copyVectorsNoDeletions(termVectorsWriter, matchingVectorsReader, *reader); } } - catch (LuceneException& e) - { - finally = e; - } - 
termVectorsWriter->close(); - finally.throwException(); - - String fileName(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); - int64_t tvxSize = directory->fileLength(fileName); - - if (4 + ((int64_t)mergedDocs) * 16 != tvxSize) - { - boost::throw_exception(RuntimeException(L"mergeVectors produced an invalid result: mergedDocs is " + - StringUtils::toString(mergedDocs) + L" but tvx size is " + - StringUtils::toString(tvxSize) + L" file=" + fileName + - L" file exists?=" + StringUtils::toString(directory->fileExists(fileName)) + - L"; now aborting this merge to prevent index corruption")); - } + } catch (LuceneException& e) { + finally = e; } - - void SegmentMerger::copyVectorsWithDeletions(TermVectorsWriterPtr termVectorsWriter, TermVectorsReaderPtr matchingVectorsReader, IndexReaderPtr reader) - { - int32_t maxDoc = reader->maxDoc(); - if (matchingVectorsReader) - { - // We can bulk-copy because the fieldInfos are "congruent" - for (int32_t docNum = 0; docNum < maxDoc;) - { - if (reader->isDeleted(docNum)) - { - // skip deleted docs - ++docNum; - continue; + termVectorsWriter->close(); + finally.throwException(); + + String fileName(segment + L"." 
+ IndexFileNames::VECTORS_INDEX_EXTENSION()); + int64_t tvxSize = directory->fileLength(fileName); + + if (4 + ((int64_t)mergedDocs) * 16 != tvxSize) { + boost::throw_exception(RuntimeException(L"mergeVectors produced an invalid result: mergedDocs is " + + StringUtils::toString(mergedDocs) + L" but tvx size is " + + StringUtils::toString(tvxSize) + L" file=" + fileName + + L" file exists?=" + StringUtils::toString(directory->fileExists(fileName)) + + L"; now aborting this merge to prevent index corruption")); + } +} + +void SegmentMerger::copyVectorsWithDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader) { + int32_t maxDoc = reader->maxDoc(); + if (matchingVectorsReader) { + // We can bulk-copy because the fieldInfos are "congruent" + for (int32_t docNum = 0; docNum < maxDoc;) { + if (reader->isDeleted(docNum)) { + // skip deleted docs + ++docNum; + continue; + } + // We can optimize this case (doing a bulk byte copy) since the field numbers are identical + int32_t start = docNum; + int32_t numDocs = 0; + do { + ++docNum; + ++numDocs; + if (docNum >= maxDoc) { + break; } - // We can optimize this case (doing a bulk byte copy) since the field numbers are identical - int32_t start = docNum; - int32_t numDocs = 0; - do - { + if (reader->isDeleted(docNum)) { ++docNum; - ++numDocs; - if (docNum >= maxDoc) - break; - if (reader->isDeleted(docNum)) - { - ++docNum; - break; - } + break; } - while (numDocs < MAX_RAW_MERGE_DOCS); - - matchingVectorsReader->rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); - termVectorsWriter->addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); - checkAbort->work(300 * numDocs); - } + } while (numDocs < MAX_RAW_MERGE_DOCS); + + matchingVectorsReader->rawDocs(rawDocLengths, rawDocLengths2, start, numDocs); + termVectorsWriter->addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); + 
checkAbort->work(300 * numDocs); } - else - { - for (int32_t docNum = 0; docNum < maxDoc; ++docNum) - { - if (reader->isDeleted(docNum)) - { - // skip deleted docs - continue; - } - - // NOTE: it's very important to first assign to vectors then pass it to termVectorsWriter.addAllDocVectors - termVectorsWriter->addAllDocVectors(reader->getTermFreqVectors(docNum)); - checkAbort->work(300); + } else { + for (int32_t docNum = 0; docNum < maxDoc; ++docNum) { + if (reader->isDeleted(docNum)) { + // skip deleted docs + continue; } + + // NOTE: it's very important to first assign to vectors then pass it to termVectorsWriter.addAllDocVectors + termVectorsWriter->addAllDocVectors(reader->getTermFreqVectors(docNum)); + checkAbort->work(300); } } - - void SegmentMerger::copyVectorsNoDeletions(TermVectorsWriterPtr termVectorsWriter, TermVectorsReaderPtr matchingVectorsReader, IndexReaderPtr reader) - { - int32_t maxDoc = reader->maxDoc(); - if (matchingVectorsReader) - { - // We can bulk-copy because the fieldInfos are "congruent" - int32_t docCount = 0; - while (docCount < maxDoc) - { - int32_t len = std::min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); - matchingVectorsReader->rawDocs(rawDocLengths, rawDocLengths2, docCount, len); - termVectorsWriter->addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len); - docCount += len; - checkAbort->work(300 * len); - } +} + +void SegmentMerger::copyVectorsNoDeletions(const TermVectorsWriterPtr& termVectorsWriter, const TermVectorsReaderPtr& matchingVectorsReader, const IndexReaderPtr& reader) { + int32_t maxDoc = reader->maxDoc(); + if (matchingVectorsReader) { + // We can bulk-copy because the fieldInfos are "congruent" + int32_t docCount = 0; + while (docCount < maxDoc) { + int32_t len = std::min(MAX_RAW_MERGE_DOCS, maxDoc - docCount); + matchingVectorsReader->rawDocs(rawDocLengths, rawDocLengths2, docCount, len); + termVectorsWriter->addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, len); + 
docCount += len; + checkAbort->work(300 * len); } - else - { - for (int32_t docNum = 0; docNum < maxDoc; ++docNum) - { - // NOTE: it's very important to first assign to vectors then pass it to termVectorsWriter.addAllDocVectors - termVectorsWriter->addAllDocVectors(reader->getTermFreqVectors(docNum)); - checkAbort->work(300); - } + } else { + for (int32_t docNum = 0; docNum < maxDoc; ++docNum) { + // NOTE: it's very important to first assign to vectors then pass it to termVectorsWriter.addAllDocVectors + termVectorsWriter->addAllDocVectors(reader->getTermFreqVectors(docNum)); + checkAbort->work(300); } } +} - void SegmentMerger::mergeTerms() - { - TestScope testScope(L"SegmentMerger", L"mergeTerms"); - - SegmentWriteStatePtr state(newLucene(DocumentsWriterPtr(), directory, segment, L"", mergedDocs, 0, termIndexInterval)); +void SegmentMerger::mergeTerms() { + TestScope testScope(L"SegmentMerger", L"mergeTerms"); - FormatPostingsFieldsConsumerPtr consumer(newLucene(state, fieldInfos)); + SegmentWriteStatePtr state(newLucene(DocumentsWriterPtr(), directory, segment, L"", mergedDocs, 0, termIndexInterval)); - LuceneException finally; - try - { - queue = newLucene(readers.size()); - mergeTermInfos(consumer); - } - catch (LuceneException& e) - { - finally = e; - } - consumer->finish(); - if (queue) - queue->close(); - finally.throwException(); + FormatPostingsFieldsConsumerPtr consumer(newLucene(state, fieldInfos)); + + LuceneException finally; + try { + queue = newLucene(readers.size()); + mergeTermInfos(consumer); + } catch (LuceneException& e) { + finally = e; } - - void SegmentMerger::mergeTermInfos(FormatPostingsFieldsConsumerPtr consumer) - { - int32_t base = 0; - int32_t readerCount = readers.size(); - for (int32_t i = 0; i < readerCount; ++i) - { - IndexReaderPtr reader(readers[i]); - TermEnumPtr termEnum(reader->terms()); - SegmentMergeInfoPtr smi(newLucene(base, termEnum, reader)); - Collection docMap(smi->getDocMap()); - if (docMap) - { - if (!docMaps) - { - 
docMaps = Collection< Collection >::newInstance(readerCount); - delCounts = Collection::newInstance(readerCount); - } - docMaps[i] = docMap; - IndexReaderPtr segmentMergeReader(smi->_reader); - delCounts[i] = segmentMergeReader->maxDoc() - segmentMergeReader->numDocs(); + consumer->finish(); + if (queue) { + queue->close(); + } + finally.throwException(); +} + +void SegmentMerger::mergeTermInfos(const FormatPostingsFieldsConsumerPtr& consumer) { + int32_t base = 0; + int32_t readerCount = readers.size(); + for (int32_t i = 0; i < readerCount; ++i) { + IndexReaderPtr reader(readers[i]); + TermEnumPtr termEnum(reader->terms()); + SegmentMergeInfoPtr smi(newLucene(base, termEnum, reader)); + Collection docMap(smi->getDocMap()); + if (docMap) { + if (!docMaps) { + docMaps = Collection< Collection >::newInstance(readerCount); + delCounts = Collection::newInstance(readerCount); } - - base += reader->numDocs(); - - BOOST_ASSERT(reader->numDocs() == reader->maxDoc() - smi->delCount); - - if (smi->next()) - queue->add(smi); // initialize queue - else - smi->close(); + docMaps[i] = docMap; + IndexReaderPtr segmentMergeReader(smi->_reader); + delCounts[i] = segmentMergeReader->maxDoc() - segmentMergeReader->numDocs(); + } + + base += reader->numDocs(); + + BOOST_ASSERT(reader->numDocs() == reader->maxDoc() - smi->delCount); + + if (smi->next()) { + queue->add(smi); // initialize queue + } else { + smi->close(); } - - Collection match(Collection::newInstance(readers.size())); - - String currentField; - FormatPostingsTermsConsumerPtr termsConsumer; - - while (!queue->empty()) - { - int32_t matchSize = 0; // pop matching terms + } + + Collection match(Collection::newInstance(readers.size())); + + String currentField; + FormatPostingsTermsConsumerPtr termsConsumer; + + while (!queue->empty()) { + int32_t matchSize = 0; // pop matching terms + match[matchSize++] = queue->pop(); + TermPtr term(match[0]->term); + SegmentMergeInfoPtr top(queue->empty() ? 
SegmentMergeInfoPtr() : queue->top()); + + while (top && term->compareTo(top->term) == 0) { match[matchSize++] = queue->pop(); - TermPtr term(match[0]->term); - SegmentMergeInfoPtr top(queue->empty() ? SegmentMergeInfoPtr() : queue->top()); - - while (top && term->compareTo(top->term) == 0) - { - match[matchSize++] = queue->pop(); - top = queue->top(); - } - - if (currentField != term->_field) - { - currentField = term->_field; - if (termsConsumer) - termsConsumer->finish(); - FieldInfoPtr fieldInfo(fieldInfos->fieldInfo(currentField)); - termsConsumer = consumer->addField(fieldInfo); - omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; + top = queue->top(); + } + + if (currentField != term->_field) { + currentField = term->_field; + if (termsConsumer) { + termsConsumer->finish(); } - - int32_t df = appendPostings(termsConsumer, match, matchSize); // add new TermInfo - - checkAbort->work(df / 3.0); - - while (matchSize > 0) - { - SegmentMergeInfoPtr smi(match[--matchSize]); - if (smi->next()) - queue->add(smi); // restore queue - else - smi->close(); // done with a segment + FieldInfoPtr fieldInfo(fieldInfos->fieldInfo(currentField)); + termsConsumer = consumer->addField(fieldInfo); + omitTermFreqAndPositions = fieldInfo->omitTermFreqAndPositions; + } + + int32_t df = appendPostings(termsConsumer, match, matchSize); // add new TermInfo + + checkAbort->work(df / 3.0); + + while (matchSize > 0) { + SegmentMergeInfoPtr smi(match[--matchSize]); + if (smi->next()) { + queue->add(smi); // restore queue + } else { + smi->close(); // done with a segment } } } - - Collection< Collection > SegmentMerger::getDocMaps() - { - return docMaps; - } - - Collection SegmentMerger::getDelCounts() - { - return delCounts; - } - - int32_t SegmentMerger::appendPostings(FormatPostingsTermsConsumerPtr termsConsumer, Collection smis, int32_t n) - { - FormatPostingsDocsConsumerPtr docConsumer(termsConsumer->addTerm(smis[0]->term->_text)); - int32_t df = 0; - for (int32_t i = 0; i 
< n; ++i) - { - SegmentMergeInfoPtr smi(smis[i]); - TermPositionsPtr postings(smi->getPositions()); - BOOST_ASSERT(postings); - int32_t base = smi->base; - Collection docMap(smi->getDocMap()); - postings->seek(smi->termEnum); - - while (postings->next()) - { - ++df; - int32_t doc = postings->doc(); - if (docMap) - doc = docMap[doc]; // map around deletions - doc += base; // convert to merged space - - int32_t freq = postings->freq(); - FormatPostingsPositionsConsumerPtr posConsumer(docConsumer->addDoc(doc, freq)); - - if (!omitTermFreqAndPositions) - { - for (int32_t j = 0; j < freq; ++j) - { - int32_t position = postings->nextPosition(); - int32_t payloadLength = postings->getPayloadLength(); - if (payloadLength > 0) - { - if (!payloadBuffer) - payloadBuffer = ByteArray::newInstance(payloadLength); - if (payloadBuffer.size() < payloadLength) - payloadBuffer.resize(payloadLength); - postings->getPayload(payloadBuffer, 0); +} + +Collection< Collection > SegmentMerger::getDocMaps() { + return docMaps; +} + +Collection SegmentMerger::getDelCounts() { + return delCounts; +} + +int32_t SegmentMerger::appendPostings(const FormatPostingsTermsConsumerPtr& termsConsumer, Collection smis, int32_t n) { + FormatPostingsDocsConsumerPtr docConsumer(termsConsumer->addTerm(smis[0]->term->_text)); + int32_t df = 0; + for (int32_t i = 0; i < n; ++i) { + SegmentMergeInfoPtr smi(smis[i]); + TermPositionsPtr postings(smi->getPositions()); + BOOST_ASSERT(postings); + int32_t base = smi->base; + Collection docMap(smi->getDocMap()); + postings->seek(smi->termEnum); + + while (postings->next()) { + ++df; + int32_t doc = postings->doc(); + if (docMap) { + doc = docMap[doc]; // map around deletions + } + doc += base; // convert to merged space + + int32_t freq = postings->freq(); + FormatPostingsPositionsConsumerPtr posConsumer(docConsumer->addDoc(doc, freq)); + + if (!omitTermFreqAndPositions) { + for (int32_t j = 0; j < freq; ++j) { + int32_t position = postings->nextPosition(); + int32_t 
payloadLength = postings->getPayloadLength(); + if (payloadLength > 0) { + if (!payloadBuffer) { + payloadBuffer = ByteArray::newInstance(payloadLength); + } + if (payloadBuffer.size() < payloadLength) { + payloadBuffer.resize(payloadLength); } - posConsumer->addPosition(position, payloadBuffer, 0, payloadLength); + postings->getPayload(payloadBuffer, 0); } - posConsumer->finish(); + posConsumer->addPosition(position, payloadBuffer, 0, payloadLength); } + posConsumer->finish(); } } - docConsumer->finish(); - - return df; } + docConsumer->finish(); - void SegmentMerger::mergeNorms() - { - ByteArray normBuffer; - IndexOutputPtr output; - LuceneException finally; - try - { - int32_t numFieldInfos = fieldInfos->size(); - for (int32_t i = 0; i < numFieldInfos; ++i) - { - FieldInfoPtr fi(fieldInfos->fieldInfo(i)); - if (fi->isIndexed && !fi->omitNorms) - { - if (!output) - { - output = directory->createOutput(segment + L"." + IndexFileNames::NORMS_EXTENSION()); - output->writeBytes(NORMS_HEADER, SIZEOF_ARRAY(NORMS_HEADER)); + return df; +} + +void SegmentMerger::mergeNorms() { + ByteArray normBuffer; + IndexOutputPtr output; + LuceneException finally; + try { + int32_t numFieldInfos = fieldInfos->size(); + for (int32_t i = 0; i < numFieldInfos; ++i) { + FieldInfoPtr fi(fieldInfos->fieldInfo(i)); + if (fi->isIndexed && !fi->omitNorms) { + if (!output) { + output = directory->createOutput(segment + L"." 
+ IndexFileNames::NORMS_EXTENSION()); + output->writeBytes(NORMS_HEADER, SIZEOF_ARRAY(NORMS_HEADER)); + } + for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) { + int32_t maxDoc = (*reader)->maxDoc(); + + if (!normBuffer) { + normBuffer = ByteArray::newInstance(maxDoc); } - for (Collection::iterator reader = readers.begin(); reader != readers.end(); ++reader) - { - int32_t maxDoc = (*reader)->maxDoc(); - - if (!normBuffer) - normBuffer = ByteArray::newInstance(maxDoc); - if (normBuffer.size() < maxDoc) // the buffer is too small for the current segment - normBuffer.resize(maxDoc); - MiscUtils::arrayFill(normBuffer.get(), 0, normBuffer.size(), 0); - (*reader)->norms(fi->name, normBuffer, 0); - if (!(*reader)->hasDeletions()) - { - // optimized case for segments without deleted docs - output->writeBytes(normBuffer.get(), maxDoc); - } - else - { - // this segment has deleted docs, so we have to check for every doc if it is deleted or not - for (int32_t k = 0; k < maxDoc; ++k) - { - if (!(*reader)->isDeleted(k)) - output->writeByte(normBuffer[k]); + if (normBuffer.size() < maxDoc) { // the buffer is too small for the current segment + normBuffer.resize(maxDoc); + } + MiscUtils::arrayFill(normBuffer.get(), 0, normBuffer.size(), 0); + (*reader)->norms(fi->name, normBuffer, 0); + if (!(*reader)->hasDeletions()) { + // optimized case for segments without deleted docs + output->writeBytes(normBuffer.get(), maxDoc); + } else { + // this segment has deleted docs, so we have to check for every doc if it is deleted or not + for (int32_t k = 0; k < maxDoc; ++k) { + if (!(*reader)->isDeleted(k)) { + output->writeByte(normBuffer[k]); } } - checkAbort->work(maxDoc); } + checkAbort->work(maxDoc); } } } - catch (LuceneException& e) - { - finally = e; - } - if (output) - output->close(); - finally.throwException(); + } catch (LuceneException& e) { + finally = e; + } + if (output) { + output->close(); } + finally.throwException(); +} - 
CheckAbort::CheckAbort(OneMergePtr merge, DirectoryPtr dir) - { +CheckAbort::CheckAbort(const OneMergePtr& merge, const DirectoryPtr& dir) { + workCount = 0; + this->merge = merge; + this->_dir = dir; +} + +CheckAbort::~CheckAbort() { +} + +void CheckAbort::work(double units) { + workCount += units; + if (workCount >= 10000.0) { + merge->checkAborted(DirectoryPtr(_dir)); workCount = 0; - this->merge = merge; - this->_dir = dir; - } - - CheckAbort::~CheckAbort() - { - } - - void CheckAbort::work(double units) - { - workCount += units; - if (workCount >= 10000.0) - { - merge->checkAborted(DirectoryPtr(_dir)); - workCount = 0; - } - } - - CheckAbortNull::CheckAbortNull() : CheckAbort(OneMergePtr(), DirectoryPtr()) - { - } - - CheckAbortNull::~CheckAbortNull() - { - } - - void CheckAbortNull::work(double units) - { - // do nothing } } + +CheckAbortNull::CheckAbortNull() : CheckAbort(OneMergePtr(), DirectoryPtr()) { +} + +CheckAbortNull::~CheckAbortNull() { +} + +void CheckAbortNull::work(double units) { + // do nothing +} + +} diff --git a/src/core/index/SegmentReader.cpp b/src/core/index/SegmentReader.cpp index 2be0b1f0..201da19e 100644 --- a/src/core/index/SegmentReader.cpp +++ b/src/core/index/SegmentReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -30,1328 +30,1160 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - SegmentReader::SegmentReader() - { - _norms = MapStringNorm::newInstance(); - readOnly = false; - deletedDocsDirty = false; - normsDirty = false; - rollbackHasChanges = false; - rollbackDeletedDocsDirty = false; - rollbackNormsDirty = false; - - readBufferSize = 0; - pendingDeleteCount = 0; - rollbackPendingDeleteCount = 0; +namespace Lucene { + +SegmentReader::SegmentReader() { + _norms = MapStringNorm::newInstance(); + readOnly = false; + deletedDocsDirty = false; + normsDirty = false; + rollbackHasChanges = false; + rollbackDeletedDocsDirty = false; + rollbackNormsDirty = false; + + readBufferSize = 0; + pendingDeleteCount = 0; + rollbackPendingDeleteCount = 0; +} + +SegmentReader::~SegmentReader() { +} + +void SegmentReader::initialize() { + fieldsReaderLocal = newLucene(shared_from_this()); +} + +SegmentReaderPtr SegmentReader::get(bool readOnly, const SegmentInfoPtr& si, int32_t termInfosIndexDivisor) { + return get(readOnly, si->dir, si, BufferedIndexInput::BUFFER_SIZE, true, termInfosIndexDivisor); +} + +SegmentReaderPtr SegmentReader::get(bool readOnly, const DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor) { + SegmentReaderPtr instance(readOnly ? 
newLucene() : newLucene()); + instance->readOnly = readOnly; + instance->si = si; + instance->readBufferSize = readBufferSize; + + bool success = false; + LuceneException finally; + try { + instance->core = newLucene(instance, dir, si, readBufferSize, termInfosIndexDivisor); + if (doOpenStores) { + instance->core->openDocStores(si); + } + instance->loadDeletedDocs(); + instance->openNorms(instance->core->cfsDir, readBufferSize); + success = true; + } catch (LuceneException& e) { + finally = e; } - - SegmentReader::~SegmentReader() - { + + // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. + // In this case, we want to explicitly close any subset of things that were opened + if (!success) { + instance->doClose(); } - - void SegmentReader::initialize() - { - fieldsReaderLocal = newLucene(shared_from_this()); + finally.throwException(); + return instance; +} + +void SegmentReader::openDocStores() { + core->openDocStores(si); +} + +bool SegmentReader::checkDeletedCounts() { + int32_t recomputedCount = deletedDocs->getRecomputedCount(); + + BOOST_ASSERT(deletedDocs->count() == recomputedCount); + + BOOST_ASSERT(si->getDelCount() == recomputedCount); + + // Verify # deletes does not exceed maxDoc for this segment + BOOST_ASSERT(si->getDelCount() <= maxDoc()); + + return true; +} + +void SegmentReader::loadDeletedDocs() { + // NOTE: the bitvector is stored using the regular directory, not cfs + if (hasDeletions(si)) { + deletedDocs = newLucene(directory(), si->getDelFileName()); + deletedDocsRef = newLucene(); + BOOST_ASSERT(checkDeletedCounts()); + } else { + BOOST_ASSERT(si->getDelCount() == 0); } - - SegmentReaderPtr SegmentReader::get(bool readOnly, SegmentInfoPtr si, int32_t termInfosIndexDivisor) - { - return get(readOnly, si->dir, si, BufferedIndexInput::BUFFER_SIZE, true, termInfosIndexDivisor); +} + +ByteArray SegmentReader::cloneNormBytes(ByteArray bytes) { + ByteArray 
cloneBytes(ByteArray::newInstance(bytes.size())); + MiscUtils::arrayCopy(bytes.get(), 0, cloneBytes.get(), 0, bytes.size()); + return cloneBytes; +} + +BitVectorPtr SegmentReader::cloneDeletedDocs(const BitVectorPtr& bv) { + return boost::dynamic_pointer_cast(bv->clone()); +} + +LuceneObjectPtr SegmentReader::clone(const LuceneObjectPtr& other) { + try { + return SegmentReader::clone(readOnly, other); // Preserve current readOnly + } catch (...) { + boost::throw_exception(RuntimeException()); } - - SegmentReaderPtr SegmentReader::get(bool readOnly, DirectoryPtr dir, SegmentInfoPtr si, int32_t readBufferSize, bool doOpenStores, int32_t termInfosIndexDivisor) - { - SegmentReaderPtr instance(readOnly ? newLucene() : newLucene()); - instance->readOnly = readOnly; - instance->si = si; - instance->readBufferSize = readBufferSize; - - bool success = false; - LuceneException finally; - try - { - instance->core = newLucene(instance, dir, si, readBufferSize, termInfosIndexDivisor); - if (doOpenStores) - instance->core->openDocStores(si); - instance->loadDeletedDocs(); - instance->openNorms(instance->core->cfsDir, readBufferSize); - success = true; - } - catch (LuceneException& e) - { - finally = e; + return LuceneObjectPtr(); +} + +LuceneObjectPtr SegmentReader::clone(bool openReadOnly, const LuceneObjectPtr& other) { + SyncLock syncLock(this); + return reopenSegment(si, true, openReadOnly); +} + +SegmentReaderPtr SegmentReader::reopenSegment(const SegmentInfoPtr& si, bool doClone, bool openReadOnly) { + SyncLock syncLock(this); + + bool deletionsUpToDate = (this->si->hasDeletions() == si->hasDeletions() && + (!si->hasDeletions() || this->si->getDelFileName() == si->getDelFileName())); + bool normsUpToDate = true; + int32_t fieldCount = core->fieldInfos->size(); + Collection fieldNormsChanged(Collection::newInstance(fieldCount)); + + for (int32_t i = 0; i < fieldCount; ++i) { + if (this->si->getNormFileName(i) != si->getNormFileName(i)) { + normsUpToDate = false; + 
fieldNormsChanged[i] = true; } - - // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. - // In this case, we want to explicitly close any subset of things that were opened - if (!success) - instance->doClose(); - finally.throwException(); - return instance; } - - void SegmentReader::openDocStores() - { - core->openDocStores(si); + + // if we're cloning we need to run through the reopenSegment logic also if both old and new readers + // aren't readonly, we clone to avoid sharing modifications + if (normsUpToDate && deletionsUpToDate && !doClone && openReadOnly && readOnly) { + return shared_from_this(); } - - bool SegmentReader::checkDeletedCounts() - { - int32_t recomputedCount = deletedDocs->getRecomputedCount(); - BOOST_ASSERT(deletedDocs->count() == recomputedCount); + // When cloning, the incoming SegmentInfos should not have any changes in it + BOOST_ASSERT(!doClone || (normsUpToDate && deletionsUpToDate)); - BOOST_ASSERT(si->getDelCount() == recomputedCount); + // clone reader + SegmentReaderPtr clone(openReadOnly ? 
newLucene() : newLucene()); - // Verify # deletes does not exceed maxDoc for this segment - BOOST_ASSERT(si->getDelCount() <= maxDoc()); - - return true; - } - - void SegmentReader::loadDeletedDocs() - { - // NOTE: the bitvector is stored using the regular directory, not cfs - if (hasDeletions(si)) - { - deletedDocs = newLucene(directory(), si->getDelFileName()); - deletedDocsRef = newLucene(); - BOOST_ASSERT(checkDeletedCounts()); + bool success = false; + LuceneException finally; + try { + core->incRef(); + clone->core = core; + clone->readOnly = openReadOnly; + clone->si = si; + clone->readBufferSize = readBufferSize; + + if (!openReadOnly && _hasChanges) { + // My pending changes transfer to the new reader + clone->pendingDeleteCount = pendingDeleteCount; + clone->deletedDocsDirty = deletedDocsDirty; + clone->normsDirty = normsDirty; + clone->_hasChanges = _hasChanges; + _hasChanges = false; } - else - BOOST_ASSERT(si->getDelCount() == 0); - } - - ByteArray SegmentReader::cloneNormBytes(ByteArray bytes) - { - ByteArray cloneBytes(ByteArray::newInstance(bytes.size())); - MiscUtils::arrayCopy(bytes.get(), 0, cloneBytes.get(), 0, bytes.size()); - return cloneBytes; - } - - BitVectorPtr SegmentReader::cloneDeletedDocs(BitVectorPtr bv) - { - return boost::dynamic_pointer_cast(bv->clone()); - } - - LuceneObjectPtr SegmentReader::clone(LuceneObjectPtr other) - { - try - { - return SegmentReader::clone(readOnly, other); // Preserve current readOnly + + if (doClone) { + if (deletedDocs) { + deletedDocsRef->incRef(); + clone->deletedDocs = deletedDocs; + clone->deletedDocsRef = deletedDocsRef; + } + } else { + if (!deletionsUpToDate) { + // load deleted docs + BOOST_ASSERT(!clone->deletedDocs); + clone->loadDeletedDocs(); + } else if (deletedDocs) { + deletedDocsRef->incRef(); + clone->deletedDocs = deletedDocs; + clone->deletedDocsRef = deletedDocsRef; + } } - catch (...) 
- { - boost::throw_exception(RuntimeException()); + + clone->_norms = MapStringNorm::newInstance(); + + // Clone norms + for (int32_t i = 0; i < fieldNormsChanged.size(); ++i) { + // Clone unchanged norms to the cloned reader + if (doClone || !fieldNormsChanged[i]) { + String curField(core->fieldInfos->fieldInfo(i)->name); + NormPtr norm(this->_norms.get(curField)); + if (norm) { + NormPtr cloneNorm(boost::dynamic_pointer_cast(norm->clone())); + cloneNorm->_reader = clone; + clone->_norms.put(curField, cloneNorm); + } + } } - return LuceneObjectPtr(); + + // If we are not cloning, then this will open anew any norms that have changed + clone->openNorms(si->getUseCompoundFile() ? core->getCFSReader() : directory(), readBufferSize); + + success = true; + } catch (LuceneException& e) { + finally = e; } - - LuceneObjectPtr SegmentReader::clone(bool openReadOnly, LuceneObjectPtr other) - { - SyncLock syncLock(this); - return reopenSegment(si, true, openReadOnly); + if (!success) { + // An exception occurred during reopen, we have to decRef the norms that we incRef'ed already + // and close singleNormsStream and FieldsReader + clone->decRef(); } - - SegmentReaderPtr SegmentReader::reopenSegment(SegmentInfoPtr si, bool doClone, bool openReadOnly) - { - SyncLock syncLock(this); - - bool deletionsUpToDate = (this->si->hasDeletions() == si->hasDeletions() && - (!si->hasDeletions() || this->si->getDelFileName() == si->getDelFileName())); - bool normsUpToDate = true; - int32_t fieldCount = core->fieldInfos->size(); - Collection fieldNormsChanged(Collection::newInstance(fieldCount)); - - for (int32_t i = 0; i < fieldCount; ++i) - { - if (this->si->getNormFileName(i) != si->getNormFileName(i)) - { - normsUpToDate = false; - fieldNormsChanged[i] = true; - } - } - - // if we're cloning we need to run through the reopenSegment logic also if both old and new readers - // aren't readonly, we clone to avoid sharing modifications - if (normsUpToDate && deletionsUpToDate && !doClone && 
openReadOnly && readOnly) - return shared_from_this(); - - // When cloning, the incoming SegmentInfos should not have any changes in it - BOOST_ASSERT(!doClone || (normsUpToDate && deletionsUpToDate)); - - // clone reader - SegmentReaderPtr clone(openReadOnly ? newLucene() : newLucene()); - + finally.throwException(); + return clone; +} + +void SegmentReader::doCommit(MapStringString commitUserData) { + if (_hasChanges) { + startCommit(); bool success = false; LuceneException finally; - try - { - core->incRef(); - clone->core = core; - clone->readOnly = openReadOnly; - clone->si = si; - clone->readBufferSize = readBufferSize; - - if (!openReadOnly && _hasChanges) - { - // My pending changes transfer to the new reader - clone->pendingDeleteCount = pendingDeleteCount; - clone->deletedDocsDirty = deletedDocsDirty; - clone->normsDirty = normsDirty; - clone->_hasChanges = _hasChanges; - _hasChanges = false; - } - - if (doClone) - { - if (deletedDocs) - { - deletedDocsRef->incRef(); - clone->deletedDocs = deletedDocs; - clone->deletedDocsRef = deletedDocsRef; - } - } - else - { - if (!deletionsUpToDate) - { - // load deleted docs - BOOST_ASSERT(!clone->deletedDocs); - clone->loadDeletedDocs(); - } - else if (deletedDocs) - { - deletedDocsRef->incRef(); - clone->deletedDocs = deletedDocs; - clone->deletedDocsRef = deletedDocsRef; - } - } - - clone->_norms = MapStringNorm::newInstance(); - - // Clone norms - for (int32_t i = 0; i < fieldNormsChanged.size(); ++i) - { - // Clone unchanged norms to the cloned reader - if (doClone || !fieldNormsChanged[i]) - { - String curField(core->fieldInfos->fieldInfo(i)->name); - NormPtr norm(this->_norms.get(curField)); - if (norm) - { - NormPtr cloneNorm(boost::dynamic_pointer_cast(norm->clone())); - cloneNorm->_reader = clone; - clone->_norms.put(curField, cloneNorm); - } - } - } - - // If we are not cloning, then this will open anew any norms that have changed - clone->openNorms(si->getUseCompoundFile() ? 
core->getCFSReader() : directory(), readBufferSize); - + try { + commitChanges(commitUserData); success = true; - } - catch (LuceneException& e) - { + } catch (LuceneException& e) { finally = e; } - if (!success) - { - // An exception occurred during reopen, we have to decRef the norms that we incRef'ed already - // and close singleNormsStream and FieldsReader - clone->decRef(); + if (!success) { + rollbackCommit(); } finally.throwException(); - return clone; - } - - void SegmentReader::doCommit(MapStringString commitUserData) - { - if (_hasChanges) - { - startCommit(); - bool success = false; - LuceneException finally; - try - { - commitChanges(commitUserData); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success) - rollbackCommit(); - finally.throwException(); - } } - - void SegmentReader::commitChanges(MapStringString commitUserData) - { - if (deletedDocsDirty) // re-write deleted - { - si->advanceDelGen(); - - // We can write directly to the actual name (vs to a .tmp & renaming it) because the file - // is not live until segments file is written - String delFileName(si->getDelFileName()); - - bool success = false; - LuceneException finally; - try - { - deletedDocs->write(directory(), delFileName); - success = true; - } - catch (LuceneException& e) - { - finally = e; - } - if (!success) - { - try - { - directory()->deleteFile(delFileName); - } - catch (...) - { - // suppress this so we keep throwing the original exception - } - } - finally.throwException(); - - si->setDelCount(si->getDelCount() + pendingDeleteCount); - pendingDeleteCount = 0; - BOOST_ASSERT(deletedDocs->count() == si->getDelCount()); // delete count mismatch during commit? 
- } - else - { - BOOST_ASSERT(pendingDeleteCount == 0); +} + +void SegmentReader::commitChanges(MapStringString commitUserData) { + if (deletedDocsDirty) { // re-write deleted + si->advanceDelGen(); + + // We can write directly to the actual name (vs to a .tmp & renaming it) because the file + // is not live until segments file is written + String delFileName(si->getDelFileName()); + + bool success = false; + LuceneException finally; + try { + deletedDocs->write(directory(), delFileName); + success = true; + } catch (LuceneException& e) { + finally = e; } - - if (normsDirty) // re-write norms - { - si->setNumFields(core->fieldInfos->size()); - for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) - { - if (norm->second->dirty) - norm->second->reWrite(si); + if (!success) { + try { + directory()->deleteFile(delFileName); + } catch (...) { + // suppress this so we keep throwing the original exception } } - deletedDocsDirty = false; - normsDirty = false; - _hasChanges = false; - } - - FieldsReaderPtr SegmentReader::getFieldsReader() - { - return fieldsReaderLocal->get(); - } - - void SegmentReader::doClose() - { - termVectorsLocal.close(); - fieldsReaderLocal->close(); - if (deletedDocs) - { - deletedDocsRef->decRef(); - deletedDocs.reset(); // null so if an app hangs on to us we still free most ram - } - for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) - norm->second->decRef(); - if (core) - core->decRef(); - } - - bool SegmentReader::hasDeletions(SegmentInfoPtr si) - { - // Don't call ensureOpen() here (it could affect performance) - return si->hasDeletions(); - } - - bool SegmentReader::hasDeletions() - { - // Don't call ensureOpen() here (it could affect performance) - return deletedDocs; - } - - bool SegmentReader::usesCompoundFile(SegmentInfoPtr si) - { - return si->getUseCompoundFile(); - } - - bool SegmentReader::hasSeparateNorms(SegmentInfoPtr si) - { - return si->hasSeparateNorms(); - } - - void 
SegmentReader::doDelete(int32_t docNum) - { - if (!deletedDocs) - { - deletedDocs = newLucene(maxDoc()); - deletedDocsRef = newLucene(); - } - // there is more than 1 SegmentReader with a reference to this deletedDocs BitVector so decRef - // the current deletedDocsRef, clone the BitVector, create a new deletedDocsRef - if (deletedDocsRef->refCount() > 1) - { - SegmentReaderRefPtr oldRef(deletedDocsRef); - deletedDocs = cloneDeletedDocs(deletedDocs); - deletedDocsRef = newLucene(); - oldRef->decRef(); - } - deletedDocsDirty = true; - if (!deletedDocs->getAndSet(docNum)) - ++pendingDeleteCount; + finally.throwException(); + + si->setDelCount(si->getDelCount() + pendingDeleteCount); + pendingDeleteCount = 0; + BOOST_ASSERT(deletedDocs->count() == si->getDelCount()); // delete count mismatch during commit? + } else { + BOOST_ASSERT(pendingDeleteCount == 0); } - - void SegmentReader::doUndeleteAll() - { - deletedDocsDirty = false; - if (deletedDocs) - { - BOOST_ASSERT(deletedDocsRef); - deletedDocsRef->decRef(); - deletedDocs.reset(); - deletedDocsRef.reset(); - pendingDeleteCount = 0; - si->clearDelGen(); - si->setDelCount(0); - } - else - { - BOOST_ASSERT(!deletedDocsRef); - BOOST_ASSERT(pendingDeleteCount == 0); + + if (normsDirty) { // re-write norms + si->setNumFields(core->fieldInfos->size()); + for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { + if (norm->second->dirty) { + norm->second->reWrite(si); + } } } - - HashSet SegmentReader::files() - { - return si->files(); - } - - TermEnumPtr SegmentReader::terms() - { - ensureOpen(); - return core->getTermsReader()->terms(); - } - - TermEnumPtr SegmentReader::terms(TermPtr t) - { - ensureOpen(); - return core->getTermsReader()->terms(t); - } - - FieldInfosPtr SegmentReader::fieldInfos() - { - return core->fieldInfos; - } - - DocumentPtr SegmentReader::document(int32_t n, FieldSelectorPtr fieldSelector) - { - ensureOpen(); - return getFieldsReader()->doc(n, fieldSelector); - } - - 
bool SegmentReader::isDeleted(int32_t n) - { - SyncLock syncLock(this); - return (deletedDocs && deletedDocs->get(n)); + deletedDocsDirty = false; + normsDirty = false; + _hasChanges = false; +} + +FieldsReaderPtr SegmentReader::getFieldsReader() { + return fieldsReaderLocal->get(); +} + +void SegmentReader::doClose() { + termVectorsLocal.close(); + fieldsReaderLocal->close(); + if (deletedDocs) { + deletedDocsRef->decRef(); + deletedDocs.reset(); // null so if an app hangs on to us we still free most ram } - - TermDocsPtr SegmentReader::termDocs(TermPtr term) - { - if (!term) - return newLucene(shared_from_this()); - else - return IndexReader::termDocs(term); + for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { + norm->second->decRef(); } - - TermDocsPtr SegmentReader::termDocs() - { - ensureOpen(); - return newLucene(shared_from_this()); + if (core) { + core->decRef(); } - - TermPositionsPtr SegmentReader::termPositions() - { - ensureOpen(); - return newLucene(shared_from_this()); +} + +bool SegmentReader::hasDeletions(const SegmentInfoPtr& si) { + // Don't call ensureOpen() here (it could affect performance) + return si->hasDeletions(); +} + +bool SegmentReader::hasDeletions() { + // Don't call ensureOpen() here (it could affect performance) + return deletedDocs.get() != NULL; +} + +bool SegmentReader::usesCompoundFile(const SegmentInfoPtr& si) { + return si->getUseCompoundFile(); +} + +bool SegmentReader::hasSeparateNorms(const SegmentInfoPtr& si) { + return si->hasSeparateNorms(); +} + +void SegmentReader::doDelete(int32_t docNum) { + if (!deletedDocs) { + deletedDocs = newLucene(maxDoc()); + deletedDocsRef = newLucene(); + } + // there is more than 1 SegmentReader with a reference to this deletedDocs BitVector so decRef + // the current deletedDocsRef, clone the BitVector, create a new deletedDocsRef + if (deletedDocsRef->refCount() > 1) { + SegmentReaderRefPtr oldRef(deletedDocsRef); + deletedDocs = 
cloneDeletedDocs(deletedDocs); + deletedDocsRef = newLucene(); + oldRef->decRef(); + } + deletedDocsDirty = true; + if (!deletedDocs->getAndSet(docNum)) { + ++pendingDeleteCount; } - - int32_t SegmentReader::docFreq(TermPtr t) - { - ensureOpen(); - TermInfoPtr ti(core->getTermsReader()->get(t)); - return ti ? ti->docFreq : 0; +} + +void SegmentReader::doUndeleteAll() { + deletedDocsDirty = false; + if (deletedDocs) { + BOOST_ASSERT(deletedDocsRef); + deletedDocsRef->decRef(); + deletedDocs.reset(); + deletedDocsRef.reset(); + pendingDeleteCount = 0; + si->clearDelGen(); + si->setDelCount(0); + } else { + BOOST_ASSERT(!deletedDocsRef); + BOOST_ASSERT(pendingDeleteCount == 0); } - - int32_t SegmentReader::numDocs() - { - // Don't call ensureOpen() here (it could affect performance) - int32_t n = maxDoc(); - if (deletedDocs) - n -= deletedDocs->count(); - return n; +} + +HashSet SegmentReader::files() { + return si->files(); +} + +TermEnumPtr SegmentReader::terms() { + ensureOpen(); + return core->getTermsReader()->terms(); +} + +TermEnumPtr SegmentReader::terms(const TermPtr& t) { + ensureOpen(); + return core->getTermsReader()->terms(t); +} + +FieldInfosPtr SegmentReader::fieldInfos() { + return core->fieldInfos; +} + +DocumentPtr SegmentReader::document(int32_t n, const FieldSelectorPtr& fieldSelector) { + ensureOpen(); + return getFieldsReader()->doc(n, fieldSelector); +} + +bool SegmentReader::isDeleted(int32_t n) { + SyncLock syncLock(this); + return (deletedDocs && deletedDocs->get(n)); +} + +TermDocsPtr SegmentReader::termDocs(const TermPtr& term) { + if (!term) { + return newLucene(shared_from_this()); + } else { + return IndexReader::termDocs(term); } - - int32_t SegmentReader::maxDoc() - { - // Don't call ensureOpen() here (it could affect performance) - return si->docCount; +} + +TermDocsPtr SegmentReader::termDocs() { + ensureOpen(); + return newLucene(shared_from_this()); +} + +TermPositionsPtr SegmentReader::termPositions() { + ensureOpen(); + return 
newLucene(shared_from_this()); +} + +int32_t SegmentReader::docFreq(const TermPtr& t) { + ensureOpen(); + TermInfoPtr ti(core->getTermsReader()->get(t)); + return ti ? ti->docFreq : 0; +} + +int32_t SegmentReader::numDocs() { + // Don't call ensureOpen() here (it could affect performance) + int32_t n = maxDoc(); + if (deletedDocs) { + n -= deletedDocs->count(); } - - HashSet SegmentReader::getFieldNames(FieldOption fieldOption) - { - ensureOpen(); - HashSet fieldSet(HashSet::newInstance()); - for (int32_t i = 0; i < core->fieldInfos->size(); ++i) - { - FieldInfoPtr fi(core->fieldInfos->fieldInfo(i)); - if (fieldOption == FIELD_OPTION_ALL) - fieldSet.add(fi->name); - else if (!fi->isIndexed && fieldOption == FIELD_OPTION_UNINDEXED) - fieldSet.add(fi->name); - else if (fi->omitTermFreqAndPositions && fieldOption == FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS) - fieldSet.add(fi->name); - else if (fi->storePayloads && fieldOption == FIELD_OPTION_STORES_PAYLOADS) - fieldSet.add(fi->name); - else if (fi->isIndexed && fieldOption == FIELD_OPTION_INDEXED) - fieldSet.add(fi->name); - else if (fi->isIndexed && !fi->storeTermVector && fieldOption == FIELD_OPTION_INDEXED_NO_TERMVECTOR) - fieldSet.add(fi->name); - else if (fi->storeTermVector && !fi->storePositionWithTermVector && !fi->storeOffsetWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR) - fieldSet.add(fi->name); - else if (fi->isIndexed && fi->storeTermVector && fieldOption == FIELD_OPTION_INDEXED_WITH_TERMVECTOR) - fieldSet.add(fi->name); - else if (fi->storePositionWithTermVector && !fi->storeOffsetWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION) - fieldSet.add(fi->name); - else if (fi->storeOffsetWithTermVector && !fi->storePositionWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_OFFSET) - fieldSet.add(fi->name); - else if (fi->storeOffsetWithTermVector && fi->storePositionWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET) - 
fieldSet.add(fi->name); + return n; +} + +int32_t SegmentReader::maxDoc() { + // Don't call ensureOpen() here (it could affect performance) + return si->docCount; +} + +HashSet SegmentReader::getFieldNames(FieldOption fieldOption) { + ensureOpen(); + HashSet fieldSet(HashSet::newInstance()); + for (int32_t i = 0; i < core->fieldInfos->size(); ++i) { + FieldInfoPtr fi(core->fieldInfos->fieldInfo(i)); + if (fieldOption == FIELD_OPTION_ALL) { + fieldSet.add(fi->name); + } else if (!fi->isIndexed && fieldOption == FIELD_OPTION_UNINDEXED) { + fieldSet.add(fi->name); + } else if (fi->omitTermFreqAndPositions && fieldOption == FIELD_OPTION_OMIT_TERM_FREQ_AND_POSITIONS) { + fieldSet.add(fi->name); + } else if (fi->storePayloads && fieldOption == FIELD_OPTION_STORES_PAYLOADS) { + fieldSet.add(fi->name); + } else if (fi->isIndexed && fieldOption == FIELD_OPTION_INDEXED) { + fieldSet.add(fi->name); + } else if (fi->isIndexed && !fi->storeTermVector && fieldOption == FIELD_OPTION_INDEXED_NO_TERMVECTOR) { + fieldSet.add(fi->name); + } else if (fi->storeTermVector && !fi->storePositionWithTermVector && !fi->storeOffsetWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR) { + fieldSet.add(fi->name); + } else if (fi->isIndexed && fi->storeTermVector && fieldOption == FIELD_OPTION_INDEXED_WITH_TERMVECTOR) { + fieldSet.add(fi->name); + } else if (fi->storePositionWithTermVector && !fi->storeOffsetWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION) { + fieldSet.add(fi->name); + } else if (fi->storeOffsetWithTermVector && !fi->storePositionWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_OFFSET) { + fieldSet.add(fi->name); + } else if (fi->storeOffsetWithTermVector && fi->storePositionWithTermVector && fieldOption == FIELD_OPTION_TERMVECTOR_WITH_POSITION_OFFSET) { + fieldSet.add(fi->name); } - return fieldSet; - } - - bool SegmentReader::hasNorms(const String& field) - { - SyncLock syncLock(this); - ensureOpen(); - return _norms.contains(field); } 
- - ByteArray SegmentReader::getNorms(const String& field) - { - SyncLock syncLock(this); - NormPtr norm(_norms.get(field)); - return norm ? norm->bytes() : ByteArray(); + return fieldSet; +} + +bool SegmentReader::hasNorms(const String& field) { + SyncLock syncLock(this); + ensureOpen(); + return _norms.contains(field); +} + +ByteArray SegmentReader::getNorms(const String& field) { + SyncLock syncLock(this); + NormPtr norm(_norms.get(field)); + return norm ? norm->bytes() : ByteArray(); +} + +ByteArray SegmentReader::norms(const String& field) { + SyncLock syncLock(this); + ensureOpen(); + return getNorms(field); +} + +void SegmentReader::doSetNorm(int32_t doc, const String& field, uint8_t value) { + NormPtr norm(_norms.get(field)); + if (!norm) { // not an indexed field + return; } - - ByteArray SegmentReader::norms(const String& field) - { - SyncLock syncLock(this); - ensureOpen(); - return getNorms(field); + + normsDirty = true; + ByteArray bytes(norm->copyOnWrite()); + if (doc < 0 || doc >= bytes.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } - - void SegmentReader::doSetNorm(int32_t doc, const String& field, uint8_t value) - { - NormPtr norm(_norms.get(field)); - if (!norm) // not an indexed field - return; - - normsDirty = true; - ByteArray bytes(norm->copyOnWrite()); - if (doc < 0 || doc >= bytes.size()) - boost::throw_exception(IndexOutOfBoundsException()); - bytes[doc] = value; // set the value + bytes[doc] = value; // set the value +} + +void SegmentReader::norms(const String& field, ByteArray norms, int32_t offset) { + SyncLock syncLock(this); + ensureOpen(); + NormPtr norm(_norms.get(field)); + if (!norm) { + MiscUtils::arrayFill(norms.get(), offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); + return; } - - void SegmentReader::norms(const String& field, ByteArray norms, int32_t offset) - { - SyncLock syncLock(this); - ensureOpen(); - NormPtr norm(_norms.get(field)); - if (!norm) - { - MiscUtils::arrayFill(norms.get(), 
offset, norms.size(), DefaultSimilarity::encodeNorm(1.0)); - return; + + norm->bytes(norms.get(), offset, maxDoc()); +} + +void SegmentReader::openNorms(const DirectoryPtr& cfsDir, int32_t readBufferSize) { + int64_t nextNormSeek = SegmentMerger::NORMS_HEADER_LENGTH; // skip header (header unused for now) + int32_t _maxDoc = maxDoc(); + for (int32_t i = 0; i < core->fieldInfos->size(); ++i) { + FieldInfoPtr fi(core->fieldInfos->fieldInfo(i)); + if (_norms.contains(fi->name)) { + // in case this SegmentReader is being re-opened, we might be able to reuse some norm + // instances and skip loading them here + continue; } - - norm->bytes(norms.get(), offset, maxDoc()); - } - - void SegmentReader::openNorms(DirectoryPtr cfsDir, int32_t readBufferSize) - { - int64_t nextNormSeek = SegmentMerger::NORMS_HEADER_LENGTH; // skip header (header unused for now) - int32_t _maxDoc = maxDoc(); - for (int32_t i = 0; i < core->fieldInfos->size(); ++i) - { - FieldInfoPtr fi(core->fieldInfos->fieldInfo(i)); - if (_norms.contains(fi->name)) - { - // in case this SegmentReader is being re-opened, we might be able to reuse some norm - // instances and skip loading them here - continue; + if (fi->isIndexed && !fi->omitNorms) { + DirectoryPtr d(directory()); + String fileName(si->getNormFileName(fi->number)); + if (!si->hasSeparateNorms(fi->number)) { + d = cfsDir; } - if (fi->isIndexed && !fi->omitNorms) - { - DirectoryPtr d(directory()); - String fileName(si->getNormFileName(fi->number)); - if (!si->hasSeparateNorms(fi->number)) - d = cfsDir; - - // singleNormFile means multiple norms share this file - bool singleNormFile = boost::ends_with(fileName, String(L".") + IndexFileNames::NORMS_EXTENSION()); - IndexInputPtr normInput; - int64_t normSeek; - - if (singleNormFile) - { - normSeek = nextNormSeek; - if (!singleNormStream) - { - singleNormStream = d->openInput(fileName, readBufferSize); - singleNormRef = newLucene(); - } - else - singleNormRef->incRef(); - - // All norms in the .nrm 
file can share a single IndexInput since they are only used in - // a synchronized context. If this were to change in the future, a clone could be done here. - normInput = singleNormStream; - } - else - { - normSeek = 0; - normInput = d->openInput(fileName); + + // singleNormFile means multiple norms share this file + bool singleNormFile = boost::ends_with(fileName, String(L".") + IndexFileNames::NORMS_EXTENSION()); + IndexInputPtr normInput; + int64_t normSeek; + + if (singleNormFile) { + normSeek = nextNormSeek; + if (!singleNormStream) { + singleNormStream = d->openInput(fileName, readBufferSize); + singleNormRef = newLucene(); + } else { + singleNormRef->incRef(); } - - _norms.put(fi->name, newLucene(shared_from_this(), normInput, fi->number, normSeek)); - nextNormSeek += _maxDoc; // increment also if some norms are separate + + // All norms in the .nrm file can share a single IndexInput since they are only used in + // a synchronized context. If this were to change in the future, a clone could be done here. 
+ normInput = singleNormStream; + } else { + normSeek = 0; + normInput = d->openInput(fileName); } + + _norms.put(fi->name, newLucene(shared_from_this(), normInput, fi->number, normSeek)); + nextNormSeek += _maxDoc; // increment also if some norms are separate } } - - bool SegmentReader::termsIndexLoaded() - { - return core->termsIndexIsLoaded(); - } - - void SegmentReader::loadTermsIndex(int32_t termsIndexDivisor) - { - core->loadTermsIndex(si, termsIndexDivisor); +} + +bool SegmentReader::termsIndexLoaded() { + return core->termsIndexIsLoaded(); +} + +void SegmentReader::loadTermsIndex(int32_t termsIndexDivisor) { + core->loadTermsIndex(si, termsIndexDivisor); +} + +bool SegmentReader::normsClosed() { + if (singleNormStream) { + return false; } - - bool SegmentReader::normsClosed() - { - if (singleNormStream) + for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { + if (norm->second->refCount > 0) { return false; - for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) - { - if (norm->second->refCount > 0) - return false; } - return true; - } - - bool SegmentReader::normsClosed(const String& field) - { - return (_norms.get(field)->refCount == 0); } - - TermVectorsReaderPtr SegmentReader::getTermVectorsReader() - { - TermVectorsReaderPtr tvReader(termVectorsLocal.get()); - if (!tvReader) - { - TermVectorsReaderPtr orig(core->getTermVectorsReaderOrig()); - if (!orig) + return true; +} + +bool SegmentReader::normsClosed(const String& field) { + return (_norms.get(field)->refCount == 0); +} + +TermVectorsReaderPtr SegmentReader::getTermVectorsReader() { + TermVectorsReaderPtr tvReader(termVectorsLocal.get()); + if (!tvReader) { + TermVectorsReaderPtr orig(core->getTermVectorsReaderOrig()); + if (!orig) { + return TermVectorsReaderPtr(); + } else { + try { + tvReader = boost::dynamic_pointer_cast(orig->clone()); + } catch (...) 
{ return TermVectorsReaderPtr(); - else - { - try - { - tvReader = boost::dynamic_pointer_cast(orig->clone()); - } - catch (...) - { - return TermVectorsReaderPtr(); - } } - termVectorsLocal.set(tvReader); } - return tvReader; - } - - TermVectorsReaderPtr SegmentReader::getTermVectorsReaderOrig() - { - return core->getTermVectorsReaderOrig(); + termVectorsLocal.set(tvReader); } - - TermFreqVectorPtr SegmentReader::getTermFreqVector(int32_t docNumber, const String& field) - { - // Check if this field is invalid or has no stored term vector - ensureOpen(); - FieldInfoPtr fi(core->fieldInfos->fieldInfo(field)); - if (!fi || !fi->storeTermVector) - return TermFreqVectorPtr(); - - TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); - if (!termVectorsReader) - return TermFreqVectorPtr(); - - return termVectorsReader->get(docNumber, field); - } - - void SegmentReader::getTermFreqVector(int32_t docNumber, const String& field, TermVectorMapperPtr mapper) - { - ensureOpen(); - FieldInfoPtr fi(core->fieldInfos->fieldInfo(field)); - if (!fi || !fi->storeTermVector) - return; - - TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); - if (!termVectorsReader) - return; - - termVectorsReader->get(docNumber, field, mapper); - } - - void SegmentReader::getTermFreqVector(int32_t docNumber, TermVectorMapperPtr mapper) - { - ensureOpen(); - - TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); - if (!termVectorsReader) - return; - - termVectorsReader->get(docNumber, mapper); - } - - Collection SegmentReader::getTermFreqVectors(int32_t docNumber) - { - ensureOpen(); - - TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); - if (!termVectorsReader) - return Collection(); - - return termVectorsReader->get(docNumber); - } - - String SegmentReader::getSegmentName() - { - return core->segment; - } - - SegmentInfoPtr SegmentReader::getSegmentInfo() - { - return si; + return tvReader; +} + +TermVectorsReaderPtr 
SegmentReader::getTermVectorsReaderOrig() { + return core->getTermVectorsReaderOrig(); +} + +TermFreqVectorPtr SegmentReader::getTermFreqVector(int32_t docNumber, const String& field) { + // Check if this field is invalid or has no stored term vector + ensureOpen(); + FieldInfoPtr fi(core->fieldInfos->fieldInfo(field)); + if (!fi || !fi->storeTermVector) { + return TermFreqVectorPtr(); } - - void SegmentReader::setSegmentInfo(SegmentInfoPtr info) - { - si = info; + + TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); + if (!termVectorsReader) { + return TermFreqVectorPtr(); } - - void SegmentReader::startCommit() - { - rollbackSegmentInfo = boost::dynamic_pointer_cast(si->clone()); - rollbackHasChanges = _hasChanges; - rollbackDeletedDocsDirty = deletedDocsDirty; - rollbackNormsDirty = normsDirty; - rollbackPendingDeleteCount = pendingDeleteCount; - for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) - norm->second->rollbackDirty = norm->second->dirty; + + return termVectorsReader->get(docNumber, field); +} + +void SegmentReader::getTermFreqVector(int32_t docNumber, const String& field, const TermVectorMapperPtr& mapper) { + ensureOpen(); + FieldInfoPtr fi(core->fieldInfos->fieldInfo(field)); + if (!fi || !fi->storeTermVector) { + return; } - - void SegmentReader::rollbackCommit() - { - si->reset(rollbackSegmentInfo); - _hasChanges = rollbackHasChanges; - deletedDocsDirty = rollbackDeletedDocsDirty; - normsDirty = rollbackNormsDirty; - pendingDeleteCount = rollbackPendingDeleteCount; - for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) - norm->second->dirty = norm->second->rollbackDirty; + + TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); + if (!termVectorsReader) { + return; } - - DirectoryPtr SegmentReader::directory() - { - // Don't ensureOpen here - in certain cases, when a cloned/reopened reader needs to commit, - // it may call this method on the closed original reader - 
return core->dir; + + termVectorsReader->get(docNumber, field, mapper); +} + +void SegmentReader::getTermFreqVector(int32_t docNumber, const TermVectorMapperPtr& mapper) { + ensureOpen(); + + TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); + if (!termVectorsReader) { + return; } - - LuceneObjectPtr SegmentReader::getFieldCacheKey() - { - return core->freqStream; + + termVectorsReader->get(docNumber, mapper); +} + +Collection SegmentReader::getTermFreqVectors(int32_t docNumber) { + ensureOpen(); + + TermVectorsReaderPtr termVectorsReader(getTermVectorsReader()); + if (!termVectorsReader) { + return Collection(); } - - LuceneObjectPtr SegmentReader::getDeletesCacheKey() - { - return deletedDocs; + + return termVectorsReader->get(docNumber); +} + +String SegmentReader::getSegmentName() { + return core->segment; +} + +SegmentInfoPtr SegmentReader::getSegmentInfo() { + return si; +} + +void SegmentReader::setSegmentInfo(const SegmentInfoPtr& info) { + si = info; +} + +void SegmentReader::startCommit() { + rollbackSegmentInfo = boost::dynamic_pointer_cast(si->clone()); + rollbackHasChanges = _hasChanges; + rollbackDeletedDocsDirty = deletedDocsDirty; + rollbackNormsDirty = normsDirty; + rollbackPendingDeleteCount = pendingDeleteCount; + for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { + norm->second->rollbackDirty = norm->second->dirty; } - - int64_t SegmentReader::getUniqueTermCount() - { - return core->getTermsReader()->size(); +} + +void SegmentReader::rollbackCommit() { + si->reset(rollbackSegmentInfo); + _hasChanges = rollbackHasChanges; + deletedDocsDirty = rollbackDeletedDocsDirty; + normsDirty = rollbackNormsDirty; + pendingDeleteCount = rollbackPendingDeleteCount; + for (MapStringNorm::iterator norm = _norms.begin(); norm != _norms.end(); ++norm) { + norm->second->dirty = norm->second->rollbackDirty; } - - SegmentReaderPtr SegmentReader::getOnlySegmentReader(DirectoryPtr dir) - { - return 
getOnlySegmentReader(IndexReader::open(dir, false)); +} + +DirectoryPtr SegmentReader::directory() { + // Don't ensureOpen here - in certain cases, when a cloned/reopened reader needs to commit, + // it may call this method on the closed original reader + return core->dir; +} + +LuceneObjectPtr SegmentReader::getFieldCacheKey() { + return core->freqStream; +} + +LuceneObjectPtr SegmentReader::getDeletesCacheKey() { + return deletedDocs; +} + +int64_t SegmentReader::getUniqueTermCount() { + return core->getTermsReader()->size(); +} + +SegmentReaderPtr SegmentReader::getOnlySegmentReader(const DirectoryPtr& dir) { + return getOnlySegmentReader(IndexReader::open(dir, false)); +} + +SegmentReaderPtr SegmentReader::getOnlySegmentReader(const IndexReaderPtr& reader) { + SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(reader)); + if (segmentReader) { + return segmentReader; } - - SegmentReaderPtr SegmentReader::getOnlySegmentReader(IndexReaderPtr reader) - { - SegmentReaderPtr segmentReader(boost::dynamic_pointer_cast(reader)); - if (segmentReader) - return segmentReader; - - DirectoryReaderPtr directoryReader(boost::dynamic_pointer_cast(reader)); - if (directoryReader) - { - Collection subReaders(directoryReader->getSequentialSubReaders()); - if (subReaders.size() != 1) - boost::throw_exception(IllegalArgumentException(L"reader has " + StringUtils::toString(subReaders.size()) + L" segments instead of exactly one")); - return boost::dynamic_pointer_cast(subReaders[0]); + + DirectoryReaderPtr directoryReader(boost::dynamic_pointer_cast(reader)); + if (directoryReader) { + Collection subReaders(directoryReader->getSequentialSubReaders()); + if (subReaders.size() != 1) { + boost::throw_exception(IllegalArgumentException(L"reader has " + StringUtils::toString(subReaders.size()) + L" segments instead of exactly one")); } - - boost::throw_exception(IllegalArgumentException(L"reader is not a SegmentReader or a single-segment DirectoryReader")); - - return 
SegmentReaderPtr(); + return boost::dynamic_pointer_cast(subReaders[0]); } - - int32_t SegmentReader::getTermInfosIndexDivisor() - { - return core->termsIndexDivisor; - } - - CoreReaders::CoreReaders(SegmentReaderPtr origInstance, DirectoryPtr dir, SegmentInfoPtr si, int32_t readBufferSize, int32_t termsIndexDivisor) - { - ref = newLucene(); - - segment = si->name; - this->readBufferSize = readBufferSize; - this->dir = dir; - - bool success = false; - LuceneException finally; - try - { - DirectoryPtr dir0(dir); - if (si->getUseCompoundFile()) - { - cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); - dir0 = cfsReader; - } - cfsDir = dir0; - - fieldInfos = newLucene(cfsDir, segment + L"." + IndexFileNames::FIELD_INFOS_EXTENSION()); - - this->termsIndexDivisor = termsIndexDivisor; - TermInfosReaderPtr reader(newLucene(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor)); - if (termsIndexDivisor == -1) - tisNoIndex = reader; - else - tis = reader; - - // make sure that all index files have been read or are kept open so that if an index - // update removes them we'll still have them - freqStream = cfsDir->openInput(segment + L"." + IndexFileNames::FREQ_EXTENSION(), readBufferSize); - - if (fieldInfos->hasProx()) - proxStream = cfsDir->openInput(segment + L"." 
+ IndexFileNames::PROX_EXTENSION(), readBufferSize); - - success = true; + + boost::throw_exception(IllegalArgumentException(L"reader is not a SegmentReader or a single-segment DirectoryReader")); + + return SegmentReaderPtr(); +} + +int32_t SegmentReader::getTermInfosIndexDivisor() { + return core->termsIndexDivisor; +} + +CoreReaders::CoreReaders(const SegmentReaderPtr& origInstance, const DirectoryPtr& dir, const SegmentInfoPtr& si, int32_t readBufferSize, int32_t termsIndexDivisor) { + ref = newLucene(); + + segment = si->name; + this->readBufferSize = readBufferSize; + this->dir = dir; + + bool success = false; + LuceneException finally; + try { + DirectoryPtr dir0(dir); + if (si->getUseCompoundFile()) { + cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); + dir0 = cfsReader; } - catch (LuceneException& e) - { - finally = e; + cfsDir = dir0; + + fieldInfos = newLucene(cfsDir, segment + L"." + IndexFileNames::FIELD_INFOS_EXTENSION()); + + this->termsIndexDivisor = termsIndexDivisor; + TermInfosReaderPtr reader(newLucene(cfsDir, segment, fieldInfos, readBufferSize, termsIndexDivisor)); + if (termsIndexDivisor == -1) { + tisNoIndex = reader; + } else { + tis = reader; } - if (!success) - decRef(); - finally.throwException(); - - // Must assign this at the end -- if we hit an exception above core, we don't want to attempt to - // purge the FieldCache (will hit NPE because core is not assigned yet). 
- _origInstance = origInstance; - } - - CoreReaders::~CoreReaders() - { - } - - TermVectorsReaderPtr CoreReaders::getTermVectorsReaderOrig() - { - SyncLock syncLock(this); - return termVectorsReaderOrig; - } - - FieldsReaderPtr CoreReaders::getFieldsReaderOrig() - { - SyncLock syncLock(this); - return fieldsReaderOrig; - } - - void CoreReaders::incRef() - { - SyncLock syncLock(this); - ref->incRef(); - } - - DirectoryPtr CoreReaders::getCFSReader() - { - SyncLock syncLock(this); - return cfsReader; - } - - TermInfosReaderPtr CoreReaders::getTermsReader() - { - SyncLock syncLock(this); - return tis ? tis : tisNoIndex; + + // make sure that all index files have been read or are kept open so that if an index + // update removes them we'll still have them + freqStream = cfsDir->openInput(segment + L"." + IndexFileNames::FREQ_EXTENSION(), readBufferSize); + + if (fieldInfos->hasProx()) { + proxStream = cfsDir->openInput(segment + L"." + IndexFileNames::PROX_EXTENSION(), readBufferSize); + } + + success = true; + } catch (LuceneException& e) { + finally = e; } - - bool CoreReaders::termsIndexIsLoaded() - { - SyncLock syncLock(this); - return tis; + if (!success) { + decRef(); } - - void CoreReaders::loadTermsIndex(SegmentInfoPtr si, int32_t termsIndexDivisor) - { - SyncLock syncLock(this); - if (!tis) - { - DirectoryPtr dir0; - if (si->getUseCompoundFile()) - { - // In some cases, we were originally opened when CFS was not used, but then we are asked - // to open the terms reader with index, the segment has switched to CFS - if (!cfsReader) - cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); - - dir0 = cfsReader; + finally.throwException(); + + // Must assign this at the end -- if we hit an exception above core, we don't want to attempt to + // purge the FieldCache (will hit NPE because core is not assigned yet). 
+ _origInstance = origInstance; +} + +CoreReaders::~CoreReaders() { +} + +TermVectorsReaderPtr CoreReaders::getTermVectorsReaderOrig() { + SyncLock syncLock(this); + return termVectorsReaderOrig; +} + +FieldsReaderPtr CoreReaders::getFieldsReaderOrig() { + SyncLock syncLock(this); + return fieldsReaderOrig; +} + +void CoreReaders::incRef() { + SyncLock syncLock(this); + ref->incRef(); +} + +DirectoryPtr CoreReaders::getCFSReader() { + SyncLock syncLock(this); + return cfsReader; +} + +TermInfosReaderPtr CoreReaders::getTermsReader() { + SyncLock syncLock(this); + return tis ? tis : tisNoIndex; +} + +bool CoreReaders::termsIndexIsLoaded() { + SyncLock syncLock(this); + return tis.get() != NULL; +} + +void CoreReaders::loadTermsIndex(const SegmentInfoPtr& si, int32_t termsIndexDivisor) { + SyncLock syncLock(this); + if (!tis) { + DirectoryPtr dir0; + if (si->getUseCompoundFile()) { + // In some cases, we were originally opened when CFS was not used, but then we are asked + // to open the terms reader with index, the segment has switched to CFS + if (!cfsReader) { + cfsReader = newLucene(dir, segment + L"." 
+ IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); } - else - dir0 = dir; - - tis = newLucene(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor); + + dir0 = cfsReader; + } else { + dir0 = dir; } + + tis = newLucene(dir0, segment, fieldInfos, readBufferSize, termsIndexDivisor); } - - void CoreReaders::decRef() - { - SyncLock syncLock(this); - if (ref->decRef() == 0) - { - // close everything, nothing is shared anymore with other readers - if (tis) - { - tis->close(); - tis.reset(); // null so if an app hangs on to us we still free most ram - } - if (tisNoIndex) - tisNoIndex->close(); - if (freqStream) - freqStream->close(); - if (proxStream) - proxStream->close(); - if (termVectorsReaderOrig) - termVectorsReaderOrig->close(); - if (fieldsReaderOrig) - fieldsReaderOrig->close(); - if (cfsReader) - cfsReader->close(); - if (storeCFSReader) - storeCFSReader->close(); - - // Force FieldCache to evict our entries at this point - SegmentReaderPtr origInstance(_origInstance.lock()); - if (origInstance) - FieldCache::DEFAULT()->purge(origInstance); +} + +void CoreReaders::decRef() { + SyncLock syncLock(this); + if (ref->decRef() == 0) { + // close everything, nothing is shared anymore with other readers + if (tis) { + tis->close(); + tis.reset(); // null so if an app hangs on to us we still free most ram + } + if (tisNoIndex) { + tisNoIndex->close(); + } + if (freqStream) { + freqStream->close(); + } + if (proxStream) { + proxStream->close(); + } + if (termVectorsReaderOrig) { + termVectorsReaderOrig->close(); + } + if (fieldsReaderOrig) { + fieldsReaderOrig->close(); + } + if (cfsReader) { + cfsReader->close(); + } + if (storeCFSReader) { + storeCFSReader->close(); + } + + // Force FieldCache to evict our entries at this point + SegmentReaderPtr origInstance(_origInstance.lock()); + if (origInstance) { + FieldCache::DEFAULT()->purge(origInstance); } } - - void CoreReaders::openDocStores(SegmentInfoPtr si) - { - SyncLock syncLock(this); - 
BOOST_ASSERT(si->name == segment); - - if (!fieldsReaderOrig) - { - DirectoryPtr storeDir; - if (si->getDocStoreOffset() != -1) - { - if (si->getDocStoreIsCompoundFile()) - { - BOOST_ASSERT(!storeCFSReader); - storeCFSReader = newLucene(dir, si->getDocStoreSegment() + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION(), readBufferSize); - storeDir = storeCFSReader; - BOOST_ASSERT(storeDir); - } - else - { - storeDir = dir; - BOOST_ASSERT(storeDir); - } - } - else if (si->getUseCompoundFile()) - { - // In some cases, we were originally opened when CFS was not used, but then we are asked to open doc - // stores after the segment has switched to CFS - if (!cfsReader) - cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); - storeDir = cfsReader; +} + +void CoreReaders::openDocStores(const SegmentInfoPtr& si) { + SyncLock syncLock(this); + BOOST_ASSERT(si->name == segment); + + if (!fieldsReaderOrig) { + DirectoryPtr storeDir; + if (si->getDocStoreOffset() != -1) { + if (si->getDocStoreIsCompoundFile()) { + BOOST_ASSERT(!storeCFSReader); + storeCFSReader = newLucene(dir, si->getDocStoreSegment() + L"." + IndexFileNames::COMPOUND_FILE_STORE_EXTENSION(), readBufferSize); + storeDir = storeCFSReader; BOOST_ASSERT(storeDir); - } - else - { + } else { storeDir = dir; BOOST_ASSERT(storeDir); } - - String storesSegment(si->getDocStoreOffset() != -1 ? 
si->getDocStoreSegment() : segment); - - fieldsReaderOrig = newLucene(storeDir, storesSegment, fieldInfos, readBufferSize, si->getDocStoreOffset(), si->docCount); - - // Verify two sources of "maxDoc" agree - if (si->getDocStoreOffset() == -1 && fieldsReaderOrig->size() != si->docCount) - { - boost::throw_exception(CorruptIndexException(L"doc counts differ for segment " + segment + - L": fieldsReader shows " + StringUtils::toString(fieldsReaderOrig->size()) + - L" but segmentInfo shows " + StringUtils::toString(si->docCount))); + } else if (si->getUseCompoundFile()) { + // In some cases, we were originally opened when CFS was not used, but then we are asked to open doc + // stores after the segment has switched to CFS + if (!cfsReader) { + cfsReader = newLucene(dir, segment + L"." + IndexFileNames::COMPOUND_FILE_EXTENSION(), readBufferSize); } - - if (fieldInfos->hasVectors()) // open term vector files only as needed - termVectorsReaderOrig = newLucene(storeDir, storesSegment, fieldInfos, readBufferSize, si->getDocStoreOffset(), si->docCount); + storeDir = cfsReader; + BOOST_ASSERT(storeDir); + } else { + storeDir = dir; + BOOST_ASSERT(storeDir); } - } - - FieldsReaderLocal::FieldsReaderLocal(SegmentReaderPtr reader) - { - this->_reader = reader; - } - - FieldsReaderPtr FieldsReaderLocal::initialValue() - { - return boost::dynamic_pointer_cast(SegmentReaderPtr(_reader)->core->getFieldsReaderOrig()->clone()); - } - - SegmentReaderRef::SegmentReaderRef() - { - _refCount = 1; - } - - SegmentReaderRef::~SegmentReaderRef() - { - } - - String SegmentReaderRef::toString() - { - StringStream buffer; - buffer << L"refcount: " << _refCount; - return buffer.str(); - } - - int32_t SegmentReaderRef::refCount() - { - SyncLock syncLock(this); - return _refCount; - } - - int32_t SegmentReaderRef::incRef() - { - SyncLock syncLock(this); - BOOST_ASSERT(_refCount > 0); - return ++_refCount; - } - - int32_t SegmentReaderRef::decRef() - { - SyncLock syncLock(this); - 
BOOST_ASSERT(_refCount > 0); - return --_refCount; - } - - Norm::Norm() - { - this->refCount = 1; - this->normSeek = 0; - this->dirty = false; - this->rollbackDirty = false; - this->number = 0; - } - - Norm::Norm(SegmentReaderPtr reader, IndexInputPtr in, int32_t number, int64_t normSeek) - { - this->_reader = reader; - this->refCount = 1; - this->dirty = false; - this->rollbackDirty = false; - this->in = in; - this->number = number; - this->normSeek = normSeek; - } - - Norm::~Norm() - { - } - - void Norm::incRef() - { - SyncLock syncLock(this); - BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); - ++refCount; - } - - void Norm::closeInput() - { - SegmentReaderPtr reader(_reader.lock()); - if (in && reader) - { - if (in != reader->singleNormStream) - { - // It's private to us -- just close it - in->close(); - } - else - { - // We are sharing this with others -- decRef and maybe close the shared norm stream - if (reader->singleNormRef->decRef() == 0) - { - reader->singleNormStream->close(); - reader->singleNormStream.reset(); - } - } - - in.reset(); + + String storesSegment(si->getDocStoreOffset() != -1 ? 
si->getDocStoreSegment() : segment); + + fieldsReaderOrig = newLucene(storeDir, storesSegment, fieldInfos, readBufferSize, si->getDocStoreOffset(), si->docCount); + + // Verify two sources of "maxDoc" agree + if (si->getDocStoreOffset() == -1 && fieldsReaderOrig->size() != si->docCount) { + boost::throw_exception(CorruptIndexException(L"doc counts differ for segment " + segment + + L": fieldsReader shows " + StringUtils::toString(fieldsReaderOrig->size()) + + L" but segmentInfo shows " + StringUtils::toString(si->docCount))); + } + + if (fieldInfos->hasVectors()) { // open term vector files only as needed + termVectorsReaderOrig = newLucene(storeDir, storesSegment, fieldInfos, readBufferSize, si->getDocStoreOffset(), si->docCount); } } - - void Norm::decRef() - { - SyncLock syncLock(this); - BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); - - if (--refCount == 0) - { - if (origNorm) - { - origNorm->decRef(); - origNorm.reset(); - } - else - closeInput(); - - if (origReader) - origReader.reset(); - - if (_bytes) - { - BOOST_ASSERT(_bytesRef); - _bytesRef->decRef(); - _bytes.reset(); - _bytesRef.reset(); - } - else - { - BOOST_ASSERT(!_bytesRef); +} + +FieldsReaderLocal::FieldsReaderLocal(const SegmentReaderPtr& reader) { + this->_reader = reader; +} + +FieldsReaderPtr FieldsReaderLocal::initialValue() { + return boost::dynamic_pointer_cast(SegmentReaderPtr(_reader)->core->getFieldsReaderOrig()->clone()); +} + +SegmentReaderRef::SegmentReaderRef() { + _refCount = 1; +} + +SegmentReaderRef::~SegmentReaderRef() { +} + +String SegmentReaderRef::toString() { + StringStream buffer; + buffer << L"refcount: " << _refCount; + return buffer.str(); +} + +int32_t SegmentReaderRef::refCount() { + SyncLock syncLock(this); + return _refCount; +} + +int32_t SegmentReaderRef::incRef() { + SyncLock syncLock(this); + BOOST_ASSERT(_refCount > 0); + return ++_refCount; +} + +int32_t SegmentReaderRef::decRef() { + SyncLock syncLock(this); + BOOST_ASSERT(_refCount > 
0); + return --_refCount; +} + +Norm::Norm() { + this->refCount = 1; + this->normSeek = 0; + this->dirty = false; + this->rollbackDirty = false; + this->number = 0; +} + +Norm::Norm(const SegmentReaderPtr& reader, const IndexInputPtr& in, int32_t number, int64_t normSeek) { + this->_reader = reader; + this->refCount = 1; + this->dirty = false; + this->rollbackDirty = false; + this->in = in; + this->number = number; + this->normSeek = normSeek; +} + +Norm::~Norm() { +} + +void Norm::incRef() { + SyncLock syncLock(this); + BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); + ++refCount; +} + +void Norm::closeInput() { + SegmentReaderPtr reader(_reader.lock()); + if (in && reader) { + if (in != reader->singleNormStream) { + // It's private to us -- just close it + in->close(); + } else { + // We are sharing this with others -- decRef and maybe close the shared norm stream + if (reader->singleNormRef->decRef() == 0) { + reader->singleNormStream->close(); + reader->singleNormStream.reset(); } } + + in.reset(); } - - void Norm::bytes(uint8_t* bytesOut, int32_t offset, int32_t length) - { - SyncLock syncLock(this); - BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); - if (_bytes) - { - // Already cached - copy from cache - BOOST_ASSERT(length <= SegmentReaderPtr(_reader)->maxDoc()); - MiscUtils::arrayCopy(_bytes.get(), 0, bytesOut, offset, length); +} + +void Norm::decRef() { + SyncLock syncLock(this); + BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); + + if (--refCount == 0) { + if (origNorm) { + origNorm->decRef(); + origNorm.reset(); + } else { + closeInput(); } - else - { - // Not cached - if (origNorm) - { - // Ask origNorm to load - origNorm->bytes(bytesOut, offset, length); - } - else - { - // We are orig - read ourselves from disk - SyncLock instancesLock(in); - in->seek(normSeek); - in->readBytes(bytesOut, offset, length, false); - } + + if (origReader) { + origReader.reset(); } - } - - ByteArray 
Norm::bytes() - { - SyncLock syncLock(this); - BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); - if (!_bytes) // value not yet read - { + + if (_bytes) { + BOOST_ASSERT(_bytesRef); + _bytesRef->decRef(); + _bytes.reset(); + _bytesRef.reset(); + } else { BOOST_ASSERT(!_bytesRef); - if (origNorm) - { - // Ask origNorm to load so that for a series of reopened readers we share a single read-only byte[] - _bytes = origNorm->bytes(); - _bytesRef = origNorm->_bytesRef; - _bytesRef->incRef(); - - // Once we've loaded the bytes we no longer need origNorm - origNorm->decRef(); - origNorm.reset(); - origReader.reset(); - } - else - { - // We are the origNorm, so load the bytes for real ourself - int32_t count = SegmentReaderPtr(_reader)->maxDoc(); - _bytes = ByteArray::newInstance(count); - - // Since we are orig, in must not be null - BOOST_ASSERT(in); - - // Read from disk. - { - SyncLock instancesLock(in); - in->seek(normSeek); - in->readBytes(_bytes.get(), 0, count, false); - } - - _bytesRef = newLucene(); - closeInput(); - } } - - return _bytes; - } - - SegmentReaderRefPtr Norm::bytesRef() - { - return _bytesRef; } - - ByteArray Norm::copyOnWrite() - { - SyncLock syncLock(this); - BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); - bytes(); - BOOST_ASSERT(_bytes); - BOOST_ASSERT(_bytesRef); - if (_bytesRef->refCount() > 1) - { - // I cannot be the origNorm for another norm instance if I'm being changed. 
- // ie, only the "head Norm" can be changed - BOOST_ASSERT(refCount == 1); - SegmentReaderRefPtr oldRef(_bytesRef); - _bytes = SegmentReaderPtr(_reader)->cloneNormBytes(_bytes); - _bytesRef = newLucene(); - oldRef->decRef(); +} + +void Norm::bytes(uint8_t* bytesOut, int32_t offset, int32_t length) { + SyncLock syncLock(this); + BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); + if (_bytes) { + // Already cached - copy from cache + BOOST_ASSERT(length <= SegmentReaderPtr(_reader)->maxDoc()); + MiscUtils::arrayCopy(_bytes.get(), 0, bytesOut, offset, length); + } else { + // Not cached + if (origNorm) { + // Ask origNorm to load + origNorm->bytes(bytesOut, offset, length); + } else { + // We are orig - read ourselves from disk + SyncLock instancesLock(in); + in->seek(normSeek); + in->readBytes(bytesOut, offset, length, false); } - dirty = true; - return _bytes; } - - LuceneObjectPtr Norm::clone(LuceneObjectPtr other) - { - SyncLock syncLock(this); - - BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); - LuceneObjectPtr clone = other ? 
other : newLucene(); - NormPtr cloneNorm(boost::dynamic_pointer_cast(clone)); - cloneNorm->_reader = _reader; - cloneNorm->origNorm = origNorm; - cloneNorm->origReader = origReader; - cloneNorm->normSeek = normSeek; - cloneNorm->_bytesRef = _bytesRef; - cloneNorm->_bytes = _bytes; - cloneNorm->dirty = dirty; - cloneNorm->number = number; - cloneNorm->rollbackDirty = rollbackDirty; - - cloneNorm->refCount = 1; - - if (_bytes) - { - BOOST_ASSERT(_bytesRef); - BOOST_ASSERT(!origNorm); - - // Clone holds a reference to my bytes - cloneNorm->_bytesRef->incRef(); - } - else - { - BOOST_ASSERT(!_bytesRef); - if (!origNorm) +} + +ByteArray Norm::bytes() { + SyncLock syncLock(this); + BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); + if (!_bytes) { // value not yet read + BOOST_ASSERT(!_bytesRef); + if (origNorm) { + // Ask origNorm to load so that for a series of reopened readers we share a single read-only byte[] + _bytes = origNorm->bytes(); + _bytesRef = origNorm->_bytesRef; + _bytesRef->incRef(); + + // Once we've loaded the bytes we no longer need origNorm + origNorm->decRef(); + origNorm.reset(); + origReader.reset(); + } else { + // We are the origNorm, so load the bytes for real ourself + int32_t count = SegmentReaderPtr(_reader)->maxDoc(); + _bytes = ByteArray::newInstance(count); + + // Since we are orig, in must not be null + BOOST_ASSERT(in); + + // Read from disk. 
{ - // I become the origNorm for the clone - cloneNorm->origNorm = shared_from_this(); - cloneNorm->origReader = SegmentReaderPtr(_reader); + SyncLock instancesLock(in); + in->seek(normSeek); + in->readBytes(_bytes.get(), 0, count, false); } - cloneNorm->origNorm->incRef(); + + _bytesRef = newLucene(); + closeInput(); } - - // Only the origNorm will actually readBytes from in - cloneNorm->in.reset(); - - return cloneNorm; } - - void Norm::reWrite(SegmentInfoPtr si) - { - BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); - - // NOTE: norms are re-written in regular directory, not cfs - si->advanceNormGen(this->number); - String normFileName(si->getNormFileName(this->number)); - SegmentReaderPtr reader(_reader); - IndexOutputPtr out(reader->directory()->createOutput(normFileName)); - bool success = false; - LuceneException finally; - try - { - try - { - out->writeBytes(_bytes.get(), reader->maxDoc()); - } - catch (LuceneException& e) - { - finally = e; - } - out->close(); - finally.throwException(); - success = true; + + return _bytes; +} + +SegmentReaderRefPtr Norm::bytesRef() { + return _bytesRef; +} + +ByteArray Norm::copyOnWrite() { + SyncLock syncLock(this); + BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); + bytes(); + BOOST_ASSERT(_bytes); + BOOST_ASSERT(_bytesRef); + if (_bytesRef->refCount() > 1) { + // I cannot be the origNorm for another norm instance if I'm being changed. + // ie, only the "head Norm" can be changed + BOOST_ASSERT(refCount == 1); + SegmentReaderRefPtr oldRef(_bytesRef); + _bytes = SegmentReaderPtr(_reader)->cloneNormBytes(_bytes); + _bytesRef = newLucene(); + oldRef->decRef(); + } + dirty = true; + return _bytes; +} + +LuceneObjectPtr Norm::clone(const LuceneObjectPtr& other) { + SyncLock syncLock(this); + + BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); + LuceneObjectPtr clone = other ? 
other : newLucene(); + NormPtr cloneNorm(boost::dynamic_pointer_cast(clone)); + cloneNorm->_reader = _reader; + cloneNorm->origNorm = origNorm; + cloneNorm->origReader = origReader; + cloneNorm->normSeek = normSeek; + cloneNorm->_bytesRef = _bytesRef; + cloneNorm->_bytes = _bytes; + cloneNorm->dirty = dirty; + cloneNorm->number = number; + cloneNorm->rollbackDirty = rollbackDirty; + + cloneNorm->refCount = 1; + + if (_bytes) { + BOOST_ASSERT(_bytesRef); + BOOST_ASSERT(!origNorm); + + // Clone holds a reference to my bytes + cloneNorm->_bytesRef->incRef(); + } else { + BOOST_ASSERT(!_bytesRef); + if (!origNorm) { + // I become the origNorm for the clone + cloneNorm->origNorm = shared_from_this(); + cloneNorm->origReader = SegmentReaderPtr(_reader); } - catch (LuceneException& e) - { + cloneNorm->origNorm->incRef(); + } + + // Only the origNorm will actually readBytes from in + cloneNorm->in.reset(); + + return cloneNorm; +} + +void Norm::reWrite(const SegmentInfoPtr& si) { + BOOST_ASSERT(refCount > 0 && (!origNorm || origNorm->refCount > 0)); + + // NOTE: norms are re-written in regular directory, not cfs + si->advanceNormGen(this->number); + String normFileName(si->getNormFileName(this->number)); + SegmentReaderPtr reader(_reader); + IndexOutputPtr out(reader->directory()->createOutput(normFileName)); + bool success = false; + LuceneException finally; + try { + try { + out->writeBytes(_bytes.get(), reader->maxDoc()); + } catch (LuceneException& e) { finally = e; } - if (!success) - { - try - { - reader->directory()->deleteFile(normFileName); - } - catch (...) - { - // suppress this so we keep throwing the original exception - } - } + out->close(); finally.throwException(); - this->dirty = false; + success = true; + } catch (LuceneException& e) { + finally = e; + } + if (!success) { + try { + reader->directory()->deleteFile(normFileName); + } catch (...) 
{ + // suppress this so we keep throwing the original exception + } } + finally.throwException(); + this->dirty = false; +} + } diff --git a/src/core/index/SegmentTermDocs.cpp b/src/core/index/SegmentTermDocs.cpp index 937c1166..7e17b9ef 100644 --- a/src/core/index/SegmentTermDocs.cpp +++ b/src/core/index/SegmentTermDocs.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -19,232 +19,211 @@ #include "BitVector.h" #include "MiscUtils.h" -namespace Lucene -{ - SegmentTermDocs::SegmentTermDocs(SegmentReaderPtr parent) +namespace Lucene { + +SegmentTermDocs::SegmentTermDocs(const SegmentReaderPtr& parent) { + this->_parent = parent; + this->count = 0; + this->df = 0; + this->_doc = 0; + this->_freq = 0; + this->freqBasePointer = 0; + this->proxBasePointer = 0; + this->skipPointer = 0; + this->haveSkipped = false; + this->currentFieldStoresPayloads = false; + this->currentFieldOmitTermFreqAndPositions = false; + + this->_freqStream = boost::dynamic_pointer_cast(parent->core->freqStream->clone()); { - this->_parent = parent; - this->count = 0; - this->df = 0; - this->_doc = 0; - this->_freq = 0; - this->freqBasePointer = 0; - this->proxBasePointer = 0; - this->skipPointer = 0; - this->haveSkipped = false; - this->currentFieldStoresPayloads = false; - this->currentFieldOmitTermFreqAndPositions = false; - - this->_freqStream = boost::dynamic_pointer_cast(parent->core->freqStream->clone()); - { - SyncLock parentLock(parent); - this->deletedDocs = parent->deletedDocs; - } - this->skipInterval = parent->core->getTermsReader()->getSkipInterval(); - this->maxSkipLevels = 
parent->core->getTermsReader()->getMaxSkipLevels(); + SyncLock parentLock(parent); + this->deletedDocs = parent->deletedDocs; + this->__deletedDocs = this->deletedDocs.get(); } - - SegmentTermDocs::~SegmentTermDocs() - { + this->skipInterval = parent->core->getTermsReader()->getSkipInterval(); + this->maxSkipLevels = parent->core->getTermsReader()->getMaxSkipLevels(); + this->__parent = parent.get(); + this->__freqStream = _freqStream.get(); +} + +SegmentTermDocs::~SegmentTermDocs() { +} + +void SegmentTermDocs::seek(const TermPtr& term) { + TermInfoPtr ti(__parent->core->getTermsReader()->get(term)); + seek(ti, term); +} + +void SegmentTermDocs::seek(const TermEnumPtr& termEnum) { + TermInfoPtr ti; + TermPtr term; + + SegmentTermEnumPtr segmentTermEnum(boost::dynamic_pointer_cast(termEnum)); + + // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs + if (segmentTermEnum && segmentTermEnum->fieldInfos == __parent->core->fieldInfos) { // optimized case + term = segmentTermEnum->term(); + ti = segmentTermEnum->termInfo(); + } else { // punt case + term = termEnum->term(); + ti = __parent->core->getTermsReader()->get(term); } - - void SegmentTermDocs::seek(TermPtr term) - { - TermInfoPtr ti(SegmentReaderPtr(_parent)->core->getTermsReader()->get(term)); - seek(ti, term); + + seek(ti, term); +} + +void SegmentTermDocs::seek(const TermInfoPtr& ti, const TermPtr& term) { + count = 0; + FieldInfoPtr fi(__parent->core->fieldInfos->fieldInfo(term->_field)); + currentFieldOmitTermFreqAndPositions = fi ? fi->omitTermFreqAndPositions : false; + currentFieldStoresPayloads = fi ? 
fi->storePayloads : false; + if (!ti) { + df = 0; + } else { + df = ti->docFreq; + _doc = 0; + freqBasePointer = ti->freqPointer; + proxBasePointer = ti->proxPointer; + skipPointer = freqBasePointer + ti->skipOffset; + __freqStream->seek(freqBasePointer); + haveSkipped = false; } - - void SegmentTermDocs::seek(TermEnumPtr termEnum) - { - TermInfoPtr ti; - TermPtr term; - - SegmentTermEnumPtr segmentTermEnum(boost::dynamic_pointer_cast(termEnum)); - SegmentReaderPtr parent(_parent); - - // use comparison of fieldinfos to verify that termEnum belongs to the same segment as this SegmentTermDocs - if (segmentTermEnum && segmentTermEnum->fieldInfos == parent->core->fieldInfos) // optimized case - { - term = segmentTermEnum->term(); - ti = segmentTermEnum->termInfo(); - } - else // punt case - { - term = termEnum->term(); - ti = parent->core->getTermsReader()->get(term); - } - - seek(ti, term); +} + +void SegmentTermDocs::close() { + __freqStream->close(); + if (skipListReader) { + skipListReader->close(); } - - void SegmentTermDocs::seek(TermInfoPtr ti, TermPtr term) - { - count = 0; - FieldInfoPtr fi(SegmentReaderPtr(_parent)->core->fieldInfos->fieldInfo(term->_field)); - currentFieldOmitTermFreqAndPositions = fi ? fi->omitTermFreqAndPositions : false; - currentFieldStoresPayloads = fi ? 
fi->storePayloads : false; - if (!ti) - df = 0; - else - { - df = ti->docFreq; - _doc = 0; - freqBasePointer = ti->freqPointer; - proxBasePointer = ti->proxPointer; - skipPointer = freqBasePointer + ti->skipOffset; - _freqStream->seek(freqBasePointer); - haveSkipped = false; +} + +int32_t SegmentTermDocs::doc() { + return _doc; +} + +int32_t SegmentTermDocs::freq() { + return _freq; +} + +void SegmentTermDocs::skippingDoc() { +} + +bool SegmentTermDocs::next() { + while (true) { + if (count == df) { + return false; } - } - - void SegmentTermDocs::close() - { - _freqStream->close(); - if (skipListReader) - skipListReader->close(); - } - - int32_t SegmentTermDocs::doc() - { - return _doc; - } - - int32_t SegmentTermDocs::freq() - { - return _freq; - } - - void SegmentTermDocs::skippingDoc() - { - } - - bool SegmentTermDocs::next() - { - while (true) - { - if (count == df) - return false; - int32_t docCode = _freqStream->readVInt(); - - if (currentFieldOmitTermFreqAndPositions) - { - _doc += docCode; - _freq = 1; - } - else - { - _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit - if ((docCode & 1) != 0) // if low bit is set - _freq = 1; // freq is one - else - _freq = _freqStream->readVInt(); // else read freq + int32_t docCode = __freqStream->readVInt(); + + if (currentFieldOmitTermFreqAndPositions) { + _doc += docCode; + _freq = 1; + } else { + _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit + if ((docCode & 1) != 0) { // if low bit is set + _freq = 1; // freq is one + } else { + _freq = __freqStream->readVInt(); // else read freq } - - ++count; - - if (!deletedDocs || !deletedDocs->get(_doc)) - break; - skippingDoc(); } - return true; - } - - int32_t SegmentTermDocs::read(Collection docs, Collection freqs) - { - int32_t length = docs.size(); - if (currentFieldOmitTermFreqAndPositions) - return readNoTf(docs, freqs, length); - else - { - int32_t i = 0; - while (i < length && count < df) - { - // manually inlined call to next() 
for speed - int32_t docCode = _freqStream->readVInt(); - _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit - if ((docCode & 1) != 0) // if low bit is set - _freq = 1; // freq is one - else - _freq = _freqStream->readVInt(); // else read freq - ++count; - - if (!deletedDocs || !deletedDocs->get(_doc)) - { - docs[i] = _doc; - freqs[i] = _freq; - ++i; - } - } - return i; + + ++count; + + if (!__deletedDocs || !__deletedDocs->get(_doc)) { + break; } + skippingDoc(); } - - int32_t SegmentTermDocs::readNoTf(Collection docs, Collection freqs, int32_t length) - { + return true; +} + +int32_t SegmentTermDocs::read(Collection& docs, Collection& freqs) { + auto* __docs = docs.get(); + auto* __freqs = freqs.get(); + int32_t length = __docs->size(); + if (currentFieldOmitTermFreqAndPositions) { + return readNoTf(docs, freqs, length); + } else { int32_t i = 0; - while (i < length && count < df) - { + while (i < length && count < df) { // manually inlined call to next() for speed - _doc += _freqStream->readVInt(); + int32_t docCode = __freqStream->readVInt(); + _doc += MiscUtils::unsignedShift(docCode, 1); // shift off low bit + if ((docCode & 1) != 0) { // if low bit is set + _freq = 1; // freq is one + } else { + _freq = __freqStream->readVInt(); // else read freq + } ++count; - - if (!deletedDocs || !deletedDocs->get(_doc)) - { - docs[i] = _doc; - - // Hardware freq to 1 when term freqs were not stored in the index - freqs[i] = 1; + + if (!__deletedDocs || !__deletedDocs->get(_doc)) { + (*__docs)[i] = _doc; + (*__freqs)[i] = _freq; ++i; } } return i; } - - void SegmentTermDocs::skipProx(int64_t proxPointer, int32_t payloadLength) - { +} + +int32_t SegmentTermDocs::readNoTf(Collection& docs, Collection& freqs, int32_t length) { + int32_t i = 0; + while (i < length && count < df) { + // manually inlined call to next() for speed + _doc += __freqStream->readVInt(); + ++count; + + if (!__deletedDocs || !__deletedDocs->get(_doc)) { + docs[i] = _doc; + + // Hardware 
freq to 1 when term freqs were not stored in the index + freqs[i] = 1; + ++i; + } } - - bool SegmentTermDocs::skipTo(int32_t target) - { - if (df >= skipInterval) // optimized case - { - if (!skipListReader) - skipListReader = newLucene(boost::dynamic_pointer_cast(_freqStream->clone()), maxSkipLevels, skipInterval); // lazily clone - - if (!haveSkipped) // lazily initialize skip stream - { - skipListReader->init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads); - haveSkipped = true; - } - - int32_t newCount = skipListReader->skipTo(target); - if (newCount > count) - { - _freqStream->seek(skipListReader->getFreqPointer()); - skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength()); - - _doc = skipListReader->getDoc(); - count = newCount; - } + return i; +} + +void SegmentTermDocs::skipProx(int64_t proxPointer, int32_t payloadLength) { +} + +bool SegmentTermDocs::skipTo(int32_t target) { + if (df >= skipInterval) { // optimized case + if (!skipListReader) { + skipListReader = newLucene(boost::dynamic_pointer_cast(__freqStream->clone()), maxSkipLevels, skipInterval); // lazily clone } - - // done skipping, now just scan - do - { - if (!next()) - return false; + + if (!haveSkipped) { // lazily initialize skip stream + skipListReader->init(skipPointer, freqBasePointer, proxBasePointer, df, currentFieldStoresPayloads); + haveSkipped = true; + } + + int32_t newCount = skipListReader->skipTo(target); + if (newCount > count) { + __freqStream->seek(skipListReader->getFreqPointer()); + skipProx(skipListReader->getProxPointer(), skipListReader->getPayloadLength()); + + _doc = skipListReader->getDoc(); + count = newCount; } - while (target > _doc); - return true; - } - - IndexInputPtr SegmentTermDocs::freqStream() - { - return _freqStream; - } - - void SegmentTermDocs::freqStream(IndexInputPtr freqStream) - { - _freqStream = freqStream; } + + // done skipping, now just scan + do { + if (!next()) { + return false; + } + } while 
(target > _doc); + return true; +} + +IndexInputPtr SegmentTermDocs::freqStream() { + return _freqStream; +} + +void SegmentTermDocs::freqStream(const IndexInputPtr& freqStream) { + _freqStream = freqStream; + __freqStream = freqStream.get(); +} + } diff --git a/src/core/index/SegmentTermEnum.cpp b/src/core/index/SegmentTermEnum.cpp index c69da940..2a157b4a 100644 --- a/src/core/index/SegmentTermEnum.cpp +++ b/src/core/index/SegmentTermEnum.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,217 +12,196 @@ #include "TermInfo.h" #include "StringUtils.h" -namespace Lucene -{ - SegmentTermEnum::SegmentTermEnum() - { - format = 0; - termBuffer = newLucene(); - prevBuffer = newLucene(); - scanBuffer = newLucene(); - _termInfo = newLucene(); - formatM1SkipInterval = 0; - size = 0; - position = -1; - indexPointer = 0; - indexInterval = 0; - skipInterval = 0; - maxSkipLevels = 0; - - isIndex = false; - maxSkipLevels = 0; - } - - SegmentTermEnum::SegmentTermEnum(IndexInputPtr i, FieldInfosPtr fis, bool isi) - { +namespace Lucene { + +SegmentTermEnum::SegmentTermEnum() { + format = 0; + termBuffer = newLucene(); + prevBuffer = newLucene(); + scanBuffer = newLucene(); + _termInfo = newLucene(); + formatM1SkipInterval = 0; + size = 0; + position = -1; + indexPointer = 0; + indexInterval = 0; + skipInterval = 0; + maxSkipLevels = 0; + + isIndex = false; + maxSkipLevels = 0; +} + +SegmentTermEnum::SegmentTermEnum(const IndexInputPtr& i, const FieldInfosPtr& fis, bool isi) { + format = 0; + termBuffer = newLucene(); + prevBuffer = newLucene(); + scanBuffer = newLucene(); + _termInfo = newLucene(); + 
formatM1SkipInterval = 0; + size = 0; + position = -1; + indexPointer = 0; + indexInterval = 0; + skipInterval = 0; + maxSkipLevels = 0; + + input = i; + fieldInfos = fis; + isIndex = isi; + maxSkipLevels = 1; // use single-level skip lists for formats > -3 + + int32_t firstInt = input->readInt(); + if (firstInt >= 0) { + // original-format file, without explicit format version number format = 0; - termBuffer = newLucene(); - prevBuffer = newLucene(); - scanBuffer = newLucene(); - _termInfo = newLucene(); - formatM1SkipInterval = 0; - size = 0; - position = -1; - indexPointer = 0; - indexInterval = 0; - skipInterval = 0; - maxSkipLevels = 0; - - input = i; - fieldInfos = fis; - isIndex = isi; - maxSkipLevels = 1; // use single-level skip lists for formats > -3 - - int32_t firstInt = input->readInt(); - if (firstInt >= 0) - { - // original-format file, without explicit format version number - format = 0; - size = firstInt; - - // back-compatible settings - indexInterval = 128; - skipInterval = INT_MAX; // switch off skipTo optimization + size = firstInt; + + // back-compatible settings + indexInterval = 128; + skipInterval = INT_MAX; // switch off skipTo optimization + } else { + // we have a format version number + format = firstInt; + + // check that it is a format we can understand + if (format < TermInfosWriter::FORMAT_CURRENT) { + boost::throw_exception(CorruptIndexException(L"Unknown format version:" + StringUtils::toString(format) + L" expected " + StringUtils::toString(TermInfosWriter::FORMAT_CURRENT) + L" or higher")); } - else - { - // we have a format version number - format = firstInt; - - // check that it is a format we can understand - if (format < TermInfosWriter::FORMAT_CURRENT) - boost::throw_exception(CorruptIndexException(L"Unknown format version:" + StringUtils::toString(format) + L" expected " + StringUtils::toString(TermInfosWriter::FORMAT_CURRENT) + L" or higher")); - - size = input->readLong(); // read the size - - if (format == -1) - { - if 
(!isIndex) - { - indexInterval = input->readInt(); - formatM1SkipInterval = input->readInt(); - } - // switch off skipTo optimization for file format prior to 1.4rc2 - skipInterval = INT_MAX; - } - else - { + + size = input->readLong(); // read the size + + if (format == -1) { + if (!isIndex) { indexInterval = input->readInt(); - skipInterval = input->readInt(); - if (format <= TermInfosWriter::FORMAT) - { - // this new format introduces multi-level skipping - maxSkipLevels = input->readInt(); - } + formatM1SkipInterval = input->readInt(); + } + // switch off skipTo optimization for file format prior to 1.4rc2 + skipInterval = INT_MAX; + } else { + indexInterval = input->readInt(); + skipInterval = input->readInt(); + if (format <= TermInfosWriter::FORMAT) { + // this new format introduces multi-level skipping + maxSkipLevels = input->readInt(); } - - BOOST_ASSERT(indexInterval > 0); // must not be negative - BOOST_ASSERT(skipInterval > 0); // must not be negative - } - if (format > TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) - { - termBuffer->setPreUTF8Strings(); - scanBuffer->setPreUTF8Strings(); - prevBuffer->setPreUTF8Strings(); } + + BOOST_ASSERT(indexInterval > 0); // must not be negative + BOOST_ASSERT(skipInterval > 0); // must not be negative } - - SegmentTermEnum::~SegmentTermEnum() - { - } - - LuceneObjectPtr SegmentTermEnum::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - SegmentTermEnumPtr cloneEnum(boost::dynamic_pointer_cast(TermEnum::clone(clone))); - cloneEnum->format = format; - cloneEnum->isIndex = isIndex; - cloneEnum->formatM1SkipInterval = formatM1SkipInterval; - cloneEnum->fieldInfos = fieldInfos; - cloneEnum->size = size; - cloneEnum->position = position; - cloneEnum->indexPointer = indexPointer; - cloneEnum->indexInterval = indexInterval; - cloneEnum->skipInterval = skipInterval; - cloneEnum->maxSkipLevels = maxSkipLevels; - - cloneEnum->input = boost::dynamic_pointer_cast(input->clone()); - cloneEnum->_termInfo = newLucene(_termInfo); - - cloneEnum->termBuffer = boost::dynamic_pointer_cast(termBuffer->clone()); - cloneEnum->prevBuffer = boost::dynamic_pointer_cast(prevBuffer->clone()); - cloneEnum->scanBuffer = newLucene(); - - return cloneEnum; - } - - void SegmentTermEnum::seek(int64_t pointer, int64_t p, TermPtr t, TermInfoPtr ti) - { - input->seek(pointer); - position = p; - termBuffer->set(t); - prevBuffer->reset(); - _termInfo->set(ti); + if (format > TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES) { + termBuffer->setPreUTF8Strings(); + scanBuffer->setPreUTF8Strings(); + prevBuffer->setPreUTF8Strings(); } - - bool SegmentTermEnum::next() - { - if (position++ >= size - 1) - { - prevBuffer->set(termBuffer); - termBuffer->reset(); - return false; - } - +} + +SegmentTermEnum::~SegmentTermEnum() { +} + +LuceneObjectPtr SegmentTermEnum::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? 
other : newLucene(); + SegmentTermEnumPtr cloneEnum(boost::dynamic_pointer_cast(TermEnum::clone(clone))); + cloneEnum->format = format; + cloneEnum->isIndex = isIndex; + cloneEnum->formatM1SkipInterval = formatM1SkipInterval; + cloneEnum->fieldInfos = fieldInfos; + cloneEnum->size = size; + cloneEnum->position = position; + cloneEnum->indexPointer = indexPointer; + cloneEnum->indexInterval = indexInterval; + cloneEnum->skipInterval = skipInterval; + cloneEnum->maxSkipLevels = maxSkipLevels; + + cloneEnum->input = boost::dynamic_pointer_cast(input->clone()); + cloneEnum->_termInfo = newLucene(_termInfo); + + cloneEnum->termBuffer = boost::dynamic_pointer_cast(termBuffer->clone()); + cloneEnum->prevBuffer = boost::dynamic_pointer_cast(prevBuffer->clone()); + cloneEnum->scanBuffer = newLucene(); + + return cloneEnum; +} + +void SegmentTermEnum::seek(int64_t pointer, int64_t p, const TermPtr& t, const TermInfoPtr& ti) { + input->seek(pointer); + position = p; + termBuffer->set(t); + prevBuffer->reset(); + _termInfo->set(ti); +} + +bool SegmentTermEnum::next() { + if (position++ >= size - 1) { prevBuffer->set(termBuffer); - termBuffer->read(input, fieldInfos); - - _termInfo->docFreq = input->readVInt(); // read doc freq - _termInfo->freqPointer += input->readVLong(); // read freq pointer - _termInfo->proxPointer += input->readVLong(); // read prox pointer - - if (format == -1) - { - // just read skipOffset in order to increment file pointer; value is never used - // since skipTo is switched off - if (!isIndex && _termInfo->docFreq > formatM1SkipInterval) - _termInfo->skipOffset = input->readVInt(); - } - else if (_termInfo->docFreq >= skipInterval) - _termInfo->skipOffset = input->readVInt(); - - if (isIndex) - indexPointer += input->readVLong(); // read index pointer - - return true; - } - - int32_t SegmentTermEnum::scanTo(TermPtr term) - { - scanBuffer->set(term); - int32_t count = 0; - while (scanBuffer->compareTo(termBuffer) > 0 && next()) - ++count; - return count; 
+ termBuffer->reset(); + return false; } - - TermPtr SegmentTermEnum::term() - { - return termBuffer->toTerm(); - } - - TermPtr SegmentTermEnum::prev() - { - return prevBuffer->toTerm(); - } - - TermInfoPtr SegmentTermEnum::termInfo() - { - return newLucene(_termInfo); - } - - void SegmentTermEnum::termInfo(TermInfoPtr ti) - { - ti->set(_termInfo); - } - - int32_t SegmentTermEnum::docFreq() - { - return _termInfo->docFreq; - } - - int64_t SegmentTermEnum::freqPointer() - { - return _termInfo->freqPointer; + + prevBuffer->set(termBuffer); + termBuffer->read(input, fieldInfos); + + _termInfo->docFreq = input->readVInt(); // read doc freq + _termInfo->freqPointer += input->readVLong(); // read freq pointer + _termInfo->proxPointer += input->readVLong(); // read prox pointer + + if (format == -1) { + // just read skipOffset in order to increment file pointer; value is never used + // since skipTo is switched off + if (!isIndex && _termInfo->docFreq > formatM1SkipInterval) { + _termInfo->skipOffset = input->readVInt(); + } + } else if (_termInfo->docFreq >= skipInterval) { + _termInfo->skipOffset = input->readVInt(); } - - int64_t SegmentTermEnum::proxPointer() - { - return _termInfo->proxPointer; + + if (isIndex) { + indexPointer += input->readVLong(); // read index pointer } - - void SegmentTermEnum::close() - { - input->close(); + + return true; +} + +int32_t SegmentTermEnum::scanTo(const TermPtr& term) { + scanBuffer->set(term); + int32_t count = 0; + while (scanBuffer->compareTo(termBuffer) > 0 && next()) { + ++count; } + return count; +} + +TermPtr SegmentTermEnum::term() { + return termBuffer->toTerm(); +} + +TermPtr SegmentTermEnum::prev() { + return prevBuffer->toTerm(); +} + +TermInfoPtr SegmentTermEnum::termInfo() { + return newLucene(_termInfo); +} + +void SegmentTermEnum::termInfo(const TermInfoPtr& ti) { + ti->set(_termInfo); +} + +int32_t SegmentTermEnum::docFreq() { + return _termInfo->docFreq; +} + +int64_t SegmentTermEnum::freqPointer() { + return 
_termInfo->freqPointer; +} + +int64_t SegmentTermEnum::proxPointer() { + return _termInfo->proxPointer; +} + +void SegmentTermEnum::close() { + input->close(); +} + } diff --git a/src/core/index/SegmentTermPositionVector.cpp b/src/core/index/SegmentTermPositionVector.cpp index 5bb015b6..cd8dedfa 100644 --- a/src/core/index/SegmentTermPositionVector.cpp +++ b/src/core/index/SegmentTermPositionVector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,46 +8,47 @@ #include "SegmentTermPositionVector.h" #include "TermVectorOffsetInfo.h" -namespace Lucene -{ - SegmentTermPositionVector::SegmentTermPositionVector(const String& field, Collection terms, - Collection termFreqs, Collection< Collection > positions, - Collection< Collection > offsets) : - SegmentTermVector(field, terms, termFreqs) - { - this->offsets = offsets; - this->positions = positions; - } - - SegmentTermPositionVector::~SegmentTermPositionVector() - { +namespace Lucene { + +SegmentTermPositionVector::SegmentTermPositionVector(const String& field, Collection terms, + Collection termFreqs, Collection< Collection > positions, + Collection< Collection > offsets) : + SegmentTermVector(field, terms, termFreqs) { + this->offsets = offsets; + this->positions = positions; +} + +SegmentTermPositionVector::~SegmentTermPositionVector() { +} + +const Collection SegmentTermPositionVector::EMPTY_TERM_POS() { + static Collection _EMPTY_TERM_POS; + LUCENE_RUN_ONCE( + _EMPTY_TERM_POS = Collection::newInstance(); + ); + return _EMPTY_TERM_POS; +} + +Collection SegmentTermPositionVector::getOffsets(int32_t index) { + Collection 
result(TermVectorOffsetInfo::EMPTY_OFFSET_INFO()); + if (!offsets) { + return Collection(); } - - const Collection SegmentTermPositionVector::EMPTY_TERM_POS() - { - static Collection _EMPTY_TERM_POS; - if (!_EMPTY_TERM_POS) - _EMPTY_TERM_POS = Collection::newInstance(); - return _EMPTY_TERM_POS; + if (index >=0 && index < offsets.size()) { + result = offsets[index]; } - - Collection SegmentTermPositionVector::getOffsets(int32_t index) - { - Collection result(TermVectorOffsetInfo::EMPTY_OFFSET_INFO()); - if (!offsets) - return Collection(); - if (index >=0 && index < offsets.size()) - result = offsets[index]; - return result; + return result; +} + +Collection SegmentTermPositionVector::getTermPositions(int32_t index) { + Collection result(EMPTY_TERM_POS()); + if (!positions) { + return Collection(); } - - Collection SegmentTermPositionVector::getTermPositions(int32_t index) - { - Collection result(EMPTY_TERM_POS()); - if (!positions) - return Collection(); - if (index >= 0 && index < positions.size()) - result = positions[index]; - return result; + if (index >= 0 && index < positions.size()) { + result = positions[index]; } + return result; +} + } diff --git a/src/core/index/SegmentTermPositions.cpp b/src/core/index/SegmentTermPositions.cpp index 829cee39..ddf90553 100644 --- a/src/core/index/SegmentTermPositions.cpp +++ b/src/core/index/SegmentTermPositions.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,178 +12,158 @@ #include "IndexInput.h" #include "MiscUtils.h" -namespace Lucene -{ - SegmentTermPositions::SegmentTermPositions(SegmentReaderPtr parent) : SegmentTermDocs(parent) - { - this->proxCount = 0; - this->position = 0; - this->payloadLength = 0; - this->needToLoadPayload = false; - this->lazySkipPointer = -1; - this->lazySkipProxCount = 0; - } - - SegmentTermPositions::~SegmentTermPositions() - { - } - - void SegmentTermPositions::seek(TermInfoPtr ti, TermPtr term) - { - SegmentTermDocs::seek(ti, term); - if (ti) - lazySkipPointer = ti->proxPointer; - lazySkipProxCount = 0; - proxCount = 0; - payloadLength = 0; - needToLoadPayload = false; - } - - void SegmentTermPositions::close() - { - SegmentTermDocs::close(); - if (proxStream) - proxStream->close(); - } - - int32_t SegmentTermPositions::nextPosition() - { - if (currentFieldOmitTermFreqAndPositions) - { - // This field does not store term freq, positions, payloads - return 0; - } - - // perform lazy skips if necessary - lazySkip(); - --proxCount; - position += readDeltaPosition(); - return position; - } - - int32_t SegmentTermPositions::readDeltaPosition() - { - int32_t delta = proxStream->readVInt(); - if (currentFieldStoresPayloads) - { - // if the current field stores payloads then the position delta is shifted one bit to the left. 
- // if the LSB is set, then we have to read the current payload length - if ((delta & 1) != 0) - payloadLength = proxStream->readVInt(); - delta = MiscUtils::unsignedShift(delta, 1); - needToLoadPayload = true; - } - return delta; - } - - void SegmentTermPositions::skippingDoc() - { - // we remember to skip a document lazily - lazySkipProxCount += _freq; - } - - bool SegmentTermPositions::next() - { - // we remember to skip the remaining positions of the current document lazily - lazySkipProxCount += proxCount; - - if (SegmentTermDocs::next()) - { - proxCount = _freq; // note frequency - position = 0; // reset position - return true; - } - return false; +namespace Lucene { + +SegmentTermPositions::SegmentTermPositions(const SegmentReaderPtr& parent) : SegmentTermDocs(parent) { + this->proxCount = 0; + this->position = 0; + this->payloadLength = 0; + this->needToLoadPayload = false; + this->lazySkipPointer = -1; + this->lazySkipProxCount = 0; +} + +SegmentTermPositions::~SegmentTermPositions() { +} + +void SegmentTermPositions::seek(const TermInfoPtr& ti, const TermPtr& term) { + SegmentTermDocs::seek(ti, term); + if (ti) { + lazySkipPointer = ti->proxPointer; + } + lazySkipProxCount = 0; + proxCount = 0; + payloadLength = 0; + needToLoadPayload = false; +} + +void SegmentTermPositions::close() { + SegmentTermDocs::close(); + if (proxStream) { + proxStream->close(); } - - int32_t SegmentTermPositions::read(Collection docs, Collection freqs) - { - boost::throw_exception(UnsupportedOperationException(L"TermPositions does not support processing multiple documents in one call. 
Use TermDocs instead.")); +} + +int32_t SegmentTermPositions::nextPosition() { + if (currentFieldOmitTermFreqAndPositions) { + // This field does not store term freq, positions, payloads return 0; } - - void SegmentTermPositions::skipProx(int64_t proxPointer, int32_t payloadLength) - { - // we save the pointer, we might have to skip there lazily - lazySkipPointer = proxPointer; - lazySkipProxCount = 0; - proxCount = 0; - this->payloadLength = payloadLength; - needToLoadPayload = false; - } - - void SegmentTermPositions::skipPositions(int32_t n) - { - BOOST_ASSERT(!currentFieldOmitTermFreqAndPositions); - for (int32_t i = n; i > 0; --i) // skip unread positions - { - readDeltaPosition(); - skipPayload(); + + // perform lazy skips if necessary + lazySkip(); + --proxCount; + position += readDeltaPosition(); + return position; +} + +int32_t SegmentTermPositions::readDeltaPosition() { + int32_t delta = proxStream->readVInt(); + if (currentFieldStoresPayloads) { + // if the current field stores payloads then the position delta is shifted one bit to the left. 
+ // if the LSB is set, then we have to read the current payload length + if ((delta & 1) != 0) { + payloadLength = proxStream->readVInt(); } + delta = MiscUtils::unsignedShift(delta, 1); + needToLoadPayload = true; } - - void SegmentTermPositions::skipPayload() - { - if (needToLoadPayload && payloadLength > 0) - proxStream->seek(proxStream->getFilePointer() + payloadLength); - needToLoadPayload = false; + return delta; +} + +void SegmentTermPositions::skippingDoc() { + // we remember to skip a document lazily + lazySkipProxCount += _freq; +} + +bool SegmentTermPositions::next() { + // we remember to skip the remaining positions of the current document lazily + lazySkipProxCount += proxCount; + + if (SegmentTermDocs::next()) { + proxCount = _freq; // note frequency + position = 0; // reset position + return true; } - - void SegmentTermPositions::lazySkip() - { - if (!proxStream) - { - // clone lazily - proxStream = boost::dynamic_pointer_cast(SegmentReaderPtr(_parent)->core->proxStream->clone()); - } - - // we might have to skip the current payload if it was not read yet + return false; +} + +int32_t SegmentTermPositions::read(Collection& docs, Collection& freqs) { + boost::throw_exception(UnsupportedOperationException(L"TermPositions does not support processing multiple documents in one call. 
Use TermDocs instead.")); + return 0; +} + +void SegmentTermPositions::skipProx(int64_t proxPointer, int32_t payloadLength) { + // we save the pointer, we might have to skip there lazily + lazySkipPointer = proxPointer; + lazySkipProxCount = 0; + proxCount = 0; + this->payloadLength = payloadLength; + needToLoadPayload = false; +} + +void SegmentTermPositions::skipPositions(int32_t n) { + BOOST_ASSERT(!currentFieldOmitTermFreqAndPositions); + for (int32_t i = n; i > 0; --i) { // skip unread positions + readDeltaPosition(); skipPayload(); - - if (lazySkipPointer != -1) - { - proxStream->seek(lazySkipPointer); - lazySkipPointer = -1; - } - - if (lazySkipProxCount != 0) - { - skipPositions(lazySkipProxCount); - lazySkipProxCount = 0; - } } - - int32_t SegmentTermPositions::getPayloadLength() - { - return payloadLength; +} + +void SegmentTermPositions::skipPayload() { + if (needToLoadPayload && payloadLength > 0) { + proxStream->seek(proxStream->getFilePointer() + payloadLength); } - - ByteArray SegmentTermPositions::getPayload(ByteArray data, int32_t offset) - { - if (!needToLoadPayload) - boost::throw_exception(IOException(L"Either no payload exists at this term position or an attempt was made to load it more than once.")); - - // read payloads lazily - ByteArray retArray; - int32_t retOffset = 0; - if (!data || data.size() - offset < payloadLength) - { - // the array is too small to store the payload data, so we allocate a new one - retArray = ByteArray::newInstance(payloadLength); - retOffset = 0; - } - else - { - retArray = data; - retOffset = offset; - } - proxStream->readBytes(retArray.get(), retOffset, payloadLength); - needToLoadPayload = false; - return retArray; + needToLoadPayload = false; +} + +void SegmentTermPositions::lazySkip() { + if (!proxStream) { + // clone lazily + proxStream = boost::dynamic_pointer_cast(SegmentReaderPtr(_parent)->core->proxStream->clone()); } - - bool SegmentTermPositions::isPayloadAvailable() - { - return (needToLoadPayload && 
payloadLength > 0); + + // we might have to skip the current payload if it was not read yet + skipPayload(); + + if (lazySkipPointer != -1) { + proxStream->seek(lazySkipPointer); + lazySkipPointer = -1; + } + + if (lazySkipProxCount != 0) { + skipPositions(lazySkipProxCount); + lazySkipProxCount = 0; } } + +int32_t SegmentTermPositions::getPayloadLength() { + return payloadLength; +} + +ByteArray SegmentTermPositions::getPayload(ByteArray data, int32_t offset) { + if (!needToLoadPayload) { + boost::throw_exception(IOException(L"Either no payload exists at this term position or an attempt was made to load it more than once.")); + } + + // read payloads lazily + ByteArray retArray; + int32_t retOffset = 0; + if (!data || data.size() - offset < payloadLength) { + // the array is too small to store the payload data, so we allocate a new one + retArray = ByteArray::newInstance(payloadLength); + retOffset = 0; + } else { + retArray = data; + retOffset = offset; + } + proxStream->readBytes(retArray.get(), retOffset, payloadLength); + needToLoadPayload = false; + return retArray; +} + +bool SegmentTermPositions::isPayloadAvailable() { + return (needToLoadPayload && payloadLength > 0); +} + +} diff --git a/src/core/index/SegmentTermVector.cpp b/src/core/index/SegmentTermVector.cpp index 4627cfdf..959e3e8b 100644 --- a/src/core/index/SegmentTermVector.cpp +++ b/src/core/index/SegmentTermVector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,69 +7,62 @@ #include "LuceneInc.h" #include "SegmentTermVector.h" -namespace Lucene -{ - SegmentTermVector::SegmentTermVector(const String& field, Collection terms, Collection termFreqs) - { - this->field = field; - this->terms = terms; - this->termFreqs = termFreqs; - } - - SegmentTermVector::~SegmentTermVector() - { - } - - String SegmentTermVector::getField() - { - return field; - } - - String SegmentTermVector::toString() - { - StringStream segTermVector; - segTermVector << L"{" << field; - if (terms) - { - for (int32_t i = 0; i < terms.size(); ++i) - { - if (i > 0) - segTermVector << L", "; - segTermVector << terms[i] << L"/" << termFreqs[i]; +namespace Lucene { + +SegmentTermVector::SegmentTermVector(const String& field, Collection terms, Collection termFreqs) { + this->field = field; + this->terms = terms; + this->termFreqs = termFreqs; +} + +SegmentTermVector::~SegmentTermVector() { +} + +String SegmentTermVector::getField() { + return field; +} + +String SegmentTermVector::toString() { + StringStream segTermVector; + segTermVector << L"{" << field; + if (terms) { + for (int32_t i = 0; i < terms.size(); ++i) { + if (i > 0) { + segTermVector << L", "; } + segTermVector << terms[i] << L"/" << termFreqs[i]; } - segTermVector << L"}"; - return segTermVector.str(); - } - - int32_t SegmentTermVector::size() - { - return terms ? terms.size() : 0; - } - - Collection SegmentTermVector::getTerms() - { - return terms; - } - - Collection SegmentTermVector::getTermFrequencies() - { - return termFreqs; } - - int32_t SegmentTermVector::indexOf(const String& term) - { - if (!terms) - return -1; - Collection::iterator search = std::lower_bound(terms.begin(), terms.end(), term); - return (search == terms.end() || term < *search) ? -1 : std::distance(terms.begin(), search); + segTermVector << L"}"; + return segTermVector.str(); +} + +int32_t SegmentTermVector::size() { + return terms ? 
terms.size() : 0; +} + +Collection SegmentTermVector::getTerms() { + return terms; +} + +Collection SegmentTermVector::getTermFrequencies() { + return termFreqs; +} + +int32_t SegmentTermVector::indexOf(const String& term) { + if (!terms) { + return -1; } - - Collection SegmentTermVector::indexesOf(Collection termNumbers, int32_t start, int32_t length) - { - Collection res(Collection::newInstance(length)); - for (int32_t i = 0; i < length; ++i) - res[i] = indexOf(termNumbers[start + i]); - return res; + Collection::iterator search = std::lower_bound(terms.begin(), terms.end(), term); + return (search == terms.end() || term < *search) ? -1 : std::distance(terms.begin(), search); +} + +Collection SegmentTermVector::indexesOf(Collection termNumbers, int32_t start, int32_t length) { + Collection res(Collection::newInstance(length)); + for (int32_t i = 0; i < length; ++i) { + res[i] = indexOf(termNumbers[start + i]); } + return res; +} + } diff --git a/src/core/index/SegmentWriteState.cpp b/src/core/index/SegmentWriteState.cpp index 246f9878..eb21811f 100644 --- a/src/core/index/SegmentWriteState.cpp +++ b/src/core/index/SegmentWriteState.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,28 +7,26 @@ #include "LuceneInc.h" #include "SegmentWriteState.h" -namespace Lucene -{ - SegmentWriteState::SegmentWriteState(DocumentsWriterPtr docWriter, DirectoryPtr directory, const String& segmentName, - const String& docStoreSegmentName, int32_t numDocs, int32_t numDocsInStore, - int32_t termIndexInterval) - { - this->_docWriter = docWriter; - this->directory = directory; - this->segmentName = segmentName; - this->docStoreSegmentName = docStoreSegmentName; - this->numDocs = numDocs; - this->numDocsInStore = numDocsInStore; - this->termIndexInterval = termIndexInterval; - this->flushedFiles = HashSet::newInstance(); - } - - SegmentWriteState::~SegmentWriteState() - { - } - - String SegmentWriteState::segmentFileName(const String& ext) - { - return segmentName + L"." + ext; - } +namespace Lucene { + +SegmentWriteState::SegmentWriteState(const DocumentsWriterPtr& docWriter, const DirectoryPtr& directory, const String& segmentName, + const String& docStoreSegmentName, int32_t numDocs, int32_t numDocsInStore, + int32_t termIndexInterval) { + this->_docWriter = docWriter; + this->directory = directory; + this->segmentName = segmentName; + this->docStoreSegmentName = docStoreSegmentName; + this->numDocs = numDocs; + this->numDocsInStore = numDocsInStore; + this->termIndexInterval = termIndexInterval; + this->flushedFiles = HashSet::newInstance(); +} + +SegmentWriteState::~SegmentWriteState() { +} + +String SegmentWriteState::segmentFileName(const String& ext) { + return segmentName + L"." + ext; +} + } diff --git a/src/core/index/SerialMergeScheduler.cpp b/src/core/index/SerialMergeScheduler.cpp index 38b1c458..b3cf7c0d 100644 --- a/src/core/index/SerialMergeScheduler.cpp +++ b/src/core/index/SerialMergeScheduler.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,25 +8,23 @@ #include "SerialMergeScheduler.h" #include "IndexWriter.h" -namespace Lucene -{ - SerialMergeScheduler::~SerialMergeScheduler() - { - } - - void SerialMergeScheduler::merge(IndexWriterPtr writer) - { - SyncLock syncLock(this); - while (true) - { - OneMergePtr merge(writer->getNextMerge()); - if (!merge) - break; - writer->merge(merge); +namespace Lucene { + +SerialMergeScheduler::~SerialMergeScheduler() { +} + +void SerialMergeScheduler::merge(const IndexWriterPtr& writer) { + SyncLock syncLock(this); + while (true) { + OneMergePtr merge(writer->getNextMerge()); + if (!merge) { + break; } + writer->merge(merge); } - - void SerialMergeScheduler::close() - { - } +} + +void SerialMergeScheduler::close() { +} + } diff --git a/src/core/index/SnapshotDeletionPolicy.cpp b/src/core/index/SnapshotDeletionPolicy.cpp index 17c6f509..7dee00db 100644 --- a/src/core/index/SnapshotDeletionPolicy.cpp +++ b/src/core/index/SnapshotDeletionPolicy.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,121 +8,108 @@ #include "SnapshotDeletionPolicy.h" #include "_SnapshotDeletionPolicy.h" -namespace Lucene -{ - SnapshotDeletionPolicy::SnapshotDeletionPolicy(IndexDeletionPolicyPtr primary) - { - this->primary = primary; - } - - SnapshotDeletionPolicy::~SnapshotDeletionPolicy() - { - } - - void SnapshotDeletionPolicy::onInit(Collection commits) - { - SyncLock syncLock(this); - primary->onInit(wrapCommits(commits)); - lastCommit = commits[commits.size() - 1]; - } - - void SnapshotDeletionPolicy::onCommit(Collection commits) - { - SyncLock syncLock(this); - primary->onCommit(wrapCommits(commits)); - lastCommit = commits[commits.size() - 1]; - } - - IndexCommitPtr SnapshotDeletionPolicy::snapshot() - { - SyncLock syncLock(this); - if (!lastCommit) - boost::throw_exception(IllegalStateException(L"no index commits to snapshot")); - if (_snapshot.empty()) - _snapshot = lastCommit->getSegmentsFileName(); - else - boost::throw_exception(IllegalStateException(L"snapshot is already set; please call release() first")); - return lastCommit; - } - - void SnapshotDeletionPolicy::release() - { - SyncLock syncLock(this); - if (!_snapshot.empty()) - _snapshot.clear(); - else - boost::throw_exception(IllegalStateException(L"snapshot was not set; please call snapshot() first")); - } - - Collection SnapshotDeletionPolicy::wrapCommits(Collection commits) - { - Collection myCommits(Collection::newInstance()); - for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) - myCommits.add(newLucene(shared_from_this(), *commit)); - return myCommits; - } - - MyCommitPoint::MyCommitPoint(SnapshotDeletionPolicyPtr deletionPolicy, IndexCommitPtr cp) - { - this->_deletionPolicy = deletionPolicy; - this->cp = cp; - } - - MyCommitPoint::~MyCommitPoint() - { - } - - String MyCommitPoint::toString() - { - return L"SnapshotDeletionPolicy.SnapshotCommitPoint(" + cp->toString() + L")"; - } - - 
String MyCommitPoint::getSegmentsFileName() - { - return cp->getSegmentsFileName(); - } - - HashSet MyCommitPoint::getFileNames() - { - return cp->getFileNames(); - } - - DirectoryPtr MyCommitPoint::getDirectory() - { - return cp->getDirectory(); - } - - void MyCommitPoint::deleteCommit() - { - SnapshotDeletionPolicyPtr deletionPolicy(_deletionPolicy); - SyncLock policyLock(deletionPolicy); - // Suppress the delete request if this commit point is our current snapshot. - if (deletionPolicy->_snapshot.empty() || deletionPolicy->_snapshot != getSegmentsFileName()) - cp->deleteCommit(); - } - - bool MyCommitPoint::isDeleted() - { - return cp->isDeleted(); +namespace Lucene { + +SnapshotDeletionPolicy::SnapshotDeletionPolicy(const IndexDeletionPolicyPtr& primary) { + this->primary = primary; +} + +SnapshotDeletionPolicy::~SnapshotDeletionPolicy() { +} + +void SnapshotDeletionPolicy::onInit(Collection commits) { + SyncLock syncLock(this); + primary->onInit(wrapCommits(commits)); + lastCommit = commits[commits.size() - 1]; +} + +void SnapshotDeletionPolicy::onCommit(Collection commits) { + SyncLock syncLock(this); + primary->onCommit(wrapCommits(commits)); + lastCommit = commits[commits.size() - 1]; +} + +IndexCommitPtr SnapshotDeletionPolicy::snapshot() { + SyncLock syncLock(this); + if (!lastCommit) { + boost::throw_exception(IllegalStateException(L"no index commits to snapshot")); } - - int64_t MyCommitPoint::getVersion() - { - return cp->getVersion(); + if (_snapshot.empty()) { + _snapshot = lastCommit->getSegmentsFileName(); + } else { + boost::throw_exception(IllegalStateException(L"snapshot is already set; please call release() first")); } - - int64_t MyCommitPoint::getGeneration() - { - return cp->getGeneration(); + return lastCommit; +} + +void SnapshotDeletionPolicy::release() { + SyncLock syncLock(this); + if (!_snapshot.empty()) { + _snapshot.clear(); + } else { + boost::throw_exception(IllegalStateException(L"snapshot was not set; please call snapshot() 
first")); } - - MapStringString MyCommitPoint::getUserData() - { - return cp->getUserData(); +} + +Collection SnapshotDeletionPolicy::wrapCommits(Collection commits) { + Collection myCommits(Collection::newInstance()); + for (Collection::iterator commit = commits.begin(); commit != commits.end(); ++commit) { + myCommits.add(newLucene(shared_from_this(), *commit)); } - - bool MyCommitPoint::isOptimized() - { - return cp->isOptimized(); + return myCommits; +} + +MyCommitPoint::MyCommitPoint(const SnapshotDeletionPolicyPtr& deletionPolicy, const IndexCommitPtr& cp) { + this->_deletionPolicy = deletionPolicy; + this->cp = cp; +} + +MyCommitPoint::~MyCommitPoint() { +} + +String MyCommitPoint::toString() { + return L"SnapshotDeletionPolicy.SnapshotCommitPoint(" + cp->toString() + L")"; +} + +String MyCommitPoint::getSegmentsFileName() { + return cp->getSegmentsFileName(); +} + +HashSet MyCommitPoint::getFileNames() { + return cp->getFileNames(); +} + +DirectoryPtr MyCommitPoint::getDirectory() { + return cp->getDirectory(); +} + +void MyCommitPoint::deleteCommit() { + SnapshotDeletionPolicyPtr deletionPolicy(_deletionPolicy); + SyncLock policyLock(deletionPolicy); + // Suppress the delete request if this commit point is our current snapshot. 
+ if (deletionPolicy->_snapshot.empty() || deletionPolicy->_snapshot != getSegmentsFileName()) { + cp->deleteCommit(); } } + +bool MyCommitPoint::isDeleted() { + return cp->isDeleted(); +} + +int64_t MyCommitPoint::getVersion() { + return cp->getVersion(); +} + +int64_t MyCommitPoint::getGeneration() { + return cp->getGeneration(); +} + +MapStringString MyCommitPoint::getUserData() { + return cp->getUserData(); +} + +bool MyCommitPoint::isOptimized() { + return cp->isOptimized(); +} + +} diff --git a/src/core/index/SortedTermVectorMapper.cpp b/src/core/index/SortedTermVectorMapper.cpp index 1871756c..a573b448 100644 --- a/src/core/index/SortedTermVectorMapper.cpp +++ b/src/core/index/SortedTermVectorMapper.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,87 +8,76 @@ #include "SortedTermVectorMapper.h" #include "TermVectorEntry.h" -namespace Lucene -{ - const wchar_t* SortedTermVectorMapper::ALL = L"_ALL_"; - - SortedTermVectorMapper::SortedTermVectorMapper(TermVectorEntryComparator comparator) : TermVectorMapper(false, false) - { - this->storeOffsets = false; - this->storePositions = false; - this->comparator = comparator; - this->currentSet = Collection::newInstance(); - this->termToTVE = MapStringTermVectorEntry::newInstance(); - } - - SortedTermVectorMapper::SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator) - : TermVectorMapper(ignoringPositions, ignoringPositions) - { - this->storeOffsets = false; - this->storePositions = false; - this->comparator = comparator; - this->currentSet = Collection::newInstance(); - this->termToTVE = MapStringTermVectorEntry::newInstance(); - } - - SortedTermVectorMapper::~SortedTermVectorMapper() - { - } - - void SortedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) - { - // We need to combine any previous mentions of the term - TermVectorEntryPtr entry(termToTVE.get(term)); - if (!entry) - { - entry = newLucene(ALL, term, frequency, storeOffsets ? offsets : Collection(), storePositions ? 
positions : Collection()); - termToTVE.put(term, entry); - - if (!currentSet.contains_if(luceneEqualTo(entry))) - currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry); +namespace Lucene { + +const wchar_t* SortedTermVectorMapper::ALL = L"_ALL_"; + +SortedTermVectorMapper::SortedTermVectorMapper(TermVectorEntryComparator comparator) : TermVectorMapper(false, false) { + this->storeOffsets = false; + this->storePositions = false; + this->comparator = comparator; + this->currentSet = Collection::newInstance(); + this->termToTVE = MapStringTermVectorEntry::newInstance(); +} + +SortedTermVectorMapper::SortedTermVectorMapper(bool ignoringPositions, bool ignoringOffsets, TermVectorEntryComparator comparator) + : TermVectorMapper(ignoringPositions, ignoringPositions) { + this->storeOffsets = false; + this->storePositions = false; + this->comparator = comparator; + this->currentSet = Collection::newInstance(); + this->termToTVE = MapStringTermVectorEntry::newInstance(); +} + +SortedTermVectorMapper::~SortedTermVectorMapper() { +} + +void SortedTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { + // We need to combine any previous mentions of the term + TermVectorEntryPtr entry(termToTVE.get(term)); + if (!entry) { + entry = newLucene(ALL, term, frequency, storeOffsets ? offsets : Collection(), storePositions ? positions : Collection()); + termToTVE.put(term, entry); + + if (!currentSet.contains_if(luceneEqualTo(entry))) { + currentSet.insert(std::upper_bound(currentSet.begin(), currentSet.end(), entry, comparator), entry); } - else - { - entry->setFrequency(entry->getFrequency() + frequency); - if (storeOffsets) - { - Collection existingOffsets(entry->getOffsets()); - // A few diff. 
cases here: offsets is null, existing offsets is null, both are null, same for positions - if (existingOffsets && offsets && !offsets.empty()) - { - // copy over the existing offsets - Collection newOffsets(Collection::newInstance(existingOffsets.begin(), existingOffsets.end())); - newOffsets.addAll(offsets.begin(), offsets.end()); - entry->setOffsets(newOffsets); - } - else if (!existingOffsets && offsets && !offsets.empty()) - entry->setOffsets(offsets); - // else leave it alone + } else { + entry->setFrequency(entry->getFrequency() + frequency); + if (storeOffsets) { + Collection existingOffsets(entry->getOffsets()); + // A few diff. cases here: offsets is null, existing offsets is null, both are null, same for positions + if (existingOffsets && offsets && !offsets.empty()) { + // copy over the existing offsets + Collection newOffsets(Collection::newInstance(existingOffsets.begin(), existingOffsets.end())); + newOffsets.addAll(offsets.begin(), offsets.end()); + entry->setOffsets(newOffsets); + } else if (!existingOffsets && offsets && !offsets.empty()) { + entry->setOffsets(offsets); } - if (storePositions) - { - Collection existingPositions(entry->getPositions()); - if (existingPositions && positions && !positions.empty()) - { - Collection newPositions(existingPositions); - newPositions.addAll(positions.begin(), positions.end()); - entry->setPositions(newPositions); - } - else if (!existingPositions && positions && !positions.empty()) - entry->setPositions(positions); - // else leave it alone + // else leave it alone + } + if (storePositions) { + Collection existingPositions(entry->getPositions()); + if (existingPositions && positions && !positions.empty()) { + Collection newPositions(existingPositions); + newPositions.addAll(positions.begin(), positions.end()); + entry->setPositions(newPositions); + } else if (!existingPositions && positions && !positions.empty()) { + entry->setPositions(positions); } + // else leave it alone } } - - void 
SortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) - { - this->storeOffsets = storeOffsets; - this->storePositions = storePositions; - } - - Collection SortedTermVectorMapper::getTermVectorEntrySet() - { - return currentSet; - } +} + +void SortedTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { + this->storeOffsets = storeOffsets; + this->storePositions = storePositions; +} + +Collection SortedTermVectorMapper::getTermVectorEntrySet() { + return currentSet; +} + } diff --git a/src/core/index/StoredFieldsWriter.cpp b/src/core/index/StoredFieldsWriter.cpp index 78f1ce5d..06712ecf 100644 --- a/src/core/index/StoredFieldsWriter.cpp +++ b/src/core/index/StoredFieldsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -16,210 +16,181 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - StoredFieldsWriter::StoredFieldsWriter(DocumentsWriterPtr docWriter, FieldInfosPtr fieldInfos) - { - lastDocID = 0; - docFreeList = Collection::newInstance(1); - freeCount = 0; - allocCount = 0; - - this->_docWriter = docWriter; - this->fieldInfos = fieldInfos; - } - - StoredFieldsWriter::~StoredFieldsWriter() - { - } - - StoredFieldsWriterPerThreadPtr StoredFieldsWriter::addThread(DocStatePtr docState) - { - return newLucene(docState, shared_from_this()); - } - - void StoredFieldsWriter::flush(SegmentWriteStatePtr state) - { - SyncLock syncLock(this); - if (state->numDocsInStore > 0) - { - // It's possible that all documents seen in this segment hit non-aborting exceptions, - // in which case we will not have yet init'd the FieldsWriter - initFieldsWriter(); - - // Fill fdx file to include any final docs that we skipped because they hit non-aborting - // exceptions - fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); - } - - if (fieldsWriter) - fieldsWriter->flush(); +namespace Lucene { + +StoredFieldsWriter::StoredFieldsWriter(const DocumentsWriterPtr& docWriter, const FieldInfosPtr& fieldInfos) { + lastDocID = 0; + docFreeList = Collection::newInstance(1); + freeCount = 0; + allocCount = 0; + + this->_docWriter = docWriter; + this->fieldInfos = fieldInfos; +} + +StoredFieldsWriter::~StoredFieldsWriter() { +} + +StoredFieldsWriterPerThreadPtr StoredFieldsWriter::addThread(const DocStatePtr& docState) { + return newLucene(docState, shared_from_this()); +} + +void StoredFieldsWriter::flush(const SegmentWriteStatePtr& state) { + SyncLock syncLock(this); + if (state->numDocsInStore > 0) { + // It's possible that all documents seen in this segment hit non-aborting exceptions, + // in which case we will not have yet init'd the FieldsWriter + initFieldsWriter(); + + // 
Fill fdx file to include any final docs that we skipped because they hit non-aborting + // exceptions + fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); } - - void StoredFieldsWriter::initFieldsWriter() - { - if (!fieldsWriter) - { - DocumentsWriterPtr docWriter(_docWriter); - String docStoreSegment(docWriter->getDocStoreSegment()); - if (!docStoreSegment.empty()) - { - fieldsWriter = newLucene(docWriter->directory, docStoreSegment, fieldInfos); - docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::FIELDS_EXTENSION()); - docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); - lastDocID = 0; - } - } + + if (fieldsWriter) { + fieldsWriter->flush(); } - - void StoredFieldsWriter::closeDocStore(SegmentWriteStatePtr state) - { - SyncLock syncLock(this); - int32_t inc = state->numDocsInStore - lastDocID; - if (inc > 0) - { - initFieldsWriter(); - fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); - } - - if (fieldsWriter) - { - fieldsWriter->close(); - fieldsWriter.reset(); +} + +void StoredFieldsWriter::initFieldsWriter() { + if (!fieldsWriter) { + DocumentsWriterPtr docWriter(_docWriter); + String docStoreSegment(docWriter->getDocStoreSegment()); + if (!docStoreSegment.empty()) { + fieldsWriter = newLucene(docWriter->directory, docStoreSegment, fieldInfos); + docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::FIELDS_EXTENSION()); + docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); lastDocID = 0; - BOOST_ASSERT(!state->docStoreSegmentName.empty()); - state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_EXTENSION()); - state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); - - DocumentsWriterPtr docWriter(state->_docWriter); - docWriter->removeOpenFile(state->docStoreSegmentName + L"." 
+ IndexFileNames::FIELDS_EXTENSION()); - docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); - - String fileName(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); - - if (4 + ((int64_t)state->numDocsInStore) * 8 != state->directory->fileLength(fileName)) - { - boost::throw_exception(RuntimeException(L"after flush: fdx size mismatch: " + StringUtils::toString(state->numDocsInStore) + - L" docs vs " + StringUtils::toString(state->directory->fileLength(fileName)) + - L" length in bytes of " + fileName + L" file exists?=" + - StringUtils::toString(state->directory->fileExists(fileName)))); - } } } - - StoredFieldsWriterPerDocPtr StoredFieldsWriter::getPerDoc() - { - SyncLock syncLock(this); - if (freeCount == 0) - { - ++allocCount; - if (allocCount > docFreeList.size()) - { - // Grow our free list up front to make sure we have enough space to recycle all - // outstanding StoredFieldsWriterPerDoc instances - BOOST_ASSERT(allocCount == docFreeList.size() + 1); - docFreeList.resize(MiscUtils::getNextSize(allocCount)); - } - return newLucene(shared_from_this()); +} + +void StoredFieldsWriter::closeDocStore(const SegmentWriteStatePtr& state) { + SyncLock syncLock(this); + int32_t inc = state->numDocsInStore - lastDocID; + if (inc > 0) { + initFieldsWriter(); + fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); + } + + if (fieldsWriter) { + fieldsWriter->close(); + fieldsWriter.reset(); + lastDocID = 0; + BOOST_ASSERT(!state->docStoreSegmentName.empty()); + state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_EXTENSION()); + state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); + + DocumentsWriterPtr docWriter(state->_docWriter); + docWriter->removeOpenFile(state->docStoreSegmentName + L"." 
+ IndexFileNames::FIELDS_EXTENSION()); + docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); + + String fileName(state->docStoreSegmentName + L"." + IndexFileNames::FIELDS_INDEX_EXTENSION()); + + if (4 + ((int64_t)state->numDocsInStore) * 8 != state->directory->fileLength(fileName)) { + boost::throw_exception(RuntimeException(L"after flush: fdx size mismatch: " + StringUtils::toString(state->numDocsInStore) + + L" docs vs " + StringUtils::toString(state->directory->fileLength(fileName)) + + L" length in bytes of " + fileName + L" file exists?=" + + StringUtils::toString(state->directory->fileExists(fileName)))); } - else - return docFreeList[--freeCount]; } - - void StoredFieldsWriter::abort() - { - SyncLock syncLock(this); - if (fieldsWriter) - { - try - { - fieldsWriter->close(); - } - catch (...) - { - } - fieldsWriter.reset(); - lastDocID = 0; +} + +StoredFieldsWriterPerDocPtr StoredFieldsWriter::getPerDoc() { + SyncLock syncLock(this); + if (freeCount == 0) { + ++allocCount; + if (allocCount > docFreeList.size()) { + // Grow our free list up front to make sure we have enough space to recycle all + // outstanding StoredFieldsWriterPerDoc instances + BOOST_ASSERT(allocCount == docFreeList.size() + 1); + docFreeList.resize(MiscUtils::getNextSize(allocCount)); } + return newLucene(shared_from_this()); + } else { + return docFreeList[--freeCount]; } - - void StoredFieldsWriter::fill(int32_t docID) - { - int32_t docStoreOffset = DocumentsWriterPtr(_docWriter)->getDocStoreOffset(); - - // We must "catch up" for all docs before us that had no stored fields - int32_t end = docID + docStoreOffset; - while (lastDocID < end) - { - fieldsWriter->skipDocument(); - ++lastDocID; +} + +void StoredFieldsWriter::abort() { + SyncLock syncLock(this); + if (fieldsWriter) { + try { + fieldsWriter->close(); + } catch (...) 
{ } + fieldsWriter.reset(); + lastDocID = 0; } - - void StoredFieldsWriter::finishDocument(StoredFieldsWriterPerDocPtr perDoc) - { - SyncLock syncLock(this); - IndexWriterPtr writer(DocumentsWriterPtr(_docWriter)->_writer); - BOOST_ASSERT(writer->testPoint(L"StoredFieldsWriter.finishDocument start")); - initFieldsWriter(); - - fill(perDoc->docID); - - // Append stored fields to the real FieldsWriter - fieldsWriter->flushDocument(perDoc->numStoredFields, perDoc->fdt); +} + +void StoredFieldsWriter::fill(int32_t docID) { + int32_t docStoreOffset = DocumentsWriterPtr(_docWriter)->getDocStoreOffset(); + + // We must "catch up" for all docs before us that had no stored fields + int32_t end = docID + docStoreOffset; + while (lastDocID < end) { + fieldsWriter->skipDocument(); ++lastDocID; - perDoc->reset(); - free(perDoc); - BOOST_ASSERT(writer->testPoint(L"StoredFieldsWriter.finishDocument end")); - } - - bool StoredFieldsWriter::freeRAM() - { - return false; - } - - void StoredFieldsWriter::free(StoredFieldsWriterPerDocPtr perDoc) - { - SyncLock syncLock(this); - BOOST_ASSERT(freeCount < docFreeList.size()); - BOOST_ASSERT(perDoc->numStoredFields == 0); - BOOST_ASSERT(perDoc->fdt->length() == 0); - BOOST_ASSERT(perDoc->fdt->getFilePointer() == 0); - docFreeList[freeCount++] = perDoc; - } - - StoredFieldsWriterPerDoc::StoredFieldsWriterPerDoc(StoredFieldsWriterPtr fieldsWriter) - { - this->_fieldsWriter = fieldsWriter; - buffer = DocumentsWriterPtr(fieldsWriter->_docWriter)->newPerDocBuffer(); - fdt = newLucene(buffer); - numStoredFields = 0; - } - - StoredFieldsWriterPerDoc::~StoredFieldsWriterPerDoc() - { - } - - void StoredFieldsWriterPerDoc::reset() - { - fdt->reset(); - buffer->recycle(); - numStoredFields = 0; - } - - void StoredFieldsWriterPerDoc::abort() - { - reset(); - StoredFieldsWriterPtr(_fieldsWriter)->free(shared_from_this()); - } - - int64_t StoredFieldsWriterPerDoc::sizeInBytes() - { - return buffer->getSizeInBytes(); - } - - void 
StoredFieldsWriterPerDoc::finish() - { - StoredFieldsWriterPtr(_fieldsWriter)->finishDocument(shared_from_this()); } } + +void StoredFieldsWriter::finishDocument(const StoredFieldsWriterPerDocPtr& perDoc) { + SyncLock syncLock(this); + IndexWriterPtr writer(DocumentsWriterPtr(_docWriter)->_writer); + BOOST_ASSERT(writer->testPoint(L"StoredFieldsWriter.finishDocument start")); + initFieldsWriter(); + + fill(perDoc->docID); + + // Append stored fields to the real FieldsWriter + fieldsWriter->flushDocument(perDoc->numStoredFields, perDoc->fdt); + ++lastDocID; + perDoc->reset(); + free(perDoc); + BOOST_ASSERT(writer->testPoint(L"StoredFieldsWriter.finishDocument end")); +} + +bool StoredFieldsWriter::freeRAM() { + return false; +} + +void StoredFieldsWriter::free(const StoredFieldsWriterPerDocPtr& perDoc) { + SyncLock syncLock(this); + BOOST_ASSERT(freeCount < docFreeList.size()); + BOOST_ASSERT(perDoc->numStoredFields == 0); + BOOST_ASSERT(perDoc->fdt->length() == 0); + BOOST_ASSERT(perDoc->fdt->getFilePointer() == 0); + docFreeList[freeCount++] = perDoc; +} + +StoredFieldsWriterPerDoc::StoredFieldsWriterPerDoc(const StoredFieldsWriterPtr& fieldsWriter) { + this->_fieldsWriter = fieldsWriter; + buffer = DocumentsWriterPtr(fieldsWriter->_docWriter)->newPerDocBuffer(); + fdt = newLucene(buffer); + numStoredFields = 0; +} + +StoredFieldsWriterPerDoc::~StoredFieldsWriterPerDoc() { +} + +void StoredFieldsWriterPerDoc::reset() { + fdt->reset(); + buffer->recycle(); + numStoredFields = 0; +} + +void StoredFieldsWriterPerDoc::abort() { + reset(); + StoredFieldsWriterPtr(_fieldsWriter)->free(shared_from_this()); +} + +int64_t StoredFieldsWriterPerDoc::sizeInBytes() { + return buffer->getSizeInBytes(); +} + +void StoredFieldsWriterPerDoc::finish() { + StoredFieldsWriterPtr(_fieldsWriter)->finishDocument(shared_from_this()); +} + +} diff --git a/src/core/index/StoredFieldsWriterPerThread.cpp b/src/core/index/StoredFieldsWriterPerThread.cpp index 870bacb3..888c128e 100644 --- 
a/src/core/index/StoredFieldsWriterPerThread.cpp +++ b/src/core/index/StoredFieldsWriterPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,61 +10,53 @@ #include "FieldsWriter.h" #include "RAMOutputStream.h" -namespace Lucene -{ - StoredFieldsWriterPerThread::StoredFieldsWriterPerThread(DocStatePtr docState, StoredFieldsWriterPtr storedFieldsWriter) - { - this->_storedFieldsWriter = storedFieldsWriter; - this->docState = docState; - localFieldsWriter = newLucene(IndexOutputPtr(), IndexOutputPtr(), storedFieldsWriter->fieldInfos); - } - - StoredFieldsWriterPerThread::~StoredFieldsWriterPerThread() - { - } - - void StoredFieldsWriterPerThread::startDocument() - { - if (doc) - { - // Only happens if previous document hit non-aborting exception while writing stored fields - // into localFieldsWriter - doc->reset(); - doc->docID = docState->docID; - } +namespace Lucene { + +StoredFieldsWriterPerThread::StoredFieldsWriterPerThread(const DocStatePtr& docState, const StoredFieldsWriterPtr& storedFieldsWriter) { + this->_storedFieldsWriter = storedFieldsWriter; + this->docState = docState; + localFieldsWriter = newLucene(IndexOutputPtr(), IndexOutputPtr(), storedFieldsWriter->fieldInfos); +} + +StoredFieldsWriterPerThread::~StoredFieldsWriterPerThread() { +} + +void StoredFieldsWriterPerThread::startDocument() { + if (doc) { + // Only happens if previous document hit non-aborting exception while writing stored fields + // into localFieldsWriter + doc->reset(); + doc->docID = docState->docID; } - - void StoredFieldsWriterPerThread::addField(FieldablePtr field, FieldInfoPtr fieldInfo) - { - if 
(!doc) - { - doc = StoredFieldsWriterPtr(_storedFieldsWriter)->getPerDoc(); - doc->docID = docState->docID; - localFieldsWriter->setFieldsStream(doc->fdt); - BOOST_ASSERT(doc->numStoredFields == 0); - BOOST_ASSERT(doc->fdt->length() == 0); - BOOST_ASSERT(doc->fdt->getFilePointer() == 0); - } - - localFieldsWriter->writeField(fieldInfo, field); - BOOST_ASSERT(docState->testPoint(L"StoredFieldsWriterPerThread.processFields.writeField")); - ++doc->numStoredFields; +} + +void StoredFieldsWriterPerThread::addField(const FieldablePtr& field, const FieldInfoPtr& fieldInfo) { + if (!doc) { + doc = StoredFieldsWriterPtr(_storedFieldsWriter)->getPerDoc(); + doc->docID = docState->docID; + localFieldsWriter->setFieldsStream(doc->fdt); + BOOST_ASSERT(doc->numStoredFields == 0); + BOOST_ASSERT(doc->fdt->length() == 0); + BOOST_ASSERT(doc->fdt->getFilePointer() == 0); } - - DocWriterPtr StoredFieldsWriterPerThread::finishDocument() - { - // If there were any stored fields in this doc, doc will be non-null; else it's null. - DocWriterPtr finishDoc(doc); + + localFieldsWriter->writeField(fieldInfo, field); + BOOST_ASSERT(docState->testPoint(L"StoredFieldsWriterPerThread.processFields.writeField")); + ++doc->numStoredFields; +} + +DocWriterPtr StoredFieldsWriterPerThread::finishDocument() { + // If there were any stored fields in this doc, doc will be non-null; else it's null. + DocWriterPtr finishDoc(doc); + doc.reset(); + return finishDoc; +} + +void StoredFieldsWriterPerThread::abort() { + if (doc) { + doc->abort(); doc.reset(); - return finishDoc; - } - - void StoredFieldsWriterPerThread::abort() - { - if (doc) - { - doc->abort(); - doc.reset(); - } } } + +} diff --git a/src/core/index/Term.cpp b/src/core/index/Term.cpp index ae96d75c..d81b740b 100644 --- a/src/core/index/Term.cpp +++ b/src/core/index/Term.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,71 +9,67 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - Term::Term(const String& fld, const String& txt) : _field(fld), _text(txt) - { - } - - Term::~Term() - { - } - - String Term::field() - { - return _field; - } - - String Term::text() - { - return _text; - } - - TermPtr Term::createTerm(const String& text) - { - return newLucene(_field, text); - } - - bool Term::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!other) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - TermPtr otherTerm(boost::dynamic_pointer_cast(other)); - if (!otherTerm) - return false; - return (_field == otherTerm->_field && _text == otherTerm->_text); +namespace Lucene { + +Term::Term(const String& fld, const String& txt) : _field(fld), _text(txt) { +} + +Term::~Term() { +} + +String Term::field() { + return _field; +} + +String Term::text() { + return _text; +} + +TermPtr Term::createTerm(const String& text) { + return newLucene(_field, text); +} + +bool Term::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t Term::hashCode() - { - int32_t prime = 31; - int32_t result = 1; - result = prime * result + (_field.empty() ? 0 : StringUtils::hashCode(_field)); - result = prime * result + (_text.empty() ? 
0 : StringUtils::hashCode(_text)); - return result; + if (!other) { + return false; } - - int32_t Term::compareTo(LuceneObjectPtr other) - { - TermPtr otherTerm(boost::static_pointer_cast(other)); - if (_field == otherTerm->_field) - return _text.compare(otherTerm->_text); - else - return _field.compare(otherTerm->_field); + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - void Term::set(const String& fld, const String& txt) - { - _field = fld; - _text = txt; + TermPtr otherTerm(boost::dynamic_pointer_cast(other)); + if (!otherTerm) { + return false; } - - String Term::toString() - { - return _field + L":" + _text; + return (_field == otherTerm->_field && _text == otherTerm->_text); +} + +int32_t Term::hashCode() { + int32_t prime = 31; + int32_t result = 1; + result = prime * result + (_field.empty() ? 0 : StringUtils::hashCode(_field)); + result = prime * result + (_text.empty() ? 0 : StringUtils::hashCode(_text)); + return result; +} + +int32_t Term::compareTo(const LuceneObjectPtr& other) { + TermPtr otherTerm(boost::static_pointer_cast(other)); + if (_field == otherTerm->_field) { + return _text.compare(otherTerm->_text); + } else { + return _field.compare(otherTerm->_field); } } + +void Term::set(const String& fld, const String& txt) { + _field = fld; + _text = txt; +} + +String Term::toString() { + return _field + L":" + _text; +} + +} diff --git a/src/core/index/TermBuffer.cpp b/src/core/index/TermBuffer.cpp index b6baedc1..0c3394a9 100644 --- a/src/core/index/TermBuffer.cpp +++ b/src/core/index/TermBuffer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,115 +13,107 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - TermBuffer::TermBuffer() - { - preUTF8Strings = false; - text = newLucene(); - bytes = newLucene(); - } - - TermBuffer::~TermBuffer() - { - } - - int32_t TermBuffer::compareTo(LuceneObjectPtr other) - { - TermBufferPtr otherTermBuffer(boost::static_pointer_cast(other)); - if (field == otherTermBuffer->field) - return compareChars(text->result.get(), text->length, otherTermBuffer->text->result.get(), otherTermBuffer->text->length); - else - return field.compare(otherTermBuffer->field); - } - - int32_t TermBuffer::compareChars(wchar_t* chars1, int32_t len1, wchar_t* chars2, int32_t len2) - { - int32_t end = len1 < len2 ? len1 : len2; - for (int32_t k = 0; k < end; ++k) - { - wchar_t c1 = chars1[k]; - wchar_t c2 = chars2[k]; - if (c1 != c2) - return c1 - c2; - } - return len1 - len2; - } - - void TermBuffer::setPreUTF8Strings() - { - preUTF8Strings = true; - } - - void TermBuffer::read(IndexInputPtr input, FieldInfosPtr fieldInfos) - { - this->term.reset(); // invalidate cache - int32_t start = input->readVInt(); - int32_t length = input->readVInt(); - int32_t totalLength = start + length; - if (preUTF8Strings) - text->setLength(start + input->readChars(text->result.get(), start, length)); - else - { - StringUtils::toUTF8(text->result.get(), text->length, bytes); - bytes->setLength(totalLength); - input->readBytes(bytes->result.get(), start, length); - StringUtils::toUnicode(bytes->result.get(), totalLength, text); - } - this->field = fieldInfos->fieldName(input->readVInt()); +namespace Lucene { + +TermBuffer::TermBuffer() { + preUTF8Strings = false; + text = newLucene(); + bytes = newLucene(); +} + +TermBuffer::~TermBuffer() { +} + +int32_t TermBuffer::compareTo(const LuceneObjectPtr& other) { + TermBufferPtr otherTermBuffer(boost::static_pointer_cast(other)); + if (field == 
otherTermBuffer->field) { + return compareChars(text->result.get(), text->length, otherTermBuffer->text->result.get(), otherTermBuffer->text->length); + } else { + return field.compare(otherTermBuffer->field); } - - void TermBuffer::set(TermPtr term) - { - if (!term) - { - reset(); - return; +} + +int32_t TermBuffer::compareChars(wchar_t* chars1, int32_t len1, wchar_t* chars2, int32_t len2) { + int32_t end = len1 < len2 ? len1 : len2; + for (int32_t k = 0; k < end; ++k) { + wchar_t c1 = chars1[k]; + wchar_t c2 = chars2[k]; + if (c1 != c2) { + return c1 - c2; } - String termText(term->text()); - int32_t termLen = termText.length(); - text->setLength(termLen); - MiscUtils::arrayCopy(termText.begin(), 0, text->result.get(), 0, termLen); - field = term->field(); - this->term = term; } - - void TermBuffer::set(TermBufferPtr other) - { - text->copyText(other->text); - field = other->field; - term = other->term; + return len1 - len2; +} + +void TermBuffer::setPreUTF8Strings() { + preUTF8Strings = true; +} + +void TermBuffer::read(const IndexInputPtr& input, const FieldInfosPtr& fieldInfos) { + this->term.reset(); // invalidate cache + int32_t start = input->readVInt(); + int32_t length = input->readVInt(); + int32_t totalLength = start + length; + if (preUTF8Strings) { + text->setLength(totalLength); + text->setLength(start + input->readChars(text->result.get(), start, length)); + } else { + StringUtils::toUTF8(text->result.get(), text->length, bytes); + bytes->setLength(totalLength); + input->readBytes(bytes->result.get(), start, length); + StringUtils::toUnicode(bytes->result.get(), totalLength, text); } + this->field = fieldInfos->fieldName(input->readVInt()); +} - void TermBuffer::reset() - { - field.clear(); - text->setLength(0); - term.reset(); +void TermBuffer::set(const TermPtr& term) { + if (!term) { + reset(); + return; } - - TermPtr TermBuffer::toTerm() - { - if (field.empty()) // unset - return TermPtr(); - - if (!term) - term = newLucene(field, 
String(text->result.get(), text->length)); - - return term; + String termText(term->text()); + int32_t termLen = termText.length(); + text->setLength(termLen); + MiscUtils::arrayCopy(termText.begin(), 0, text->result.get(), 0, termLen); + field = term->field(); + this->term = term; +} + +void TermBuffer::set(const TermBufferPtr& other) { + text->copyText(other->text); + field = other->field; + term = other->term; +} + +void TermBuffer::reset() { + field.clear(); + text->setLength(0); + term.reset(); +} + +TermPtr TermBuffer::toTerm() { + if (field.empty()) { // unset + return TermPtr(); } - - LuceneObjectPtr TermBuffer::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? other : newLucene(); - TermBufferPtr cloneBuffer(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); - cloneBuffer->field = field; - cloneBuffer->term = term; - cloneBuffer->preUTF8Strings = preUTF8Strings; - - cloneBuffer->bytes = newLucene(); - cloneBuffer->text = newLucene(); - cloneBuffer->text->copyText(text); - return cloneBuffer; + + if (!term) { + term = newLucene(field, String(text->result.get(), text->length)); } + + return term; +} + +LuceneObjectPtr TermBuffer::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + TermBufferPtr cloneBuffer(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); + cloneBuffer->field = field; + cloneBuffer->term = term; + cloneBuffer->preUTF8Strings = preUTF8Strings; + + cloneBuffer->bytes = newLucene(); + cloneBuffer->text = newLucene(); + cloneBuffer->text->copyText(text); + return cloneBuffer; +} + } diff --git a/src/core/index/TermDocs.cpp b/src/core/index/TermDocs.cpp index ef8ac077..1c27896a 100644 --- a/src/core/index/TermDocs.cpp +++ b/src/core/index/TermDocs.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,57 +7,49 @@ #include "LuceneInc.h" #include "TermDocs.h" -namespace Lucene -{ - TermDocs::TermDocs() - { - } - - void TermDocs::seek(TermPtr term) - { - BOOST_ASSERT(false); - // override - } - - void TermDocs::seek(TermEnumPtr termEnum) - { - BOOST_ASSERT(false); - // override - } - - int32_t TermDocs::doc() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t TermDocs::freq() - { - BOOST_ASSERT(false); - return 0; // override - } - - bool TermDocs::next() - { - BOOST_ASSERT(false); - return false; // override - } - - int32_t TermDocs::read(Collection docs, Collection freqs) - { - BOOST_ASSERT(false); - return 0; // override - } - - bool TermDocs::skipTo(int32_t target) - { - BOOST_ASSERT(false); - return false; // override - } - - void TermDocs::close() - { - BOOST_ASSERT(false); - // override - } +namespace Lucene { + +TermDocs::TermDocs() { +} + +void TermDocs::seek(const TermPtr& term) { + BOOST_ASSERT(false); + // override +} + +void TermDocs::seek(const TermEnumPtr& termEnum) { + BOOST_ASSERT(false); + // override +} + +int32_t TermDocs::doc() { + BOOST_ASSERT(false); + return 0; // override +} + +int32_t TermDocs::freq() { + BOOST_ASSERT(false); + return 0; // override +} + +bool TermDocs::next() { + BOOST_ASSERT(false); + return false; // override +} + +int32_t TermDocs::read(Collection& docs, Collection& freqs) { + BOOST_ASSERT(false); + return 0; // override +} + +bool TermDocs::skipTo(int32_t target) { + BOOST_ASSERT(false); + return false; // override +} + +void TermDocs::close() { + BOOST_ASSERT(false); + // override +} + } diff --git a/src/core/index/TermEnum.cpp b/src/core/index/TermEnum.cpp index 442874d0..aa95bc7c 100644 --- a/src/core/index/TermEnum.cpp +++ b/src/core/index/TermEnum.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "TermEnum.h" -namespace Lucene -{ - TermEnum::~TermEnum() - { - } +namespace Lucene { + +TermEnum::~TermEnum() { +} + } diff --git a/src/core/index/TermFreqVector.cpp b/src/core/index/TermFreqVector.cpp index f081f599..3fe9e8a9 100644 --- a/src/core/index/TermFreqVector.cpp +++ b/src/core/index/TermFreqVector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,49 +7,42 @@ #include "LuceneInc.h" #include "TermFreqVector.h" -namespace Lucene -{ - TermFreqVector::TermFreqVector() - { - } - - TermFreqVector::~TermFreqVector() - { - } - - String TermFreqVector::getField() - { - BOOST_ASSERT(false); - return L""; // override - } - - int32_t TermFreqVector::size() - { - BOOST_ASSERT(false); - return 0; // override - } - - Collection TermFreqVector::getTerms() - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection TermFreqVector::getTermFrequencies() - { - BOOST_ASSERT(false); - return Collection(); // override - } - - int32_t TermFreqVector::indexOf(const String& term) - { - BOOST_ASSERT(false); - return 0; // override - } - - Collection TermFreqVector::indexesOf(Collection terms, int32_t start, int32_t length) - { - BOOST_ASSERT(false); - return Collection(); // override - } +namespace Lucene { + +TermFreqVector::TermFreqVector() { +} + +TermFreqVector::~TermFreqVector() { +} + +String TermFreqVector::getField() { + BOOST_ASSERT(false); + return L""; // override +} + +int32_t TermFreqVector::size() { + BOOST_ASSERT(false); + return 0; // override +} + +Collection TermFreqVector::getTerms() { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection TermFreqVector::getTermFrequencies() { + BOOST_ASSERT(false); + return Collection(); // override +} + +int32_t TermFreqVector::indexOf(const String& term) { + BOOST_ASSERT(false); + return 0; // override +} + +Collection TermFreqVector::indexesOf(Collection terms, int32_t start, int32_t length) { + BOOST_ASSERT(false); + return Collection(); // override +} + } diff --git a/src/core/index/TermInfo.cpp b/src/core/index/TermInfo.cpp index bb07db8a..f2cd186e 100644 --- a/src/core/index/TermInfo.cpp +++ b/src/core/index/TermInfo.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan 
Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,38 +7,34 @@ #include "LuceneInc.h" #include "TermInfo.h" -namespace Lucene -{ - TermInfo::TermInfo(TermInfoPtr ti) - { - set(ti); - } - - TermInfo::TermInfo(int32_t df, int64_t fp, int64_t pp) - { - docFreq = df; - freqPointer = fp; - proxPointer = pp; - skipOffset = 0; - } - - TermInfo::~TermInfo() - { - } - - void TermInfo::set(int32_t docFreq, int64_t freqPointer, int64_t proxPointer, int32_t skipOffset) - { - this->docFreq = docFreq; - this->freqPointer = freqPointer; - this->proxPointer = proxPointer; - this->skipOffset = skipOffset; - } - - void TermInfo::set(TermInfoPtr ti) - { - docFreq = ti->docFreq; - freqPointer = ti->freqPointer; - proxPointer = ti->proxPointer; - skipOffset = ti->skipOffset; - } +namespace Lucene { + +TermInfo::TermInfo(const TermInfoPtr& ti) { + set(ti); +} + +TermInfo::TermInfo(int32_t df, int64_t fp, int64_t pp) { + docFreq = df; + freqPointer = fp; + proxPointer = pp; + skipOffset = 0; +} + +TermInfo::~TermInfo() { +} + +void TermInfo::set(int32_t docFreq, int64_t freqPointer, int64_t proxPointer, int32_t skipOffset) { + this->docFreq = docFreq; + this->freqPointer = freqPointer; + this->proxPointer = proxPointer; + this->skipOffset = skipOffset; +} + +void TermInfo::set(const TermInfoPtr& ti) { + docFreq = ti->docFreq; + freqPointer = ti->freqPointer; + proxPointer = ti->proxPointer; + skipOffset = ti->skipOffset; +} + } diff --git a/src/core/index/TermInfosReader.cpp b/src/core/index/TermInfosReader.cpp index 7923d1df..39e3db16 100644 --- a/src/core/index/TermInfosReader.cpp +++ b/src/core/index/TermInfosReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan 
Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,237 +12,212 @@ #include "Term.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t TermInfosReader::DEFAULT_CACHE_SIZE = 1024; - - TermInfosReader::TermInfosReader(DirectoryPtr dir, const String& seg, FieldInfosPtr fis, int32_t readBufferSize, int32_t indexDivisor) - { - bool success = false; - - if (indexDivisor < 1 && indexDivisor != -1) - boost::throw_exception(IllegalArgumentException(L"indexDivisor must be -1 (don't load terms index) or greater than 0: got " + StringUtils::toString(indexDivisor))); - - LuceneException finally; - try - { - directory = dir; - segment = seg; - fieldInfos = fis; - - origEnum = newLucene(directory->openInput(segment + L"." + IndexFileNames::TERMS_EXTENSION(), readBufferSize), fieldInfos, false); - _size = origEnum->size; - - if (indexDivisor != -1) - { - // Load terms index - totalIndexInterval = origEnum->indexInterval * indexDivisor; - SegmentTermEnumPtr indexEnum(newLucene(directory->openInput(segment + L"." 
+ IndexFileNames::TERMS_INDEX_EXTENSION(), readBufferSize), fieldInfos, true)); - - try - { - int32_t indexSize = 1 + ((int32_t)indexEnum->size - 1) / indexDivisor; // otherwise read index - - indexTerms = Collection::newInstance(indexSize); - indexInfos = Collection::newInstance(indexSize); - indexPointers = Collection::newInstance(indexSize); - - for (int32_t i = 0; indexEnum->next(); ++i) - { - indexTerms[i] = indexEnum->term(); - indexInfos[i] = indexEnum->termInfo(); - indexPointers[i] = indexEnum->indexPointer; - - for (int32_t j = 1; j < indexDivisor; ++j) - { - if (!indexEnum->next()) - break; +namespace Lucene { + +const int32_t TermInfosReader::DEFAULT_CACHE_SIZE = 1024; + +TermInfosReader::TermInfosReader(const DirectoryPtr& dir, const String& seg, const FieldInfosPtr& fis, int32_t readBufferSize, int32_t indexDivisor) { + bool success = false; + + if (indexDivisor < 1 && indexDivisor != -1) { + boost::throw_exception(IllegalArgumentException(L"indexDivisor must be -1 (don't load terms index) or greater than 0: got " + StringUtils::toString(indexDivisor))); + } + + LuceneException finally; + try { + directory = dir; + segment = seg; + fieldInfos = fis; + + origEnum = newLucene(directory->openInput(segment + L"." + IndexFileNames::TERMS_EXTENSION(), readBufferSize), fieldInfos, false); + _size = origEnum->size; + + if (indexDivisor != -1) { + // Load terms index + totalIndexInterval = origEnum->indexInterval * indexDivisor; + SegmentTermEnumPtr indexEnum(newLucene(directory->openInput(segment + L"." 
+ IndexFileNames::TERMS_INDEX_EXTENSION(), readBufferSize), fieldInfos, true)); + + try { + int32_t indexSize = 1 + ((int32_t)indexEnum->size - 1) / indexDivisor; // otherwise read index + + indexTerms = Collection::newInstance(indexSize); + indexInfos = Collection::newInstance(indexSize); + indexPointers = Collection::newInstance(indexSize); + + for (int32_t i = 0; indexEnum->next(); ++i) { + indexTerms[i] = indexEnum->term(); + indexInfos[i] = indexEnum->termInfo(); + indexPointers[i] = indexEnum->indexPointer; + + for (int32_t j = 1; j < indexDivisor; ++j) { + if (!indexEnum->next()) { + break; } } } - catch (LuceneException& e) - { - finally = e; - } - indexEnum->close(); - } - else - { - // Do not load terms index - totalIndexInterval = -1; + } catch (LuceneException& e) { + finally = e; } - success = true; + indexEnum->close(); + } else { + // Do not load terms index + totalIndexInterval = -1; } - catch (LuceneException& e) - { - finally = e; - } - // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. - // In this case, we want to explicitly close any subset of things that were opened. - if (!success) - close(); - finally.throwException(); - } - - TermInfosReader::~TermInfosReader() - { + success = true; + } catch (LuceneException& e) { + finally = e; } - - int32_t TermInfosReader::getMaxSkipLevels() - { - return origEnum->maxSkipLevels; + // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception above. + // In this case, we want to explicitly close any subset of things that were opened. 
+ if (!success) { + close(); } - - int32_t TermInfosReader::getSkipInterval() - { - return origEnum->skipInterval; + finally.throwException(); +} + +TermInfosReader::~TermInfosReader() { +} + +int32_t TermInfosReader::getMaxSkipLevels() { + return origEnum->maxSkipLevels; +} + +int32_t TermInfosReader::getSkipInterval() { + return origEnum->skipInterval; +} + +void TermInfosReader::close() { + if (origEnum) { + origEnum->close(); } - - void TermInfosReader::close() - { - if (origEnum) - origEnum->close(); - threadResources.close(); + threadResources.close(); +} + +int64_t TermInfosReader::size() { + return _size; +} + +TermInfosReaderThreadResourcesPtr TermInfosReader::getThreadResources() { + TermInfosReaderThreadResourcesPtr resources(threadResources.get()); + if (!resources) { + resources = newLucene(); + resources->termEnum = terms(); + + // Cache does not have to be thread-safe, it is only used by one thread at the same time + resources->termInfoCache = newInstance(DEFAULT_CACHE_SIZE); + threadResources.set(resources); } - - int64_t TermInfosReader::size() - { - return _size; + return resources; +} + +int32_t TermInfosReader::getIndexOffset(const TermPtr& term) { + // binary search indexTerms + Collection::iterator indexTerm = std::upper_bound(indexTerms.begin(), indexTerms.end(), term, luceneCompare()); + return (std::distance(indexTerms.begin(), indexTerm) - 1); +} + +void TermInfosReader::seekEnum(const SegmentTermEnumPtr& enumerator, int32_t indexOffset) { + enumerator->seek(indexPointers[indexOffset], ((int64_t)indexOffset * (int64_t)totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]); +} + +TermInfoPtr TermInfosReader::get(const TermPtr& term) { + return get(term, true); +} + +TermInfoPtr TermInfosReader::get(const TermPtr& term, bool useCache) { + if (_size == 0) { + return TermInfoPtr(); } - - TermInfosReaderThreadResourcesPtr TermInfosReader::getThreadResources() - { - TermInfosReaderThreadResourcesPtr 
resources(threadResources.get()); - if (!resources) - { - resources = newLucene(); - resources->termEnum = terms(); - - // Cache does not have to be thread-safe, it is only used by one thread at the same time - resources->termInfoCache = newInstance(DEFAULT_CACHE_SIZE); - threadResources.set(resources); + + ensureIndexIsRead(); + + TermInfoPtr ti; + TermInfosReaderThreadResourcesPtr resources(getThreadResources()); + TermInfoCachePtr cache; + + if (useCache) { + cache = resources->termInfoCache; + // check the cache first if the term was recently looked up + ti = cache->get(term); + if (ti) { + return ti; } - return resources; - } - - int32_t TermInfosReader::getIndexOffset(TermPtr term) - { - // binary search indexTerms - Collection::iterator indexTerm = std::upper_bound(indexTerms.begin(), indexTerms.end(), term, luceneCompare()); - return (std::distance(indexTerms.begin(), indexTerm) - 1); - } - - void TermInfosReader::seekEnum(SegmentTermEnumPtr enumerator, int32_t indexOffset) - { - enumerator->seek(indexPointers[indexOffset], ((int64_t)indexOffset * (int64_t)totalIndexInterval) - 1, indexTerms[indexOffset], indexInfos[indexOffset]); } - - TermInfoPtr TermInfosReader::get(TermPtr term) - { - return get(term, true); - } - - TermInfoPtr TermInfosReader::get(TermPtr term, bool useCache) - { - if (_size == 0) - return TermInfoPtr(); - - ensureIndexIsRead(); - - TermInfoPtr ti; - TermInfosReaderThreadResourcesPtr resources(getThreadResources()); - TermInfoCachePtr cache; - - if (useCache) - { - cache = resources->termInfoCache; - // check the cache first if the term was recently looked up - ti = cache->get(term); - if (ti) - return ti; - } - - // optimize sequential access: first try scanning cached enum without seeking - SegmentTermEnumPtr enumerator = resources->termEnum; - - if (enumerator->term() && // term is at or past current + + // optimize sequential access: first try scanning cached enum without seeking + SegmentTermEnumPtr enumerator = 
resources->termEnum; + + if (enumerator->term() && // term is at or past current ((enumerator->prev() && term->compareTo(enumerator->prev()) > 0) || - term->compareTo(enumerator->term()) >= 0)) - { - int32_t enumOffset = (int32_t)(enumerator->position / totalIndexInterval ) + 1; - if (indexTerms.size() == enumOffset || // but before end of block - term->compareTo(indexTerms[enumOffset]) < 0) - { - // no need to seek - int32_t numScans = enumerator->scanTo(term); - if (enumerator->term() && term->compareTo(enumerator->term()) == 0) - { - ti = enumerator->termInfo(); - if (cache && numScans > 1) - { - // we only want to put this TermInfo into the cache if scanEnum skipped more - // than one dictionary entry. This prevents RangeQueries or WildcardQueries to - // wipe out the cache when they iterate over a large numbers of terms in order. - cache->put(term, ti); - } + term->compareTo(enumerator->term()) >= 0)) { + int32_t enumOffset = (int32_t)(enumerator->position / totalIndexInterval ) + 1; + if (indexTerms.size() == enumOffset || // but before end of block + term->compareTo(indexTerms[enumOffset]) < 0) { + // no need to seek + int32_t numScans = enumerator->scanTo(term); + if (enumerator->term() && term->compareTo(enumerator->term()) == 0) { + ti = enumerator->termInfo(); + if (cache && numScans > 1) { + // we only want to put this TermInfo into the cache if scanEnum skipped more + // than one dictionary entry. This prevents RangeQueries or WildcardQueries to + // wipe out the cache when they iterate over a large numbers of terms in order. 
+ cache->put(term, ti); } - else - ti.reset(); - return ti; + } else { + ti.reset(); } + return ti; } - - // random-access: must seek - seekEnum(enumerator, getIndexOffset(term)); - enumerator->scanTo(term); - if (enumerator->term() && term->compareTo(enumerator->term()) == 0) - { - ti = enumerator->termInfo(); - if (cache) - cache->put(term, ti); - } - else - ti.reset(); - return ti; } - - void TermInfosReader::ensureIndexIsRead() - { - if (!indexTerms) - boost::throw_exception(IllegalStateException(L"terms index was not loaded when this reader was created")); - } - - int64_t TermInfosReader::getPosition(TermPtr term) - { - if (_size == 0) - return -1; - - ensureIndexIsRead(); - int32_t indexOffset = getIndexOffset(term); - - SegmentTermEnumPtr enumerator(getThreadResources()->termEnum); - seekEnum(enumerator, indexOffset); - - while (term->compareTo(enumerator->term()) > 0 && enumerator->next()) - { + + // random-access: must seek + seekEnum(enumerator, getIndexOffset(term)); + enumerator->scanTo(term); + if (enumerator->term() && term->compareTo(enumerator->term()) == 0) { + ti = enumerator->termInfo(); + if (cache) { + cache->put(term, ti); } - - return term->compareTo(enumerator->term()) == 0 ? 
enumerator->position : -1; + } else { + ti.reset(); } - - SegmentTermEnumPtr TermInfosReader::terms() - { - return boost::static_pointer_cast(origEnum->clone()); + return ti; +} + +void TermInfosReader::ensureIndexIsRead() { + if (!indexTerms) { + boost::throw_exception(IllegalStateException(L"terms index was not loaded when this reader was created")); } - - SegmentTermEnumPtr TermInfosReader::terms(TermPtr term) - { - // don't use the cache in this call because we want to reposition the enumeration - get(term, false); - return boost::static_pointer_cast(getThreadResources()->termEnum->clone()); +} + +int64_t TermInfosReader::getPosition(const TermPtr& term) { + if (_size == 0) { + return -1; } - - TermInfosReaderThreadResources::~TermInfosReaderThreadResources() - { + + ensureIndexIsRead(); + int32_t indexOffset = getIndexOffset(term); + + SegmentTermEnumPtr enumerator(getThreadResources()->termEnum); + seekEnum(enumerator, indexOffset); + + while (term->compareTo(enumerator->term()) > 0 && enumerator->next()) { } + + return term->compareTo(enumerator->term()) == 0 ? enumerator->position : -1; +} + +SegmentTermEnumPtr TermInfosReader::terms() { + return boost::static_pointer_cast(origEnum->clone()); +} + +SegmentTermEnumPtr TermInfosReader::terms(const TermPtr& term) { + // don't use the cache in this call because we want to reposition the enumeration + get(term, false); + return boost::static_pointer_cast(getThreadResources()->termEnum->clone()); +} + +TermInfosReaderThreadResources::~TermInfosReaderThreadResources() { +} + } diff --git a/src/core/index/TermInfosWriter.cpp b/src/core/index/TermInfosWriter.cpp index d332592e..02f7d901 100644 --- a/src/core/index/TermInfosWriter.cpp +++ b/src/core/index/TermInfosWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -15,166 +15,158 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// The file format version, a negative number. - const int32_t TermInfosWriter::FORMAT = -3; - - /// Changed strings to true utf8 with length-in-bytes not length-in-chars. - const int32_t TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4; - - /// NOTE: always change this if you switch to a new format. - const int32_t TermInfosWriter::FORMAT_CURRENT = TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; - - TermInfosWriter::TermInfosWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval) - { - initialize(directory, segment, fis, interval, false); - otherWriter = newLucene(directory, segment, fis, interval, true); - } - - TermInfosWriter::TermInfosWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval, bool isIndex) - { - initialize(directory, segment, fis, interval, isIndex); - } - - TermInfosWriter::~TermInfosWriter() - { +namespace Lucene { + +/// The file format version, a negative number. +const int32_t TermInfosWriter::FORMAT = -3; + +/// Changed strings to true utf8 with length-in-bytes not length-in-chars. +const int32_t TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES = -4; + +/// NOTE: always change this if you switch to a new format. 
+const int32_t TermInfosWriter::FORMAT_CURRENT = TermInfosWriter::FORMAT_VERSION_UTF8_LENGTH_IN_BYTES; + +TermInfosWriter::TermInfosWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval) { + initialize(directory, segment, fis, interval, false); + otherWriter = newLucene(directory, segment, fis, interval, true); +} + +TermInfosWriter::TermInfosWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval, bool isIndex) { + initialize(directory, segment, fis, interval, isIndex); +} + +TermInfosWriter::~TermInfosWriter() { +} + +void TermInfosWriter::initialize() { + if (otherWriter) { + _other = otherWriter; + otherWriter->_other = shared_from_this(); } - - void TermInfosWriter::initialize() - { - if (otherWriter) - { - _other = otherWriter; - otherWriter->_other = shared_from_this(); +} + +void TermInfosWriter::initialize(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fis, int32_t interval, bool isi) { + lastTi = newLucene(); + utf8Result = newLucene(); + lastTermBytes = ByteArray::newInstance(10); + lastTermBytesLength = 0; + lastFieldNumber = -1; + skipInterval = 16; + maxSkipLevels = 10; + size = 0; + lastIndexPointer = 0; + + indexInterval = interval; + fieldInfos = fis; + isIndex = isi; + output = directory->createOutput(segment + (isIndex ? 
L".tii" : L".tis")); + output->writeInt(FORMAT_CURRENT); // write format + output->writeLong(0); // leave space for size + output->writeInt(indexInterval); // write indexInterval + output->writeInt(skipInterval); // write skipInterval + output->writeInt(maxSkipLevels); // write maxSkipLevels + BOOST_ASSERT(initUnicodeResults()); +} + +void TermInfosWriter::add(const TermPtr& term, const TermInfoPtr& ti) { + StringUtils::toUTF8(term->_text.c_str(), term->_text.size(), utf8Result); + add(fieldInfos->fieldNumber(term->_field), utf8Result->result, utf8Result->length, ti); +} + +bool TermInfosWriter::initUnicodeResults() { + unicodeResult1 = newLucene(); + unicodeResult2 = newLucene(); + return true; +} + +int32_t TermInfosWriter::compareToLastTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength) { + if (lastFieldNumber != fieldNumber) { + int32_t cmp = fieldInfos->fieldName(lastFieldNumber).compare(fieldInfos->fieldName(fieldNumber)); + // If there is a field named "" (empty string) then we will get 0 on this comparison, yet, it's "OK". + // But it's not OK if two different field numbers map to the same name. + if (cmp != 0 || lastFieldNumber != -1) { + return cmp; } } - - void TermInfosWriter::initialize(DirectoryPtr directory, const String& segment, FieldInfosPtr fis, int32_t interval, bool isi) - { - lastTi = newLucene(); - utf8Result = newLucene(); - lastTermBytes = ByteArray::newInstance(10); - lastTermBytesLength = 0; - lastFieldNumber = -1; - skipInterval = 16; - maxSkipLevels = 10; - size = 0; - lastIndexPointer = 0; - - indexInterval = interval; - fieldInfos = fis; - isIndex = isi; - output = directory->createOutput(segment + (isIndex ? 
L".tii" : L".tis")); - output->writeInt(FORMAT_CURRENT); // write format - output->writeLong(0); // leave space for size - output->writeInt(indexInterval); // write indexInterval - output->writeInt(skipInterval); // write skipInterval - output->writeInt(maxSkipLevels); // write maxSkipLevels - BOOST_ASSERT(initUnicodeResults()); + + StringUtils::toUnicode(lastTermBytes.get(), lastTermBytesLength, unicodeResult1); + StringUtils::toUnicode(termBytes.get(), termBytesLength, unicodeResult2); + int32_t len = std::min(unicodeResult1->length, unicodeResult2->length); + + for (int32_t i = 0; i < len; ++i) { + wchar_t ch1 = unicodeResult1->result[i]; + wchar_t ch2 = unicodeResult2->result[i]; + if (ch1 != ch2) { + return (ch1 - ch2); + } } - - void TermInfosWriter::add(TermPtr term, TermInfoPtr ti) - { - StringUtils::toUTF8(term->_text.c_str(), term->_text.size(), utf8Result); - add(fieldInfos->fieldNumber(term->_field), utf8Result->result, utf8Result->length, ti); + return (unicodeResult1->length - unicodeResult2->length); +} + +void TermInfosWriter::add(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength, const TermInfoPtr& ti) { + // terms out of order? + BOOST_ASSERT(compareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 || (isIndex && termBytesLength == 0 && lastTermBytesLength == 0)); + + BOOST_ASSERT(ti->freqPointer >= lastTi->freqPointer); // freqPointer out of order? + BOOST_ASSERT(ti->proxPointer >= lastTi->proxPointer); // proxPointer out of order? 
+ + TermInfosWriterPtr other(_other); + + if (!isIndex && size % indexInterval == 0) { + other->add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term } - - bool TermInfosWriter::initUnicodeResults() - { - unicodeResult1 = newLucene(); - unicodeResult2 = newLucene(); - return true; + + writeTerm(fieldNumber, termBytes, termBytesLength); // write term + + output->writeVInt(ti->docFreq); // write doc freq + output->writeVLong(ti->freqPointer - lastTi->freqPointer); // write pointers + output->writeVLong(ti->proxPointer - lastTi->proxPointer); + + if (ti->docFreq >= skipInterval) { + output->writeVInt(ti->skipOffset); } - - int32_t TermInfosWriter::compareToLastTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength) - { - if (lastFieldNumber != fieldNumber) - { - int32_t cmp = fieldInfos->fieldName(lastFieldNumber).compare(fieldInfos->fieldName(fieldNumber)); - // If there is a field named "" (empty string) then we will get 0 on this comparison, yet, it's "OK". - // But it's not OK if two different field numbers map to the same name. - if (cmp != 0 || lastFieldNumber != -1) - return cmp; - } - - StringUtils::toUnicode(lastTermBytes.get(), lastTermBytesLength, unicodeResult1); - StringUtils::toUnicode(termBytes.get(), termBytesLength, unicodeResult2); - int32_t len = std::min(unicodeResult1->length, unicodeResult2->length); - - for (int32_t i = 0; i < len; ++i) - { - wchar_t ch1 = unicodeResult1->result[i]; - wchar_t ch2 = unicodeResult2->result[i]; - if (ch1 != ch2) - return (ch1 - ch2); - } - return (unicodeResult1->length - unicodeResult2->length); + + if (isIndex) { + output->writeVLong(other->output->getFilePointer() - lastIndexPointer); + lastIndexPointer = other->output->getFilePointer(); // write pointer } - - void TermInfosWriter::add(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength, TermInfoPtr ti) - { - // terms out of order? 
- BOOST_ASSERT(compareToLastTerm(fieldNumber, termBytes, termBytesLength) < 0 || (isIndex && termBytesLength == 0 && lastTermBytesLength == 0)); - - BOOST_ASSERT(ti->freqPointer >= lastTi->freqPointer); // freqPointer out of order? - BOOST_ASSERT(ti->proxPointer >= lastTi->proxPointer); // proxPointer out of order? - - TermInfosWriterPtr other(_other); - - if (!isIndex && size % indexInterval == 0) - other->add(lastFieldNumber, lastTermBytes, lastTermBytesLength, lastTi); // add an index term - - writeTerm(fieldNumber, termBytes, termBytesLength); // write term - - output->writeVInt(ti->docFreq); // write doc freq - output->writeVLong(ti->freqPointer - lastTi->freqPointer); // write pointers - output->writeVLong(ti->proxPointer - lastTi->proxPointer); - - if (ti->docFreq >= skipInterval) - output->writeVInt(ti->skipOffset); - - if (isIndex) - { - output->writeVLong(other->output->getFilePointer() - lastIndexPointer); - lastIndexPointer = other->output->getFilePointer(); // write pointer + + lastFieldNumber = fieldNumber; + lastTi->set(ti); + ++size; +} + +void TermInfosWriter::writeTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength) { + // Compute prefix in common with last term + int32_t start = 0; + int32_t limit = std::min(termBytesLength, lastTermBytesLength); + while (start < limit) { + if (termBytes[start] != lastTermBytes[start]) { + break; } - - lastFieldNumber = fieldNumber; - lastTi->set(ti); - ++size; + ++start; } - void TermInfosWriter::writeTerm(int32_t fieldNumber, ByteArray termBytes, int32_t termBytesLength) - { - // Compute prefix in common with last term - int32_t start = 0; - int32_t limit = std::min(termBytesLength, lastTermBytesLength); - while (start < limit) - { - if (termBytes[start] != lastTermBytes[start]) - break; - ++start; - } - - int32_t length = termBytesLength - start; - output->writeVInt(start); // write shared prefix length - output->writeVInt(length); // write delta length - output->writeBytes(termBytes.get(), 
start, length); // write delta bytes - output->writeVInt(fieldNumber); // write field num - if (lastTermBytes.size() < termBytesLength) - lastTermBytes.resize((int32_t)((double)termBytesLength * 1.5)); - MiscUtils::arrayCopy(termBytes.get(), start, lastTermBytes.get(), start, length); - lastTermBytesLength = termBytesLength; + int32_t length = termBytesLength - start; + output->writeVInt(start); // write shared prefix length + output->writeVInt(length); // write delta length + output->writeBytes(termBytes.get(), start, length); // write delta bytes + output->writeVInt(fieldNumber); // write field num + if (lastTermBytes.size() < termBytesLength) { + lastTermBytes.resize((int32_t)((double)termBytesLength * 1.5)); } - - void TermInfosWriter::close() - { - output->seek(4); // write size after format - output->writeLong(size); - output->close(); - - if (!isIndex) - TermInfosWriterPtr(_other)->close(); + MiscUtils::arrayCopy(termBytes.get(), start, lastTermBytes.get(), start, length); + lastTermBytesLength = termBytesLength; +} + +void TermInfosWriter::close() { + output->seek(4); // write size after format + output->writeLong(size); + output->close(); + + if (!isIndex) { + TermInfosWriterPtr(_other)->close(); } } + +} diff --git a/src/core/index/TermPositionVector.cpp b/src/core/index/TermPositionVector.cpp index 73117e86..8d7114f4 100644 --- a/src/core/index/TermPositionVector.cpp +++ b/src/core/index/TermPositionVector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,25 +7,22 @@ #include "LuceneInc.h" #include "TermPositionVector.h" -namespace Lucene -{ - TermPositionVector::TermPositionVector() - { - } - - TermPositionVector::~TermPositionVector() - { - } - - Collection TermPositionVector::getTermPositions(int32_t index) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection TermPositionVector::getOffsets(int32_t index) - { - BOOST_ASSERT(false); - return Collection(); // override - } +namespace Lucene { + +TermPositionVector::TermPositionVector() { +} + +TermPositionVector::~TermPositionVector() { +} + +Collection TermPositionVector::getTermPositions(int32_t index) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection TermPositionVector::getOffsets(int32_t index) { + BOOST_ASSERT(false); + return Collection(); // override +} + } diff --git a/src/core/index/TermPositions.cpp b/src/core/index/TermPositions.cpp index 172afadc..c0856ae1 100644 --- a/src/core/index/TermPositions.cpp +++ b/src/core/index/TermPositions.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,37 +7,32 @@ #include "LuceneInc.h" #include "TermPositions.h" -namespace Lucene -{ - TermPositions::TermPositions() - { - } - - TermPositions::~TermPositions() - { - } - - int32_t TermPositions::nextPosition() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t TermPositions::getPayloadLength() - { - BOOST_ASSERT(false); - return 0; // override - } - - ByteArray TermPositions::getPayload(ByteArray data, int32_t offset) - { - BOOST_ASSERT(false); - return ByteArray(); // override - } - - bool TermPositions::isPayloadAvailable() - { - BOOST_ASSERT(false); - return false; // override - } +namespace Lucene { + +TermPositions::TermPositions() { +} + +TermPositions::~TermPositions() { +} + +int32_t TermPositions::nextPosition() { + BOOST_ASSERT(false); + return 0; // override +} + +int32_t TermPositions::getPayloadLength() { + BOOST_ASSERT(false); + return 0; // override +} + +ByteArray TermPositions::getPayload(ByteArray data, int32_t offset) { + BOOST_ASSERT(false); + return ByteArray(); // override +} + +bool TermPositions::isPayloadAvailable() { + BOOST_ASSERT(false); + return false; // override +} + } diff --git a/src/core/index/TermVectorEntry.cpp b/src/core/index/TermVectorEntry.cpp index ee66ba7e..bea5b9cf 100644 --- a/src/core/index/TermVectorEntry.cpp +++ b/src/core/index/TermVectorEntry.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,85 +8,75 @@ #include "TermVectorEntry.h" #include "StringUtils.h" -namespace Lucene -{ - TermVectorEntry::TermVectorEntry(const String& field, const String& term, int32_t frequency, - Collection offsets, Collection positions) - { - this->field = field; - this->term = term; - this->frequency = frequency; - this->offsets = offsets; - this->positions = positions; - } - - TermVectorEntry::~TermVectorEntry() - { - } - - String TermVectorEntry::getField() - { - return field; - } - - int32_t TermVectorEntry::getFrequency() - { - return frequency; - } - - Collection TermVectorEntry::getOffsets() - { - return offsets; - } - - Collection TermVectorEntry::getPositions() - { - return positions; - } - - String TermVectorEntry::getTerm() - { - return term; - } - - void TermVectorEntry::setFrequency(int32_t frequency) - { - this->frequency = frequency; - } - - void TermVectorEntry::setOffsets(Collection offsets) - { - this->offsets = offsets; - } - - void TermVectorEntry::setPositions(Collection positions) - { - this->positions = positions; - } - - bool TermVectorEntry::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - TermVectorEntryPtr otherTermVectorEntry(boost::dynamic_pointer_cast(other)); - if (otherTermVectorEntry) - return (term == otherTermVectorEntry->term); - - return false; - } - - int32_t TermVectorEntry::hashCode() - { - return StringUtils::hashCode(term); +namespace Lucene { + +TermVectorEntry::TermVectorEntry(const String& field, const String& term, int32_t frequency, + Collection offsets, Collection positions) { + this->field = field; + this->term = term; + this->frequency = frequency; + this->offsets = offsets; + this->positions = positions; +} + +TermVectorEntry::~TermVectorEntry() { +} + +String TermVectorEntry::getField() { + return field; +} + +int32_t TermVectorEntry::getFrequency() { + return frequency; +} + +Collection 
TermVectorEntry::getOffsets() { + return offsets; +} + +Collection TermVectorEntry::getPositions() { + return positions; +} + +String TermVectorEntry::getTerm() { + return term; +} + +void TermVectorEntry::setFrequency(int32_t frequency) { + this->frequency = frequency; +} + +void TermVectorEntry::setOffsets(Collection offsets) { + this->offsets = offsets; +} + +void TermVectorEntry::setPositions(Collection positions) { + this->positions = positions; +} + +bool TermVectorEntry::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - String TermVectorEntry::toString() - { - StringStream buffer; - buffer << L"TermVectorEntry{field='" << field; - buffer << L"\', term='" << term; - buffer << L"\', frequency=" << frequency << L"}"; - return buffer.str(); + + TermVectorEntryPtr otherTermVectorEntry(boost::dynamic_pointer_cast(other)); + if (otherTermVectorEntry) { + return (term == otherTermVectorEntry->term); } + + return false; +} + +int32_t TermVectorEntry::hashCode() { + return StringUtils::hashCode(term); +} + +String TermVectorEntry::toString() { + StringStream buffer; + buffer << L"TermVectorEntry{field='" << field; + buffer << L"\', term='" << term; + buffer << L"\', frequency=" << frequency << L"}"; + return buffer.str(); +} + } diff --git a/src/core/index/TermVectorEntryFreqSortedComparator.cpp b/src/core/index/TermVectorEntryFreqSortedComparator.cpp index e65f2d75..669a73cf 100644 --- a/src/core/index/TermVectorEntryFreqSortedComparator.cpp +++ b/src/core/index/TermVectorEntryFreqSortedComparator.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,24 +8,27 @@ #include "TermVectorEntryFreqSortedComparator.h" #include "TermVectorEntry.h" -namespace Lucene -{ - TermVectorEntryFreqSortedComparator::~TermVectorEntryFreqSortedComparator() - { +namespace Lucene { + +TermVectorEntryFreqSortedComparator::~TermVectorEntryFreqSortedComparator() { +} + +bool TermVectorEntryFreqSortedComparator::compare(const TermVectorEntryPtr& first, const TermVectorEntryPtr& second) { + int32_t result = (second->getFrequency() - first->getFrequency()); + if (result < 0) { + return true; + } + if (result > 0) { + return false; } - - bool TermVectorEntryFreqSortedComparator::compare(const TermVectorEntryPtr& first, const TermVectorEntryPtr& second) - { - int32_t result = (second->getFrequency() - first->getFrequency()); - if (result < 0) - return true; - if (result > 0) - return false; - result = first->getTerm().compare(second->getTerm()); - if (result < 0) - return true; - if (result > 0) - return false; - return (first->getField().compare(second->getField()) < 0); + result = first->getTerm().compare(second->getTerm()); + if (result < 0) { + return true; } + if (result > 0) { + return false; + } + return (first->getField().compare(second->getField()) < 0); +} + } diff --git a/src/core/index/TermVectorMapper.cpp b/src/core/index/TermVectorMapper.cpp index 084db377..ae18c019 100644 --- a/src/core/index/TermVectorMapper.cpp +++ b/src/core/index/TermVectorMapper.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,30 +7,26 @@ #include "LuceneInc.h" #include "TermVectorMapper.h" -namespace Lucene -{ - TermVectorMapper::TermVectorMapper(bool ignoringPositions, bool ignoringOffsets) - { - this->ignoringPositions = ignoringPositions; - this->ignoringOffsets = ignoringOffsets; - } - - TermVectorMapper::~TermVectorMapper() - { - } - - bool TermVectorMapper::isIgnoringPositions() - { - return ignoringPositions; - } - - bool TermVectorMapper::isIgnoringOffsets() - { - return ignoringOffsets; - } - - void TermVectorMapper::setDocumentNumber(int32_t documentNumber) - { - // override - } +namespace Lucene { + +TermVectorMapper::TermVectorMapper(bool ignoringPositions, bool ignoringOffsets) { + this->ignoringPositions = ignoringPositions; + this->ignoringOffsets = ignoringOffsets; +} + +TermVectorMapper::~TermVectorMapper() { +} + +bool TermVectorMapper::isIgnoringPositions() { + return ignoringPositions; +} + +bool TermVectorMapper::isIgnoringOffsets() { + return ignoringOffsets; +} + +void TermVectorMapper::setDocumentNumber(int32_t documentNumber) { + // override +} + } diff --git a/src/core/index/TermVectorOffsetInfo.cpp b/src/core/index/TermVectorOffsetInfo.cpp index b6875e4b..2f431448 100644 --- a/src/core/index/TermVectorOffsetInfo.cpp +++ b/src/core/index/TermVectorOffsetInfo.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,59 +7,54 @@ #include "LuceneInc.h" #include "TermVectorOffsetInfo.h" -namespace Lucene -{ - TermVectorOffsetInfo::TermVectorOffsetInfo(int32_t startOffset, int32_t endOffset) - { - this->endOffset = endOffset; - this->startOffset = startOffset; - } - - TermVectorOffsetInfo::~TermVectorOffsetInfo() - { - } - - const Collection TermVectorOffsetInfo::EMPTY_OFFSET_INFO() - { - static Collection _EMPTY_OFFSET_INFO; - if (!_EMPTY_OFFSET_INFO) - _EMPTY_OFFSET_INFO = Collection::newInstance(); - return _EMPTY_OFFSET_INFO; - } - - int32_t TermVectorOffsetInfo::getEndOffset() - { - return endOffset; - } - - void TermVectorOffsetInfo::setEndOffset(int32_t endOffset) - { - this->endOffset = endOffset; - } - - int32_t TermVectorOffsetInfo::getStartOffset() - { - return startOffset; - } - - void TermVectorOffsetInfo::setStartOffset(int32_t endOffset) - { - this->startOffset = startOffset; - } - - bool TermVectorOffsetInfo::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - TermVectorOffsetInfoPtr otherTermVector(boost::dynamic_pointer_cast(other)); - if (!otherTermVector) - return false; - return (endOffset == otherTermVector->endOffset && startOffset == otherTermVector->startOffset); +namespace Lucene { + +TermVectorOffsetInfo::TermVectorOffsetInfo(int32_t startOffset, int32_t endOffset) { + this->endOffset = endOffset; + this->startOffset = startOffset; +} + +TermVectorOffsetInfo::~TermVectorOffsetInfo() { +} + +const Collection TermVectorOffsetInfo::EMPTY_OFFSET_INFO() { + static Collection _EMPTY_OFFSET_INFO; + LUCENE_RUN_ONCE( + _EMPTY_OFFSET_INFO = Collection::newInstance(); + ); + return _EMPTY_OFFSET_INFO; +} + +int32_t TermVectorOffsetInfo::getEndOffset() { + return endOffset; +} + +void TermVectorOffsetInfo::setEndOffset(int32_t endOffset) { + this->endOffset = endOffset; +} + +int32_t TermVectorOffsetInfo::getStartOffset() { + return startOffset; +} + 
+void TermVectorOffsetInfo::setStartOffset(int32_t startOffset) { + this->startOffset = startOffset; +} + +bool TermVectorOffsetInfo::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t TermVectorOffsetInfo::hashCode() - { - int32_t result = startOffset; - return (29 * result + endOffset); + TermVectorOffsetInfoPtr otherTermVector(boost::dynamic_pointer_cast(other)); + if (!otherTermVector) { + return false; } + return (endOffset == otherTermVector->endOffset && startOffset == otherTermVector->startOffset); +} + +int32_t TermVectorOffsetInfo::hashCode() { + int32_t result = startOffset; + return (29 * result + endOffset); +} + } diff --git a/src/core/index/TermVectorsReader.cpp b/src/core/index/TermVectorsReader.cpp index 155df37c..488de775 100644 --- a/src/core/index/TermVectorsReader.cpp +++ b/src/core/index/TermVectorsReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,599 +15,531 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// NOTE: if you make a new format, it must be larger than the current format - const int32_t TermVectorsReader::FORMAT_VERSION = 2; - - /// Changes to speed up bulk merging of term vectors - const int32_t TermVectorsReader::FORMAT_VERSION2 = 3; - - /// Changed strings to UTF8 with length-in-bytes not length-in-chars - const int32_t TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES = 4; - - /// NOTE: always change this if you switch to a new format - const int32_t TermVectorsReader::FORMAT_CURRENT = TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES; - - /// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file - const int32_t TermVectorsReader::FORMAT_SIZE = 4; - - const uint8_t TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR = 0x1; - const uint8_t TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR = 0x2; - - TermVectorsReader::TermVectorsReader() - { - this->_size = 0; - this->numTotalDocs = 0; - this->docStoreOffset = 0; - this->format = 0; - } - - TermVectorsReader::TermVectorsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos) - { - ConstructReader(d, segment, fieldInfos, BufferedIndexInput::BUFFER_SIZE, -1, 0); - } - - TermVectorsReader::TermVectorsReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) - { - ConstructReader(d, segment, fieldInfos, readBufferSize, docStoreOffset, size); - } - - TermVectorsReader::~TermVectorsReader() - { - } - - void TermVectorsReader::ConstructReader(DirectoryPtr d, const String& segment, FieldInfosPtr fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) - { - this->_size = 0; - this->numTotalDocs = 0; - this->docStoreOffset = 0; - this->format = 0; - - bool success = false; - LuceneException finally; - try - { - if 
(d->fileExists(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION())) - { - tvx = d->openInput(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION(), readBufferSize); - format = checkValidFormat(tvx); - tvd = d->openInput(segment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION(), readBufferSize); - int32_t tvdFormat = checkValidFormat(tvd); - tvf = d->openInput(segment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION(), readBufferSize); - int32_t tvfFormat = checkValidFormat(tvf); - - BOOST_ASSERT(format == tvdFormat); - BOOST_ASSERT(format == tvfFormat); - - if (format >= FORMAT_VERSION2) - { - BOOST_ASSERT((tvx->length() - FORMAT_SIZE) % 16 == 0); - numTotalDocs = (int32_t)(tvx->length() >> 4); - } - else - { - BOOST_ASSERT((tvx->length() - FORMAT_SIZE) % 8 == 0); - numTotalDocs = (int32_t)(tvx->length() >> 3); - } - - if (docStoreOffset == -1) - { - this->docStoreOffset = 0; - this->_size = numTotalDocs; - BOOST_ASSERT(size == 0 || numTotalDocs == size); - } - else - { - this->docStoreOffset = docStoreOffset; - this->_size = size; - // Verify the file is long enough to hold all of our docs - BOOST_ASSERT(numTotalDocs >= size + docStoreOffset); - } +namespace Lucene { + +/// NOTE: if you make a new format, it must be larger than the current format +const int32_t TermVectorsReader::FORMAT_VERSION = 2; + +/// Changes to speed up bulk merging of term vectors +const int32_t TermVectorsReader::FORMAT_VERSION2 = 3; + +/// Changed strings to UTF8 with length-in-bytes not length-in-chars +const int32_t TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES = 4; + +/// NOTE: always change this if you switch to a new format +const int32_t TermVectorsReader::FORMAT_CURRENT = TermVectorsReader::FORMAT_UTF8_LENGTH_IN_BYTES; + +/// The size in bytes that the FORMAT_VERSION will take up at the beginning of each file +const int32_t TermVectorsReader::FORMAT_SIZE = 4; + +const uint8_t TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR = 0x1; +const uint8_t 
TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR = 0x2; + +TermVectorsReader::TermVectorsReader() { + this->_size = 0; + this->numTotalDocs = 0; + this->docStoreOffset = 0; + this->format = 0; +} + +TermVectorsReader::TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos) { + ConstructReader(d, segment, fieldInfos, BufferedIndexInput::BUFFER_SIZE, -1, 0); +} + +TermVectorsReader::TermVectorsReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { + ConstructReader(d, segment, fieldInfos, readBufferSize, docStoreOffset, size); +} + +TermVectorsReader::~TermVectorsReader() { +} + +void TermVectorsReader::ConstructReader(const DirectoryPtr& d, const String& segment, const FieldInfosPtr& fieldInfos, int32_t readBufferSize, int32_t docStoreOffset, int32_t size) { + this->_size = 0; + this->numTotalDocs = 0; + this->docStoreOffset = 0; + this->format = 0; + + bool success = false; + LuceneException finally; + try { + if (d->fileExists(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION())) { + tvx = d->openInput(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION(), readBufferSize); + format = checkValidFormat(tvx); + tvd = d->openInput(segment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION(), readBufferSize); + int32_t tvdFormat = checkValidFormat(tvd); + tvf = d->openInput(segment + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION(), readBufferSize); + int32_t tvfFormat = checkValidFormat(tvf); + + BOOST_ASSERT(format == tvdFormat); + BOOST_ASSERT(format == tvfFormat); + + if (format >= FORMAT_VERSION2) { + BOOST_ASSERT((tvx->length() - FORMAT_SIZE) % 16 == 0); + numTotalDocs = (int32_t)(tvx->length() >> 4); + } else { + BOOST_ASSERT((tvx->length() - FORMAT_SIZE) % 8 == 0); + numTotalDocs = (int32_t)(tvx->length() >> 3); } - else - { - // If all documents flushed in a segment had hit non-aborting exceptions, it's possible that - // FieldInfos.hasVectors returns true yet the term vector files don't exist. - format = 0; + + if (docStoreOffset == -1) { + this->docStoreOffset = 0; + this->_size = numTotalDocs; + BOOST_ASSERT(size == 0 || numTotalDocs == size); + } else { + this->docStoreOffset = docStoreOffset; + this->_size = size; + // Verify the file is long enough to hold all of our docs + BOOST_ASSERT(numTotalDocs >= size + docStoreOffset); } - - this->fieldInfos = fieldInfos; - success = true; - } - catch (LuceneException& e) - { - finally = e; + } else { + // If all documents flushed in a segment had hit non-aborting exceptions, it's possible that + // FieldInfos.hasVectors returns true yet the term vector files don't exist. + format = 0; } - - // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception - // above. In this case, we want to explicitly close any subset of things that were opened. - if (!success) - close(); - finally.throwException(); + + this->fieldInfos = fieldInfos; + success = true; + } catch (LuceneException& e) { + finally = e; } - - IndexInputPtr TermVectorsReader::getTvdStream() - { - return tvd; + + // With lock-less commits, it's entirely possible (and fine) to hit a FileNotFound exception + // above. In this case, we want to explicitly close any subset of things that were opened. 
+ if (!success) { + close(); } - - IndexInputPtr TermVectorsReader::getTvfStream() - { - return tvf; + finally.throwException(); +} + +IndexInputPtr TermVectorsReader::getTvdStream() { + return tvd; +} + +IndexInputPtr TermVectorsReader::getTvfStream() { + return tvf; +} + +void TermVectorsReader::seekTvx(int32_t docNum) { + if (format < FORMAT_VERSION2) { + tvx->seek((docNum + docStoreOffset) * 8 + FORMAT_SIZE); + } else { + tvx->seek((docNum + docStoreOffset) * 16 + FORMAT_SIZE); } - - void TermVectorsReader::seekTvx(int32_t docNum) - { - if (format < FORMAT_VERSION2) - tvx->seek((docNum + docStoreOffset) * 8 + FORMAT_SIZE); - else - tvx->seek((docNum + docStoreOffset) * 16 + FORMAT_SIZE); +} + +bool TermVectorsReader::canReadRawDocs() { + return (format >= FORMAT_UTF8_LENGTH_IN_BYTES); +} + +void TermVectorsReader::rawDocs(Collection tvdLengths, Collection tvfLengths, int32_t startDocID, int32_t numDocs) { + if (!tvx) { + MiscUtils::arrayFill(tvdLengths.begin(), 0, tvdLengths.size(), 0); + MiscUtils::arrayFill(tvfLengths.begin(), 0, tvfLengths.size(), 0); + return; } - - bool TermVectorsReader::canReadRawDocs() - { - return (format >= FORMAT_UTF8_LENGTH_IN_BYTES); + + // SegmentMerger calls canReadRawDocs() first and should not call us if that returns false. 
+ if (format < FORMAT_VERSION2) { + boost::throw_exception(IllegalStateException(L"cannot read raw docs with older term vector formats")); } - - void TermVectorsReader::rawDocs(Collection tvdLengths, Collection tvfLengths, int32_t startDocID, int32_t numDocs) - { - if (!tvx) - { - MiscUtils::arrayFill(tvdLengths.begin(), 0, tvdLengths.size(), 0); - MiscUtils::arrayFill(tvfLengths.begin(), 0, tvfLengths.size(), 0); - return; + + seekTvx(startDocID); + + int64_t tvdPosition = tvx->readLong(); + tvd->seek(tvdPosition); + + int64_t tvfPosition = tvx->readLong(); + tvf->seek(tvfPosition); + + int64_t lastTvdPosition = tvdPosition; + int64_t lastTvfPosition = tvfPosition; + + int32_t count = 0; + while (count < numDocs) { + int32_t docID = docStoreOffset + startDocID + count + 1; + BOOST_ASSERT(docID <= numTotalDocs); + if (docID < numTotalDocs) { + tvdPosition = tvx->readLong(); + tvfPosition = tvx->readLong(); + } else { + tvdPosition = tvd->length(); + tvfPosition = tvf->length(); + BOOST_ASSERT(count == numDocs - 1); } - - // SegmentMerger calls canReadRawDocs() first and should not call us if that returns false. 
- if (format < FORMAT_VERSION2) - boost::throw_exception(IllegalStateException(L"cannot read raw docs with older term vector formats")); - - seekTvx(startDocID); - - int64_t tvdPosition = tvx->readLong(); - tvd->seek(tvdPosition); - - int64_t tvfPosition = tvx->readLong(); - tvf->seek(tvfPosition); - - int64_t lastTvdPosition = tvdPosition; - int64_t lastTvfPosition = tvfPosition; - - int32_t count = 0; - while (count < numDocs) - { - int32_t docID = docStoreOffset + startDocID + count + 1; - BOOST_ASSERT(docID <= numTotalDocs); - if (docID < numTotalDocs) - { - tvdPosition = tvx->readLong(); - tvfPosition = tvx->readLong(); - } - else - { - tvdPosition = tvd->length(); - tvfPosition = tvf->length(); - BOOST_ASSERT(count == numDocs - 1); + tvdLengths[count] = (int32_t)(tvdPosition - lastTvdPosition); + tvfLengths[count] = (int32_t)(tvfPosition - lastTvfPosition); + ++count; + lastTvdPosition = tvdPosition; + lastTvfPosition = tvfPosition; + } +} + +int32_t TermVectorsReader::checkValidFormat(const IndexInputPtr& in) { + int32_t format = in->readInt(); + if (format > FORMAT_CURRENT) { + boost::throw_exception(CorruptIndexException(L"Incompatible format version: " + + StringUtils::toString(format) + L" expected " + + StringUtils::toString(FORMAT_CURRENT) + L" or less")); + } + return format; +} + +void TermVectorsReader::close() { + // make all effort to close up. Keep the first exception and throw it as a new one. 
+ LuceneException keep; + if (tvx) { + try { + tvx->close(); + } catch (LuceneException& e) { + if (keep.isNull()) { + keep = e; } - tvdLengths[count] = (int32_t)(tvdPosition - lastTvdPosition); - tvfLengths[count] = (int32_t)(tvfPosition - lastTvfPosition); - ++count; - lastTvdPosition = tvdPosition; - lastTvfPosition = tvfPosition; } } - - int32_t TermVectorsReader::checkValidFormat(IndexInputPtr in) - { - int32_t format = in->readInt(); - if (format > FORMAT_CURRENT) - { - boost::throw_exception(CorruptIndexException(L"Incompatible format version: " + - StringUtils::toString(format) + L" expected " + - StringUtils::toString(FORMAT_CURRENT) + L" or less")); + if (tvd) { + try { + tvd->close(); + } catch (LuceneException& e) { + if (keep.isNull()) { + keep = e; + } } - return format; } - - void TermVectorsReader::close() - { - // make all effort to close up. Keep the first exception and throw it as a new one. - LuceneException keep; - if (tvx) - { - try - { - tvx->close(); - } - catch (LuceneException& e) - { - if (keep.isNull()) - keep = e; + if (tvf) { + try { + tvf->close(); + } catch (LuceneException& e) { + if (keep.isNull()) { + keep = e; } } - if (tvd) - { - try - { - tvd->close(); + } + keep.throwException(); +} + +int32_t TermVectorsReader::size() { + return _size; +} + +void TermVectorsReader::get(int32_t docNum, const String& field, const TermVectorMapperPtr& mapper) { + if (tvx) { + int32_t fieldNumber = fieldInfos->fieldNumber(field); + + // We need to account for the FORMAT_SIZE at when seeking in the tvx. We don't need to do + // this in other seeks because we already have the file pointer that was written in another file + seekTvx(docNum); + int64_t tvdPosition = tvx->readLong(); + + tvd->seek(tvdPosition); + int32_t fieldCount = tvd->readVInt(); + + // There are only a few fields per document. We opt for a full scan rather then requiring that they + // be ordered. We need to read through all of the fields anyway to get to the tvf pointers. 
+ int32_t number = 0; + int32_t found = -1; + for (int32_t i = 0; i < fieldCount; ++i) { + if (format >= FORMAT_VERSION) { + number = tvd->readVInt(); + } else { + number += tvd->readVInt(); } - catch (LuceneException& e) - { - if (keep.isNull()) - keep = e; + + if (number == fieldNumber) { + found = i; } } - if (tvf) - { - try - { - tvf->close(); + + // This field, although valid in the segment, was not found in this document + if (found != -1) { + // Compute position in the tvf file + int64_t position; + if (format >= FORMAT_VERSION2) { + position = tvx->readLong(); + } else { + position = tvd->readVLong(); } - catch (LuceneException& e) - { - if (keep.isNull()) - keep = e; + for (int32_t i = 1; i <= found; ++i) { + position += tvd->readVLong(); } + + mapper->setDocumentNumber(docNum); + readTermVector(field, position, mapper); } - keep.throwException(); - } - - int32_t TermVectorsReader::size() - { - return _size; } - - void TermVectorsReader::get(int32_t docNum, const String& field, TermVectorMapperPtr mapper) - { - if (tvx) - { - int32_t fieldNumber = fieldInfos->fieldNumber(field); - - // We need to account for the FORMAT_SIZE at when seeking in the tvx. We don't need to do - // this in other seeks because we already have the file pointer that was written in another file - seekTvx(docNum); - int64_t tvdPosition = tvx->readLong(); - - tvd->seek(tvdPosition); - int32_t fieldCount = tvd->readVInt(); - - // There are only a few fields per document. We opt for a full scan rather then requiring that they - // be ordered. We need to read through all of the fields anyway to get to the tvf pointers. 
- int32_t number = 0; - int32_t found = -1; - for (int32_t i = 0; i < fieldCount; ++i) - { - if (format >= FORMAT_VERSION) - number = tvd->readVInt(); - else - number += tvd->readVInt(); - - if (number == fieldNumber) - found = i; - } - - // This field, although valid in the segment, was not found in this document - if (found != -1) - { - // Compute position in the tvf file - int64_t position; - if (format >= FORMAT_VERSION2) - position = tvx->readLong(); - else - position = tvd->readVLong(); - for (int32_t i = 1; i <= found; ++i) - position += tvd->readVLong(); - - mapper->setDocumentNumber(docNum); - readTermVector(field, position, mapper); - } +} + +TermFreqVectorPtr TermVectorsReader::get(int32_t docNum, const String& field) { + // Check if no term vectors are available for this segment at all + ParallelArrayTermVectorMapperPtr mapper(newLucene()); + get(docNum, field, mapper); + return mapper->materializeVector(); +} + +Collection TermVectorsReader::readFields(int32_t fieldCount) { + int32_t number = 0; + Collection fields(Collection::newInstance(fieldCount)); + + for (int32_t i = 0; i < fieldCount; ++i) { + if (format >= FORMAT_VERSION) { + number = tvd->readVInt(); + } else { + number += tvd->readVInt(); } + fields[i] = fieldInfos->fieldName(number); } - - TermFreqVectorPtr TermVectorsReader::get(int32_t docNum, const String& field) - { - // Check if no term vectors are available for this segment at all - ParallelArrayTermVectorMapperPtr mapper(newLucene()); - get(docNum, field, mapper); - return mapper->materializeVector(); + + return fields; +} + +Collection TermVectorsReader::readTvfPointers(int32_t fieldCount) { + // Compute position in the tvf file + int64_t position; + if (format >= FORMAT_VERSION2) { + position = tvx->readLong(); + } else { + position = tvd->readVLong(); } - - Collection TermVectorsReader::readFields(int32_t fieldCount) - { - int32_t number = 0; - Collection fields(Collection::newInstance(fieldCount)); - - for (int32_t i = 0; i < 
fieldCount; ++i) - { - if (format >= FORMAT_VERSION) - number = tvd->readVInt(); - else - number += tvd->readVInt(); - fields[i] = fieldInfos->fieldName(number); - } - - return fields; + + Collection tvfPointers(Collection::newInstance(fieldCount)); + tvfPointers[0] = position; + + for (int32_t i = 1; i < fieldCount; ++i) { + position += tvd->readVLong(); + tvfPointers[i] = position; } - - Collection TermVectorsReader::readTvfPointers(int32_t fieldCount) - { - // Compute position in the tvf file - int64_t position; - if (format >= FORMAT_VERSION2) - position = tvx->readLong(); - else - position = tvd->readVLong(); - - Collection tvfPointers(Collection::newInstance(fieldCount)); - tvfPointers[0] = position; - - for (int32_t i = 1; i < fieldCount; ++i) - { - position += tvd->readVLong(); - tvfPointers[i] = position; + + return tvfPointers; +} + +Collection TermVectorsReader::get(int32_t docNum) { + Collection result; + if (tvx) { + // We need to offset by + seekTvx(docNum); + int64_t tvdPosition = tvx->readLong(); + + tvd->seek(tvdPosition); + int32_t fieldCount = tvd->readVInt(); + + // No fields are vectorized for this document + if (fieldCount != 0) { + Collection fields(readFields(fieldCount)); + Collection tvfPointers(readTvfPointers(fieldCount)); + result = readTermVectors(docNum, fields, tvfPointers); } - - return tvfPointers; } - - Collection TermVectorsReader::get(int32_t docNum) - { - Collection result; - if (tvx) - { - // We need to offset by - seekTvx(docNum); - int64_t tvdPosition = tvx->readLong(); - - tvd->seek(tvdPosition); - int32_t fieldCount = tvd->readVInt(); - - // No fields are vectorized for this document - if (fieldCount != 0) - { - Collection fields(readFields(fieldCount)); - Collection tvfPointers(readTvfPointers(fieldCount)); - result = readTermVectors(docNum, fields, tvfPointers); - } + return result; +} + +void TermVectorsReader::get(int32_t docNumber, const TermVectorMapperPtr& mapper) { + // Check if no term vectors are available for 
this segment at all + if (tvx) { + // We need to offset by + seekTvx(docNumber); + int64_t tvdPosition = tvx->readLong(); + + tvd->seek(tvdPosition); + int32_t fieldCount = tvd->readVInt(); + + // No fields are vectorized for this document + if (fieldCount != 0) { + Collection fields(readFields(fieldCount)); + Collection tvfPointers(readTvfPointers(fieldCount)); + mapper->setDocumentNumber(docNumber); + readTermVectors(fields, tvfPointers, mapper); } - return result; } - - void TermVectorsReader::get(int32_t docNumber, TermVectorMapperPtr mapper) - { - // Check if no term vectors are available for this segment at all - if (tvx) - { - // We need to offset by - seekTvx(docNumber); - int64_t tvdPosition = tvx->readLong(); - - tvd->seek(tvdPosition); - int32_t fieldCount = tvd->readVInt(); - - // No fields are vectorized for this document - if (fieldCount != 0) - { - Collection fields(readFields(fieldCount)); - Collection tvfPointers(readTvfPointers(fieldCount)); - mapper->setDocumentNumber(docNumber); - readTermVectors(fields, tvfPointers, mapper); - } - } +} + +Collection TermVectorsReader::readTermVectors(int32_t docNum, Collection fields, Collection tvfPointers) { + Collection res(Collection::newInstance(fields.size())); + for (int32_t i = 0; i < fields.size(); ++i) { + ParallelArrayTermVectorMapperPtr mapper(newLucene()); + mapper->setDocumentNumber(docNum); + readTermVector(fields[i], tvfPointers[i], mapper); + res[i] = mapper->materializeVector(); } - - Collection TermVectorsReader::readTermVectors(int32_t docNum, Collection fields, Collection tvfPointers) - { - Collection res(Collection::newInstance(fields.size())); - for (int32_t i = 0; i < fields.size(); ++i) - { - ParallelArrayTermVectorMapperPtr mapper(newLucene()); - mapper->setDocumentNumber(docNum); - readTermVector(fields[i], tvfPointers[i], mapper); - res[i] = mapper->materializeVector(); - } - return res; + return res; +} + +void TermVectorsReader::readTermVectors(Collection fields, Collection 
tvfPointers, const TermVectorMapperPtr& mapper) { + for (int32_t i = 0; i < fields.size(); ++i) { + readTermVector(fields[i], tvfPointers[i], mapper); } - - void TermVectorsReader::readTermVectors(Collection fields, Collection tvfPointers, TermVectorMapperPtr mapper) - { - for (int32_t i = 0; i < fields.size(); ++i) - readTermVector(fields[i], tvfPointers[i], mapper); +} + +void TermVectorsReader::readTermVector(const String& field, int64_t tvfPointer, const TermVectorMapperPtr& mapper) { + // Now read the data from specified position. We don't need to offset by the FORMAT here since + // the pointer already includes the offset + tvf->seek(tvfPointer); + + int32_t numTerms = tvf->readVInt(); + + // If no terms - return a constant empty termvector. However, this should never occur! + if (numTerms == 0) { + return; } - - void TermVectorsReader::readTermVector(const String& field, int64_t tvfPointer, TermVectorMapperPtr mapper) - { - // Now read the data from specified position. We don't need to offset by the FORMAT here since - // the pointer already includes the offset - tvf->seek(tvfPointer); - - int32_t numTerms = tvf->readVInt(); - - // If no terms - return a constant empty termvector. However, this should never occur! 
- if (numTerms == 0) - return; - - bool storePositions; - bool storeOffsets; - - if (format >= FORMAT_VERSION) - { - uint8_t bits = tvf->readByte(); - storePositions = ((bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0); - storeOffsets = ((bits & STORE_OFFSET_WITH_TERMVECTOR) != 0); - } - else - { - tvf->readVInt(); - storePositions = false; - storeOffsets = false; - } - - mapper->setExpectations(field, numTerms, storeOffsets, storePositions); - int32_t start = 0; - int32_t deltaLength = 0; - int32_t totalLength = 0; - ByteArray byteBuffer; - CharArray charBuffer; - bool preUTF8 = (format < FORMAT_UTF8_LENGTH_IN_BYTES); - - // init the buffers - if (preUTF8) - { - charBuffer = CharArray::newInstance(10); - byteBuffer.reset(); - } - else - { - charBuffer.reset(); - byteBuffer = ByteArray::newInstance(20); - } - - for (int32_t i = 0; i < numTerms; ++i) - { - start = tvf->readVInt(); - deltaLength = tvf->readVInt(); - totalLength = start + deltaLength; - - String term; - - if (preUTF8) - { - // Term stored as "java chars" - if (charBuffer.size() < totalLength) - charBuffer.resize((int32_t)(1.5 * (double)totalLength)); - totalLength = start + tvf->readChars(charBuffer.get(), start, deltaLength); - term.append(charBuffer.get(), totalLength); + + bool storePositions; + bool storeOffsets; + + if (format >= FORMAT_VERSION) { + uint8_t bits = tvf->readByte(); + storePositions = ((bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0); + storeOffsets = ((bits & STORE_OFFSET_WITH_TERMVECTOR) != 0); + } else { + tvf->readVInt(); + storePositions = false; + storeOffsets = false; + } + + mapper->setExpectations(field, numTerms, storeOffsets, storePositions); + int32_t start = 0; + int32_t deltaLength = 0; + int32_t totalLength = 0; + ByteArray byteBuffer; + CharArray charBuffer; + bool preUTF8 = (format < FORMAT_UTF8_LENGTH_IN_BYTES); + + // init the buffers + if (preUTF8) { + charBuffer = CharArray::newInstance(10); + byteBuffer.reset(); + } else { + charBuffer.reset(); + byteBuffer = 
ByteArray::newInstance(20); + } + + for (int32_t i = 0; i < numTerms; ++i) { + start = tvf->readVInt(); + deltaLength = tvf->readVInt(); + totalLength = start + deltaLength; + + String term; + + if (preUTF8) { + // Term stored as "java chars" + if (charBuffer.size() < totalLength) { + charBuffer.resize((int32_t)(1.5 * (double)totalLength)); } - else - { - // Term stored as utf8 bytes - if (byteBuffer.size() < totalLength) - byteBuffer.resize((int32_t)(1.5 * (double)totalLength)); - tvf->readBytes(byteBuffer.get(), start, deltaLength); - term = StringUtils::toUnicode(byteBuffer.get(), totalLength); + totalLength = start + tvf->readChars(charBuffer.get(), start, deltaLength); + term.append(charBuffer.get(), totalLength); + } else { + // Term stored as utf8 bytes + if (byteBuffer.size() < totalLength) { + byteBuffer.resize((int32_t)(1.5 * (double)totalLength)); } - int32_t freq = tvf->readVInt(); - Collection positions; - if (storePositions) // read in the positions - { - // does the mapper even care about positions? - if (!mapper->isIgnoringPositions()) - { - positions = Collection::newInstance(freq); - int32_t prevPosition = 0; - for (Collection::iterator position = positions.begin(); position != positions.end(); ++position) - { - *position = prevPosition + tvf->readVInt(); - prevPosition = *position; - } + tvf->readBytes(byteBuffer.get(), start, deltaLength); + term = StringUtils::toUnicode(byteBuffer.get(), totalLength); + } + int32_t freq = tvf->readVInt(); + Collection positions; + if (storePositions) { // read in the positions + // does the mapper even care about positions? + if (!mapper->isIgnoringPositions()) { + positions = Collection::newInstance(freq); + int32_t prevPosition = 0; + for (Collection::iterator position = positions.begin(); position != positions.end(); ++position) { + *position = prevPosition + tvf->readVInt(); + prevPosition = *position; } - else - { - // we need to skip over the positions. 
Since these are VInts, I don't believe there - // is anyway to know for sure how far to skip - for (int32_t j = 0; j < freq; ++j) - tvf->readVInt(); + } else { + // we need to skip over the positions. Since these are VInts, I don't believe there + // is anyway to know for sure how far to skip + for (int32_t j = 0; j < freq; ++j) { + tvf->readVInt(); } } - - Collection offsets; - if (storeOffsets) - { - // does the mapper even care about offsets? - if (!mapper->isIgnoringOffsets()) - { - offsets = Collection::newInstance(freq); - int32_t prevOffset = 0; - for (Collection::iterator offset = offsets.begin(); offset != offsets.end(); ++offset) - { - int32_t startOffset = prevOffset + tvf->readVInt(); - int32_t endOffset = startOffset + tvf->readVInt(); - *offset = newLucene(startOffset, endOffset); - prevOffset = endOffset; - } + } + + Collection offsets; + if (storeOffsets) { + // does the mapper even care about offsets? + if (!mapper->isIgnoringOffsets()) { + offsets = Collection::newInstance(freq); + int32_t prevOffset = 0; + for (Collection::iterator offset = offsets.begin(); offset != offsets.end(); ++offset) { + int32_t startOffset = prevOffset + tvf->readVInt(); + int32_t endOffset = startOffset + tvf->readVInt(); + *offset = newLucene(startOffset, endOffset); + prevOffset = endOffset; } - else - { - for (int32_t j = 0; j < freq; ++j) - { - tvf->readVInt(); - tvf->readVInt(); - } + } else { + for (int32_t j = 0; j < freq; ++j) { + tvf->readVInt(); + tvf->readVInt(); } } - mapper->map(term, freq, offsets, positions); } + mapper->map(term, freq, offsets, positions); } - - LuceneObjectPtr TermVectorsReader::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - TermVectorsReaderPtr cloneReader(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); - cloneReader->fieldInfos = fieldInfos; - cloneReader->_size = _size; - cloneReader->numTotalDocs = numTotalDocs; - cloneReader->docStoreOffset = docStoreOffset; - cloneReader->format = format; - - // These are null when a TermVectorsReader was created on a segment that did not have term vectors saved - if (tvx && tvd && tvf) - { - cloneReader->tvx = boost::dynamic_pointer_cast(tvx->clone()); - cloneReader->tvd = boost::dynamic_pointer_cast(tvd->clone()); - cloneReader->tvf = boost::dynamic_pointer_cast(tvf->clone()); - } - - return cloneReader; +} + +LuceneObjectPtr TermVectorsReader::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + TermVectorsReaderPtr cloneReader(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); + cloneReader->fieldInfos = fieldInfos; + cloneReader->_size = _size; + cloneReader->numTotalDocs = numTotalDocs; + cloneReader->docStoreOffset = docStoreOffset; + cloneReader->format = format; + + // These are null when a TermVectorsReader was created on a segment that did not have term vectors saved + if (tvx && tvd && tvf) { + cloneReader->tvx = boost::dynamic_pointer_cast(tvx->clone()); + cloneReader->tvd = boost::dynamic_pointer_cast(tvd->clone()); + cloneReader->tvf = boost::dynamic_pointer_cast(tvf->clone()); } - - ParallelArrayTermVectorMapper::ParallelArrayTermVectorMapper() - { - currentPosition = 0; - storingOffsets = false; - storingPositions = false; + + return cloneReader; +} + +ParallelArrayTermVectorMapper::ParallelArrayTermVectorMapper() { + currentPosition = 0; + storingOffsets = false; + storingPositions = false; +} + +ParallelArrayTermVectorMapper::~ParallelArrayTermVectorMapper() { +} + +void ParallelArrayTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) { + this->field = field; + terms = 
Collection::newInstance(numTerms); + termFreqs = Collection::newInstance(numTerms); + this->storingOffsets = storeOffsets; + this->storingPositions = storePositions; + + if (storePositions) { + this->positions = Collection< Collection >::newInstance(numTerms); } - - ParallelArrayTermVectorMapper::~ParallelArrayTermVectorMapper() - { + if (storeOffsets) { + this->offsets = Collection< Collection >::newInstance(numTerms); } - - void ParallelArrayTermVectorMapper::setExpectations(const String& field, int32_t numTerms, bool storeOffsets, bool storePositions) - { - this->field = field; - terms = Collection::newInstance(numTerms); - termFreqs = Collection::newInstance(numTerms); - this->storingOffsets = storeOffsets; - this->storingPositions = storePositions; - - if (storePositions) - this->positions = Collection< Collection >::newInstance(numTerms); - if (storeOffsets) - this->offsets = Collection< Collection >::newInstance(numTerms); +} + +void ParallelArrayTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) { + terms[currentPosition] = term; + termFreqs[currentPosition] = frequency; + if (storingOffsets) { + this->offsets[currentPosition] = offsets; } - - void ParallelArrayTermVectorMapper::map(const String& term, int32_t frequency, Collection offsets, Collection positions) - { - terms[currentPosition] = term; - termFreqs[currentPosition] = frequency; - if (storingOffsets) - this->offsets[currentPosition] = offsets; - if (storingPositions) - this->positions[currentPosition] = positions; - ++currentPosition; + if (storingPositions) { + this->positions[currentPosition] = positions; } - - TermFreqVectorPtr ParallelArrayTermVectorMapper::materializeVector() - { - SegmentTermVectorPtr tv; - if (!field.empty() && terms) - { - if (storingPositions || storingOffsets) - tv = newLucene(field, terms, termFreqs, positions, offsets); - else - tv = newLucene(field, terms, termFreqs); + ++currentPosition; +} + +TermFreqVectorPtr 
ParallelArrayTermVectorMapper::materializeVector() { + SegmentTermVectorPtr tv; + if (!field.empty() && terms) { + if (storingPositions || storingOffsets) { + tv = newLucene(field, terms, termFreqs, positions, offsets); + } else { + tv = newLucene(field, terms, termFreqs); } - return tv; } + return tv; +} + } diff --git a/src/core/index/TermVectorsTermsWriter.cpp b/src/core/index/TermVectorsTermsWriter.cpp index 0f55ef7f..f7173cee 100644 --- a/src/core/index/TermVectorsTermsWriter.cpp +++ b/src/core/index/TermVectorsTermsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -19,321 +19,276 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - TermVectorsTermsWriter::TermVectorsTermsWriter(DocumentsWriterPtr docWriter) - { - this->freeCount = 0; - this->lastDocID = 0; - this->allocCount = 0; - this->_docWriter = docWriter; - this->docFreeList = Collection::newInstance(1); - } - - TermVectorsTermsWriter::~TermVectorsTermsWriter() - { - } - - TermsHashConsumerPerThreadPtr TermVectorsTermsWriter::addThread(TermsHashPerThreadPtr perThread) - { - return newLucene(perThread, shared_from_this()); - } - - void TermVectorsTermsWriter::createPostings(Collection postings, int32_t start, int32_t count) - { - int32_t end = start + count; - for (int32_t i = start; i < end; ++i) - postings[i] = newLucene(); +namespace Lucene { + +TermVectorsTermsWriter::TermVectorsTermsWriter(const DocumentsWriterPtr& docWriter) { + this->freeCount = 0; + this->lastDocID = 0; + this->allocCount = 0; + this->_docWriter = docWriter; + this->docFreeList = Collection::newInstance(1); +} + 
+TermVectorsTermsWriter::~TermVectorsTermsWriter() { +} + +TermsHashConsumerPerThreadPtr TermVectorsTermsWriter::addThread(const TermsHashPerThreadPtr& perThread) { + return newLucene(perThread, shared_from_this()); +} + +void TermVectorsTermsWriter::createPostings(Collection postings, int32_t start, int32_t count) { + int32_t end = start + count; + for (int32_t i = start; i < end; ++i) { + postings[i] = newLucene(); } - - void TermVectorsTermsWriter::flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField threadsAndFields, SegmentWriteStatePtr state) - { - SyncLock syncLock(this); - - // NOTE: it's possible that all documents seen in this segment hit non-aborting exceptions, in which case we will - // not have yet init'd the TermVectorsWriter. This is actually OK (unlike in the stored fields case) because, - // although IieldInfos.hasVectors() will return true, the TermVectorsReader gracefully handles non-existence of - // the term vectors files. - if (tvx) - { - if (state->numDocsInStore > 0) - { - // In case there are some final documents that we didn't see (because they hit a non-aborting exception) - fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); - } - - tvx->flush(); - tvd->flush(); - tvf->flush(); - } - - for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) - { - for (Collection::iterator field = entry->second.begin(); field != entry->second.end(); ++field) - { - TermVectorsTermsWriterPerFieldPtr perField(boost::static_pointer_cast(*field)); - TermsHashPerFieldPtr(perField->_termsHashPerField)->reset(); - perField->shrinkHash(); - } - - TermVectorsTermsWriterPerThreadPtr perThread(boost::static_pointer_cast(entry->first)); - TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(true); +} + +void TermVectorsTermsWriter::flush(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField 
threadsAndFields, const SegmentWriteStatePtr& state) { + SyncLock syncLock(this); + + // NOTE: it's possible that all documents seen in this segment hit non-aborting exceptions, in which case we will + // not have yet init'd the TermVectorsWriter. This is actually OK (unlike in the stored fields case) because, + // although IieldInfos.hasVectors() will return true, the TermVectorsReader gracefully handles non-existence of + // the term vectors files. + if (tvx) { + if (state->numDocsInStore > 0) { + // In case there are some final documents that we didn't see (because they hit a non-aborting exception) + fill(state->numDocsInStore - DocumentsWriterPtr(_docWriter)->getDocStoreOffset()); } + + tvx->flush(); + tvd->flush(); + tvf->flush(); } - - void TermVectorsTermsWriter::closeDocStore(SegmentWriteStatePtr state) - { - SyncLock syncLock(this); - if (tvx) - { - DocumentsWriterPtr docWriter(_docWriter); - - // At least one doc in this run had term vectors enabled - fill(state->numDocsInStore - docWriter->getDocStoreOffset()); - tvx->close(); - tvf->close(); - tvd->close(); - tvx.reset(); - BOOST_ASSERT(!state->docStoreSegmentName.empty()); - String fileName(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); - if (4 + ((int64_t)state->numDocsInStore) * 16 != state->directory->fileLength(fileName)) - { - boost::throw_exception(RuntimeException(L"after flush: tvx size mismatch: " + StringUtils::toString(state->numDocsInStore) + - L" docs vs " + StringUtils::toString(state->directory->fileLength(fileName)) + - L" length in bytes of " + fileName + L" file exists?=" + - StringUtils::toString(state->directory->fileExists(fileName)))); - } - - state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); - state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); - state->flushedFiles.add(state->docStoreSegmentName + L"." 
+ IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); - - docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); - docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); - docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); - - lastDocID = 0; + + for (MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { + for (Collection::iterator field = entry->second.begin(); field != entry->second.end(); ++field) { + TermVectorsTermsWriterPerFieldPtr perField(boost::static_pointer_cast(*field)); + TermsHashPerFieldPtr(perField->_termsHashPerField)->reset(); + perField->shrinkHash(); } + + TermVectorsTermsWriterPerThreadPtr perThread(boost::static_pointer_cast(entry->first)); + TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(true); } - - TermVectorsTermsWriterPerDocPtr TermVectorsTermsWriter::getPerDoc() - { - SyncLock syncLock(this); - if (freeCount == 0) - { - if (++allocCount > docFreeList.size()) - { - // Grow our free list up front to make sure we have enough space to recycle all outstanding - // PerDoc instances - BOOST_ASSERT(allocCount == 1 + docFreeList.size()); - docFreeList.resize(MiscUtils::getNextSize(allocCount)); - } - return newLucene(shared_from_this()); +} + +void TermVectorsTermsWriter::closeDocStore(const SegmentWriteStatePtr& state) { + SyncLock syncLock(this); + if (tvx) { + DocumentsWriterPtr docWriter(_docWriter); + + // At least one doc in this run had term vectors enabled + fill(state->numDocsInStore - docWriter->getDocStoreOffset()); + tvx->close(); + tvf->close(); + tvd->close(); + tvx.reset(); + BOOST_ASSERT(!state->docStoreSegmentName.empty()); + String fileName(state->docStoreSegmentName + L"." 
+ IndexFileNames::VECTORS_INDEX_EXTENSION()); + if (4 + ((int64_t)state->numDocsInStore) * 16 != state->directory->fileLength(fileName)) { + boost::throw_exception(RuntimeException(L"after flush: tvx size mismatch: " + StringUtils::toString(state->numDocsInStore) + + L" docs vs " + StringUtils::toString(state->directory->fileLength(fileName)) + + L" length in bytes of " + fileName + L" file exists?=" + + StringUtils::toString(state->directory->fileExists(fileName)))); } - else - return docFreeList[--freeCount]; + + state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); + state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); + state->flushedFiles.add(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); + + docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); + docWriter->removeOpenFile(state->docStoreSegmentName + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); + docWriter->removeOpenFile(state->docStoreSegmentName + L"." 
+ IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); + + lastDocID = 0; } - - void TermVectorsTermsWriter::fill(int32_t docID) - { - int32_t docStoreOffset = DocumentsWriterPtr(_docWriter)->getDocStoreOffset(); - int32_t end = docID + docStoreOffset; - if (lastDocID < end) - { - int64_t tvfPosition = tvf->getFilePointer(); - while (lastDocID < end) - { - tvx->writeLong(tvd->getFilePointer()); - tvd->writeVInt(0); - tvx->writeLong(tvfPosition); - ++lastDocID; - } +} + +TermVectorsTermsWriterPerDocPtr TermVectorsTermsWriter::getPerDoc() { + SyncLock syncLock(this); + if (freeCount == 0) { + if (++allocCount > docFreeList.size()) { + // Grow our free list up front to make sure we have enough space to recycle all outstanding + // PerDoc instances + BOOST_ASSERT(allocCount == 1 + docFreeList.size()); + docFreeList.resize(MiscUtils::getNextSize(allocCount)); } + return newLucene(shared_from_this()); + } else { + return docFreeList[--freeCount]; } - - void TermVectorsTermsWriter::initTermVectorsWriter() - { - SyncLock syncLock(this); - if (!tvx) - { - DocumentsWriterPtr docWriter(_docWriter); - - String docStoreSegment(docWriter->getDocStoreSegment()); - if (docStoreSegment.empty()) - return; - - // If we hit an exception while init'ing the term vector output files, we must abort this segment - // because those files will be in an unknown state - tvx = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); - tvd = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); - tvf = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); - - tvx->writeInt(TermVectorsReader::FORMAT_CURRENT); - tvd->writeInt(TermVectorsReader::FORMAT_CURRENT); - tvf->writeInt(TermVectorsReader::FORMAT_CURRENT); - - docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); - docWriter->addOpenFile(docStoreSegment + L"." 
+ IndexFileNames::VECTORS_FIELDS_EXTENSION()); - docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); - - lastDocID = 0; +} + +void TermVectorsTermsWriter::fill(int32_t docID) { + int32_t docStoreOffset = DocumentsWriterPtr(_docWriter)->getDocStoreOffset(); + int32_t end = docID + docStoreOffset; + if (lastDocID < end) { + int64_t tvfPosition = tvf->getFilePointer(); + while (lastDocID < end) { + tvx->writeLong(tvd->getFilePointer()); + tvd->writeVInt(0); + tvx->writeLong(tvfPosition); + ++lastDocID; } } - - void TermVectorsTermsWriter::finishDocument(TermVectorsTermsWriterPerDocPtr perDoc) - { - SyncLock syncLock(this); +} + +void TermVectorsTermsWriter::initTermVectorsWriter() { + SyncLock syncLock(this); + if (!tvx) { DocumentsWriterPtr docWriter(_docWriter); - - BOOST_ASSERT(IndexWriterPtr(docWriter->_writer)->testPoint(L"TermVectorsTermsWriter.finishDocument start")); - - initTermVectorsWriter(); - - fill(perDoc->docID); - - // Append term vectors to the real outputs - tvx->writeLong(tvd->getFilePointer()); - tvx->writeLong(tvf->getFilePointer()); - tvd->writeVInt(perDoc->numVectorFields); - - if (perDoc->numVectorFields > 0) - { - for (int32_t i = 0; i < perDoc->numVectorFields; ++i) - tvd->writeVInt(perDoc->fieldNumbers[i]); - BOOST_ASSERT(perDoc->fieldPointers[0] == 0); - int64_t lastPos = perDoc->fieldPointers[0]; - for (int32_t i = 1; i < perDoc->numVectorFields; ++i) - { - int64_t pos = perDoc->fieldPointers[i]; - tvd->writeVLong(pos - lastPos); - lastPos = pos; - } - perDoc->perDocTvf->writeTo(tvf); - perDoc->numVectorFields = 0; + + String docStoreSegment(docWriter->getDocStoreSegment()); + if (docStoreSegment.empty()) { + return; } - - BOOST_ASSERT(lastDocID == perDoc->docID + docWriter->getDocStoreOffset()); - - ++lastDocID; - - perDoc->reset(); - free(perDoc); - BOOST_ASSERT(IndexWriterPtr(docWriter->_writer)->testPoint(L"TermVectorsTermsWriter.finishDocument end")); - } - - bool 
TermVectorsTermsWriter::freeRAM() - { - // We don't hold any state beyond one doc, so we don't free persistent RAM here - return false; + + // If we hit an exception while init'ing the term vector output files, we must abort this segment + // because those files will be in an unknown state + tvx = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); + tvd = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); + tvf = docWriter->directory->createOutput(docStoreSegment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); + + tvx->writeInt(TermVectorsReader::FORMAT_CURRENT); + tvd->writeInt(TermVectorsReader::FORMAT_CURRENT); + tvf->writeInt(TermVectorsReader::FORMAT_CURRENT); + + docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); + docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); + docWriter->addOpenFile(docStoreSegment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); + + lastDocID = 0; } - - void TermVectorsTermsWriter::abort() - { - if (tvx) - { - try - { - tvx->close(); - } - catch (...) - { - } - tvx.reset(); - } - if (tvd) - { - try - { - tvd->close(); - } - catch (...) 
- { - } - tvd.reset(); +} + +void TermVectorsTermsWriter::finishDocument(const TermVectorsTermsWriterPerDocPtr& perDoc) { + SyncLock syncLock(this); + DocumentsWriterPtr docWriter(_docWriter); + + BOOST_ASSERT(IndexWriterPtr(docWriter->_writer)->testPoint(L"TermVectorsTermsWriter.finishDocument start")); + + initTermVectorsWriter(); + + fill(perDoc->docID); + + // Append term vectors to the real outputs + tvx->writeLong(tvd->getFilePointer()); + tvx->writeLong(tvf->getFilePointer()); + tvd->writeVInt(perDoc->numVectorFields); + + if (perDoc->numVectorFields > 0) { + for (int32_t i = 0; i < perDoc->numVectorFields; ++i) { + tvd->writeVInt(perDoc->fieldNumbers[i]); } - if (tvf) - { - try - { - tvf->close(); - } - catch (...) - { - } - tvf.reset(); + BOOST_ASSERT(perDoc->fieldPointers[0] == 0); + int64_t lastPos = perDoc->fieldPointers[0]; + for (int32_t i = 1; i < perDoc->numVectorFields; ++i) { + int64_t pos = perDoc->fieldPointers[i]; + tvd->writeVLong(pos - lastPos); + lastPos = pos; } - lastDocID = 0; - } - - void TermVectorsTermsWriter::free(TermVectorsTermsWriterPerDocPtr doc) - { - SyncLock syncLock(this); - BOOST_ASSERT(freeCount < docFreeList.size()); - docFreeList[freeCount++] = doc; - } - - int32_t TermVectorsTermsWriter::bytesPerPosting() - { - return (RawPostingList::BYTES_SIZE + 3 * DocumentsWriter::INT_NUM_BYTE); - } - - TermVectorsTermsWriterPerDoc::TermVectorsTermsWriterPerDoc(TermVectorsTermsWriterPtr termsWriter) - { - this->_termsWriter = termsWriter; - buffer = DocumentsWriterPtr(termsWriter->_docWriter)->newPerDocBuffer(); - perDocTvf = newLucene(buffer); - numVectorFields = 0; - fieldNumbers = Collection::newInstance(1); - fieldPointers = Collection::newInstance(1); + perDoc->perDocTvf->writeTo(tvf); + perDoc->numVectorFields = 0; } - - TermVectorsTermsWriterPerDoc::~TermVectorsTermsWriterPerDoc() - { - } - - void TermVectorsTermsWriterPerDoc::reset() - { - perDocTvf->reset(); - buffer->recycle(); - numVectorFields = 0; - } - - void 
TermVectorsTermsWriterPerDoc::abort() - { - reset(); - TermVectorsTermsWriterPtr(_termsWriter)->free(shared_from_this()); - } - - void TermVectorsTermsWriterPerDoc::addField(int32_t fieldNumber) - { - if (numVectorFields == fieldNumbers.size()) - { - fieldNumbers.resize(MiscUtils::getNextSize(fieldNumbers.size())); - fieldPointers.resize(MiscUtils::getNextSize(fieldPointers.size())); + + BOOST_ASSERT(lastDocID == perDoc->docID + docWriter->getDocStoreOffset()); + + ++lastDocID; + + perDoc->reset(); + free(perDoc); + BOOST_ASSERT(IndexWriterPtr(docWriter->_writer)->testPoint(L"TermVectorsTermsWriter.finishDocument end")); +} + +bool TermVectorsTermsWriter::freeRAM() { + // We don't hold any state beyond one doc, so we don't free persistent RAM here + return false; +} + +void TermVectorsTermsWriter::abort() { + if (tvx) { + try { + tvx->close(); + } catch (...) { } - fieldNumbers[numVectorFields] = fieldNumber; - fieldPointers[numVectorFields] = perDocTvf->getFilePointer(); - ++numVectorFields; - } - - int64_t TermVectorsTermsWriterPerDoc::sizeInBytes() - { - return buffer->getSizeInBytes(); + tvx.reset(); } - - void TermVectorsTermsWriterPerDoc::finish() - { - TermVectorsTermsWriterPtr(_termsWriter)->finishDocument(shared_from_this()); + if (tvd) { + try { + tvd->close(); + } catch (...) { + } + tvd.reset(); } - - TermVectorsTermsWriterPostingList::TermVectorsTermsWriterPostingList() - { - freq = 0; - lastOffset = 0; - lastPosition = 0; + if (tvf) { + try { + tvf->close(); + } catch (...) 
{ + } + tvf.reset(); } - - TermVectorsTermsWriterPostingList::~TermVectorsTermsWriterPostingList() - { + lastDocID = 0; +} + +void TermVectorsTermsWriter::free(const TermVectorsTermsWriterPerDocPtr& doc) { + SyncLock syncLock(this); + BOOST_ASSERT(freeCount < docFreeList.size()); + docFreeList[freeCount++] = doc; +} + +int32_t TermVectorsTermsWriter::bytesPerPosting() { + return (RawPostingList::BYTES_SIZE + 3 * DocumentsWriter::INT_NUM_BYTE); +} + +TermVectorsTermsWriterPerDoc::TermVectorsTermsWriterPerDoc(const TermVectorsTermsWriterPtr& termsWriter) { + this->_termsWriter = termsWriter; + buffer = DocumentsWriterPtr(termsWriter->_docWriter)->newPerDocBuffer(); + perDocTvf = newLucene(buffer); + numVectorFields = 0; + fieldNumbers = Collection::newInstance(1); + fieldPointers = Collection::newInstance(1); +} + +TermVectorsTermsWriterPerDoc::~TermVectorsTermsWriterPerDoc() { +} + +void TermVectorsTermsWriterPerDoc::reset() { + perDocTvf->reset(); + buffer->recycle(); + numVectorFields = 0; +} + +void TermVectorsTermsWriterPerDoc::abort() { + reset(); + TermVectorsTermsWriterPtr(_termsWriter)->free(shared_from_this()); +} + +void TermVectorsTermsWriterPerDoc::addField(int32_t fieldNumber) { + if (numVectorFields == fieldNumbers.size()) { + fieldNumbers.resize(MiscUtils::getNextSize(fieldNumbers.size())); + fieldPointers.resize(MiscUtils::getNextSize(fieldPointers.size())); } + fieldNumbers[numVectorFields] = fieldNumber; + fieldPointers[numVectorFields] = perDocTvf->getFilePointer(); + ++numVectorFields; +} + +int64_t TermVectorsTermsWriterPerDoc::sizeInBytes() { + return buffer->getSizeInBytes(); +} + +void TermVectorsTermsWriterPerDoc::finish() { + TermVectorsTermsWriterPtr(_termsWriter)->finishDocument(shared_from_this()); +} + +TermVectorsTermsWriterPostingList::TermVectorsTermsWriterPostingList() { + freq = 0; + lastOffset = 0; + lastPosition = 0; +} + +TermVectorsTermsWriterPostingList::~TermVectorsTermsWriterPostingList() { +} + } diff --git 
a/src/core/index/TermVectorsTermsWriterPerField.cpp b/src/core/index/TermVectorsTermsWriterPerField.cpp index a0e14871..85724964 100644 --- a/src/core/index/TermVectorsTermsWriterPerField.cpp +++ b/src/core/index/TermVectorsTermsWriterPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -23,254 +23,238 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - TermVectorsTermsWriterPerField::TermVectorsTermsWriterPerField(TermsHashPerFieldPtr termsHashPerField, TermVectorsTermsWriterPerThreadPtr perThread, FieldInfoPtr fieldInfo) - { - this->doVectors = false; - this->doVectorPositions = false; - this->doVectorOffsets = false; - this->maxNumPostings = 0; - - this->_termsHashPerField = termsHashPerField; - this->_perThread = perThread; - this->_termsWriter = perThread->_termsWriter; - this->fieldInfo = fieldInfo; - _docState = termsHashPerField->docState; - _fieldState = termsHashPerField->fieldState; - } - - TermVectorsTermsWriterPerField::~TermVectorsTermsWriterPerField() - { - } - - int32_t TermVectorsTermsWriterPerField::getStreamCount() - { - return 2; - } - - bool TermVectorsTermsWriterPerField::start(Collection fields, int32_t count) - { - doVectors = false; - doVectorPositions = false; - doVectorOffsets = false; - - for (int32_t i = 0; i < count; ++i) - { - FieldablePtr field(fields[i]); - if (field->isIndexed() && field->isTermVectorStored()) - { - doVectors = true; - if (field->isStorePositionWithTermVector()) - doVectorPositions = true; - if (field->isStoreOffsetWithTermVector()) - doVectorOffsets = true; +namespace Lucene { + 
+TermVectorsTermsWriterPerField::TermVectorsTermsWriterPerField(const TermsHashPerFieldPtr& termsHashPerField, const TermVectorsTermsWriterPerThreadPtr& perThread, const FieldInfoPtr& fieldInfo) { + this->doVectors = false; + this->doVectorPositions = false; + this->doVectorOffsets = false; + this->maxNumPostings = 0; + + this->_termsHashPerField = termsHashPerField; + this->_perThread = perThread; + this->_termsWriter = perThread->_termsWriter; + this->fieldInfo = fieldInfo; + _docState = termsHashPerField->docState; + _fieldState = termsHashPerField->fieldState; +} + +TermVectorsTermsWriterPerField::~TermVectorsTermsWriterPerField() { +} + +int32_t TermVectorsTermsWriterPerField::getStreamCount() { + return 2; +} + +bool TermVectorsTermsWriterPerField::start(Collection fields, int32_t count) { + doVectors = false; + doVectorPositions = false; + doVectorOffsets = false; + + for (int32_t i = 0; i < count; ++i) { + FieldablePtr field(fields[i]); + if (field->isIndexed() && field->isTermVectorStored()) { + doVectors = true; + if (field->isStorePositionWithTermVector()) { + doVectorPositions = true; } - } - - if (doVectors) - { - TermVectorsTermsWriterPerThreadPtr perThread(_perThread); - DocStatePtr docState(_docState); - if (!perThread->doc) - { - perThread->doc = TermVectorsTermsWriterPtr(_termsWriter)->getPerDoc(); - perThread->doc->docID = docState->docID; - BOOST_ASSERT(perThread->doc->numVectorFields == 0); - BOOST_ASSERT(perThread->doc->perDocTvf->length() == 0); - BOOST_ASSERT(perThread->doc->perDocTvf->getFilePointer() == 0); + if (field->isStoreOffsetWithTermVector()) { + doVectorOffsets = true; } + } + } - BOOST_ASSERT(perThread->doc->docID == docState->docID); - - TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); - - if (termsHashPerField->numPostings != 0) - { - // Only necessary if previous doc hit a non-aborting exception while writing vectors - // in this field - termsHashPerField->reset(); - 
TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(false); - } + if (doVectors) { + TermVectorsTermsWriterPerThreadPtr perThread(_perThread); + DocStatePtr docState(_docState); + if (!perThread->doc) { + perThread->doc = TermVectorsTermsWriterPtr(_termsWriter)->getPerDoc(); + perThread->doc->docID = docState->docID; + BOOST_ASSERT(perThread->doc->numVectorFields == 0); + BOOST_ASSERT(perThread->doc->perDocTvf->length() == 0); + BOOST_ASSERT(perThread->doc->perDocTvf->getFilePointer() == 0); + } + + BOOST_ASSERT(perThread->doc->docID == docState->docID); + + TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); + + if (termsHashPerField->numPostings != 0) { + // Only necessary if previous doc hit a non-aborting exception while writing vectors + // in this field + termsHashPerField->reset(); + TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(false); } - - return doVectors; } - - void TermVectorsTermsWriterPerField::abort() - { + + return doVectors; +} + +void TermVectorsTermsWriterPerField::abort() { +} + +void TermVectorsTermsWriterPerField::finish() { + BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.finish start")); + + TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); + int32_t numPostings = termsHashPerField->numPostings; + + BOOST_ASSERT(numPostings >= 0); + + if (!doVectors || numPostings == 0) { + return; } - - void TermVectorsTermsWriterPerField::finish() - { - BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.finish start")); - - TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); - int32_t numPostings = termsHashPerField->numPostings; - - BOOST_ASSERT(numPostings >= 0); - - if (!doVectors || numPostings == 0) - return; - - if (numPostings > maxNumPostings) - maxNumPostings = numPostings; - - TermVectorsTermsWriterPerThreadPtr perThread(_perThread); - IndexOutputPtr tvf(perThread->doc->perDocTvf); - - // This is called once, after inverting all 
occurrences of a given field in the doc. At this point we flush - // our hash into the DocWriter. - - BOOST_ASSERT(fieldInfo->storeTermVector); - BOOST_ASSERT(perThread->vectorFieldsInOrder(fieldInfo)); - - perThread->doc->addField(termsHashPerField->fieldInfo->number); - - Collection postings(termsHashPerField->sortPostings()); - - tvf->writeVInt(numPostings); - uint8_t bits = 0x0; - if (doVectorPositions) - bits |= TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR; - if (doVectorOffsets) - bits |= TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR; - tvf->writeByte(bits); - - int32_t encoderUpto = 0; - int32_t lastTermBytesCount = 0; - - ByteSliceReaderPtr reader(perThread->vectorSliceReader); - Collection charBuffers(TermsHashPerThreadPtr(perThread->_termsHashPerThread)->charPool->buffers); - - for (int32_t j = 0; j < numPostings; ++j) - { - TermVectorsTermsWriterPostingListPtr posting(boost::static_pointer_cast(postings[j])); - int32_t freq = posting->freq; - - CharArray text2(charBuffers[posting->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]); - int32_t start2 = (posting->textStart & DocumentsWriter::CHAR_BLOCK_MASK); - - // We swap between two encoders to save copying last Term's byte array - UTF8ResultPtr utf8Result(perThread->utf8Results[encoderUpto]); - - StringUtils::toUTF8(text2.get() + start2, text2.size(), utf8Result); - int32_t termBytesCount = utf8Result->length; - - // Compute common prefix between last term and this term - int32_t prefix = 0; - if (j > 0) - { - ByteArray lastTermBytes(perThread->utf8Results[1 - encoderUpto]->result); - ByteArray termBytes(perThread->utf8Results[encoderUpto]->result); - while (prefix < lastTermBytesCount && prefix < termBytesCount) - { - if (lastTermBytes[prefix] != termBytes[prefix]) - break; - ++prefix; + + if (numPostings > maxNumPostings) { + maxNumPostings = numPostings; + } + + TermVectorsTermsWriterPerThreadPtr perThread(_perThread); + IndexOutputPtr tvf(perThread->doc->perDocTvf); + + // This is called once, 
after inverting all occurrences of a given field in the doc. At this point we flush + // our hash into the DocWriter. + + BOOST_ASSERT(fieldInfo->storeTermVector); + BOOST_ASSERT(perThread->vectorFieldsInOrder(fieldInfo)); + + perThread->doc->addField(termsHashPerField->fieldInfo->number); + + Collection postings(termsHashPerField->sortPostings()); + + tvf->writeVInt(numPostings); + uint8_t bits = 0x0; + if (doVectorPositions) { + bits |= TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR; + } + if (doVectorOffsets) { + bits |= TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR; + } + tvf->writeByte(bits); + + int32_t encoderUpto = 0; + int32_t lastTermBytesCount = 0; + + ByteSliceReaderPtr reader(perThread->vectorSliceReader); + Collection charBuffers(TermsHashPerThreadPtr(perThread->_termsHashPerThread)->charPool->buffers); + + for (int32_t j = 0; j < numPostings; ++j) { + TermVectorsTermsWriterPostingListPtr posting(boost::static_pointer_cast(postings[j])); + int32_t freq = posting->freq; + + CharArray text2(charBuffers[posting->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]); + int32_t start2 = (posting->textStart & DocumentsWriter::CHAR_BLOCK_MASK); + + // We swap between two encoders to save copying last Term's byte array + UTF8ResultPtr utf8Result(perThread->utf8Results[encoderUpto]); + + StringUtils::toUTF8(text2.get() + start2, text2.size(), utf8Result); + int32_t termBytesCount = utf8Result->length; + + // Compute common prefix between last term and this term + int32_t prefix = 0; + if (j > 0) { + ByteArray lastTermBytes(perThread->utf8Results[1 - encoderUpto]->result); + ByteArray termBytes(perThread->utf8Results[encoderUpto]->result); + while (prefix < lastTermBytesCount && prefix < termBytesCount) { + if (lastTermBytes[prefix] != termBytes[prefix]) { + break; } + ++prefix; } - encoderUpto = 1 - encoderUpto; - lastTermBytesCount = termBytesCount; - - int32_t suffix = termBytesCount - prefix; - tvf->writeVInt(prefix); - tvf->writeVInt(suffix); - 
tvf->writeBytes(utf8Result->result.get(), prefix, suffix); - tvf->writeVInt(freq); - - if (doVectorPositions) - { - termsHashPerField->initReader(reader, posting, 0); - reader->writeTo(tvf); - } - - if (doVectorOffsets) - { - termsHashPerField->initReader(reader, posting, 1); - reader->writeTo(tvf); - } } - - termsHashPerField->reset(); - - // NOTE: we clear per-field at the thread level, because term vectors fully write themselves on each - // field; this saves RAM (eg if large doc has two large fields with term vectors on) because we - // recycle/reuse all RAM after each field - TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(false); + encoderUpto = 1 - encoderUpto; + lastTermBytesCount = termBytesCount; + + int32_t suffix = termBytesCount - prefix; + tvf->writeVInt(prefix); + tvf->writeVInt(suffix); + tvf->writeBytes(utf8Result->result.get(), prefix, suffix); + tvf->writeVInt(freq); + + if (doVectorPositions) { + termsHashPerField->initReader(reader, posting, 0); + reader->writeTo(tvf); + } + + if (doVectorOffsets) { + termsHashPerField->initReader(reader, posting, 1); + reader->writeTo(tvf); + } } - - void TermVectorsTermsWriterPerField::shrinkHash() - { - TermsHashPerFieldPtr(_termsHashPerField)->shrinkHash(maxNumPostings); - maxNumPostings = 0; + + termsHashPerField->reset(); + + // NOTE: we clear per-field at the thread level, because term vectors fully write themselves on each + // field; this saves RAM (eg if large doc has two large fields with term vectors on) because we + // recycle/reuse all RAM after each field + TermsHashPerThreadPtr(perThread->_termsHashPerThread)->reset(false); +} + +void TermVectorsTermsWriterPerField::shrinkHash() { + TermsHashPerFieldPtr(_termsHashPerField)->shrinkHash(maxNumPostings); + maxNumPostings = 0; +} + +void TermVectorsTermsWriterPerField::start(const FieldablePtr& field) { + if (doVectorOffsets) { + offsetAttribute = FieldInvertStatePtr(_fieldState)->attributeSource->addAttribute(); + } else { + 
offsetAttribute.reset(); } - - void TermVectorsTermsWriterPerField::start(FieldablePtr field) - { - if (doVectorOffsets) - offsetAttribute = FieldInvertStatePtr(_fieldState)->attributeSource->addAttribute(); - else - offsetAttribute.reset(); +} + +void TermVectorsTermsWriterPerField::newTerm(const RawPostingListPtr& p0) { + BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.newTerm start")); + + TermVectorsTermsWriterPostingListPtr p(boost::static_pointer_cast(p0)); + + p->freq = 1; + + FieldInvertStatePtr fieldState(_fieldState); + TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); + + if (doVectorOffsets) { + int32_t startOffset = fieldState->offset + offsetAttribute->startOffset(); + int32_t endOffset = fieldState->offset + offsetAttribute->endOffset(); + + termsHashPerField->writeVInt(1, startOffset); + termsHashPerField->writeVInt(1, endOffset - startOffset); + p->lastOffset = endOffset; } - - void TermVectorsTermsWriterPerField::newTerm(RawPostingListPtr p0) - { - BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.newTerm start")); - - TermVectorsTermsWriterPostingListPtr p(boost::static_pointer_cast(p0)); - - p->freq = 1; - - FieldInvertStatePtr fieldState(_fieldState); - TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); - - if (doVectorOffsets) - { - int32_t startOffset = fieldState->offset + offsetAttribute->startOffset(); - int32_t endOffset = fieldState->offset + offsetAttribute->endOffset(); - - termsHashPerField->writeVInt(1, startOffset); - termsHashPerField->writeVInt(1, endOffset - startOffset); - p->lastOffset = endOffset; - } - - if (doVectorPositions) - { - termsHashPerField->writeVInt(0, fieldState->position); - p->lastPosition = fieldState->position; - } + + if (doVectorPositions) { + termsHashPerField->writeVInt(0, fieldState->position); + p->lastPosition = fieldState->position; } - - void TermVectorsTermsWriterPerField::addTerm(RawPostingListPtr p0) - { - 
BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.newTerm start")); - - TermVectorsTermsWriterPostingListPtr p(boost::static_pointer_cast(p0)); - - ++p->freq; - - FieldInvertStatePtr fieldState(_fieldState); - TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); - - if (doVectorOffsets) - { - int32_t startOffset = fieldState->offset + offsetAttribute->startOffset(); - int32_t endOffset = fieldState->offset + offsetAttribute->endOffset(); - - termsHashPerField->writeVInt(1, startOffset - p->lastOffset); - termsHashPerField->writeVInt(1, endOffset - startOffset); - p->lastOffset = endOffset; - } - - if (doVectorPositions) - { - termsHashPerField->writeVInt(0, fieldState->position - p->lastPosition); - p->lastPosition = fieldState->position; - } +} + +void TermVectorsTermsWriterPerField::addTerm(const RawPostingListPtr& p0) { + BOOST_ASSERT(DocStatePtr(_docState)->testPoint(L"TermVectorsTermsWriterPerField.newTerm start")); + + TermVectorsTermsWriterPostingListPtr p(boost::static_pointer_cast(p0)); + + ++p->freq; + + FieldInvertStatePtr fieldState(_fieldState); + TermsHashPerFieldPtr termsHashPerField(_termsHashPerField); + + if (doVectorOffsets) { + int32_t startOffset = fieldState->offset + offsetAttribute->startOffset(); + int32_t endOffset = fieldState->offset + offsetAttribute->endOffset(); + + termsHashPerField->writeVInt(1, startOffset - p->lastOffset); + termsHashPerField->writeVInt(1, endOffset - startOffset); + p->lastOffset = endOffset; } - - void TermVectorsTermsWriterPerField::skippingLongTerm() - { + + if (doVectorPositions) { + termsHashPerField->writeVInt(0, fieldState->position - p->lastPosition); + p->lastPosition = fieldState->position; } } + +void TermVectorsTermsWriterPerField::skippingLongTerm() { +} + +} diff --git a/src/core/index/TermVectorsTermsWriterPerThread.cpp b/src/core/index/TermVectorsTermsWriterPerThread.cpp index ac1b6dcd..add3a270 100644 --- a/src/core/index/TermVectorsTermsWriterPerThread.cpp 
+++ b/src/core/index/TermVectorsTermsWriterPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,62 +14,53 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - TermVectorsTermsWriterPerThread::TermVectorsTermsWriterPerThread(TermsHashPerThreadPtr termsHashPerThread, TermVectorsTermsWriterPtr termsWriter) - { - utf8Results = newCollection(newInstance(), newInstance()); - this->vectorSliceReader = newLucene(); - this->_termsWriter = termsWriter; - this->_termsHashPerThread = termsHashPerThread; - _docState = termsHashPerThread->docState; - } - - TermVectorsTermsWriterPerThread::~TermVectorsTermsWriterPerThread() - { - } - - void TermVectorsTermsWriterPerThread::startDocument() - { - BOOST_ASSERT(clearLastVectorFieldName()); - if (doc) - { - doc->reset(); - doc->docID = DocStatePtr(_docState)->docID; - } +namespace Lucene { + +TermVectorsTermsWriterPerThread::TermVectorsTermsWriterPerThread(const TermsHashPerThreadPtr& termsHashPerThread, const TermVectorsTermsWriterPtr& termsWriter) { + utf8Results = newCollection(newInstance(), newInstance()); + this->vectorSliceReader = newLucene(); + this->_termsWriter = termsWriter; + this->_termsHashPerThread = termsHashPerThread; + _docState = termsHashPerThread->docState; +} + +TermVectorsTermsWriterPerThread::~TermVectorsTermsWriterPerThread() { +} + +void TermVectorsTermsWriterPerThread::startDocument() { + BOOST_ASSERT(clearLastVectorFieldName()); + if (doc) { + doc->reset(); + doc->docID = DocStatePtr(_docState)->docID; } - - DocWriterPtr TermVectorsTermsWriterPerThread::finishDocument() - { - DocWriterPtr returnDoc(doc); +} 
+ +DocWriterPtr TermVectorsTermsWriterPerThread::finishDocument() { + DocWriterPtr returnDoc(doc); + doc.reset(); + return returnDoc; +} + +TermsHashConsumerPerFieldPtr TermVectorsTermsWriterPerThread::addField(const TermsHashPerFieldPtr& termsHashPerField, const FieldInfoPtr& fieldInfo) { + return newLucene(termsHashPerField, shared_from_this(), fieldInfo); +} + +void TermVectorsTermsWriterPerThread::abort() { + if (doc) { + doc->abort(); doc.reset(); - return returnDoc; - } - - TermsHashConsumerPerFieldPtr TermVectorsTermsWriterPerThread::addField(TermsHashPerFieldPtr termsHashPerField, FieldInfoPtr fieldInfo) - { - return newLucene(termsHashPerField, shared_from_this(), fieldInfo); - } - - void TermVectorsTermsWriterPerThread::abort() - { - if (doc) - { - doc->abort(); - doc.reset(); - } - } - - bool TermVectorsTermsWriterPerThread::clearLastVectorFieldName() - { - lastVectorFieldName.clear(); - return true; - } - - bool TermVectorsTermsWriterPerThread::vectorFieldsInOrder(FieldInfoPtr fi) - { - bool inOrder = lastVectorFieldName.empty() ? true : (lastVectorFieldName < fi->name); - lastVectorFieldName = fi->name; - return inOrder; } } + +bool TermVectorsTermsWriterPerThread::clearLastVectorFieldName() { + lastVectorFieldName.clear(); + return true; +} + +bool TermVectorsTermsWriterPerThread::vectorFieldsInOrder(const FieldInfoPtr& fi) { + bool inOrder = lastVectorFieldName.empty() ? true : (lastVectorFieldName < fi->name); + lastVectorFieldName = fi->name; + return inOrder; +} + +} diff --git a/src/core/index/TermVectorsWriter.cpp b/src/core/index/TermVectorsWriter.cpp index 6a8dbf91..2f553f4d 100644 --- a/src/core/index/TermVectorsWriter.cpp +++ b/src/core/index/TermVectorsWriter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -17,208 +17,184 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - TermVectorsWriter::TermVectorsWriter(DirectoryPtr directory, const String& segment, FieldInfosPtr fieldInfos) - { - utf8Results = newCollection(newInstance(), newInstance()); - - // Open files for TermVector storage - tvx = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); - tvx->writeInt(TermVectorsReader::FORMAT_CURRENT); - tvd = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); - tvd->writeInt(TermVectorsReader::FORMAT_CURRENT); - tvf = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); - tvf->writeInt(TermVectorsReader::FORMAT_CURRENT); - - this->fieldInfos = fieldInfos; - } - - TermVectorsWriter::~TermVectorsWriter() - { - } - - void TermVectorsWriter::addAllDocVectors(Collection vectors) - { - tvx->writeLong(tvd->getFilePointer()); - tvx->writeLong(tvf->getFilePointer()); - - if (vectors) - { - int32_t numFields = vectors.size(); - tvd->writeVInt(numFields); - - Collection fieldPointers(Collection::newInstance(numFields)); - - for (int32_t i = 0; i < numFields; ++i) - { - fieldPointers[i] = tvf->getFilePointer(); - - int32_t fieldNumber = fieldInfos->fieldNumber(vectors[i]->getField()); - - // 1st pass: write field numbers to tvd - tvd->writeVInt(fieldNumber); - - int32_t numTerms = vectors[i]->size(); - tvf->writeVInt(numTerms); - - TermPositionVectorPtr tpVector(boost::dynamic_pointer_cast(vectors[i])); - - uint8_t bits; - bool storePositions; - bool storeOffsets; - - if (tpVector) - { - // May have positions & offsets - storePositions = (tpVector->size() > 0 && !tpVector->getTermPositions(0)); - storeOffsets = (tpVector->size() > 0 && tpVector->getOffsets(0)); 
- bits = (uint8_t)((storePositions ? TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR : 0) + - (storeOffsets ? TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR : 0)); - } - else - { - bits = 0; - storePositions = false; - storeOffsets = false; +namespace Lucene { + +TermVectorsWriter::TermVectorsWriter(const DirectoryPtr& directory, const String& segment, const FieldInfosPtr& fieldInfos) { + utf8Results = newCollection(newInstance(), newInstance()); + + // Open files for TermVector storage + tvx = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_INDEX_EXTENSION()); + tvx->writeInt(TermVectorsReader::FORMAT_CURRENT); + tvd = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_DOCUMENTS_EXTENSION()); + tvd->writeInt(TermVectorsReader::FORMAT_CURRENT); + tvf = directory->createOutput(segment + L"." + IndexFileNames::VECTORS_FIELDS_EXTENSION()); + tvf->writeInt(TermVectorsReader::FORMAT_CURRENT); + + this->fieldInfos = fieldInfos; +} + +TermVectorsWriter::~TermVectorsWriter() { +} + +void TermVectorsWriter::addAllDocVectors(Collection vectors) { + tvx->writeLong(tvd->getFilePointer()); + tvx->writeLong(tvf->getFilePointer()); + + if (vectors) { + int32_t numFields = vectors.size(); + tvd->writeVInt(numFields); + + Collection fieldPointers(Collection::newInstance(numFields)); + + for (int32_t i = 0; i < numFields; ++i) { + fieldPointers[i] = tvf->getFilePointer(); + + int32_t fieldNumber = fieldInfos->fieldNumber(vectors[i]->getField()); + + // 1st pass: write field numbers to tvd + tvd->writeVInt(fieldNumber); + + int32_t numTerms = vectors[i]->size(); + tvf->writeVInt(numTerms); + + TermPositionVectorPtr tpVector(boost::dynamic_pointer_cast(vectors[i])); + + uint8_t bits; + bool storePositions; + bool storeOffsets; + + if (tpVector) { + // May have positions & offsets + storePositions = (tpVector->size() > 0 && !tpVector->getTermPositions(0)); + storeOffsets = (tpVector->size() > 0 && tpVector->getOffsets(0)); + bits = 
(uint8_t)((storePositions ? TermVectorsReader::STORE_POSITIONS_WITH_TERMVECTOR : 0) + + (storeOffsets ? TermVectorsReader::STORE_OFFSET_WITH_TERMVECTOR : 0)); + } else { + bits = 0; + storePositions = false; + storeOffsets = false; + } + + tvf->writeVInt(bits); + Collection terms(vectors[i]->getTerms()); + Collection freqs(vectors[i]->getTermFrequencies()); + + int32_t utf8Upto = 0; + utf8Results[1]->length = 0; + + for (int32_t j = 0; j < numTerms; ++j) { + StringUtils::toUTF8(terms[j].c_str(), terms[j].length(), utf8Results[utf8Upto]); + + int32_t start = MiscUtils::bytesDifference(utf8Results[1 - utf8Upto]->result.get(), utf8Results[1 - utf8Upto]->length, + utf8Results[utf8Upto]->result.get(), utf8Results[utf8Upto]->length); + int32_t length = utf8Results[utf8Upto]->length - start; + tvf->writeVInt(start); // write shared prefix length + tvf->writeVInt(length); // write delta length + tvf->writeBytes(utf8Results[utf8Upto]->result.get(), start, length); // write delta bytes + utf8Upto = 1 - utf8Upto; + + int32_t termFreq = freqs[j]; + tvf->writeVInt(termFreq); + + if (storePositions) { + Collection positions(tpVector->getTermPositions(j)); + if (!positions) { + boost::throw_exception(IllegalStateException(L"Trying to write positions that are null!")); + } + BOOST_ASSERT(positions.size() == termFreq); + + // use delta encoding for positions + int32_t lastPosition = 0; + for (int32_t k = 0; k < positions.size(); ++k) { + int32_t position = positions[k]; + tvf->writeVInt(position - lastPosition); + lastPosition = position; + } } - - tvf->writeVInt(bits); - Collection terms(vectors[i]->getTerms()); - Collection freqs(vectors[i]->getTermFrequencies()); - - int32_t utf8Upto = 0; - utf8Results[1]->length = 0; - - for (int32_t j = 0; j < numTerms; ++j) - { - StringUtils::toUTF8(terms[j].c_str(), terms[j].length(), utf8Results[utf8Upto]); - - int32_t start = MiscUtils::bytesDifference(utf8Results[1 - utf8Upto]->result.get(), utf8Results[1 - utf8Upto]->length, - 
utf8Results[utf8Upto]->result.get(), utf8Results[utf8Upto]->length); - int32_t length = utf8Results[utf8Upto]->length - start; - tvf->writeVInt(start); // write shared prefix length - tvf->writeVInt(length); // write delta length - tvf->writeBytes(utf8Results[utf8Upto]->result.get(), start, length); // write delta bytes - utf8Upto = 1 - utf8Upto; - - int32_t termFreq = freqs[j]; - tvf->writeVInt(termFreq); - - if (storePositions) - { - Collection positions(tpVector->getTermPositions(j)); - if (!positions) - boost::throw_exception(IllegalStateException(L"Trying to write positions that are null!")); - BOOST_ASSERT(positions.size() == termFreq); - - // use delta encoding for positions - int32_t lastPosition = 0; - for (int32_t k = 0; k < positions.size(); ++k) - { - int32_t position = positions[k]; - tvf->writeVInt(position - lastPosition); - lastPosition = position; - } + + if (storeOffsets) { + Collection offsets(tpVector->getOffsets(j)); + if (!offsets) { + boost::throw_exception(IllegalStateException(L"Trying to write offsets that are null!")); } - - if (storeOffsets) - { - Collection offsets(tpVector->getOffsets(j)); - if (!offsets) - boost::throw_exception(IllegalStateException(L"Trying to write offsets that are null!")); - BOOST_ASSERT(offsets.size() == termFreq); - - // use delta encoding for offsets - int32_t lastEndOffset = 0; - for (int32_t k = 0; k < offsets.size(); ++k) - { - int32_t startOffset = offsets[k]->getStartOffset(); - int32_t endOffset = offsets[k]->getEndOffset(); - tvf->writeVInt(startOffset - lastEndOffset); - tvf->writeVInt(endOffset - startOffset); - lastEndOffset = endOffset; - } + BOOST_ASSERT(offsets.size() == termFreq); + + // use delta encoding for offsets + int32_t lastEndOffset = 0; + for (int32_t k = 0; k < offsets.size(); ++k) { + int32_t startOffset = offsets[k]->getStartOffset(); + int32_t endOffset = offsets[k]->getEndOffset(); + tvf->writeVInt(startOffset - lastEndOffset); + tvf->writeVInt(endOffset - startOffset); + 
lastEndOffset = endOffset; } } } - - // 2nd pass: write field pointers to tvd - if (numFields > 1) - { - int64_t lastFieldPointer = fieldPointers[0]; - for (int32_t i = 1; i < numFields; ++i) - { - int64_t fieldPointer = fieldPointers[i]; - tvd->writeVLong(fieldPointer - lastFieldPointer); - lastFieldPointer = fieldPointer; - } + } + + // 2nd pass: write field pointers to tvd + if (numFields > 1) { + int64_t lastFieldPointer = fieldPointers[0]; + for (int32_t i = 1; i < numFields; ++i) { + int64_t fieldPointer = fieldPointers[i]; + tvd->writeVLong(fieldPointer - lastFieldPointer); + lastFieldPointer = fieldPointer; } } - else - tvd->writeVInt(0); + } else { + tvd->writeVInt(0); } - - void TermVectorsWriter::addRawDocuments(TermVectorsReaderPtr reader, Collection tvdLengths, Collection tvfLengths, int32_t numDocs) - { - int64_t tvdPosition = tvd->getFilePointer(); - int64_t tvfPosition = tvf->getFilePointer(); - int64_t tvdStart = tvdPosition; - int64_t tvfStart = tvfPosition; - for (int32_t i = 0; i < numDocs; ++i) - { - tvx->writeLong(tvdPosition); - tvdPosition += tvdLengths[i]; - tvx->writeLong(tvfPosition); - tvfPosition += tvfLengths[i]; - } - tvd->copyBytes(reader->getTvdStream(), tvdPosition - tvdStart); - tvf->copyBytes(reader->getTvfStream(), tvfPosition - tvfStart); - BOOST_ASSERT(tvd->getFilePointer() == tvdPosition); - BOOST_ASSERT(tvf->getFilePointer() == tvfPosition); +} + +void TermVectorsWriter::addRawDocuments(const TermVectorsReaderPtr& reader, Collection tvdLengths, Collection tvfLengths, int32_t numDocs) { + int64_t tvdPosition = tvd->getFilePointer(); + int64_t tvfPosition = tvf->getFilePointer(); + int64_t tvdStart = tvdPosition; + int64_t tvfStart = tvfPosition; + for (int32_t i = 0; i < numDocs; ++i) { + tvx->writeLong(tvdPosition); + tvdPosition += tvdLengths[i]; + tvx->writeLong(tvfPosition); + tvfPosition += tvfLengths[i]; } - - void TermVectorsWriter::close() - { - // make an effort to close all streams we can but remember and re-throw 
the first exception - // encountered in this process - LuceneException keep; - if (tvx) - { - try - { - tvx->close(); - } - catch (LuceneException& e) - { - if (keep.isNull()) - keep = e; + tvd->copyBytes(reader->getTvdStream(), tvdPosition - tvdStart); + tvf->copyBytes(reader->getTvfStream(), tvfPosition - tvfStart); + BOOST_ASSERT(tvd->getFilePointer() == tvdPosition); + BOOST_ASSERT(tvf->getFilePointer() == tvfPosition); +} + +void TermVectorsWriter::close() { + // make an effort to close all streams we can but remember and re-throw the first exception + // encountered in this process + LuceneException keep; + if (tvx) { + try { + tvx->close(); + } catch (LuceneException& e) { + if (keep.isNull()) { + keep = e; } } - if (tvd) - { - try - { - tvd->close(); - } - catch (LuceneException& e) - { - if (keep.isNull()) - keep = e; + } + if (tvd) { + try { + tvd->close(); + } catch (LuceneException& e) { + if (keep.isNull()) { + keep = e; } } - if (tvf) - { - try - { - tvf->close(); - } - catch (LuceneException& e) - { - if (keep.isNull()) - keep = e; + } + if (tvf) { + try { + tvf->close(); + } catch (LuceneException& e) { + if (keep.isNull()) { + keep = e; } } - keep.throwException(); } + keep.throwException(); +} + } diff --git a/src/core/index/TermsHash.cpp b/src/core/index/TermsHash.cpp index 7cbc629d..ef10eb04 100644 --- a/src/core/index/TermsHash.cpp +++ b/src/core/index/TermsHash.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -16,195 +16,190 @@ #include "IndexWriter.h" #include "MiscUtils.h" -namespace Lucene -{ - TermsHash::TermsHash(DocumentsWriterPtr docWriter, bool trackAllocations, TermsHashConsumerPtr consumer, TermsHashPtr nextTermsHash) - { - this->postingsFreeCount = 0; - this->postingsAllocCount = 0; - this->trackAllocations = false; - this->postingsFreeList = Collection::newInstance(1); - - this->_docWriter = docWriter; - this->consumer = consumer; - this->nextTermsHash = nextTermsHash; - this->trackAllocations = trackAllocations; - - bytesPerPosting = consumer->bytesPerPosting() + 4 * DocumentsWriter::POINTER_NUM_BYTE; - postingsFreeChunk = (int32_t)((double)DocumentsWriter::BYTE_BLOCK_SIZE / (double)bytesPerPosting); - } - - TermsHash::~TermsHash() - { - } - - InvertedDocConsumerPerThreadPtr TermsHash::addThread(DocInverterPerThreadPtr docInverterPerThread) - { - return newLucene(docInverterPerThread, shared_from_this(), nextTermsHash, TermsHashPerThreadPtr()); +namespace Lucene { + +TermsHash::TermsHash(const DocumentsWriterPtr& docWriter, bool trackAllocations, const TermsHashConsumerPtr& consumer, const TermsHashPtr& nextTermsHash) { + this->postingsFreeCount = 0; + this->postingsAllocCount = 0; + this->trackAllocations = false; + this->postingsFreeList = Collection::newInstance(1); + + this->_docWriter = docWriter; + this->consumer = consumer; + this->nextTermsHash = nextTermsHash; + this->trackAllocations = trackAllocations; + + bytesPerPosting = consumer->bytesPerPosting() + 4 * DocumentsWriter::POINTER_NUM_BYTE; + postingsFreeChunk = (int32_t)((double)DocumentsWriter::BYTE_BLOCK_SIZE / (double)bytesPerPosting); +} + +TermsHash::~TermsHash() { +} + +InvertedDocConsumerPerThreadPtr TermsHash::addThread(const DocInverterPerThreadPtr& docInverterPerThread) { + return newLucene(docInverterPerThread, shared_from_this(), nextTermsHash, TermsHashPerThreadPtr()); +} + +TermsHashPerThreadPtr 
TermsHash::addThread(const DocInverterPerThreadPtr& docInverterPerThread, const TermsHashPerThreadPtr& primaryPerThread) { + return newLucene(docInverterPerThread, shared_from_this(), nextTermsHash, primaryPerThread); +} + +void TermsHash::setFieldInfos(const FieldInfosPtr& fieldInfos) { + this->fieldInfos = fieldInfos; + consumer->setFieldInfos(fieldInfos); +} + +void TermsHash::abort() { + consumer->abort(); + if (nextTermsHash) { + nextTermsHash->abort(); } - - TermsHashPerThreadPtr TermsHash::addThread(DocInverterPerThreadPtr docInverterPerThread, TermsHashPerThreadPtr primaryPerThread) - { - return newLucene(docInverterPerThread, shared_from_this(), nextTermsHash, primaryPerThread); +} + +void TermsHash::shrinkFreePostings(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { + BOOST_ASSERT(postingsFreeCount == postingsAllocCount); + + int32_t newSize = 1; + if (newSize != postingsFreeList.size()) { + if (postingsFreeCount > newSize) { + if (trackAllocations) { + DocumentsWriterPtr(_docWriter)->bytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting); + } + postingsFreeCount = newSize; + postingsAllocCount = newSize; + } + postingsFreeList.resize(newSize); } - - void TermsHash::setFieldInfos(FieldInfosPtr fieldInfos) - { - this->fieldInfos = fieldInfos; - consumer->setFieldInfos(fieldInfos); +} + +void TermsHash::closeDocStore(const SegmentWriteStatePtr& state) { + SyncLock syncLock(this); + consumer->closeDocStore(state); + if (nextTermsHash) { + nextTermsHash->closeDocStore(state); } - - void TermsHash::abort() - { - consumer->abort(); - if (nextTermsHash) - nextTermsHash->abort(); +} + +void TermsHash::flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, const SegmentWriteStatePtr& state) { + SyncLock syncLock(this); + MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField 
childThreadsAndFields(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::newInstance()); + MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField nextThreadsAndFields; + if (nextTermsHash) { + nextThreadsAndFields = MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::newInstance(); } - - void TermsHash::shrinkFreePostings(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state) - { - BOOST_ASSERT(postingsFreeCount == postingsAllocCount); - - int32_t newSize = 1; - if (newSize != postingsFreeList.size()) - { - if (postingsFreeCount > newSize) - { - if (trackAllocations) - DocumentsWriterPtr(_docWriter)->bytesAllocated(-(postingsFreeCount - newSize) * bytesPerPosting); - postingsFreeCount = newSize; - postingsAllocCount = newSize; + + for (MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) { + Collection childFields(Collection::newInstance()); + Collection nextChildFields; + if (nextTermsHash) { + nextChildFields = Collection::newInstance(); + } + + for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) { + childFields.add(boost::static_pointer_cast(*perField)->consumer); + if (nextTermsHash) { + nextChildFields.add(boost::static_pointer_cast(*perField)->nextPerField); } - postingsFreeList.resize(newSize); + } + + childThreadsAndFields.put(boost::static_pointer_cast(entry->first)->consumer, childFields); + if (nextTermsHash) { + nextThreadsAndFields.put(boost::static_pointer_cast(entry->first)->nextPerThread, nextChildFields); } } - - void TermsHash::closeDocStore(SegmentWriteStatePtr state) - { - SyncLock syncLock(this); - consumer->closeDocStore(state); - if (nextTermsHash) - nextTermsHash->closeDocStore(state); + + consumer->flush(childThreadsAndFields, state); + + shrinkFreePostings(threadsAndFields, state); 
+ + if (nextTermsHash) { + nextTermsHash->flush(nextThreadsAndFields, state); + } +} + +bool TermsHash::freeRAM() { + if (!trackAllocations) { + return false; } - - void TermsHash::flush(MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField threadsAndFields, SegmentWriteStatePtr state) + + bool any = false; + int64_t bytesFreed = 0; { SyncLock syncLock(this); - MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField childThreadsAndFields(MapTermsHashConsumerPerThreadCollectionTermsHashConsumerPerField::newInstance()); - MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField nextThreadsAndFields; - if (nextTermsHash) - nextThreadsAndFields = MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::newInstance(); - - for (MapInvertedDocConsumerPerThreadCollectionInvertedDocConsumerPerField::iterator entry = threadsAndFields.begin(); entry != threadsAndFields.end(); ++entry) - { - Collection childFields(Collection::newInstance()); - Collection nextChildFields; - if (nextTermsHash) - nextChildFields = Collection::newInstance(); - - for (Collection::iterator perField = entry->second.begin(); perField != entry->second.end(); ++perField) - { - childFields.add(boost::static_pointer_cast(*perField)->consumer); - if (nextTermsHash) - nextChildFields.add(boost::static_pointer_cast(*perField)->nextPerField); - } - - childThreadsAndFields.put(boost::static_pointer_cast(entry->first)->consumer, childFields); - if (nextTermsHash) - nextThreadsAndFields.put(boost::static_pointer_cast(entry->first)->nextPerThread, nextChildFields); + int32_t numToFree = postingsFreeCount >= postingsFreeChunk ? 
postingsFreeChunk : postingsFreeCount; + any = (numToFree > 0); + if (any) { + MiscUtils::arrayFill(postingsFreeList.begin(), postingsFreeCount - numToFree, postingsFreeCount, RawPostingListPtr()); + postingsFreeCount -= numToFree; + postingsAllocCount -= numToFree; + bytesFreed = -numToFree * bytesPerPosting; + any = true; } - - consumer->flush(childThreadsAndFields, state); - - shrinkFreePostings(threadsAndFields, state); - - if (nextTermsHash) - nextTermsHash->flush(nextThreadsAndFields, state); } - - bool TermsHash::freeRAM() - { - if (!trackAllocations) - return false; - - bool any = false; - int64_t bytesFreed = 0; - { - SyncLock syncLock(this); - int32_t numToFree = postingsFreeCount >= postingsFreeChunk ? postingsFreeChunk : postingsFreeCount; - any = (numToFree > 0); - if (any) - { - MiscUtils::arrayFill(postingsFreeList.begin(), postingsFreeCount - numToFree, postingsFreeCount, RawPostingListPtr()); - postingsFreeCount -= numToFree; - postingsAllocCount -= numToFree; - bytesFreed = -numToFree * bytesPerPosting; - any = true; - } - } - - if (any) - DocumentsWriterPtr(_docWriter)->bytesAllocated(bytesFreed); - if (nextTermsHash && nextTermsHash->freeRAM()) - any = true; - - return any; + if (any) { + DocumentsWriterPtr(_docWriter)->bytesAllocated(bytesFreed); } - - void TermsHash::recyclePostings(Collection postings, int32_t numPostings) - { - SyncLock syncLock(this); - BOOST_ASSERT(postings.size() >= numPostings); - - // Move all Postings from this ThreadState back to our free list. 
We pre-allocated this array while we - // were creating Postings to make sure it's large enough - BOOST_ASSERT(postingsFreeCount + numPostings <= postingsFreeList.size()); - MiscUtils::arrayCopy(postings.begin(), 0, postingsFreeList.begin(), postingsFreeCount, numPostings); - postingsFreeCount += numPostings; + + if (nextTermsHash && nextTermsHash->freeRAM()) { + any = true; } - - void TermsHash::getPostings(Collection postings) - { - SyncLock syncLock(this); - DocumentsWriterPtr docWriter(_docWriter); - IndexWriterPtr writer(docWriter->_writer); - - BOOST_ASSERT(writer->testPoint(L"TermsHash.getPostings start")); - - BOOST_ASSERT(postingsFreeCount <= postingsFreeList.size()); - BOOST_ASSERT(postingsFreeCount <= postingsAllocCount); - - int32_t numToCopy = postingsFreeCount < postings.size() ? postingsFreeCount : postings.size(); - int32_t start = postingsFreeCount - numToCopy; - BOOST_ASSERT(start >= 0); - BOOST_ASSERT(start + numToCopy <= postingsFreeList.size()); - BOOST_ASSERT(numToCopy <= postings.size()); - MiscUtils::arrayCopy(postingsFreeList.begin(), start, postings.begin(), 0, numToCopy); - - // Directly allocate the remainder if any - if (numToCopy != postings.size()) - { - int32_t extra = postings.size() - numToCopy; - int32_t newPostingsAllocCount = postingsAllocCount + extra; - - consumer->createPostings(postings, numToCopy, extra); - BOOST_ASSERT(writer->testPoint(L"TermsHash.getPostings after create")); - postingsAllocCount += extra; - - if (trackAllocations) - docWriter->bytesAllocated(extra * bytesPerPosting); - - if (newPostingsAllocCount > postingsFreeList.size()) - { - // Pre-allocate the postingsFreeList so it's large enough to hold all postings we've given out - postingsFreeList = Collection::newInstance(MiscUtils::getNextSize(newPostingsAllocCount)); - } + + return any; +} + +void TermsHash::recyclePostings(Collection postings, int32_t numPostings) { + SyncLock syncLock(this); + BOOST_ASSERT(postings.size() >= numPostings); + + // Move all 
Postings from this ThreadState back to our free list. We pre-allocated this array while we + // were creating Postings to make sure it's large enough + BOOST_ASSERT(postingsFreeCount + numPostings <= postingsFreeList.size()); + MiscUtils::arrayCopy(postings.begin(), 0, postingsFreeList.begin(), postingsFreeCount, numPostings); + postingsFreeCount += numPostings; +} + +void TermsHash::getPostings(Collection postings) { + SyncLock syncLock(this); + DocumentsWriterPtr docWriter(_docWriter); + IndexWriterPtr writer(docWriter->_writer); + + BOOST_ASSERT(writer->testPoint(L"TermsHash.getPostings start")); + + BOOST_ASSERT(postingsFreeCount <= postingsFreeList.size()); + BOOST_ASSERT(postingsFreeCount <= postingsAllocCount); + + int32_t numToCopy = postingsFreeCount < postings.size() ? postingsFreeCount : postings.size(); + int32_t start = postingsFreeCount - numToCopy; + BOOST_ASSERT(start >= 0); + BOOST_ASSERT(start + numToCopy <= postingsFreeList.size()); + BOOST_ASSERT(numToCopy <= postings.size()); + MiscUtils::arrayCopy(postingsFreeList.begin(), start, postings.begin(), 0, numToCopy); + + // Directly allocate the remainder if any + if (numToCopy != postings.size()) { + int32_t extra = postings.size() - numToCopy; + int32_t newPostingsAllocCount = postingsAllocCount + extra; + + consumer->createPostings(postings, numToCopy, extra); + BOOST_ASSERT(writer->testPoint(L"TermsHash.getPostings after create")); + postingsAllocCount += extra; + + if (trackAllocations) { + docWriter->bytesAllocated(extra * bytesPerPosting); + } + + if (newPostingsAllocCount > postingsFreeList.size()) { + // Pre-allocate the postingsFreeList so it's large enough to hold all postings we've given out + postingsFreeList = Collection::newInstance(MiscUtils::getNextSize(newPostingsAllocCount)); } - - postingsFreeCount -= numToCopy; - - if (trackAllocations) - docWriter->bytesUsed(postings.size() * bytesPerPosting); } + + postingsFreeCount -= numToCopy; + + if (trackAllocations) { + 
docWriter->bytesUsed(postings.size() * bytesPerPosting); + } +} + } diff --git a/src/core/index/TermsHashConsumer.cpp b/src/core/index/TermsHashConsumer.cpp index 50c508c1..830127ca 100644 --- a/src/core/index/TermsHashConsumer.cpp +++ b/src/core/index/TermsHashConsumer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,14 +7,13 @@ #include "LuceneInc.h" #include "TermsHashConsumer.h" -namespace Lucene -{ - TermsHashConsumer::~TermsHashConsumer() - { - } - - void TermsHashConsumer::setFieldInfos(FieldInfosPtr fieldInfos) - { - this->fieldInfos = fieldInfos; - } +namespace Lucene { + +TermsHashConsumer::~TermsHashConsumer() { +} + +void TermsHashConsumer::setFieldInfos(const FieldInfosPtr& fieldInfos) { + this->fieldInfos = fieldInfos; +} + } diff --git a/src/core/index/TermsHashConsumerPerField.cpp b/src/core/index/TermsHashConsumerPerField.cpp index 85bdfa0a..02e9c4b9 100644 --- a/src/core/index/TermsHashConsumerPerField.cpp +++ b/src/core/index/TermsHashConsumerPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "TermsHashConsumerPerField.h" -namespace Lucene -{ - TermsHashConsumerPerField::~TermsHashConsumerPerField() - { - } +namespace Lucene { + +TermsHashConsumerPerField::~TermsHashConsumerPerField() { +} + } diff --git a/src/core/index/TermsHashConsumerPerThread.cpp b/src/core/index/TermsHashConsumerPerThread.cpp index ec8cc28e..a2538ace 100644 --- a/src/core/index/TermsHashConsumerPerThread.cpp +++ b/src/core/index/TermsHashConsumerPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "TermsHashConsumerPerThread.h" -namespace Lucene -{ - TermsHashConsumerPerThread::~TermsHashConsumerPerThread() - { - } +namespace Lucene { + +TermsHashConsumerPerThread::~TermsHashConsumerPerThread() { +} + } diff --git a/src/core/index/TermsHashPerField.cpp b/src/core/index/TermsHashPerField.cpp index bdf56822..0a3fa062 100644 --- a/src/core/index/TermsHashPerField.cpp +++ b/src/core/index/TermsHashPerField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -22,519 +22,476 @@ #include "UTF8Stream.h" #include "MiscUtils.h" -namespace Lucene -{ - TermsHashPerField::TermsHashPerField(DocInverterPerFieldPtr docInverterPerField, TermsHashPerThreadPtr perThread, TermsHashPerThreadPtr nextPerThread, FieldInfoPtr fieldInfo) - { - this->_docInverterPerField = docInverterPerField; - this->_perThread = perThread; - this->nextPerThread = nextPerThread; - this->fieldInfo = fieldInfo; - } - - TermsHashPerField::~TermsHashPerField() - { +namespace Lucene { + +TermsHashPerField::TermsHashPerField(const DocInverterPerFieldPtr& docInverterPerField, const TermsHashPerThreadPtr& perThread, const TermsHashPerThreadPtr& nextPerThread, const FieldInfoPtr& fieldInfo) { + this->_docInverterPerField = docInverterPerField; + this->_perThread = perThread; + this->nextPerThread = nextPerThread; + this->fieldInfo = fieldInfo; +} + +TermsHashPerField::~TermsHashPerField() { +} + +void TermsHashPerField::initialize() { + this->postingsCompacted = false; + this->numPostings = 0; + this->postingsHashSize = 4; + this->postingsHashHalfSize = this->postingsHashSize / 2; + this->postingsHashMask = this->postingsHashSize - 1; + this->postingsHash = Collection::newInstance(postingsHashSize); + this->doCall = false; + this->doNextCall = false; + this->intUptoStart = 0; + + TermsHashPerThreadPtr perThread(_perThread); + intPool = perThread->intPool; + charPool = perThread->charPool; + bytePool = perThread->bytePool; + docState = perThread->docState; + DocInverterPerFieldPtr docInverterPerField(_docInverterPerField); + fieldState = docInverterPerField->fieldState; + this->consumer = perThread->consumer->addField(shared_from_this(), fieldInfo); + streamCount = consumer->getStreamCount(); + numPostingInt = 2 * streamCount; + if (nextPerThread) { + nextPerField = boost::dynamic_pointer_cast(nextPerThread->addField(docInverterPerField, fieldInfo)); } - - void 
TermsHashPerField::initialize() - { - this->postingsCompacted = false; - this->numPostings = 0; - this->postingsHashSize = 4; - this->postingsHashHalfSize = this->postingsHashSize / 2; - this->postingsHashMask = this->postingsHashSize - 1; - this->postingsHash = Collection::newInstance(postingsHashSize); - this->doCall = false; - this->doNextCall = false; - this->intUptoStart = 0; - - TermsHashPerThreadPtr perThread(_perThread); - intPool = perThread->intPool; - charPool = perThread->charPool; - bytePool = perThread->bytePool; - docState = perThread->docState; - DocInverterPerFieldPtr docInverterPerField(_docInverterPerField); - fieldState = docInverterPerField->fieldState; - this->consumer = perThread->consumer->addField(shared_from_this(), fieldInfo); - streamCount = consumer->getStreamCount(); - numPostingInt = 2 * streamCount; - if (nextPerThread) - nextPerField = boost::dynamic_pointer_cast(nextPerThread->addField(docInverterPerField, fieldInfo)); +} + +void TermsHashPerField::shrinkHash(int32_t targetSize) { + BOOST_ASSERT(postingsCompacted || numPostings == 0); + + int32_t newSize = 4; + if (newSize != postingsHash.size()) { + postingsHash.resize(newSize); + postingsHashSize = newSize; + postingsHashHalfSize = newSize / 2; + postingsHashMask = newSize - 1; } - - void TermsHashPerField::shrinkHash(int32_t targetSize) - { - BOOST_ASSERT(postingsCompacted || numPostings == 0); - - int32_t newSize = 4; - if (newSize != postingsHash.size()) - { - postingsHash.resize(newSize); - postingsHashSize = newSize; - postingsHashHalfSize = newSize / 2; - postingsHashMask = newSize - 1; - } - MiscUtils::arrayFill(postingsHash.begin(), 0, postingsHash.size(), RawPostingListPtr()); + MiscUtils::arrayFill(postingsHash.begin(), 0, postingsHash.size(), RawPostingListPtr()); +} + +void TermsHashPerField::reset() { + if (!postingsCompacted) { + compactPostings(); } - - void TermsHashPerField::reset() - { - if (!postingsCompacted) - compactPostings(); - BOOST_ASSERT(numPostings <= 
postingsHash.size()); - if (numPostings > 0) - { - TermsHashPtr(TermsHashPerThreadPtr(_perThread)->_termsHash)->recyclePostings(postingsHash, numPostings); - MiscUtils::arrayFill(postingsHash.begin(), 0, numPostings, RawPostingListPtr()); - numPostings = 0; - } - postingsCompacted = false; - if (nextPerField) - nextPerField->reset(); + BOOST_ASSERT(numPostings <= postingsHash.size()); + if (numPostings > 0) { + TermsHashPtr(TermsHashPerThreadPtr(_perThread)->_termsHash)->recyclePostings(postingsHash, numPostings); + MiscUtils::arrayFill(postingsHash.begin(), 0, numPostings, RawPostingListPtr()); + numPostings = 0; } - - void TermsHashPerField::abort() - { - SyncLock syncLock(this); - reset(); - if (nextPerField) - nextPerField->abort(); + postingsCompacted = false; + if (nextPerField) { + nextPerField->reset(); } - - void TermsHashPerField::initReader(ByteSliceReaderPtr reader, RawPostingListPtr p, int32_t stream) - { - BOOST_ASSERT(stream < streamCount); - IntArray ints(intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]); - int32_t upto = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); - reader->init(bytePool, p->byteStart + stream * ByteBlockPool::FIRST_LEVEL_SIZE(), ints[upto + stream]); +} + +void TermsHashPerField::abort() { + SyncLock syncLock(this); + reset(); + if (nextPerField) { + nextPerField->abort(); } - - void TermsHashPerField::compactPostings() - { - SyncLock syncLock(this); - int32_t upto = 0; - for (int32_t i = 0; i < postingsHashSize; ++i) - { - if (postingsHash[i]) - { - if (upto < i) - { - postingsHash[upto] = postingsHash[i]; - postingsHash[i].reset(); - } - ++upto; +} + +void TermsHashPerField::initReader(const ByteSliceReaderPtr& reader, const RawPostingListPtr& p, int32_t stream) { + BOOST_ASSERT(stream < streamCount); + IntArray ints(intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]); + int32_t upto = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); + reader->init(bytePool, p->byteStart + stream * 
ByteBlockPool::FIRST_LEVEL_SIZE(), ints[upto + stream]); +} + +void TermsHashPerField::compactPostings() { + SyncLock syncLock(this); + int32_t upto = 0; + for (int32_t i = 0; i < postingsHashSize; ++i) { + if (postingsHash[i]) { + if (upto < i) { + postingsHash[upto] = postingsHash[i]; + postingsHash[i].reset(); } + ++upto; } - - BOOST_ASSERT(upto == numPostings); - postingsCompacted = true; } - - struct comparePostings - { - comparePostings(Collection buffers) - { - this->buffers = buffers; + + BOOST_ASSERT(upto == numPostings); + postingsCompacted = true; +} + +struct comparePostings { + comparePostings(Collection buffers) { + this->buffers = buffers; } - - /// Compares term text for two Posting instance - inline bool operator()(const RawPostingListPtr& first, const RawPostingListPtr& second) const - { - if (first == second) + + /// Compares term text for two Posting instance + inline bool operator()(const RawPostingListPtr& first, const RawPostingListPtr& second) const { + if (first == second) { return false; - - wchar_t* text1 = buffers[first->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); - int32_t pos1 = (first->textStart & DocumentsWriter::CHAR_BLOCK_MASK); - wchar_t* text2 = buffers[second->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); - int32_t pos2 = (second->textStart & DocumentsWriter::CHAR_BLOCK_MASK); - + } + + wchar_t* text1 = buffers[first->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); + int32_t pos1 = (first->textStart & DocumentsWriter::CHAR_BLOCK_MASK); + wchar_t* text2 = buffers[second->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); + int32_t pos2 = (second->textStart & DocumentsWriter::CHAR_BLOCK_MASK); + BOOST_ASSERT(text1 != text2 || pos1 != pos2); - - while (true) - { + + while (true) { wchar_t c1 = text1[pos1++]; wchar_t c2 = text2[pos2++]; - if (c1 != c2) - { - if (c2 == UTF8Base::UNICODE_TERMINATOR) + if (c1 != c2) { + if (c2 == UTF8Base::UNICODE_TERMINATOR) { return false; - else if (c1 == 
UTF8Base::UNICODE_TERMINATOR) + } else if (c1 == UTF8Base::UNICODE_TERMINATOR) { return true; - else + } else { return (c1 < c2); - } - else - { - // This method should never compare equal postings unless first == second + } + } else { + // This method should never compare equal postings unless first == second BOOST_ASSERT(c1 != UTF8Base::UNICODE_TERMINATOR); } } - } - - Collection buffers; - }; - - Collection TermsHashPerField::sortPostings() - { - compactPostings(); - std::sort(postingsHash.begin(), postingsHash.begin() + numPostings, comparePostings(charPool->buffers)); - return postingsHash; } - - bool TermsHashPerField::postingEquals(const wchar_t* tokenText, int32_t tokenTextLen) - { - wchar_t* text = TermsHashPerThreadPtr(_perThread)->charPool->buffers[p->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); - BOOST_ASSERT(text); - int32_t pos = (p->textStart & DocumentsWriter::CHAR_BLOCK_MASK); - int32_t tokenPos = 0; - for (; tokenPos < tokenTextLen; ++pos, ++tokenPos) - { - if (tokenText[tokenPos] != text[pos]) - return false; + + Collection buffers; +}; + +Collection TermsHashPerField::sortPostings() { + compactPostings(); + std::sort(postingsHash.begin(), postingsHash.begin() + numPostings, comparePostings(charPool->buffers)); + return postingsHash; +} + +bool TermsHashPerField::postingEquals(const wchar_t* tokenText, int32_t tokenTextLen) { + wchar_t* text = TermsHashPerThreadPtr(_perThread)->charPool->buffers[p->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT].get(); + BOOST_ASSERT(text); + int32_t pos = (p->textStart & DocumentsWriter::CHAR_BLOCK_MASK); + int32_t tokenPos = 0; + for (; tokenPos < tokenTextLen; ++pos, ++tokenPos) { + if (tokenText[tokenPos] != text[pos]) { + return false; } - return (text[pos] == UTF8Base::UNICODE_TERMINATOR); } - - void TermsHashPerField::start(FieldablePtr field) - { - termAtt = fieldState->attributeSource->addAttribute(); - consumer->start(field); - if (nextPerField) - nextPerField->start(field); + return 
(text[pos] == UTF8Base::UNICODE_TERMINATOR); +} + +void TermsHashPerField::start(const FieldablePtr& field) { + termAtt = fieldState->attributeSource->addAttribute(); + consumer->start(field); + if (nextPerField) { + nextPerField->start(field); } - - bool TermsHashPerField::start(Collection fields, int32_t count) - { - doCall = consumer->start(fields, count); - if (nextPerField) - doNextCall = nextPerField->start(fields, count); - return (doCall || doNextCall); +} + +bool TermsHashPerField::start(Collection fields, int32_t count) { + doCall = consumer->start(fields, count); + if (nextPerField) { + doNextCall = nextPerField->start(fields, count); } - - void TermsHashPerField::add(int32_t textStart) - { - // Secondary entry point (for 2nd and subsequent TermsHash), we hash by textStart - int32_t code = textStart; - - int32_t hashPos = (code & postingsHashMask); - - BOOST_ASSERT(!postingsCompacted); - - // Locate RawPostingList in hash - p = postingsHash[hashPos]; - - if (p && p->textStart != textStart) - { - // Conflict: keep searching different locations in the hash table. - int32_t inc = (((code >> 8) + code) | 1); - do - { - code += inc; - hashPos = (code & postingsHashMask); - p = postingsHash[hashPos]; - } - while (p && p->textStart != textStart); + return (doCall || doNextCall); +} + +void TermsHashPerField::add(int32_t textStart) { + // Secondary entry point (for 2nd and subsequent TermsHash), we hash by textStart + int32_t code = textStart; + + int32_t hashPos = (code & postingsHashMask); + + BOOST_ASSERT(!postingsCompacted); + + // Locate RawPostingList in hash + p = postingsHash[hashPos]; + + if (p && p->textStart != textStart) { + // Conflict: keep searching different locations in the hash table. 
+ int32_t inc = (((code >> 8) + code) | 1); + do { + code += inc; + hashPos = (code & postingsHashMask); + p = postingsHash[hashPos]; + } while (p && p->textStart != textStart); + } + + if (!p) { + // First time we are seeing this token since we last flushed the hash. + TermsHashPerThreadPtr perThread(_perThread); + + // Refill? + if (perThread->freePostingsCount == 0) { + perThread->morePostings(); } - - if (!p) - { - // First time we are seeing this token since we last flushed the hash. - TermsHashPerThreadPtr perThread(_perThread); - - // Refill? - if (perThread->freePostingsCount == 0) - perThread->morePostings(); - - // Pull next free RawPostingList from free list - p = perThread->freePostings[--perThread->freePostingsCount]; - BOOST_ASSERT(p); - - p->textStart = textStart; - - BOOST_ASSERT(!postingsHash[hashPos]); - postingsHash[hashPos] = p; - ++numPostings; - - if (numPostings == postingsHashHalfSize) - rehashPostings(2 * postingsHashSize); - - // Init stream slices - if (numPostingInt + intPool->intUpto > DocumentsWriter::INT_BLOCK_SIZE) - intPool->nextBuffer(); - - if (DocumentsWriter::BYTE_BLOCK_SIZE - bytePool->byteUpto < numPostingInt * ByteBlockPool::FIRST_LEVEL_SIZE()) - bytePool->nextBuffer(); - - intUptos = intPool->buffer; - intUptoStart = intPool->intUpto; - intPool->intUpto += streamCount; - - p->intStart = intUptoStart + intPool->intOffset; - - for (int32_t i = 0; i < streamCount; ++i) - { - int32_t upto = bytePool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); - intUptos[intUptoStart + i] = upto + bytePool->byteOffset; - } - p->byteStart = intUptos[intUptoStart]; - - consumer->newTerm(p); + + // Pull next free RawPostingList from free list + p = perThread->freePostings[--perThread->freePostingsCount]; + BOOST_ASSERT(p); + + p->textStart = textStart; + + BOOST_ASSERT(!postingsHash[hashPos]); + postingsHash[hashPos] = p; + ++numPostings; + + if (numPostings == postingsHashHalfSize) { + rehashPostings(2 * postingsHashSize); } - else - { - intUptos 
= intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]; - intUptoStart = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); - consumer->addTerm(p); + + // Init stream slices + if (numPostingInt + intPool->intUpto > DocumentsWriter::INT_BLOCK_SIZE) { + intPool->nextBuffer(); + } + + if (DocumentsWriter::BYTE_BLOCK_SIZE - bytePool->byteUpto < numPostingInt * ByteBlockPool::FIRST_LEVEL_SIZE()) { + bytePool->nextBuffer(); + } + + intUptos = intPool->buffer; + intUptoStart = intPool->intUpto; + intPool->intUpto += streamCount; + + p->intStart = intUptoStart + intPool->intOffset; + + for (int32_t i = 0; i < streamCount; ++i) { + int32_t upto = bytePool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); + intUptos[intUptoStart + i] = upto + bytePool->byteOffset; } + p->byteStart = intUptos[intUptoStart]; + + consumer->newTerm(p); + } else { + intUptos = intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]; + intUptoStart = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); + consumer->addTerm(p); } - - void TermsHashPerField::add() - { - BOOST_ASSERT(!postingsCompacted); - - // Get the text of this term. - wchar_t* tokenText = termAtt->termBufferArray(); - int32_t tokenTextLen = termAtt->termLength(); - - // Compute hashcode and replace any invalid UTF16 sequences - int32_t downto = tokenTextLen; - int32_t code = 0; - - while (downto > 0) - { - wchar_t ch = tokenText[--downto]; - - #ifdef LPP_UNICODE_CHAR_SIZE_2 - if (ch >= UTF8Base::TRAIL_SURROGATE_MIN && ch <= UTF8Base::TRAIL_SURROGATE_MAX) - { - if (downto == 0) - { +} + +void TermsHashPerField::add() { + BOOST_ASSERT(!postingsCompacted); + + // Get the text of this term. 
+ wchar_t* tokenText = termAtt->termBufferArray(); + int32_t tokenTextLen = termAtt->termLength(); + + // Compute hashcode and replace any invalid UTF16 sequences + int32_t downto = tokenTextLen; + int32_t code = 0; + + while (downto > 0) { + wchar_t ch = tokenText[--downto]; + +#ifdef LPP_UNICODE_CHAR_SIZE_2 + if (ch >= UTF8Base::TRAIL_SURROGATE_MIN && ch <= UTF8Base::TRAIL_SURROGATE_MAX) { + if (downto == 0) { + // Unpaired + ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; + tokenText[downto] = ch; + } else { + wchar_t ch2 = tokenText[downto - 1]; + if (ch2 >= UTF8Base::LEAD_SURROGATE_MIN && ch2 <= UTF8Base::LEAD_SURROGATE_MAX) { + // OK: high followed by low. This is a valid surrogate pair. + code = ((code * 31) + ch) * 31 + ch2; + --downto; + continue; + } else { // Unpaired ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; tokenText[downto] = ch; } - else - { - wchar_t ch2 = tokenText[downto - 1]; - if (ch2 >= UTF8Base::LEAD_SURROGATE_MIN && ch2 <= UTF8Base::LEAD_SURROGATE_MAX) - { - // OK: high followed by low. This is a valid surrogate pair. 
- code = ((code * 31) + ch) * 31 + ch2; - --downto; - continue; - } - else - { - // Unpaired - ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; - tokenText[downto] = ch; - } - } } - else if (ch >= UTF8Base::LEAD_SURROGATE_MIN && (ch <= UTF8Base::LEAD_SURROGATE_MAX || ch == UTF8Base::UNICODE_TERMINATOR)) - { - // Unpaired or UTF8Base::UNICODE_TERMINATOR - ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; - tokenText[downto] = ch; - } - #else - if (ch == UTF8Base::UNICODE_TERMINATOR) - { - // Unpaired or UTF8Base::UNICODE_TERMINATOR - ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; - tokenText[downto] = ch; - } - #endif - - code = (code * 31) + ch; + } else if (ch >= UTF8Base::LEAD_SURROGATE_MIN && (ch <= UTF8Base::LEAD_SURROGATE_MAX || ch == UTF8Base::UNICODE_TERMINATOR)) { + // Unpaired or UTF8Base::UNICODE_TERMINATOR + ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; + tokenText[downto] = ch; } - - int32_t hashPos = (code & postingsHashMask); - - // Locate RawPostingList in hash - p = postingsHash[hashPos]; - - if (p && !postingEquals(tokenText, tokenTextLen)) - { - // Conflict: keep searching different locations in the hash table. - int32_t inc = (((code >> 8) + code) | 1); - do - { - code += inc; - hashPos = (code & postingsHashMask); - p = postingsHash[hashPos]; - } - while (p && !postingEquals(tokenText, tokenTextLen)); +#else + if (ch == UTF8Base::UNICODE_TERMINATOR) { + // Unpaired or UTF8Base::UNICODE_TERMINATOR + ch = UTF8Base::UNICODE_REPLACEMENT_CHAR; + tokenText[downto] = ch; } - - if (!p) - { - // First time we are seeing this token since we last flushed the hash. - int32_t textLen1 = 1 + tokenTextLen; - if (textLen1 + charPool->charUpto > DocumentsWriter::CHAR_BLOCK_SIZE) - { - if (textLen1 > DocumentsWriter::CHAR_BLOCK_SIZE) - { - // Just skip this term, to remain as robust as possible during indexing. A TokenFilter - // can be inserted into the analyzer chain if other behavior is wanted (pruning the term - // to a prefix, throwing an exception, etc). 
- - if (docState->maxTermPrefix.empty()) - docState->maxTermPrefix.append(tokenText, std::min((int32_t)30, tokenTextLen)); - - consumer->skippingLongTerm(); - return; +#endif + + code = (code * 31) + ch; + } + + int32_t hashPos = (code & postingsHashMask); + + // Locate RawPostingList in hash + p = postingsHash[hashPos]; + + if (p && !postingEquals(tokenText, tokenTextLen)) { + // Conflict: keep searching different locations in the hash table. + int32_t inc = (((code >> 8) + code) | 1); + do { + code += inc; + hashPos = (code & postingsHashMask); + p = postingsHash[hashPos]; + } while (p && !postingEquals(tokenText, tokenTextLen)); + } + + if (!p) { + // First time we are seeing this token since we last flushed the hash. + int32_t textLen1 = 1 + tokenTextLen; + if (textLen1 + charPool->charUpto > DocumentsWriter::CHAR_BLOCK_SIZE) { + if (textLen1 > DocumentsWriter::CHAR_BLOCK_SIZE) { + // Just skip this term, to remain as robust as possible during indexing. A TokenFilter + // can be inserted into the analyzer chain if other behavior is wanted (pruning the term + // to a prefix, throwing an exception, etc). + + if (docState->maxTermPrefix.empty()) { + docState->maxTermPrefix.append(tokenText, std::min((int32_t)30, tokenTextLen)); } - charPool->nextBuffer(); - } - - TermsHashPerThreadPtr perThread(_perThread); - - // Refill? 
- if (perThread->freePostingsCount == 0) - perThread->morePostings(); - - // Pull next free RawPostingList from free list - p = perThread->freePostings[--perThread->freePostingsCount]; - BOOST_ASSERT(p); - - wchar_t* text = charPool->buffer.get(); - int32_t textUpto = charPool->charUpto; - - p->textStart = textUpto + charPool->charOffset; - charPool->charUpto += textLen1; - - MiscUtils::arrayCopy(tokenText, 0, text, textUpto, tokenTextLen); - text[textUpto + tokenTextLen] = UTF8Base::UNICODE_TERMINATOR; - - BOOST_ASSERT(!postingsHash[hashPos]); - postingsHash[hashPos] = p; - ++numPostings; - - if (numPostings == postingsHashHalfSize) - rehashPostings(2 * postingsHashSize); - - // Init stream slices - if (numPostingInt + intPool->intUpto > DocumentsWriter::INT_BLOCK_SIZE) - intPool->nextBuffer(); - - if (DocumentsWriter::BYTE_BLOCK_SIZE - bytePool->byteUpto < numPostingInt * ByteBlockPool::FIRST_LEVEL_SIZE()) - bytePool->nextBuffer(); - - intUptos = intPool->buffer; - intUptoStart = intPool->intUpto; - intPool->intUpto += streamCount; - - p->intStart = intUptoStart + intPool->intOffset; - - for (int32_t i = 0; i < streamCount; ++i) - { - int32_t upto = bytePool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); - intUptos[intUptoStart + i] = upto + bytePool->byteOffset; + + consumer->skippingLongTerm(); + return; } - p->byteStart = intUptos[intUptoStart]; - - consumer->newTerm(p); + charPool->nextBuffer(); } - else - { - intUptos = intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]; - intUptoStart = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); - consumer->addTerm(p); + + TermsHashPerThreadPtr perThread(_perThread); + + // Refill? 
+ if (perThread->freePostingsCount == 0) { + perThread->morePostings(); } - - if (doNextCall) - nextPerField->add(p->textStart); - } - - void TermsHashPerField::writeByte(int32_t stream, int8_t b) - { - int32_t upto = intUptos[intUptoStart + stream]; - ByteArray bytes(bytePool->buffers[upto >> DocumentsWriter::BYTE_BLOCK_SHIFT]); - BOOST_ASSERT(bytes); - int32_t offset = (upto & DocumentsWriter::BYTE_BLOCK_MASK); - if (bytes[offset] != 0) - { - // End of slice; allocate a new one - offset = bytePool->allocSlice(bytes, offset); - bytes = bytePool->buffer; - intUptos[intUptoStart + stream] = offset + bytePool->byteOffset; + + // Pull next free RawPostingList from free list + p = perThread->freePostings[--perThread->freePostingsCount]; + BOOST_ASSERT(p); + + wchar_t* text = charPool->buffer.get(); + int32_t textUpto = charPool->charUpto; + + p->textStart = textUpto + charPool->charOffset; + charPool->charUpto += textLen1; + + MiscUtils::arrayCopy(tokenText, 0, text, textUpto, tokenTextLen); + text[textUpto + tokenTextLen] = UTF8Base::UNICODE_TERMINATOR; + + BOOST_ASSERT(!postingsHash[hashPos]); + postingsHash[hashPos] = p; + ++numPostings; + + if (numPostings == postingsHashHalfSize) { + rehashPostings(2 * postingsHashSize); } - bytes[offset] = b; - intUptos[intUptoStart + stream]++; + + // Init stream slices + if (numPostingInt + intPool->intUpto > DocumentsWriter::INT_BLOCK_SIZE) { + intPool->nextBuffer(); + } + + if (DocumentsWriter::BYTE_BLOCK_SIZE - bytePool->byteUpto < numPostingInt * ByteBlockPool::FIRST_LEVEL_SIZE()) { + bytePool->nextBuffer(); + } + + intUptos = intPool->buffer; + intUptoStart = intPool->intUpto; + intPool->intUpto += streamCount; + + p->intStart = intUptoStart + intPool->intOffset; + + for (int32_t i = 0; i < streamCount; ++i) { + int32_t upto = bytePool->newSlice(ByteBlockPool::FIRST_LEVEL_SIZE()); + intUptos[intUptoStart + i] = upto + bytePool->byteOffset; + } + p->byteStart = intUptos[intUptoStart]; + + consumer->newTerm(p); + } else { + 
intUptos = intPool->buffers[p->intStart >> DocumentsWriter::INT_BLOCK_SHIFT]; + intUptoStart = (p->intStart & DocumentsWriter::INT_BLOCK_MASK); + consumer->addTerm(p); } - - void TermsHashPerField::writeBytes(int32_t stream, const uint8_t* b, int32_t offset, int32_t length) - { - int32_t end = offset + length; - for (int32_t i = offset; i < end; ++i) - writeByte(stream, b[i]); + + if (doNextCall) { + nextPerField->add(p->textStart); } - - void TermsHashPerField::writeVInt(int32_t stream, int32_t i) - { - BOOST_ASSERT(stream < streamCount); - while ((i & ~0x7f) != 0) - { - writeByte(stream, (uint8_t)((i & 0x7f) | 0x80)); - i = MiscUtils::unsignedShift(i, 7); - } - writeByte(stream, (uint8_t)i); +} + +void TermsHashPerField::writeByte(int32_t stream, int8_t b) { + int32_t upto = intUptos[intUptoStart + stream]; + ByteArray bytes(bytePool->buffers[upto >> DocumentsWriter::BYTE_BLOCK_SHIFT]); + BOOST_ASSERT(bytes); + int32_t offset = (upto & DocumentsWriter::BYTE_BLOCK_MASK); + if (bytes[offset] != 0) { + // End of slice; allocate a new one + offset = bytePool->allocSlice(bytes, offset); + bytes = bytePool->buffer; + intUptos[intUptoStart + stream] = offset + bytePool->byteOffset; + } + bytes[offset] = b; + intUptos[intUptoStart + stream]++; +} + +void TermsHashPerField::writeBytes(int32_t stream, const uint8_t* b, int32_t offset, int32_t length) { + int32_t end = offset + length; + for (int32_t i = offset; i < end; ++i) { + writeByte(stream, b[i]); } - - void TermsHashPerField::finish() - { - consumer->finish(); - if (nextPerField) - nextPerField->finish(); +} + +void TermsHashPerField::writeVInt(int32_t stream, int32_t i) { + BOOST_ASSERT(stream < streamCount); + while ((i & ~0x7f) != 0) { + writeByte(stream, (uint8_t)((i & 0x7f) | 0x80)); + i = MiscUtils::unsignedShift(i, 7); } - - void TermsHashPerField::rehashPostings(int32_t newSize) - { - int32_t newMask = newSize - 1; - - Collection newHash(Collection::newInstance(newSize)); - TermsHashPerThreadPtr 
perThread(_perThread); - - for (int32_t i = 0; i < postingsHashSize; ++i) - { - RawPostingListPtr p0(postingsHash[i]); - if (p0) - { - int32_t code; - if (perThread->primary) - { - int32_t start = (p0->textStart & DocumentsWriter::CHAR_BLOCK_MASK); - CharArray text = charPool->buffers[p0->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]; - int32_t pos = start; - while (text[pos] != UTF8Base::UNICODE_TERMINATOR) - ++pos; - code = 0; - while (pos > start) - code = (code * 31) + text[--pos]; + writeByte(stream, (uint8_t)i); +} + +void TermsHashPerField::finish() { + consumer->finish(); + if (nextPerField) { + nextPerField->finish(); + } +} + +void TermsHashPerField::rehashPostings(int32_t newSize) { + int32_t newMask = newSize - 1; + + Collection newHash(Collection::newInstance(newSize)); + TermsHashPerThreadPtr perThread(_perThread); + + for (int32_t i = 0; i < postingsHashSize; ++i) { + RawPostingListPtr p0(postingsHash[i]); + if (p0) { + int32_t code; + if (perThread->primary) { + int32_t start = (p0->textStart & DocumentsWriter::CHAR_BLOCK_MASK); + CharArray text = charPool->buffers[p0->textStart >> DocumentsWriter::CHAR_BLOCK_SHIFT]; + int32_t pos = start; + while (text[pos] != UTF8Base::UNICODE_TERMINATOR) { + ++pos; } - else - code = p0->textStart; - - int32_t hashPos = (code & newMask); - BOOST_ASSERT(hashPos >= 0); - if (newHash[hashPos]) - { - int32_t inc = (((code >> 8) + code) | 1); - do - { - code += inc; - hashPos = (code & newMask); - } - while (newHash[hashPos]); + code = 0; + while (pos > start) { + code = (code * 31) + text[--pos]; } - newHash[hashPos] = p0; + } else { + code = p0->textStart; + } + + int32_t hashPos = (code & newMask); + BOOST_ASSERT(hashPos >= 0); + if (newHash[hashPos]) { + int32_t inc = (((code >> 8) + code) | 1); + do { + code += inc; + hashPos = (code & newMask); + } while (newHash[hashPos]); } + newHash[hashPos] = p0; } - - postingsHashMask = newMask; - postingsHash = newHash; - postingsHashSize = newSize; - postingsHashHalfSize 
= (newSize >> 1); } + + postingsHashMask = newMask; + postingsHash = newHash; + postingsHashSize = newSize; + postingsHashHalfSize = (newSize >> 1); +} + } diff --git a/src/core/index/TermsHashPerThread.cpp b/src/core/index/TermsHashPerThread.cpp index ff24f28c..a68fa9dc 100644 --- a/src/core/index/TermsHashPerThread.cpp +++ b/src/core/index/TermsHashPerThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -15,112 +15,101 @@ #include "IntBlockPool.h" #include "DocumentsWriter.h" -namespace Lucene -{ - TermsHashPerThread::TermsHashPerThread(DocInverterPerThreadPtr docInverterPerThread, TermsHashPtr termsHash, TermsHashPtr nextTermsHash, TermsHashPerThreadPtr primaryPerThread) - { - this->freePostings = Collection::newInstance(256); - this->freePostingsCount = 0; - this->primary = false; - this->_docInverterPerThread = docInverterPerThread; - this->_termsHash = termsHash; - this->nextTermsHash = nextTermsHash; - this->_primaryPerThread = primaryPerThread; - } - - TermsHashPerThread::~TermsHashPerThread() - { - } - - void TermsHashPerThread::initialize() - { - DocInverterPerThreadPtr docInverterPerThread(_docInverterPerThread); - TermsHashPtr termsHash(_termsHash); - docState = docInverterPerThread->docState; - consumer = termsHash->consumer->addThread(shared_from_this()); - - if (nextTermsHash) - { - // We are primary - charPool = newLucene(DocumentsWriterPtr(termsHash->_docWriter)); - primary = true; - } - else - { - charPool = TermsHashPerThreadPtr(_primaryPerThread)->charPool; - primary = false; - } - - intPool = newLucene(DocumentsWriterPtr(termsHash->_docWriter), termsHash->trackAllocations); 
- bytePool = newLucene(DocumentsWriterPtr(termsHash->_docWriter)->byteBlockAllocator, termsHash->trackAllocations); - - if (nextTermsHash) - nextPerThread = nextTermsHash->addThread(docInverterPerThread, shared_from_this()); +namespace Lucene { + +TermsHashPerThread::TermsHashPerThread(const DocInverterPerThreadPtr& docInverterPerThread, const TermsHashPtr& termsHash, const TermsHashPtr& nextTermsHash, const TermsHashPerThreadPtr& primaryPerThread) { + this->freePostings = Collection::newInstance(256); + this->freePostingsCount = 0; + this->primary = false; + this->_docInverterPerThread = docInverterPerThread; + this->_termsHash = termsHash; + this->nextTermsHash = nextTermsHash; + this->_primaryPerThread = primaryPerThread; +} + +TermsHashPerThread::~TermsHashPerThread() { +} + +void TermsHashPerThread::initialize() { + DocInverterPerThreadPtr docInverterPerThread(_docInverterPerThread); + TermsHashPtr termsHash(_termsHash); + docState = docInverterPerThread->docState; + consumer = termsHash->consumer->addThread(shared_from_this()); + + if (nextTermsHash) { + // We are primary + charPool = newLucene(DocumentsWriterPtr(termsHash->_docWriter)); + primary = true; + } else { + charPool = TermsHashPerThreadPtr(_primaryPerThread)->charPool; + primary = false; } - - InvertedDocConsumerPerFieldPtr TermsHashPerThread::addField(DocInverterPerFieldPtr docInverterPerField, FieldInfoPtr fieldInfo) - { - return newLucene(docInverterPerField, shared_from_this(), nextPerThread, fieldInfo); + + intPool = newLucene(DocumentsWriterPtr(termsHash->_docWriter), termsHash->trackAllocations); + bytePool = newLucene(DocumentsWriterPtr(termsHash->_docWriter)->byteBlockAllocator, termsHash->trackAllocations); + + if (nextTermsHash) { + nextPerThread = nextTermsHash->addThread(docInverterPerThread, shared_from_this()); } - - void TermsHashPerThread::abort() - { - SyncLock syncLock(this); - reset(true); - consumer->abort(); - if (nextPerThread) - nextPerThread->abort(); +} + 
+InvertedDocConsumerPerFieldPtr TermsHashPerThread::addField(const DocInverterPerFieldPtr& docInverterPerField, const FieldInfoPtr& fieldInfo) { + return newLucene(docInverterPerField, shared_from_this(), nextPerThread, fieldInfo); +} + +void TermsHashPerThread::abort() { + SyncLock syncLock(this); + reset(true); + consumer->abort(); + if (nextPerThread) { + nextPerThread->abort(); } - - void TermsHashPerThread::morePostings() - { - BOOST_ASSERT(freePostingsCount == 0); - TermsHashPtr(_termsHash)->getPostings(freePostings); - freePostingsCount = freePostings.size(); - BOOST_ASSERT(noNullPostings(freePostings, freePostingsCount, L"consumer=" + consumer->toString())); +} + +void TermsHashPerThread::morePostings() { + BOOST_ASSERT(freePostingsCount == 0); + TermsHashPtr(_termsHash)->getPostings(freePostings); + freePostingsCount = freePostings.size(); + BOOST_ASSERT(noNullPostings(freePostings, freePostingsCount, L"consumer=" + consumer->toString())); +} + +bool TermsHashPerThread::noNullPostings(Collection postings, int32_t count, const String& details) { + for (int32_t i = 0; i < count; ++i) { + BOOST_ASSERT(postings[i]); } - - bool TermsHashPerThread::noNullPostings(Collection postings, int32_t count, const String& details) - { - for (int32_t i = 0; i < count; ++i) - { - BOOST_ASSERT(postings[i]); - } - return true; + return true; +} + +void TermsHashPerThread::startDocument() { + consumer->startDocument(); + if (nextPerThread) { + nextPerThread->consumer->startDocument(); } - - void TermsHashPerThread::startDocument() - { - consumer->startDocument(); - if (nextPerThread) - nextPerThread->consumer->startDocument(); +} + +DocWriterPtr TermsHashPerThread::finishDocument() { + DocWriterPtr doc(consumer->finishDocument()); + DocWriterPtr doc2(nextPerThread ? 
nextPerThread->consumer->finishDocument() : DocWriterPtr()); + if (!doc) { + return doc2; + } else { + doc->setNext(doc2); + return doc; } - - DocWriterPtr TermsHashPerThread::finishDocument() - { - DocWriterPtr doc(consumer->finishDocument()); - DocWriterPtr doc2(nextPerThread ? nextPerThread->consumer->finishDocument() : DocWriterPtr()); - if (!doc) - return doc2; - else - { - doc->setNext(doc2); - return doc; - } +} + +void TermsHashPerThread::reset(bool recyclePostings) { + intPool->reset(); + bytePool->reset(); + + if (primary) { + charPool->reset(); } - void TermsHashPerThread::reset(bool recyclePostings) - { - intPool->reset(); - bytePool->reset(); - - if (primary) - charPool->reset(); - - if (recyclePostings) - { - TermsHashPtr(_termsHash)->recyclePostings(freePostings, freePostingsCount); - freePostingsCount = 0; - } + if (recyclePostings) { + TermsHashPtr(_termsHash)->recyclePostings(freePostings, freePostingsCount); + freePostingsCount = 0; } } + +} diff --git a/src/core/msvc/LuceneInc.cpp b/src/core/msvc/LuceneInc.cpp index e53579e2..18915650 100644 --- a/src/core/msvc/LuceneInc.cpp +++ b/src/core/msvc/LuceneInc.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// diff --git a/src/core/msvc/dllmain.cpp b/src/core/msvc/dllmain.cpp index e3847950..4c9bd0f4 100644 --- a/src/core/msvc/dllmain.cpp +++ b/src/core/msvc/dllmain.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,15 +8,13 @@ #if defined(_WIN32) && defined(LPP_HAVE_DLL) -BOOL APIENTRY DllMain(HMODULE module, DWORD ul_reason_for_call, LPVOID lpReserved) -{ - switch (ul_reason_for_call) - { - case DLL_PROCESS_ATTACH: - case DLL_THREAD_ATTACH: - case DLL_THREAD_DETACH: - case DLL_PROCESS_DETACH: - break; +BOOL APIENTRY DllMain(HMODULE module, DWORD ul_reason_for_call, LPVOID lpReserved) { + switch (ul_reason_for_call) { + case DLL_PROCESS_ATTACH: + case DLL_THREAD_ATTACH: + case DLL_THREAD_DETACH: + case DLL_PROCESS_DETACH: + break; } return TRUE; } diff --git a/src/core/msvc/lucene++.vcproj b/src/core/msvc/lucene++.vcproj index 0a415bf4..390c7bb6 100644 --- a/src/core/msvc/lucene++.vcproj +++ b/src/core/msvc/lucene++.vcproj @@ -3520,11 +3520,11 @@ Name="platform" > + + + + Debug DLL + Win32 + + + Debug Static + Win32 + + + Release DLL + Win32 + + + Release Static + Win32 + + + + {46A95AFD-95FD-4280-B22E-1B56F273144A} + lucene++ + Win32Proj + + + + StaticLibrary + Unicode + true + + + StaticLibrary + Unicode + + + DynamicLibrary + Unicode + true + + + DynamicLibrary + Unicode + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.40219.1 + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + + + + Disabled + ..\..\..\include;..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_WINDOWS;_USRDLL;LPP_HAVE_DLL;LPP_BUILDING_LIB;%(PreprocessorDefinitions) + true + Async + EnableFastChecks + MultiThreadedDebugDLL + Use + LuceneInc.h + Level3 + EditAndContinue + 4996;%(DisableSpecificWarnings) + false + + + $(BOOST_ROOT)\lib32-msvc-10.0;%(AdditionalLibraryDirectories) + true + 
Windows + MachineX86 + + + if not exist "..\..\..\lib" mkdir "..\..\..\lib" +if not exist "..\..\..\bin" mkdir "..\..\..\bin" +copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." +copy "$(OutDir)$(ProjectName).dll" "..\..\..\bin\." + + + + + + MaxSpeed + AnySuitable + true + Speed + true + ..\..\..\include;..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_WINDOWS;_USRDLL;LPP_HAVE_DLL;LPP_BUILDING_LIB;%(PreprocessorDefinitions) + Async + MultiThreadedDLL + true + Use + LuceneInc.h + Level3 + ProgramDatabase + 4996;%(DisableSpecificWarnings) + false + + + $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) + true + Windows + true + true + MachineX86 + + + if not exist "..\..\..\lib" mkdir "..\..\..\lib" +if not exist "..\..\..\bin" mkdir "..\..\..\bin" +copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." +copy "$(OutDir)$(ProjectName).dll" "..\..\..\bin\." + + + + + + Disabled + ..\..\..\include;..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_LIB;LPP_BUILDING_LIB;%(PreprocessorDefinitions) + true + EnableFastChecks + MultiThreadedDebugDLL + Use + LuceneInc.h + Level3 + EditAndContinue + 4996;%(DisableSpecificWarnings) + false + + + /IGNORE:4221 %(AdditionalOptions) + $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) + + + if not exist "..\..\..\lib" mkdir "..\..\..\lib" +copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." + + + + + + MaxSpeed + AnySuitable + true + Speed + true + ..\..\..\include;..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_LIB;LPP_BUILDING_LIB;%(PreprocessorDefinitions) + MultiThreadedDLL + true + Use + LuceneInc.h + Level3 + ProgramDatabase + 4996;%(DisableSpecificWarnings) + false + + + /IGNORE:4221 %(AdditionalOptions) + $(BOOST_ROOT)\stage\lib;%(AdditionalLibraryDirectories) + + + if not exist "..\..\..\lib" mkdir "..\..\..\lib" +copy "$(OutDir)$(ProjectName).lib" "..\..\..\lib\." 
+ + + + + + + + false + + + false + + + false + + + false + + + Create + Create + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/src/core/msvc/lucene++.vcxproj.filters b/src/core/msvc/lucene++.vcxproj.filters new file mode 100644 index 00000000..ae9ef5b2 --- /dev/null +++ 
b/src/core/msvc/lucene++.vcxproj.filters @@ -0,0 +1,2556 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBE52EBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} + rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav + + + {48502038-53e1-4765-991b-c97b4be11da4} + + + {a684d11c-6040-49c1-b5e5-0615984a8c2c} + + + {b61dc7e6-fb87-4b3f-ba4c-226a4361b847} + + + {c2e416ff-52fb-4d36-9be1-e8610beefd4d} + + + {88cac9c3-83c6-42df-a5eb-a1f0d741b627} + + + {3e5e8bae-b0a3-498d-89bb-c4d511c21f4f} + + + {9a837aa2-bfff-4729-b020-bece9d615183} + + + {174ad8b8-0c6c-4ff7-8a6f-84f5627420ea} + + + {f6b6a77f-cc4a-406e-9b54-cb4af439c1e1} + + + {d4777928-6836-4755-9087-b4025ec3fc78} + + + {88de8393-eac2-4c2b-b102-0aa83c3ff01b} + + + {eb586280-ac06-4447-a2f7-404725729407} + + + {cc88a9b9-9490-48e9-a2d2-c77a8e370f45} + + + {81e1de0d-e621-451a-94dd-97e69cff08e7} + + + {ce3c5a86-a7c4-4f59-9c67-92d38e9dc50a} + + + + + source files + + + source files + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index 
+ + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\standard + + + analysis\standard + + + analysis\standard + + + analysis\standard + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + 
search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + query parser + + + query parser + + + query parser + + + query parser + + + query parser + + + query parser + + + query parser + + + query parser + + + platform + + + platform + + + platform + + + 
platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform\md5 + + + platform\unicode + + + + + header files + + + header files + + + header files + + + header files + + + header files + + + header files + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index 
+ + + index + + + index + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + store + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\tokenattributes + + + analysis\standard + + + analysis\standard + + + analysis\standard + + + analysis\standard + + + analysis\standard + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + util + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + 
search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\spans + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\payloads + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + search\function + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + document + + + query parser + + + query parser + + + query parser + + + query parser + + + 
query parser + + + query parser + + + query parser + + + query parser + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform + + + platform\md5 + + + platform\unicode + + + platform\unicode + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + index + + + store + + + store + + + store + + + store + + + store + + + store + + + \ No newline at end of file diff --git a/src/core/queryparser/FastCharStream.cpp b/src/core/queryparser/FastCharStream.cpp index ff26acdd..aca48889 100644 --- a/src/core/queryparser/FastCharStream.cpp +++ b/src/core/queryparser/FastCharStream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,116 +9,101 @@ #include "Reader.h" #include "MiscUtils.h" -namespace Lucene -{ - FastCharStream::FastCharStream(ReaderPtr reader) - { - input = reader; - bufferLength = 0; - bufferPosition = 0; - tokenStart = 0; - bufferStart = 0; - } - - FastCharStream::~FastCharStream() - { - } - - wchar_t FastCharStream::readChar() - { - if (bufferPosition >= bufferLength) - refill(); - return buffer[bufferPosition++]; - } - - void FastCharStream::refill() - { - int32_t newPosition = bufferLength - tokenStart; - - if (tokenStart == 0) // token won't fit in buffer - { - if (!buffer) - buffer = CharArray::newInstance(2048); - else if (bufferLength == buffer.size()) // grow buffer - buffer.resize(buffer.size() * 2); - } - else // shift token to front - MiscUtils::arrayCopy(buffer.get(), tokenStart, buffer.get(), 0, newPosition); - - bufferLength = newPosition; // update state - bufferPosition = newPosition; - bufferStart += tokenStart; - tokenStart = 0; - - int32_t charsRead = input->read(buffer.get(), newPosition, buffer.size() - newPosition); // fill space in buffer - if (charsRead == -1) - boost::throw_exception(IOException(L"read past eof")); - else - bufferLength += charsRead; - } - - wchar_t FastCharStream::BeginToken() - { - tokenStart = bufferPosition; - return readChar(); - } - - void FastCharStream::backup(int32_t amount) - { - bufferPosition -= amount; - } - - String FastCharStream::GetImage() - { - return String(buffer.get() + tokenStart, bufferPosition - tokenStart); - } - - CharArray FastCharStream::GetSuffix(int32_t length) - { - CharArray value(CharArray::newInstance(length)); - MiscUtils::arrayCopy(buffer.get(), bufferPosition - length, value.get(), 0, length); - return value; +namespace Lucene { + +FastCharStream::FastCharStream(const ReaderPtr& reader) { + input = reader; + bufferLength = 0; + bufferPosition = 0; + tokenStart = 0; + bufferStart = 0; +} + 
+FastCharStream::~FastCharStream() { +} + +wchar_t FastCharStream::readChar() { + if (bufferPosition >= bufferLength) { + refill(); } - - void FastCharStream::Done() - { - try - { - input->close(); - } - catch (IOException&) - { - // ignore IO exceptions + return buffer[bufferPosition++]; +} + +void FastCharStream::refill() { + int32_t newPosition = bufferLength - tokenStart; + + if (tokenStart == 0) { // token won't fit in buffer + if (!buffer) { + buffer = CharArray::newInstance(2048); + } else if (bufferLength == buffer.size()) { // grow buffer + buffer.resize(buffer.size() * 2); } + } else { // shift token to front + MiscUtils::arrayCopy(buffer.get(), tokenStart, buffer.get(), 0, newPosition); } - - int32_t FastCharStream::getColumn() - { - return bufferStart + bufferPosition; - } - - int32_t FastCharStream::getLine() - { - return 1; - } - - int32_t FastCharStream::getEndColumn() - { - return bufferStart + bufferPosition; - } - - int32_t FastCharStream::getEndLine() - { - return 1; - } - - int32_t FastCharStream::getBeginColumn() - { - return bufferStart + tokenStart; + + bufferLength = newPosition; // update state + bufferPosition = newPosition; + bufferStart += tokenStart; + tokenStart = 0; + + int32_t charsRead = input->read(buffer.get(), newPosition, buffer.size() - newPosition); // fill space in buffer + if (charsRead == -1) { + boost::throw_exception(IOException(L"read past eof")); + } else { + bufferLength += charsRead; } - - int32_t FastCharStream::getBeginLine() - { - return 1; +} + +wchar_t FastCharStream::BeginToken() { + tokenStart = bufferPosition; + return readChar(); +} + +void FastCharStream::backup(int32_t amount) { + bufferPosition -= amount; +} + +String FastCharStream::GetImage() { + return String(buffer.get() + tokenStart, bufferPosition - tokenStart); +} + +CharArray FastCharStream::GetSuffix(int32_t length) { + CharArray value(CharArray::newInstance(length)); + MiscUtils::arrayCopy(buffer.get(), bufferPosition - length, value.get(), 0, 
length); + return value; +} + +void FastCharStream::Done() { + try { + input->close(); + } catch (IOException&) { + // ignore IO exceptions } } + +int32_t FastCharStream::getColumn() { + return bufferStart + bufferPosition; +} + +int32_t FastCharStream::getLine() { + return 1; +} + +int32_t FastCharStream::getEndColumn() { + return bufferStart + bufferPosition; +} + +int32_t FastCharStream::getEndLine() { + return 1; +} + +int32_t FastCharStream::getBeginColumn() { + return bufferStart + tokenStart; +} + +int32_t FastCharStream::getBeginLine() { + return 1; +} + +} diff --git a/src/core/queryparser/MultiFieldQueryParser.cpp b/src/core/queryparser/MultiFieldQueryParser.cpp index 1ddee72b..05aa84b3 100644 --- a/src/core/queryparser/MultiFieldQueryParser.cpp +++ b/src/core/queryparser/MultiFieldQueryParser.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,157 +12,148 @@ #include "MultiPhraseQuery.h" #include "MiscUtils.h" -namespace Lucene -{ - MultiFieldQueryParser::MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, AnalyzerPtr analyzer, MapStringDouble boosts) : QueryParser(matchVersion, L"", analyzer) - { - this->boosts = boosts; - this->fields = fields; - } - - MultiFieldQueryParser::MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, AnalyzerPtr analyzer) : QueryParser(matchVersion, L"", analyzer) - { - this->fields = fields; - } - - MultiFieldQueryParser::~MultiFieldQueryParser() - { - } - - QueryPtr MultiFieldQueryParser::getFieldQuery(const String& field, const String& queryText, int32_t slop) - { - if (field.empty()) - { - Collection clauses(Collection::newInstance()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - QueryPtr query(QueryParser::getFieldQuery(*field, queryText)); - if (query) - { - // If the user passes a map of boosts - if (boosts) - { - // Get the boost from the map and apply them - MapStringDouble::iterator boost = boosts.find(*field); - if (boost != boosts.end()) - query->setBoost(boost->second); +namespace Lucene { + +MultiFieldQueryParser::MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, const AnalyzerPtr& analyzer, MapStringDouble boosts) : QueryParser(matchVersion, L"", analyzer) { + this->boosts = boosts; + this->fields = fields; +} + +MultiFieldQueryParser::MultiFieldQueryParser(LuceneVersion::Version matchVersion, Collection fields, const AnalyzerPtr& analyzer) : QueryParser(matchVersion, L"", analyzer) { + this->fields = fields; +} + +MultiFieldQueryParser::~MultiFieldQueryParser() { +} + +QueryPtr MultiFieldQueryParser::getFieldQuery(const String& field, const String& queryText, int32_t slop) { + if (field.empty()) { + Collection clauses(Collection::newInstance()); + for 
(Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + QueryPtr query(QueryParser::getFieldQuery(*field, queryText)); + if (query) { + // If the user passes a map of boosts + if (boosts) { + // Get the boost from the map and apply them + MapStringDouble::iterator boost = boosts.find(*field); + if (boost != boosts.end()) { + query->setBoost(boost->second); } - applySlop(query, slop); - clauses.add(newLucene(query, BooleanClause::SHOULD)); } + applySlop(query, slop); + clauses.add(newLucene(query, BooleanClause::SHOULD)); } - if (clauses.empty()) // happens for stopwords - return QueryPtr(); - return getBooleanQuery(clauses, true); } - QueryPtr query(QueryParser::getFieldQuery(field, queryText)); - applySlop(query, slop); - return query; + if (clauses.empty()) { // happens for stopwords + return QueryPtr(); + } + return getBooleanQuery(clauses, true); } - - QueryPtr MultiFieldQueryParser::getFieldQuery(const String& field, const String& queryText) - { - return getFieldQuery(field, queryText, 0); + QueryPtr query(QueryParser::getFieldQuery(field, queryText)); + applySlop(query, slop); + return query; +} + +QueryPtr MultiFieldQueryParser::getFieldQuery(const String& field, const String& queryText) { + return getFieldQuery(field, queryText, 0); +} + +void MultiFieldQueryParser::applySlop(const QueryPtr& query, int32_t slop) { + if (MiscUtils::typeOf(query)) { + boost::dynamic_pointer_cast(query)->setSlop(slop); } - - void MultiFieldQueryParser::applySlop(QueryPtr query, int32_t slop) - { - if (MiscUtils::typeOf(query)) - boost::dynamic_pointer_cast(query)->setSlop(slop); - if (MiscUtils::typeOf(query)) - boost::dynamic_pointer_cast(query)->setSlop(slop); + if (MiscUtils::typeOf(query)) { + boost::dynamic_pointer_cast(query)->setSlop(slop); } - - QueryPtr MultiFieldQueryParser::getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) - { - if (field.empty()) - { - Collection clauses(Collection::newInstance()); - for 
(Collection::iterator field = fields.begin(); field != fields.end(); ++field) - clauses.add(newLucene(getFuzzyQuery(*field, termStr, minSimilarity), BooleanClause::SHOULD)); - return getBooleanQuery(clauses, true); +} + +QueryPtr MultiFieldQueryParser::getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) { + if (field.empty()) { + Collection clauses(Collection::newInstance()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + clauses.add(newLucene(getFuzzyQuery(*field, termStr, minSimilarity), BooleanClause::SHOULD)); } - return QueryParser::getFuzzyQuery(field, termStr, minSimilarity); + return getBooleanQuery(clauses, true); } - - QueryPtr MultiFieldQueryParser::getPrefixQuery(const String& field, const String& termStr) - { - if (field.empty()) - { - Collection clauses(Collection::newInstance()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - clauses.add(newLucene(getPrefixQuery(*field, termStr), BooleanClause::SHOULD)); - return getBooleanQuery(clauses, true); + return QueryParser::getFuzzyQuery(field, termStr, minSimilarity); +} + +QueryPtr MultiFieldQueryParser::getPrefixQuery(const String& field, const String& termStr) { + if (field.empty()) { + Collection clauses(Collection::newInstance()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + clauses.add(newLucene(getPrefixQuery(*field, termStr), BooleanClause::SHOULD)); } - return QueryParser::getPrefixQuery(field, termStr); + return getBooleanQuery(clauses, true); } - - QueryPtr MultiFieldQueryParser::getWildcardQuery(const String& field, const String& termStr) - { - if (field.empty()) - { - Collection clauses(Collection::newInstance()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - clauses.add(newLucene(getWildcardQuery(*field, termStr), BooleanClause::SHOULD)); - return getBooleanQuery(clauses, true); + return 
QueryParser::getPrefixQuery(field, termStr); +} + +QueryPtr MultiFieldQueryParser::getWildcardQuery(const String& field, const String& termStr) { + if (field.empty()) { + Collection clauses(Collection::newInstance()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + clauses.add(newLucene(getWildcardQuery(*field, termStr), BooleanClause::SHOULD)); } - return QueryParser::getWildcardQuery(field, termStr); + return getBooleanQuery(clauses, true); } - - QueryPtr MultiFieldQueryParser::getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) - { - if (field.empty()) - { - Collection clauses(Collection::newInstance()); - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - clauses.add(newLucene(getRangeQuery(*field, part1, part2, inclusive), BooleanClause::SHOULD)); - return getBooleanQuery(clauses, true); + return QueryParser::getWildcardQuery(field, termStr); +} + +QueryPtr MultiFieldQueryParser::getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) { + if (field.empty()) { + Collection clauses(Collection::newInstance()); + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + clauses.add(newLucene(getRangeQuery(*field, part1, part2, inclusive), BooleanClause::SHOULD)); } - return QueryParser::getRangeQuery(field, part1, part2, inclusive); + return getBooleanQuery(clauses, true); + } + return QueryParser::getRangeQuery(field, part1, part2, inclusive); +} + +QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, const AnalyzerPtr& analyzer) { + if (queries.size() != fields.size()) { + boost::throw_exception(IllegalArgumentException(L"queries.size() != fields.size()")); } - - QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, AnalyzerPtr analyzer) - { - if (queries.size() != 
fields.size()) - boost::throw_exception(IllegalArgumentException(L"queries.size() != fields.size()")); - BooleanQueryPtr booleanQuery(newLucene()); - for (int32_t i = 0; i < fields.size(); ++i) - { - QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); - QueryPtr query(queryParser->parse(queries[i])); - if (query && (!MiscUtils::typeOf(query) || !boost::dynamic_pointer_cast(query)->getClauses().empty())) - booleanQuery->add(query, BooleanClause::SHOULD); + BooleanQueryPtr booleanQuery(newLucene()); + for (int32_t i = 0; i < fields.size(); ++i) { + QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); + QueryPtr query(queryParser->parse(queries[i])); + if (query && (!MiscUtils::typeOf(query) || !boost::dynamic_pointer_cast(query)->getClauses().empty())) { + booleanQuery->add(query, BooleanClause::SHOULD); } - return booleanQuery; } - - QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, const String& query, Collection fields, Collection flags, AnalyzerPtr analyzer) - { - if (fields.size() != flags.size()) - boost::throw_exception(IllegalArgumentException(L"fields.size() != flags.size()")); - BooleanQueryPtr booleanQuery(newLucene()); - for (int32_t i = 0; i < fields.size(); ++i) - { - QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); - QueryPtr q(queryParser->parse(query)); - if (q && (!MiscUtils::typeOf(q) || !boost::dynamic_pointer_cast(q)->getClauses().empty())) - booleanQuery->add(q, flags[i]); + return booleanQuery; +} + +QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, const String& query, Collection fields, Collection flags, const AnalyzerPtr& analyzer) { + if (fields.size() != flags.size()) { + boost::throw_exception(IllegalArgumentException(L"fields.size() != flags.size()")); + } + BooleanQueryPtr booleanQuery(newLucene()); + for (int32_t i = 0; i < fields.size(); ++i) { + QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer)); 
+        QueryPtr q(queryParser->parse(query));
+        if (q && (!MiscUtils::typeOf(q) || !boost::dynamic_pointer_cast(q)->getClauses().empty())) {
+            booleanQuery->add(q, flags[i]);
         }
-        return booleanQuery;
     }
-
-    QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, Collection flags, AnalyzerPtr analyzer)
-    {
-        if (queries.size() != fields.size() || fields.size() != flags.size())
-            boost::throw_exception(IllegalArgumentException(L"queries, fields, and flags array have have different length"));
-        BooleanQueryPtr booleanQuery(newLucene());
-        for (int32_t i = 0; i < fields.size(); ++i)
-        {
-            QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer));
-            QueryPtr query(queryParser->parse(queries[i]));
-            if (query && (!MiscUtils::typeOf(query) || !boost::dynamic_pointer_cast(query)->getClauses().empty()))
-                booleanQuery->add(query, flags[i]);
+    return booleanQuery;
+}
+
+QueryPtr MultiFieldQueryParser::parse(LuceneVersion::Version matchVersion, Collection queries, Collection fields, Collection flags, const AnalyzerPtr& analyzer) {
+    if (queries.size() != fields.size() || fields.size() != flags.size()) {
+        boost::throw_exception(IllegalArgumentException(L"queries, fields, and flags array have different length"));
+    }
+    BooleanQueryPtr booleanQuery(newLucene());
+    for (int32_t i = 0; i < fields.size(); ++i) {
+        QueryParserPtr queryParser(newLucene(matchVersion, fields[i], analyzer));
+        QueryPtr query(queryParser->parse(queries[i]));
+        if (query && (!MiscUtils::typeOf(query) || !boost::dynamic_pointer_cast(query)->getClauses().empty())) {
+            booleanQuery->add(query, flags[i]);
         }
-        return booleanQuery;
     }
+    return booleanQuery;
+}
+
}
diff --git a/src/core/queryparser/QueryParseError.cpp b/src/core/queryparser/QueryParseError.cpp
index 2348299a..5eec7525 100644
--- a/src/core/queryparser/QueryParseError.cpp
+++ b/src/core/queryparser/QueryParseError.cpp
@@ -1,5 +1,5 @@
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,109 +9,106 @@ #include "QueryParserToken.h" #include "StringUtils.h" -namespace Lucene -{ - QueryParseError::~QueryParseError() - { +namespace Lucene { + +QueryParseError::~QueryParseError() { +} + +String QueryParseError::lexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, + const String& errorAfter, wchar_t curChar) { + StringStream buffer; + buffer << L"Lexical error at line " << errorLine << L", column " << errorColumn << L". Encountered:"; + if (EOFSeen) { + buffer << L""; + } else { + buffer << L"\"" << addEscapes(String(1, curChar)) << L"\""; } - - String QueryParseError::lexicalError(bool EOFSeen, int32_t lexState, int32_t errorLine, int32_t errorColumn, - const String& errorAfter, wchar_t curChar) - { - StringStream buffer; - buffer << L"Lexical error at line " << errorLine << L", column " << errorColumn + L". 
Encountered:"; - if (EOFSeen) - buffer << L""; - else - buffer << L"\"" << addEscapes(String(1, curChar)) << L"\""; - buffer << L" (" + (int32_t)curChar << L"), after : \"" << addEscapes(errorAfter) + L"\""; - return buffer.str(); + buffer << L" (" << (int32_t)curChar << L"), after : \"" << addEscapes(errorAfter) + L"\""; + return buffer.str(); +} + +String QueryParseError::parseError(const QueryParserTokenPtr& currentToken, Collection< Collection > expectedTokenSequences, + Collection tokenImage) { + StringStream expected; + int32_t maxSize = 0; + for (int32_t i = 0; i < expectedTokenSequences.size(); ++i) { + if (maxSize < expectedTokenSequences[i].size()) { + maxSize = expectedTokenSequences[i].size(); + } + for (int32_t j = 0; j < expectedTokenSequences[i].size(); ++j) { + expected << tokenImage[expectedTokenSequences[i][j]] << L" "; + } + if (expectedTokenSequences[i][expectedTokenSequences[i].size() - 1] != 0) { + expected << L"..."; + } + expected << L"\n "; } - - String QueryParseError::parseError(QueryParserTokenPtr currentToken, Collection< Collection > expectedTokenSequences, - Collection tokenImage) - { - StringStream expected; - int32_t maxSize = 0; - for (int32_t i = 0; i < expectedTokenSequences.size(); ++i) - { - if (maxSize < expectedTokenSequences[i].size()) - maxSize = expectedTokenSequences[i].size(); - for (int32_t j = 0; j < expectedTokenSequences[i].size(); ++j) - expected << tokenImage[expectedTokenSequences[i][j]] << L" "; - if (expectedTokenSequences[i][expectedTokenSequences[i].size() - 1] != 0) - expected << L"..."; - expected << L"\n "; + StringStream retval; + retval << L"Encountered \""; + QueryParserTokenPtr token(currentToken->next); + for (int32_t i = 0; i < maxSize; ++i) { + if (i != 0) { + retval << L" "; } - StringStream retval; - retval << L"Encountered \""; - QueryParserTokenPtr token(currentToken->next); - for (int32_t i = 0; i < maxSize; ++i) - { - if (i != 0) - retval << L" "; - if (token->kind == 0) - { - retval << 
tokenImage[0]; - break; - } - retval << L" " << tokenImage[token->kind] << L" \"" << addEscapes(token->image) << L" \""; - token = token->next; + if (token->kind == 0) { + retval << tokenImage[0]; + break; } - retval << L"\" at line " << currentToken->next->beginLine << L", column " << currentToken->next->beginColumn; - retval << L".\n"; - if (expectedTokenSequences.size() == 1) - retval << L"Was expecting:\n "; - else - retval << L"Was expecting one of:\n "; - retval << expected.str(); - return retval.str(); + retval << L" " << tokenImage[token->kind] << L" \"" << addEscapes(token->image) << L" \""; + token = token->next; } - - String QueryParseError::addEscapes(const String& str) - { - StringStream buffer; - for (String::const_iterator ch = str.begin(); ch != str.end(); ++ch) - { - switch (*ch) - { - case L'\0': - continue; - case L'\b': - buffer << L"\\b"; - continue; - case L'\t': - buffer << L"\\t"; - continue; - case L'\n': - buffer << L"\\n"; - continue; - case L'\f': - buffer << L"\\f"; - continue; - case L'\r': - buffer << L"\\r"; - continue; - case L'\"': - buffer << L"\\\""; - continue; - case L'\'': - buffer << L"\\\'"; - continue; - case L'\\': - buffer << L"\\\\"; - continue; - default: - if (*ch < 0x20 || *ch > 0x7e) - { - String hexChar(L"0000" + StringUtils::toString(*ch, 16)); - buffer << L"\\u" + hexChar.substr(hexChar.length() - 4); - } - else - buffer << *ch; - continue; + retval << L"\" at line " << currentToken->next->beginLine << L", column " << currentToken->next->beginColumn; + retval << L".\n"; + if (expectedTokenSequences.size() == 1) { + retval << L"Was expecting:\n "; + } else { + retval << L"Was expecting one of:\n "; + } + retval << expected.str(); + return retval.str(); +} + +String QueryParseError::addEscapes(const String& str) { + StringStream buffer; + for (String::const_iterator ch = str.begin(); ch != str.end(); ++ch) { + switch (*ch) { + case L'\0': + continue; + case L'\b': + buffer << L"\\b"; + continue; + case L'\t': + 
buffer << L"\\t"; + continue; + case L'\n': + buffer << L"\\n"; + continue; + case L'\f': + buffer << L"\\f"; + continue; + case L'\r': + buffer << L"\\r"; + continue; + case L'\"': + buffer << L"\\\""; + continue; + case L'\'': + buffer << L"\\\'"; + continue; + case L'\\': + buffer << L"\\\\"; + continue; + default: + if (*ch < 0x20 || *ch > 0x7e) { + String hexChar(L"0000" + StringUtils::toString(*ch, 16)); + buffer << L"\\u" + hexChar.substr(hexChar.length() - 4); + } else { + buffer << *ch; } + continue; } - return buffer.str(); } + return buffer.str(); +} + } diff --git a/src/core/queryparser/QueryParser.cpp b/src/core/queryparser/QueryParser.cpp index 73cde9cb..f05f7b67 100644 --- a/src/core/queryparser/QueryParser.cpp +++ b/src/core/queryparser/QueryParser.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,7 +13,7 @@ #include "QueryParseError.h" #include "MultiTermQuery.h" #include "TermQuery.h" -#include "TermRangeQuery.h" +#include "TermRangeQuery.h" #include "FuzzyQuery.h" #include "FastCharStream.h" #include "StringReader.h" @@ -32,1540 +32,1358 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t QueryParser::CONJ_NONE = 0; - const int32_t QueryParser::CONJ_AND = 1; - const int32_t QueryParser::CONJ_OR = 2; - - const int32_t QueryParser::MOD_NONE = 0; - const int32_t QueryParser::MOD_NOT = 10; - const int32_t QueryParser::MOD_REQ = 11; - - const int32_t QueryParser::jj_la1_0[] = - { - 0x300, 0x300, 0x1c00, 0x1c00, 0x3ed3f00, 0x90000, 0x20000, 0x3ed2000, 0x2690000, 0x100000, 0x100000, 0x20000, - 0x30000000, 0x4000000, 0x30000000, 0x20000, 0x0, 0x40000000, 0x0, 0x20000, 0x100000, 0x20000, 0x3ed0000 - }; - - const int32_t QueryParser::jj_la1_1[] = - { - 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, - 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0 - }; - - QueryParser::QueryParser(LuceneVersion::Version matchVersion, const String& field, AnalyzerPtr analyzer) - { - ConstructParser(newLucene(newLucene(L"")), QueryParserTokenManagerPtr()); - this->analyzer = analyzer; - this->field = field; - this->enablePositionIncrements = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_29); - } - - QueryParser::QueryParser(QueryParserCharStreamPtr stream) - { - ConstructParser(stream, QueryParserTokenManagerPtr()); - } - - QueryParser::QueryParser(QueryParserTokenManagerPtr tokenMgr) - { - ConstructParser(QueryParserCharStreamPtr(), tokenMgr); - } - - QueryParser::~QueryParser() - { - } - - void QueryParser::ConstructParser(QueryParserCharStreamPtr stream, QueryParserTokenManagerPtr tokenMgr) - { - _operator = OR_OPERATOR; - lowercaseExpandedTerms = true; - multiTermRewriteMethod = 
MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); - allowLeadingWildcard = false; - enablePositionIncrements = true; - phraseSlop = 0; - fuzzyMinSim = FuzzyQuery::defaultMinSimilarity(); - fuzzyPrefixLength = FuzzyQuery::defaultPrefixLength; - locale = std::locale(); - dateResolution = DateTools::RESOLUTION_NULL; - - token_source = tokenMgr ? tokenMgr : newLucene(stream); - token = newLucene(); - _jj_ntk = -1; - jj_la = 0; - jj_gen = 0; - jj_rescan = false; - jj_gc = 0; - jj_la1 = Collection::newInstance(23); - jj_2_rtns = Collection::newInstance(1); - for (int32_t i = 0; i < 23; ++i) - jj_la1[i] = -1; - for (int32_t i = 0; i < jj_2_rtns.size(); ++i) - jj_2_rtns[i] = newInstance(); - jj_expentries = Collection< Collection >::newInstance(); - jj_kind = -1; - jj_lasttokens = Collection::newInstance(100); - jj_endpos = 0; - } - - QueryPtr QueryParser::parse(const String& query) - { - ReInit(newLucene(newLucene(query))); - try - { - // TopLevelQuery is a Query followed by the end-of-input (EOF) - QueryPtr res(TopLevelQuery(field)); - return res ? 
res : newBooleanQuery(false); - } - catch (QueryParserError& e) - { - boost::throw_exception(QueryParserError(L"Cannot parse '" + query + L"': " + e.getError())); - } - catch (TooManyClausesException&) - { - boost::throw_exception(QueryParserError(L"Cannot parse '" + query + L"': too many boolean clauses")); - } - return QueryPtr(); +namespace Lucene { + +const int32_t QueryParser::CONJ_NONE = 0; +const int32_t QueryParser::CONJ_AND = 1; +const int32_t QueryParser::CONJ_OR = 2; + +const int32_t QueryParser::MOD_NONE = 0; +const int32_t QueryParser::MOD_NOT = 10; +const int32_t QueryParser::MOD_REQ = 11; + +const int32_t QueryParser::jj_la1_0[] = { + 0x300, 0x300, 0x1c00, 0x1c00, 0x3ed3f00, 0x90000, 0x20000, 0x3ed2000, 0x2690000, 0x100000, 0x100000, 0x20000, + 0x30000000, 0x4000000, 0x30000000, 0x20000, 0x0, 0x40000000, 0x0, 0x20000, 0x100000, 0x20000, 0x3ed0000 +}; + +const int32_t QueryParser::jj_la1_1[] = { + 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, + 0x0, 0x0, 0x0, 0x0, 0x0, 0x3, 0x0, 0x3, 0x0, 0x0, 0x0, 0x0 +}; + +QueryParser::QueryParser(LuceneVersion::Version matchVersion, const String& field, const AnalyzerPtr& analyzer) { + ConstructParser(newLucene(newLucene(L"")), QueryParserTokenManagerPtr()); + this->analyzer = analyzer; + this->field = field; + this->enablePositionIncrements = LuceneVersion::onOrAfter(matchVersion, LuceneVersion::LUCENE_29); +} + +QueryParser::QueryParser(const QueryParserCharStreamPtr& stream) { + ConstructParser(stream, QueryParserTokenManagerPtr()); +} + +QueryParser::QueryParser(const QueryParserTokenManagerPtr& tokenMgr) { + ConstructParser(QueryParserCharStreamPtr(), tokenMgr); +} + +QueryParser::~QueryParser() { +} + +void QueryParser::ConstructParser(const QueryParserCharStreamPtr& stream, const QueryParserTokenManagerPtr& tokenMgr) { + _operator = OR_OPERATOR; + lowercaseExpandedTerms = true; + multiTermRewriteMethod = MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); + allowLeadingWildcard = false; + 
enablePositionIncrements = true; + phraseSlop = 0; + fuzzyMinSim = FuzzyQuery::defaultMinSimilarity(); + fuzzyPrefixLength = FuzzyQuery::defaultPrefixLength; + locale = std::locale(); + dateResolution = DateTools::RESOLUTION_NULL; + + token_source = tokenMgr ? tokenMgr : newLucene(stream); + token = newLucene(); + _jj_ntk = -1; + jj_la = 0; + jj_gen = 0; + jj_rescan = false; + jj_gc = 0; + jj_la1 = Collection::newInstance(23); + jj_2_rtns = Collection::newInstance(1); + for (int32_t i = 0; i < 23; ++i) { + jj_la1[i] = -1; + } + for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { + jj_2_rtns[i] = newInstance(); + } + jj_expentries = Collection< Collection >::newInstance(); + jj_kind = -1; + jj_lasttokens = Collection::newInstance(100); + jj_endpos = 0; +} + +QueryPtr QueryParser::parse(const String& query) { + ReInit(newLucene(newLucene(query))); + try { + // TopLevelQuery is a Query followed by the end-of-input (EOF) + QueryPtr res(TopLevelQuery(field)); + return res ? res : newBooleanQuery(false); + } catch (QueryParserError& e) { + boost::throw_exception(QueryParserError(L"Cannot parse '" + query + L"': " + e.getError())); + } catch (TooManyClausesException&) { + boost::throw_exception(QueryParserError(L"Cannot parse '" + query + L"': too many boolean clauses")); + } + return QueryPtr(); +} + +AnalyzerPtr QueryParser::getAnalyzer() { + return analyzer; +} + +String QueryParser::getField() { + return field; +} + +double QueryParser::getFuzzyMinSim() { + return fuzzyMinSim; +} + +void QueryParser::setFuzzyMinSim(double fuzzyMinSim) { + this->fuzzyMinSim = fuzzyMinSim; +} + +int32_t QueryParser::getFuzzyPrefixLength() { + return fuzzyPrefixLength; +} + +void QueryParser::setFuzzyPrefixLength(int32_t fuzzyPrefixLength) { + this->fuzzyPrefixLength = fuzzyPrefixLength; +} + +void QueryParser::setPhraseSlop(int32_t phraseSlop) { + this->phraseSlop = phraseSlop; +} + +int32_t QueryParser::getPhraseSlop() { + return phraseSlop; +} + +void 
QueryParser::setAllowLeadingWildcard(bool allowLeadingWildcard) { + this->allowLeadingWildcard = allowLeadingWildcard; +} + +bool QueryParser::getAllowLeadingWildcard() { + return allowLeadingWildcard; +} + +void QueryParser::setEnablePositionIncrements(bool enable) { + this->enablePositionIncrements = enable; +} + +bool QueryParser::getEnablePositionIncrements() { + return enablePositionIncrements; +} + +void QueryParser::setDefaultOperator(Operator op) { + this->_operator = op; +} + +QueryParser::Operator QueryParser::getDefaultOperator() { + return _operator; +} + +void QueryParser::setLowercaseExpandedTerms(bool lowercaseExpandedTerms) { + this->lowercaseExpandedTerms = lowercaseExpandedTerms; +} + +bool QueryParser::getLowercaseExpandedTerms() { + return lowercaseExpandedTerms; +} + +void QueryParser::setMultiTermRewriteMethod(const RewriteMethodPtr& method) { + multiTermRewriteMethod = method; +} + +RewriteMethodPtr QueryParser::getMultiTermRewriteMethod() { + return multiTermRewriteMethod; +} + +void QueryParser::setLocale(std::locale locale) { + this->locale = locale; +} + +std::locale QueryParser::getLocale() { + return locale; +} + +void QueryParser::setDateResolution(DateTools::Resolution dateResolution) { + this->dateResolution = dateResolution; +} + +void QueryParser::setDateResolution(const String& fieldName, DateTools::Resolution dateResolution) { + if (fieldName.empty()) { + boost::throw_exception(IllegalArgumentException(L"Field cannot be empty.")); } - - AnalyzerPtr QueryParser::getAnalyzer() - { - return analyzer; - } - - String QueryParser::getField() - { - return field; - } - - double QueryParser::getFuzzyMinSim() - { - return fuzzyMinSim; - } - - void QueryParser::setFuzzyMinSim(double fuzzyMinSim) - { - this->fuzzyMinSim = fuzzyMinSim; - } - - int32_t QueryParser::getFuzzyPrefixLength() - { - return fuzzyPrefixLength; - } - - void QueryParser::setFuzzyPrefixLength(int32_t fuzzyPrefixLength) - { - this->fuzzyPrefixLength = fuzzyPrefixLength; - 
} - - void QueryParser::setPhraseSlop(int32_t phraseSlop) - { - this->phraseSlop = phraseSlop; - } - - int32_t QueryParser::getPhraseSlop() - { - return phraseSlop; - } - - void QueryParser::setAllowLeadingWildcard(bool allowLeadingWildcard) - { - this->allowLeadingWildcard = allowLeadingWildcard; - } - - bool QueryParser::getAllowLeadingWildcard() - { - return allowLeadingWildcard; - } - - void QueryParser::setEnablePositionIncrements(bool enable) - { - this->enablePositionIncrements = enable; - } - - bool QueryParser::getEnablePositionIncrements() - { - return enablePositionIncrements; - } - - void QueryParser::setDefaultOperator(Operator op) - { - this->_operator = op; - } - - QueryParser::Operator QueryParser::getDefaultOperator() - { - return _operator; - } - - void QueryParser::setLowercaseExpandedTerms(bool lowercaseExpandedTerms) - { - this->lowercaseExpandedTerms = lowercaseExpandedTerms; - } - - bool QueryParser::getLowercaseExpandedTerms() - { - return lowercaseExpandedTerms; - } - - void QueryParser::setMultiTermRewriteMethod(RewriteMethodPtr method) - { - multiTermRewriteMethod = method; - } - - RewriteMethodPtr QueryParser::getMultiTermRewriteMethod() - { - return multiTermRewriteMethod; - } - - void QueryParser::setLocale(std::locale locale) - { - this->locale = locale; - } - - std::locale QueryParser::getLocale() - { - return locale; - } - - void QueryParser::setDateResolution(DateTools::Resolution dateResolution) - { - this->dateResolution = dateResolution; - } - - void QueryParser::setDateResolution(const String& fieldName, DateTools::Resolution dateResolution) - { - if (fieldName.empty()) - boost::throw_exception(IllegalArgumentException(L"Field cannot be empty.")); - - if (!fieldToDateResolution) - { - // lazily initialize Map - fieldToDateResolution = MapStringResolution::newInstance(); - } - - fieldToDateResolution.put(fieldName, dateResolution); - } - - DateTools::Resolution QueryParser::getDateResolution(const String& fieldName) - { - if 
(fieldName.empty()) - boost::throw_exception(IllegalArgumentException(L"Field cannot be empty.")); - - if (!fieldToDateResolution) - { - // no field specific date resolutions set; return default date resolution instead - return this->dateResolution; - } - - MapStringResolution::iterator resolution = fieldToDateResolution.find(fieldName); - if (resolution == fieldToDateResolution.end()) - { - // no date resolutions set for the given field; return default date resolution instead - return this->dateResolution; - } - - return resolution->second; - } - - void QueryParser::setRangeCollator(CollatorPtr rc) - { - rangeCollator = rc; - } - - CollatorPtr QueryParser::getRangeCollator() - { - return rangeCollator; - } - - void QueryParser::addClause(Collection clauses, int32_t conj, int32_t mods, QueryPtr q) - { - bool required = false; - bool prohibited = false; - - // If this term is introduced by AND, make the preceding term required, unless it's already prohibited - if (!clauses.empty() && conj == CONJ_AND) - { - BooleanClausePtr c(clauses[clauses.size() - 1]); - if (!c->isProhibited()) - c->setOccur(BooleanClause::MUST); - } - - if (!clauses.empty() && _operator == AND_OPERATOR && conj == CONJ_OR) - { - // If this term is introduced by OR, make the preceding term optional, unless it's prohibited (that - // means we leave -a OR b but +a OR b-->a OR b) notice if the input is a OR b, first term is parsed - // as required; without this modification a OR b would parsed as +a OR b - BooleanClausePtr c(clauses[clauses.size() - 1]); - if (!c->isProhibited()) - c->setOccur(BooleanClause::SHOULD); - } - - // We might have been passed a null query; the term might have been filtered away by the analyzer. - if (!q) - return; - - if (_operator == OR_OPERATOR) - { - // We set REQUIRED if we're introduced by AND or +; PROHIBITED if introduced by NOT or -; make - // sure not to set both. 
- prohibited = (mods == MOD_NOT); - required = (mods == MOD_REQ); - if (conj == CONJ_AND && !prohibited) - required = true; - } - else - { - // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED if not PROHIBITED and not - // introduced by OR - prohibited = (mods == MOD_NOT); - required = (!prohibited && conj != CONJ_OR); - } - if (required && !prohibited) - clauses.add(newBooleanClause(q, BooleanClause::MUST)); - else if (!required && !prohibited) - clauses.add(newBooleanClause(q, BooleanClause::SHOULD)); - else if (!required && prohibited) - clauses.add(newBooleanClause(q, BooleanClause::MUST_NOT)); - else - boost::throw_exception(RuntimeException(L"Clause cannot be both required and prohibited")); - } - - QueryPtr QueryParser::getFieldQuery(const String& field, const String& queryText) - { - TokenStreamPtr source; - try - { - source = analyzer->reusableTokenStream(field, newLucene(queryText)); - source->reset(); + + if (!fieldToDateResolution) { + // lazily initialize Map + fieldToDateResolution = MapStringResolution::newInstance(); + } + + fieldToDateResolution.put(fieldName, dateResolution); +} + +DateTools::Resolution QueryParser::getDateResolution(const String& fieldName) { + if (fieldName.empty()) { + boost::throw_exception(IllegalArgumentException(L"Field cannot be empty.")); + } + + if (!fieldToDateResolution) { + // no field specific date resolutions set; return default date resolution instead + return this->dateResolution; + } + + MapStringResolution::iterator resolution = fieldToDateResolution.find(fieldName); + if (resolution == fieldToDateResolution.end()) { + // no date resolutions set for the given field; return default date resolution instead + return this->dateResolution; + } + + return resolution->second; +} + +void QueryParser::setRangeCollator(const CollatorPtr& rc) { + rangeCollator = rc; +} + +CollatorPtr QueryParser::getRangeCollator() { + return rangeCollator; +} + +void QueryParser::addClause(Collection clauses, int32_t 
conj, int32_t mods, const QueryPtr& q) {
+    bool required = false;
+    bool prohibited = false;
+
+    // If this term is introduced by AND, make the preceding term required, unless it's already prohibited
+    if (!clauses.empty() && conj == CONJ_AND) {
+        BooleanClausePtr c(clauses[clauses.size() - 1]);
+        if (!c->isProhibited()) {
+            c->setOccur(BooleanClause::MUST);
         }
-        catch (IOException&)
-        {
-            source = analyzer->tokenStream(field, newLucene(queryText));
+    }
+
+    if (!clauses.empty() && _operator == AND_OPERATOR && conj == CONJ_OR) {
+        // If this term is introduced by OR, make the preceding term optional, unless it's prohibited (that
+        // means we leave -a OR b but +a OR b-->a OR b) notice if the input is a OR b, first term is parsed
+        // as required; without this modification a OR b would be parsed as +a OR b
+        BooleanClausePtr c(clauses[clauses.size() - 1]);
+        if (!c->isProhibited()) {
+            c->setOccur(BooleanClause::SHOULD);
         }
-
-        // We might have been passed a null query; the term might have been filtered away by the analyzer.
-        if (!q)
-            return;
-
-        if (_operator == OR_OPERATOR)
-        {
-            // We set REQUIRED if we're introduced by AND or +; PROHIBITED if introduced by NOT or -; make
-            // sure not to set both. 
+ prohibited = (mods == MOD_NOT); + required = (mods == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; } - catch (IOException&) - { - // success == false if we hit an exception + } else { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED if not PROHIBITED and not + // introduced by OR + prohibited = (mods == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } + if (required && !prohibited) { + clauses.add(newBooleanClause(q, BooleanClause::MUST)); + } else if (!required && !prohibited) { + clauses.add(newBooleanClause(q, BooleanClause::SHOULD)); + } else if (!required && prohibited) { + clauses.add(newBooleanClause(q, BooleanClause::MUST_NOT)); + } else { + boost::throw_exception(RuntimeException(L"Clause cannot be both required and prohibited")); + } +} + +QueryPtr QueryParser::getFieldQuery(const String& field, const String& queryText) { + TokenStreamPtr source; + try { + source = analyzer->reusableTokenStream(field, newLucene(queryText)); + source->reset(); + } catch (IOException&) { + source = analyzer->tokenStream(field, newLucene(queryText)); + } + + CachingTokenFilterPtr buffer(newLucene(source)); + TermAttributePtr termAtt; + PositionIncrementAttributePtr posIncrAtt; + int32_t numTokens = 0; + + bool success = false; + try { + buffer->reset(); + success = true; + } catch (IOException&) { + // success == false if we hit an exception + } + if (success) { + if (buffer->hasAttribute()) { + termAtt = buffer->getAttribute(); } - if (success) - { - if (buffer->hasAttribute()) - termAtt = buffer->getAttribute(); - if (buffer->hasAttribute()) - posIncrAtt = buffer->getAttribute(); + if (buffer->hasAttribute()) { + posIncrAtt = buffer->getAttribute(); } - - int32_t positionCount = 0; - bool severalTokensAtSamePosition = false; - - bool hasMoreTokens = false; - if (termAtt) - { - try - { - hasMoreTokens = buffer->incrementToken(); - while (hasMoreTokens) - { - ++numTokens; - int32_t positionIncrement = posIncrAtt ? 
posIncrAtt->getPositionIncrement() : 1; - if (positionIncrement != 0) - positionCount += positionIncrement; - else - severalTokensAtSamePosition = true; - hasMoreTokens = buffer->incrementToken(); + } + + int32_t positionCount = 0; + bool severalTokensAtSamePosition = false; + + bool hasMoreTokens = false; + if (termAtt) { + try { + hasMoreTokens = buffer->incrementToken(); + while (hasMoreTokens) { + ++numTokens; + int32_t positionIncrement = posIncrAtt ? posIncrAtt->getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; } + hasMoreTokens = buffer->incrementToken(); } - catch (IOException&) - { - // ignore - } - } - try - { - // rewind the buffer stream - buffer->reset(); - - // close original stream - all tokens buffered - source->close(); - } - catch (IOException&) - { + } catch (IOException&) { // ignore } - - if (numTokens == 0) - return QueryPtr(); - else if (numTokens == 1) - { - String term; - try - { - bool hasNext = buffer->incrementToken(); - BOOST_ASSERT(hasNext); - term = termAtt->term(); - } - catch (IOException&) - { - // safe to ignore, because we know the number of tokens - } - return newTermQuery(newLucene(field, term)); + } + try { + // rewind the buffer stream + buffer->reset(); + + // close original stream - all tokens buffered + source->close(); + } catch (IOException&) { + // ignore + } + + if (numTokens == 0) { + return QueryPtr(); + } else if (numTokens == 1) { + String term; + try { + bool hasNext = buffer->incrementToken(); + BOOST_ASSERT(hasNext); + term = termAtt->term(); + } catch (IOException&) { + // safe to ignore, because we know the number of tokens } - else - { - if (severalTokensAtSamePosition) - { - if (positionCount == 1) - { - // no phrase query - BooleanQueryPtr q(newBooleanQuery(true)); - for (int32_t i = 0; i < numTokens; ++i) - { - String term; - try - { - bool hasNext = buffer->incrementToken(); - BOOST_ASSERT(hasNext); - term = 
termAtt->term(); - } - catch (IOException&) - { - // safe to ignore, because we know the number of tokens - } - - QueryPtr currentQuery(newTermQuery(newLucene(field, term))); - q->add(currentQuery, BooleanClause::SHOULD); - } - return q; - } - else - { - // phrase query - MultiPhraseQueryPtr mpq(newMultiPhraseQuery()); - mpq->setSlop(phraseSlop); - Collection multiTerms(Collection::newInstance()); - int32_t position = -1; - for (int32_t i = 0; i < numTokens; ++i) - { - String term; - int32_t positionIncrement = 1; - try - { - bool hasNext = buffer->incrementToken(); - BOOST_ASSERT(hasNext); - term = termAtt->term(); - if (posIncrAtt) - positionIncrement = posIncrAtt->getPositionIncrement(); - } - catch (IOException&) - { - // safe to ignore, because we know the number of tokens - } - - if (positionIncrement > 0 && !multiTerms.empty()) - { - if (enablePositionIncrements) - mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end()), position); - else - mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end())); - multiTerms.clear(); - } - position += positionIncrement; - multiTerms.add(newLucene(field, term)); + return newTermQuery(newLucene(field, term)); + } else { + if (severalTokensAtSamePosition) { + if (positionCount <= 1) { + // no phrase query + BooleanQueryPtr q(newBooleanQuery(true)); + for (int32_t i = 0; i < numTokens; ++i) { + String term; + try { + bool hasNext = buffer->incrementToken(); + BOOST_ASSERT(hasNext); + term = termAtt->term(); + } catch (IOException&) { + // safe to ignore, because we know the number of tokens } - if (enablePositionIncrements) - mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end()), position); - else - mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end())); - return mpq; + + QueryPtr currentQuery(newTermQuery(newLucene(field, term))); + q->add(currentQuery, BooleanClause::SHOULD); } - } - else - { - PhraseQueryPtr pq(newPhraseQuery()); - pq->setSlop(phraseSlop); + 
return q; + } else { + // phrase query + MultiPhraseQueryPtr mpq(newMultiPhraseQuery()); + mpq->setSlop(phraseSlop); + Collection multiTerms(Collection::newInstance()); int32_t position = -1; - - for (int32_t i = 0; i < numTokens; ++i) - { + for (int32_t i = 0; i < numTokens; ++i) { String term; int32_t positionIncrement = 1; - - try - { + try { bool hasNext = buffer->incrementToken(); BOOST_ASSERT(hasNext); term = termAtt->term(); - if (posIncrAtt) + if (posIncrAtt) { positionIncrement = posIncrAtt->getPositionIncrement(); - } - catch (IOException&) - { + } + } catch (IOException&) { // safe to ignore, because we know the number of tokens } - - if (enablePositionIncrements) - { - position += positionIncrement; - pq->add(newLucene(field, term), position); + + if (positionIncrement > 0 && !multiTerms.empty()) { + if (enablePositionIncrements) { + mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end()), position); + } else { + mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end())); + } + multiTerms.clear(); } - else - pq->add(newLucene(field, term)); + position += positionIncrement; + multiTerms.add(newLucene(field, term)); + } + if (enablePositionIncrements) { + mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end()), position); + } else { + mpq->add(Collection::newInstance(multiTerms.begin(), multiTerms.end())); } - return pq; + return mpq; } + } else { + PhraseQueryPtr pq(newPhraseQuery()); + pq->setSlop(phraseSlop); + int32_t position = -1; + + for (int32_t i = 0; i < numTokens; ++i) { + String term; + int32_t positionIncrement = 1; + + try { + bool hasNext = buffer->incrementToken(); + BOOST_ASSERT(hasNext); + term = termAtt->term(); + if (posIncrAtt) { + positionIncrement = posIncrAtt->getPositionIncrement(); + } + } catch (IOException&) { + // safe to ignore, because we know the number of tokens + } + + if (enablePositionIncrements) { + position += positionIncrement; + pq->add(newLucene(field, term), position); + 
} else { + pq->add(newLucene(field, term)); + } + } + return pq; } } - - QueryPtr QueryParser::getFieldQuery(const String& field, const String& queryText, int32_t slop) - { - QueryPtr query(getFieldQuery(field, queryText)); - if (MiscUtils::typeOf(query)) - boost::dynamic_pointer_cast(query)->setSlop(slop); - if (MiscUtils::typeOf(query)) - boost::dynamic_pointer_cast(query)->setSlop(slop); - return query; - } - - QueryPtr QueryParser::getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) - { - String date1(part1); - String date2(part2); - if (lowercaseExpandedTerms) - { - StringUtils::toLower(date1); - StringUtils::toLower(date2); - } - try - { - boost::posix_time::ptime d1(DateTools::parseDate(date1, locale)); - boost::posix_time::ptime d2; - - // The user can only specify the date, not the time, so make sure the time is set to - // the latest possible time of that date to really include all documents - if (inclusive) - { - d2 = boost::posix_time::ptime(DateTools::parseDate(date2, locale) + - boost::posix_time::hours(23) + - boost::posix_time::minutes(59) + - boost::posix_time::seconds(59) + - boost::posix_time::millisec(999)); - } - else - d2 = boost::posix_time::ptime(DateTools::parseDate(date2, locale)); - DateTools::Resolution resolution = getDateResolution(field); - if (resolution == DateTools::RESOLUTION_NULL) - { - // no default or field specific date resolution has been set, use deprecated - // DateField to maintain compatibility with pre-1.9 Lucene versions. 
- date1 = DateField::dateToString(d1); - date2 = DateField::dateToString(d2); - } - else - { - date1 = DateTools::dateToString(d1, resolution); - date2 = DateTools::dateToString(d2, resolution); - } +} + +QueryPtr QueryParser::getFieldQuery(const String& field, const String& queryText, int32_t slop) { + QueryPtr query(getFieldQuery(field, queryText)); + if (MiscUtils::typeOf(query)) { + boost::dynamic_pointer_cast(query)->setSlop(slop); + } + if (MiscUtils::typeOf(query)) { + boost::dynamic_pointer_cast(query)->setSlop(slop); + } + return query; +} + +QueryPtr QueryParser::getRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) { + String date1(part1); + String date2(part2); + if (lowercaseExpandedTerms) { + StringUtils::toLower(date1); + StringUtils::toLower(date2); + } + try { + boost::posix_time::ptime d1(DateTools::parseDate(date1, locale)); + boost::posix_time::ptime d2; + + // The user can only specify the date, not the time, so make sure the time is set to + // the latest possible time of that date to really include all documents + if (inclusive) { + d2 = boost::posix_time::ptime(DateTools::parseDate(date2, locale) + + boost::posix_time::hours(23) + + boost::posix_time::minutes(59) + + boost::posix_time::seconds(59) + + boost::posix_time::millisec(999)); + } else { + d2 = boost::posix_time::ptime(DateTools::parseDate(date2, locale)); } - catch (...) - { + DateTools::Resolution resolution = getDateResolution(field); + if (resolution == DateTools::RESOLUTION_NULL) { + // no default or field specific date resolution has been set, use deprecated + // DateField to maintain compatibility with pre-1.9 Lucene versions. 
+ date1 = DateField::dateToString(d1); + date2 = DateField::dateToString(d2); + } else { + date1 = DateTools::dateToString(d1, resolution); + date2 = DateTools::dateToString(d2, resolution); } - return newRangeQuery(field, date1, date2, inclusive); - } - - BooleanQueryPtr QueryParser::newBooleanQuery(bool disableCoord) - { - return newLucene(disableCoord); - } - - BooleanClausePtr QueryParser::newBooleanClause(QueryPtr q, BooleanClause::Occur occur) - { - return newLucene(q, occur); - } - - QueryPtr QueryParser::newTermQuery(TermPtr term) - { - return newLucene(term); - } - - PhraseQueryPtr QueryParser::newPhraseQuery() - { - return newLucene(); - } - - MultiPhraseQueryPtr QueryParser::newMultiPhraseQuery() - { - return newLucene(); - } - - QueryPtr QueryParser::newPrefixQuery(TermPtr prefix) - { - PrefixQueryPtr query(newLucene(prefix)); - query->setRewriteMethod(multiTermRewriteMethod); - return query; - } - - QueryPtr QueryParser::newFuzzyQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength) - { - // FuzzyQuery doesn't yet allow constant score rewrite - return newLucene(term, minimumSimilarity, prefixLength); - } - - QueryPtr QueryParser::newRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) - { - TermRangeQueryPtr query(newLucene(field, part1, part2, inclusive, inclusive, rangeCollator)); - query->setRewriteMethod(multiTermRewriteMethod); - return query; - } - - QueryPtr QueryParser::newMatchAllDocsQuery() - { - return newLucene(); - } - - QueryPtr QueryParser::newWildcardQuery(TermPtr term) - { - WildcardQueryPtr query(newLucene(term)); - query->setRewriteMethod(multiTermRewriteMethod); - return query; - } - - QueryPtr QueryParser::getBooleanQuery(Collection clauses) - { - return getBooleanQuery(clauses, false); - } - - QueryPtr QueryParser::getBooleanQuery(Collection clauses, bool disableCoord) - { - if (clauses.empty()) - return QueryPtr(); // all clause words were filtered away by the analyzer. 
- BooleanQueryPtr query(newBooleanQuery(disableCoord)); - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - query->add(*clause); - return query; - } - - QueryPtr QueryParser::getWildcardQuery(const String& field, const String& termStr) - { - if (field == L"*" && termStr == L"*") - return newMatchAllDocsQuery(); - if (!allowLeadingWildcard && (boost::starts_with(termStr, L"*") || boost::starts_with(termStr, L"?"))) - boost::throw_exception(QueryParserError(L"'*' or '?' not allowed as first character in WildcardQuery")); - String queryTerm(termStr); - if (lowercaseExpandedTerms) - StringUtils::toLower(queryTerm); - TermPtr term(newLucene(field, queryTerm)); - return newWildcardQuery(term); - } - - QueryPtr QueryParser::getPrefixQuery(const String& field, const String& termStr) - { - if (!allowLeadingWildcard && boost::starts_with(termStr, L"*")) - boost::throw_exception(QueryParserError(L"'*' not allowed as first character in PrefixQuery")); - String queryTerm(termStr); - if (lowercaseExpandedTerms) - StringUtils::toLower(queryTerm); - TermPtr term(newLucene(field, queryTerm)); - return newPrefixQuery(term); - } - - QueryPtr QueryParser::getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) - { - String queryTerm(termStr); - if (lowercaseExpandedTerms) - StringUtils::toLower(queryTerm); - TermPtr term(newLucene(field, queryTerm)); - return newFuzzyQuery(term, minSimilarity, fuzzyPrefixLength); - } - - String QueryParser::discardEscapeChar(const String& input) - { - // Create char array to hold unescaped char sequence - CharArray output(CharArray::newInstance(input.length())); - - // The length of the output can be less than the input due to discarded escape chars. 
- // This variable holds the actual length of the output - int32_t length = 0; - - // We remember whether the last processed character was an escape character - bool lastCharWasEscapeChar = false; - - // The multiplier the current unicode digit must be multiplied with. eg. the first digit must - // be multiplied with 16^3, the second with 16^2 - int32_t codePointMultiplier = 0; - - // Used to calculate the codepoint of the escaped unicode character - int32_t codePoint = 0; - - for (int32_t i = 0; i < (int32_t)input.length(); ++i) - { - wchar_t curChar = input[i]; - if (codePointMultiplier > 0) - { - codePoint += hexToInt(curChar) * codePointMultiplier; - codePointMultiplier = MiscUtils::unsignedShift(codePointMultiplier, 4); - if (codePointMultiplier == 0) - { - output[length++] = (wchar_t)codePoint; - codePoint = 0; - } + } catch (...) { + } + return newRangeQuery(field, date1, date2, inclusive); +} + +BooleanQueryPtr QueryParser::newBooleanQuery(bool disableCoord) { + return newLucene(disableCoord); +} + +BooleanClausePtr QueryParser::newBooleanClause(const QueryPtr& q, BooleanClause::Occur occur) { + return newLucene(q, occur); +} + +QueryPtr QueryParser::newTermQuery(const TermPtr& term) { + return newLucene(term); +} + +PhraseQueryPtr QueryParser::newPhraseQuery() { + return newLucene(); +} + +MultiPhraseQueryPtr QueryParser::newMultiPhraseQuery() { + return newLucene(); +} + +QueryPtr QueryParser::newPrefixQuery(const TermPtr& prefix) { + PrefixQueryPtr query(newLucene(prefix)); + query->setRewriteMethod(multiTermRewriteMethod); + return query; +} + +QueryPtr QueryParser::newFuzzyQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength) { + // FuzzyQuery doesn't yet allow constant score rewrite + return newLucene(term, minimumSimilarity, prefixLength); +} + +QueryPtr QueryParser::newRangeQuery(const String& field, const String& part1, const String& part2, bool inclusive) { + TermRangeQueryPtr query(newLucene(field, part1, part2, inclusive, 
inclusive, rangeCollator)); + query->setRewriteMethod(multiTermRewriteMethod); + return query; +} + +QueryPtr QueryParser::newMatchAllDocsQuery() { + return newLucene(); +} + +QueryPtr QueryParser::newWildcardQuery(const TermPtr& term) { + WildcardQueryPtr query(newLucene(term)); + query->setRewriteMethod(multiTermRewriteMethod); + return query; +} + +QueryPtr QueryParser::getBooleanQuery(Collection clauses) { + return getBooleanQuery(clauses, false); +} + +QueryPtr QueryParser::getBooleanQuery(Collection clauses, bool disableCoord) { + if (clauses.empty()) { + return QueryPtr(); // all clause words were filtered away by the analyzer. + } + BooleanQueryPtr query(newBooleanQuery(disableCoord)); + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + query->add(*clause); + } + return query; +} + +QueryPtr QueryParser::getWildcardQuery(const String& field, const String& termStr) { + if (field == L"*" && termStr == L"*") { + return newMatchAllDocsQuery(); + } + if (!allowLeadingWildcard && (boost::starts_with(termStr, L"*") || boost::starts_with(termStr, L"?"))) { + boost::throw_exception(QueryParserError(L"'*' or '?' 
not allowed as first character in WildcardQuery")); + } + String queryTerm(termStr); + if (lowercaseExpandedTerms) { + StringUtils::toLower(queryTerm); + } + TermPtr term(newLucene(field, queryTerm)); + return newWildcardQuery(term); +} + +QueryPtr QueryParser::getPrefixQuery(const String& field, const String& termStr) { + if (!allowLeadingWildcard && boost::starts_with(termStr, L"*")) { + boost::throw_exception(QueryParserError(L"'*' not allowed as first character in PrefixQuery")); + } + String queryTerm(termStr); + if (lowercaseExpandedTerms) { + StringUtils::toLower(queryTerm); + } + TermPtr term(newLucene(field, queryTerm)); + return newPrefixQuery(term); +} + +QueryPtr QueryParser::getFuzzyQuery(const String& field, const String& termStr, double minSimilarity) { + String queryTerm(termStr); + if (lowercaseExpandedTerms) { + StringUtils::toLower(queryTerm); + } + TermPtr term(newLucene(field, queryTerm)); + return newFuzzyQuery(term, minSimilarity, fuzzyPrefixLength); +} + +String QueryParser::discardEscapeChar(const String& input) { + // Create char array to hold unescaped char sequence + CharArray output(CharArray::newInstance(input.length())); + + // The length of the output can be less than the input due to discarded escape chars. + // This variable holds the actual length of the output + int32_t length = 0; + + // We remember whether the last processed character was an escape character + bool lastCharWasEscapeChar = false; + + // The multiplier the current unicode digit must be multiplied with. eg. 
the first digit must + // be multiplied with 16^3, the second with 16^2 + int32_t codePointMultiplier = 0; + + // Used to calculate the codepoint of the escaped unicode character + int32_t codePoint = 0; + + for (int32_t i = 0; i < (int32_t)input.length(); ++i) { + wchar_t curChar = input[i]; + if (codePointMultiplier > 0) { + codePoint += hexToInt(curChar) * codePointMultiplier; + codePointMultiplier = MiscUtils::unsignedShift(codePointMultiplier, 4); + if (codePointMultiplier == 0) { + output[length++] = (wchar_t)codePoint; + codePoint = 0; } - else if (lastCharWasEscapeChar) - { - if (curChar == L'u') - { - // found an escaped unicode character - codePointMultiplier = 16 * 16 * 16; - } - else - { - // this character was escaped - output[length++] = curChar; - } - lastCharWasEscapeChar = false; + } else if (lastCharWasEscapeChar) { + if (curChar == L'u') { + // found an escaped unicode character + codePointMultiplier = 16 * 16 * 16; + } else { + // this character was escaped + output[length++] = curChar; } - else - { - if (curChar == L'\\') - lastCharWasEscapeChar = true; - else - output[length++] = curChar; + lastCharWasEscapeChar = false; + } else { + if (curChar == L'\\') { + lastCharWasEscapeChar = true; + } else { + output[length++] = curChar; } } - - if (codePointMultiplier > 0) - boost::throw_exception(QueryParserError(L"Truncated unicode escape sequence.")); - if (lastCharWasEscapeChar) - boost::throw_exception(QueryParserError(L"Term can not end with escape character.")); - return String(output.get(), length); - } - - int32_t QueryParser::hexToInt(wchar_t c) - { - if (L'0' <= c && c <= L'9') - return c - L'0'; - else if (L'a' <= c && c <= L'f') - return c - L'a' + 10; - else if (L'A' <= c && c <= L'F') - return c - L'A' + 10; - else - { - boost::throw_exception(QueryParserError(L"None-hex character in unicode escape sequence: " + StringUtils::toString(c))); - return 0; + } + + if (codePointMultiplier > 0) { + 
boost::throw_exception(QueryParserError(L"Truncated unicode escape sequence.")); + } + if (lastCharWasEscapeChar) { + boost::throw_exception(QueryParserError(L"Term can not end with escape character.")); + } + return String(output.get(), length); +} + +int32_t QueryParser::hexToInt(wchar_t c) { + if (L'0' <= c && c <= L'9') { + return c - L'0'; + } else if (L'a' <= c && c <= L'f') { + return c - L'a' + 10; + } else if (L'A' <= c && c <= L'F') { + return c - L'A' + 10; + } else { + boost::throw_exception(QueryParserError(L"None-hex character in unicode escape sequence: " + StringUtils::toString(c))); + return 0; + } +} + +String QueryParser::escape(const String& s) { + StringStream buffer; + for (int32_t i = 0; i < (int32_t)s.length(); ++i) { + wchar_t c = s[i]; + // These characters are part of the query syntax and must be escaped + if (c == L'\\' || c == L'+' || c == L'-' || c == L'!' || c == L'(' || c == L')' || c == L':' || + c == L'^' || c == L'[' || c == L']' || c == L'\"' || c == L'{' || c == L'}' || c == L'~' || + c == L'*' || c == L'?' || c == L'|' || c == L'&') { + buffer << L"\\"; } + buffer << c; } - - String QueryParser::escape(const String& s) - { - StringStream buffer; - for (int32_t i = 0; i < (int32_t)s.length(); ++i) - { - wchar_t c = s[i]; - // These characters are part of the query syntax and must be escaped - if (c == L'\\' || c == L'+' || c == L'-' || c == L'!' || c == L'(' || c == L')' || c == L':' || - c == L'^' || c == L'[' || c == L']' || c == L'\"' || c == L'{' || c == L'}' || c == L'~' || - c == L'*' || c == L'?' 
|| c == L'|' || c == L'&') - buffer << L"\\"; - buffer << c; + return buffer.str(); +} + +int QueryParser::main(Collection args) { + if (args.empty()) { + std::wcout << L"Usage: QueryParser "; + return 1; + } + QueryParserPtr qp(newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene())); + QueryPtr q(qp->parse(args[0])); + std::wcout << q->toString(L"field"); + return 0; +} + +int32_t QueryParser::Conjunction() { + int32_t ret = CONJ_NONE; + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case AND: + case OR: + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case AND: + jj_consume_token(AND); + ret = CONJ_AND; + break; + case OR: + jj_consume_token(OR); + ret = CONJ_OR; + break; + default: + jj_la1[0] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); } - return buffer.str(); + break; + default: + jj_la1[1] = jj_gen; } - - int QueryParser::main(Collection args) - { - if (args.empty()) - { - std::wcout << L"Usage: QueryParser "; - return 1; + return ret; +} + +int32_t QueryParser::Modifiers() { + int32_t ret = MOD_NONE; + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case NOT: + case PLUS: + case MINUS: + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case PLUS: + jj_consume_token(PLUS); + ret = MOD_REQ; + break; + case MINUS: + jj_consume_token(MINUS); + ret = MOD_NOT; + break; + case NOT: + jj_consume_token(NOT); + ret = MOD_NOT; + break; + default: + jj_la1[2] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); } - QueryParserPtr qp(newLucene(LuceneVersion::LUCENE_CURRENT, L"field", newLucene())); - QueryPtr q(qp->parse(args[0])); - std::wcout << q->toString(L"field"); - return 0; + break; + default: + jj_la1[3] = jj_gen; } - - int32_t QueryParser::Conjunction() - { - int32_t ret = CONJ_NONE; - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case AND: - case OR: - switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) - { - case AND: - jj_consume_token(AND); - ret = CONJ_AND; - break; - case OR: - jj_consume_token(OR); - ret = CONJ_OR; - break; - default: - jj_la1[0] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); - } - break; - default: - jj_la1[1] = jj_gen; + return ret; +} + +QueryPtr QueryParser::TopLevelQuery(const String& field) { + QueryPtr q(ParseQuery(field)); + jj_consume_token(0); + return q; +} + +QueryPtr QueryParser::ParseQuery(const String& field) { + Collection clauses(Collection::newInstance()); + QueryPtr firstQuery; + int32_t mods = Modifiers(); + QueryPtr q(ParseClause(field)); + addClause(clauses, CONJ_NONE, mods, q); + if (mods == MOD_NONE) { + firstQuery = q; + } + for (bool more = true; more; ) { + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case AND: + case OR: + case NOT: + case PLUS: + case MINUS: + case LPAREN: + case STAR: + case QUOTED: + case TERM: + case PREFIXTERM: + case WILDTERM: + case RANGEIN_START: + case RANGEEX_START: + case NUMBER: + break; + default: + jj_la1[4] = jj_gen; + more = false; + continue; } - return ret; + int32_t conj = Conjunction(); + mods = Modifiers(); + q = ParseClause(field); + addClause(clauses, conj, mods, q); } - - int32_t QueryParser::Modifiers() - { - int32_t ret = MOD_NONE; - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case NOT: - case PLUS: - case MINUS: - switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) - { - case PLUS: - jj_consume_token(PLUS); - ret = MOD_REQ; - break; - case MINUS: - jj_consume_token(MINUS); - ret = MOD_NOT; - break; - case NOT: - jj_consume_token(NOT); - ret = MOD_NOT; - break; - default: - jj_la1[2] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); - } - break; - default: - jj_la1[3] = jj_gen; + if (clauses.size() == 1 && firstQuery) { + return firstQuery; + } else { + return getBooleanQuery(clauses); + } +} + +QueryPtr QueryParser::ParseClause(const String& field) { + QueryPtr q; + QueryParserTokenPtr fieldToken; + QueryParserTokenPtr boost; + String fieldClause(field); + if (jj_2_1(2)) { + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case TERM: + fieldToken = jj_consume_token(TERM); + jj_consume_token(COLON); + fieldClause = discardEscapeChar(fieldToken->image); + break; + case STAR: + jj_consume_token(STAR); + jj_consume_token(COLON); + fieldClause = L"*"; + break; + default: + jj_la1[5] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); } - return ret; - } - - QueryPtr QueryParser::TopLevelQuery(const String& field) - { - QueryPtr q(ParseQuery(field)); - jj_consume_token(0); - return q; - } - - QueryPtr QueryParser::ParseQuery(const String& field) - { - Collection clauses(Collection::newInstance()); - QueryPtr firstQuery; - int32_t mods = Modifiers(); - QueryPtr q(ParseClause(field)); - addClause(clauses, CONJ_NONE, mods, q); - if (mods == MOD_NONE) - firstQuery = q; - for (bool more = true; more; ) - { - switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) - { - case AND: - case OR: - case NOT: - case PLUS: - case MINUS: - case LPAREN: - case STAR: - case QUOTED: - case TERM: - case PREFIXTERM: - case WILDTERM: - case RANGEIN_START: - case RANGEEX_START: - case NUMBER: - break; - default: - jj_la1[4] = jj_gen; - more = false; - continue; - } - int32_t conj = Conjunction(); - mods = Modifiers(); - q = ParseClause(field); - addClause(clauses, conj, mods, q); + } + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case STAR: + case QUOTED: + case TERM: + case PREFIXTERM: + case WILDTERM: + case RANGEIN_START: + case RANGEEX_START: + case NUMBER: + q = ParseTerm(fieldClause); + break; + case LPAREN: + jj_consume_token(LPAREN); + q = ParseQuery(fieldClause); + jj_consume_token(RPAREN); + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[6] = jj_gen; } - if (clauses.size() == 1 && firstQuery) - return firstQuery; - else - return getBooleanQuery(clauses); - } - - QueryPtr QueryParser::ParseClause(const String& field) - { - QueryPtr q; - QueryParserTokenPtr fieldToken; - QueryParserTokenPtr boost; - String fieldClause(field); - if (jj_2_1(2)) - { - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case TERM: - fieldToken = jj_consume_token(TERM); - jj_consume_token(COLON); - fieldClause = discardEscapeChar(fieldToken->image); - break; - case STAR: - jj_consume_token(STAR); - jj_consume_token(COLON); - fieldClause = L"*"; - break; - default: - jj_la1[5] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); + break; + default: + jj_la1[7] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); + } + if (boost) { + double f = 1.0; + try { + if (q) { + f = StringUtils::toDouble(boost->image); + q->setBoost(f); } + } catch (...) { } - switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) - { - case STAR: - case QUOTED: - case TERM: - case PREFIXTERM: - case WILDTERM: - case RANGEIN_START: - case RANGEEX_START: - case NUMBER: - q = ParseTerm(fieldClause); - break; - case LPAREN: - jj_consume_token(LPAREN); - q = ParseQuery(fieldClause); - jj_consume_token(RPAREN); - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[6] = jj_gen; - } + } + return q; +} + +QueryPtr QueryParser::ParseTerm(const String& field) { + QueryParserTokenPtr term; + QueryParserTokenPtr boost; + QueryParserTokenPtr fuzzySlop; + QueryParserTokenPtr goop1; + QueryParserTokenPtr goop2; + bool prefix = false; + bool wildcard = false; + bool fuzzy = false; + QueryPtr q; + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case STAR: + case TERM: + case PREFIXTERM: + case WILDTERM: + case NUMBER: + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case TERM: + term = jj_consume_token(TERM); + break; + case STAR: + term = jj_consume_token(STAR); + wildcard = true; + break; + case PREFIXTERM: + term = jj_consume_token(PREFIXTERM); + prefix = true; + break; + case WILDTERM: + term = jj_consume_token(WILDTERM); + wildcard = true; + break; + case NUMBER: + term = jj_consume_token(NUMBER); + break; + default: + jj_la1[8] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); + } + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy = true; + break; + default: + jj_la1[9] = jj_gen; + } + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + fuzzy = true; break; default: - jj_la1[7] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); - } - if (boost) - { - double f = 1.0; - try - { - if (q) - { - f = StringUtils::toDouble(boost->image); - q->setBoost(f); - } - } - catch (...) - { + jj_la1[10] = jj_gen; } + break; + default: + jj_la1[11] = jj_gen; } - return q; - } - - QueryPtr QueryParser::ParseTerm(const String& field) - { - QueryParserTokenPtr term; - QueryParserTokenPtr boost; - QueryParserTokenPtr fuzzySlop; - QueryParserTokenPtr goop1; - QueryParserTokenPtr goop2; - bool prefix = false; - bool wildcard = false; - bool fuzzy = false; - QueryPtr q; - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { - case STAR: - case TERM: - case PREFIXTERM: - case WILDTERM: - case NUMBER: - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case TERM: - term = jj_consume_token(TERM); - break; - case STAR: - term = jj_consume_token(STAR); - wildcard = true; - break; - case PREFIXTERM: - term = jj_consume_token(PREFIXTERM); - prefix = true; - break; - case WILDTERM: - term = jj_consume_token(WILDTERM); - wildcard = true; - break; - case NUMBER: - term = jj_consume_token(NUMBER); - break; - default: - jj_la1[8] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); + String termImage(discardEscapeChar(term->image)); + if (wildcard) { + q = getWildcardQuery(field, termImage); + } else if (prefix) { + q = getPrefixQuery(field, discardEscapeChar(term->image.substr(0, term->image.length() - 1))); + } else if (fuzzy) { + double fms = fuzzyMinSim; + try { + fms = StringUtils::toDouble(fuzzySlop->image.substr(1)); + } catch (...) { } - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - fuzzy = true; - break; - default: - jj_la1[9] = jj_gen; - } - switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) - { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - fuzzy = true; - break; - default: - jj_la1[10] = jj_gen; - } - break; - default: - jj_la1[11] = jj_gen; - } - { - String termImage(discardEscapeChar(term->image)); - if (wildcard) - q = getWildcardQuery(field, termImage); - else if (prefix) - q = getPrefixQuery(field, discardEscapeChar(term->image.substr(0, term->image.length() - 1))); - else if (fuzzy) - { - double fms = fuzzyMinSim; - try - { - fms = StringUtils::toDouble(fuzzySlop->image.substr(1)); - } - catch (...) - { - } - if (fms < 0.0 || fms > 1.0) - boost::throw_exception(QueryParserError(L"Minimum similarity for a FuzzyQuery has to be between 0.0 and 1.0")); - q = getFuzzyQuery(field, termImage, fms); - } - else - q = getFieldQuery(field, termImage); - } - break; - case RANGEIN_START: - jj_consume_token(RANGEIN_START); - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case RANGEIN_GOOP: - goop1 = jj_consume_token(RANGEIN_GOOP); - break; - case RANGEIN_QUOTED: - goop1 = jj_consume_token(RANGEIN_QUOTED); - break; - default: - jj_la1[12] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); - } - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case RANGEIN_TO: - jj_consume_token(RANGEIN_TO); - break; - default: - jj_la1[13] = jj_gen; - } - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case RANGEIN_GOOP: - goop2 = jj_consume_token(RANGEIN_GOOP); - break; - case RANGEIN_QUOTED: - goop2 = jj_consume_token(RANGEIN_QUOTED); - break; - default: - jj_la1[14] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); - } - jj_consume_token(RANGEIN_END); - switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) - { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[15] = jj_gen; - } - if (goop1->kind == RANGEIN_QUOTED) - goop1->image = goop1->image.substr(1, std::max((int32_t)0, (int32_t)goop1->image.length() - 2)); - if (goop2->kind == RANGEIN_QUOTED) - goop2->image = goop2->image.substr(1, std::max((int32_t)0, (int32_t)goop2->image.length() - 2)); - q = getRangeQuery(field, discardEscapeChar(goop1->image), discardEscapeChar(goop2->image), true); - break; - case RANGEEX_START: - jj_consume_token(RANGEEX_START); - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case RANGEEX_GOOP: - goop1 = jj_consume_token(RANGEEX_GOOP); - break; - case RANGEEX_QUOTED: - goop1 = jj_consume_token(RANGEEX_QUOTED); - break; - default: - jj_la1[16] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); - } - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case RANGEEX_TO: - jj_consume_token(RANGEEX_TO); - break; - default: - jj_la1[17] = jj_gen; + if (fms < 0.0 || fms > 1.0) { + boost::throw_exception(QueryParserError(L"Minimum similarity for a FuzzyQuery has to be between 0.0 and 1.0")); } - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case RANGEEX_GOOP: - goop2 = jj_consume_token(RANGEEX_GOOP); - break; - case RANGEEX_QUOTED: - goop2 = jj_consume_token(RANGEEX_QUOTED); - break; - default: - jj_la1[18] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); - } - jj_consume_token(RANGEEX_END); - switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) - { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[19] = jj_gen; - } - if (goop1->kind == RANGEEX_QUOTED) - goop1->image = goop1->image.substr(1, std::max((int32_t)0, (int32_t)goop1->image.length() - 2)); - if (goop2->kind == RANGEEX_QUOTED) - goop2->image = goop2->image.substr(1, std::max((int32_t)0, (int32_t)goop2->image.length() - 2)); - q = getRangeQuery(field, discardEscapeChar(goop1->image), discardEscapeChar(goop2->image), false); - break; - case QUOTED: - term = jj_consume_token(QUOTED); - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case FUZZY_SLOP: - fuzzySlop = jj_consume_token(FUZZY_SLOP); - break; - default: - jj_la1[20] = jj_gen; - } - switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) - { - case CARAT: - jj_consume_token(CARAT); - boost = jj_consume_token(NUMBER); - break; - default: - jj_la1[21] = jj_gen; - } - { - int32_t s = phraseSlop; - if (fuzzySlop) - { - try - { - s = StringUtils::toInt(fuzzySlop->image.substr(1)); - } - catch (...) - { - } - } - q = getFieldQuery(field, discardEscapeChar(term->image.substr(1, std::max((int32_t)0, (int32_t)term->image.length() - 2))), s); - } - break; - default: - jj_la1[22] = jj_gen; - jj_consume_token(-1); - boost::throw_exception(QueryParserError()); - } - if (boost) - { - double f = 1.0; - try - { - f = StringUtils::toDouble(boost->image); - } - catch (...) - { + q = getFuzzyQuery(field, termImage, fms); + } else { + q = getFieldQuery(field, termImage); } - - // avoid boosting null queries, such as those caused by stop words - if (q) - q->setBoost(f); } - return q; - } - - bool QueryParser::jj_2_1(int32_t xla) - { - jj_la = xla; - jj_scanpos = token; - jj_lastpos = jj_scanpos; - bool _jj_2_1 = false; - LuceneException finally; - try - { - _jj_2_1 = !jj_3_1(); + break; + case RANGEIN_START: + jj_consume_token(RANGEIN_START); + switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { + case RANGEIN_GOOP: + goop1 = jj_consume_token(RANGEIN_GOOP); + break; + case RANGEIN_QUOTED: + goop1 = jj_consume_token(RANGEIN_QUOTED); + break; + default: + jj_la1[12] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); } - catch (LookaheadSuccess&) - { - _jj_2_1 = true; + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case RANGEIN_TO: + jj_consume_token(RANGEIN_TO); + break; + default: + jj_la1[13] = jj_gen; } - catch (LuceneException& e) - { - finally = e; + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case RANGEIN_GOOP: + goop2 = jj_consume_token(RANGEIN_GOOP); + break; + case RANGEIN_QUOTED: + goop2 = jj_consume_token(RANGEIN_QUOTED); + break; + default: + jj_la1[14] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); } - jj_save(0, xla); - finally.throwException(); - return _jj_2_1; - } - - bool QueryParser::jj_3R_2() - { - if (jj_scan_token(TERM)) - return true; - if (jj_scan_token(COLON)) - return true; - return false; - } - - bool QueryParser::jj_3_1() - { - QueryParserTokenPtr xsp(jj_scanpos); - if (jj_3R_2()) - { - jj_scanpos = xsp; - if (jj_3R_3()) - return true; + jj_consume_token(RANGEIN_END); + switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[15] = jj_gen; } - return false; - } - - bool QueryParser::jj_3R_3() - { - if (jj_scan_token(STAR)) - return true; - if (jj_scan_token(COLON)) - return true; - return false; - } - - void QueryParser::ReInit(QueryParserCharStreamPtr stream) - { - token_source->ReInit(stream); - token = newLucene(); - _jj_ntk = -1; - jj_gen = 0; - for (int32_t i = 0; i < 23; ++i) - jj_la1[i] = -1; - for (int32_t i = 0; i < jj_2_rtns.size(); ++i) - jj_2_rtns[i] = newInstance(); - } - - void QueryParser::ReInit(QueryParserTokenManagerPtr tokenMgr) - { - token_source = tokenMgr; - token = newLucene(); - _jj_ntk = -1; - jj_gen = 0; - for (int32_t i = 0; i < 23; ++i) - jj_la1[i] = -1; - for (int32_t i = 0; i < jj_2_rtns.size(); ++i) - jj_2_rtns[i] = newInstance(); - } - - QueryParserTokenPtr QueryParser::jj_consume_token(int32_t kind) - { - QueryParserTokenPtr oldToken(token); - if (oldToken->next) - token = token->next; - else - { - token->next = token_source->getNextToken(); - token = token->next; + if (goop1->kind == RANGEIN_QUOTED) { + goop1->image = goop1->image.substr(1, std::max((int32_t)0, (int32_t)goop1->image.length() - 2)); + } + if (goop2->kind == RANGEIN_QUOTED) { + goop2->image = goop2->image.substr(1, std::max((int32_t)0, (int32_t)goop2->image.length() - 2)); + } + q = getRangeQuery(field, discardEscapeChar(goop1->image), discardEscapeChar(goop2->image), true); + break; + case RANGEEX_START: + jj_consume_token(RANGEEX_START); + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case RANGEEX_GOOP: + goop1 = jj_consume_token(RANGEEX_GOOP); + break; + case RANGEEX_QUOTED: + goop1 = jj_consume_token(RANGEEX_QUOTED); + break; + default: + jj_la1[16] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); + } + switch ((_jj_ntk == -1) ? 
jj_ntk() : _jj_ntk) { + case RANGEEX_TO: + jj_consume_token(RANGEEX_TO); + break; + default: + jj_la1[17] = jj_gen; + } + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case RANGEEX_GOOP: + goop2 = jj_consume_token(RANGEEX_GOOP); + break; + case RANGEEX_QUOTED: + goop2 = jj_consume_token(RANGEEX_QUOTED); + break; + default: + jj_la1[18] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); + } + jj_consume_token(RANGEEX_END); + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[19] = jj_gen; + } + if (goop1->kind == RANGEEX_QUOTED) { + goop1->image = goop1->image.substr(1, std::max((int32_t)0, (int32_t)goop1->image.length() - 2)); + } + if (goop2->kind == RANGEEX_QUOTED) { + goop2->image = goop2->image.substr(1, std::max((int32_t)0, (int32_t)goop2->image.length() - 2)); + } + q = getRangeQuery(field, discardEscapeChar(goop1->image), discardEscapeChar(goop2->image), false); + break; + case QUOTED: + term = jj_consume_token(QUOTED); + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case FUZZY_SLOP: + fuzzySlop = jj_consume_token(FUZZY_SLOP); + break; + default: + jj_la1[20] = jj_gen; + } + switch ((_jj_ntk == -1) ? jj_ntk() : _jj_ntk) { + case CARAT: + jj_consume_token(CARAT); + boost = jj_consume_token(NUMBER); + break; + default: + jj_la1[21] = jj_gen; } - _jj_ntk = -1; - if (token->kind == kind) { - ++jj_gen; - if (++jj_gc > 100) - { - jj_gc = 0; - for (int32_t i = 0; i < jj_2_rtns.size(); ++i) - { - JJCallsPtr c(jj_2_rtns[i]); - while (c) - { - if (c->gen < jj_gen) - c->first.reset(); - c = c->next; - } + int32_t s = phraseSlop; + if (fuzzySlop) { + try { + s = StringUtils::toInt(fuzzySlop->image.substr(1)); + } catch (...) 
{ } } - return token; + q = getFieldQuery(field, discardEscapeChar(term->image.substr(1, std::max((int32_t)0, (int32_t)term->image.length() - 2))), s); } - token = oldToken; - jj_kind = kind; - generateParseException(); - return QueryParserTokenPtr(); - } - - bool QueryParser::jj_scan_token(int32_t kind) - { - if (jj_scanpos == jj_lastpos) - { - --jj_la; - if (!jj_scanpos->next) - { - jj_scanpos->next = token_source->getNextToken(); - jj_scanpos = jj_scanpos->next; - jj_lastpos = jj_scanpos; - } - else - { - jj_scanpos = jj_scanpos->next; - jj_lastpos = jj_scanpos; - } + break; + default: + jj_la1[22] = jj_gen; + jj_consume_token(-1); + boost::throw_exception(QueryParserError()); + } + if (boost) { + double f = 1.0; + try { + f = StringUtils::toDouble(boost->image); + } catch (...) { } - else - jj_scanpos = jj_scanpos->next; - if (jj_rescan) - { - int32_t i = 0; - QueryParserTokenPtr tok(token); - while (tok && tok != jj_scanpos) - { - ++i; - tok = tok->next; - } - if (tok) - jj_add_error_token(kind, i); + + // avoid boosting null queries, such as those caused by stop words + if (q) { + q->setBoost(f); } - if (jj_scanpos->kind != kind) + } + return q; +} + +bool QueryParser::jj_2_1(int32_t xla) { + jj_la = xla; + jj_scanpos = token; + jj_lastpos = jj_scanpos; + bool _jj_2_1 = false; + LuceneException finally; + try { + _jj_2_1 = !jj_3_1(); + } catch (LookaheadSuccess&) { + _jj_2_1 = true; + } catch (LuceneException& e) { + finally = e; + } + jj_save(0, xla); + finally.throwException(); + return _jj_2_1; +} + +bool QueryParser::jj_3R_2() { + if (jj_scan_token(TERM)) { + return true; + } + if (jj_scan_token(COLON)) { + return true; + } + return false; +} + +bool QueryParser::jj_3_1() { + QueryParserTokenPtr xsp(jj_scanpos); + if (jj_3R_2()) { + jj_scanpos = xsp; + if (jj_3R_3()) { return true; - if (jj_la == 0 && jj_scanpos == jj_lastpos) - boost::throw_exception(LookaheadSuccess()); - return false; - } - - QueryParserTokenPtr QueryParser::getNextToken() - { - if 
(token->next) - token = token->next; - else - { - token->next = token_source->getNextToken(); - token = token->next; } - _jj_ntk = -1; - ++jj_gen; - return token; } - - QueryParserTokenPtr QueryParser::getToken(int32_t index) - { - QueryParserTokenPtr t(token); - for (int32_t i = 0; i < index; ++i) - { - if (t->next) - t = t->next; - else - { - t->next = token_source->getNextToken(); - t = t->next; + return false; +} + +bool QueryParser::jj_3R_3() { + if (jj_scan_token(STAR)) { + return true; + } + if (jj_scan_token(COLON)) { + return true; + } + return false; +} + +void QueryParser::ReInit(const QueryParserCharStreamPtr& stream) { + token_source->ReInit(stream); + token = newLucene(); + _jj_ntk = -1; + jj_gen = 0; + for (int32_t i = 0; i < 23; ++i) { + jj_la1[i] = -1; + } + for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { + jj_2_rtns[i] = newInstance(); + } +} + +void QueryParser::ReInit(const QueryParserTokenManagerPtr& tokenMgr) { + token_source = tokenMgr; + token = newLucene(); + _jj_ntk = -1; + jj_gen = 0; + for (int32_t i = 0; i < 23; ++i) { + jj_la1[i] = -1; + } + for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { + jj_2_rtns[i] = newInstance(); + } +} + +QueryParserTokenPtr QueryParser::jj_consume_token(int32_t kind) { + QueryParserTokenPtr oldToken(token); + if (oldToken->next) { + token = token->next; + } else { + token->next = token_source->getNextToken(); + token = token->next; + } + _jj_ntk = -1; + if (token->kind == kind) { + ++jj_gen; + if (++jj_gc > 100) { + jj_gc = 0; + for (int32_t i = 0; i < jj_2_rtns.size(); ++i) { + JJCallsPtr c(jj_2_rtns[i]); + while (c) { + if (c->gen < jj_gen) { + c->first.reset(); + } + c = c->next; + } } } - return t; + return token; } - - int32_t QueryParser::jj_ntk() - { - jj_nt = token->next; - if (!jj_nt) - { - token->next = token_source->getNextToken(); - _jj_ntk = token->next->kind; - return _jj_ntk; + token = oldToken; + jj_kind = kind; + generateParseException(); + return QueryParserTokenPtr(); +} + +bool 
QueryParser::jj_scan_token(int32_t kind) { + if (jj_scanpos == jj_lastpos) { + --jj_la; + if (!jj_scanpos->next) { + jj_scanpos->next = token_source->getNextToken(); + jj_scanpos = jj_scanpos->next; + jj_lastpos = jj_scanpos; + } else { + jj_scanpos = jj_scanpos->next; + jj_lastpos = jj_scanpos; } - else - { - _jj_ntk = jj_nt->kind; - return _jj_ntk; + } else { + jj_scanpos = jj_scanpos->next; + } + if (jj_rescan) { + int32_t i = 0; + QueryParserTokenPtr tok(token); + while (tok && tok != jj_scanpos) { + ++i; + tok = tok->next; + } + if (tok) { + jj_add_error_token(kind, i); } } - - void QueryParser::jj_add_error_token(int32_t kind, int32_t pos) - { - if (pos >= 100) - return; - if (pos == jj_endpos + 1) - jj_lasttokens[jj_endpos++] = kind; - else if (jj_endpos != 0) - { - jj_expentry = Collection::newInstance(jj_endpos); - for (int32_t i = 0; i < jj_endpos; ++i) - jj_expentry[i] = jj_lasttokens[i]; - for (Collection< Collection >::iterator oldentry = jj_expentries.begin(); oldentry != jj_expentries.end(); ++oldentry) - { - if (oldentry->size() == jj_expentry.size()) - { - bool jj_entries_loop = true; - for (int32_t i = 0; i < jj_expentry.size(); ++i) - { - if ((*oldentry)[i] != jj_expentry[i]) - { - jj_entries_loop = false; - break; - } - } - if (!jj_entries_loop) - continue; - jj_expentries.add(jj_expentry); - break; - } - } - if (pos != 0) - { - jj_endpos = pos; - jj_lasttokens[jj_endpos - 1] = kind; - } + if (jj_scanpos->kind != kind) { + return true; + } + if (jj_la == 0 && jj_scanpos == jj_lastpos) { + boost::throw_exception(LookaheadSuccess()); + } + return false; +} + +QueryParserTokenPtr QueryParser::getNextToken() { + if (token->next) { + token = token->next; + } else { + token->next = token_source->getNextToken(); + token = token->next; + } + _jj_ntk = -1; + ++jj_gen; + return token; +} + +QueryParserTokenPtr QueryParser::getToken(int32_t index) { + QueryParserTokenPtr t(token); + for (int32_t i = 0; i < index; ++i) { + if (t->next) { + t = t->next; + } 
else { + t->next = token_source->getNextToken(); + t = t->next; } } - - void QueryParser::generateParseException() - { - jj_expentries.clear(); - Collection la1tokens(Collection::newInstance(34)); - if (jj_kind >= 0) - { - la1tokens[jj_kind] = true; - jj_kind = -1; + return t; +} + +int32_t QueryParser::jj_ntk() { + jj_nt = token->next; + if (!jj_nt) { + token->next = token_source->getNextToken(); + _jj_ntk = token->next->kind; + return _jj_ntk; + } else { + _jj_ntk = jj_nt->kind; + return _jj_ntk; + } +} + +void QueryParser::jj_add_error_token(int32_t kind, int32_t pos) { + if (pos >= 100) { + return; + } + if (pos == jj_endpos + 1) { + jj_lasttokens[jj_endpos++] = kind; + } else if (jj_endpos != 0) { + jj_expentry = Collection::newInstance(jj_endpos); + for (int32_t i = 0; i < jj_endpos; ++i) { + jj_expentry[i] = jj_lasttokens[i]; } - for (int32_t i = 0; i < 23; ++i) - { - if (jj_la1[i] == jj_gen) - { - for (int32_t j = 0; j < 32; ++j) - { - if ((jj_la1_0[i] & (1 << j)) != 0) - la1tokens[j] = true; - if ((jj_la1_1[i] & (1 << j)) != 0) - la1tokens[32 + j] = true; + for (Collection< Collection >::iterator oldentry = jj_expentries.begin(); oldentry != jj_expentries.end(); ++oldentry) { + if (oldentry->size() == jj_expentry.size()) { + bool jj_entries_loop = true; + for (int32_t i = 0; i < jj_expentry.size(); ++i) { + if ((*oldentry)[i] != jj_expentry[i]) { + jj_entries_loop = false; + break; + } + } + if (!jj_entries_loop) { + continue; } - } - } - for (int32_t i = 0; i < 34; ++i) - { - if (la1tokens[i]) - { - jj_expentry = Collection::newInstance(1); - jj_expentry[0] = i; jj_expentries.add(jj_expentry); + break; } } - jj_endpos = 0; - jj_rescan_token(); - jj_add_error_token(0, 0); - Collection< Collection > exptokseq(Collection< Collection >::newInstance(jj_expentries.size())); - for (int32_t i = 0; i < jj_expentries.size(); ++i) - exptokseq[i] = jj_expentries[i]; - boost::throw_exception(QueryParserError(QueryParseError::parseError(token, exptokseq, tokenImage))); 
- } - - void QueryParser::enable_tracing() - { - } - - void QueryParser::disable_tracing() - { - } - - void QueryParser::jj_rescan_token() - { - jj_rescan = true; - for (int32_t i = 0; i < 1; ++i) - { - try - { - JJCallsPtr p(jj_2_rtns[i]); - do - { - if (p->gen > jj_gen) - { - jj_la = p->arg; - jj_scanpos = p->first; - jj_lastpos = jj_scanpos; - jj_3_1(); - } - p = p->next; + if (pos != 0) { + jj_endpos = pos; + jj_lasttokens[jj_endpos - 1] = kind; + } + } +} + +void QueryParser::generateParseException() { + jj_expentries.clear(); + Collection la1tokens(Collection::newInstance(34)); + if (jj_kind >= 0) { + la1tokens[jj_kind] = true; + jj_kind = -1; + } + for (int32_t i = 0; i < 23; ++i) { + if (jj_la1[i] == jj_gen) { + for (int32_t j = 0; j < 32; ++j) { + if ((jj_la1_0[i] & (1 << j)) != 0) { + la1tokens[j] = true; + } + if ((jj_la1_1[i] & (1 << j)) != 0) { + la1tokens[32 + j] = true; } - while (p); - } - catch (LookaheadSuccess&) - { } } - jj_rescan = false; } - - void QueryParser::jj_save(int32_t index, int32_t xla) - { - JJCallsPtr p(jj_2_rtns[index]); - while (p->gen > jj_gen) - { - if (!p->next) - { - p->next = newInstance(); + for (int32_t i = 0; i < 34; ++i) { + if (la1tokens[i]) { + jj_expentry = Collection::newInstance(1); + jj_expentry[0] = i; + jj_expentries.add(jj_expentry); + } + } + jj_endpos = 0; + jj_rescan_token(); + jj_add_error_token(0, 0); + Collection< Collection > exptokseq(Collection< Collection >::newInstance(jj_expentries.size())); + for (int32_t i = 0; i < jj_expentries.size(); ++i) { + exptokseq[i] = jj_expentries[i]; + } + boost::throw_exception(QueryParserError(QueryParseError::parseError(token, exptokseq, tokenImage))); +} + +void QueryParser::enable_tracing() { +} + +void QueryParser::disable_tracing() { +} + +void QueryParser::jj_rescan_token() { + jj_rescan = true; + for (int32_t i = 0; i < 1; ++i) { + try { + JJCallsPtr p(jj_2_rtns[i]); + do { + if (p->gen > jj_gen) { + jj_la = p->arg; + jj_scanpos = p->first; + jj_lastpos = 
jj_scanpos; + jj_3_1(); + } p = p->next; - break; - } + } while (p); + } catch (LookaheadSuccess&) { + } + } + jj_rescan = false; +} + +void QueryParser::jj_save(int32_t index, int32_t xla) { + JJCallsPtr p(jj_2_rtns[index]); + while (p->gen > jj_gen) { + if (!p->next) { + p->next = newInstance(); p = p->next; + break; } - p->gen = jj_gen + xla - jj_la; - p->first = token; - p->arg = xla; + p = p->next; } + p->gen = jj_gen + xla - jj_la; + p->first = token; + p->arg = xla; +} + } diff --git a/src/core/queryparser/QueryParserCharStream.cpp b/src/core/queryparser/QueryParserCharStream.cpp index ae152ba5..a5d7b76f 100644 --- a/src/core/queryparser/QueryParserCharStream.cpp +++ b/src/core/queryparser/QueryParserCharStream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,77 +7,66 @@ #include "LuceneInc.h" #include "QueryParserCharStream.h" -namespace Lucene -{ - wchar_t QueryParserCharStream::readChar() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t QueryParserCharStream::getColumn() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t QueryParserCharStream::getLine() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t QueryParserCharStream::getEndColumn() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t QueryParserCharStream::getEndLine() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t QueryParserCharStream::getBeginColumn() - { - BOOST_ASSERT(false); - return 0; // override - } - - int32_t QueryParserCharStream::getBeginLine() - { - BOOST_ASSERT(false); - return 0; // override - } - - void QueryParserCharStream::backup(int32_t amount) - { - BOOST_ASSERT(false); - // override - } - - wchar_t QueryParserCharStream::BeginToken() - { - BOOST_ASSERT(false); - return 0; // override - } - - String QueryParserCharStream::GetImage() - { - BOOST_ASSERT(false); - return L""; // override - } - - CharArray QueryParserCharStream::GetSuffix(int32_t length) - { - BOOST_ASSERT(false); - return CharArray(); // override - } - - void QueryParserCharStream::Done() - { - BOOST_ASSERT(false); - // override - } +namespace Lucene { + +wchar_t QueryParserCharStream::readChar() { + BOOST_ASSERT(false); + return 0; // override +} + +int32_t QueryParserCharStream::getColumn() { + BOOST_ASSERT(false); + return 0; // override +} + +int32_t QueryParserCharStream::getLine() { + BOOST_ASSERT(false); + return 0; // override +} + +int32_t QueryParserCharStream::getEndColumn() { + BOOST_ASSERT(false); + return 0; // override +} + +int32_t QueryParserCharStream::getEndLine() { + BOOST_ASSERT(false); + return 0; // override +} + +int32_t QueryParserCharStream::getBeginColumn() { + BOOST_ASSERT(false); 
+ return 0; // override +} + +int32_t QueryParserCharStream::getBeginLine() { + BOOST_ASSERT(false); + return 0; // override +} + +void QueryParserCharStream::backup(int32_t amount) { + BOOST_ASSERT(false); + // override +} + +wchar_t QueryParserCharStream::BeginToken() { + BOOST_ASSERT(false); + return 0; // override +} + +String QueryParserCharStream::GetImage() { + BOOST_ASSERT(false); + return L""; // override +} + +CharArray QueryParserCharStream::GetSuffix(int32_t length) { + BOOST_ASSERT(false); + return CharArray(); // override +} + +void QueryParserCharStream::Done() { + BOOST_ASSERT(false); + // override +} + } diff --git a/src/core/queryparser/QueryParserConstants.cpp b/src/core/queryparser/QueryParserConstants.cpp index 838b99a1..89e0cb85 100644 --- a/src/core/queryparser/QueryParserConstants.cpp +++ b/src/core/queryparser/QueryParserConstants.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,52 +7,50 @@ #include "LuceneInc.h" #include "QueryParserConstants.h" -namespace Lucene -{ - const wchar_t* QueryParserConstants::_tokenImage[] = - { - L"", - L"<_NUM_CHAR>", - L"<_ESCAPED_CHAR>", - L"<_TERM_START_CHAR>", - L"<_TERM_CHAR>", - L"<_WHITESPACE>", - L"<_QUOTED_CHAR>", - L"", - L"", - L"", - L"", - L"\"+\"", - L"\"-\"", - L"\"(\"", - L"\")\"", - L"\":\"", - L"\"*\"", - L"\"^\"", - L"", - L"", - L"", - L"", - L"", - L"\"[\"", - L"\"{\"", - L"", - L"\"TO\"", - L"\"]\"", - L"", - L"", - L"\"TO\"", - L"\"}\"", - L"", - L"" - }; - Collection QueryParserConstants::tokenImage = Collection::newInstance(_tokenImage, _tokenImage + SIZEOF_ARRAY(_tokenImage)); - - QueryParserConstants::QueryParserConstants() - { - } +namespace Lucene { + +const wchar_t* QueryParserConstants::_tokenImage[] = { + L"", + L"<_NUM_CHAR>", + L"<_ESCAPED_CHAR>", + L"<_TERM_START_CHAR>", + L"<_TERM_CHAR>", + L"<_WHITESPACE>", + L"<_QUOTED_CHAR>", + L"", + L"", + L"", + L"", + L"\"+\"", + L"\"-\"", + L"\"(\"", + L"\")\"", + L"\":\"", + L"\"*\"", + L"\"^\"", + L"", + L"", + L"", + L"", + L"", + L"\"[\"", + L"\"{\"", + L"", + L"\"TO\"", + L"\"]\"", + L"", + L"", + L"\"TO\"", + L"\"}\"", + L"", + L"" +}; +Collection QueryParserConstants::tokenImage = Collection::newInstance(_tokenImage, _tokenImage + SIZEOF_ARRAY(_tokenImage)); + +QueryParserConstants::QueryParserConstants() { +} + +QueryParserConstants::~QueryParserConstants() { +} - QueryParserConstants::~QueryParserConstants() - { - } } diff --git a/src/core/queryparser/QueryParserToken.cpp b/src/core/queryparser/QueryParserToken.cpp index f37f6aff..973c331c 100644 --- a/src/core/queryparser/QueryParserToken.cpp +++ b/src/core/queryparser/QueryParserToken.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,29 +7,26 @@ #include "LuceneInc.h" #include "QueryParserToken.h" -namespace Lucene -{ - QueryParserToken::QueryParserToken(int32_t kind, const String& image) - { - this->kind = kind; - this->image = image; - this->beginLine = 0; - this->beginColumn = 0; - this->endLine = 0; - this->endColumn = 0; - } - - QueryParserToken::~QueryParserToken() - { - } - - String QueryParserToken::toString() - { - return image; - } - - QueryParserTokenPtr QueryParserToken::newToken(int32_t ofKind, const String& image) - { - return newLucene(ofKind, image); - } +namespace Lucene { + +QueryParserToken::QueryParserToken(int32_t kind, const String& image) { + this->kind = kind; + this->image = image; + this->beginLine = 0; + this->beginColumn = 0; + this->endLine = 0; + this->endColumn = 0; +} + +QueryParserToken::~QueryParserToken() { +} + +String QueryParserToken::toString() { + return image; +} + +QueryParserTokenPtr QueryParserToken::newToken(int32_t ofKind, const String& image) { + return newLucene(ofKind, image); +} + } diff --git a/src/core/queryparser/QueryParserTokenManager.cpp b/src/core/queryparser/QueryParserTokenManager.cpp index e769470f..cd43c7e8 100644 --- a/src/core/queryparser/QueryParserTokenManager.cpp +++ b/src/core/queryparser/QueryParserTokenManager.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,1320 +12,1279 @@ #include "InfoStream.h" #include "StringUtils.h" -namespace Lucene -{ - const int64_t QueryParserTokenManager::jjbitVec0[] = {0x1LL, 0x0LL, 0x0LL, 0x0LL}; - const int64_t QueryParserTokenManager::jjbitVec1[] = {0xfffffffffffffffeLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL}; - const int64_t QueryParserTokenManager::jjbitVec3[] = {0x0LL, 0x0LL, 0xffffffffffffffffLL, 0xffffffffffffffffLL}; - const int64_t QueryParserTokenManager::jjbitVec4[] = {0xfffefffffffffffeLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL, 0xffffffffffffffffLL}; - const int32_t QueryParserTokenManager::jjnextStates[] = {15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27, 2, 4, 5, 0, 1}; - - /// Token literal values. - const wchar_t* QueryParserTokenManager::jjstrLiteralImages[] = - { - L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"\53", L"\55", - L"\50", L"\51", L"\72", L"\52", L"\136", L"", L"", L"", L"", L"", L"\133", - L"\173", L"", L"\124\117", L"\135", L"", L"", L"\124\117", L"\175", L"", L"" - }; - - /// Lexer state names. - const wchar_t* QueryParserTokenManager::lexStateNames[] = - { - L"Boost", L"RangeEx", L"RangeIn", L"DEFAULT" - }; - - /// Lex State array. 
- const int32_t QueryParserTokenManager::jjnewLexState[] = - { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, - -1, -1, -1, -1, -1, 2, 1, 3, -1, 3, -1, -1, -1, 3, -1, -1 - }; - - const int64_t QueryParserTokenManager::jjtoToken[] = {0x3ffffff01LL}; - const int64_t QueryParserTokenManager::jjtoSkip[] = {0x80LL}; - - QueryParserTokenManager::QueryParserTokenManager(QueryParserCharStreamPtr stream) - { - debugStream = newLucene(); - jjrounds = IntArray::newInstance(36); - jjstateSet = IntArray::newInstance(72); - curChar = 0; - curLexState = 3; - defaultLexState = 3; - jjnewStateCnt = 0; - jjround = 0; - jjmatchedPos = 0; - jjmatchedKind = 0; - input_stream = stream; - } - - QueryParserTokenManager::QueryParserTokenManager(QueryParserCharStreamPtr stream, int32_t lexState) - { - debugStream = newLucene(); - jjrounds = IntArray::newInstance(36); - jjstateSet = IntArray::newInstance(72); - input_stream = stream; - curChar = 0; - curLexState = 3; - defaultLexState = 3; - jjnewStateCnt = 0; - jjround = 0; - jjmatchedPos = 0; - jjmatchedKind = 0; - SwitchTo(lexState); - } - - QueryParserTokenManager::~QueryParserTokenManager() - { - } - - void QueryParserTokenManager::setDebugStream(InfoStreamPtr debugStream) - { - this->debugStream = debugStream; - } - - int32_t QueryParserTokenManager::jjStopStringLiteralDfa_3(int32_t pos, int64_t active0) - { - return -1; - } - - int32_t QueryParserTokenManager::jjStartNfa_3(int32_t pos, int64_t active0) - { - return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1); +namespace Lucene { + +const int64_t QueryParserTokenManager::jjbitVec0[] = {0x1LL, 0x0LL, 0x0LL, 0x0LL}; +const int64_t QueryParserTokenManager::jjbitVec1[] = {static_cast(0xfffffffffffffffeLL), static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL)}; +const int64_t QueryParserTokenManager::jjbitVec3[] = {0x0LL, 0x0LL, static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL)}; 
+const int64_t QueryParserTokenManager::jjbitVec4[] = {static_cast(0xfffefffffffffffeLL), static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL), static_cast(0xffffffffffffffffLL)}; + +const int32_t QueryParserTokenManager::jjnextStates[] = {15, 16, 18, 29, 32, 23, 33, 30, 20, 21, 32, 23, 33, 31, 34, 27, 2, 4, 5, 0, 1}; + +/// Token literal values. +const wchar_t* QueryParserTokenManager::jjstrLiteralImages[] = { + L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"", L"\53", L"\55", + L"\50", L"\51", L"\72", L"\52", L"\136", L"", L"", L"", L"", L"", L"\133", + L"\173", L"", L"\124\117", L"\135", L"", L"", L"\124\117", L"\175", L"", L"" +}; + +/// Lexer state names. +const wchar_t* QueryParserTokenManager::lexStateNames[] = { + L"Boost", L"RangeEx", L"RangeIn", L"DEFAULT" +}; + +/// Lex State array. +const int32_t QueryParserTokenManager::jjnewLexState[] = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, + -1, -1, -1, -1, -1, 2, 1, 3, -1, 3, -1, -1, -1, 3, -1, -1 +}; + +const int64_t QueryParserTokenManager::jjtoToken[] = {0x3ffffff01LL}; +const int64_t QueryParserTokenManager::jjtoSkip[] = {0x80LL}; + +QueryParserTokenManager::QueryParserTokenManager(const QueryParserCharStreamPtr& stream) { + debugStream = newLucene(); + jjrounds = IntArray::newInstance(36); + jjstateSet = IntArray::newInstance(72); + curChar = 0; + curLexState = 3; + defaultLexState = 3; + jjnewStateCnt = 0; + jjround = 0; + jjmatchedPos = 0; + jjmatchedKind = 0; + input_stream = stream; +} + +QueryParserTokenManager::QueryParserTokenManager(const QueryParserCharStreamPtr& stream, int32_t lexState) { + debugStream = newLucene(); + jjrounds = IntArray::newInstance(36); + jjstateSet = IntArray::newInstance(72); + input_stream = stream; + curChar = 0; + curLexState = 3; + defaultLexState = 3; + jjnewStateCnt = 0; + jjround = 0; + jjmatchedPos = 0; + jjmatchedKind = 0; + SwitchTo(lexState); +} + +QueryParserTokenManager::~QueryParserTokenManager() { +} + 
+void QueryParserTokenManager::setDebugStream(const InfoStreamPtr& debugStream) { + this->debugStream = debugStream; +} + +int32_t QueryParserTokenManager::jjStopStringLiteralDfa_3(int32_t pos, int64_t active0) { + return -1; +} + +int32_t QueryParserTokenManager::jjStartNfa_3(int32_t pos, int64_t active0) { + return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1); +} + +int32_t QueryParserTokenManager::jjStopAtPos(int32_t pos, int32_t kind) { + jjmatchedKind = kind; + jjmatchedPos = pos; + return pos + 1; +} + +int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_3() { + switch (curChar) { + case 40: + return jjStopAtPos(0, 13); + case 41: + return jjStopAtPos(0, 14); + case 42: + return jjStartNfaWithStates_3(0, 16, 36); + case 43: + return jjStopAtPos(0, 11); + case 45: + return jjStopAtPos(0, 12); + case 58: + return jjStopAtPos(0, 15); + case 91: + return jjStopAtPos(0, 23); + case 94: + return jjStopAtPos(0, 17); + case 123: + return jjStopAtPos(0, 24); + default: + return jjMoveNfa_3(0, 0); } - - int32_t QueryParserTokenManager::jjStopAtPos(int32_t pos, int32_t kind) - { - jjmatchedKind = kind; - jjmatchedPos = pos; +} + +int32_t QueryParserTokenManager::jjStartNfaWithStates_3(int32_t pos, int32_t kind, int32_t state) { + jjmatchedKind = kind; + jjmatchedPos = pos; + try { + curChar = input_stream->readChar(); + } catch (IOException&) { return pos + 1; } - - int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_3() - { - switch (curChar) - { - case 40: - return jjStopAtPos(0, 13); - case 41: - return jjStopAtPos(0, 14); - case 42: - return jjStartNfaWithStates_3(0, 16, 36); - case 43: - return jjStopAtPos(0, 11); - case 45: - return jjStopAtPos(0, 12); - case 58: - return jjStopAtPos(0, 15); - case 91: - return jjStopAtPos(0, 23); - case 94: - return jjStopAtPos(0, 17); - case 123: - return jjStopAtPos(0, 24); - default: - return jjMoveNfa_3(0, 0); - } - } - - int32_t QueryParserTokenManager::jjStartNfaWithStates_3(int32_t pos, int32_t 
kind, int32_t state) - { - jjmatchedKind = kind; - jjmatchedPos = pos; - try - { - curChar = input_stream->readChar(); - } - catch (IOException&) - { - return pos + 1; + return jjMoveNfa_3(state, pos + 1); +} + +int32_t QueryParserTokenManager::jjMoveNfa_3(int32_t startState, int32_t curPos) { + int32_t startsAt = 0; + jjnewStateCnt = 36; + int32_t i = 1; + jjstateSet[0] = startState; + int32_t kind = 0x7fffffff; + while (true) { + if (++jjround == 0x7fffffff) { + ReInitRounds(); } - return jjMoveNfa_3(state, pos + 1); - } - - int32_t QueryParserTokenManager::jjMoveNfa_3(int32_t startState, int32_t curPos) - { - int32_t startsAt = 0; - jjnewStateCnt = 36; - int32_t i = 1; - jjstateSet[0] = startState; - int32_t kind = 0x7fffffff; - while (true) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - int64_t l = (int64_t)1 << curChar; - do - { - switch (jjstateSet[--i]) - { - case 36: - case 25: - if ((0xfbfffcf8ffffd9ffLL & l) == 0) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; - case 0: - if ((0xfbffd4f8ffffd9ffLL & l) != 0) - { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - } - else if ((0x100002600LL & l) != 0) - { - if (kind > 7) - kind = 7; - } - else if (curChar == 34) - jjCheckNAddStates(0, 2); - else if (curChar == 33) - { - if (kind > 10) - kind = 10; - } - if ((0x7bffd0f8ffffd9ffLL & l) != 0) - { - if (kind > 19) - kind = 19; - jjCheckNAddStates(3, 7); - } - else if (curChar == 42) - { - if (kind > 21) - kind = 21; - } - if (curChar == 38) - jjstateSet[jjnewStateCnt++] = 4; - break; - case 4: - if (curChar == 38 && kind > 8) - kind = 8; - break; - case 5: - if (curChar == 38) - jjstateSet[jjnewStateCnt++] = 4; - break; - case 13: - if (curChar == 33 && kind > 10) - kind = 10; - break; - case 14: - if (curChar == 34) - jjCheckNAddStates(0, 2); - break; - case 15: - if ((0xfffffffbffffffffLL & l) != 0) - jjCheckNAddStates(0, 2); - break; - case 17: - jjCheckNAddStates(0, 2); - break; 
- case 18: - if (curChar == 34 && kind > 18) - kind = 18; - break; - case 20: - if ((0x3ff000000000000LL & l) == 0) - break; - if (kind > 20) - kind = 20; - jjAddStates(8, 9); - break; - case 21: - if (curChar == 46) - jjCheckNAdd(22); - break; - case 22: - if ((0x3ff000000000000LL & l) == 0) - break; - if (kind > 20) - kind = 20; - jjCheckNAdd(22); - break; - case 23: - if (curChar == 42 && kind > 21) - kind = 21; - break; - case 24: - if ((0xfbffd4f8ffffd9ffLL & l) == 0) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; - case 27: - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; - case 28: - if ((0x7bffd0f8ffffd9ffLL & l) == 0) - break; - if (kind > 19) - kind = 19; - jjCheckNAddStates(3, 7); - break; - case 29: - if ((0x7bfff8f8ffffd9ffLL & l) == 0) - break; - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); - break; - case 31: - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); - break; - case 32: - if ((0x7bfff8f8ffffd9ffLL & l) != 0) - jjCheckNAddStates(10, 12); - break; - case 34: - jjCheckNAddStates(10, 12); - break; - default: - break; + if (curChar < 64) { + int64_t l = (int64_t)1 << curChar; + do { + switch (jjstateSet[--i]) { + case 36: + case 25: + if ((0xfbfffcf8ffffd9ffLL & l) == 0) { + break; + } + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 0: + if ((0xfbffd4f8ffffd9ffLL & l) != 0) { + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + } else if ((0x100002600LL & l) != 0) { + if (kind > 7) { + kind = 7; + } + } else if (curChar == 34) { + jjCheckNAddStates(0, 2); + } else if (curChar == 33) { + if (kind > 10) { + kind = 10; + } + } + if ((0x7bffd0f8ffffd9ffLL & l) != 0) { + if (kind > 19) { + kind = 19; + } + jjCheckNAddStates(3, 7); + } else if (curChar == 42) { + if (kind > 21) { + kind = 21; + } + } + if (curChar == 38) { + jjstateSet[jjnewStateCnt++] = 4; + } + break; + case 4: + if (curChar == 38 && kind > 8) { + kind 
= 8; + } + break; + case 5: + if (curChar == 38) { + jjstateSet[jjnewStateCnt++] = 4; + } + break; + case 13: + if (curChar == 33 && kind > 10) { + kind = 10; + } + break; + case 14: + if (curChar == 34) { + jjCheckNAddStates(0, 2); + } + break; + case 15: + if ((0xfffffffbffffffffLL & l) != 0) { + jjCheckNAddStates(0, 2); + } + break; + case 17: + jjCheckNAddStates(0, 2); + break; + case 18: + if (curChar == 34 && kind > 18) { + kind = 18; + } + break; + case 20: + if ((0x3ff000000000000LL & l) == 0) { + break; + } + if (kind > 20) { + kind = 20; + } + jjAddStates(8, 9); + break; + case 21: + if (curChar == 46) { + jjCheckNAdd(22); + } + break; + case 22: + if ((0x3ff000000000000LL & l) == 0) { + break; + } + if (kind > 20) { + kind = 20; + } + jjCheckNAdd(22); + break; + case 23: + if (curChar == 42 && kind > 21) { + kind = 21; + } + break; + case 24: + if ((0xfbffd4f8ffffd9ffLL & l) == 0) { + break; + } + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 27: + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 28: + if ((0x7bffd0f8ffffd9ffLL & l) == 0) { + break; + } + if (kind > 19) { + kind = 19; + } + jjCheckNAddStates(3, 7); + break; + case 29: + if ((0x7bfff8f8ffffd9ffLL & l) == 0) { + break; } + if (kind > 19) { + kind = 19; + } + jjCheckNAddTwoStates(29, 30); + break; + case 31: + if (kind > 19) { + kind = 19; + } + jjCheckNAddTwoStates(29, 30); + break; + case 32: + if ((0x7bfff8f8ffffd9ffLL & l) != 0) { + jjCheckNAddStates(10, 12); + } + break; + case 34: + jjCheckNAddStates(10, 12); + break; + default: + break; } - while (i != startsAt); - } - else if (curChar < 128) - { - int64_t l = (int64_t)1 << (curChar & 077); - do - { - switch (jjstateSet[--i]) - { - case 36: - if ((0x97ffffff87ffffffLL & l) != 0) - { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - } - else if (curChar == 92) - jjCheckNAddTwoStates(27, 27); - break; - case 0: - if ((0x97ffffff87ffffffLL & l) != 0) - { 
- if (kind > 19) - kind = 19; - jjCheckNAddStates(3, 7); - } - else if (curChar == 92) - jjCheckNAddStates(13, 15); - else if (curChar == 126) - { - if (kind > 20) - kind = 20; - jjstateSet[jjnewStateCnt++] = 20; - } - if ((0x97ffffff87ffffffLL & l) != 0) - { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - } - if (curChar == 78) - jjstateSet[jjnewStateCnt++] = 11; - else if (curChar == 124) - jjstateSet[jjnewStateCnt++] = 8; - else if (curChar == 79) - jjstateSet[jjnewStateCnt++] = 6; - else if (curChar == 65) - jjstateSet[jjnewStateCnt++] = 2; - break; - case 1: - if (curChar == 68 && kind > 8) - kind = 8; - break; - case 2: - if (curChar == 78) - jjstateSet[jjnewStateCnt++] = 1; - break; - case 3: - if (curChar == 65) - jjstateSet[jjnewStateCnt++] = 2; - break; - case 6: - if (curChar == 82 && kind > 9) - kind = 9; - break; - case 7: - if (curChar == 79) - jjstateSet[jjnewStateCnt++] = 6; - break; - case 8: - if (curChar == 124 && kind > 9) - kind = 9; - break; - case 9: - if (curChar == 124) - jjstateSet[jjnewStateCnt++] = 8; - break; - case 10: - if (curChar == 84 && kind > 10) - kind = 10; - break; - case 11: - if (curChar == 79) - jjstateSet[jjnewStateCnt++] = 10; - break; - case 12: - if (curChar == 78) - jjstateSet[jjnewStateCnt++] = 11; - break; - case 15: - if ((0xffffffffefffffffLL & l) != 0) - jjCheckNAddStates(0, 2); - break; - case 16: - if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 17; - break; - case 17: - jjCheckNAddStates(0, 2); - break; - case 19: - if (curChar != 126) - break; - if (kind > 20) - kind = 20; - jjstateSet[jjnewStateCnt++] = 20; - break; - case 24: - if ((0x97ffffff87ffffffLL & l) == 0) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; - case 25: - if ((0x97ffffff87ffffffLL & l) == 0) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; - case 26: - if (curChar == 92) - jjCheckNAddTwoStates(27, 27); - break; - case 27: - if (kind > 22) - kind = 22; - 
jjCheckNAddTwoStates(25, 26); - break; - case 28: - if ((0x97ffffff87ffffffLL & l) == 0) - break; - if (kind > 19) - kind = 19; - jjCheckNAddStates(3, 7); - break; - case 29: - if ((0x97ffffff87ffffffLL & l) == 0) - break; - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); - break; - case 30: - if (curChar == 92) - jjCheckNAddTwoStates(31, 31); - break; - case 31: - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); - break; - case 32: - if ((0x97ffffff87ffffffLL & l) != 0) - jjCheckNAddStates(10, 12); - break; - case 33: - if (curChar == 92) - jjCheckNAddTwoStates(34, 34); - break; - case 34: - jjCheckNAddStates(10, 12); - break; - case 35: - if (curChar == 92) - jjCheckNAddStates(13, 15); - break; - default: - break; + } while (i != startsAt); + } else if (curChar < 128) { + int64_t l = (int64_t)1 << (curChar & 077); + do { + switch (jjstateSet[--i]) { + case 36: + if ((0x97ffffff87ffffffLL & l) != 0) { + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + } else if (curChar == 92) { + jjCheckNAddTwoStates(27, 27); + } + break; + case 0: + if ((0x97ffffff87ffffffLL & l) != 0) { + if (kind > 19) { + kind = 19; + } + jjCheckNAddStates(3, 7); + } else if (curChar == 92) { + jjCheckNAddStates(13, 15); + } else if (curChar == 126) { + if (kind > 20) { + kind = 20; + } + jjstateSet[jjnewStateCnt++] = 20; + } + if ((0x97ffffff87ffffffLL & l) != 0) { + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + } + if (curChar == 78) { + jjstateSet[jjnewStateCnt++] = 11; + } else if (curChar == 124) { + jjstateSet[jjnewStateCnt++] = 8; + } else if (curChar == 79) { + jjstateSet[jjnewStateCnt++] = 6; + } else if (curChar == 65) { + jjstateSet[jjnewStateCnt++] = 2; + } + break; + case 1: + if (curChar == 68 && kind > 8) { + kind = 8; + } + break; + case 2: + if (curChar == 78) { + jjstateSet[jjnewStateCnt++] = 1; + } + break; + case 3: + if (curChar == 65) { + jjstateSet[jjnewStateCnt++] = 2; + } + break; + case 6: + if (curChar 
== 82 && kind > 9) { + kind = 9; + } + break; + case 7: + if (curChar == 79) { + jjstateSet[jjnewStateCnt++] = 6; } + break; + case 8: + if (curChar == 124 && kind > 9) { + kind = 9; + } + break; + case 9: + if (curChar == 124) { + jjstateSet[jjnewStateCnt++] = 8; + } + break; + case 10: + if (curChar == 84 && kind > 10) { + kind = 10; + } + break; + case 11: + if (curChar == 79) { + jjstateSet[jjnewStateCnt++] = 10; + } + break; + case 12: + if (curChar == 78) { + jjstateSet[jjnewStateCnt++] = 11; + } + break; + case 15: + if ((0xffffffffefffffffLL & l) != 0) { + jjCheckNAddStates(0, 2); + } + break; + case 16: + if (curChar == 92) { + jjstateSet[jjnewStateCnt++] = 17; + } + break; + case 17: + jjCheckNAddStates(0, 2); + break; + case 19: + if (curChar != 126) { + break; + } + if (kind > 20) { + kind = 20; + } + jjstateSet[jjnewStateCnt++] = 20; + break; + case 24: + if ((0x97ffffff87ffffffLL & l) == 0) { + break; + } + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 25: + if ((0x97ffffff87ffffffLL & l) == 0) { + break; + } + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 26: + if (curChar == 92) { + jjCheckNAddTwoStates(27, 27); + } + break; + case 27: + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 28: + if ((0x97ffffff87ffffffLL & l) == 0) { + break; + } + if (kind > 19) { + kind = 19; + } + jjCheckNAddStates(3, 7); + break; + case 29: + if ((0x97ffffff87ffffffLL & l) == 0) { + break; + } + if (kind > 19) { + kind = 19; + } + jjCheckNAddTwoStates(29, 30); + break; + case 30: + if (curChar == 92) { + jjCheckNAddTwoStates(31, 31); + } + break; + case 31: + if (kind > 19) { + kind = 19; + } + jjCheckNAddTwoStates(29, 30); + break; + case 32: + if ((0x97ffffff87ffffffLL & l) != 0) { + jjCheckNAddStates(10, 12); + } + break; + case 33: + if (curChar == 92) { + jjCheckNAddTwoStates(34, 34); + } + break; + case 34: + jjCheckNAddStates(10, 12); + break; + case 35: + 
if (curChar == 92) { + jjCheckNAddStates(13, 15); + } + break; + default: + break; } - while (i != startsAt); - } - else - { - int32_t hiByte = (int32_t)(curChar >> 8); - int32_t i1 = hiByte >> 6; - int64_t l1 = (int64_t)1 << (hiByte & 077); - int32_t i2 = (curChar & 0xff) >> 6; - int64_t l2 = (int64_t)1 << (curChar & 077); - do - { - switch (jjstateSet[--i]) - { - case 36: - case 25: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; - case 0: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - { - if (kind > 7) - kind = 7; - } - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - { - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - } - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - { - if (kind > 19) - kind = 19; - jjCheckNAddStates(3, 7); - } - break; - case 15: - case 17: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(0, 2); - break; - case 24: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) + } while (i != startsAt); + } else { + int32_t hiByte = (int32_t)(curChar >> 8); + int32_t i1 = hiByte >> 6; + int64_t l1 = (int64_t)1 << (hiByte & 077); + int32_t i2 = (curChar & 0xff) >> 6; + int64_t l2 = (int64_t)1 << (curChar & 077); + do { + switch (jjstateSet[--i]) { + case 36: + case 25: + if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) { + break; + } + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 0: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { + if (kind > 7) { + kind = 7; + } + } + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + } + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { + if (kind > 19) { + kind = 19; + } + jjCheckNAddStates(3, 7); + } + break; + case 15: + case 17: + if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { + jjCheckNAddStates(0, 2); + } + break; + case 24: + if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) { break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; - case 
27: - if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) - break; - if (kind > 22) - kind = 22; - jjCheckNAddTwoStates(25, 26); - break; - case 28: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind > 19) - kind = 19; - jjCheckNAddStates(3, 7); + } + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 27: + if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) { break; - case 29: - if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) - break; - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); - break; - case 31: - if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) - break; - if (kind > 19) - kind = 19; - jjCheckNAddTwoStates(29, 30); - break; - case 32: - if (jjCanMove_2(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(10, 12); - break; - case 34: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjCheckNAddStates(10, 12); - break; - default: - break; } + if (kind > 22) { + kind = 22; + } + jjCheckNAddTwoStates(25, 26); + break; + case 28: + if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) { + break; + } + if (kind > 19) { + kind = 19; + } + jjCheckNAddStates(3, 7); + break; + case 29: + if (!jjCanMove_2(hiByte, i1, i2, l1, l2)) { + break; + } + if (kind > 19) { + kind = 19; + } + jjCheckNAddTwoStates(29, 30); + break; + case 31: + if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) { + break; + } + if (kind > 19) { + kind = 19; + } + jjCheckNAddTwoStates(29, 30); + break; + case 32: + if (jjCanMove_2(hiByte, i1, i2, l1, l2)) { + jjCheckNAddStates(10, 12); + } + break; + case 34: + if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { + jjCheckNAddStates(10, 12); + } + break; + default: + break; } - while (i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 36 - (jjnewStateCnt = startsAt))) - return curPos; - try - { - curChar = input_stream->readChar(); - } - catch (IOException&) - { - return curPos; - } + } while (i != startsAt); } - } - - int32_t 
QueryParserTokenManager::jjStopStringLiteralDfa_1(int32_t pos, int64_t active0) - { - switch (pos) - { - case 0: - if ((active0 & 0x40000000LL) != 0) - { - jjmatchedKind = 33; - return 6; - } - return -1; - default: - return -1; + if (kind != 0x7fffffff) { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; } - } - - int32_t QueryParserTokenManager::jjStartNfa_1(int32_t pos, int64_t active0) - { - return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); - } - - int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_1() - { - switch (curChar) - { - case 84: - return jjMoveStringLiteralDfa1_1(0x40000000LL); - case 125: - return jjStopAtPos(0, 31); - default: - return jjMoveNfa_1(0, 0); + ++curPos; + i = jjnewStateCnt; + jjnewStateCnt = startsAt; + if (i == (startsAt = 36 - jjnewStateCnt)) { + return curPos; } - } - - int32_t QueryParserTokenManager::jjMoveStringLiteralDfa1_1(int64_t active0) - { - try - { + try { curChar = input_stream->readChar(); + } catch (IOException&) { + return curPos; } - catch (IOException&) - { - jjStopStringLiteralDfa_1(0, active0); - return 1; - } - switch (curChar) - { - case 79: - if ((active0 & 0x40000000LL) != 0) - return jjStartNfaWithStates_1(1, 30, 6); - break; - default: - break; - } - return jjStartNfa_1(0, active0); } - - int32_t QueryParserTokenManager::jjStartNfaWithStates_1(int32_t pos, int32_t kind, int32_t state) - { - jjmatchedKind = kind; - jjmatchedPos = pos; - try - { - curChar = input_stream->readChar(); +} + +int32_t QueryParserTokenManager::jjStopStringLiteralDfa_1(int32_t pos, int64_t active0) { + switch (pos) { + case 0: + if ((active0 & 0x40000000LL) != 0) { + jjmatchedKind = 33; + return 6; } - catch (IOException&) - { - return pos + 1; + return -1; + default: + return -1; + } +} + +int32_t QueryParserTokenManager::jjStartNfa_1(int32_t pos, int64_t active0) { + return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); +} + +int32_t 
QueryParserTokenManager::jjMoveStringLiteralDfa0_1() { + switch (curChar) { + case 84: + return jjMoveStringLiteralDfa1_1(0x40000000LL); + case 125: + return jjStopAtPos(0, 31); + default: + return jjMoveNfa_1(0, 0); + } +} + +int32_t QueryParserTokenManager::jjMoveStringLiteralDfa1_1(int64_t active0) { + try { + curChar = input_stream->readChar(); + } catch (IOException&) { + jjStopStringLiteralDfa_1(0, active0); + return 1; + } + switch (curChar) { + case 79: + if ((active0 & 0x40000000LL) != 0) { + return jjStartNfaWithStates_1(1, 30, 6); } - return jjMoveNfa_1(state, pos + 1); + break; + default: + break; + } + return jjStartNfa_1(0, active0); +} + +int32_t QueryParserTokenManager::jjStartNfaWithStates_1(int32_t pos, int32_t kind, int32_t state) { + jjmatchedKind = kind; + jjmatchedPos = pos; + try { + curChar = input_stream->readChar(); + } catch (IOException&) { + return pos + 1; } - - int32_t QueryParserTokenManager::jjMoveNfa_1(int32_t startState, int32_t curPos) - { - int32_t startsAt = 0; - jjnewStateCnt = 7; - int32_t i = 1; - jjstateSet[0] = startState; - int32_t kind = 0x7fffffff; - while (true) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - int64_t l = (int64_t)1 << curChar; - do - { - switch (jjstateSet[--i]) - { - case 0: - if ((0xfffffffeffffffffLL & l) != 0) - { - if (kind > 33) - kind = 33; - jjCheckNAdd(6); - } - if ((0x100002600LL & l) != 0) - { - if (kind > 7) - kind = 7; - } - else if (curChar == 34) - jjCheckNAddTwoStates(2, 4); - break; - case 1: - if (curChar == 34) - jjCheckNAddTwoStates(2, 4); - break; - case 2: - if ((0xfffffffbffffffffLL & l) != 0) - jjCheckNAddStates(16, 18); - break; - case 3: - if (curChar == 34) - jjCheckNAddStates(16, 18); - break; - case 5: - if (curChar == 34 && kind > 32) - kind = 32; - break; - case 6: - if ((0xfffffffeffffffffLL & l) == 0) - break; - if (kind > 33) - kind = 33; - jjCheckNAdd(6); - break; - default: - break; + return jjMoveNfa_1(state, pos + 1); +} + +int32_t 
QueryParserTokenManager::jjMoveNfa_1(int32_t startState, int32_t curPos) { + int32_t startsAt = 0; + jjnewStateCnt = 7; + int32_t i = 1; + jjstateSet[0] = startState; + int32_t kind = 0x7fffffff; + while (true) { + if (++jjround == 0x7fffffff) { + ReInitRounds(); + } + if (curChar < 64) { + int64_t l = (int64_t)1 << curChar; + do { + switch (jjstateSet[--i]) { + case 0: + if ((0xfffffffeffffffffLL & l) != 0) { + if (kind > 33) { + kind = 33; + } + jjCheckNAdd(6); } - } - while (i != startsAt); - } - else if (curChar < 128) - { - int64_t l = (int64_t)1 << (curChar & 077); - do - { - switch (jjstateSet[--i]) - { - case 0: - case 6: - if ((0xdfffffffffffffffLL & l) == 0) - break; - if (kind > 33) - kind = 33; - jjCheckNAdd(6); - break; - case 2: - jjAddStates(16, 18); - break; - case 4: - if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 3; - break; - default: - break; + if ((0x100002600LL & l) != 0) { + if (kind > 7) { + kind = 7; + } + } else if (curChar == 34) { + jjCheckNAddTwoStates(2, 4); } - } - while (i != startsAt); - } - else - { - int32_t hiByte = (int32_t)(curChar >> 8); - int32_t i1 = hiByte >> 6; - int64_t l1 = (int64_t)1 << (hiByte & 077); - int32_t i2 = (curChar & 0xff) >> 6; - int64_t l2 = (int64_t)1 << (curChar & 077); - do - { - switch (jjstateSet[--i]) - { - case 0: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - { - if (kind > 7) - kind = 7; - } - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - { - if (kind > 33) - kind = 33; - jjCheckNAdd(6); - } - break; - case 2: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjAddStates(16, 18); - break; - case 6: - if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) - break; - if (kind > 33) - kind = 33; - jjCheckNAdd(6); - break; - default: - break; + break; + case 1: + if (curChar == 34) { + jjCheckNAddTwoStates(2, 4); } - } - while (i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 7 - (jjnewStateCnt 
= startsAt))) - return curPos; - try - { - curChar = input_stream->readChar(); - } - catch (IOException&) - { - return curPos; - } - } - } - - int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_0() - { - return jjMoveNfa_0(0, 0); - } - - int32_t QueryParserTokenManager::jjMoveNfa_0(int32_t startState, int32_t curPos) - { - int32_t startsAt = 0; - jjnewStateCnt = 3; - int32_t i = 1; - jjstateSet[0] = startState; - int32_t kind = 0x7fffffff; - while (true) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - int64_t l = (int64_t)1 << curChar; - do - { - switch (jjstateSet[--i]) - { - case 0: - if ((0x3ff000000000000LL & l) == 0) - break; - if (kind > 25) - kind = 25; - jjAddStates(19, 20); - break; - case 1: - if (curChar == 46) - jjCheckNAdd(2); - break; - case 2: - if ((0x3ff000000000000LL & l) == 0) - break; - if (kind > 25) - kind = 25; - jjCheckNAdd(2); - break; - default: - break; + break; + case 2: + if ((0xfffffffbffffffffLL & l) != 0) { + jjCheckNAddStates(16, 18); } + break; + case 3: + if (curChar == 34) { + jjCheckNAddStates(16, 18); + } + break; + case 5: + if (curChar == 34 && kind > 32) { + kind = 32; + } + break; + case 6: + if ((0xfffffffeffffffffLL & l) == 0) { + break; + } + if (kind > 33) { + kind = 33; + } + jjCheckNAdd(6); + break; + default: + break; } - while (i != startsAt); - } - else if (curChar < 128) - { - int64_t l = (int64_t)1 << (curChar & 077); - do - { - jjstateSet[--i]; + } while (i != startsAt); + } else if (curChar < 128) { + int64_t l = (int64_t)1 << (curChar & 077); + do { + switch (jjstateSet[--i]) { + case 0: + case 6: + if ((0xdfffffffffffffffLL & l) == 0) { + break; + } + if (kind > 33) { + kind = 33; + } + jjCheckNAdd(6); + break; + case 2: + jjAddStates(16, 18); + break; + case 4: + if (curChar == 92) { + jjstateSet[jjnewStateCnt++] = 3; + } + break; + default: + break; } - while (i != startsAt); - } - else - { - int32_t hiByte = (int32_t)(curChar >> 8); - int32_t i1 = hiByte >> 6; - int64_t 
l1 = (int64_t)1 << (hiByte & 077); - int32_t i2 = (curChar & 0xff) >> 6; - int64_t l2 = (int64_t)1 << (curChar & 077); - do - { - jjstateSet[--i]; + } while (i != startsAt); + } else { + int32_t hiByte = (int32_t)(curChar >> 8); + int32_t i1 = hiByte >> 6; + int64_t l1 = (int64_t)1 << (hiByte & 077); + int32_t i2 = (curChar & 0xff) >> 6; + int64_t l2 = (int64_t)1 << (curChar & 077); + do { + switch (jjstateSet[--i]) { + case 0: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { + if (kind > 7) { + kind = 7; + } + } + if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { + if (kind > 33) { + kind = 33; + } + jjCheckNAdd(6); + } + break; + case 2: + if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { + jjAddStates(16, 18); + } + break; + case 6: + if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) { + break; + } + if (kind > 33) { + kind = 33; + } + jjCheckNAdd(6); + break; + default: + break; } - while (i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) - return curPos; - try - { - curChar = input_stream->readChar(); - } - catch (IOException&) - { - return curPos; - } + } while (i != startsAt); } - } - - int32_t QueryParserTokenManager::jjStopStringLiteralDfa_2(int32_t pos, int64_t active0) - { - switch (pos) - { - case 0: - if ((active0 & 0x4000000LL) != 0) - { - jjmatchedKind = 29; - return 6; - } - return -1; - default: - return -1; + if (kind != 0x7fffffff) { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; } - } - - int32_t QueryParserTokenManager::jjStartNfa_2(int32_t pos, int64_t active0) - { - return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1); - } - - int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_2() - { - switch (curChar) - { - case 84: - return jjMoveStringLiteralDfa1_2(0x4000000LL); - case 93: - return jjStopAtPos(0, 27); - default: - return jjMoveNfa_2(0, 0); + ++curPos; + i = 
jjnewStateCnt; + jjnewStateCnt = startsAt; + if (i == (startsAt = 7 - jjnewStateCnt)) { + return curPos; } - } - - int32_t QueryParserTokenManager::jjMoveStringLiteralDfa1_2(int64_t active0) - { - try - { + try { curChar = input_stream->readChar(); + } catch (IOException&) { + return curPos; } - catch (IOException&) - { - jjStopStringLiteralDfa_2(0, active0); - return 1; - } - switch (curChar) - { - case 79: - if ((active0 & 0x4000000LL) != 0) - return jjStartNfaWithStates_2(1, 26, 6); - break; - default: - break; - } - return jjStartNfa_2(0, active0); } - - int32_t QueryParserTokenManager::jjStartNfaWithStates_2(int32_t pos, int32_t kind, int32_t state) - { - jjmatchedKind = kind; - jjmatchedPos = pos; - try - { - curChar = input_stream->readChar(); - } - catch (IOException&) - { - return pos + 1; +} + +int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_0() { + return jjMoveNfa_0(0, 0); +} + +int32_t QueryParserTokenManager::jjMoveNfa_0(int32_t startState, int32_t curPos) { + int32_t startsAt = 0; + jjnewStateCnt = 3; + int32_t i = 1; + jjstateSet[0] = startState; + int32_t kind = 0x7fffffff; + while (true) { + if (++jjround == 0x7fffffff) { + ReInitRounds(); } - return jjMoveNfa_2(state, pos + 1); - } - - int32_t QueryParserTokenManager::jjMoveNfa_2(int32_t startState, int32_t curPos) - { - int32_t startsAt = 0; - jjnewStateCnt = 7; - int32_t i = 1; - jjstateSet[0] = startState; - int32_t kind = 0x7fffffff; - while (true) - { - if (++jjround == 0x7fffffff) - ReInitRounds(); - if (curChar < 64) - { - int64_t l = (int64_t)1 << curChar; - do - { - switch (jjstateSet[--i]) - { - case 0: - if ((0xfffffffeffffffffLL & l) != 0) - { - if (kind > 29) - kind = 29; - jjCheckNAdd(6); - } - if ((0x100002600LL & l) != 0) - { - if (kind > 7) - kind = 7; - } - else if (curChar == 34) - jjCheckNAddTwoStates(2, 4); - break; - case 1: - if (curChar == 34) - jjCheckNAddTwoStates(2, 4); - break; - case 2: - if ((0xfffffffbffffffffLL & l) != 0) - jjCheckNAddStates(16, 18); - 
break; - case 3: - if (curChar == 34) - jjCheckNAddStates(16, 18); - break; - case 5: - if (curChar == 34 && kind > 28) - kind = 28; - break; - case 6: - if ((0xfffffffeffffffffLL & l) == 0) - break; - if (kind > 29) - kind = 29; - jjCheckNAdd(6); - break; - default: - break; + if (curChar < 64) { + int64_t l = (int64_t)1 << curChar; + do { + switch (jjstateSet[--i]) { + case 0: + if ((0x3ff000000000000LL & l) == 0) { + break; } - } - while (i != startsAt); - } - else if (curChar < 128) - { - int64_t l = (int64_t)1 << (curChar & 077); - do - { - switch (jjstateSet[--i]) - { - case 0: - case 6: - if ((0xffffffffdfffffffLL & l) == 0) - break; - if (kind > 29) - kind = 29; - jjCheckNAdd(6); - break; - case 2: - jjAddStates(16, 18); - break; - case 4: - if (curChar == 92) - jjstateSet[jjnewStateCnt++] = 3; - break; - default: - break; + if (kind > 25) { + kind = 25; } - } - while (i != startsAt); - } - else - { - int32_t hiByte = (int32_t)(curChar >> 8); - int32_t i1 = hiByte >> 6; - int64_t l1 = (int64_t)1 << (hiByte & 077); - int32_t i2 = (curChar & 0xff) >> 6; - int64_t l2 = (int64_t)1 << (curChar & 077); - do - { - switch (jjstateSet[--i]) - { - case 0: - if (jjCanMove_0(hiByte, i1, i2, l1, l2)) - { - if (kind > 7) - kind = 7; - } - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - { - if (kind > 29) - kind = 29; - jjCheckNAdd(6); - } - break; - case 2: - if (jjCanMove_1(hiByte, i1, i2, l1, l2)) - jjAddStates(16, 18); - break; - case 6: - if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) - break; - if (kind > 29) - kind = 29; - jjCheckNAdd(6); - break; - default: - break; + jjAddStates(19, 20); + break; + case 1: + if (curChar == 46) { + jjCheckNAdd(2); } + break; + case 2: + if ((0x3ff000000000000LL & l) == 0) { + break; + } + if (kind > 25) { + kind = 25; + } + jjCheckNAdd(2); + break; + default: + break; } - while (i != startsAt); - } - if (kind != 0x7fffffff) - { - jjmatchedKind = kind; - jjmatchedPos = curPos; - kind = 0x7fffffff; - } - ++curPos; - if ((i = jjnewStateCnt) == 
(startsAt = 7 - (jjnewStateCnt = startsAt))) - return curPos; - try - { - curChar = input_stream->readChar(); - } - catch (IOException&) - { - return curPos; - } + } while (i != startsAt); + } else if (curChar < 128) { + int64_t l = (int64_t)1 << (curChar & 077); + do { + jjstateSet[--i]; + } while (i != startsAt); + } else { + int32_t hiByte = (int32_t)(curChar >> 8); + int32_t i1 = hiByte >> 6; + int64_t l1 = (int64_t)1 << (hiByte & 077); + int32_t i2 = (curChar & 0xff) >> 6; + int64_t l2 = (int64_t)1 << (curChar & 077); + do { + jjstateSet[--i]; + } while (i != startsAt); } - } - - bool QueryParserTokenManager::jjCanMove_0(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) - { - switch (hiByte) - { - case 48: - return ((jjbitVec0[i2] & l2) != 0); - default: - return false; + if (kind != 0x7fffffff) { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; } - } - - bool QueryParserTokenManager::jjCanMove_1(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) - { - switch (hiByte) - { - case 0: - return ((jjbitVec3[i2] & l2) != 0); - default: - if ((jjbitVec1[i1] & l1) != 0) - return true; - return false; + ++curPos; + i = jjnewStateCnt; + jjnewStateCnt = startsAt; + if (i == (startsAt = 3 - jjnewStateCnt)) { + return curPos; } - } - - bool QueryParserTokenManager::jjCanMove_2(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) - { - switch (hiByte) - { - case 0: - return ((jjbitVec3[i2] & l2) != 0); - case 48: - return ((jjbitVec1[i2] & l2) != 0); - default: - if ((jjbitVec4[i1] & l1) != 0) - return true; - return false; + try { + curChar = input_stream->readChar(); + } catch (IOException&) { + return curPos; } } - - void QueryParserTokenManager::ReInit(QueryParserCharStreamPtr stream) - { - jjmatchedPos = 0; - jjnewStateCnt = 0; - curLexState = defaultLexState; - input_stream = stream; - ReInitRounds(); +} + +int32_t QueryParserTokenManager::jjStopStringLiteralDfa_2(int32_t pos, int64_t active0) { + switch (pos) 
{ + case 0: + if ((active0 & 0x4000000LL) != 0) { + jjmatchedKind = 29; + return 6; + } + return -1; + default: + return -1; } - - void QueryParserTokenManager::ReInitRounds() - { - jjround = 0x80000001; - for (int32_t i = 36; i-- > 0;) - jjrounds[i] = 0x80000000; +} + +int32_t QueryParserTokenManager::jjStartNfa_2(int32_t pos, int64_t active0) { + return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1); +} + +int32_t QueryParserTokenManager::jjMoveStringLiteralDfa0_2() { + switch (curChar) { + case 84: + return jjMoveStringLiteralDfa1_2(0x4000000LL); + case 93: + return jjStopAtPos(0, 27); + default: + return jjMoveNfa_2(0, 0); } - - void QueryParserTokenManager::ReInit(QueryParserCharStreamPtr stream, int32_t lexState) - { - ReInit(stream); - SwitchTo(lexState); +} + +int32_t QueryParserTokenManager::jjMoveStringLiteralDfa1_2(int64_t active0) { + try { + curChar = input_stream->readChar(); + } catch (IOException&) { + jjStopStringLiteralDfa_2(0, active0); + return 1; } - - void QueryParserTokenManager::SwitchTo(int32_t lexState) - { - if (lexState >= 4 || lexState < 0) - { - boost::throw_exception(QueryParserError(L"Error: Ignoring invalid lexical state : " + - StringUtils::toString(lexState) + L". State unchanged.")); + switch (curChar) { + case 79: + if ((active0 & 0x4000000LL) != 0) { + return jjStartNfaWithStates_2(1, 26, 6); } - else - curLexState = lexState; + break; + default: + break; } - - QueryParserTokenPtr QueryParserTokenManager::jjFillToken() - { - String im(jjstrLiteralImages[jjmatchedKind]); - String curTokenImage(im.empty() ? 
input_stream->GetImage() : im); - int32_t beginLine = input_stream->getBeginLine(); - int32_t beginColumn = input_stream->getBeginColumn(); - int32_t endLine = input_stream->getEndLine(); - int32_t endColumn = input_stream->getEndColumn(); - QueryParserTokenPtr t(QueryParserToken::newToken(jjmatchedKind, curTokenImage)); - - t->beginLine = beginLine; - t->endLine = endLine; - t->beginColumn = beginColumn; - t->endColumn = endColumn; - - return t; + return jjStartNfa_2(0, active0); +} + +int32_t QueryParserTokenManager::jjStartNfaWithStates_2(int32_t pos, int32_t kind, int32_t state) { + jjmatchedKind = kind; + jjmatchedPos = pos; + try { + curChar = input_stream->readChar(); + } catch (IOException&) { + return pos + 1; } - - QueryParserTokenPtr QueryParserTokenManager::getNextToken() - { - QueryParserTokenPtr matchedToken; - int32_t curPos = 0; - - while (true) - { - try - { - curChar = input_stream->BeginToken(); - } - catch (IOException&) - { - jjmatchedKind = 0; - matchedToken = jjFillToken(); - return matchedToken; - } - - switch (curLexState) - { + return jjMoveNfa_2(state, pos + 1); +} + +int32_t QueryParserTokenManager::jjMoveNfa_2(int32_t startState, int32_t curPos) { + int32_t startsAt = 0; + jjnewStateCnt = 7; + int32_t i = 1; + jjstateSet[0] = startState; + int32_t kind = 0x7fffffff; + while (true) { + if (++jjround == 0x7fffffff) { + ReInitRounds(); + } + if (curChar < 64) { + int64_t l = (int64_t)1 << curChar; + do { + switch (jjstateSet[--i]) { case 0: - jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_0(); + if ((0xfffffffeffffffffLL & l) != 0) { + if (kind > 29) { + kind = 29; + } + jjCheckNAdd(6); + } + if ((0x100002600LL & l) != 0) { + if (kind > 7) { + kind = 7; + } + } else if (curChar == 34) { + jjCheckNAddTwoStates(2, 4); + } break; case 1: - jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_1(); + if (curChar == 34) { + jjCheckNAddTwoStates(2, 4); + } break; case 2: - 
jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_2(); + if ((0xfffffffbffffffffLL & l) != 0) { + jjCheckNAddStates(16, 18); + } break; case 3: - jjmatchedKind = 0x7fffffff; - jjmatchedPos = 0; - curPos = jjMoveStringLiteralDfa0_3(); + if (curChar == 34) { + jjCheckNAddStates(16, 18); + } + break; + case 5: + if (curChar == 34 && kind > 28) { + kind = 28; + } + break; + case 6: + if ((0xfffffffeffffffffLL & l) == 0) { + break; + } + if (kind > 29) { + kind = 29; + } + jjCheckNAdd(6); + break; + default: break; - } - - if (jjmatchedKind != 0x7fffffff) - { - if (jjmatchedPos + 1 < curPos) - input_stream->backup(curPos - jjmatchedPos - 1); - if ((jjtoToken[jjmatchedKind >> 6] & ((int64_t)1 << (jjmatchedKind & 077))) != 0) - { - matchedToken = jjFillToken(); - if (jjnewLexState[jjmatchedKind] != -1) - curLexState = jjnewLexState[jjmatchedKind]; - return matchedToken; } - else - { - if (jjnewLexState[jjmatchedKind] != -1) - curLexState = jjnewLexState[jjmatchedKind]; - continue; + } while (i != startsAt); + } else if (curChar < 128) { + int64_t l = (int64_t)1 << (curChar & 077); + do { + switch (jjstateSet[--i]) { + case 0: + case 6: + if ((0xffffffffdfffffffLL & l) == 0) { + break; + } + if (kind > 29) { + kind = 29; + } + jjCheckNAdd(6); + break; + case 2: + jjAddStates(16, 18); + break; + case 4: + if (curChar == 92) { + jjstateSet[jjnewStateCnt++] = 3; + } + break; + default: + break; } - } - int32_t error_line = input_stream->getEndLine(); - int32_t error_column = input_stream->getEndColumn(); - String error_after; - bool EOFSeen = false; - try - { - input_stream->readChar(); - input_stream->backup(1); - } - catch (IOException&) - { - EOFSeen = true; - error_after = curPos <= 1 ? 
L"" : input_stream->GetImage(); - if (curChar == L'\n' || curChar == L'\r') - { - ++error_line; - error_column = 0; + } while (i != startsAt); + } else { + int32_t hiByte = (int32_t)(curChar >> 8); + int32_t i1 = hiByte >> 6; + int64_t l1 = (int64_t)1 << (hiByte & 077); + int32_t i2 = (curChar & 0xff) >> 6; + int64_t l2 = (int64_t)1 << (curChar & 077); + do { + switch (jjstateSet[--i]) { + case 0: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) { + if (kind > 7) { + kind = 7; + } + } + if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { + if (kind > 29) { + kind = 29; + } + jjCheckNAdd(6); + } + break; + case 2: + if (jjCanMove_1(hiByte, i1, i2, l1, l2)) { + jjAddStates(16, 18); + } + break; + case 6: + if (!jjCanMove_1(hiByte, i1, i2, l1, l2)) { + break; + } + if (kind > 29) { + kind = 29; + } + jjCheckNAdd(6); + break; + default: + break; } - else - ++error_column; - } - - if (!EOFSeen) - { - input_stream->backup(1); - error_after = curPos <= 1 ? L"" : input_stream->GetImage(); - } - - boost::throw_exception(QueryParserError(QueryParseError::lexicalError(EOFSeen, curLexState, error_line, error_column, error_after, curChar))); + } while (i != startsAt); + } + if (kind != 0x7fffffff) { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; } + ++curPos; + i = jjnewStateCnt; + jjnewStateCnt = startsAt; + if (i == (startsAt = 7 - jjnewStateCnt)) { + return curPos; + } + try { + curChar = input_stream->readChar(); + } catch (IOException&) { + return curPos; + } + } +} + +bool QueryParserTokenManager::jjCanMove_0(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { + switch (hiByte) { + case 48: + return ((jjbitVec0[i2] & l2) != 0); + default: + return false; } - - void QueryParserTokenManager::jjCheckNAdd(int32_t state) - { - if (jjrounds[state] != jjround) - { - jjstateSet[jjnewStateCnt++] = state; - jjrounds[state] = jjround; +} + +bool QueryParserTokenManager::jjCanMove_1(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { + switch 
(hiByte) { + case 0: + return ((jjbitVec3[i2] & l2) != 0); + default: + if ((jjbitVec1[i1] & l1) != 0) { + return true; } + return false; } - - void QueryParserTokenManager::jjAddStates(int32_t start, int32_t end) - { - do - { - jjstateSet[jjnewStateCnt++] = jjnextStates[start]; +} + +bool QueryParserTokenManager::jjCanMove_2(int32_t hiByte, int32_t i1, int32_t i2, int64_t l1, int64_t l2) { + switch (hiByte) { + case 0: + return ((jjbitVec3[i2] & l2) != 0); + case 48: + return ((jjbitVec1[i2] & l2) != 0); + default: + if ((jjbitVec4[i1] & l1) != 0) { + return true; } - while (start++ != end); + return false; } - - void QueryParserTokenManager::jjCheckNAddTwoStates(int32_t state1, int32_t state2) - { - jjCheckNAdd(state1); - jjCheckNAdd(state2); +} + +void QueryParserTokenManager::ReInit(const QueryParserCharStreamPtr& stream) { + jjmatchedPos = 0; + jjnewStateCnt = 0; + curLexState = defaultLexState; + input_stream = stream; + ReInitRounds(); +} + +void QueryParserTokenManager::ReInitRounds() { + jjround = 0x80000001; + for (int32_t i = 36; i-- > 0;) { + jjrounds[i] = 0x80000000; + } +} + +void QueryParserTokenManager::ReInit(const QueryParserCharStreamPtr& stream, int32_t lexState) { + ReInit(stream); + SwitchTo(lexState); +} + +void QueryParserTokenManager::SwitchTo(int32_t lexState) { + if (lexState >= 4 || lexState < 0) { + boost::throw_exception(QueryParserError(L"Error: Ignoring invalid lexical state : " + + StringUtils::toString(lexState) + L". State unchanged.")); + } else { + curLexState = lexState; } - - void QueryParserTokenManager::jjCheckNAddStates(int32_t start, int32_t end) - { - do - { - jjCheckNAdd(jjnextStates[start]); +} + +QueryParserTokenPtr QueryParserTokenManager::jjFillToken() { + String im(jjstrLiteralImages[jjmatchedKind]); + String curTokenImage(im.empty() ? 
input_stream->GetImage() : im); + int32_t beginLine = input_stream->getBeginLine(); + int32_t beginColumn = input_stream->getBeginColumn(); + int32_t endLine = input_stream->getEndLine(); + int32_t endColumn = input_stream->getEndColumn(); + QueryParserTokenPtr t(QueryParserToken::newToken(jjmatchedKind, curTokenImage)); + + t->beginLine = beginLine; + t->endLine = endLine; + t->beginColumn = beginColumn; + t->endColumn = endColumn; + + return t; +} + +QueryParserTokenPtr QueryParserTokenManager::getNextToken() { + QueryParserTokenPtr matchedToken; + int32_t curPos = 0; + + while (true) { + try { + curChar = input_stream->BeginToken(); + } catch (IOException&) { + jjmatchedKind = 0; + matchedToken = jjFillToken(); + return matchedToken; + } + + switch (curLexState) { + case 0: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_0(); + break; + case 1: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_1(); + break; + case 2: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_2(); + break; + case 3: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_3(); + break; + } + + if (jjmatchedKind != 0x7fffffff) { + if (jjmatchedPos + 1 < curPos) { + input_stream->backup(curPos - jjmatchedPos - 1); + } + if ((jjtoToken[jjmatchedKind >> 6] & ((int64_t)1 << (jjmatchedKind & 077))) != 0) { + matchedToken = jjFillToken(); + if (jjnewLexState[jjmatchedKind] != -1) { + curLexState = jjnewLexState[jjmatchedKind]; + } + return matchedToken; + } else { + if (jjnewLexState[jjmatchedKind] != -1) { + curLexState = jjnewLexState[jjmatchedKind]; + } + continue; + } } - while (start++ != end); + int32_t error_line = input_stream->getEndLine(); + int32_t error_column = input_stream->getEndColumn(); + String error_after; + bool EOFSeen = false; + try { + input_stream->readChar(); + input_stream->backup(1); + } catch (IOException&) { + EOFSeen = true; + 
error_after = curPos <= 1 ? L"" : input_stream->GetImage(); + if (curChar == L'\n' || curChar == L'\r') { + ++error_line; + error_column = 0; + } else { + ++error_column; + } + } + + if (!EOFSeen) { + input_stream->backup(1); + error_after = curPos <= 1 ? L"" : input_stream->GetImage(); + } + + boost::throw_exception(QueryParserError(QueryParseError::lexicalError(EOFSeen, curLexState, error_line, error_column, error_after, curChar))); } } + +void QueryParserTokenManager::jjCheckNAdd(int32_t state) { + if (jjrounds[state] != jjround) { + jjstateSet[jjnewStateCnt++] = state; + jjrounds[state] = jjround; + } +} + +void QueryParserTokenManager::jjAddStates(int32_t start, int32_t end) { + do { + jjstateSet[jjnewStateCnt++] = jjnextStates[start]; + } while (start++ != end); +} + +void QueryParserTokenManager::jjCheckNAddTwoStates(int32_t state1, int32_t state2) { + jjCheckNAdd(state1); + jjCheckNAdd(state2); +} + +void QueryParserTokenManager::jjCheckNAddStates(int32_t start, int32_t end) { + do { + jjCheckNAdd(jjnextStates[start]); + } while (start++ != end); +} + +} diff --git a/src/core/search/BooleanClause.cpp b/src/core/search/BooleanClause.cpp index ba72dfe1..e47c4a8b 100644 --- a/src/core/search/BooleanClause.cpp +++ b/src/core/search/BooleanClause.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,71 +8,61 @@ #include "BooleanClause.h" #include "Query.h" -namespace Lucene -{ - BooleanClause::BooleanClause(QueryPtr query, Occur occur) - { - this->query = query; - this->occur = occur; - } - - BooleanClause::~BooleanClause() - { - } - - BooleanClause::Occur BooleanClause::getOccur() - { - return occur; - } - - void BooleanClause::setOccur(BooleanClause::Occur occur) - { - this->occur = occur; - } - - QueryPtr BooleanClause::getQuery() - { - return query; - } - - void BooleanClause::setQuery(QueryPtr query) - { - this->query = query; - } - - bool BooleanClause::isProhibited() - { - return (occur == MUST_NOT); - } - - bool BooleanClause::isRequired() - { - return (occur == MUST); - } - - bool BooleanClause::equals(LuceneObjectPtr other) - { - BooleanClausePtr otherBooleanClause(boost::dynamic_pointer_cast(other)); - if (!otherBooleanClause) - return false; - return (this->query->equals(otherBooleanClause->query) && this->occur == otherBooleanClause->occur); - } - - int32_t BooleanClause::hashCode() - { - return query->hashCode() ^ (occur == MUST ? 1 : 0) ^ (occur == MUST_NOT ? 
2 : 0); +namespace Lucene { + +BooleanClause::BooleanClause(const QueryPtr& query, Occur occur) { + this->query = query; + this->occur = occur; +} + +BooleanClause::~BooleanClause() { +} + +BooleanClause::Occur BooleanClause::getOccur() { + return occur; +} + +void BooleanClause::setOccur(BooleanClause::Occur occur) { + this->occur = occur; +} + +QueryPtr BooleanClause::getQuery() { + return query; +} + +void BooleanClause::setQuery(const QueryPtr& query) { + this->query = query; +} + +bool BooleanClause::isProhibited() { + return (occur == MUST_NOT); +} + +bool BooleanClause::isRequired() { + return (occur == MUST); +} + +bool BooleanClause::equals(const LuceneObjectPtr& other) { + BooleanClausePtr otherBooleanClause(boost::dynamic_pointer_cast(other)); + if (!otherBooleanClause) { + return false; } - - String BooleanClause::toString() - { - switch (occur) - { - case MUST: - return L"+" + query->toString(); - case MUST_NOT: - return L"-" + query->toString(); - default: - return query->toString(); - } + return (this->query->equals(otherBooleanClause->query) && this->occur == otherBooleanClause->occur); +} + +int32_t BooleanClause::hashCode() { + return query->hashCode() ^ (occur == MUST ? 1 : 0) ^ (occur == MUST_NOT ? 2 : 0); +} + +String BooleanClause::toString() { + switch (occur) { + case MUST: + return L"+" + query->toString(); + case MUST_NOT: + return L"-" + query->toString(); + default: + return query->toString(); } } + +} diff --git a/src/core/search/BooleanQuery.cpp b/src/core/search/BooleanQuery.cpp index f0c95011..1376d24b 100644 --- a/src/core/search/BooleanQuery.cpp +++ b/src/core/search/BooleanQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,403 +13,364 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - int32_t BooleanQuery::maxClauseCount = 1024; - - BooleanQuery::BooleanQuery(bool disableCoord) - { - this->disableCoord = disableCoord; - this->clauses = Collection::newInstance(); - this->minNrShouldMatch = 0; - } - - BooleanQuery::~BooleanQuery() - { - } - - int32_t BooleanQuery::getMaxClauseCount() - { - return maxClauseCount; - } - - void BooleanQuery::setMaxClauseCount(int32_t maxClauseCount) - { - if (maxClauseCount < 1) - boost::throw_exception(IllegalArgumentException(L"maxClauseCount must be >= 1")); - BooleanQuery::maxClauseCount = maxClauseCount; - } - - bool BooleanQuery::isCoordDisabled() - { - return disableCoord; - } - - SimilarityPtr BooleanQuery::getSimilarity(SearcherPtr searcher) - { - SimilarityPtr result(Query::getSimilarity(searcher)); - if (disableCoord) // disable coord as requested - result = newLucene(result); - return result; - } - - void BooleanQuery::setMinimumNumberShouldMatch(int32_t min) - { - this->minNrShouldMatch = min; - } - - int32_t BooleanQuery::getMinimumNumberShouldMatch() - { - return minNrShouldMatch; - } - - void BooleanQuery::add(QueryPtr query, BooleanClause::Occur occur) - { - add(newLucene(query, occur)); - } - - void BooleanQuery::add(BooleanClausePtr clause) - { - if (clauses.size() >= maxClauseCount) - boost::throw_exception(TooManyClausesException(L"maxClauseCount is set to " + StringUtils::toString(maxClauseCount))); - clauses.add(clause); - } - - Collection BooleanQuery::getClauses() - { - return clauses; - } - - Collection::iterator BooleanQuery::begin() - { - return clauses.begin(); +namespace Lucene { + +int32_t BooleanQuery::maxClauseCount = 1024; + +BooleanQuery::BooleanQuery(bool disableCoord) { + this->disableCoord = disableCoord; + this->clauses = Collection::newInstance(); + this->minNrShouldMatch = 0; +} + 
+BooleanQuery::~BooleanQuery() { +} + +int32_t BooleanQuery::getMaxClauseCount() { + return maxClauseCount; +} + +void BooleanQuery::setMaxClauseCount(int32_t maxClauseCount) { + if (maxClauseCount < 1) { + boost::throw_exception(IllegalArgumentException(L"maxClauseCount must be >= 1")); } - - Collection::iterator BooleanQuery::end() - { - return clauses.end(); + BooleanQuery::maxClauseCount = maxClauseCount; +} + +bool BooleanQuery::isCoordDisabled() { + return disableCoord; +} + +SimilarityPtr BooleanQuery::getSimilarity(const SearcherPtr& searcher) { + SimilarityPtr result(Query::getSimilarity(searcher)); + if (disableCoord) { // disable coord as requested + result = newLucene(result); } - - WeightPtr BooleanQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); + return result; +} + +void BooleanQuery::setMinimumNumberShouldMatch(int32_t min) { + this->minNrShouldMatch = min; +} + +int32_t BooleanQuery::getMinimumNumberShouldMatch() { + return minNrShouldMatch; +} + +void BooleanQuery::add(const QueryPtr& query, BooleanClause::Occur occur) { + add(newLucene(query, occur)); +} + +void BooleanQuery::add(const BooleanClausePtr& clause) { + if (clauses.size() >= maxClauseCount) { + boost::throw_exception(TooManyClausesException(L"maxClauseCount is set to " + StringUtils::toString(maxClauseCount))); } - - QueryPtr BooleanQuery::rewrite(IndexReaderPtr reader) - { - if (minNrShouldMatch == 0 && clauses.size() == 1) // optimize 1-clause queries - { - BooleanClausePtr c(clauses[0]); - if (!c->isProhibited()) // just return clause - { - QueryPtr query(c->getQuery()->rewrite(reader)); // rewrite first - - if (getBoost() != 1.0) // incorporate boost - { - if (query == c->getQuery()) // if rewrite was no-op - query = boost::dynamic_pointer_cast(query->clone()); // then clone before boost - query->setBoost(getBoost() * query->getBoost()); + clauses.add(clause); +} + +Collection BooleanQuery::getClauses() { + return clauses; +} + 
+Collection::iterator BooleanQuery::begin() { + return clauses.begin(); +} + +Collection::iterator BooleanQuery::end() { + return clauses.end(); +} + +WeightPtr BooleanQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +QueryPtr BooleanQuery::rewrite(const IndexReaderPtr& reader) { + if (minNrShouldMatch == 0 && clauses.size() == 1) { // optimize 1-clause queries + BooleanClausePtr c(clauses[0]); + if (!c->isProhibited()) { // just return clause + QueryPtr query(c->getQuery()->rewrite(reader)); // rewrite first + + if (getBoost() != 1.0) { // incorporate boost + if (query == c->getQuery()) { // if rewrite was no-op + query = boost::dynamic_pointer_cast(query->clone()); // then clone before boost } - - return query; + query->setBoost(getBoost() * query->getBoost()); } + + return query; } - - BooleanQueryPtr clone; // recursively rewrite - for (int32_t i = 0; i < clauses.size(); ++i) - { - BooleanClausePtr c(clauses[i]); - QueryPtr query(c->getQuery()->rewrite(reader)); - if (query != c->getQuery()) // clause rewrote: must clone - { - if (!clone) - clone = boost::dynamic_pointer_cast(this->clone()); - clone->clauses[i] = newLucene(query, c->getOccur()); + } + + BooleanQueryPtr clone; // recursively rewrite + for (int32_t i = 0; i < clauses.size(); ++i) { + BooleanClausePtr c(clauses[i]); + QueryPtr query(c->getQuery()->rewrite(reader)); + if (query != c->getQuery()) { // clause rewrote: must clone + if (!clone) { + clone = boost::dynamic_pointer_cast(this->clone()); } + clone->clauses[i] = newLucene(query, c->getOccur()); } - - if (clone) - return clone; // some clauses rewrote - else - return shared_from_this(); // no clauses rewrote } - - void BooleanQuery::extractTerms(SetTerm terms) - { - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - (*clause)->getQuery()->extractTerms(terms); + + if (clone) { + return clone; // some clauses rewrote + } else { + return 
shared_from_this(); // no clauses rewrote } - - LuceneObjectPtr BooleanQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = Query::clone(other ? other : newLucene()); - BooleanQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->disableCoord = disableCoord; - cloneQuery->minNrShouldMatch = minNrShouldMatch; - cloneQuery->clauses = Collection::newInstance(clauses.begin(), clauses.end()); - return cloneQuery; +} + +void BooleanQuery::extractTerms(SetTerm terms) { + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + (*clause)->getQuery()->extractTerms(terms); } - - String BooleanQuery::toString(const String& field) - { - String buffer; - bool needParens = (getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0); - if (needParens) - buffer += L"("; - - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - { - if (clause != clauses.begin()) - buffer += L" "; - - if ((*clause)->isProhibited()) - buffer += L"-"; - else if ((*clause)->isRequired()) - buffer += L"+"; - - QueryPtr subQuery((*clause)->getQuery()); - if (subQuery) - { - if (boost::dynamic_pointer_cast(subQuery)) // wrap sub-bools in parens - { - buffer += L"("; - buffer += subQuery->toString(field); - buffer += L")"; - } - else - buffer += subQuery->toString(field); - } - else - buffer += L"null"; +} + +LuceneObjectPtr BooleanQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = Query::clone(other ? 
other : newLucene()); + BooleanQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->disableCoord = disableCoord; + cloneQuery->minNrShouldMatch = minNrShouldMatch; + cloneQuery->clauses = Collection::newInstance(clauses.begin(), clauses.end()); + return cloneQuery; +} + +String BooleanQuery::toString(const String& field) { + String buffer; + bool needParens = (getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0); + if (needParens) { + buffer += L"("; + } + + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + if (clause != clauses.begin()) { + buffer += L" "; } - - if (needParens) - buffer += L")"; - - if (getMinimumNumberShouldMatch() > 0) - { - buffer += L"~"; - buffer += StringUtils::toString(getMinimumNumberShouldMatch()); + + if ((*clause)->isProhibited()) { + buffer += L"-"; + } else if ((*clause)->isRequired()) { + buffer += L"+"; + } + + QueryPtr subQuery((*clause)->getQuery()); + if (subQuery) { + if (boost::dynamic_pointer_cast(subQuery)) { // wrap sub-bools in parens + buffer += L"("; + buffer += subQuery->toString(field); + buffer += L")"; + } else { + buffer += subQuery->toString(field); + } + } else { + buffer += L"null"; } - - if (getBoost() != 1.0) - buffer += boostString(); - - return buffer; } - - bool BooleanQuery::equals(LuceneObjectPtr other) - { - BooleanQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - return (getBoost() == otherQuery->getBoost() && - clauses.equals(otherQuery->clauses, luceneEquals()) && - getMinimumNumberShouldMatch() == otherQuery->getMinimumNumberShouldMatch() && - disableCoord == otherQuery->disableCoord); + + if (needParens) { + buffer += L")"; } - - int32_t BooleanQuery::hashCode() - { - return MiscUtils::doubleToIntBits(getBoost()) ^ MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene) + - getMinimumNumberShouldMatch() + (disableCoord ? 
17 : 0); + + if (getMinimumNumberShouldMatch() > 0) { + buffer += L"~"; + buffer += StringUtils::toString(getMinimumNumberShouldMatch()); } - - BooleanWeight::BooleanWeight(BooleanQueryPtr query, SearcherPtr searcher) - { - this->query = query; - this->similarity = query->getSimilarity(searcher); - weights = Collection::newInstance(); - for (Collection::iterator clause = query->clauses.begin(); clause != query->clauses.end(); ++clause) - weights.add((*clause)->getQuery()->createWeight(searcher)); + + if (getBoost() != 1.0) { + buffer += boostString(); } - - BooleanWeight::~BooleanWeight() - { + + return buffer; +} + +bool BooleanQuery::equals(const LuceneObjectPtr& other) { + BooleanQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; + } + return (getBoost() == otherQuery->getBoost() && + clauses.equals(otherQuery->clauses, luceneEquals()) && + getMinimumNumberShouldMatch() == otherQuery->getMinimumNumberShouldMatch() && + disableCoord == otherQuery->disableCoord); +} + +int32_t BooleanQuery::hashCode() { + return MiscUtils::doubleToIntBits(getBoost()) ^ MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene) + + getMinimumNumberShouldMatch() + (disableCoord ? 
17 : 0); +} + +BooleanWeight::BooleanWeight(const BooleanQueryPtr& query, const SearcherPtr& searcher) { + this->query = query; + this->similarity = query->getSimilarity(searcher); + weights = Collection::newInstance(); + for (Collection::iterator clause = query->clauses.begin(); clause != query->clauses.end(); ++clause) { + weights.add((*clause)->getQuery()->createWeight(searcher)); } - - QueryPtr BooleanWeight::getQuery() - { - return query; +} + +BooleanWeight::~BooleanWeight() { +} + +QueryPtr BooleanWeight::getQuery() { + return query; +} + +double BooleanWeight::getValue() { + return query->getBoost(); +} + +double BooleanWeight::sumOfSquaredWeights() { + double sum = 0.0; + for (int32_t i = 0; i < weights.size(); ++i) { + // call sumOfSquaredWeights for all clauses in case of side effects + double s = weights[i]->sumOfSquaredWeights(); // sum sub weights + if (!query->clauses[i]->isProhibited()) { + // only add to sum for non-prohibited clauses + sum += s; + } } - - double BooleanWeight::getValue() - { - return query->getBoost(); + + sum *= query->getBoost() * query->getBoost(); // boost each sub-weight + + return sum; +} + +void BooleanWeight::normalize(double norm) { + norm *= query->getBoost(); // incorporate boost + for (Collection::iterator w = weights.begin(); w != weights.end(); ++w) { + // normalize all clauses, (even if prohibited in case of side affects) + (*w)->normalize(norm); } - - double BooleanWeight::sumOfSquaredWeights() - { - double sum = 0.0; - for (int32_t i = 0; i < weights.size(); ++i) - { - // call sumOfSquaredWeights for all clauses in case of side effects - double s = weights[i]->sumOfSquaredWeights(); // sum sub weights - if (!query->clauses[i]->isProhibited()) - { - // only add to sum for non-prohibited clauses - sum += s; - } +} + +ExplanationPtr BooleanWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + int32_t minShouldMatch = query->getMinimumNumberShouldMatch(); + ComplexExplanationPtr sumExpl(newLucene()); + 
sumExpl->setDescription(L"sum of:"); + int32_t coord = 0; + int32_t maxCoord = 0; + double sum = 0.0; + bool fail = false; + int32_t shouldMatchCount = 0; + Collection::iterator c = query->clauses.begin(); + for (Collection::iterator w = weights.begin(); w != weights.end(); ++w, ++c) { + if (!(*w)->scorer(reader, true, true)) { + continue; } - - sum *= query->getBoost() * query->getBoost(); // boost each sub-weight - - return sum; - } - - void BooleanWeight::normalize(double norm) - { - norm *= query->getBoost(); // incorporate boost - for (Collection::iterator w = weights.begin(); w != weights.end(); ++w) - { - // normalize all clauses, (even if prohibited in case of side affects) - (*w)->normalize(norm); + ExplanationPtr e((*w)->explain(reader, doc)); + if (!(*c)->isProhibited()) { + ++maxCoord; } - } - - ExplanationPtr BooleanWeight::explain(IndexReaderPtr reader, int32_t doc) - { - int32_t minShouldMatch = query->getMinimumNumberShouldMatch(); - ComplexExplanationPtr sumExpl(newLucene()); - sumExpl->setDescription(L"sum of:"); - int32_t coord = 0; - int32_t maxCoord = 0; - double sum = 0.0; - bool fail = false; - int32_t shouldMatchCount = 0; - Collection::iterator c = query->clauses.begin(); - for (Collection::iterator w = weights.begin(); w != weights.end(); ++w, ++c) - { - if (!(*w)->scorer(reader, true, true)) - continue; - ExplanationPtr e((*w)->explain(reader, doc)); - if (!(*c)->isProhibited()) - ++maxCoord; - if (e->isMatch()) - { - if (!(*c)->isProhibited()) - { - sumExpl->addDetail(e); - sum += e->getValue(); - ++coord; - } - else - { - ExplanationPtr r(newLucene(0.0, L"match on prohibited clause (" + (*c)->getQuery()->toString() + L")")); - r->addDetail(e); - sumExpl->addDetail(r); - fail = true; - } - if ((*c)->getOccur() == BooleanClause::SHOULD) - ++shouldMatchCount; - } - else if ((*c)->isRequired()) - { - ExplanationPtr r(newLucene(0.0, L"no match on required clause (" + (*c)->getQuery()->toString() + L")")); + if (e->isMatch()) { + if 
(!(*c)->isProhibited()) { + sumExpl->addDetail(e); + sum += e->getValue(); + ++coord; + } else { + ExplanationPtr r(newLucene(0.0, L"match on prohibited clause (" + (*c)->getQuery()->toString() + L")")); r->addDetail(e); sumExpl->addDetail(r); fail = true; } + if ((*c)->getOccur() == BooleanClause::SHOULD) { + ++shouldMatchCount; + } + } else if ((*c)->isRequired()) { + ExplanationPtr r(newLucene(0.0, L"no match on required clause (" + (*c)->getQuery()->toString() + L")")); + r->addDetail(e); + sumExpl->addDetail(r); + fail = true; } - if (fail) - { - sumExpl->setMatch(false); - sumExpl->setValue(0.0); - sumExpl->setDescription(L"Failure to meet condition(s) of required/prohibited clause(s)"); - return sumExpl; - } - else if (shouldMatchCount < minShouldMatch) - { - sumExpl->setMatch(false); - sumExpl->setValue(0.0); - sumExpl->setDescription(L"Failure to match minimum number of optional clauses: " + StringUtils::toString(minShouldMatch)); - return sumExpl; - } - - sumExpl->setMatch(0 < coord); - sumExpl->setValue(sum); - double coordFactor = similarity->coord(coord, maxCoord); - if (coordFactor == 1.0) // coord is no-op - return sumExpl; // eliminate wrapper - else - { - ComplexExplanationPtr result(newLucene(sumExpl->isMatch(), sum * coordFactor, L"product of:")); - result->addDetail(sumExpl); - result->addDetail(newLucene(coordFactor, L"coord(" + StringUtils::toString(coord) + L"/" + StringUtils::toString(maxCoord) + L")")); - return result; - } } - - ScorerPtr BooleanWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - Collection required(Collection::newInstance()); - Collection prohibited(Collection::newInstance()); - Collection optional(Collection::newInstance()); - Collection::iterator c = query->clauses.begin(); - for (Collection::iterator w = weights.begin(); w != weights.end(); ++w, ++c) - { - ScorerPtr subScorer((*w)->scorer(reader, true, false)); - if (!subScorer) - { - if ((*c)->isRequired()) - return ScorerPtr(); + if 
(fail) { + sumExpl->setMatch(false); + sumExpl->setValue(0.0); + sumExpl->setDescription(L"Failure to meet condition(s) of required/prohibited clause(s)"); + return sumExpl; + } else if (shouldMatchCount < minShouldMatch) { + sumExpl->setMatch(false); + sumExpl->setValue(0.0); + sumExpl->setDescription(L"Failure to match minimum number of optional clauses: " + StringUtils::toString(minShouldMatch)); + return sumExpl; + } + + sumExpl->setMatch(0 < coord); + sumExpl->setValue(sum); + double coordFactor = similarity->coord(coord, maxCoord); + if (coordFactor == 1.0) { // coord is no-op + return sumExpl; // eliminate wrapper + } else { + ComplexExplanationPtr result(newLucene(sumExpl->isMatch(), sum * coordFactor, L"product of:")); + result->addDetail(sumExpl); + result->addDetail(newLucene(coordFactor, L"coord(" + StringUtils::toString(coord) + L"/" + StringUtils::toString(maxCoord) + L")")); + return result; + } +} + +ScorerPtr BooleanWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + Collection required(Collection::newInstance()); + Collection prohibited(Collection::newInstance()); + Collection optional(Collection::newInstance()); + Collection::iterator c = query->clauses.begin(); + for (Collection::iterator w = weights.begin(); w != weights.end(); ++w, ++c) { + ScorerPtr subScorer((*w)->scorer(reader, true, false)); + if (!subScorer) { + if ((*c)->isRequired()) { + return ScorerPtr(); } - else if ((*c)->isRequired()) - required.add(subScorer); - else if ((*c)->isProhibited()) - prohibited.add(subScorer); - else - optional.add(subScorer); - } - - // Check if we can return a BooleanScorer - if (!scoreDocsInOrder && topScorer && required.empty() && prohibited.size() < 32) - return newLucene(similarity, query->minNrShouldMatch, optional, prohibited); - - if (required.empty() && optional.empty()) - { - // no required and optional clauses. 
- return ScorerPtr(); + } else if ((*c)->isRequired()) { + required.add(subScorer); + } else if ((*c)->isProhibited()) { + prohibited.add(subScorer); + } else { + optional.add(subScorer); } - else if (optional.size() < query->minNrShouldMatch) - { - // either >1 req scorer, or there are 0 req scorers and at least 1 optional scorer. Therefore if there - // are not enough optional scorers no documents will be matched by the query - return ScorerPtr(); - } - - // Return a BooleanScorer2 - return newLucene(similarity, query->minNrShouldMatch, required, prohibited, optional); } - - bool BooleanWeight::scoresDocsOutOfOrder() - { - int32_t numProhibited = 0; - for (Collection::iterator c = query->clauses.begin(); c != query->clauses.end(); ++c) - { - if ((*c)->isRequired()) - return false; // BS2 (in-order) will be used by scorer() - else if ((*c)->isProhibited()) - ++numProhibited; - } - - if (numProhibited > 32) // cannot use BS - return false; - - // scorer() will return an out-of-order scorer if requested. - return true; + + // Check if we can return a BooleanScorer + if (!scoreDocsInOrder && topScorer && required.empty() && prohibited.size() < 32) { + return newLucene(similarity, query->minNrShouldMatch, optional, prohibited); } - - SimilarityDisableCoord::SimilarityDisableCoord(SimilarityPtr delegee) : SimilarityDelegator(delegee) - { + + if (required.empty() && optional.empty()) { + // no required and optional clauses. + return ScorerPtr(); + } else if (optional.size() < query->minNrShouldMatch) { + // either >1 req scorer, or there are 0 req scorers and at least 1 optional scorer. 
Therefore if there + // are not enough optional scorers no documents will be matched by the query + return ScorerPtr(); } - - SimilarityDisableCoord::~SimilarityDisableCoord() - { + + // Return a BooleanScorer2 + return newLucene(similarity, query->minNrShouldMatch, required, prohibited, optional); +} + +bool BooleanWeight::scoresDocsOutOfOrder() { + int32_t numProhibited = 0; + for (Collection::iterator c = query->clauses.begin(); c != query->clauses.end(); ++c) { + if ((*c)->isRequired()) { + return false; // BS2 (in-order) will be used by scorer() + } else if ((*c)->isProhibited()) { + ++numProhibited; + } } - - double SimilarityDisableCoord::coord(int32_t overlap, int32_t maxOverlap) - { - return 1.0; // disable coord + + if (numProhibited > 32) { // cannot use BS + return false; } + + // scorer() will return an out-of-order scorer if requested. + return true; +} + +SimilarityDisableCoord::SimilarityDisableCoord(const SimilarityPtr& delegee) : SimilarityDelegator(delegee) { +} + +SimilarityDisableCoord::~SimilarityDisableCoord() { +} + +double SimilarityDisableCoord::coord(int32_t overlap, int32_t maxOverlap) { + return 1.0; // disable coord +} + } diff --git a/src/core/search/BooleanScorer.cpp b/src/core/search/BooleanScorer.cpp index 5e63c864..ace799d0 100644 --- a/src/core/search/BooleanScorer.cpp +++ b/src/core/search/BooleanScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,315 +8,273 @@ #include "BooleanScorer.h" #include "Similarity.h" -namespace Lucene -{ - BooleanScorer::BooleanScorer(SimilarityPtr similarity, int32_t minNrShouldMatch, Collection optionalScorers, Collection prohibitedScorers) : Scorer(similarity) - { - this->bucketTable = newLucene(); - this->maxCoord = 1; - this->requiredMask = 0; - this->prohibitedMask = 0; - this->nextMask = 1; - this->minNrShouldMatch = minNrShouldMatch; - this->end = 0; - this->doc = -1; - - if (optionalScorers && !optionalScorers.empty()) - { - for (Collection::iterator scorer = optionalScorers.begin(); scorer != optionalScorers.end(); ++scorer) - { - ++maxCoord; - if ((*scorer)->nextDoc() != NO_MORE_DOCS) - scorers = newLucene(*scorer, false, false, bucketTable->newCollector(0), scorers); +namespace Lucene { + +BooleanScorer::BooleanScorer(const SimilarityPtr& similarity, int32_t minNrShouldMatch, Collection optionalScorers, Collection prohibitedScorers) : Scorer(similarity) { + this->bucketTable = newLucene(); + this->maxCoord = 1; + this->requiredMask = 0; + this->prohibitedMask = 0; + this->nextMask = 1; + this->minNrShouldMatch = minNrShouldMatch; + this->end = 0; + this->doc = -1; + + if (optionalScorers && !optionalScorers.empty()) { + for (Collection::iterator scorer = optionalScorers.begin(); scorer != optionalScorers.end(); ++scorer) { + ++maxCoord; + if ((*scorer)->nextDoc() != NO_MORE_DOCS) { + scorers = newLucene(*scorer, false, false, bucketTable->newCollector(0), scorers); } } - - if (prohibitedScorers && !prohibitedScorers.empty()) - { - for (Collection::iterator scorer = prohibitedScorers.begin(); scorer != prohibitedScorers.end(); ++scorer) - { - int32_t mask = nextMask; - nextMask = nextMask << 1; - prohibitedMask |= mask; // update prohibited mask - if ((*scorer)->nextDoc() != NO_MORE_DOCS) - scorers = newLucene(*scorer, false, true, bucketTable->newCollector(mask), scorers); + } + + if 
(prohibitedScorers && !prohibitedScorers.empty()) { + for (Collection::iterator scorer = prohibitedScorers.begin(); scorer != prohibitedScorers.end(); ++scorer) { + int32_t mask = nextMask; + nextMask = nextMask << 1; + prohibitedMask |= mask; // update prohibited mask + if ((*scorer)->nextDoc() != NO_MORE_DOCS) { + scorers = newLucene(*scorer, false, true, bucketTable->newCollector(mask), scorers); } } - - coordFactors = Collection::newInstance(maxCoord); - SimilarityPtr sim(getSimilarity()); - for (int32_t i = 0; i < maxCoord; ++i) - coordFactors[i] = sim->coord(i, maxCoord - 1); } - - BooleanScorer::~BooleanScorer() - { + + coordFactors = Collection::newInstance(maxCoord); + SimilarityPtr sim(getSimilarity()); + for (int32_t i = 0; i < maxCoord; ++i) { + coordFactors[i] = sim->coord(i, maxCoord - 1); } - - bool BooleanScorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) - { - bool more = false; - BucketPtr tmp; - BucketScorerPtr bs(newLucene()); - // The internal loop will set the score and doc before calling collect. - collector->setScorer(bs); - do - { - bucketTable->first.reset(); - - while (current) // more queued - { - // check prohibited & required - if ((current->bits & prohibitedMask) == 0 && (current->bits & requiredMask) == requiredMask) - { - if (current->doc >= max) - { - tmp = current; - current = current->_next.lock(); - tmp->_next = bucketTable->first; - bucketTable->first = tmp; - continue; - } - - if (current->coord >= minNrShouldMatch) - { - bs->_score = current->score * coordFactors[current->coord]; - bs->doc = current->doc; - collector->collect(current->doc); - } +} + +BooleanScorer::~BooleanScorer() { +} + +bool BooleanScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { + bool more = false; + Bucket* __tmp; + BucketScorerPtr bs(newLucene()); + // The internal loop will set the score and doc before calling collect. 
+ collector->setScorer(bs); + do { + bucketTable->__first = nullptr; + + while (__current) { // more queued + // check prohibited & required + if ((__current->bits & prohibitedMask) == 0 && (__current->bits & requiredMask) == requiredMask) { + if (__current->doc >= max) { + __tmp = __current; + __current = __current->__next; + __tmp->__next = bucketTable->__first; + bucketTable->__first = __tmp; + continue; } - - current = current->_next.lock(); // pop the queue - } - - if (bucketTable->first) - { - current = bucketTable->first; - bucketTable->first = current->_next.lock(); - return true; - } - - // refill the queue - more = false; - end += BucketTable::SIZE; - - for (SubScorerPtr sub(scorers); sub; sub = sub->next) - { - int32_t subScorerDocID = sub->scorer->docID(); - if (subScorerDocID != NO_MORE_DOCS) - { - if (sub->scorer->score(sub->collector, end, subScorerDocID)) - more = true; + + if (__current->coord >= minNrShouldMatch) { + auto s = coordFactors.size(); + bs->_score = __current->score * coordFactors[__current->coord]; + bs->doc = __current->doc; + bs->freq = __current->coord; + collector->collect(__current->doc); } } - current = bucketTable->first; + + __current = __current->__next; // pop the queue } - while (current || more); - - return false; - } - - int32_t BooleanScorer::advance(int32_t target) - { - boost::throw_exception(UnsupportedOperationException()); - return 0; - } - - int32_t BooleanScorer::docID() - { - return doc; - } - - int32_t BooleanScorer::nextDoc() - { - bool more = false; - do - { - while (bucketTable->first) // more queued - { - current = bucketTable->first; - bucketTable->first = current->_next.lock(); // pop the queue - - // check prohibited & required and minNrShouldMatch - if ((current->bits & prohibitedMask) == 0 && (current->bits & requiredMask) == requiredMask && current->coord >= minNrShouldMatch) - { - doc = current->doc; - return doc; - } - } - - // refill the queue - more = false; - end += BucketTable::SIZE; - - for 
(SubScorerPtr sub(scorers); sub; sub = sub->next) - { - ScorerPtr scorer(sub->scorer); - sub->collector->setScorer(scorer); - int32_t doc = scorer->docID(); - while (doc < end) - { - sub->collector->collect(doc); - doc = scorer->nextDoc(); - } - if (doc != NO_MORE_DOCS) + + if (bucketTable->__first) { + __current = bucketTable->__first; + bucketTable->__first = __current->__next; + return true; + } + + // refill the queue + more = false; + end += BucketTable::SIZE; + + for (SubScorerPtr sub(scorers); sub; sub = sub->next) { + int32_t subScorerDocID = sub->scorer->docID(); + if (subScorerDocID != NO_MORE_DOCS) { + if (sub->scorer->score(sub->collector, end, subScorerDocID)) { more = true; + } } } - while (bucketTable->first || more); - - doc = NO_MORE_DOCS; - return doc; - } - - double BooleanScorer::score() - { - return current->score * coordFactors[current->coord]; - } - - void BooleanScorer::score(CollectorPtr collector) - { - score(collector, INT_MAX, nextDoc()); - } - - String BooleanScorer::toString() - { - StringStream buffer; - buffer << L"boolean("; - for (SubScorerPtr sub(scorers); sub; sub = sub->next) - buffer << sub->scorer->toString() << L" "; - buffer << L")"; - return buffer.str(); - } - - BooleanScorerCollector::BooleanScorerCollector(int32_t mask, BucketTablePtr bucketTable) - { - this->mask = mask; - this->_bucketTable = bucketTable; - } - - BooleanScorerCollector::~BooleanScorerCollector() - { - } - - void BooleanScorerCollector::collect(int32_t doc) - { - BucketTablePtr table(_bucketTable); - int32_t i = doc & BucketTable::MASK; - BucketPtr bucket(table->buckets[i]); - if (!bucket) - { - bucket = newLucene(); - table->buckets[i] = bucket; - } - - if (bucket->doc != doc) // invalid bucket - { - bucket->doc = doc; // set doc - bucket->score = ScorerPtr(_scorer)->score(); // initialize score - bucket->bits = mask; // initialize mask - bucket->coord = 1; // initialize coord - - bucket->_next = table->first; // push onto valid list - table->first = 
bucket; + __current = bucketTable->__first; + } while (__current || more); + + return false; +} + +int32_t BooleanScorer::advance(int32_t target) { + boost::throw_exception(UnsupportedOperationException()); + return 0; +} + +int32_t BooleanScorer::docID() { + return doc; +} + +int32_t BooleanScorer::nextDoc() { + bool more = false; + do { + while (bucketTable->__first) { // more queued + __current = bucketTable->__first; + bucketTable->__first = __current->__next; // pop the queue + + // check prohibited & required and minNrShouldMatch + if ((__current->bits & prohibitedMask) == 0 && (__current->bits & requiredMask) == requiredMask && __current->coord >= minNrShouldMatch) { + doc = __current->doc; + return doc; + } } - else - { - bucket->score += ScorerPtr(_scorer)->score(); // increment score - bucket->bits |= mask; // add bits in mask - ++bucket->coord; // increment coord + + // refill the queue + more = false; + end += BucketTable::SIZE; + + for (SubScorerPtr sub(scorers); sub; sub = sub->next) { + ScorerPtr scorer(sub->scorer); + sub->collector->setScorer(scorer); + int32_t doc = scorer->docID(); + while (doc < end) { + sub->collector->collect(doc); + doc = scorer->nextDoc(); + } + if (doc != NO_MORE_DOCS) { + more = true; + } } + } while (bucketTable->__first || more); + + doc = NO_MORE_DOCS; + return doc; +} + +inline double BooleanScorer::score() { + return __current->score * coordFactors[__current->coord]; +} + +void BooleanScorer::score(const CollectorPtr& collector) { + score(collector, INT_MAX, nextDoc()); +} + +String BooleanScorer::toString() { + StringStream buffer; + buffer << L"boolean("; + for (SubScorerPtr sub(scorers); sub; sub = sub->next) { + buffer << sub->scorer->toString() << L" "; } - - void BooleanScorerCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - // not needed by this implementation - } - - void BooleanScorerCollector::setScorer(ScorerPtr scorer) - { - this->_scorer = scorer; - } - - bool 
BooleanScorerCollector::acceptsDocsOutOfOrder() - { - return true; - } - - BucketScorer::BucketScorer() : Scorer(SimilarityPtr()) - { - _score = 0; - doc = NO_MORE_DOCS; - } - - BucketScorer::~BucketScorer() - { - } - - int32_t BucketScorer::advance(int32_t target) - { - return NO_MORE_DOCS; - } - - int32_t BucketScorer::docID() - { - return doc; - } - - int32_t BucketScorer::nextDoc() - { - return NO_MORE_DOCS; - } - - double BucketScorer::score() - { - return _score; - } - - Bucket::Bucket() - { - doc = -1; - score = 0; - bits = 0; - coord = 0; - } - - Bucket::~Bucket() - { - } - - const int32_t BucketTable::SIZE = 1 << 11; - const int32_t BucketTable::MASK = BucketTable::SIZE - 1; - - BucketTable::BucketTable() - { - buckets = Collection::newInstance(SIZE); - } - - BucketTable::~BucketTable() - { - } - - CollectorPtr BucketTable::newCollector(int32_t mask) - { - return newLucene(mask, shared_from_this()); - } - - int32_t BucketTable::size() - { - return SIZE; - } - - SubScorer::SubScorer(ScorerPtr scorer, bool required, bool prohibited, CollectorPtr collector, SubScorerPtr next) - { - this->scorer = scorer; - this->required = required; - this->prohibited = prohibited; - this->collector = collector; - this->next = next; + buffer << L")"; + return buffer.str(); +} + +BooleanScorerCollector::BooleanScorerCollector(int32_t mask, const BucketTablePtr& bucketTable) { + this->mask = mask; + this->_bucketTable = bucketTable; + this->__bucketTable = bucketTable.get(); +} + +BooleanScorerCollector::~BooleanScorerCollector() { +} + +void BooleanScorerCollector::collect(int32_t doc) { + auto* table = __bucketTable; + int32_t i = doc & BucketTable::MASK; + auto& bucket = table->buckets[i]; + if (!bucket) { + bucket = newLucene(); } - - SubScorer::~SubScorer() - { + auto* __bucket = bucket.get(); + if (__bucket->doc != doc) { // invalid bucket + __bucket->doc = doc; // set doc + __bucket->score = __scorer->score(); // initialize score + __bucket->bits = mask; // initialize 
mask + __bucket->coord = 1; // initialize coord + + __bucket->__next = table->__first; // push onto valid list + table->__first = __bucket; + } else { + __bucket->score += __scorer->score(); // increment score + __bucket->bits |= mask; // add bits in mask + ++__bucket->coord; // increment coord } } + +void BooleanScorerCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + // not needed by this implementation +} + +void BooleanScorerCollector::setScorer(const ScorerPtr& scorer) { + this->_scorer = scorer; + this->__scorer = scorer.get(); +} + +bool BooleanScorerCollector::acceptsDocsOutOfOrder() { + return true; +} + +BucketScorer::BucketScorer() : Scorer(SimilarityPtr()) { + _score = 0; + doc = NO_MORE_DOCS; +} + +BucketScorer::~BucketScorer() { +} + +int32_t BucketScorer::advance(int32_t target) { + return NO_MORE_DOCS; +} + +int32_t BucketScorer::docID() { + return doc; +} + +int32_t BucketScorer::nextDoc() { + return NO_MORE_DOCS; +} + +double BucketScorer::score() { + return _score; +} + +Bucket::Bucket() { + doc = -1; + score = 0; + bits = 0; + coord = 0; +} + +Bucket::~Bucket() { +} + +const int32_t BucketTable::SIZE = 1 << 11; +const int32_t BucketTable::MASK = BucketTable::SIZE - 1; + +BucketTable::BucketTable() { + buckets = Collection::newInstance(SIZE); +} + +BucketTable::~BucketTable() { +} + +CollectorPtr BucketTable::newCollector(int32_t mask) { + return newLucene(mask, shared_from_this()); +} + +int32_t BucketTable::size() { + return SIZE; +} + +SubScorer::SubScorer(const ScorerPtr& scorer, bool required, bool prohibited, const CollectorPtr& collector, const SubScorerPtr& next) { + this->scorer = scorer; + this->required = required; + this->prohibited = prohibited; + this->collector = collector; + this->next = next; +} + +SubScorer::~SubScorer() { +} + +} diff --git a/src/core/search/BooleanScorer2.cpp b/src/core/search/BooleanScorer2.cpp index fe4ac3a2..81010f09 100644 --- a/src/core/search/BooleanScorer2.cpp +++ 
b/src/core/search/BooleanScorer2.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,253 +11,218 @@ #include "Similarity.h" #include "Collector.h" -namespace Lucene -{ - BooleanScorer2::BooleanScorer2(SimilarityPtr similarity, int32_t minNrShouldMatch, Collection required, Collection prohibited, Collection optional) : Scorer(similarity) - { - this->minNrShouldMatch = minNrShouldMatch; - this->requiredScorers = required; - this->prohibitedScorers = prohibited; - this->optionalScorers = optional; - this->doc = -1; - } - - BooleanScorer2::~BooleanScorer2() - { - } - - void BooleanScorer2::initialize() - { - if (minNrShouldMatch < 0) - boost::throw_exception(IllegalArgumentException(L"Minimum number of optional scorers should not be negative")); - - coordinator = newLucene(shared_from_this()); - coordinator->maxCoord += optionalScorers.size(); - coordinator->maxCoord += requiredScorers.size(); - - coordinator->init(); - countingSumScorer = makeCountingSumScorer(); - } - - ScorerPtr BooleanScorer2::countingDisjunctionSumScorer(Collection scorers, int32_t minNrShouldMatch) - { - // each scorer from the list counted as a single matcher - return newLucene(shared_from_this(), scorers, minNrShouldMatch); - } - - ScorerPtr BooleanScorer2::countingConjunctionSumScorer(Collection requiredScorers) - { - // each scorer from the list counted as a single matcher - return newLucene(shared_from_this(), Similarity::getDefault(), requiredScorers); - } - - ScorerPtr BooleanScorer2::dualConjunctionSumScorer(ScorerPtr req1, ScorerPtr req2) - { - Collection scorers(newCollection(req1, req2)); - - // All scorers match, so 
Similarity::getDefault() always has 1 as the coordination factor. - // Therefore the sum of the scores of two scorers is used as score. - return newLucene(Similarity::getDefault(), scorers); - } - - ScorerPtr BooleanScorer2::makeCountingSumScorer() - { - return requiredScorers.empty() ? makeCountingSumScorerNoReq() : makeCountingSumScorerSomeReq(); - } - - ScorerPtr BooleanScorer2::makeCountingSumScorerNoReq() - { - // minNrShouldMatch optional scorers are required, but at least 1 - int32_t nrOptRequired = minNrShouldMatch < 1 ? 1 : minNrShouldMatch; - ScorerPtr requiredCountingSumScorer; - if (optionalScorers.size() > nrOptRequired) - requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired); - else if (optionalScorers.size() == 1) - requiredCountingSumScorer = newLucene(optionalScorers[0], coordinator); - else - requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers); - return addProhibitedScorers(requiredCountingSumScorer); - } - - ScorerPtr BooleanScorer2::makeCountingSumScorerSomeReq() - { - if (optionalScorers.size() == minNrShouldMatch) // all optional scorers also required. - { - Collection allReq(Collection::newInstance(requiredScorers.begin(), requiredScorers.end())); - allReq.addAll(optionalScorers.begin(), optionalScorers.end()); - return addProhibitedScorers(countingConjunctionSumScorer(allReq)); - } - else // optionalScorers.size() > minNrShouldMatch, and at least one required scorer - { - ScorerPtr requiredCountingSumScorer = requiredScorers.size() == 1 ? 
newLucene(requiredScorers[0], coordinator) : countingConjunctionSumScorer(requiredScorers); - if (minNrShouldMatch > 0) // use a required disjunction scorer over the optional scorers - return addProhibitedScorers(dualConjunctionSumScorer(requiredCountingSumScorer, countingDisjunctionSumScorer(optionalScorers, minNrShouldMatch))); - else // minNrShouldMatch == 0 - return newLucene(addProhibitedScorers(requiredCountingSumScorer), optionalScorers.size() == 1 ? newLucene(optionalScorers[0], coordinator) : countingDisjunctionSumScorer(optionalScorers, 1)); - } - } - - ScorerPtr BooleanScorer2::addProhibitedScorers(ScorerPtr requiredCountingSumScorer) - { - return prohibitedScorers.empty() ? requiredCountingSumScorer : newLucene(requiredCountingSumScorer, (prohibitedScorers.size() == 1 ? prohibitedScorers[0] : newLucene(prohibitedScorers))); - } - - void BooleanScorer2::score(CollectorPtr collector) - { - collector->setScorer(shared_from_this()); - while ((doc = countingSumScorer->nextDoc()) != NO_MORE_DOCS) - collector->collect(doc); +namespace Lucene { + +BooleanScorer2::BooleanScorer2(const SimilarityPtr& similarity, int32_t minNrShouldMatch, Collection required, Collection prohibited, Collection optional) : Scorer(similarity) { + this->minNrShouldMatch = minNrShouldMatch; + this->requiredScorers = required; + this->prohibitedScorers = prohibited; + this->optionalScorers = optional; + this->doc = -1; +} + +BooleanScorer2::~BooleanScorer2() { +} + +void BooleanScorer2::initialize() { + if (minNrShouldMatch < 0) { + boost::throw_exception(IllegalArgumentException(L"Minimum number of optional scorers should not be negative")); } - - bool BooleanScorer2::score(CollectorPtr collector, int32_t max, int32_t firstDocID) - { - doc = firstDocID; - collector->setScorer(shared_from_this()); - while (doc < max) - { - collector->collect(doc); - doc = countingSumScorer->nextDoc(); + + coordinator = newLucene(shared_from_this()); + coordinator->maxCoord += optionalScorers.size(); + 
coordinator->maxCoord += requiredScorers.size(); + + coordinator->init(); + countingSumScorer = makeCountingSumScorer(); +} + +ScorerPtr BooleanScorer2::countingDisjunctionSumScorer(Collection scorers, int32_t minNrShouldMatch) { + // each scorer from the list counted as a single matcher + return newLucene(shared_from_this(), scorers, minNrShouldMatch); +} + +ScorerPtr BooleanScorer2::countingConjunctionSumScorer(Collection requiredScorers) { + // each scorer from the list counted as a single matcher + return newLucene(shared_from_this(), Similarity::getDefault(), requiredScorers); +} + +ScorerPtr BooleanScorer2::dualConjunctionSumScorer(const ScorerPtr& req1, const ScorerPtr& req2) { + Collection scorers(newCollection(req1, req2)); + + // All scorers match, so Similarity::getDefault() always has 1 as the coordination factor. + // Therefore the sum of the scores of two scorers is used as score. + return newLucene(Similarity::getDefault(), scorers); +} + +ScorerPtr BooleanScorer2::makeCountingSumScorer() { + return requiredScorers.empty() ? makeCountingSumScorerNoReq() : makeCountingSumScorerSomeReq(); +} + +ScorerPtr BooleanScorer2::makeCountingSumScorerNoReq() { + // minNrShouldMatch optional scorers are required, but at least 1 + int32_t nrOptRequired = minNrShouldMatch < 1 ? 1 : minNrShouldMatch; + ScorerPtr requiredCountingSumScorer; + if (optionalScorers.size() > nrOptRequired) { + requiredCountingSumScorer = countingDisjunctionSumScorer(optionalScorers, nrOptRequired); + } else if (optionalScorers.size() == 1) { + requiredCountingSumScorer = newLucene(optionalScorers[0], coordinator); + } else { + requiredCountingSumScorer = countingConjunctionSumScorer(optionalScorers); + } + return addProhibitedScorers(requiredCountingSumScorer); +} + +ScorerPtr BooleanScorer2::makeCountingSumScorerSomeReq() { + if (optionalScorers.size() == minNrShouldMatch) { // all optional scorers also required. 
+ Collection allReq(Collection::newInstance(requiredScorers.begin(), requiredScorers.end())); + allReq.addAll(optionalScorers.begin(), optionalScorers.end()); + return addProhibitedScorers(countingConjunctionSumScorer(allReq)); + } else { // optionalScorers.size() > minNrShouldMatch, and at least one required scorer + ScorerPtr requiredCountingSumScorer = requiredScorers.size() == 1 ? newLucene(requiredScorers[0], coordinator) : countingConjunctionSumScorer(requiredScorers); + if (minNrShouldMatch > 0) { // use a required disjunction scorer over the optional scorers + return addProhibitedScorers(dualConjunctionSumScorer(requiredCountingSumScorer, countingDisjunctionSumScorer(optionalScorers, minNrShouldMatch))); + } else { // minNrShouldMatch == 0 + return newLucene(addProhibitedScorers(requiredCountingSumScorer), optionalScorers.size() == 1 ? newLucene(optionalScorers[0], coordinator) : countingDisjunctionSumScorer(optionalScorers, 1)); } - return (doc != NO_MORE_DOCS); } - - int32_t BooleanScorer2::docID() - { - return doc; +} + +ScorerPtr BooleanScorer2::addProhibitedScorers(const ScorerPtr& requiredCountingSumScorer) { + return prohibitedScorers.empty() ? requiredCountingSumScorer : newLucene(requiredCountingSumScorer, (prohibitedScorers.size() == 1 ? 
prohibitedScorers[0] : newLucene(prohibitedScorers))); +} + +void BooleanScorer2::score(const CollectorPtr& collector) { + collector->setScorer(shared_from_this()); + while ((doc = countingSumScorer->nextDoc()) != NO_MORE_DOCS) { + collector->collect(doc); } +} - int32_t BooleanScorer2::nextDoc() - { +bool BooleanScorer2::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { + doc = firstDocID; + collector->setScorer(shared_from_this()); + while (doc < max) { + collector->collect(doc); doc = countingSumScorer->nextDoc(); - return doc; - } - - double BooleanScorer2::score() - { - coordinator->nrMatchers = 0; - double sum = countingSumScorer->score(); - return sum * coordinator->coordFactors[coordinator->nrMatchers]; - } - - int32_t BooleanScorer2::advance(int32_t target) - { - doc = countingSumScorer->advance(target); - return doc; - } - - Coordinator::Coordinator(BooleanScorer2Ptr scorer) - { - _scorer = scorer; - maxCoord = 0; - nrMatchers = 0; } - - Coordinator::~Coordinator() - { - } - - void Coordinator::init() - { - coordFactors = Collection::newInstance(maxCoord + 1); - SimilarityPtr sim(BooleanScorer2Ptr(_scorer)->getSimilarity()); - for (int32_t i = 0; i <= maxCoord; ++i) - coordFactors[i] = sim->coord(i, maxCoord); - } - - SingleMatchScorer::SingleMatchScorer(ScorerPtr scorer, CoordinatorPtr coordinator) : Scorer(scorer->getSimilarity()) - { - lastScoredDoc = -1; - lastDocScore = std::numeric_limits::quiet_NaN(); - this->scorer = scorer; - this->coordinator = coordinator; - } - - SingleMatchScorer::~SingleMatchScorer() - { + return (doc != NO_MORE_DOCS); +} + +int32_t BooleanScorer2::docID() { + return doc; +} + +int32_t BooleanScorer2::nextDoc() { + doc = countingSumScorer->nextDoc(); + return doc; +} + +double BooleanScorer2::score() { + coordinator->nrMatchers = 0; + double sum = countingSumScorer->score(); + return sum * coordinator->coordFactors[coordinator->nrMatchers]; +} + +int32_t BooleanScorer2::advance(int32_t target) { + doc = 
countingSumScorer->advance(target); + return doc; +} + +Coordinator::Coordinator(const BooleanScorer2Ptr& scorer) { + _scorer = scorer; + maxCoord = 0; + nrMatchers = 0; +} + +Coordinator::~Coordinator() { +} + +void Coordinator::init() { + coordFactors = Collection::newInstance(maxCoord + 1); + SimilarityPtr sim(BooleanScorer2Ptr(_scorer)->getSimilarity()); + for (int32_t i = 0; i <= maxCoord; ++i) { + coordFactors[i] = sim->coord(i, maxCoord); } - - double SingleMatchScorer::score() - { - int32_t doc = docID(); - if (doc >= lastScoredDoc) - { - if (doc > lastScoredDoc) - { - lastDocScore = scorer->score(); - lastScoredDoc = doc; - } - ++coordinator->nrMatchers; +} + +SingleMatchScorer::SingleMatchScorer(const ScorerPtr& scorer, const CoordinatorPtr& coordinator) : Scorer(scorer->getSimilarity()) { + lastScoredDoc = -1; + lastDocScore = std::numeric_limits::quiet_NaN(); + this->scorer = scorer; + this->coordinator = coordinator; +} + +SingleMatchScorer::~SingleMatchScorer() { +} + +double SingleMatchScorer::score() { + int32_t doc = docID(); + if (doc >= lastScoredDoc) { + if (doc > lastScoredDoc) { + lastDocScore = scorer->score(); + lastScoredDoc = doc; } - return lastDocScore; - } - - int32_t SingleMatchScorer::docID() - { - return scorer->docID(); - } - - int32_t SingleMatchScorer::nextDoc() - { - return scorer->nextDoc(); - } - - int32_t SingleMatchScorer::advance(int32_t target) - { - return scorer->advance(target); - } - - CountingDisjunctionSumScorer::CountingDisjunctionSumScorer(BooleanScorer2Ptr scorer, Collection subScorers, int32_t minimumNrMatchers) : DisjunctionSumScorer(subScorers, minimumNrMatchers) - { - _scorer = scorer; - lastScoredDoc = -1; - lastDocScore = std::numeric_limits::quiet_NaN(); - } - - CountingDisjunctionSumScorer::~CountingDisjunctionSumScorer() - { + ++coordinator->nrMatchers; } - - double CountingDisjunctionSumScorer::score() - { - int32_t doc = docID(); - if (doc >= lastScoredDoc) - { - if (doc > lastScoredDoc) - { - 
lastDocScore = DisjunctionSumScorer::score(); - lastScoredDoc = doc; - } - BooleanScorer2Ptr(_scorer)->coordinator->nrMatchers += DisjunctionSumScorer::_nrMatchers; + return lastDocScore; +} + +int32_t SingleMatchScorer::docID() { + return scorer->docID(); +} + +int32_t SingleMatchScorer::nextDoc() { + return scorer->nextDoc(); +} + +int32_t SingleMatchScorer::advance(int32_t target) { + return scorer->advance(target); +} + +CountingDisjunctionSumScorer::CountingDisjunctionSumScorer(const BooleanScorer2Ptr& scorer, Collection subScorers, int32_t minimumNrMatchers) : DisjunctionSumScorer(subScorers, minimumNrMatchers) { + _scorer = scorer; + lastScoredDoc = -1; + lastDocScore = std::numeric_limits::quiet_NaN(); +} + +CountingDisjunctionSumScorer::~CountingDisjunctionSumScorer() { +} + +double CountingDisjunctionSumScorer::score() { + int32_t doc = docID(); + if (doc >= lastScoredDoc) { + if (doc > lastScoredDoc) { + lastDocScore = DisjunctionSumScorer::score(); + lastScoredDoc = doc; } - return lastDocScore; - } - - CountingConjunctionSumScorer::CountingConjunctionSumScorer(BooleanScorer2Ptr scorer, SimilarityPtr similarity, Collection scorers) : ConjunctionScorer(similarity, scorers) - { - _scorer = scorer; - lastScoredDoc = -1; - requiredNrMatchers = scorers.size(); - lastDocScore = std::numeric_limits::quiet_NaN(); - } - - CountingConjunctionSumScorer::~CountingConjunctionSumScorer() - { + BooleanScorer2Ptr(_scorer)->coordinator->nrMatchers += DisjunctionSumScorer::_nrMatchers; } - - double CountingConjunctionSumScorer::score() - { - int32_t doc = docID(); - if (doc >= lastScoredDoc) - { - if (doc > lastScoredDoc) - { - lastDocScore = ConjunctionScorer::score(); - lastScoredDoc = doc; - } - BooleanScorer2Ptr(_scorer)->coordinator->nrMatchers += requiredNrMatchers; + return lastDocScore; +} + +CountingConjunctionSumScorer::CountingConjunctionSumScorer(const BooleanScorer2Ptr& scorer, const SimilarityPtr& similarity, Collection scorers) : 
ConjunctionScorer(similarity, scorers) { + _scorer = scorer; + lastScoredDoc = -1; + requiredNrMatchers = scorers.size(); + lastDocScore = std::numeric_limits::quiet_NaN(); +} + +CountingConjunctionSumScorer::~CountingConjunctionSumScorer() { +} + +double CountingConjunctionSumScorer::score() { + int32_t doc = docID(); + if (doc >= lastScoredDoc) { + if (doc > lastScoredDoc) { + lastDocScore = ConjunctionScorer::score(); + lastScoredDoc = doc; } - // All scorers match, so Similarity::getDefault() ConjunctionScorer::score() always has 1 as the - /// coordination factor. Therefore the sum of the scores of the requiredScorers is used as score. - return lastDocScore; + BooleanScorer2Ptr(_scorer)->coordinator->nrMatchers += requiredNrMatchers; } + // All scorers match, so Similarity::getDefault() ConjunctionScorer::score() always has 1 as the + /// coordination factor. Therefore the sum of the scores of the requiredScorers is used as score. + return lastDocScore; +} + } diff --git a/src/core/search/CachingSpanFilter.cpp b/src/core/search/CachingSpanFilter.cpp index eb55babb..9a449342 100644 --- a/src/core/search/CachingSpanFilter.cpp +++ b/src/core/search/CachingSpanFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,86 +10,78 @@ #include "SpanFilterResult.h" #include "IndexReader.h" -namespace Lucene -{ - CachingSpanFilter::CachingSpanFilter(SpanFilterPtr filter, CachingWrapperFilter::DeletesMode deletesMode) - { - this->filter = filter; - if (deletesMode == CachingWrapperFilter::DELETES_DYNAMIC) - boost::throw_exception(IllegalArgumentException(L"DeletesMode::DYNAMIC is not supported")); - this->cache = newLucene(deletesMode); - this->hitCount = 0; - this->missCount = 0; - } - - CachingSpanFilter::~CachingSpanFilter() - { - } - - DocIdSetPtr CachingSpanFilter::getDocIdSet(IndexReaderPtr reader) - { - SpanFilterResultPtr result(getCachedResult(reader)); - return result ? result->getDocIdSet() : DocIdSetPtr(); +namespace Lucene { + +CachingSpanFilter::CachingSpanFilter(const SpanFilterPtr& filter, CachingWrapperFilter::DeletesMode deletesMode) { + this->filter = filter; + if (deletesMode == CachingWrapperFilter::DELETES_DYNAMIC) { + boost::throw_exception(IllegalArgumentException(L"DeletesMode::DYNAMIC is not supported")); } - - SpanFilterResultPtr CachingSpanFilter::getCachedResult(IndexReaderPtr reader) - { - LuceneObjectPtr coreKey = reader->getFieldCacheKey(); - LuceneObjectPtr delCoreKey = reader->hasDeletions() ? reader->getDeletesCacheKey() : coreKey; - - SpanFilterResultPtr result(boost::dynamic_pointer_cast(cache->get(reader, coreKey, delCoreKey))); - if (result) - { - ++hitCount; - return result; - } - - ++missCount; - result = filter->bitSpans(reader); - - cache->put(coreKey, delCoreKey, result); - + this->cache = newLucene(deletesMode); + this->hitCount = 0; + this->missCount = 0; +} + +CachingSpanFilter::~CachingSpanFilter() { +} + +DocIdSetPtr CachingSpanFilter::getDocIdSet(const IndexReaderPtr& reader) { + SpanFilterResultPtr result(getCachedResult(reader)); + return result ? 
result->getDocIdSet() : DocIdSetPtr(); +} + +SpanFilterResultPtr CachingSpanFilter::getCachedResult(const IndexReaderPtr& reader) { + LuceneObjectPtr coreKey = reader->getFieldCacheKey(); + LuceneObjectPtr delCoreKey = reader->hasDeletions() ? reader->getDeletesCacheKey() : coreKey; + + SpanFilterResultPtr result(boost::dynamic_pointer_cast(cache->get(reader, coreKey, delCoreKey))); + if (result) { + ++hitCount; return result; } - - SpanFilterResultPtr CachingSpanFilter::bitSpans(IndexReaderPtr reader) - { - return getCachedResult(reader); - } - - String CachingSpanFilter::toString() - { - return L"CachingSpanFilter(" + filter->toString() + L")"; - } - - bool CachingSpanFilter::equals(LuceneObjectPtr other) - { - if (SpanFilter::equals(other)) - return true; - - CachingSpanFilterPtr otherCachingSpanFilter(boost::dynamic_pointer_cast(other)); - if (!otherCachingSpanFilter) - return false; - - return this->filter->equals(otherCachingSpanFilter->filter); - } - - int32_t CachingSpanFilter::hashCode() - { - return filter->hashCode() ^ 0x1117bf25; - } - - FilterCacheSpanFilterResult::FilterCacheSpanFilterResult(CachingWrapperFilter::DeletesMode deletesMode) : FilterCache(deletesMode) - { - } - - FilterCacheSpanFilterResult::~FilterCacheSpanFilterResult() - { + + ++missCount; + result = filter->bitSpans(reader); + + cache->put(coreKey, delCoreKey, result); + + return result; +} + +SpanFilterResultPtr CachingSpanFilter::bitSpans(const IndexReaderPtr& reader) { + return getCachedResult(reader); +} + +String CachingSpanFilter::toString() { + return L"CachingSpanFilter(" + filter->toString() + L")"; +} + +bool CachingSpanFilter::equals(const LuceneObjectPtr& other) { + if (SpanFilter::equals(other)) { + return true; } - - LuceneObjectPtr FilterCacheSpanFilterResult::mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value) - { - boost::throw_exception(IllegalStateException(L"DeletesMode::DYNAMIC is not supported")); - return LuceneObjectPtr(); + + CachingSpanFilterPtr 
otherCachingSpanFilter(boost::dynamic_pointer_cast(other)); + if (!otherCachingSpanFilter) { + return false; } + + return this->filter->equals(otherCachingSpanFilter->filter); +} + +int32_t CachingSpanFilter::hashCode() { + return filter->hashCode() ^ 0x1117bf25; +} + +FilterCacheSpanFilterResult::FilterCacheSpanFilterResult(CachingWrapperFilter::DeletesMode deletesMode) : FilterCache(deletesMode) { +} + +FilterCacheSpanFilterResult::~FilterCacheSpanFilterResult() { +} + +LuceneObjectPtr FilterCacheSpanFilterResult::mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value) { + boost::throw_exception(IllegalStateException(L"DeletesMode::DYNAMIC is not supported")); + return LuceneObjectPtr(); +} + } diff --git a/src/core/search/CachingWrapperFilter.cpp b/src/core/search/CachingWrapperFilter.cpp index 0e462d83..c04f9650 100644 --- a/src/core/search/CachingWrapperFilter.cpp +++ b/src/core/search/CachingWrapperFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,168 +10,146 @@ #include "OpenBitSetDISI.h" #include "IndexReader.h" -namespace Lucene -{ - CachingWrapperFilter::CachingWrapperFilter(FilterPtr filter, DeletesMode deletesMode) - { - this->filter = filter; - this->cache = newLucene(deletesMode); - this->hitCount = 0; - this->missCount = 0; - } - - CachingWrapperFilter::~CachingWrapperFilter() - { - } - - DocIdSetPtr CachingWrapperFilter::docIdSetToCache(DocIdSetPtr docIdSet, IndexReaderPtr reader) - { - if (!docIdSet) - { - // this is better than returning null, as the nonnull result can be cached - return DocIdSet::EMPTY_DOCIDSET(); - } - else if (docIdSet->isCacheable()) - return docIdSet; - else - { - DocIdSetIteratorPtr it(docIdSet->iterator()); - // null is allowed to be returned by iterator(), in this case we wrap with the empty set, - // which is cacheable. - return !it ? DocIdSet::EMPTY_DOCIDSET() : newLucene(it, reader->maxDoc()); - } - } - - DocIdSetPtr CachingWrapperFilter::getDocIdSet(IndexReaderPtr reader) - { - LuceneObjectPtr coreKey = reader->getFieldCacheKey(); - LuceneObjectPtr delCoreKey = reader->hasDeletions() ? 
reader->getDeletesCacheKey() : coreKey; - - DocIdSetPtr docIdSet(boost::dynamic_pointer_cast(cache->get(reader, coreKey, delCoreKey))); - if (docIdSet) - { - ++hitCount; - return docIdSet; - } - - ++missCount; - - // cache miss - docIdSet = docIdSetToCache(filter->getDocIdSet(reader), reader); - - if (docIdSet) - cache->put(coreKey, delCoreKey, docIdSet); - +namespace Lucene { + +CachingWrapperFilter::CachingWrapperFilter(const FilterPtr& filter, DeletesMode deletesMode) { + this->filter = filter; + this->cache = newLucene(deletesMode); + this->hitCount = 0; + this->missCount = 0; +} + +CachingWrapperFilter::~CachingWrapperFilter() { +} + +DocIdSetPtr CachingWrapperFilter::docIdSetToCache(const DocIdSetPtr& docIdSet, const IndexReaderPtr& reader) { + if (!docIdSet) { + // this is better than returning null, as the nonnull result can be cached + return DocIdSet::EMPTY_DOCIDSET(); + } else if (docIdSet->isCacheable()) { return docIdSet; + } else { + DocIdSetIteratorPtr it(docIdSet->iterator()); + // null is allowed to be returned by iterator(), in this case we wrap with the empty set, + // which is cacheable. + return !it ? DocIdSet::EMPTY_DOCIDSET() : newLucene(it, reader->maxDoc()); } - - String CachingWrapperFilter::toString() - { - return L"CachingWrapperFilter(" + filter->toString() + L")"; +} + +DocIdSetPtr CachingWrapperFilter::getDocIdSet(const IndexReaderPtr& reader) { + LuceneObjectPtr coreKey = reader->getFieldCacheKey(); + LuceneObjectPtr delCoreKey = reader->hasDeletions() ? 
reader->getDeletesCacheKey() : coreKey; + + DocIdSetPtr docIdSet(boost::dynamic_pointer_cast(cache->get(reader, coreKey, delCoreKey))); + if (docIdSet) { + ++hitCount; + return docIdSet; } - - bool CachingWrapperFilter::equals(LuceneObjectPtr other) - { - if (Filter::equals(other)) - return true; - - CachingWrapperFilterPtr otherCachingWrapperFilter(boost::dynamic_pointer_cast(other)); - if (!otherCachingWrapperFilter) - return false; - - return this->filter->equals(otherCachingWrapperFilter->filter); + + ++missCount; + + // cache miss + docIdSet = docIdSetToCache(filter->getDocIdSet(reader), reader); + + if (docIdSet) { + cache->put(coreKey, delCoreKey, docIdSet); } - - int32_t CachingWrapperFilter::hashCode() - { - return filter->hashCode() ^ 0x1117bf25; + + return docIdSet; +} + +String CachingWrapperFilter::toString() { + return L"CachingWrapperFilter(" + filter->toString() + L")"; +} + +bool CachingWrapperFilter::equals(const LuceneObjectPtr& other) { + if (Filter::equals(other)) { + return true; } - - FilterCache::FilterCache(CachingWrapperFilter::DeletesMode deletesMode) - { - this->deletesMode = deletesMode; + + CachingWrapperFilterPtr otherCachingWrapperFilter(boost::dynamic_pointer_cast(other)); + if (!otherCachingWrapperFilter) { + return false; } - - FilterCache::~FilterCache() - { + + return this->filter->equals(otherCachingWrapperFilter->filter); +} + +int32_t CachingWrapperFilter::hashCode() { + return filter->hashCode() ^ 0x1117bf25; +} + +FilterCache::FilterCache(CachingWrapperFilter::DeletesMode deletesMode) { + this->deletesMode = deletesMode; +} + +FilterCache::~FilterCache() { +} + +LuceneObjectPtr FilterCache::get(const IndexReaderPtr& reader, const LuceneObjectPtr& coreKey, const LuceneObjectPtr& delCoreKey) { + SyncLock syncLock(this); + + if (!cache) { + cache = WeakMapObjectObject::newInstance(); } - - LuceneObjectPtr FilterCache::get(IndexReaderPtr reader, LuceneObjectPtr coreKey, LuceneObjectPtr delCoreKey) - { - SyncLock syncLock(this); 
- - if (!cache) - cache = WeakMapObjectObject::newInstance(); - - LuceneObjectPtr value; - if (deletesMode == CachingWrapperFilter::DELETES_IGNORE) - { - // key on core + + LuceneObjectPtr value; + if (deletesMode == CachingWrapperFilter::DELETES_IGNORE) { + // key on core + value = cache.get(coreKey); + } else if (deletesMode == CachingWrapperFilter::DELETES_RECACHE) { + // key on deletes, if any, else core + value = cache.get(delCoreKey); + } else { + BOOST_ASSERT(deletesMode == CachingWrapperFilter::DELETES_DYNAMIC); + + // first try for exact match + value = cache.get(delCoreKey); + + if (!value) { + // now for core match, but dynamically AND NOT deletions value = cache.get(coreKey); - } - else if (deletesMode == CachingWrapperFilter::DELETES_RECACHE) - { - // key on deletes, if any, else core - value = cache.get(delCoreKey); - } - else - { - BOOST_ASSERT(deletesMode == CachingWrapperFilter::DELETES_DYNAMIC); - - // first try for exact match - value = cache.get(delCoreKey); - - if (!value) - { - // now for core match, but dynamically AND NOT deletions - value = cache.get(coreKey); - if (value && reader->hasDeletions()) - value = mergeDeletes(reader, value); + if (value && reader->hasDeletions()) { + value = mergeDeletes(reader, value); } } - - return value; - } - - void FilterCache::put(LuceneObjectPtr coreKey, LuceneObjectPtr delCoreKey, LuceneObjectPtr value) - { - SyncLock syncLock(this); - - if (deletesMode == CachingWrapperFilter::DELETES_IGNORE) - cache.put(coreKey, value); - else if (deletesMode == CachingWrapperFilter::DELETES_RECACHE) - cache.put(delCoreKey, value); - else - { - cache.put(coreKey, value); - cache.put(delCoreKey, value); - } - } - - FilterCacheDocIdSet::FilterCacheDocIdSet(CachingWrapperFilter::DeletesMode deletesMode) : FilterCache(deletesMode) - { - } - - FilterCacheDocIdSet::~FilterCacheDocIdSet() - { - } - - LuceneObjectPtr FilterCacheDocIdSet::mergeDeletes(IndexReaderPtr reader, LuceneObjectPtr value) - { - return newLucene(reader, 
boost::dynamic_pointer_cast(value)); } - - FilteredCacheDocIdSet::FilteredCacheDocIdSet(IndexReaderPtr reader, DocIdSetPtr innerSet) : FilteredDocIdSet(innerSet) - { - this->reader = reader; - } - - FilteredCacheDocIdSet::~FilteredCacheDocIdSet() - { - } - - bool FilteredCacheDocIdSet::match(int32_t docid) - { - return !reader->isDeleted(docid); + + return value; +} + +void FilterCache::put(const LuceneObjectPtr& coreKey, const LuceneObjectPtr& delCoreKey, const LuceneObjectPtr& value) { + SyncLock syncLock(this); + + if (deletesMode == CachingWrapperFilter::DELETES_IGNORE) { + cache.put(coreKey, value); + } else if (deletesMode == CachingWrapperFilter::DELETES_RECACHE) { + cache.put(delCoreKey, value); + } else { + cache.put(coreKey, value); + cache.put(delCoreKey, value); } } + +FilterCacheDocIdSet::FilterCacheDocIdSet(CachingWrapperFilter::DeletesMode deletesMode) : FilterCache(deletesMode) { +} + +FilterCacheDocIdSet::~FilterCacheDocIdSet() { +} + +LuceneObjectPtr FilterCacheDocIdSet::mergeDeletes(const IndexReaderPtr& reader, const LuceneObjectPtr& value) { + return newLucene(reader, boost::dynamic_pointer_cast(value)); +} + +FilteredCacheDocIdSet::FilteredCacheDocIdSet(const IndexReaderPtr& reader, const DocIdSetPtr& innerSet) : FilteredDocIdSet(innerSet) { + this->reader = reader; +} + +FilteredCacheDocIdSet::~FilteredCacheDocIdSet() { +} + +bool FilteredCacheDocIdSet::match(int32_t docid) { + return !reader->isDeleted(docid); +} + +} diff --git a/src/core/search/Collector.cpp b/src/core/search/Collector.cpp index 056854e3..a12f1445 100644 --- a/src/core/search/Collector.cpp +++ b/src/core/search/Collector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "Collector.h" -namespace Lucene -{ - Collector::~Collector() - { - } +namespace Lucene { + +Collector::~Collector() { +} + } diff --git a/src/core/search/ComplexExplanation.cpp b/src/core/search/ComplexExplanation.cpp index 31a1f641..c9ce412f 100644 --- a/src/core/search/ComplexExplanation.cpp +++ b/src/core/search/ComplexExplanation.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,34 +8,29 @@ #include "ComplexExplanation.h" #include "StringUtils.h" -namespace Lucene -{ - ComplexExplanation::ComplexExplanation(bool match, double value, const String& description) : Explanation(value, description) - { - this->match = match; - } - - ComplexExplanation::~ComplexExplanation() - { - } - - bool ComplexExplanation::getMatch() - { - return match; - } - - void ComplexExplanation::setMatch(bool match) - { - this->match = match; - } - - bool ComplexExplanation::isMatch() - { - return getMatch(); - } - - String ComplexExplanation::getSummary() - { - return StringUtils::toString(getValue()) + L" = " + (isMatch() ? 
L"(MATCH) " : L"(NON-MATCH) ") + getDescription(); - } +namespace Lucene { + +ComplexExplanation::ComplexExplanation(bool match, double value, const String& description) : Explanation(value, description) { + this->match = match; +} + +ComplexExplanation::~ComplexExplanation() { +} + +bool ComplexExplanation::getMatch() { + return match; +} + +void ComplexExplanation::setMatch(bool match) { + this->match = match; +} + +bool ComplexExplanation::isMatch() { + return getMatch(); +} + +String ComplexExplanation::getSummary() { + return StringUtils::toString(getValue()) + L" = " + (isMatch() ? L"(MATCH) " : L"(NON-MATCH) ") + getDescription(); +} + } diff --git a/src/core/search/ConjunctionScorer.cpp b/src/core/search/ConjunctionScorer.cpp index 589ac86b..aeacaa73 100644 --- a/src/core/search/ConjunctionScorer.cpp +++ b/src/core/search/ConjunctionScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,117 +8,108 @@ #include "ConjunctionScorer.h" #include "Similarity.h" -namespace Lucene -{ - struct lessScorerDocId - { - inline bool operator()(const ScorerPtr& first, const ScorerPtr& second) const - { - return (first->docID() < second->docID()); - } - }; +namespace Lucene { - ConjunctionScorer::ConjunctionScorer(SimilarityPtr similarity, Collection scorers) : Scorer(similarity) - { - this->lastDoc = -1; - this->scorers = scorers; - this->coord = similarity->coord(scorers.size(), scorers.size()); - - for (Collection::iterator scorer = scorers.begin(); scorer != scorers.end(); ++scorer) - { - if ((*scorer)->nextDoc() == NO_MORE_DOCS) - { - // If even one of the sub-scorers does not have any documents, this scorer should not attempt - // to do any more work. - lastDoc = NO_MORE_DOCS; - return; - } - } - - // Sort the array the first time... - // We don't need to sort the array in any future calls because we know it will already start off - // sorted (all scorers on same doc). - std::sort(scorers.begin(), scorers.end(), lessScorerDocId()); - - // NOTE: doNext() must be called before the re-sorting of the array later on. The reason is this: - // assume there are 5 scorers, whose first docs are 1, 2, 3, 5, 5 respectively. Sorting (above) leaves - // the array as is. Calling doNext() here advances all the first scorers to 5 (or a larger doc ID - // they all agree on). - // However, if we re-sort before doNext() is called, the order will be 5, 3, 2, 1, 5 and then doNext() - // will stop immediately, since the first scorer's docs equals the last one. So the invariant that after - // calling doNext() all scorers are on the same doc ID is broken. - if (doNext() == NO_MORE_DOCS) - { - // The scorers did not agree on any document. 
+struct lessScorerDocId { + inline bool operator()(const ScorerPtr& first, const ScorerPtr& second) const { + return (first->docID() < second->docID()); + } +}; + +ConjunctionScorer::ConjunctionScorer(const SimilarityPtr& similarity, Collection scorers) : Scorer(similarity) { + this->lastDoc = -1; + this->scorers = scorers; + this->coord = similarity->coord(scorers.size(), scorers.size()); + + for (Collection::iterator scorer = scorers.begin(); scorer != scorers.end(); ++scorer) { + if ((*scorer)->nextDoc() == NO_MORE_DOCS) { + // If even one of the sub-scorers does not have any documents, this scorer should not attempt + // to do any more work. lastDoc = NO_MORE_DOCS; return; } - - // If first-time skip distance is any predictor of scorer sparseness, then we should always try to skip - // first on those scorers. Keep last scorer in it's last place (it will be the first to be skipped on), - // but reverse all of the others so that they will be skipped on in order of original high skip. - int32_t end = scorers.size() - 1; - int32_t max = end >> 1; - for (int32_t i = 0; i < max; ++i) - { - ScorerPtr tmp(scorers[i]); - int32_t idx = end - i - 1; - scorers[i] = scorers[idx]; - scorers[idx] = tmp; - } } - - ConjunctionScorer::~ConjunctionScorer() - { + + // Sort the array the first time... + // We don't need to sort the array in any future calls because we know it will already start off + // sorted (all scorers on same doc). + std::sort(scorers.begin(), scorers.end(), lessScorerDocId()); + + // NOTE: doNext() must be called before the re-sorting of the array later on. The reason is this: + // assume there are 5 scorers, whose first docs are 1, 2, 3, 5, 5 respectively. Sorting (above) leaves + // the array as is. Calling doNext() here advances all the first scorers to 5 (or a larger doc ID + // they all agree on). 
+ // However, if we re-sort before doNext() is called, the order will be 5, 3, 2, 1, 5 and then doNext() + // will stop immediately, since the first scorer's docs equals the last one. So the invariant that after + // calling doNext() all scorers are on the same doc ID is broken. + if (doNext() == NO_MORE_DOCS) { + // The scorers did not agree on any document. + lastDoc = NO_MORE_DOCS; + return; } - - int32_t ConjunctionScorer::doNext() - { - int32_t first = 0; - int32_t doc = scorers[scorers.size() - 1]->docID(); - ScorerPtr firstScorer; - while ((firstScorer = scorers[first])->docID() < doc) - { - doc = firstScorer->advance(doc); - first = first == scorers.size() - 1 ? 0 : first + 1; - } - return doc; + + // If first-time skip distance is any predictor of scorer sparseness, then we should always try to skip + // first on those scorers. Keep last scorer in it's last place (it will be the first to be skipped on), + // but reverse all of the others so that they will be skipped on in order of original high skip. + int32_t end = scorers.size() - 1; + int32_t max = end >> 1; + for (int32_t i = 0; i < max; ++i) { + ScorerPtr tmp(scorers[i]); + int32_t idx = end - i - 1; + scorers[i] = scorers[idx]; + scorers[idx] = tmp; } - - int32_t ConjunctionScorer::advance(int32_t target) - { - if (lastDoc == NO_MORE_DOCS) - return lastDoc; - else if (scorers[(scorers.size() - 1)]->docID() < target) - scorers[(scorers.size() - 1)]->advance(target); - lastDoc = doNext(); - return lastDoc; +} + +ConjunctionScorer::~ConjunctionScorer() { +} + +int32_t ConjunctionScorer::doNext() { + int32_t first = 0; + int32_t doc = scorers[scorers.size() - 1]->docID(); + Scorer* __firstScorer; + // TODO: __firstScore nullptr ?? + while ((__firstScorer = scorers[first].get())->docID() < doc) { + doc = __firstScorer->advance(doc); + first = first == scorers.size() - 1 ? 
0 : first + 1; } - - int32_t ConjunctionScorer::docID() - { + return doc; +} + +int32_t ConjunctionScorer::advance(int32_t target) { + if (lastDoc == NO_MORE_DOCS) { return lastDoc; + } + auto& scorer = scorers[(scorers.size() - 1)]; + if (scorer->docID() < target) { + scorer->advance(target); } - - int32_t ConjunctionScorer::nextDoc() - { - if (lastDoc == NO_MORE_DOCS) - return lastDoc; - else if (lastDoc == -1) - { - lastDoc = scorers[scorers.size() - 1]->docID(); - return lastDoc; - } - scorers[(scorers.size() - 1)]->nextDoc(); - lastDoc = doNext(); + lastDoc = doNext(); + return lastDoc; +} + +inline int32_t ConjunctionScorer::docID() { + return lastDoc; +} + +int32_t ConjunctionScorer::nextDoc() { + if (lastDoc == NO_MORE_DOCS) { + return lastDoc; + } else if (lastDoc == -1) { + lastDoc = scorers[scorers.size() - 1]->docID(); return lastDoc; } - - double ConjunctionScorer::score() - { - double sum = 0.0; - for (Collection::iterator scorer = scorers.begin(); scorer != scorers.end(); ++scorer) - sum += (*scorer)->score(); - return sum * coord; + scorers[(scorers.size() - 1)]->nextDoc(); + lastDoc = doNext(); + return lastDoc; +} + +double ConjunctionScorer::score() { + double sum = 0.0; + for (auto& scorer : scorers){ + sum += scorer->score(); } + return sum * coord; +} + } diff --git a/src/core/search/ConstantScoreQuery.cpp b/src/core/search/ConstantScoreQuery.cpp index 972a579a..5cfd75b9 100644 --- a/src/core/search/ConstantScoreQuery.cpp +++ b/src/core/search/ConstantScoreQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,169 +13,145 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - ConstantScoreQuery::ConstantScoreQuery(FilterPtr filter) - { - this->filter = filter; - } - - ConstantScoreQuery::~ConstantScoreQuery() - { - } - - FilterPtr ConstantScoreQuery::getFilter() - { - return filter; - } - - QueryPtr ConstantScoreQuery::rewrite(IndexReaderPtr reader) - { - return shared_from_this(); - } - - void ConstantScoreQuery::extractTerms(SetTerm terms) - { - // OK to not add any terms when used for MultiSearcher, but may not be OK for highlighting - } - - WeightPtr ConstantScoreQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } - - String ConstantScoreQuery::toString(const String& field) - { - return L"ConstantScore(" + filter->toString() + (getBoost() == 1.0 ? L")" : L"^" + StringUtils::toString(getBoost())); - } - - bool ConstantScoreQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - ConstantScoreQueryPtr otherConstantScoreQuery(boost::dynamic_pointer_cast(other)); - if (!otherConstantScoreQuery) - return false; - - return (this->getBoost() == otherConstantScoreQuery->getBoost() && this->filter->equals(otherConstantScoreQuery->filter)); - } - - int32_t ConstantScoreQuery::hashCode() - { - // Simple add is OK since no existing filter hashcode has a float component. - return filter->hashCode() + MiscUtils::doubleToIntBits(getBoost()); - } - - LuceneObjectPtr ConstantScoreQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(filter); - ConstantScoreQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); - cloneQuery->filter = filter; - return cloneQuery; - } - - ConstantWeight::ConstantWeight(ConstantScoreQueryPtr constantScorer, SearcherPtr searcher) - { - this->constantScorer = constantScorer; - this->similarity = constantScorer->getSimilarity(searcher); - this->queryNorm = 0; - this->queryWeight = 0; - } - - ConstantWeight::~ConstantWeight() - { - } - - QueryPtr ConstantWeight::getQuery() - { - return constantScorer; - } - - double ConstantWeight::getValue() - { - return queryWeight; - } - - double ConstantWeight::sumOfSquaredWeights() - { - queryWeight = constantScorer->getBoost(); - return queryWeight * queryWeight; - } - - void ConstantWeight::normalize(double norm) - { - this->queryNorm = norm; - queryWeight *= this->queryNorm; - } - - ScorerPtr ConstantWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - return newLucene(constantScorer, similarity, reader, shared_from_this()); +namespace Lucene { + +ConstantScoreQuery::ConstantScoreQuery(const FilterPtr& filter) { + this->filter = filter; +} + +ConstantScoreQuery::~ConstantScoreQuery() { +} + +FilterPtr ConstantScoreQuery::getFilter() { + return filter; +} + +QueryPtr ConstantScoreQuery::rewrite(const IndexReaderPtr& reader) { + return shared_from_this(); +} + +void ConstantScoreQuery::extractTerms(SetTerm terms) { + // OK to not add any terms when used for MultiSearcher, but may not be OK for highlighting +} + +WeightPtr ConstantScoreQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +String ConstantScoreQuery::toString(const String& field) { + return L"ConstantScore(" + filter->toString() + (getBoost() == 1.0 ? 
L")" : L"^" + StringUtils::toString(getBoost())); +} + +bool ConstantScoreQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - ExplanationPtr ConstantWeight::explain(IndexReaderPtr reader, int32_t doc) - { - ConstantScorerPtr cs(newLucene(constantScorer, similarity, reader, shared_from_this())); - bool exists = (cs->docIdSetIterator->advance(doc) == doc); - - ComplexExplanationPtr result(newLucene()); - - if (exists) - { - result->setDescription(L"ConstantScoreQuery(" + constantScorer->filter->toString() + L"), product of:"); - result->setValue(queryWeight); - result->setMatch(true); - result->addDetail(newLucene(constantScorer->getBoost(), L"boost")); - result->addDetail(newLucene(queryNorm, L"queryNorm")); - } - else - { - result->setDescription(L"ConstantScoreQuery(" + constantScorer->filter->toString() + L") doesn't match id " + StringUtils::toString(doc)); - result->setValue(0); - result->setMatch(false); - } - return result; + + ConstantScoreQueryPtr otherConstantScoreQuery(boost::dynamic_pointer_cast(other)); + if (!otherConstantScoreQuery) { + return false; } - - ConstantScorer::ConstantScorer(ConstantScoreQueryPtr constantScorer, SimilarityPtr similarity, IndexReaderPtr reader, WeightPtr w) : Scorer(similarity) - { - doc = -1; - theScore = w->getValue(); - DocIdSetPtr docIdSet(constantScorer->filter->getDocIdSet(reader)); - if (!docIdSet) + + return (this->getBoost() == otherConstantScoreQuery->getBoost() && this->filter->equals(otherConstantScoreQuery->filter)); +} + +int32_t ConstantScoreQuery::hashCode() { + // Simple add is OK since no existing filter hashcode has a float component. + return filter->hashCode() + MiscUtils::doubleToIntBits(getBoost()); +} + +LuceneObjectPtr ConstantScoreQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? 
other : newLucene(filter); + ConstantScoreQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); + cloneQuery->filter = filter; + return cloneQuery; +} + +ConstantWeight::ConstantWeight(const ConstantScoreQueryPtr& constantScorer, const SearcherPtr& searcher) { + this->constantScorer = constantScorer; + this->similarity = constantScorer->getSimilarity(searcher); + this->queryNorm = 0; + this->queryWeight = 0; +} + +ConstantWeight::~ConstantWeight() { +} + +QueryPtr ConstantWeight::getQuery() { + return constantScorer; +} + +double ConstantWeight::getValue() { + return queryWeight; +} + +double ConstantWeight::sumOfSquaredWeights() { + queryWeight = constantScorer->getBoost(); + return queryWeight * queryWeight; +} + +void ConstantWeight::normalize(double norm) { + this->queryNorm = norm; + queryWeight *= this->queryNorm; +} + +ScorerPtr ConstantWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + return newLucene(constantScorer, similarity, reader, shared_from_this()); +} + +ExplanationPtr ConstantWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + ConstantScorerPtr cs(newLucene(constantScorer, similarity, reader, shared_from_this())); + bool exists = (cs->docIdSetIterator->advance(doc) == doc); + + ComplexExplanationPtr result(newLucene()); + + if (exists) { + result->setDescription(L"ConstantScoreQuery(" + constantScorer->filter->toString() + L"), product of:"); + result->setValue(queryWeight); + result->setMatch(true); + result->addDetail(newLucene(constantScorer->getBoost(), L"boost")); + result->addDetail(newLucene(queryNorm, L"queryNorm")); + } else { + result->setDescription(L"ConstantScoreQuery(" + constantScorer->filter->toString() + L") doesn't match id " + StringUtils::toString(doc)); + result->setValue(0); + result->setMatch(false); + } + return result; +} + +ConstantScorer::ConstantScorer(const ConstantScoreQueryPtr& constantScorer, const SimilarityPtr& similarity, const IndexReaderPtr& 
reader, const WeightPtr& w) : Scorer(similarity) { + doc = -1; + theScore = w->getValue(); + DocIdSetPtr docIdSet(constantScorer->filter->getDocIdSet(reader)); + if (!docIdSet) { + docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); + } else { + DocIdSetIteratorPtr iter(docIdSet->iterator()); + if (!iter) { docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); - else - { - DocIdSetIteratorPtr iter(docIdSet->iterator()); - if (!iter) - docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); - else - docIdSetIterator = iter; + } else { + docIdSetIterator = iter; } } - - ConstantScorer::~ConstantScorer() - { - } - - int32_t ConstantScorer::nextDoc() - { - return docIdSetIterator->nextDoc(); - } - - int32_t ConstantScorer::docID() - { - return docIdSetIterator->docID(); - } - - double ConstantScorer::score() - { - return theScore; - } - - int32_t ConstantScorer::advance(int32_t target) - { - return docIdSetIterator->advance(target); - } +} + +ConstantScorer::~ConstantScorer() { +} + +int32_t ConstantScorer::nextDoc() { + return docIdSetIterator->nextDoc(); +} + +int32_t ConstantScorer::docID() { + return docIdSetIterator->docID(); +} + +double ConstantScorer::score() { + return theScore; +} + +int32_t ConstantScorer::advance(int32_t target) { + return docIdSetIterator->advance(target); +} + } diff --git a/src/core/search/DefaultSimilarity.cpp b/src/core/search/DefaultSimilarity.cpp index 6449f911..c98f2d95 100644 --- a/src/core/search/DefaultSimilarity.cpp +++ b/src/core/search/DefaultSimilarity.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,64 +8,55 @@ #include "DefaultSimilarity.h" #include "FieldInvertState.h" -namespace Lucene -{ - DefaultSimilarity::DefaultSimilarity() - { - discountOverlaps = false; - } - - DefaultSimilarity::~DefaultSimilarity() - { - } - - double DefaultSimilarity::computeNorm(const String& fieldName, FieldInvertStatePtr state) - { - int32_t numTerms; - if (discountOverlaps) - numTerms = state->getLength() - state->getNumOverlap(); - else - numTerms = state->getLength(); - return (state->getBoost() * lengthNorm(fieldName, numTerms)); - } - - double DefaultSimilarity::lengthNorm(const String& fieldName, int32_t numTokens) - { - return (double)(1.0 / std::sqrt((double)numTokens)); - } - - double DefaultSimilarity::queryNorm(double sumOfSquaredWeights) - { - return (double)(1.0 / std::sqrt(sumOfSquaredWeights)); - } - - double DefaultSimilarity::tf(double freq) - { - return (double)std::sqrt(freq); - } - - double DefaultSimilarity::sloppyFreq(int32_t distance) - { - return (1.0 / (double)(distance + 1)); - } - - double DefaultSimilarity::idf(int32_t docFreq, int32_t numDocs) - { - return (double)(std::log((double)numDocs / (double)(docFreq + 1)) + 1.0); - } - - double DefaultSimilarity::coord(int32_t overlap, int32_t maxOverlap) - { - return (double)overlap / (double)maxOverlap; - } - - void DefaultSimilarity::setDiscountOverlaps(bool v) - { - discountOverlaps = v; - } - - bool DefaultSimilarity::getDiscountOverlaps() - { - return discountOverlaps; - } +namespace Lucene { + +DefaultSimilarity::DefaultSimilarity() { + discountOverlaps = false; +} + +DefaultSimilarity::~DefaultSimilarity() { +} + +double DefaultSimilarity::computeNorm(const String& fieldName, const FieldInvertStatePtr& state) { + int32_t numTerms; + if (discountOverlaps) { + numTerms = state->getLength() - state->getNumOverlap(); + } else { + numTerms = state->getLength(); + } + return (state->getBoost() * lengthNorm(fieldName, 
numTerms)); +} + +inline double DefaultSimilarity::lengthNorm(const String& fieldName, int32_t numTokens) { + return (double)(1.0 / std::sqrt((double)numTokens)); +} + +inline double DefaultSimilarity::queryNorm(double sumOfSquaredWeights) { + return (double)(1.0 / std::sqrt(sumOfSquaredWeights)); +} + +inline double DefaultSimilarity::tf(double freq) { + return (double)std::sqrt(freq); +} + +inline double DefaultSimilarity::sloppyFreq(int32_t distance) { + return (1.0 / (double)(distance + 1)); +} + +inline double DefaultSimilarity::idf(int32_t docFreq, int32_t numDocs) { + return (double)(std::log((double)numDocs / (double)(docFreq + 1)) + 1.0); +} + +inline double DefaultSimilarity::coord(int32_t overlap, int32_t maxOverlap) { + return (double)overlap / (double)maxOverlap; +} + +inline void DefaultSimilarity::setDiscountOverlaps(bool v) { + discountOverlaps = v; +} + +inline bool DefaultSimilarity::getDiscountOverlaps() { + return discountOverlaps; +} + } diff --git a/src/core/search/DisjunctionMaxQuery.cpp b/src/core/search/DisjunctionMaxQuery.cpp index 67cb42a7..b35f1fe0 100644 --- a/src/core/search/DisjunctionMaxQuery.cpp +++ b/src/core/search/DisjunctionMaxQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,212 +15,196 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - DisjunctionMaxQuery::DisjunctionMaxQuery(double tieBreakerMultiplier) - { - this->tieBreakerMultiplier = tieBreakerMultiplier; - this->disjuncts = Collection::newInstance(); - } - - DisjunctionMaxQuery::DisjunctionMaxQuery(Collection disjuncts, double tieBreakerMultiplier) - { - this->tieBreakerMultiplier = tieBreakerMultiplier; - this->disjuncts = Collection::newInstance(); - add(disjuncts); - } - - DisjunctionMaxQuery::~DisjunctionMaxQuery() - { - } - - void DisjunctionMaxQuery::add(QueryPtr query) - { - disjuncts.add(query); - } - - void DisjunctionMaxQuery::add(Collection disjuncts) - { - this->disjuncts.addAll(disjuncts.begin(), disjuncts.end()); - } - - Collection::iterator DisjunctionMaxQuery::begin() - { - return disjuncts.begin(); - } - - Collection::iterator DisjunctionMaxQuery::end() - { - return disjuncts.end(); - } - - WeightPtr DisjunctionMaxQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } - - QueryPtr DisjunctionMaxQuery::rewrite(IndexReaderPtr reader) - { - int32_t numDisjunctions = disjuncts.size(); - if (numDisjunctions == 1) - { - QueryPtr singleton(disjuncts[0]); - QueryPtr result(singleton->rewrite(reader)); - if (getBoost() != 1.0) - { - if (result == singleton) - result = boost::dynamic_pointer_cast(result->clone()); - result->setBoost(getBoost() * result->getBoost()); +namespace Lucene { + +DisjunctionMaxQuery::DisjunctionMaxQuery(double tieBreakerMultiplier) { + this->tieBreakerMultiplier = tieBreakerMultiplier; + this->disjuncts = Collection::newInstance(); +} + +DisjunctionMaxQuery::DisjunctionMaxQuery(Collection disjuncts, double tieBreakerMultiplier) { + this->tieBreakerMultiplier = tieBreakerMultiplier; + this->disjuncts = Collection::newInstance(); + add(disjuncts); +} + +DisjunctionMaxQuery::~DisjunctionMaxQuery() { 
+} + +void DisjunctionMaxQuery::add(const QueryPtr& query) { + disjuncts.add(query); +} + +void DisjunctionMaxQuery::add(Collection disjuncts) { + this->disjuncts.addAll(disjuncts.begin(), disjuncts.end()); +} + +Collection::iterator DisjunctionMaxQuery::begin() { + return disjuncts.begin(); +} + +Collection::iterator DisjunctionMaxQuery::end() { + return disjuncts.end(); +} + +WeightPtr DisjunctionMaxQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +QueryPtr DisjunctionMaxQuery::rewrite(const IndexReaderPtr& reader) { + int32_t numDisjunctions = disjuncts.size(); + if (numDisjunctions == 1) { + QueryPtr singleton(disjuncts[0]); + QueryPtr result(singleton->rewrite(reader)); + if (getBoost() != 1.0) { + if (result == singleton) { + result = boost::dynamic_pointer_cast(result->clone()); } - return result; + result->setBoost(getBoost() * result->getBoost()); } - DisjunctionMaxQueryPtr clone; - for (int32_t i = 0; i < numDisjunctions; ++i) - { - QueryPtr clause(disjuncts[i]); - QueryPtr rewrite(clause->rewrite(reader)); - if (rewrite != clause) - { - if (!clone) - clone = boost::dynamic_pointer_cast(this->clone()); - clone->disjuncts[i] = rewrite; + return result; + } + DisjunctionMaxQueryPtr clone; + for (int32_t i = 0; i < numDisjunctions; ++i) { + QueryPtr clause(disjuncts[i]); + QueryPtr rewrite(clause->rewrite(reader)); + if (rewrite != clause) { + if (!clone) { + clone = boost::dynamic_pointer_cast(this->clone()); } + clone->disjuncts[i] = rewrite; } - return clone ? clone : shared_from_this(); - } - - LuceneObjectPtr DisjunctionMaxQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = Query::clone(other ? 
other : newLucene()); - DisjunctionMaxQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->tieBreakerMultiplier = tieBreakerMultiplier; - cloneQuery->disjuncts = Collection::newInstance(disjuncts.begin(), disjuncts.end()); - return cloneQuery; } - - void DisjunctionMaxQuery::extractTerms(SetTerm terms) - { - for (Collection::iterator query = disjuncts.begin(); query != disjuncts.end(); ++query) - (*query)->extractTerms(terms); + return clone ? clone : shared_from_this(); +} + +LuceneObjectPtr DisjunctionMaxQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = Query::clone(other ? other : newLucene()); + DisjunctionMaxQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->tieBreakerMultiplier = tieBreakerMultiplier; + cloneQuery->disjuncts = Collection::newInstance(disjuncts.begin(), disjuncts.end()); + return cloneQuery; +} + +void DisjunctionMaxQuery::extractTerms(SetTerm terms) { + for (Collection::iterator query = disjuncts.begin(); query != disjuncts.end(); ++query) { + (*query)->extractTerms(terms); } - - String DisjunctionMaxQuery::toString(const String& field) - { - String buffer(L"("); - for (Collection::iterator query = disjuncts.begin(); query != disjuncts.end(); ++query) - { - if (query != disjuncts.begin()) - buffer += L" | "; - if (boost::dynamic_pointer_cast(*query)) // wrap sub-bools in parens - buffer += L"(" + (*query)->toString(field) + L")"; - else - buffer += (*query)->toString(field); +} + +String DisjunctionMaxQuery::toString(const String& field) { + String buffer(L"("); + for (Collection::iterator query = disjuncts.begin(); query != disjuncts.end(); ++query) { + if (query != disjuncts.begin()) { + buffer += L" | "; + } + if (boost::dynamic_pointer_cast(*query)) { // wrap sub-bools in parens + buffer += L"(" + (*query)->toString(field) + L")"; + } else { + buffer += (*query)->toString(field); } - buffer += L")"; - if (tieBreakerMultiplier != 0.0) - buffer += L"~" + 
StringUtils::toString(tieBreakerMultiplier); - if (getBoost() != 1.0) - buffer += L"^" + StringUtils::toString(getBoost()); - return buffer; } - - bool DisjunctionMaxQuery::equals(LuceneObjectPtr other) - { - if (!Query::equals(other)) - return false; - - DisjunctionMaxQueryPtr otherDisjunctionMaxQuery(boost::dynamic_pointer_cast(other)); - if (!otherDisjunctionMaxQuery) - return false; - - return (tieBreakerMultiplier == otherDisjunctionMaxQuery->tieBreakerMultiplier && disjuncts.equals(otherDisjunctionMaxQuery->disjuncts, luceneEquals())); + buffer += L")"; + if (tieBreakerMultiplier != 0.0) { + buffer += L"~" + StringUtils::toString(tieBreakerMultiplier); } - - int32_t DisjunctionMaxQuery::hashCode() - { - return MiscUtils::doubleToIntBits(getBoost()) + MiscUtils::doubleToIntBits(tieBreakerMultiplier) + MiscUtils::hashCode(disjuncts.begin(), disjuncts.end(), MiscUtils::hashLucene); + if (getBoost() != 1.0) { + buffer += L"^" + StringUtils::toString(getBoost()); } - - DisjunctionMaxWeight::DisjunctionMaxWeight(DisjunctionMaxQueryPtr query, SearcherPtr searcher) - { - this->query = query; - this->similarity = searcher->getSimilarity(); - this->weights = Collection::newInstance(); - for (Collection::iterator disjunctQuery = query->disjuncts.begin(); disjunctQuery != query->disjuncts.end(); ++disjunctQuery) - this->weights.add((*disjunctQuery)->createWeight(searcher)); + return buffer; +} + +bool DisjunctionMaxQuery::equals(const LuceneObjectPtr& other) { + if (!Query::equals(other)) { + return false; } - - DisjunctionMaxWeight::~DisjunctionMaxWeight() - { + + DisjunctionMaxQueryPtr otherDisjunctionMaxQuery(boost::dynamic_pointer_cast(other)); + if (!otherDisjunctionMaxQuery) { + return false; } - - QueryPtr DisjunctionMaxWeight::getQuery() - { - return query; + + return (tieBreakerMultiplier == otherDisjunctionMaxQuery->tieBreakerMultiplier && disjuncts.equals(otherDisjunctionMaxQuery->disjuncts, luceneEquals())); +} + +int32_t DisjunctionMaxQuery::hashCode() { + 
return MiscUtils::doubleToIntBits(getBoost()) + MiscUtils::doubleToIntBits(tieBreakerMultiplier) + MiscUtils::hashCode(disjuncts.begin(), disjuncts.end(), MiscUtils::hashLucene); +} + +DisjunctionMaxWeight::DisjunctionMaxWeight(const DisjunctionMaxQueryPtr& query, const SearcherPtr& searcher) { + this->query = query; + this->similarity = searcher->getSimilarity(); + this->weights = Collection::newInstance(); + for (Collection::iterator disjunctQuery = query->disjuncts.begin(); disjunctQuery != query->disjuncts.end(); ++disjunctQuery) { + this->weights.add((*disjunctQuery)->createWeight(searcher)); } - - double DisjunctionMaxWeight::getValue() - { - return query->getBoost(); +} + +DisjunctionMaxWeight::~DisjunctionMaxWeight() { +} + +QueryPtr DisjunctionMaxWeight::getQuery() { + return query; +} + +double DisjunctionMaxWeight::getValue() { + return query->getBoost(); +} + +double DisjunctionMaxWeight::sumOfSquaredWeights() { + double max = 0.0; + double sum = 0.0; + for (Collection::iterator currentWeight = weights.begin(); currentWeight != weights.end(); ++currentWeight) { + double sub = (*currentWeight)->sumOfSquaredWeights(); + sum += sub; + max = std::max(max, sub); + } + double boost = query->getBoost(); + return (((sum - max) * query->tieBreakerMultiplier * query->tieBreakerMultiplier) + max) * boost * boost; +} + +void DisjunctionMaxWeight::normalize(double norm) { + norm *= query->getBoost(); // Incorporate our boost + for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) { + (*wt)->normalize(norm); } - - double DisjunctionMaxWeight::sumOfSquaredWeights() - { - double max = 0.0; - double sum = 0.0; - for (Collection::iterator currentWeight = weights.begin(); currentWeight != weights.end(); ++currentWeight) - { - double sub = (*currentWeight)->sumOfSquaredWeights(); - sum += sub; - max = std::max(max, sub); +} + +ScorerPtr DisjunctionMaxWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + Collection 
scorers(Collection::newInstance(weights.size())); + int32_t idx = 0; + for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) { + ScorerPtr subScorer((*wt)->scorer(reader, true, false)); + if (subScorer && subScorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS) { + scorers[idx++] = subScorer; } - double boost = query->getBoost(); - return (((sum - max) * query->tieBreakerMultiplier * query->tieBreakerMultiplier) + max) * boost * boost; - } - - void DisjunctionMaxWeight::normalize(double norm) - { - norm *= query->getBoost(); // Incorporate our boost - for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) - (*wt)->normalize(norm); } - - ScorerPtr DisjunctionMaxWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - Collection scorers(Collection::newInstance(weights.size())); - int32_t idx = 0; - for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) - { - ScorerPtr subScorer((*wt)->scorer(reader, true, false)); - if (subScorer && subScorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS) - scorers[idx++] = subScorer; - } - if (idx == 0) - return ScorerPtr(); // all scorers did not have documents - DisjunctionMaxScorerPtr result(newLucene(query->tieBreakerMultiplier, similarity, scorers, idx)); - return result; + if (idx == 0) { + return ScorerPtr(); // all scorers did not have documents } - - ExplanationPtr DisjunctionMaxWeight::explain(IndexReaderPtr reader, int32_t doc) - { - if (query->disjuncts.size() == 1) - return weights[0]->explain(reader, doc); - ComplexExplanationPtr result(newLucene()); - double max = 0.0; - double sum = 0.0; - result->setDescription(query->tieBreakerMultiplier == 0.0 ? 
L"max of:" : (L"max plus " + StringUtils::toString(query->tieBreakerMultiplier) + L" times others of:")); - for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) - { - ExplanationPtr e = (*wt)->explain(reader, doc); - if (e->isMatch()) - { - result->setMatch(true); - result->addDetail(e); - sum += e->getValue(); - max = std::max(max, e->getValue()); - } + DisjunctionMaxScorerPtr result(newLucene(query->tieBreakerMultiplier, similarity, scorers, idx)); + return result; +} + +ExplanationPtr DisjunctionMaxWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + if (query->disjuncts.size() == 1) { + return weights[0]->explain(reader, doc); + } + ComplexExplanationPtr result(newLucene()); + double max = 0.0; + double sum = 0.0; + result->setDescription(query->tieBreakerMultiplier == 0.0 ? L"max of:" : (L"max plus " + StringUtils::toString(query->tieBreakerMultiplier) + L" times others of:")); + for (Collection::iterator wt = weights.begin(); wt != weights.end(); ++wt) { + ExplanationPtr e = (*wt)->explain(reader, doc); + if (e->isMatch()) { + result->setMatch(true); + result->addDetail(e); + sum += e->getValue(); + max = std::max(max, e->getValue()); } - result->setValue(max + (sum - max) * query->tieBreakerMultiplier); - return result; } + result->setValue(max + (sum - max) * query->tieBreakerMultiplier); + return result; +} + } diff --git a/src/core/search/DisjunctionMaxScorer.cpp b/src/core/search/DisjunctionMaxScorer.cpp index 7ba6dacf..753dfd76 100644 --- a/src/core/search/DisjunctionMaxScorer.cpp +++ b/src/core/search/DisjunctionMaxScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,167 +7,139 @@ #include "LuceneInc.h" #include "DisjunctionMaxScorer.h" -namespace Lucene -{ - DisjunctionMaxScorer::DisjunctionMaxScorer(double tieBreakerMultiplier, SimilarityPtr similarity, Collection subScorers, int32_t numScorers) : Scorer(similarity) - { - this->doc = -1; - this->tieBreakerMultiplier = tieBreakerMultiplier; - - // The passed subScorers array includes only scorers which have documents (DisjunctionMaxQuery takes care - // of that), and their nextDoc() was already called. - this->subScorers = subScorers; - this->numScorers = numScorers; - - heapify(); - } - - DisjunctionMaxScorer::~DisjunctionMaxScorer() - { +namespace Lucene { + +DisjunctionMaxScorer::DisjunctionMaxScorer(double tieBreakerMultiplier, const SimilarityPtr& similarity, Collection subScorers, int32_t numScorers) : Scorer(similarity) { + this->doc = -1; + this->tieBreakerMultiplier = tieBreakerMultiplier; + + // The passed subScorers array includes only scorers which have documents (DisjunctionMaxQuery takes care + // of that), and their nextDoc() was already called. 
+ this->subScorers = subScorers; + this->numScorers = numScorers; + + heapify(); +} + +DisjunctionMaxScorer::~DisjunctionMaxScorer() { +} + +int32_t DisjunctionMaxScorer::nextDoc() { + if (numScorers == 0) { + doc = NO_MORE_DOCS; + return doc; } - - int32_t DisjunctionMaxScorer::nextDoc() - { - if (numScorers == 0) - { - doc = NO_MORE_DOCS; - return doc; - } - while (subScorers[0]->docID() == doc) - { - if (subScorers[0]->nextDoc() != NO_MORE_DOCS) - heapAdjust(0); - else - { - heapRemoveRoot(); - if (numScorers == 0) - { - doc = NO_MORE_DOCS; - return doc; - } + while (subScorers[0]->docID() == doc) { + if (subScorers[0]->nextDoc() != NO_MORE_DOCS) { + heapAdjust(0); + } else { + heapRemoveRoot(); + if (numScorers == 0) { + doc = NO_MORE_DOCS; + return doc; } } - - doc = subScorers[0]->docID(); - return doc; - } - - int32_t DisjunctionMaxScorer::docID() - { - return doc; } - - double DisjunctionMaxScorer::score() - { - int32_t doc = subScorers[0]->docID(); - Collection sum(newCollection(subScorers[0]->score())); - Collection max(Collection::newInstance(sum.begin(), sum.end())); - int32_t size = numScorers; - scoreAll(1, size, doc, sum, max); - scoreAll(2, size, doc, sum, max); - return max[0] + (sum[0] - max[0]) * tieBreakerMultiplier; + + doc = subScorers[0]->docID(); + return doc; +} + +int32_t DisjunctionMaxScorer::docID() { + return doc; +} + +double DisjunctionMaxScorer::score() { + int32_t doc = subScorers[0]->docID(); + Collection sum(newCollection(subScorers[0]->score())); + Collection max(Collection::newInstance(sum.begin(), sum.end())); + int32_t size = numScorers; + scoreAll(1, size, doc, sum, max); + scoreAll(2, size, doc, sum, max); + return max[0] + (sum[0] - max[0]) * tieBreakerMultiplier; +} + +void DisjunctionMaxScorer::scoreAll(int32_t root, int32_t size, int32_t doc, Collection sum, Collection max) { + if (root < size && subScorers[root]->docID() == doc) { + double sub = subScorers[root]->score(); + sum[0] += sub; + max[0] = std::max(max[0], 
sub); + scoreAll((root << 1) + 1, size, doc, sum, max); + scoreAll((root << 1) + 2, size, doc, sum, max); } - - void DisjunctionMaxScorer::scoreAll(int32_t root, int32_t size, int32_t doc, Collection sum, Collection max) - { - if (root < size && subScorers[root]->docID() == doc) - { - double sub = subScorers[root]->score(); - sum[0] += sub; - max[0] = std::max(max[0], sub); - scoreAll((root << 1) + 1, size, doc, sum, max); - scoreAll((root << 1) + 2, size, doc, sum, max); - } +} + +int32_t DisjunctionMaxScorer::advance(int32_t target) { + if (numScorers == 0) { + doc = NO_MORE_DOCS; + return doc; } - - int32_t DisjunctionMaxScorer::advance(int32_t target) - { - if (numScorers == 0) - { - doc = NO_MORE_DOCS; - return doc; - } - while (subScorers[0]->docID() < target) - { - if (subScorers[0]->advance(target) != NO_MORE_DOCS) - heapAdjust(0); - else - { - heapRemoveRoot(); - if (numScorers == 0) - { - doc = NO_MORE_DOCS; - return doc; - } + while (subScorers[0]->docID() < target) { + if (subScorers[0]->advance(target) != NO_MORE_DOCS) { + heapAdjust(0); + } else { + heapRemoveRoot(); + if (numScorers == 0) { + doc = NO_MORE_DOCS; + return doc; } } - doc = subScorers[0]->docID(); - return doc; } - - void DisjunctionMaxScorer::heapify() - { - for (int32_t i = (numScorers >> 1) - 1; i >= 0; --i) - heapAdjust(i); + doc = subScorers[0]->docID(); + return doc; +} + +void DisjunctionMaxScorer::heapify() { + for (int32_t i = (numScorers >> 1) - 1; i >= 0; --i) { + heapAdjust(i); } - - void DisjunctionMaxScorer::heapAdjust(int32_t root) - { - ScorerPtr scorer(subScorers[root]); - int32_t doc = scorer->docID(); - int32_t i = root; - while (i <= (numScorers >> 1) - 1) - { - int32_t lchild = (i << 1) + 1; - ScorerPtr lscorer(subScorers[lchild]); - int32_t ldoc = lscorer->docID(); - int32_t rdoc = INT_MAX; - int32_t rchild = (i << 1) + 2; - ScorerPtr rscorer; - if (rchild < numScorers) - { - rscorer = subScorers[rchild]; - rdoc = rscorer->docID(); - } - if (ldoc < doc) - { - if 
(rdoc < ldoc) - { - subScorers[i] = rscorer; - subScorers[rchild] = scorer; - i = rchild; - } - else - { - subScorers[i] = lscorer; - subScorers[lchild] = scorer; - i = lchild; - } - } - else if (rdoc < doc) - { +} + +void DisjunctionMaxScorer::heapAdjust(int32_t root) { + ScorerPtr scorer(subScorers[root]); + int32_t doc = scorer->docID(); + int32_t i = root; + while (i <= (numScorers >> 1) - 1) { + int32_t lchild = (i << 1) + 1; + ScorerPtr lscorer(subScorers[lchild]); + int32_t ldoc = lscorer->docID(); + int32_t rdoc = INT_MAX; + int32_t rchild = (i << 1) + 2; + ScorerPtr rscorer; + if (rchild < numScorers) { + rscorer = subScorers[rchild]; + rdoc = rscorer->docID(); + } + if (ldoc < doc) { + if (rdoc < ldoc) { subScorers[i] = rscorer; subScorers[rchild] = scorer; i = rchild; + } else { + subScorers[i] = lscorer; + subScorers[lchild] = scorer; + i = lchild; } - else - return; + } else if (rdoc < doc) { + subScorers[i] = rscorer; + subScorers[rchild] = scorer; + i = rchild; + } else { + return; } } - - void DisjunctionMaxScorer::heapRemoveRoot() - { - if (numScorers == 1) - { - subScorers[0].reset(); - numScorers = 0; - } - else - { - subScorers[0] = subScorers[numScorers - 1]; - subScorers[numScorers - 1].reset(); - --numScorers; - heapAdjust(0); - } +} + +void DisjunctionMaxScorer::heapRemoveRoot() { + if (numScorers == 1) { + subScorers[0].reset(); + numScorers = 0; + } else { + subScorers[0] = subScorers[numScorers - 1]; + subScorers[numScorers - 1].reset(); + --numScorers; + heapAdjust(0); } } + +} diff --git a/src/core/search/DisjunctionSumScorer.cpp b/src/core/search/DisjunctionSumScorer.cpp index 72d329e1..aa786bb9 100644 --- a/src/core/search/DisjunctionSumScorer.cpp +++ b/src/core/search/DisjunctionSumScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,141 +9,127 @@ #include "ScorerDocQueue.h" #include "Collector.h" -namespace Lucene -{ - DisjunctionSumScorer::DisjunctionSumScorer(Collection subScorers, int32_t minimumNrMatchers) : Scorer(SimilarityPtr()) - { - this->currentDoc = -1; - this->_nrMatchers = -1; - this->currentScore = std::numeric_limits::quiet_NaN(); - - this->nrScorers = subScorers.size(); - - if (minimumNrMatchers <= 0) - boost::throw_exception(IllegalArgumentException(L"Minimum nr of matchers must be positive")); - if (nrScorers <= 1) - boost::throw_exception(IllegalArgumentException(L"There must be at least 2 subScorers")); - - this->minimumNrMatchers = minimumNrMatchers; - this->subScorers = subScorers; - } - - DisjunctionSumScorer::~DisjunctionSumScorer() - { +namespace Lucene { + +DisjunctionSumScorer::DisjunctionSumScorer(Collection subScorers, int32_t minimumNrMatchers) : Scorer(SimilarityPtr()) { + this->currentDoc = -1; + this->_nrMatchers = -1; + this->currentScore = std::numeric_limits::quiet_NaN(); + + this->nrScorers = subScorers.size(); + + if (minimumNrMatchers <= 0) { + boost::throw_exception(IllegalArgumentException(L"Minimum nr of matchers must be positive")); } - - void DisjunctionSumScorer::initialize() - { - initScorerDocQueue(); + if (nrScorers <= 1) { + boost::throw_exception(IllegalArgumentException(L"There must be at least 2 subScorers")); } - - void DisjunctionSumScorer::initScorerDocQueue() - { - scorerDocQueue = newLucene(nrScorers); - for (Collection::iterator se = subScorers.begin(); se != subScorers.end(); ++se) - { - if ((*se)->nextDoc() != NO_MORE_DOCS) - scorerDocQueue->insert(*se); + + this->minimumNrMatchers = minimumNrMatchers; + this->subScorers = subScorers; +} + +DisjunctionSumScorer::~DisjunctionSumScorer() { +} + +void DisjunctionSumScorer::initialize() 
{ + initScorerDocQueue(); +} + +void DisjunctionSumScorer::initScorerDocQueue() { + scorerDocQueue = newLucene(nrScorers); + for (Collection::iterator se = subScorers.begin(); se != subScorers.end(); ++se) { + if ((*se)->nextDoc() != NO_MORE_DOCS) { + scorerDocQueue->insert(*se); } } - - void DisjunctionSumScorer::score(CollectorPtr collector) - { - collector->setScorer(shared_from_this()); - while (nextDoc() != NO_MORE_DOCS) - collector->collect(currentDoc); +} + +void DisjunctionSumScorer::score(const CollectorPtr& collector) { + collector->setScorer(shared_from_this()); + while (nextDoc() != NO_MORE_DOCS) { + collector->collect(currentDoc); } - - bool DisjunctionSumScorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) - { - // firstDocID is ignored since nextDoc() sets 'currentDoc' - collector->setScorer(shared_from_this()); - while (currentDoc < max) - { - collector->collect(currentDoc); - if (nextDoc() == NO_MORE_DOCS) - return false; +} + +bool DisjunctionSumScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { + // firstDocID is ignored since nextDoc() sets 'currentDoc' + collector->setScorer(shared_from_this()); + while (currentDoc < max) { + collector->collect(currentDoc); + if (nextDoc() == NO_MORE_DOCS) { + return false; } - return true; } - - int32_t DisjunctionSumScorer::nextDoc() - { - if (scorerDocQueue->size() < minimumNrMatchers || !advanceAfterCurrent()) - currentDoc = NO_MORE_DOCS; - return currentDoc; + return true; +} + +int32_t DisjunctionSumScorer::nextDoc() { + if (scorerDocQueue->size() < minimumNrMatchers || !advanceAfterCurrent()) { + currentDoc = NO_MORE_DOCS; } - - bool DisjunctionSumScorer::advanceAfterCurrent() - { - do // repeat until minimum nr of matchers - { - currentDoc = scorerDocQueue->topDoc(); - currentScore = scorerDocQueue->topScore(); - _nrMatchers = 1; - do // Until all subscorers are after currentDoc - { - if (!scorerDocQueue->topNextAndAdjustElsePop()) - { - if 
(scorerDocQueue->size() == 0) - break; // nothing more to advance, check for last match. + return currentDoc; +} + +bool DisjunctionSumScorer::advanceAfterCurrent() { + do { // repeat until minimum nr of matchers + currentDoc = scorerDocQueue->topDoc(); + currentScore = scorerDocQueue->topScore(); + _nrMatchers = 1; + do { // Until all subscorers are after currentDoc + if (!scorerDocQueue->topNextAndAdjustElsePop()) { + if (scorerDocQueue->size() == 0) { + break; // nothing more to advance, check for last match. } - if (scorerDocQueue->topDoc() != currentDoc) - break; // All remaining subscorers are after currentDoc. - currentScore += scorerDocQueue->topScore(); - ++_nrMatchers; } - while (true); - - if (_nrMatchers >= minimumNrMatchers) - return true; - else if (scorerDocQueue->size() < minimumNrMatchers) - return false; + if (scorerDocQueue->topDoc() != currentDoc) { + break; // All remaining subscorers are after currentDoc. + } + currentScore += scorerDocQueue->topScore(); + ++_nrMatchers; + } while (true); + + if (_nrMatchers >= minimumNrMatchers) { + return true; + } else if (scorerDocQueue->size() < minimumNrMatchers) { + return false; } - while (true); - } - - double DisjunctionSumScorer::score() - { - return currentScore; - } - - int32_t DisjunctionSumScorer::docID() - { + } while (true); +} + +double DisjunctionSumScorer::score() { + return currentScore; +} + +int32_t DisjunctionSumScorer::docID() { + return currentDoc; +} + +int32_t DisjunctionSumScorer::nrMatchers() { + return _nrMatchers; +} + +int32_t DisjunctionSumScorer::advance(int32_t target) { + if (scorerDocQueue->size() < minimumNrMatchers) { + currentDoc = NO_MORE_DOCS; return currentDoc; } - - int32_t DisjunctionSumScorer::nrMatchers() - { - return _nrMatchers; + if (target <= currentDoc) { + return currentDoc; } - - int32_t DisjunctionSumScorer::advance(int32_t target) - { - if (scorerDocQueue->size() < minimumNrMatchers) - { - currentDoc = NO_MORE_DOCS; - return currentDoc; - } - if (target 
<= currentDoc) + do { + if (scorerDocQueue->topDoc() >= target) { + if (!advanceAfterCurrent()) { + currentDoc = NO_MORE_DOCS; + } return currentDoc; - do - { - if (scorerDocQueue->topDoc() >= target) - { - if (!advanceAfterCurrent()) - currentDoc = NO_MORE_DOCS; + } else if (!scorerDocQueue->topSkipToAndAdjustElsePop(target)) { + if (scorerDocQueue->size() < minimumNrMatchers) { + currentDoc = NO_MORE_DOCS; return currentDoc; } - else if (!scorerDocQueue->topSkipToAndAdjustElsePop(target)) - { - if (scorerDocQueue->size() < minimumNrMatchers) - { - currentDoc = NO_MORE_DOCS; - return currentDoc; - } - } } - while (true); - } + } while (true); +} + } diff --git a/src/core/search/DocIdSet.cpp b/src/core/search/DocIdSet.cpp index e1050745..f3ddb40e 100644 --- a/src/core/search/DocIdSet.cpp +++ b/src/core/search/DocIdSet.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,58 +8,48 @@ #include "DocIdSet.h" #include "_DocIdSet.h" -namespace Lucene -{ - DocIdSet::~DocIdSet() - { - } - - bool DocIdSet::isCacheable() - { - return false; - } - - DocIdSetPtr DocIdSet::EMPTY_DOCIDSET() - { - static DocIdSetPtr _EMPTY_DOCIDSET; - if (!_EMPTY_DOCIDSET) - { - _EMPTY_DOCIDSET = newLucene(); - CycleCheck::addStatic(_EMPTY_DOCIDSET); - } - return _EMPTY_DOCIDSET; - } - - EmptyDocIdSetIterator::~EmptyDocIdSetIterator() - { - } - - int32_t EmptyDocIdSetIterator::advance(int32_t target) - { - return NO_MORE_DOCS; - } - - int32_t EmptyDocIdSetIterator::docID() - { - return NO_MORE_DOCS; - } - - int32_t EmptyDocIdSetIterator::nextDoc() - { - return NO_MORE_DOCS; - } - - EmptyDocIdSet::~EmptyDocIdSet() - { - } - - DocIdSetIteratorPtr EmptyDocIdSet::iterator() - { - return newLucene(); - } - - bool EmptyDocIdSet::isCacheable() - { - return true; - } +namespace Lucene { + +DocIdSet::~DocIdSet() { +} + +bool DocIdSet::isCacheable() { + return false; +} + +DocIdSetPtr DocIdSet::EMPTY_DOCIDSET() { + static DocIdSetPtr _EMPTY_DOCIDSET; + LUCENE_RUN_ONCE( + _EMPTY_DOCIDSET = newLucene(); + CycleCheck::addStatic(_EMPTY_DOCIDSET); + ); + return _EMPTY_DOCIDSET; +} + +EmptyDocIdSetIterator::~EmptyDocIdSetIterator() { +} + +int32_t EmptyDocIdSetIterator::advance(int32_t target) { + return NO_MORE_DOCS; +} + +int32_t EmptyDocIdSetIterator::docID() { + return NO_MORE_DOCS; +} + +int32_t EmptyDocIdSetIterator::nextDoc() { + return NO_MORE_DOCS; +} + +EmptyDocIdSet::~EmptyDocIdSet() { +} + +DocIdSetIteratorPtr EmptyDocIdSet::iterator() { + return newLucene(); +} + +bool EmptyDocIdSet::isCacheable() { + return true; +} + } diff --git a/src/core/search/DocIdSetIterator.cpp b/src/core/search/DocIdSetIterator.cpp index 393fdf55..435d5164 100644 --- a/src/core/search/DocIdSetIterator.cpp +++ b/src/core/search/DocIdSetIterator.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,13 +7,13 @@ #include "LuceneInc.h" #include "DocIdSetIterator.h" -namespace Lucene -{ - /// When returned by {@link #nextDoc()}, {@link #advance(int)} and {@link #docID()} it means there - /// docs in the iterator. - const int32_t DocIdSetIterator::NO_MORE_DOCS = INT_MAX; - - DocIdSetIterator::~DocIdSetIterator() - { - } +namespace Lucene { + +/// When returned by {@link #nextDoc()}, {@link #advance(int)} and {@link #docID()} it means there +/// docs in the iterator. +const int32_t DocIdSetIterator::NO_MORE_DOCS = INT_MAX; + +DocIdSetIterator::~DocIdSetIterator() { +} + } diff --git a/src/core/search/ExactPhraseScorer.cpp b/src/core/search/ExactPhraseScorer.cpp index d5f9431b..cd5130c9 100644 --- a/src/core/search/ExactPhraseScorer.cpp +++ b/src/core/search/ExactPhraseScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,46 +9,39 @@ #include "PhrasePositions.h" #include "PhraseQueue.h" -namespace Lucene -{ - ExactPhraseScorer::ExactPhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) - { - } - - ExactPhraseScorer::~ExactPhraseScorer() - { +namespace Lucene { + +ExactPhraseScorer::ExactPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) { +} + +ExactPhraseScorer::~ExactPhraseScorer() { +} + +double ExactPhraseScorer::phraseFreq() { + // sort list with pq + pq->clear(); + for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { + __pp->firstPosition(); + pq->add(__pp); } - - double ExactPhraseScorer::phraseFreq() - { - // sort list with pq - pq->clear(); - for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) - { - pp->firstPosition(); - pq->add(pp); // build pq from list - } - pqToList(); // rebuild list from pq - - // For counting how many times the exact phrase is found in current document, just count how many - // times all PhrasePosition's have exactly the same position. - int32_t freq = 0; - do - { - while (first->position < last->position) // scan forward in first - { - do - { - if (!first->nextPosition()) - return freq; + pqToList(); // rebuild list from pq + + // For counting how many times the exact phrase is found in current document, just count how many + // times all PhrasePosition's have exactly the same position. 
+ int32_t freq = 0; + do { + while (__first->position < __last->position) { // scan forward in first + do { + if (!__first->nextPosition()) { + return freq; } - while (first->position < last->position); - firstToLast(); - } - ++freq; // all equal: a match + } while (__first->position < __last->position); + firstToLast(); } - while (last->nextPosition()); - - return freq; - } + ++freq; // all equal: a match + } while (__last->nextPosition()); + + return freq; +} + } diff --git a/src/core/search/Explanation.cpp b/src/core/search/Explanation.cpp index d73ed4b4..3c64badb 100644 --- a/src/core/search/Explanation.cpp +++ b/src/core/search/Explanation.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,94 +8,84 @@ #include "Explanation.h" #include "StringUtils.h" -namespace Lucene -{ - Explanation::Explanation(double value, const String& description) - { - this->value = value; - this->description = description; - } - - Explanation::~Explanation() - { - } - - bool Explanation::isMatch() - { - return (0.0 < getValue()); - } - - double Explanation::getValue() - { - return value; - } - - void Explanation::setValue(double value) - { - this->value = value; - } - - String Explanation::getDescription() - { - return description; - } - - void Explanation::setDescription(const String& description) - { - this->description = description; - } - - String Explanation::getSummary() - { - return StringUtils::toString(getValue()) + L" = " + getDescription(); - } - - Collection Explanation::getDetails() - { - if (!details) - return Collection(); - return Collection::newInstance(this->details.begin(), this->details.end()); 
+namespace Lucene { + +Explanation::Explanation(double value, const String& description) { + this->value = value; + this->description = description; +} + +Explanation::~Explanation() { +} + +bool Explanation::isMatch() { + return (0.0 < getValue()); +} + +double Explanation::getValue() { + return value; +} + +void Explanation::setValue(double value) { + this->value = value; +} + +String Explanation::getDescription() { + return description; +} + +void Explanation::setDescription(const String& description) { + this->description = description; +} + +String Explanation::getSummary() { + return StringUtils::toString(getValue()) + L" = " + getDescription(); +} + +Collection Explanation::getDetails() { + if (!details) { + return Collection(); } - - void Explanation::addDetail(ExplanationPtr detail) - { - if (!details) - details = Collection::newInstance(); - details.add(detail); + return Collection::newInstance(this->details.begin(), this->details.end()); +} + +void Explanation::addDetail(const ExplanationPtr& detail) { + if (!details) { + details = Collection::newInstance(); } - - String Explanation::toString() - { - return toString(0); + details.add(detail); +} + +String Explanation::toString() { + return toString(0); +} + +String Explanation::toString(int32_t depth) { + String buffer; + for (int32_t i = 0; i < depth; ++i) { + buffer += L" "; } - - String Explanation::toString(int32_t depth) - { - String buffer; - for (int32_t i = 0; i < depth; ++i) - buffer += L" "; - buffer += getSummary() + L"\n"; - if (details) - { - for (int32_t i = 0; i < details.size(); ++i) - buffer += details[i]->toString(depth + 1); + buffer += getSummary() + L"\n"; + if (details) { + for (int32_t i = 0; i < details.size(); ++i) { + buffer += details[i]->toString(depth + 1); } - return buffer; } - - String Explanation::toHtml() - { - String buffer(L"
    \n
  • " + getSummary() + L"
    \n"); - if (details) - { - for (int32_t i = 0; i < details.size(); ++i) - buffer += details[i]->toHtml(); + return buffer; +} + +String Explanation::toHtml() { + String buffer(L"
      \n
    • " + getSummary() + L"
      \n"); + if (details) { + for (int32_t i = 0; i < details.size(); ++i) { + buffer += details[i]->toHtml(); } - buffer += L"
    • \n
    \n"; - return buffer; - } - - IDFExplanation::~IDFExplanation() - { } + buffer += L"
  • \n
\n"; + return buffer; +} + +IDFExplanation::~IDFExplanation() { +} + } diff --git a/src/core/search/FieldCache.cpp b/src/core/search/FieldCache.cpp index c93a9623..7b7e51f7 100644 --- a/src/core/search/FieldCache.cpp +++ b/src/core/search/FieldCache.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,351 +11,290 @@ #include "NumericUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// Indicator for StringIndex values in the cache. - const int32_t FieldCache::STRING_INDEX = -1; - - FieldCache::~FieldCache() - { - } - - FieldCachePtr FieldCache::DEFAULT() - { - static FieldCacheImplPtr _DEFAULT; - if (!_DEFAULT) - { - _DEFAULT = newLucene(); - CycleCheck::addStatic(_DEFAULT); - } - return _DEFAULT; - } - - ByteParserPtr FieldCache::DEFAULT_BYTE_PARSER() - { - static DefaultByteParserPtr _DEFAULT_BYTE_PARSER; - if (!_DEFAULT_BYTE_PARSER) - { - _DEFAULT_BYTE_PARSER = newLucene(); - CycleCheck::addStatic(_DEFAULT_BYTE_PARSER); - } - return _DEFAULT_BYTE_PARSER; - } - - IntParserPtr FieldCache::DEFAULT_INT_PARSER() - { - static DefaultIntParserPtr _DEFAULT_INT_PARSER; - if (!_DEFAULT_INT_PARSER) - { - _DEFAULT_INT_PARSER = newLucene(); - CycleCheck::addStatic(_DEFAULT_INT_PARSER); - } - return _DEFAULT_INT_PARSER; - } - - LongParserPtr FieldCache::DEFAULT_LONG_PARSER() - { - static DefaultLongParserPtr _DEFAULT_LONG_PARSER; - if (!_DEFAULT_LONG_PARSER) - { - _DEFAULT_LONG_PARSER = newLucene(); - CycleCheck::addStatic(_DEFAULT_LONG_PARSER); - } - return _DEFAULT_LONG_PARSER; - } - - DoubleParserPtr FieldCache::DEFAULT_DOUBLE_PARSER() - { - static DefaultDoubleParserPtr _DEFAULT_DOUBLE_PARSER; - 
if (!_DEFAULT_DOUBLE_PARSER) - { - _DEFAULT_DOUBLE_PARSER = newLucene(); - CycleCheck::addStatic(_DEFAULT_DOUBLE_PARSER); - } - return _DEFAULT_DOUBLE_PARSER; - } - - IntParserPtr FieldCache::NUMERIC_UTILS_INT_PARSER() - { - static NumericUtilsIntParserPtr _NUMERIC_UTILS_INT_PARSER; - if (!_NUMERIC_UTILS_INT_PARSER) - { - _NUMERIC_UTILS_INT_PARSER = newLucene(); - CycleCheck::addStatic(_NUMERIC_UTILS_INT_PARSER); - } - return _NUMERIC_UTILS_INT_PARSER; - } - - LongParserPtr FieldCache::NUMERIC_UTILS_LONG_PARSER() - { - static NumericUtilsLongParserPtr _NUMERIC_UTILS_LONG_PARSER; - if (!_NUMERIC_UTILS_LONG_PARSER) - { - _NUMERIC_UTILS_LONG_PARSER = newLucene(); - CycleCheck::addStatic(_NUMERIC_UTILS_LONG_PARSER); - } - return _NUMERIC_UTILS_LONG_PARSER; - } - - DoubleParserPtr FieldCache::NUMERIC_UTILS_DOUBLE_PARSER() - { - static NumericUtilsDoubleParserPtr _NUMERIC_UTILS_DOUBLE_PARSER; - if (!_NUMERIC_UTILS_DOUBLE_PARSER) - { - _NUMERIC_UTILS_DOUBLE_PARSER = newLucene(); - CycleCheck::addStatic(_NUMERIC_UTILS_DOUBLE_PARSER); - } - return _NUMERIC_UTILS_DOUBLE_PARSER; - } - - Collection FieldCache::getBytes(IndexReaderPtr reader, const String& field) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection FieldCache::getBytes(IndexReaderPtr reader, const String& field, ByteParserPtr parser) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection FieldCache::getInts(IndexReaderPtr reader, const String& field) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection FieldCache::getInts(IndexReaderPtr reader, const String& field, IntParserPtr parser) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection FieldCache::getLongs(IndexReaderPtr reader, const String& field) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection FieldCache::getLongs(IndexReaderPtr reader, const String& field, LongParserPtr parser) - { - BOOST_ASSERT(false); - return 
Collection(); // override - } - - Collection FieldCache::getDoubles(IndexReaderPtr reader, const String& field) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection FieldCache::getDoubles(IndexReaderPtr reader, const String& field, DoubleParserPtr parser) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - Collection FieldCache::getStrings(IndexReaderPtr reader, const String& field) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - StringIndexPtr FieldCache::getStringIndex(IndexReaderPtr reader, const String& field) - { - BOOST_ASSERT(false); - return StringIndexPtr(); // override - } - - void FieldCache::setInfoStream(InfoStreamPtr stream) - { - BOOST_ASSERT(false); - // override - } - - InfoStreamPtr FieldCache::getInfoStream() - { - BOOST_ASSERT(false); - return InfoStreamPtr(); // override - } - - CreationPlaceholder::~CreationPlaceholder() - { - } - - StringIndex::StringIndex(Collection values, Collection lookup) - { - this->order = values; - this->lookup = lookup; - } - - StringIndex::~StringIndex() - { - } - - int32_t StringIndex::binarySearchLookup(const String& key) - { - Collection::iterator search = std::lower_bound(lookup.begin(), lookup.end(), key); - int32_t keyPos = std::distance(lookup.begin(), search); - return (search == lookup.end() || key < *search) ? 
-(keyPos + 1) : keyPos; - } - - Parser::~Parser() - { - } - - ByteParser::~ByteParser() - { - } - - uint8_t ByteParser::parseByte(const String& string) - { - return 0; // override - } - - DefaultByteParser::~DefaultByteParser() - { - } - - uint8_t DefaultByteParser::parseByte(const String& string) - { - return (uint8_t)StringUtils::toInt(string); - } - - String DefaultByteParser::toString() - { - return FieldCache::_getClassName() + L".DEFAULT_BYTE_PARSER"; - } - - IntParser::~IntParser() - { - } - - int32_t IntParser::parseInt(const String& string) - { - return 0; // override - } - - DefaultIntParser::~DefaultIntParser() - { - } - - int32_t DefaultIntParser::parseInt(const String& string) - { - return StringUtils::toInt(string); - } - - String DefaultIntParser::toString() - { - return FieldCache::_getClassName() + L".DEFAULT_INT_PARSER"; - } - - NumericUtilsIntParser::~NumericUtilsIntParser() - { - } - - int32_t NumericUtilsIntParser::parseInt(const String& string) - { - int32_t shift = string[0] - NumericUtils::SHIFT_START_INT; - if (shift > 0 && shift <= 31) - boost::throw_exception(StopFillCacheException()); - return NumericUtils::prefixCodedToInt(string); - } - - String NumericUtilsIntParser::toString() - { - return FieldCache::_getClassName() + L".NUMERIC_UTILS_INT_PARSER"; - } - - LongParser::~LongParser() - { - } - - int64_t LongParser::parseLong(const String& string) - { - return 0; // override - } - - DefaultLongParser::~DefaultLongParser() - { - } - - int64_t DefaultLongParser::parseLong(const String& string) - { - return StringUtils::toLong(string); - } - - String DefaultLongParser::toString() - { - return FieldCache::_getClassName() + L".DEFAULT_LONG_PARSER"; - } - - NumericUtilsLongParser::~NumericUtilsLongParser() - { - } - - int64_t NumericUtilsLongParser::parseLong(const String& string) - { - int32_t shift = string[0] - NumericUtils::SHIFT_START_LONG; - if (shift > 0 && shift <= 63) - boost::throw_exception(StopFillCacheException()); - return 
NumericUtils::prefixCodedToLong(string); - } - - String NumericUtilsLongParser::toString() - { - return FieldCache::_getClassName() + L".NUMERIC_UTILS_LONG_PARSER"; - } - - DoubleParser::~DoubleParser() - { - } - - double DoubleParser::parseDouble(const String& string) - { - return 0; // override - } - - DefaultDoubleParser::~DefaultDoubleParser() - { - } - - double DefaultDoubleParser::parseDouble(const String& string) - { - return StringUtils::toDouble(string); - } - - String DefaultDoubleParser::toString() - { - return FieldCache::_getClassName() + L".DEFAULT_DOUBLE_PARSER"; - } - - NumericUtilsDoubleParser::~NumericUtilsDoubleParser() - { - } - - double NumericUtilsDoubleParser::parseDouble(const String& string) - { - int32_t shift = string[0] - NumericUtils::SHIFT_START_LONG; - if (shift > 0 && shift <= 63) - boost::throw_exception(StopFillCacheException()); - return NumericUtils::sortableLongToDouble(NumericUtils::prefixCodedToLong(string)); - } - - String NumericUtilsDoubleParser::toString() - { - return FieldCache::_getClassName() + L".NUMERIC_UTILS_DOUBLE_PARSER"; +namespace Lucene { + +/// Indicator for StringIndex values in the cache. 
+const int32_t FieldCache::STRING_INDEX = -1; + +FieldCache::~FieldCache() { +} + +FieldCachePtr FieldCache::DEFAULT() { + static FieldCacheImplPtr _DEFAULT; + LUCENE_RUN_ONCE( + _DEFAULT = newLucene(); + CycleCheck::addStatic(_DEFAULT); + ); + return _DEFAULT; +} + +ByteParserPtr FieldCache::DEFAULT_BYTE_PARSER() { + static DefaultByteParserPtr _DEFAULT_BYTE_PARSER; + LUCENE_RUN_ONCE( + _DEFAULT_BYTE_PARSER = newLucene(); + CycleCheck::addStatic(_DEFAULT_BYTE_PARSER); + ); + return _DEFAULT_BYTE_PARSER; +} + +IntParserPtr FieldCache::DEFAULT_INT_PARSER() { + static DefaultIntParserPtr _DEFAULT_INT_PARSER; + LUCENE_RUN_ONCE( + _DEFAULT_INT_PARSER = newLucene(); + CycleCheck::addStatic(_DEFAULT_INT_PARSER); + ); + return _DEFAULT_INT_PARSER; +} + +LongParserPtr FieldCache::DEFAULT_LONG_PARSER() { + static DefaultLongParserPtr _DEFAULT_LONG_PARSER; + LUCENE_RUN_ONCE( + _DEFAULT_LONG_PARSER = newLucene(); + CycleCheck::addStatic(_DEFAULT_LONG_PARSER); + ); + return _DEFAULT_LONG_PARSER; +} + +DoubleParserPtr FieldCache::DEFAULT_DOUBLE_PARSER() { + static DefaultDoubleParserPtr _DEFAULT_DOUBLE_PARSER; + LUCENE_RUN_ONCE( + _DEFAULT_DOUBLE_PARSER = newLucene(); + CycleCheck::addStatic(_DEFAULT_DOUBLE_PARSER); + ); + return _DEFAULT_DOUBLE_PARSER; +} + +IntParserPtr FieldCache::NUMERIC_UTILS_INT_PARSER() { + static NumericUtilsIntParserPtr _NUMERIC_UTILS_INT_PARSER; + LUCENE_RUN_ONCE( + _NUMERIC_UTILS_INT_PARSER = newLucene(); + CycleCheck::addStatic(_NUMERIC_UTILS_INT_PARSER); + ); + return _NUMERIC_UTILS_INT_PARSER; +} + +LongParserPtr FieldCache::NUMERIC_UTILS_LONG_PARSER() { + static NumericUtilsLongParserPtr _NUMERIC_UTILS_LONG_PARSER; + LUCENE_RUN_ONCE( + _NUMERIC_UTILS_LONG_PARSER = newLucene(); + CycleCheck::addStatic(_NUMERIC_UTILS_LONG_PARSER); + ); + return _NUMERIC_UTILS_LONG_PARSER; +} + +DoubleParserPtr FieldCache::NUMERIC_UTILS_DOUBLE_PARSER() { + static NumericUtilsDoubleParserPtr _NUMERIC_UTILS_DOUBLE_PARSER; + LUCENE_RUN_ONCE( + 
_NUMERIC_UTILS_DOUBLE_PARSER = newLucene(); + CycleCheck::addStatic(_NUMERIC_UTILS_DOUBLE_PARSER); + ); + return _NUMERIC_UTILS_DOUBLE_PARSER; +} + +Collection FieldCache::getBytes(const IndexReaderPtr& reader, const String& field) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection FieldCache::getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection FieldCache::getInts(const IndexReaderPtr& reader, const String& field) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection FieldCache::getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection FieldCache::getLongs(const IndexReaderPtr& reader, const String& field) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection FieldCache::getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection FieldCache::getDoubles(const IndexReaderPtr& reader, const String& field) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection FieldCache::getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser) { + BOOST_ASSERT(false); + return Collection(); // override +} + +Collection FieldCache::getStrings(const IndexReaderPtr& reader, const String& field) { + BOOST_ASSERT(false); + return Collection(); // override +} + +StringIndexPtr FieldCache::getStringIndex(const IndexReaderPtr& reader, const String& field) { + BOOST_ASSERT(false); + return StringIndexPtr(); // override +} + +void FieldCache::setInfoStream(const InfoStreamPtr& stream) { + BOOST_ASSERT(false); + // override +} + +InfoStreamPtr FieldCache::getInfoStream() { + BOOST_ASSERT(false); + return InfoStreamPtr(); // override +} + 
+CreationPlaceholder::~CreationPlaceholder() { +} + +StringIndex::StringIndex(Collection values, Collection lookup) { + this->order = values; + this->lookup = lookup; +} + +StringIndex::~StringIndex() { +} + +int32_t StringIndex::binarySearchLookup(const String& key) { + Collection::iterator search = std::lower_bound(lookup.begin(), lookup.end(), key); + int32_t keyPos = std::distance(lookup.begin(), search); + return (search == lookup.end() || key < *search) ? -(keyPos + 1) : keyPos; +} + +Parser::~Parser() { +} + +ByteParser::~ByteParser() { +} + +uint8_t ByteParser::parseByte(const String& string) { + return 0; // override +} + +DefaultByteParser::~DefaultByteParser() { +} + +uint8_t DefaultByteParser::parseByte(const String& string) { + return (uint8_t)StringUtils::toInt(string); +} + +String DefaultByteParser::toString() { + return FieldCache::_getClassName() + L".DEFAULT_BYTE_PARSER"; +} + +IntParser::~IntParser() { +} + +int32_t IntParser::parseInt(const String& string) { + return 0; // override +} + +DefaultIntParser::~DefaultIntParser() { +} + +int32_t DefaultIntParser::parseInt(const String& string) { + return StringUtils::toInt(string); +} + +String DefaultIntParser::toString() { + return FieldCache::_getClassName() + L".DEFAULT_INT_PARSER"; +} + +NumericUtilsIntParser::~NumericUtilsIntParser() { +} + +int32_t NumericUtilsIntParser::parseInt(const String& string) { + int32_t shift = string[0] - NumericUtils::SHIFT_START_INT; + if (shift > 0 && shift <= 31) { + boost::throw_exception(StopFillCacheException()); } - - FieldCacheEntry::~FieldCacheEntry() - { + return NumericUtils::prefixCodedToInt(string); +} + +String NumericUtilsIntParser::toString() { + return FieldCache::_getClassName() + L".NUMERIC_UTILS_INT_PARSER"; +} + +LongParser::~LongParser() { +} + +int64_t LongParser::parseLong(const String& string) { + return 0; // override +} + +DefaultLongParser::~DefaultLongParser() { +} + +int64_t DefaultLongParser::parseLong(const String& string) { + 
return StringUtils::toLong(string); +} + +String DefaultLongParser::toString() { + return FieldCache::_getClassName() + L".DEFAULT_LONG_PARSER"; +} + +NumericUtilsLongParser::~NumericUtilsLongParser() { +} + +int64_t NumericUtilsLongParser::parseLong(const String& string) { + int32_t shift = string[0] - NumericUtils::SHIFT_START_LONG; + if (shift > 0 && shift <= 63) { + boost::throw_exception(StopFillCacheException()); } - - String FieldCacheEntry::toString() - { - StringStream buffer; - buffer << L"'" << getReaderKey()->toString() << L"'=>" << getFieldName() << L"'," << getCacheType(); - return buffer.str(); + return NumericUtils::prefixCodedToLong(string); +} + +String NumericUtilsLongParser::toString() { + return FieldCache::_getClassName() + L".NUMERIC_UTILS_LONG_PARSER"; +} + +DoubleParser::~DoubleParser() { +} + +double DoubleParser::parseDouble(const String& string) { + return 0; // override +} + +DefaultDoubleParser::~DefaultDoubleParser() { +} + +double DefaultDoubleParser::parseDouble(const String& string) { + return StringUtils::toDouble(string); +} + +String DefaultDoubleParser::toString() { + return FieldCache::_getClassName() + L".DEFAULT_DOUBLE_PARSER"; +} + +NumericUtilsDoubleParser::~NumericUtilsDoubleParser() { +} + +double NumericUtilsDoubleParser::parseDouble(const String& string) { + int32_t shift = string[0] - NumericUtils::SHIFT_START_LONG; + if (shift > 0 && shift <= 63) { + boost::throw_exception(StopFillCacheException()); } + return NumericUtils::sortableLongToDouble(NumericUtils::prefixCodedToLong(string)); +} + +String NumericUtilsDoubleParser::toString() { + return FieldCache::_getClassName() + L".NUMERIC_UTILS_DOUBLE_PARSER"; +} + +FieldCacheEntry::~FieldCacheEntry() { +} + +String FieldCacheEntry::toString() { + StringStream buffer; + buffer << L"'" << getReaderKey()->toString() << L"'=>" << getFieldName() << L"'," << getCacheType(); + return buffer.str(); +} + } diff --git a/src/core/search/FieldCacheImpl.cpp 
b/src/core/search/FieldCacheImpl.cpp index 9e45629f..aabcbff9 100644 --- a/src/core/search/FieldCacheImpl.cpp +++ b/src/core/search/FieldCacheImpl.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -15,603 +15,511 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - FieldCacheImpl::FieldCacheImpl() - { - } - - FieldCacheImpl::~FieldCacheImpl() - { - } - - void FieldCacheImpl::initialize() - { - caches = MapStringCache::newInstance(); - caches.put(CACHE_BYTE, newLucene(shared_from_this())); - caches.put(CACHE_INT, newLucene(shared_from_this())); - caches.put(CACHE_LONG, newLucene(shared_from_this())); - caches.put(CACHE_DOUBLE, newLucene(shared_from_this())); - caches.put(CACHE_STRING, newLucene(shared_from_this())); - caches.put(CACHE_STRING_INDEX, newLucene(shared_from_this())); - } - - void FieldCacheImpl::purgeAllCaches() - { - initialize(); - } - - void FieldCacheImpl::purge(IndexReaderPtr r) - { - for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) - cache->second->purge(r); +namespace Lucene { + +FieldCacheImpl::FieldCacheImpl() { +} + +FieldCacheImpl::~FieldCacheImpl() { +} + +void FieldCacheImpl::initialize() { + caches = MapStringCache::newInstance(); + caches.put(CACHE_BYTE, newLucene(shared_from_this())); + caches.put(CACHE_INT, newLucene(shared_from_this())); + caches.put(CACHE_LONG, newLucene(shared_from_this())); + caches.put(CACHE_DOUBLE, newLucene(shared_from_this())); + caches.put(CACHE_STRING, newLucene(shared_from_this())); + caches.put(CACHE_STRING_INDEX, newLucene(shared_from_this())); +} + +void 
FieldCacheImpl::purgeAllCaches() { + initialize(); +} + +void FieldCacheImpl::purge(const IndexReaderPtr& r) { + for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) { + cache->second->purge(r); } - - Collection FieldCacheImpl::getCacheEntries() - { - Collection result(Collection::newInstance()); - for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) - { - for (WeakMapLuceneObjectMapEntryAny::iterator key = cache->second->readerCache.begin(); key != cache->second->readerCache.end(); ++key) - { - LuceneObjectPtr readerKey(key->first.lock()); - - // we've now materialized a hard ref - if (readerKey) - { - for (MapEntryAny::iterator mapEntry = key->second.begin(); mapEntry != key->second.end(); ++mapEntry) - result.add(newLucene(readerKey, mapEntry->first->field, cache->first, mapEntry->first->custom, mapEntry->second)); +} + +Collection FieldCacheImpl::getCacheEntries() { + Collection result(Collection::newInstance()); + for (MapStringCache::iterator cache = caches.begin(); cache != caches.end(); ++cache) { + for (WeakMapLuceneObjectMapEntryAny::iterator key = cache->second->readerCache.begin(); key != cache->second->readerCache.end(); ++key) { + LuceneObjectPtr readerKey(key->first.lock()); + + // we've now materialized a hard ref + if (readerKey) { + for (MapEntryAny::iterator mapEntry = key->second.begin(); mapEntry != key->second.end(); ++mapEntry) { + result.add(newLucene(readerKey, mapEntry->first->field, cache->first, mapEntry->first->custom, mapEntry->second)); } } } - return result; - } - - Collection FieldCacheImpl::getBytes(IndexReaderPtr reader, const String& field) - { - return getBytes(reader, field, ByteParserPtr()); - } - - Collection FieldCacheImpl::getBytes(IndexReaderPtr reader, const String& field, ByteParserPtr parser) - { - return VariantUtils::get< Collection >(caches.get(CACHE_BYTE)->get(reader, newLucene(field, parser))); - } - - Collection 
FieldCacheImpl::getInts(IndexReaderPtr reader, const String& field) - { - return getInts(reader, field, IntParserPtr()); - } - - Collection FieldCacheImpl::getInts(IndexReaderPtr reader, const String& field, IntParserPtr parser) - { - return VariantUtils::get< Collection >(caches.get(CACHE_INT)->get(reader, newLucene(field, parser))); - } - - Collection FieldCacheImpl::getLongs(IndexReaderPtr reader, const String& field) - { - return getLongs(reader, field, LongParserPtr()); } - - Collection FieldCacheImpl::getLongs(IndexReaderPtr reader, const String& field, LongParserPtr parser) - { - return VariantUtils::get< Collection >(caches.get(CACHE_LONG)->get(reader, newLucene(field, parser))); - } - - Collection FieldCacheImpl::getDoubles(IndexReaderPtr reader, const String& field) - { - return getDoubles(reader, field, DoubleParserPtr()); - } - - Collection FieldCacheImpl::getDoubles(IndexReaderPtr reader, const String& field, DoubleParserPtr parser) - { - return VariantUtils::get< Collection >(caches.get(CACHE_DOUBLE)->get(reader, newLucene(field, parser))); - } - - Collection FieldCacheImpl::getStrings(IndexReaderPtr reader, const String& field) - { - return VariantUtils::get< Collection >(caches.get(CACHE_STRING)->get(reader, newLucene(field, ParserPtr()))); - } - - StringIndexPtr FieldCacheImpl::getStringIndex(IndexReaderPtr reader, const String& field) - { - return VariantUtils::get< StringIndexPtr >(caches.get(CACHE_STRING_INDEX)->get(reader, newLucene(field, ParserPtr()))); - } - - void FieldCacheImpl::setInfoStream(InfoStreamPtr stream) - { - infoStream = stream; - } - - InfoStreamPtr FieldCacheImpl::getInfoStream() - { - return infoStream; - } - - Entry::Entry(const String& field, boost::any custom) - { - this->field = field; - this->custom = custom; - } - - Entry::~Entry() - { + return result; +} + +Collection FieldCacheImpl::getBytes(const IndexReaderPtr& reader, const String& field) { + return getBytes(reader, field, ByteParserPtr()); +} + +Collection 
FieldCacheImpl::getBytes(const IndexReaderPtr& reader, const String& field, const ByteParserPtr& parser) { + return VariantUtils::get< Collection >(caches.get(CACHE_BYTE)->get(reader, newLucene(field, parser))); +} + +Collection FieldCacheImpl::getInts(const IndexReaderPtr& reader, const String& field) { + return getInts(reader, field, IntParserPtr()); +} + +Collection FieldCacheImpl::getInts(const IndexReaderPtr& reader, const String& field, const IntParserPtr& parser) { + return VariantUtils::get< Collection >(caches.get(CACHE_INT)->get(reader, newLucene(field, parser))); +} + +Collection FieldCacheImpl::getLongs(const IndexReaderPtr& reader, const String& field) { + return getLongs(reader, field, LongParserPtr()); +} + +Collection FieldCacheImpl::getLongs(const IndexReaderPtr& reader, const String& field, const LongParserPtr& parser) { + return VariantUtils::get< Collection >(caches.get(CACHE_LONG)->get(reader, newLucene(field, parser))); +} + +Collection FieldCacheImpl::getDoubles(const IndexReaderPtr& reader, const String& field) { + return getDoubles(reader, field, DoubleParserPtr()); +} + +Collection FieldCacheImpl::getDoubles(const IndexReaderPtr& reader, const String& field, const DoubleParserPtr& parser) { + return VariantUtils::get< Collection >(caches.get(CACHE_DOUBLE)->get(reader, newLucene(field, parser))); +} + +Collection FieldCacheImpl::getStrings(const IndexReaderPtr& reader, const String& field) { + return VariantUtils::get< Collection >(caches.get(CACHE_STRING)->get(reader, newLucene(field, ParserPtr()))); +} + +StringIndexPtr FieldCacheImpl::getStringIndex(const IndexReaderPtr& reader, const String& field) { + return VariantUtils::get< StringIndexPtr >(caches.get(CACHE_STRING_INDEX)->get(reader, newLucene(field, ParserPtr()))); +} + +void FieldCacheImpl::setInfoStream(const InfoStreamPtr& stream) { + infoStream = stream; +} + +InfoStreamPtr FieldCacheImpl::getInfoStream() { + return infoStream; +} + +Entry::Entry(const String& field, const 
boost::any& custom) { + this->field = field; + this->custom = custom; +} + +Entry::~Entry() { +} + +bool Entry::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - bool Entry::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - EntryPtr otherEntry(boost::dynamic_pointer_cast(other)); - if (otherEntry) - { - if (otherEntry->field == field) - return VariantUtils::equalsType(custom, otherEntry->custom); + + EntryPtr otherEntry(boost::dynamic_pointer_cast(other)); + if (otherEntry) { + if (otherEntry->field == field) { + return VariantUtils::equalsType(custom, otherEntry->custom); } - return false; - } - - int32_t Entry::hashCode() - { - return StringUtils::hashCode(field) ^ VariantUtils::hashCode(custom); - } - - Cache::Cache(FieldCachePtr wrapper) - { - this->_wrapper = wrapper; - this->readerCache = WeakMapLuceneObjectMapEntryAny::newInstance(); - } - - Cache::~Cache() - { } - - void Cache::purge(IndexReaderPtr r) + return false; +} + +int32_t Entry::hashCode() { + return StringUtils::hashCode(field) ^ VariantUtils::hashCode(custom); +} + +Cache::Cache(const FieldCachePtr& wrapper) { + this->_wrapper = wrapper; + this->readerCache = WeakMapLuceneObjectMapEntryAny::newInstance(); +} + +Cache::~Cache() { +} + +void Cache::purge(const IndexReaderPtr& r) { + LuceneObjectPtr readerKey(r->getFieldCacheKey()); + SyncLock cacheLock(&readerCache); + readerCache.remove(readerKey); +} + +boost::any Cache::get(const IndexReaderPtr& reader, const EntryPtr& key) { + MapEntryAny innerCache; + boost::any value; + LuceneObjectPtr readerKey(reader->getFieldCacheKey()); { - LuceneObjectPtr readerKey(r->getFieldCacheKey()); SyncLock cacheLock(&readerCache); - readerCache.remove(readerKey); - } - - boost::any Cache::get(IndexReaderPtr reader, EntryPtr key) - { - MapEntryAny innerCache; - boost::any value; - LuceneObjectPtr readerKey(reader->getFieldCacheKey()); - { - SyncLock cacheLock(&readerCache); - 
innerCache = readerCache.get(readerKey); - if (!innerCache) - { - innerCache = MapEntryAny::newInstance(); - readerCache.put(readerKey, innerCache); - } - else if (innerCache.contains(key)) - value = innerCache[key]; - if (VariantUtils::isNull(value)) - { - value = newLucene(); - innerCache.put(key, value); - } + innerCache = readerCache.get(readerKey); + if (!innerCache) { + innerCache = MapEntryAny::newInstance(); + readerCache.put(readerKey, innerCache); + } else if (innerCache.contains(key)) { + value = innerCache[key]; } - if (VariantUtils::typeOf(value)) - { - CreationPlaceholderPtr progress(VariantUtils::get(value)); - SyncLock valueLock(progress); - if (VariantUtils::isNull(progress->value)) - { - progress->value = createValue(reader, key); - { - SyncLock cacheLock(&readerCache); - innerCache.put(key, progress->value); - } - - FieldCachePtr wrapper(_wrapper); - - // Only check if key.custom (the parser) is non-null; else, we check twice for a single - // call to FieldCache.getXXX - if (!VariantUtils::isNull(key->custom) && wrapper) - { - InfoStreamPtr infoStream(wrapper->getInfoStream()); - if (infoStream) - printNewInsanity(infoStream, progress->value); - } - } - return progress->value; + if (VariantUtils::isNull(value)) { + value = newLucene(); + innerCache.put(key, value); } - return value; } - - void Cache::printNewInsanity(InfoStreamPtr infoStream, boost::any value) - { - Collection insanities(FieldCacheSanityChecker::checkSanity(FieldCachePtr(_wrapper))); - for (Collection::iterator insanity = insanities.begin(); insanity != insanities.end(); ++insanity) - { - Collection entries((*insanity)->getCacheEntries()); - for (Collection::iterator entry = entries.begin(); entry != entries.end(); ++entry) + if (VariantUtils::typeOf(value)) { + CreationPlaceholderPtr progress(VariantUtils::get(value)); + SyncLock valueLock(progress); + if (VariantUtils::isNull(progress->value)) { + progress->value = createValue(reader, key); { - if 
(VariantUtils::equalsType((*entry)->getValue(), value)) - { - // OK this insanity involves our entry - *infoStream << L"WARNING: new FieldCache insanity created\nDetails: " + (*insanity)->toString() << L"\n"; - break; + SyncLock cacheLock(&readerCache); + innerCache.put(key, progress->value); + } + + FieldCachePtr wrapper(_wrapper); + + // Only check if key.custom (the parser) is non-null; else, we check twice for a single + // call to FieldCache.getXXX + if (!VariantUtils::isNull(key->custom) && wrapper) { + InfoStreamPtr infoStream(wrapper->getInfoStream()); + if (infoStream) { + printNewInsanity(infoStream, progress->value); } } } + return progress->value; } - - ByteCache::ByteCache(FieldCachePtr wrapper) : Cache(wrapper) - { - } - - ByteCache::~ByteCache() - { - } - - boost::any ByteCache::createValue(IndexReaderPtr reader, EntryPtr key) - { - EntryPtr entry(key); - String field(entry->field); - ByteParserPtr parser(VariantUtils::get(entry->custom)); - if (!parser) - return FieldCachePtr(_wrapper)->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER()); - Collection retArray(Collection::newInstance(reader->maxDoc())); - TermDocsPtr termDocs(reader->termDocs()); - TermEnumPtr termEnum(reader->terms(newLucene(field))); - LuceneException finally; - try - { - do - { - TermPtr term(termEnum->term()); - if (!term || term->field() != field) - break; - uint8_t termval = parser->parseByte(term->text()); - termDocs->seek(termEnum); - while (termDocs->next()) - retArray[termDocs->doc()] = termval; + return value; +} + +void Cache::printNewInsanity(const InfoStreamPtr& infoStream, const boost::any& value) { + Collection insanities(FieldCacheSanityChecker::checkSanity(FieldCachePtr(_wrapper))); + for (Collection::iterator insanity = insanities.begin(); insanity != insanities.end(); ++insanity) { + Collection entries((*insanity)->getCacheEntries()); + for (Collection::iterator entry = entries.begin(); entry != entries.end(); ++entry) { + if 
(VariantUtils::equalsType((*entry)->getValue(), value)) { + // OK this insanity involves our entry + *infoStream << L"WARNING: new FieldCache insanity created\nDetails: " + (*insanity)->toString() << L"\n"; + break; } - while (termEnum->next()); } - catch (StopFillCacheException&) - { - } - catch (LuceneException& e) - { - finally = e; - } - termDocs->close(); - termEnum->close(); - finally.throwException(); - return retArray; - } - - IntCache::IntCache(FieldCachePtr wrapper) : Cache(wrapper) - { } - - IntCache::~IntCache() - { - } - - boost::any IntCache::createValue(IndexReaderPtr reader, EntryPtr key) - { - EntryPtr entry(key); - String field(entry->field); - IntParserPtr parser(VariantUtils::get(entry->custom)); - if (!parser) - { - FieldCachePtr wrapper(_wrapper); - boost::any ints; - try - { - ints = wrapper->getInts(reader, field, FieldCache::DEFAULT_INT_PARSER()); +} + +ByteCache::ByteCache(const FieldCachePtr& wrapper) : Cache(wrapper) { +} + +ByteCache::~ByteCache() { +} + +boost::any ByteCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { + EntryPtr entry(key); + String field(entry->field); + ByteParserPtr parser(VariantUtils::get(entry->custom)); + if (!parser) { + return FieldCachePtr(_wrapper)->getBytes(reader, field, FieldCache::DEFAULT_BYTE_PARSER()); + } + Collection retArray(Collection::newInstance(reader->maxDoc())); + TermDocsPtr termDocs(reader->termDocs()); + TermEnumPtr termEnum(reader->terms(newLucene(field))); + LuceneException finally; + try { + do { + TermPtr term(termEnum->term()); + if (!term || term->field() != field) { + break; } - catch (NumberFormatException&) - { - ints = wrapper->getInts(reader, field, FieldCache::NUMERIC_UTILS_INT_PARSER()); + uint8_t termval = parser->parseByte(term->text()); + termDocs->seek(termEnum); + while (termDocs->next()) { + retArray[termDocs->doc()] = termval; } - return ints; + } while (termEnum->next()); + } catch (StopFillCacheException&) { + } catch (LuceneException& e) { + 
finally = e; + } + termDocs->close(); + termEnum->close(); + finally.throwException(); + return retArray; +} + +IntCache::IntCache(const FieldCachePtr& wrapper) : Cache(wrapper) { +} + +IntCache::~IntCache() { +} + +boost::any IntCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { + EntryPtr entry(key); + String field(entry->field); + IntParserPtr parser(VariantUtils::get(entry->custom)); + if (!parser) { + FieldCachePtr wrapper(_wrapper); + boost::any ints; + try { + ints = wrapper->getInts(reader, field, FieldCache::DEFAULT_INT_PARSER()); + } catch (NumberFormatException&) { + ints = wrapper->getInts(reader, field, FieldCache::NUMERIC_UTILS_INT_PARSER()); } - Collection retArray; - TermDocsPtr termDocs(reader->termDocs()); - TermEnumPtr termEnum(reader->terms(newLucene(field))); - LuceneException finally; - try - { - do - { - TermPtr term(termEnum->term()); - if (!term || term->field() != field) - break; - int32_t termval = parser->parseInt(term->text()); - if (!retArray) // late init - retArray = Collection::newInstance(reader->maxDoc()); - termDocs->seek(termEnum); - while (termDocs->next()) - retArray[termDocs->doc()] = termval; + return ints; + } + Collection retArray; + TermDocsPtr termDocs(reader->termDocs()); + TermEnumPtr termEnum(reader->terms(newLucene(field))); + LuceneException finally; + try { + do { + TermPtr term(termEnum->term()); + if (!term || term->field() != field) { + break; } - while (termEnum->next()); - } - catch (StopFillCacheException&) - { - } - catch (LuceneException& e) - { - finally = e; - } - termDocs->close(); - termEnum->close(); - finally.throwException(); - if (!retArray) // no values - retArray = Collection::newInstance(reader->maxDoc()); - return retArray; - } - - LongCache::LongCache(FieldCachePtr wrapper) : Cache(wrapper) - { - } - - LongCache::~LongCache() - { - } - - boost::any LongCache::createValue(IndexReaderPtr reader, EntryPtr key) - { - EntryPtr entry(key); - String field(entry->field); - 
LongParserPtr parser(VariantUtils::get(entry->custom)); - if (!parser) - { - FieldCachePtr wrapper(_wrapper); - boost::any longs; - try - { - longs = wrapper->getLongs(reader, field, FieldCache::DEFAULT_LONG_PARSER()); + int32_t termval = parser->parseInt(term->text()); + if (!retArray) { // late init + retArray = Collection::newInstance(reader->maxDoc()); } - catch (NumberFormatException&) - { - longs = wrapper->getLongs(reader, field, FieldCache::NUMERIC_UTILS_LONG_PARSER()); + termDocs->seek(termEnum); + while (termDocs->next()) { + retArray[termDocs->doc()] = termval; } - return longs; + } while (termEnum->next()); + } catch (StopFillCacheException&) { + } catch (LuceneException& e) { + finally = e; + } + termDocs->close(); + termEnum->close(); + finally.throwException(); + if (!retArray) { // no values + retArray = Collection::newInstance(reader->maxDoc()); + } + return retArray; +} + +LongCache::LongCache(const FieldCachePtr& wrapper) : Cache(wrapper) { +} + +LongCache::~LongCache() { +} + +boost::any LongCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { + EntryPtr entry(key); + String field(entry->field); + LongParserPtr parser(VariantUtils::get(entry->custom)); + if (!parser) { + FieldCachePtr wrapper(_wrapper); + boost::any longs; + try { + longs = wrapper->getLongs(reader, field, FieldCache::DEFAULT_LONG_PARSER()); + } catch (NumberFormatException&) { + longs = wrapper->getLongs(reader, field, FieldCache::NUMERIC_UTILS_LONG_PARSER()); } - Collection retArray; - TermDocsPtr termDocs(reader->termDocs()); - TermEnumPtr termEnum(reader->terms(newLucene(field))); - LuceneException finally; - try - { - do - { - TermPtr term(termEnum->term()); - if (!term || term->field() != field) - break; - int64_t termval = parser->parseLong(term->text()); - if (!retArray) // late init - retArray = Collection::newInstance(reader->maxDoc()); - termDocs->seek(termEnum); - while (termDocs->next()) - retArray[termDocs->doc()] = termval; + return longs; + } + 
Collection retArray; + TermDocsPtr termDocs(reader->termDocs()); + TermEnumPtr termEnum(reader->terms(newLucene(field))); + LuceneException finally; + try { + do { + TermPtr term(termEnum->term()); + if (!term || term->field() != field) { + break; } - while (termEnum->next()); - } - catch (StopFillCacheException&) - { - } - catch (LuceneException& e) - { - finally = e; - } - termDocs->close(); - termEnum->close(); - finally.throwException(); - if (!retArray) // no values - retArray = Collection::newInstance(reader->maxDoc()); - return retArray; - } - - DoubleCache::DoubleCache(FieldCachePtr wrapper) : Cache(wrapper) - { - } - - DoubleCache::~DoubleCache() - { - } - - boost::any DoubleCache::createValue(IndexReaderPtr reader, EntryPtr key) - { - EntryPtr entry(key); - String field(entry->field); - DoubleParserPtr parser(VariantUtils::get(entry->custom)); - if (!parser) - { - FieldCachePtr wrapper(_wrapper); - boost::any doubles; - try - { - doubles = wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER()); + int64_t termval = parser->parseLong(term->text()); + if (!retArray) { // late init + retArray = Collection::newInstance(reader->maxDoc()); } - catch (NumberFormatException&) - { - doubles = wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER()); + termDocs->seek(termEnum); + while (termDocs->next()) { + retArray[termDocs->doc()] = termval; } - return doubles; + } while (termEnum->next()); + } catch (StopFillCacheException&) { + } catch (LuceneException& e) { + finally = e; + } + termDocs->close(); + termEnum->close(); + finally.throwException(); + if (!retArray) { // no values + retArray = Collection::newInstance(reader->maxDoc()); + } + return retArray; +} + +DoubleCache::DoubleCache(const FieldCachePtr& wrapper) : Cache(wrapper) { +} + +DoubleCache::~DoubleCache() { +} + +boost::any DoubleCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { + EntryPtr entry(key); + String field(entry->field); + 
DoubleParserPtr parser(VariantUtils::get(entry->custom)); + if (!parser) { + FieldCachePtr wrapper(_wrapper); + boost::any doubles; + try { + doubles = wrapper->getDoubles(reader, field, FieldCache::DEFAULT_DOUBLE_PARSER()); + } catch (NumberFormatException&) { + doubles = wrapper->getDoubles(reader, field, FieldCache::NUMERIC_UTILS_DOUBLE_PARSER()); } - Collection retArray; - TermDocsPtr termDocs(reader->termDocs()); - TermEnumPtr termEnum(reader->terms(newLucene(field))); - LuceneException finally; - try - { - do - { - TermPtr term(termEnum->term()); - if (!term || term->field() != field) - break; - double termval = parser->parseDouble(term->text()); - if (!retArray) // late init - retArray = Collection::newInstance(reader->maxDoc()); - termDocs->seek(termEnum); - while (termDocs->next()) - retArray[termDocs->doc()] = termval; + return doubles; + } + Collection retArray; + TermDocsPtr termDocs(reader->termDocs()); + TermEnumPtr termEnum(reader->terms(newLucene(field))); + LuceneException finally; + try { + do { + TermPtr term(termEnum->term()); + if (!term || term->field() != field) { + break; } - while (termEnum->next()); - } - catch (StopFillCacheException&) - { - } - catch (LuceneException& e) - { - finally = e; - } - termDocs->close(); - termEnum->close(); - finally.throwException(); - if (!retArray) // no values - retArray = Collection::newInstance(reader->maxDoc()); - return retArray; - } - - StringCache::StringCache(FieldCachePtr wrapper) : Cache(wrapper) - { - } - - StringCache::~StringCache() - { - } - - boost::any StringCache::createValue(IndexReaderPtr reader, EntryPtr key) - { - EntryPtr entry(key); - String field(entry->field); - Collection retArray(Collection::newInstance(reader->maxDoc())); - TermDocsPtr termDocs(reader->termDocs()); - TermEnumPtr termEnum(reader->terms(newLucene(field))); - LuceneException finally; - try - { - do - { - TermPtr term(termEnum->term()); - if (!term || term->field() != field) - break; - String termval(term->text()); - 
termDocs->seek(termEnum); - while (termDocs->next()) - retArray[termDocs->doc()] = termval; + double termval = parser->parseDouble(term->text()); + if (!retArray) { // late init + retArray = Collection::newInstance(reader->maxDoc()); } - while (termEnum->next()); - } - catch (LuceneException& e) - { - finally = e; - } - termDocs->close(); - termEnum->close(); - finally.throwException(); - return retArray; - } - - StringIndexCache::StringIndexCache(FieldCachePtr wrapper) : Cache(wrapper) - { - } - - StringIndexCache::~StringIndexCache() - { - } - - boost::any StringIndexCache::createValue(IndexReaderPtr reader, EntryPtr key) - { - EntryPtr entry(key); - String field(entry->field); - Collection retArray(Collection::newInstance(reader->maxDoc())); - Collection mterms(Collection::newInstance(reader->maxDoc() + 1)); - TermDocsPtr termDocs(reader->termDocs()); - TermEnumPtr termEnum(reader->terms(newLucene(field))); - int32_t t = 0; // current term number - - // an entry for documents that have no terms in this field should a document with no terms be at - // top or bottom? This puts them at the top - if it is changed, FieldDocSortedHitQueue needs to - // change as well. 
- mterms[t++] = L""; - - LuceneException finally; - try - { - do - { - TermPtr term(termEnum->term()); - if (!term || term->field() != field || t >= mterms.size() ) - break; - - // store term text - mterms[t] = term->text(); - - termDocs->seek(termEnum); - while (termDocs->next()) - retArray[termDocs->doc()] = t; - - ++t; + termDocs->seek(termEnum); + while (termDocs->next()) { + retArray[termDocs->doc()] = termval; } - while (termEnum->next()); - } - catch (LuceneException& e) - { - finally = e; - } - termDocs->close(); - termEnum->close(); - finally.throwException(); - - if (t == 0) - { - // if there are no terms, make the term array have a single null entry - mterms = Collection::newInstance(1); - } - else if (t < mterms.size()) - { - // if there are less terms than documents, trim off the dead array space - mterms.resize(t); - } - - return newLucene(retArray, mterms); - } - - FieldCacheEntryImpl::FieldCacheEntryImpl(LuceneObjectPtr readerKey, const String& fieldName, int32_t cacheType, boost::any custom, boost::any value) - { - this->readerKey = readerKey; - this->fieldName = fieldName; - this->cacheType = cacheType; - this->custom = custom; - this->value = value; - } - - FieldCacheEntryImpl::~FieldCacheEntryImpl() - { - } - - LuceneObjectPtr FieldCacheEntryImpl::getReaderKey() - { - return readerKey; - } - - String FieldCacheEntryImpl::getFieldName() - { - return fieldName; - } - - int32_t FieldCacheEntryImpl::getCacheType() - { - return cacheType; - } - - boost::any FieldCacheEntryImpl::getCustom() - { - return custom; - } - - boost::any FieldCacheEntryImpl::getValue() - { - return value; + } while (termEnum->next()); + } catch (StopFillCacheException&) { + } catch (LuceneException& e) { + finally = e; + } + termDocs->close(); + termEnum->close(); + finally.throwException(); + if (!retArray) { // no values + retArray = Collection::newInstance(reader->maxDoc()); + } + return retArray; +} + +StringCache::StringCache(const FieldCachePtr& wrapper) : 
Cache(wrapper) { +} + +StringCache::~StringCache() { +} + +boost::any StringCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { + EntryPtr entry(key); + String field(entry->field); + Collection retArray(Collection::newInstance(reader->maxDoc())); + TermDocsPtr termDocs(reader->termDocs()); + TermEnumPtr termEnum(reader->terms(newLucene(field))); + LuceneException finally; + try { + do { + TermPtr term(termEnum->term()); + if (!term || term->field() != field) { + break; + } + String termval(term->text()); + termDocs->seek(termEnum); + while (termDocs->next()) { + retArray[termDocs->doc()] = termval; + } + } while (termEnum->next()); + } catch (LuceneException& e) { + finally = e; + } + termDocs->close(); + termEnum->close(); + finally.throwException(); + return retArray; +} + +StringIndexCache::StringIndexCache(const FieldCachePtr& wrapper) : Cache(wrapper) { +} + +StringIndexCache::~StringIndexCache() { +} + +boost::any StringIndexCache::createValue(const IndexReaderPtr& reader, const EntryPtr& key) { + EntryPtr entry(key); + String field(entry->field); + Collection retArray(Collection::newInstance(reader->maxDoc())); + Collection mterms(Collection::newInstance(reader->maxDoc() + 1)); + TermDocsPtr termDocs(reader->termDocs()); + TermEnumPtr termEnum(reader->terms(newLucene(field))); + int32_t t = 0; // current term number + + // an entry for documents that have no terms in this field should a document with no terms be at + // top or bottom? This puts them at the top - if it is changed, FieldDocSortedHitQueue needs to + // change as well. 
+ mterms[t++] = L""; + + LuceneException finally; + try { + do { + TermPtr term(termEnum->term()); + if (!term || term->field() != field || t >= mterms.size() ) { + break; + } + + // store term text + mterms[t] = term->text(); + + termDocs->seek(termEnum); + while (termDocs->next()) { + retArray[termDocs->doc()] = t; + } + + ++t; + } while (termEnum->next()); + } catch (LuceneException& e) { + finally = e; + } + termDocs->close(); + termEnum->close(); + finally.throwException(); + + if (t == 0) { + // if there are no terms, make the term array have a single null entry + mterms = Collection::newInstance(1); + } else if (t < mterms.size()) { + // if there are less terms than documents, trim off the dead array space + mterms.resize(t); } + + return newLucene(retArray, mterms); +} + +FieldCacheEntryImpl::FieldCacheEntryImpl(const LuceneObjectPtr& readerKey, const String& fieldName, int32_t cacheType, const boost::any& custom, const boost::any& value) { + this->readerKey = readerKey; + this->fieldName = fieldName; + this->cacheType = cacheType; + this->custom = custom; + this->value = value; +} + +FieldCacheEntryImpl::~FieldCacheEntryImpl() { +} + +LuceneObjectPtr FieldCacheEntryImpl::getReaderKey() { + return readerKey; +} + +String FieldCacheEntryImpl::getFieldName() { + return fieldName; +} + +int32_t FieldCacheEntryImpl::getCacheType() { + return cacheType; +} + +boost::any FieldCacheEntryImpl::getCustom() { + return custom; +} + +boost::any FieldCacheEntryImpl::getValue() { + return value; +} + } diff --git a/src/core/search/FieldCacheRangeFilter.cpp b/src/core/search/FieldCacheRangeFilter.cpp index d13c4ccf..7a3e1ae8 100644 --- a/src/core/search/FieldCacheRangeFilter.cpp +++ b/src/core/search/FieldCacheRangeFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,407 +14,349 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - FieldCacheRangeFilter::FieldCacheRangeFilter(const String& field, ParserPtr parser, bool includeLower, bool includeUpper) - { - this->field = field; - this->parser = parser; - this->includeLower = includeLower; - this->includeUpper = includeUpper; - } - - FieldCacheRangeFilter::~FieldCacheRangeFilter() - { - } - - FieldCacheRangeFilterPtr FieldCacheRangeFilter::newStringRange(const String& field, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper) - { - return newLucene(field, ParserPtr(), lowerVal, upperVal, includeLower, includeUpper); - } - - FieldCacheRangeFilterPtr FieldCacheRangeFilter::newByteRange(const String& field, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) - { - return newByteRange(field, ByteParserPtr(), lowerVal, upperVal, includeLower, includeUpper); - } - - FieldCacheRangeFilterPtr FieldCacheRangeFilter::newByteRange(const String& field, ByteParserPtr parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) - { - return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); - } - - FieldCacheRangeFilterPtr FieldCacheRangeFilter::newIntRange(const String& field, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) - { - return newIntRange(field, IntParserPtr(), lowerVal, upperVal, includeLower, includeUpper); - } - - FieldCacheRangeFilterPtr FieldCacheRangeFilter::newIntRange(const String& field, IntParserPtr parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) - { - return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); - } - - FieldCacheRangeFilterPtr 
FieldCacheRangeFilter::newLongRange(const String& field, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) - { - return newLongRange(field, LongParserPtr(), lowerVal, upperVal, includeLower, includeUpper); - } - - FieldCacheRangeFilterPtr FieldCacheRangeFilter::newLongRange(const String& field, LongParserPtr parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) - { - return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); - } - - FieldCacheRangeFilterPtr FieldCacheRangeFilter::newDoubleRange(const String& field, double lowerVal, double upperVal, bool includeLower, bool includeUpper) - { - return newDoubleRange(field, DoubleParserPtr(), lowerVal, upperVal, includeLower, includeUpper); - } - - FieldCacheRangeFilterPtr FieldCacheRangeFilter::newDoubleRange(const String& field, DoubleParserPtr parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper) - { - return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); - } - - String FieldCacheRangeFilter::getField() - { - return field; - } - - bool FieldCacheRangeFilter::includesLower() - { - return includeLower; - } - - bool FieldCacheRangeFilter::includesUpper() - { - return includeUpper; - } - - ParserPtr FieldCacheRangeFilter::getParser() - { - return parser; - } - - FieldCacheRangeFilterString::FieldCacheRangeFilterString(const String& field, ParserPtr parser, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper) - : FieldCacheRangeFilter(field, parser, includeLower, includeUpper) - { - this->lowerVal = lowerVal; - this->upperVal = upperVal; - } - - FieldCacheRangeFilterString::~FieldCacheRangeFilterString() - { +namespace Lucene { + +FieldCacheRangeFilter::FieldCacheRangeFilter(const String& field, const ParserPtr& parser, bool includeLower, bool includeUpper) { + this->field = field; + this->parser = parser; + this->includeLower = includeLower; + this->includeUpper 
= includeUpper; +} + +FieldCacheRangeFilter::~FieldCacheRangeFilter() { +} + +FieldCacheRangeFilterPtr FieldCacheRangeFilter::newStringRange(const String& field, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper) { + return newLucene(field, ParserPtr(), lowerVal, upperVal, includeLower, includeUpper); +} + +FieldCacheRangeFilterPtr FieldCacheRangeFilter::newByteRange(const String& field, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) { + return newByteRange(field, ByteParserPtr(), lowerVal, upperVal, includeLower, includeUpper); +} + +FieldCacheRangeFilterPtr FieldCacheRangeFilter::newByteRange(const String& field, const ByteParserPtr& parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) { + return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); +} + +FieldCacheRangeFilterPtr FieldCacheRangeFilter::newIntRange(const String& field, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) { + return newIntRange(field, IntParserPtr(), lowerVal, upperVal, includeLower, includeUpper); +} + +FieldCacheRangeFilterPtr FieldCacheRangeFilter::newIntRange(const String& field, const IntParserPtr& parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) { + return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); +} + +FieldCacheRangeFilterPtr FieldCacheRangeFilter::newLongRange(const String& field, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) { + return newLongRange(field, LongParserPtr(), lowerVal, upperVal, includeLower, includeUpper); +} + +FieldCacheRangeFilterPtr FieldCacheRangeFilter::newLongRange(const String& field, const LongParserPtr& parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) { + return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); +} + +FieldCacheRangeFilterPtr 
FieldCacheRangeFilter::newDoubleRange(const String& field, double lowerVal, double upperVal, bool includeLower, bool includeUpper) { + return newDoubleRange(field, DoubleParserPtr(), lowerVal, upperVal, includeLower, includeUpper); +} + +FieldCacheRangeFilterPtr FieldCacheRangeFilter::newDoubleRange(const String& field, const DoubleParserPtr& parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper) { + return newLucene(field, parser, lowerVal, upperVal, includeLower, includeUpper); +} + +String FieldCacheRangeFilter::getField() { + return field; +} + +bool FieldCacheRangeFilter::includesLower() { + return includeLower; +} + +bool FieldCacheRangeFilter::includesUpper() { + return includeUpper; +} + +ParserPtr FieldCacheRangeFilter::getParser() { + return parser; +} + +FieldCacheRangeFilterString::FieldCacheRangeFilterString(const String& field, const ParserPtr& parser, const String& lowerVal, const String& upperVal, bool includeLower, bool includeUpper) + : FieldCacheRangeFilter(field, parser, includeLower, includeUpper) { + this->lowerVal = lowerVal; + this->upperVal = upperVal; +} + +FieldCacheRangeFilterString::~FieldCacheRangeFilterString() { +} + +DocIdSetPtr FieldCacheRangeFilterString::getDocIdSet(const IndexReaderPtr& reader) { + StringIndexPtr fcsi(FieldCache::DEFAULT()->getStringIndex(reader, field)); + int32_t lowerPoint = fcsi->binarySearchLookup(lowerVal); + int32_t upperPoint = fcsi->binarySearchLookup(upperVal); + + int32_t inclusiveLowerPoint = 0; + int32_t inclusiveUpperPoint = 0; + + // Hints: + // * binarySearchLookup returns 0, if value was null. 
+ // * the value is <0 if no exact hit was found, the returned value is (-(insertion point) - 1) + if (lowerPoint == 0) { + BOOST_ASSERT(lowerVal.empty()); + inclusiveLowerPoint = 1; + } else if (includeLower && lowerPoint > 0) { + inclusiveLowerPoint = lowerPoint; + } else if (lowerPoint > 0) { + inclusiveLowerPoint = lowerPoint + 1; + } else { + inclusiveLowerPoint = std::max((int32_t)1, -lowerPoint - 1); } - - DocIdSetPtr FieldCacheRangeFilterString::getDocIdSet(IndexReaderPtr reader) - { - StringIndexPtr fcsi(FieldCache::DEFAULT()->getStringIndex(reader, field)); - int32_t lowerPoint = fcsi->binarySearchLookup(lowerVal); - int32_t upperPoint = fcsi->binarySearchLookup(upperVal); - - int32_t inclusiveLowerPoint = 0; - int32_t inclusiveUpperPoint = 0; - - // Hints: - // * binarySearchLookup returns 0, if value was null. - // * the value is <0 if no exact hit was found, the returned value is (-(insertion point) - 1) - if (lowerPoint == 0) - { - BOOST_ASSERT(lowerVal.empty()); - inclusiveLowerPoint = 1; - } - else if (includeLower && lowerPoint > 0) - inclusiveLowerPoint = lowerPoint; - else if (lowerPoint > 0) - inclusiveLowerPoint = lowerPoint + 1; - else - inclusiveLowerPoint = std::max((int32_t)1, -lowerPoint - 1); - - if (upperPoint == 0) - { - BOOST_ASSERT(upperVal.empty()); - inclusiveUpperPoint = INT_MAX; - } - else if (includeUpper && upperPoint > 0) - inclusiveUpperPoint = upperPoint; - else if (upperPoint > 0) - inclusiveUpperPoint = upperPoint - 1; - else - inclusiveUpperPoint = -upperPoint - 2; - - if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint) - return DocIdSet::EMPTY_DOCIDSET(); - - BOOST_ASSERT(inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0); - - // for this DocIdSet, we never need to use TermDocs, because deleted docs have an order of 0 - // (null entry in StringIndex) - return newLucene(reader, false, fcsi, inclusiveLowerPoint, inclusiveUpperPoint); + + if (upperPoint == 0) { + BOOST_ASSERT(upperVal.empty()); + 
inclusiveUpperPoint = INT_MAX; + } else if (includeUpper && upperPoint > 0) { + inclusiveUpperPoint = upperPoint; + } else if (upperPoint > 0) { + inclusiveUpperPoint = upperPoint - 1; + } else { + inclusiveUpperPoint = -upperPoint - 2; } - - String FieldCacheRangeFilterString::toString() - { - StringStream buffer; - buffer << field << L":" << (includeLower ? L"[" : L"{"); - buffer << lowerVal << L" TO " << lowerVal; - buffer << (includeLower ? L"]" : L"}"); - return buffer.str(); + + if (inclusiveUpperPoint <= 0 || inclusiveLowerPoint > inclusiveUpperPoint) { + return DocIdSet::EMPTY_DOCIDSET(); } - - bool FieldCacheRangeFilterString::equals(LuceneObjectPtr other) - { - if (Filter::equals(other)) - return true; - FieldCacheRangeFilterStringPtr otherFilter(boost::dynamic_pointer_cast(other)); - if (!otherFilter) - return false; - if (field != otherFilter->field || includeLower != otherFilter->includeLower || includeUpper != otherFilter->includeUpper) - return false; - if (lowerVal != otherFilter->lowerVal || upperVal != otherFilter->upperVal) - return false; - if (parser ? !parser->equals(otherFilter->parser) : otherFilter->parser) - return false; + + BOOST_ASSERT(inclusiveLowerPoint > 0 && inclusiveUpperPoint > 0); + + // for this DocIdSet, we never need to use TermDocs, because deleted docs have an order of 0 + // (null entry in StringIndex) + return newLucene(reader, false, fcsi, inclusiveLowerPoint, inclusiveUpperPoint); +} + +String FieldCacheRangeFilterString::toString() { + StringStream buffer; + buffer << field << L":" << (includeLower ? L"[" : L"{"); + buffer << lowerVal << L" TO " << lowerVal; + buffer << (includeLower ? L"]" : L"}"); + return buffer.str(); +} + +bool FieldCacheRangeFilterString::equals(const LuceneObjectPtr& other) { + if (Filter::equals(other)) { return true; } - - int32_t FieldCacheRangeFilterString::hashCode() - { - int32_t code = StringUtils::hashCode(field); - code ^= lowerVal.empty() ? 
550356204 : StringUtils::hashCode(lowerVal); - code = (code << 1) | MiscUtils::unsignedShift(code, 31); // rotate to distinguish lower from upper - code ^= upperVal.empty() ? -1674416163 : StringUtils::hashCode(upperVal); - code ^= parser ? parser->hashCode() : -1572457324; - code ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 1721088258 : 1948649653); - return code; - } - - FieldCacheRangeFilterByte::FieldCacheRangeFilterByte(const String& field, ParserPtr parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) - : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, UCHAR_MAX, includeLower, includeUpper) - { - } - - FieldCacheRangeFilterByte::~FieldCacheRangeFilterByte() - { - } - - Collection FieldCacheRangeFilterByte::getValues(IndexReaderPtr reader) - { - return FieldCache::DEFAULT()->getBytes(reader, field, boost::static_pointer_cast(parser)); + FieldCacheRangeFilterStringPtr otherFilter(boost::dynamic_pointer_cast(other)); + if (!otherFilter) { + return false; } - - FieldCacheRangeFilterInt::FieldCacheRangeFilterInt(const String& field, ParserPtr parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) - : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, INT_MAX, includeLower, includeUpper) - { - } - - FieldCacheRangeFilterInt::~FieldCacheRangeFilterInt() - { + if (field != otherFilter->field || includeLower != otherFilter->includeLower || includeUpper != otherFilter->includeUpper) { + return false; } - - Collection FieldCacheRangeFilterInt::getValues(IndexReaderPtr reader) - { - return FieldCache::DEFAULT()->getInts(reader, field, boost::static_pointer_cast(parser)); + if (lowerVal != otherFilter->lowerVal || upperVal != otherFilter->upperVal) { + return false; } - - FieldCacheRangeFilterLong::FieldCacheRangeFilterLong(const String& field, ParserPtr parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) - : 
FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, LLONG_MAX, includeLower, includeUpper) - { - } - - FieldCacheRangeFilterLong::~FieldCacheRangeFilterLong() - { + if (parser.get() != NULL ? !parser->equals(otherFilter->parser) : otherFilter->parser.get() != NULL) { + return false; } - - Collection FieldCacheRangeFilterLong::getValues(IndexReaderPtr reader) - { - return FieldCache::DEFAULT()->getLongs(reader, field, boost::static_pointer_cast(parser)); - } - - FieldCacheRangeFilterDouble::FieldCacheRangeFilterDouble(const String& field, ParserPtr parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper) - : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, std::numeric_limits::infinity(), includeLower, includeUpper) - { - } - - FieldCacheRangeFilterDouble::~FieldCacheRangeFilterDouble() - { - } - - DocIdSetPtr FieldCacheRangeFilterDouble::getDocIdSet(IndexReaderPtr reader) - { - if (!includeLower && lowerVal > 0.0 && MiscUtils::isInfinite(lowerVal)) - return DocIdSet::EMPTY_DOCIDSET(); - int64_t lower = NumericUtils::doubleToSortableLong(lowerVal); - double inclusiveLowerPoint = NumericUtils::sortableLongToDouble(includeLower ? lower : (lower + 1)); - - if (!includeUpper && upperVal < 0.0 && MiscUtils::isInfinite(upperVal)) - return DocIdSet::EMPTY_DOCIDSET(); - int64_t upper = NumericUtils::doubleToSortableLong(upperVal); - double inclusiveUpperPoint = NumericUtils::sortableLongToDouble(includeUpper ? 
upper : (upper - 1)); - - if (inclusiveLowerPoint > inclusiveUpperPoint) - return DocIdSet::EMPTY_DOCIDSET(); - - // we only request the usage of termDocs, if the range contains 0 - return newLucene< FieldCacheDocIdSetNumeric >(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0), getValues(reader), inclusiveLowerPoint, inclusiveUpperPoint); - } - - Collection FieldCacheRangeFilterDouble::getValues(IndexReaderPtr reader) - { - return FieldCache::DEFAULT()->getDoubles(reader, field, boost::static_pointer_cast(parser)); - } - - FieldCacheDocIdSet::FieldCacheDocIdSet(IndexReaderPtr reader, bool mayUseTermDocs) - { - this->reader = reader; - this->mayUseTermDocs = mayUseTermDocs; - } - - FieldCacheDocIdSet::~FieldCacheDocIdSet() - { - } - - bool FieldCacheDocIdSet::isCacheable() - { - return !(mayUseTermDocs && reader->hasDeletions()); - } - - DocIdSetIteratorPtr FieldCacheDocIdSet::iterator() - { - // Synchronization needed because deleted docs BitVector can change after call to hasDeletions until - // TermDocs creation. We only use an iterator with termDocs, when this was requested (eg. range - // contains 0) and the index has deletions - TermDocsPtr termDocs; - { - SyncLock instancesLock(reader); - termDocs = isCacheable() ? 
TermDocsPtr() : reader->termDocs(TermPtr()); - } - if (termDocs) - { - // a DocIdSetIterator using TermDocs to iterate valid docIds - return newLucene(shared_from_this(), termDocs); - } - else - { - // a DocIdSetIterator generating docIds by incrementing a variable - this one can be used if there - // are no deletions are on the index - return newLucene(shared_from_this()); - } - } - - FieldCacheDocIdSetString::FieldCacheDocIdSetString(IndexReaderPtr reader, bool mayUseTermDocs, StringIndexPtr fcsi, int32_t inclusiveLowerPoint, int32_t inclusiveUpperPoint) : FieldCacheDocIdSet(reader, mayUseTermDocs) - { - this->fcsi = fcsi; - this->inclusiveLowerPoint = inclusiveLowerPoint; - this->inclusiveUpperPoint = inclusiveUpperPoint; - } - - FieldCacheDocIdSetString::~FieldCacheDocIdSetString() - { - } - - bool FieldCacheDocIdSetString::matchDoc(int32_t doc) - { - if (doc < 0 || doc >= fcsi->order.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return (fcsi->order[doc] >= inclusiveLowerPoint && fcsi->order[doc] <= inclusiveUpperPoint); + return true; +} + +int32_t FieldCacheRangeFilterString::hashCode() { + int32_t code = StringUtils::hashCode(field); + code ^= lowerVal.empty() ? 550356204 : StringUtils::hashCode(lowerVal); + code = (code << 1) | MiscUtils::unsignedShift(code, 31); // rotate to distinguish lower from upper + code ^= upperVal.empty() ? -1674416163 : StringUtils::hashCode(upperVal); + code ^= parser ? parser->hashCode() : -1572457324; + code ^= (includeLower ? 1549299360 : -365038026) ^ (includeUpper ? 
1721088258 : 1948649653); + return code; +} + +FieldCacheRangeFilterByte::FieldCacheRangeFilterByte(const String& field, const ParserPtr& parser, uint8_t lowerVal, uint8_t upperVal, bool includeLower, bool includeUpper) + : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, UCHAR_MAX, includeLower, includeUpper) { +} + +FieldCacheRangeFilterByte::~FieldCacheRangeFilterByte() { +} + +Collection FieldCacheRangeFilterByte::getValues(const IndexReaderPtr& reader) { + return FieldCache::DEFAULT()->getBytes(reader, field, boost::static_pointer_cast(parser)); +} + +FieldCacheRangeFilterInt::FieldCacheRangeFilterInt(const String& field, const ParserPtr& parser, int32_t lowerVal, int32_t upperVal, bool includeLower, bool includeUpper) + : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, INT_MAX, includeLower, includeUpper) { +} + +FieldCacheRangeFilterInt::~FieldCacheRangeFilterInt() { +} + +Collection FieldCacheRangeFilterInt::getValues(const IndexReaderPtr& reader) { + return FieldCache::DEFAULT()->getInts(reader, field, boost::static_pointer_cast(parser)); +} + +FieldCacheRangeFilterLong::FieldCacheRangeFilterLong(const String& field, const ParserPtr& parser, int64_t lowerVal, int64_t upperVal, bool includeLower, bool includeUpper) + : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, std::numeric_limits::max(), includeLower, includeUpper) { +} + +FieldCacheRangeFilterLong::~FieldCacheRangeFilterLong() { +} + +Collection FieldCacheRangeFilterLong::getValues(const IndexReaderPtr& reader) { + return FieldCache::DEFAULT()->getLongs(reader, field, boost::static_pointer_cast(parser)); +} + +FieldCacheRangeFilterDouble::FieldCacheRangeFilterDouble(const String& field, const ParserPtr& parser, double lowerVal, double upperVal, bool includeLower, bool includeUpper) + : FieldCacheRangeFilterNumeric(field, parser, lowerVal, upperVal, std::numeric_limits::infinity(), includeLower, includeUpper) { +} + 
+FieldCacheRangeFilterDouble::~FieldCacheRangeFilterDouble() { +} + +DocIdSetPtr FieldCacheRangeFilterDouble::getDocIdSet(const IndexReaderPtr& reader) { + if (!includeLower && lowerVal > 0.0 && MiscUtils::isInfinite(lowerVal)) { + return DocIdSet::EMPTY_DOCIDSET(); } - - FieldDocIdSetIteratorTermDocs::FieldDocIdSetIteratorTermDocs(FieldCacheDocIdSetPtr cacheDocIdSet, TermDocsPtr termDocs) - { - this->_cacheDocIdSet = cacheDocIdSet; - this->termDocs = termDocs; - this->doc = -1; + int64_t lower = NumericUtils::doubleToSortableLong(lowerVal); + double inclusiveLowerPoint = NumericUtils::sortableLongToDouble(includeLower ? lower : (lower + 1)); + + if (!includeUpper && upperVal < 0.0 && MiscUtils::isInfinite(upperVal)) { + return DocIdSet::EMPTY_DOCIDSET(); } - - FieldDocIdSetIteratorTermDocs::~FieldDocIdSetIteratorTermDocs() - { + int64_t upper = NumericUtils::doubleToSortableLong(upperVal); + double inclusiveUpperPoint = NumericUtils::sortableLongToDouble(includeUpper ? upper : (upper - 1)); + + if (inclusiveLowerPoint > inclusiveUpperPoint) { + return DocIdSet::EMPTY_DOCIDSET(); } - - int32_t FieldDocIdSetIteratorTermDocs::docID() - { - return doc; + + // we only request the usage of termDocs, if the range contains 0 + return newLucene< FieldCacheDocIdSetNumeric >(reader, (inclusiveLowerPoint <= 0 && inclusiveUpperPoint >= 0), getValues(reader), inclusiveLowerPoint, inclusiveUpperPoint); +} + +Collection FieldCacheRangeFilterDouble::getValues(const IndexReaderPtr& reader) { + return FieldCache::DEFAULT()->getDoubles(reader, field, boost::static_pointer_cast(parser)); +} + +FieldCacheDocIdSet::FieldCacheDocIdSet(const IndexReaderPtr& reader, bool mayUseTermDocs) { + this->reader = reader; + this->mayUseTermDocs = mayUseTermDocs; +} + +FieldCacheDocIdSet::~FieldCacheDocIdSet() { +} + +bool FieldCacheDocIdSet::isCacheable() { + return !(mayUseTermDocs && reader->hasDeletions()); +} + +DocIdSetIteratorPtr FieldCacheDocIdSet::iterator() { + // Synchronization needed 
because deleted docs BitVector can change after call to hasDeletions until + // TermDocs creation. We only use an iterator with termDocs, when this was requested (eg. range + // contains 0) and the index has deletions + TermDocsPtr termDocs; + { + SyncLock instancesLock(reader); + termDocs = isCacheable() ? TermDocsPtr() : reader->termDocs(TermPtr()); + } + if (termDocs) { + // a DocIdSetIterator using TermDocs to iterate valid docIds + return newLucene(shared_from_this(), termDocs); + } else { + // a DocIdSetIterator generating docIds by incrementing a variable - this one can be used if there + // are no deletions are on the index + return newLucene(shared_from_this()); } - - int32_t FieldDocIdSetIteratorTermDocs::nextDoc() - { - FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); - do - { - if (!termDocs->next()) - { - doc = NO_MORE_DOCS; - return doc; - } - } - while (!cacheDocIdSet->matchDoc(doc = termDocs->doc())); - return doc; +} + +FieldCacheDocIdSetString::FieldCacheDocIdSetString(const IndexReaderPtr& reader, bool mayUseTermDocs, const StringIndexPtr& fcsi, int32_t inclusiveLowerPoint, int32_t inclusiveUpperPoint) : FieldCacheDocIdSet(reader, mayUseTermDocs) { + this->fcsi = fcsi; + this->inclusiveLowerPoint = inclusiveLowerPoint; + this->inclusiveUpperPoint = inclusiveUpperPoint; +} + +FieldCacheDocIdSetString::~FieldCacheDocIdSetString() { +} + +bool FieldCacheDocIdSetString::matchDoc(int32_t doc) { + if (doc < 0 || doc >= fcsi->order.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } - - int32_t FieldDocIdSetIteratorTermDocs::advance(int32_t target) - { - FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); - if (!termDocs->skipTo(target)) - { + return (fcsi->order[doc] >= inclusiveLowerPoint && fcsi->order[doc] <= inclusiveUpperPoint); +} + +FieldDocIdSetIteratorTermDocs::FieldDocIdSetIteratorTermDocs(const FieldCacheDocIdSetPtr& cacheDocIdSet, const TermDocsPtr& termDocs) { + this->_cacheDocIdSet = cacheDocIdSet; + this->termDocs = 
termDocs; + this->doc = -1; +} + +FieldDocIdSetIteratorTermDocs::~FieldDocIdSetIteratorTermDocs() { +} + +int32_t FieldDocIdSetIteratorTermDocs::docID() { + return doc; +} + +int32_t FieldDocIdSetIteratorTermDocs::nextDoc() { + FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); + do { + if (!termDocs->next()) { doc = NO_MORE_DOCS; return doc; } - while (!cacheDocIdSet->matchDoc(doc = termDocs->doc())) - { - if (!termDocs->next()) - { - doc = NO_MORE_DOCS; - return doc; - } - } - return doc; - } - - FieldDocIdSetIteratorIncrement::FieldDocIdSetIteratorIncrement(FieldCacheDocIdSetPtr cacheDocIdSet) - { - this->_cacheDocIdSet = cacheDocIdSet; - this->doc = -1; - } - - FieldDocIdSetIteratorIncrement::~FieldDocIdSetIteratorIncrement() - { - } - - int32_t FieldDocIdSetIteratorIncrement::docID() - { + } while (!cacheDocIdSet->matchDoc(doc = termDocs->doc())); + return doc; +} + +int32_t FieldDocIdSetIteratorTermDocs::advance(int32_t target) { + FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); + if (!termDocs->skipTo(target)) { + doc = NO_MORE_DOCS; return doc; } - - int32_t FieldDocIdSetIteratorIncrement::nextDoc() - { - FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); - try - { - do - { - ++doc; - } - while (!cacheDocIdSet->matchDoc(doc)); - return doc; - } - catch (IndexOutOfBoundsException&) - { + while (!cacheDocIdSet->matchDoc(doc = termDocs->doc())) { + if (!termDocs->next()) { doc = NO_MORE_DOCS; return doc; } } - - int32_t FieldDocIdSetIteratorIncrement::advance(int32_t target) - { - FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); - try - { - doc = target; - while (!cacheDocIdSet->matchDoc(doc)) - ++doc; - return doc; - } - catch (IndexOutOfBoundsException&) - { - doc = NO_MORE_DOCS; - return doc; + return doc; +} + +FieldDocIdSetIteratorIncrement::FieldDocIdSetIteratorIncrement(const FieldCacheDocIdSetPtr& cacheDocIdSet) { + this->_cacheDocIdSet = cacheDocIdSet; + this->doc = -1; +} + 
+FieldDocIdSetIteratorIncrement::~FieldDocIdSetIteratorIncrement() { +} + +int32_t FieldDocIdSetIteratorIncrement::docID() { + return doc; +} + +int32_t FieldDocIdSetIteratorIncrement::nextDoc() { + FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); + try { + do { + ++doc; + } while (!cacheDocIdSet->matchDoc(doc)); + return doc; + } catch (IndexOutOfBoundsException&) { + doc = NO_MORE_DOCS; + return doc; + } +} + +int32_t FieldDocIdSetIteratorIncrement::advance(int32_t target) { + FieldCacheDocIdSetPtr cacheDocIdSet(_cacheDocIdSet); + try { + doc = target; + while (!cacheDocIdSet->matchDoc(doc)) { + ++doc; } + return doc; + } catch (IndexOutOfBoundsException&) { + doc = NO_MORE_DOCS; + return doc; } } + +} diff --git a/src/core/search/FieldCacheTermsFilter.cpp b/src/core/search/FieldCacheTermsFilter.cpp index c9f688a6..32ca0a44 100644 --- a/src/core/search/FieldCacheTermsFilter.cpp +++ b/src/core/search/FieldCacheTermsFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,106 +10,90 @@ #include "FieldCache.h" #include "OpenBitSet.h" -namespace Lucene -{ - FieldCacheTermsFilter::FieldCacheTermsFilter(const String& field, Collection terms) - { - this->field = field; - this->terms = terms; - } - - FieldCacheTermsFilter::~FieldCacheTermsFilter() - { - } - - FieldCachePtr FieldCacheTermsFilter::getFieldCache() - { - return FieldCache::DEFAULT(); - } - - DocIdSetPtr FieldCacheTermsFilter::getDocIdSet(IndexReaderPtr reader) - { - return newLucene(terms, getFieldCache()->getStringIndex(reader, field)); - } - - FieldCacheTermsFilterDocIdSet::FieldCacheTermsFilterDocIdSet(Collection terms, StringIndexPtr fcsi) - { - this->fcsi = fcsi; - openBitSet = newLucene(this->fcsi->lookup.size()); - for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) - { - int32_t termNumber = this->fcsi->binarySearchLookup(*term); - if (termNumber > 0) - openBitSet->set(termNumber); +namespace Lucene { + +FieldCacheTermsFilter::FieldCacheTermsFilter(const String& field, Collection terms) { + this->field = field; + this->terms = terms; +} + +FieldCacheTermsFilter::~FieldCacheTermsFilter() { +} + +FieldCachePtr FieldCacheTermsFilter::getFieldCache() { + return FieldCache::DEFAULT(); +} + +DocIdSetPtr FieldCacheTermsFilter::getDocIdSet(const IndexReaderPtr& reader) { + return newLucene(terms, getFieldCache()->getStringIndex(reader, field)); +} + +FieldCacheTermsFilterDocIdSet::FieldCacheTermsFilterDocIdSet(Collection terms, const StringIndexPtr& fcsi) { + this->fcsi = fcsi; + openBitSet = newLucene(this->fcsi->lookup.size()); + for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { + int32_t termNumber = this->fcsi->binarySearchLookup(*term); + if (termNumber > 0) { + openBitSet->set(termNumber); } } - - FieldCacheTermsFilterDocIdSet::~FieldCacheTermsFilterDocIdSet() - { - } - - DocIdSetIteratorPtr FieldCacheTermsFilterDocIdSet::iterator() 
- { - return newLucene(fcsi, openBitSet); - } - - bool FieldCacheTermsFilterDocIdSet::isCacheable() - { - return true; - } - - FieldCacheTermsFilterDocIdSetIterator::FieldCacheTermsFilterDocIdSetIterator(StringIndexPtr fcsi, OpenBitSetPtr openBitSet) - { - this->fcsi = fcsi; - this->openBitSet = openBitSet; - this->doc = -1; - } - - FieldCacheTermsFilterDocIdSetIterator::~FieldCacheTermsFilterDocIdSetIterator() - { - } - - int32_t FieldCacheTermsFilterDocIdSetIterator::docID() - { - return doc; - } - - int32_t FieldCacheTermsFilterDocIdSetIterator::nextDoc() - { - try - { - if (++doc >= fcsi->order.size()) +} + +FieldCacheTermsFilterDocIdSet::~FieldCacheTermsFilterDocIdSet() { +} + +DocIdSetIteratorPtr FieldCacheTermsFilterDocIdSet::iterator() { + return newLucene(fcsi, openBitSet); +} + +bool FieldCacheTermsFilterDocIdSet::isCacheable() { + return true; +} + +FieldCacheTermsFilterDocIdSetIterator::FieldCacheTermsFilterDocIdSetIterator(const StringIndexPtr& fcsi, const OpenBitSetPtr& openBitSet) { + this->fcsi = fcsi; + this->openBitSet = openBitSet; + this->doc = -1; +} + +FieldCacheTermsFilterDocIdSetIterator::~FieldCacheTermsFilterDocIdSetIterator() { +} + +int32_t FieldCacheTermsFilterDocIdSetIterator::docID() { + return doc; +} + +int32_t FieldCacheTermsFilterDocIdSetIterator::nextDoc() { + try { + if (++doc >= fcsi->order.size()) { + boost::throw_exception(IndexOutOfBoundsException()); + } + while (!openBitSet->fastGet(fcsi->order[doc])) { + if (++doc >= fcsi->order.size()) { boost::throw_exception(IndexOutOfBoundsException()); - while (!openBitSet->fastGet(fcsi->order[doc])) - { - if (++doc >= fcsi->order.size()) - boost::throw_exception(IndexOutOfBoundsException()); } } - catch (IndexOutOfBoundsException&) - { - doc = NO_MORE_DOCS; - } - return doc; + } catch (IndexOutOfBoundsException&) { + doc = NO_MORE_DOCS; } - - int32_t FieldCacheTermsFilterDocIdSetIterator::advance(int32_t target) - { - try - { - doc = target; - if (doc < 0 || doc >= 
fcsi->order.size()) + return doc; +} + +int32_t FieldCacheTermsFilterDocIdSetIterator::advance(int32_t target) { + try { + doc = target; + if (doc < 0 || doc >= fcsi->order.size()) { + boost::throw_exception(IndexOutOfBoundsException()); + } + while (!openBitSet->fastGet(fcsi->order[doc])) { + if (++doc >= fcsi->order.size()) { boost::throw_exception(IndexOutOfBoundsException()); - while (!openBitSet->fastGet(fcsi->order[doc])) - { - if (++doc >= fcsi->order.size()) - boost::throw_exception(IndexOutOfBoundsException()); } } - catch (IndexOutOfBoundsException&) - { - doc = NO_MORE_DOCS; - } - return doc; + } catch (IndexOutOfBoundsException&) { + doc = NO_MORE_DOCS; } + return doc; +} + } diff --git a/src/core/search/FieldComparator.cpp b/src/core/search/FieldComparator.cpp index dd736bd9..e0c612e4 100644 --- a/src/core/search/FieldComparator.cpp +++ b/src/core/search/FieldComparator.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,381 +10,320 @@ #include "ScoreCachingWrappingScorer.h" #include "Collator.h" -namespace Lucene -{ - FieldComparator::~FieldComparator() - { - } - - void FieldComparator::setScorer(ScorerPtr scorer) - { - // Empty implementation since most comparators don't need the score. - // This can be overridden by those that need it. 
- } - - ByteComparator::ByteComparator(int32_t numHits, const String& field, ParserPtr parser) : NumericComparator(numHits, field) - { - this->parser = boost::static_pointer_cast(parser); - } - - ByteComparator::~ByteComparator() - { - } - - void ByteComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - currentReaderValues = FieldCache::DEFAULT()->getBytes(reader, field, parser); - } - - DocComparator::DocComparator(int32_t numHits) : NumericComparator(numHits) - { - this->docBase = 0; - } - - DocComparator::~DocComparator() - { - } - - int32_t DocComparator::compareBottom(int32_t doc) - { - // No overflow risk because docIDs are non-negative - return (bottom - (docBase + doc)); - } - - void DocComparator::copy(int32_t slot, int32_t doc) - { - values[slot] = docBase + doc; - } - - void DocComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - this->docBase = docBase; - } - - DoubleComparator::DoubleComparator(int32_t numHits, const String& field, ParserPtr parser) : NumericComparator(numHits, field) - { - this->parser = boost::static_pointer_cast(parser); - } - - DoubleComparator::~DoubleComparator() - { - } - - int32_t DoubleComparator::compare(int32_t slot1, int32_t slot2) - { - double v1 = values[slot1]; - double v2 = values[slot2]; - return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); - } - - int32_t DoubleComparator::compareBottom(int32_t doc) - { - double v2 = currentReaderValues[doc]; - return bottom > v2 ? 1 : (bottom < v2 ? 
-1 : 0); - } - - void DoubleComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - currentReaderValues = FieldCache::DEFAULT()->getDoubles(reader, field, parser); - } - - IntComparator::IntComparator(int32_t numHits, const String& field, ParserPtr parser) : NumericComparator(numHits, field) - { - this->parser = boost::static_pointer_cast(parser); - } - - IntComparator::~IntComparator() - { - } - - int32_t IntComparator::compare(int32_t slot1, int32_t slot2) - { - int32_t v1 = values[slot1]; - int32_t v2 = values[slot2]; - return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); - } - - int32_t IntComparator::compareBottom(int32_t doc) - { - int32_t v2 = currentReaderValues[doc]; - return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); - } - - void IntComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - currentReaderValues = FieldCache::DEFAULT()->getInts(reader, field, parser); - } - - LongComparator::LongComparator(int32_t numHits, const String& field, ParserPtr parser) : NumericComparator(numHits, field) - { - this->parser = boost::static_pointer_cast(parser); - } - - LongComparator::~LongComparator() - { - } - - int32_t LongComparator::compare(int32_t slot1, int32_t slot2) - { - int64_t v1 = values[slot1]; - int64_t v2 = values[slot2]; - return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); - } - - int32_t LongComparator::compareBottom(int32_t doc) - { - int64_t v2 = currentReaderValues[doc]; - return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); - } - - void LongComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - currentReaderValues = FieldCache::DEFAULT()->getLongs(reader, field, parser); - } - - RelevanceComparator::RelevanceComparator(int32_t numHits) : NumericComparator(numHits) - { - } - - RelevanceComparator::~RelevanceComparator() - { - } - - int32_t RelevanceComparator::compare(int32_t slot1, int32_t slot2) - { - double score1 = values[slot1]; - double score2 = values[slot2]; - return score1 > score2 ? -1 : (score1 < score2 ? 
1 : 0); - } - - int32_t RelevanceComparator::compareBottom(int32_t doc) - { - double score = scorer->score(); - return bottom > score ? -1 : (bottom < score ? 1 : 0); - } - - void RelevanceComparator::copy(int32_t slot, int32_t doc) - { - values[slot] = scorer->score(); - } - - void RelevanceComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - } - - void RelevanceComparator::setScorer(ScorerPtr scorer) - { - this->scorer = newLucene(scorer); - } - - StringComparatorLocale::StringComparatorLocale(int32_t numHits, const String& field, const std::locale& locale) : collator(newLucene(locale)) - { - this->values = Collection::newInstance(numHits); - this->field = field; - } - - StringComparatorLocale::~StringComparatorLocale() - { - } - - int32_t StringComparatorLocale::compare(int32_t slot1, int32_t slot2) - { - return collator->compare(values[slot1], values[slot2]); - } +namespace Lucene { - int32_t StringComparatorLocale::compareBottom(int32_t doc) - { - return collator->compare(bottom, currentReaderValues[doc]); - } - - void StringComparatorLocale::copy(int32_t slot, int32_t doc) - { - values[slot] = currentReaderValues[doc]; - } - - void StringComparatorLocale::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - currentReaderValues = FieldCache::DEFAULT()->getStrings(reader, field); - } - - void StringComparatorLocale::setBottom(int32_t slot) - { - bottom = values[slot]; - } - - ComparableValue StringComparatorLocale::value(int32_t slot) - { - return values[slot]; - } - - StringOrdValComparator::StringOrdValComparator(int32_t numHits, const String& field, int32_t sortPos, bool reversed) - { - this->ords = Collection::newInstance(numHits); - this->values = Collection::newInstance(numHits); - this->readerGen = Collection::newInstance(numHits); - this->sortPos = sortPos; - this->reversed = reversed; - this->field = field; - this->currentReaderGen = -1; - this->bottomSlot = -1; - this->bottomOrd = 0; - } - - 
StringOrdValComparator::~StringOrdValComparator() - { - } - - int32_t StringOrdValComparator::compare(int32_t slot1, int32_t slot2) - { - if (readerGen[slot1] == readerGen[slot2]) - { - int32_t cmp = ords[slot1] - ords[slot2]; - if (cmp != 0) - return cmp; - } - return values[slot1].compare(values[slot2]); - } +FieldComparator::~FieldComparator() { +} + +void FieldComparator::setScorer(const ScorerPtr& scorer) { + // Empty implementation since most comparators don't need the score. + // This can be overridden by those that need it. +} + +ByteComparator::ByteComparator(int32_t numHits, const String& field, const ParserPtr& parser) : NumericComparator(numHits, field) { + this->parser = boost::static_pointer_cast(parser); +} + +ByteComparator::~ByteComparator() { +} + +void ByteComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + currentReaderValues = FieldCache::DEFAULT()->getBytes(reader, field, parser); +} + +DocComparator::DocComparator(int32_t numHits) : NumericComparator(numHits) { + this->docBase = 0; +} + +DocComparator::~DocComparator() { +} + +int32_t DocComparator::compareBottom(int32_t doc) { + // No overflow risk because docIDs are non-negative + return (bottom - (docBase + doc)); +} + +void DocComparator::copy(int32_t slot, int32_t doc) { + values[slot] = docBase + doc; +} + +void DocComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + this->docBase = docBase; +} + +DoubleComparator::DoubleComparator(int32_t numHits, const String& field, const ParserPtr& parser) : NumericComparator(numHits, field) { + this->parser = boost::static_pointer_cast(parser); +} + +DoubleComparator::~DoubleComparator() { +} + +int32_t DoubleComparator::compare(int32_t slot1, int32_t slot2) { + double v1 = values[slot1]; + double v2 = values[slot2]; + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); +} + +int32_t DoubleComparator::compareBottom(int32_t doc) { + double v2 = currentReaderValues[doc]; + return bottom > v2 ? 1 : (bottom < v2 ? 
-1 : 0); +} + +void DoubleComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + currentReaderValues = FieldCache::DEFAULT()->getDoubles(reader, field, parser); +} + +IntComparator::IntComparator(int32_t numHits, const String& field, const ParserPtr& parser) : NumericComparator(numHits, field) { + this->parser = boost::static_pointer_cast(parser); +} + +IntComparator::~IntComparator() { +} + +int32_t IntComparator::compare(int32_t slot1, int32_t slot2) { + int32_t v1 = values[slot1]; + int32_t v2 = values[slot2]; + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); +} + +int32_t IntComparator::compareBottom(int32_t doc) { + int32_t v2 = currentReaderValues[doc]; + return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); +} + +void IntComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + currentReaderValues = FieldCache::DEFAULT()->getInts(reader, field, parser); +} + +LongComparator::LongComparator(int32_t numHits, const String& field, const ParserPtr& parser) : NumericComparator(numHits, field) { + this->parser = boost::static_pointer_cast(parser); +} + +LongComparator::~LongComparator() { +} + +int32_t LongComparator::compare(int32_t slot1, int32_t slot2) { + int64_t v1 = values[slot1]; + int64_t v2 = values[slot2]; + return v1 > v2 ? 1 : (v1 < v2 ? -1 : 0); +} + +int32_t LongComparator::compareBottom(int32_t doc) { + int64_t v2 = currentReaderValues[doc]; + return bottom > v2 ? 1 : (bottom < v2 ? -1 : 0); +} + +void LongComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + currentReaderValues = FieldCache::DEFAULT()->getLongs(reader, field, parser); +} + +RelevanceComparator::RelevanceComparator(int32_t numHits) : NumericComparator(numHits) { +} + +RelevanceComparator::~RelevanceComparator() { +} + +int32_t RelevanceComparator::compare(int32_t slot1, int32_t slot2) { + double score1 = values[slot1]; + double score2 = values[slot2]; + return score1 > score2 ? -1 : (score1 < score2 ? 
1 : 0); +} + +int32_t RelevanceComparator::compareBottom(int32_t doc) { + double score = scorer->score(); + return bottom > score ? -1 : (bottom < score ? 1 : 0); +} + +void RelevanceComparator::copy(int32_t slot, int32_t doc) { + values[slot] = scorer->score(); +} + +void RelevanceComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { +} + +void RelevanceComparator::setScorer(const ScorerPtr& scorer) { + this->scorer = newLucene(scorer); +} + +StringComparatorLocale::StringComparatorLocale(int32_t numHits, const String& field, const std::locale& locale) : collator(newLucene(locale)) { + this->values = Collection::newInstance(numHits); + this->field = field; +} + +StringComparatorLocale::~StringComparatorLocale() { +} + +int32_t StringComparatorLocale::compare(int32_t slot1, int32_t slot2) { + return collator->compare(values[slot1], values[slot2]); +} + +int32_t StringComparatorLocale::compareBottom(int32_t doc) { + return collator->compare(bottom, currentReaderValues[doc]); +} - int32_t StringOrdValComparator::compareBottom(int32_t doc) - { - BOOST_ASSERT(bottomSlot != -1); - int32_t order = this->order[doc]; - int32_t cmp = bottomOrd - order; - if (cmp != 0) +void StringComparatorLocale::copy(int32_t slot, int32_t doc) { + values[slot] = currentReaderValues[doc]; +} + +void StringComparatorLocale::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + currentReaderValues = FieldCache::DEFAULT()->getStrings(reader, field); +} + +void StringComparatorLocale::setBottom(int32_t slot) { + bottom = values[slot]; +} + +ComparableValue StringComparatorLocale::value(int32_t slot) { + return values[slot]; +} + +StringOrdValComparator::StringOrdValComparator(int32_t numHits, const String& field, int32_t sortPos, bool reversed) { + this->ords = Collection::newInstance(numHits); + this->values = Collection::newInstance(numHits); + this->readerGen = Collection::newInstance(numHits); + this->sortPos = sortPos; + this->reversed = reversed; + 
this->field = field; + this->currentReaderGen = -1; + this->bottomSlot = -1; + this->bottomOrd = 0; +} + +StringOrdValComparator::~StringOrdValComparator() { +} + +int32_t StringOrdValComparator::compare(int32_t slot1, int32_t slot2) { + if (readerGen[slot1] == readerGen[slot2]) { + int32_t cmp = ords[slot1] - ords[slot2]; + if (cmp != 0) { return cmp; - return bottomValue.compare(lookup[order]); - } - - void StringOrdValComparator::convert(int32_t slot) - { - readerGen[slot] = currentReaderGen; - int32_t index = 0; - String value(values[slot]); - if (value.empty()) - { - ords[slot] = 0; - return; - } - - if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) - { - // Since we are the primary sort, the entries in the queue are bounded by bottomOrd - BOOST_ASSERT(bottomOrd < lookup.size()); - if (reversed) - index = binarySearch(lookup, value, bottomOrd, lookup.size() - 1); - else - index = binarySearch(lookup, value, 0, bottomOrd); - } - else - { - // Full binary search - index = binarySearch(lookup, value, 0, lookup.size() - 1); } - - if (index < 0) - index = -index - 2; - - ords[slot] = index; } - - int32_t StringOrdValComparator::binarySearch(Collection lookup, const String& key, int32_t low, int32_t high) - { - Collection::iterator search = std::lower_bound(lookup.begin() + low, lookup.begin() + high, key); - int32_t keyPos = std::distance(lookup.begin(), search); - return (search == lookup.end() || key < *search) ? 
-(keyPos + 1) : keyPos; - } - - void StringOrdValComparator::copy(int32_t slot, int32_t doc) - { - int32_t ord = order[doc]; - ords[slot] = ord; - BOOST_ASSERT(ord >= 0); - values[slot] = lookup[ord]; - readerGen[slot] = currentReaderGen; - } - - void StringOrdValComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - StringIndexPtr currentReaderValues(FieldCache::DEFAULT()->getStringIndex(reader, field)); - ++currentReaderGen; - order = currentReaderValues->order; - lookup = currentReaderValues->lookup; - BOOST_ASSERT(!lookup.empty()); - if (bottomSlot != -1) - { - convert(bottomSlot); - bottomOrd = ords[bottomSlot]; - } + return values[slot1].compare(values[slot2]); +} + +int32_t StringOrdValComparator::compareBottom(int32_t doc) { + BOOST_ASSERT(bottomSlot != -1); + int32_t order = this->order[doc]; + int32_t cmp = bottomOrd - order; + if (cmp != 0) { + return cmp; + } + return bottomValue.compare(lookup[order]); +} + +void StringOrdValComparator::convert(int32_t slot) { + readerGen[slot] = currentReaderGen; + int32_t index = 0; + String value(values[slot]); + if (value.empty()) { + ords[slot] = 0; + return; } - - void StringOrdValComparator::setBottom(int32_t slot) - { - bottomSlot = slot; - if (readerGen[slot] != currentReaderGen) - convert(bottomSlot); - bottomOrd = ords[slot]; - BOOST_ASSERT(bottomOrd >= 0); + + if (sortPos == 0 && bottomSlot != -1 && bottomSlot != slot) { + // Since we are the primary sort, the entries in the queue are bounded by bottomOrd BOOST_ASSERT(bottomOrd < lookup.size()); - bottomValue = values[slot]; - } - - ComparableValue StringOrdValComparator::value(int32_t slot) - { - return values[slot]; - } - - Collection StringOrdValComparator::getValues() - { - return values; - } - - int32_t StringOrdValComparator::getBottomSlot() - { - return bottomSlot; - } - - String StringOrdValComparator::getField() - { - return field; - } - - StringValComparator::StringValComparator(int32_t numHits, const String& field) - { - 
this->values = Collection::newInstance(numHits); - this->field = field; - } - - StringValComparator::~StringValComparator() - { - } - - int32_t StringValComparator::compare(int32_t slot1, int32_t slot2) - { - return values[slot1].compare(values[slot2]); + if (reversed) { + index = binarySearch(lookup, value, bottomOrd, lookup.size() - 1); + } else { + index = binarySearch(lookup, value, 0, bottomOrd); + } + } else { + // Full binary search + index = binarySearch(lookup, value, 0, lookup.size() - 1); } - int32_t StringValComparator::compareBottom(int32_t doc) - { - return bottom.compare(currentReaderValues[doc]); - } - - void StringValComparator::copy(int32_t slot, int32_t doc) - { - values[slot] = currentReaderValues[doc]; + if (index < 0) { + index = -index - 2; } - - void StringValComparator::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - currentReaderValues = FieldCache::DEFAULT()->getStrings(reader, field); - } - - void StringValComparator::setBottom(int32_t slot) - { - bottom = values[slot]; - } - - ComparableValue StringValComparator::value(int32_t slot) - { - return values[slot]; + + ords[slot] = index; +} + +int32_t StringOrdValComparator::binarySearch(Collection lookup, const String& key, int32_t low, int32_t high) { + Collection::iterator search = std::lower_bound(lookup.begin() + low, lookup.begin() + high, key); + int32_t keyPos = std::distance(lookup.begin(), search); + return (search == lookup.end() || key < *search) ? 
-(keyPos + 1) : keyPos; +} + +void StringOrdValComparator::copy(int32_t slot, int32_t doc) { + int32_t ord = order[doc]; + ords[slot] = ord; + BOOST_ASSERT(ord >= 0); + values[slot] = lookup[ord]; + readerGen[slot] = currentReaderGen; +} + +void StringOrdValComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + StringIndexPtr currentReaderValues(FieldCache::DEFAULT()->getStringIndex(reader, field)); + ++currentReaderGen; + order = currentReaderValues->order; + lookup = currentReaderValues->lookup; + BOOST_ASSERT(!lookup.empty()); + if (bottomSlot != -1) { + convert(bottomSlot); + bottomOrd = ords[bottomSlot]; } } + +void StringOrdValComparator::setBottom(int32_t slot) { + bottomSlot = slot; + if (readerGen[slot] != currentReaderGen) { + convert(bottomSlot); + } + bottomOrd = ords[slot]; + BOOST_ASSERT(bottomOrd >= 0); + BOOST_ASSERT(bottomOrd < lookup.size()); + bottomValue = values[slot]; +} + +ComparableValue StringOrdValComparator::value(int32_t slot) { + return values[slot]; +} + +Collection StringOrdValComparator::getValues() { + return values; +} + +int32_t StringOrdValComparator::getBottomSlot() { + return bottomSlot; +} + +String StringOrdValComparator::getField() { + return field; +} + +StringValComparator::StringValComparator(int32_t numHits, const String& field) { + this->values = Collection::newInstance(numHits); + this->field = field; +} + +StringValComparator::~StringValComparator() { +} + +int32_t StringValComparator::compare(int32_t slot1, int32_t slot2) { + return values[slot1].compare(values[slot2]); +} + +int32_t StringValComparator::compareBottom(int32_t doc) { + return bottom.compare(currentReaderValues[doc]); +} + +void StringValComparator::copy(int32_t slot, int32_t doc) { + values[slot] = currentReaderValues[doc]; +} + +void StringValComparator::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + currentReaderValues = FieldCache::DEFAULT()->getStrings(reader, field); +} + +void 
StringValComparator::setBottom(int32_t slot) { + bottom = values[slot]; +} + +ComparableValue StringValComparator::value(int32_t slot) { + return values[slot]; +} + +} diff --git a/src/core/search/FieldComparatorSource.cpp b/src/core/search/FieldComparatorSource.cpp index 35c5edc6..fc00b814 100644 --- a/src/core/search/FieldComparatorSource.cpp +++ b/src/core/search/FieldComparatorSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "FieldComparatorSource.h" -namespace Lucene -{ - FieldComparatorSource::~FieldComparatorSource() - { - } +namespace Lucene { + +FieldComparatorSource::~FieldComparatorSource() { +} + } diff --git a/src/core/search/FieldDoc.cpp b/src/core/search/FieldDoc.cpp index 15acb18e..e42ef6ed 100644 --- a/src/core/search/FieldDoc.cpp +++ b/src/core/search/FieldDoc.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,28 +7,26 @@ #include "LuceneInc.h" #include "FieldDoc.h" -namespace Lucene -{ - FieldDoc::FieldDoc(int32_t doc, double score, Collection fields) : ScoreDoc(doc, score) - { - this->fields = fields; - } - - FieldDoc::~FieldDoc() - { - } - - String FieldDoc::toString() - { - StringStream buffer; - buffer << ScoreDoc::toString() << L"["; - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if (field != fields.begin()) - buffer << L", "; - buffer << *field; +namespace Lucene { + +FieldDoc::FieldDoc(int32_t doc, double score, Collection fields) : ScoreDoc(doc, score) { + this->fields = fields; +} + +FieldDoc::~FieldDoc() { +} + +String FieldDoc::toString() { + StringStream buffer; + buffer << ScoreDoc::toString() << L"["; + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if (field != fields.begin()) { + buffer << L", "; } - buffer << L"]"; - return buffer.str(); + buffer << *field; } + buffer << L"]"; + return buffer.str(); +} + } diff --git a/src/core/search/FieldDocSortedHitQueue.cpp b/src/core/search/FieldDocSortedHitQueue.cpp index f1ca6bee..8dc4d103 100644 --- a/src/core/search/FieldDocSortedHitQueue.cpp +++ b/src/core/search/FieldDocSortedHitQueue.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,73 +12,69 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - FieldDocSortedHitQueue::FieldDocSortedHitQueue(int32_t size) : PriorityQueue(size) - { - } - - FieldDocSortedHitQueue::~FieldDocSortedHitQueue() - { - } - - void FieldDocSortedHitQueue::setFields(Collection fields) - { - this->fields = fields; - this->collators = hasCollators(fields); - } - - Collection FieldDocSortedHitQueue::getFields() - { - return fields; +namespace Lucene { + +FieldDocSortedHitQueue::FieldDocSortedHitQueue(int32_t size) : PriorityQueue(size) { +} + +FieldDocSortedHitQueue::~FieldDocSortedHitQueue() { +} + +void FieldDocSortedHitQueue::setFields(Collection fields) { + this->fields = fields; + this->collators = hasCollators(fields); +} + +Collection FieldDocSortedHitQueue::getFields() { + return fields; +} + +Collection FieldDocSortedHitQueue::hasCollators(Collection fields) { + if (!fields) { + return Collection(); } - - Collection FieldDocSortedHitQueue::hasCollators(Collection fields) - { - if (!fields) - return Collection(); - Collection ret(Collection::newInstance(fields.size())); - for (int32_t i = 0; i < fields.size(); ++i) - { - localePtr locale(fields[i]->getLocale()); - if (locale) - ret[i] = newInstance(*locale); + Collection ret(Collection::newInstance(fields.size())); + for (int32_t i = 0; i < fields.size(); ++i) { + localePtr locale(fields[i]->getLocale()); + if (locale) { + ret[i] = newInstance(*locale); } - return ret; } - - bool FieldDocSortedHitQueue::lessThan(const FieldDocPtr& first, const FieldDocPtr& second) - { - int32_t n = fields.size(); - int32_t c = 0; - for (int32_t i = 0; i < n && c == 0; ++i) - { - int32_t type = fields[i]->getType(); - if (type == SortField::STRING) - { - String s1(VariantUtils::get(first->fields[i])); - String s2(VariantUtils::get(second->fields[i])); - if (!fields[i]->getLocale()) - c = s1.compare(s2); - else - c = 
collators[i]->compare(s1, s2); - } - else - { - c = VariantUtils::compareTo(first->fields[i], second->fields[i]); - if (type == SortField::SCORE) - c = -c; + return ret; +} + +bool FieldDocSortedHitQueue::lessThan(const FieldDocPtr& first, const FieldDocPtr& second) { + int32_t n = fields.size(); + int32_t c = 0; + for (int32_t i = 0; i < n && c == 0; ++i) { + int32_t type = fields[i]->getType(); + if (type == SortField::STRING) { + String s1(VariantUtils::get(first->fields[i])); + String s2(VariantUtils::get(second->fields[i])); + if (!fields[i]->getLocale()) { + c = s1.compare(s2); + } else { + c = collators[i]->compare(s1, s2); } - - // reverse sort - if (fields[i]->getReverse()) + } else { + c = VariantUtils::compareTo(first->fields[i], second->fields[i]); + if (type == SortField::SCORE) { c = -c; + } + } + + // reverse sort + if (fields[i]->getReverse()) { + c = -c; } - - // avoid random sort order that could lead to duplicates - if (c == 0) - return (first->doc > second->doc); - - return (c > 0); } + + // avoid random sort order that could lead to duplicates + if (c == 0) { + return (first->doc > second->doc); + } + + return (c > 0); +} + } diff --git a/src/core/search/FieldValueHitQueue.cpp b/src/core/search/FieldValueHitQueue.cpp index 4c436407..c589bbb3 100644 --- a/src/core/search/FieldValueHitQueue.cpp +++ b/src/core/search/FieldValueHitQueue.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,138 +11,126 @@ #include "FieldDoc.h" #include "SortField.h" -namespace Lucene -{ - FieldValueHitQueue::FieldValueHitQueue(Collection fields, int32_t size) : HitQueueBase(size) - { - // When we get here, fields.size() is guaranteed to be > 0, therefore no need to check it again. - - // All these are required by this class's API - need to return arrays. Therefore even in the case - // of a single comparator, create an array anyway. - this->fields = fields; - int32_t numComparators = fields.size(); - comparators = Collection::newInstance(numComparators); - reverseMul = Collection::newInstance(numComparators); - } - - FieldValueHitQueue::~FieldValueHitQueue() - { - } - - FieldValueHitQueuePtr FieldValueHitQueue::create(Collection fields, int32_t size) - { - if (fields.empty()) - boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); - - if (fields.size() == 1) - return newLucene(fields, size); - else - return newLucene(fields, size); - } - - Collection FieldValueHitQueue::getComparators() - { - return comparators; - } - - Collection FieldValueHitQueue::getReverseMul() - { - return reverseMul; - } - - FieldDocPtr FieldValueHitQueue::fillFields(FieldValueHitQueueEntryPtr entry) - { - int32_t n = comparators.size(); - Collection fields(Collection::newInstance(n)); - for (int32_t i = 0; i < n; ++i) - fields[i] = comparators[i]->value(entry->slot); - return newLucene(entry->doc, entry->score, fields); - } - - Collection FieldValueHitQueue::getFields() - { - return fields; - } - - FieldValueHitQueueEntry::FieldValueHitQueueEntry(int32_t slot, int32_t doc, double score) : ScoreDoc(doc, score) - { - this->slot = slot; - } - - FieldValueHitQueueEntry::~FieldValueHitQueueEntry() - { - } - - String FieldValueHitQueueEntry::toString() - { - StringStream buffer; - buffer << L"slot:" << slot << L" " << ScoreDoc::toString(); - return buffer.str(); - } - - 
OneComparatorFieldValueHitQueue::OneComparatorFieldValueHitQueue(Collection fields, int32_t size) : FieldValueHitQueue(fields, size) - { - if (fields.empty()) - boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); - - SortFieldPtr field(fields[0]); - comparator = field->getComparator(size, 0); - oneReverseMul = field->reverse ? -1 : 1; - - comparators[0] = comparator; - reverseMul[0] = oneReverseMul; +namespace Lucene { + +FieldValueHitQueue::FieldValueHitQueue(Collection fields, int32_t size) : HitQueueBase(size) { + // When we get here, fields.size() is guaranteed to be > 0, therefore no need to check it again. + + // All these are required by this class's API - need to return arrays. Therefore even in the case + // of a single comparator, create an array anyway. + this->fields = fields; + int32_t numComparators = fields.size(); + comparators = Collection::newInstance(numComparators); + reverseMul = Collection::newInstance(numComparators); +} + +FieldValueHitQueue::~FieldValueHitQueue() { +} + +FieldValueHitQueuePtr FieldValueHitQueue::create(Collection fields, int32_t size) { + if (fields.empty()) { + boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); } - - OneComparatorFieldValueHitQueue::~OneComparatorFieldValueHitQueue() - { + + if (fields.size() == 1) { + return newLucene(fields, size); + } else { + return newLucene(fields, size); } - - bool OneComparatorFieldValueHitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) - { - FieldValueHitQueueEntryPtr firstEntry(boost::static_pointer_cast(first)); - FieldValueHitQueueEntryPtr secondEntry(boost::static_pointer_cast(second)); - - BOOST_ASSERT(firstEntry != secondEntry); - BOOST_ASSERT(firstEntry->slot != secondEntry->slot); - - int32_t c = oneReverseMul * comparator->compare(firstEntry->slot, secondEntry->slot); - - // avoid random sort order that could lead to duplicates - return c != 0 ? 
(c > 0) : (firstEntry->doc > secondEntry->doc); +} + +Collection FieldValueHitQueue::getComparators() { + return comparators; +} + +Collection FieldValueHitQueue::getReverseMul() { + return reverseMul; +} + +FieldDocPtr FieldValueHitQueue::fillFields(const FieldValueHitQueueEntryPtr& entry) { + int32_t n = comparators.size(); + Collection fields(Collection::newInstance(n)); + for (int32_t i = 0; i < n; ++i) { + fields[i] = comparators[i]->value(entry->slot); } - - MultiComparatorsFieldValueHitQueue::MultiComparatorsFieldValueHitQueue(Collection fields, int32_t size) : FieldValueHitQueue(fields, size) - { - int32_t numComparators = comparators.size(); - for (int32_t i = 0; i < numComparators; ++i) - { - SortFieldPtr field(fields[i]); - reverseMul[i] = field->reverse ? -1 : 1; - comparators[i] = field->getComparator(size, i); - } + return newLucene(entry->doc, entry->score, fields); +} + +Collection FieldValueHitQueue::getFields() { + return fields; +} + +FieldValueHitQueueEntry::FieldValueHitQueueEntry(int32_t slot, int32_t doc, double score) : ScoreDoc(doc, score) { + this->slot = slot; +} + +FieldValueHitQueueEntry::~FieldValueHitQueueEntry() { +} + +String FieldValueHitQueueEntry::toString() { + StringStream buffer; + buffer << L"slot:" << slot << L" " << ScoreDoc::toString(); + return buffer.str(); +} + +OneComparatorFieldValueHitQueue::OneComparatorFieldValueHitQueue(Collection fields, int32_t size) : FieldValueHitQueue(fields, size) { + if (fields.empty()) { + boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); } - - MultiComparatorsFieldValueHitQueue::~MultiComparatorsFieldValueHitQueue() - { + + SortFieldPtr field(fields[0]); + comparator = field->getComparator(size, 0); + oneReverseMul = field->reverse ? 
-1 : 1; + + comparators[0] = comparator; + reverseMul[0] = oneReverseMul; +} + +OneComparatorFieldValueHitQueue::~OneComparatorFieldValueHitQueue() { +} + +bool OneComparatorFieldValueHitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { + FieldValueHitQueueEntryPtr firstEntry(boost::static_pointer_cast(first)); + FieldValueHitQueueEntryPtr secondEntry(boost::static_pointer_cast(second)); + + BOOST_ASSERT(firstEntry != secondEntry); + BOOST_ASSERT(firstEntry->slot != secondEntry->slot); + + int32_t c = oneReverseMul * comparator->compare(firstEntry->slot, secondEntry->slot); + + // avoid random sort order that could lead to duplicates + return c != 0 ? (c > 0) : (firstEntry->doc > secondEntry->doc); +} + +MultiComparatorsFieldValueHitQueue::MultiComparatorsFieldValueHitQueue(Collection fields, int32_t size) : FieldValueHitQueue(fields, size) { + int32_t numComparators = comparators.size(); + for (int32_t i = 0; i < numComparators; ++i) { + SortFieldPtr field(fields[i]); + reverseMul[i] = field->reverse ? 
-1 : 1; + comparators[i] = field->getComparator(size, i); } - - bool MultiComparatorsFieldValueHitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) - { - FieldValueHitQueueEntryPtr firstEntry(boost::static_pointer_cast(first)); - FieldValueHitQueueEntryPtr secondEntry(boost::static_pointer_cast(second)); - - BOOST_ASSERT(firstEntry != secondEntry); - BOOST_ASSERT(firstEntry->slot != secondEntry->slot); - - int32_t numComparators = comparators.size(); - for (int32_t i = 0; i < numComparators; ++i) - { - int32_t c = reverseMul[i] * comparators[i]->compare(firstEntry->slot, secondEntry->slot); - if (c != 0) - return (c > 0); // Short circuit +} + +MultiComparatorsFieldValueHitQueue::~MultiComparatorsFieldValueHitQueue() { +} + +bool MultiComparatorsFieldValueHitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { + FieldValueHitQueueEntryPtr firstEntry(boost::static_pointer_cast(first)); + FieldValueHitQueueEntryPtr secondEntry(boost::static_pointer_cast(second)); + + BOOST_ASSERT(firstEntry != secondEntry); + BOOST_ASSERT(firstEntry->slot != secondEntry->slot); + + int32_t numComparators = comparators.size(); + for (int32_t i = 0; i < numComparators; ++i) { + int32_t c = reverseMul[i] * comparators[i]->compare(firstEntry->slot, secondEntry->slot); + if (c != 0) { + return (c > 0); // Short circuit } - - // avoid random sort order that could lead to duplicates - return (firstEntry->doc > secondEntry->doc); } + + // avoid random sort order that could lead to duplicates + return (firstEntry->doc > secondEntry->doc); +} + } diff --git a/src/core/search/Filter.cpp b/src/core/search/Filter.cpp index 7ee0bc3a..8db26bf6 100644 --- a/src/core/search/Filter.cpp +++ b/src/core/search/Filter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "Filter.h" -namespace Lucene -{ - Filter::~Filter() - { - } +namespace Lucene { + +Filter::~Filter() { +} + } diff --git a/src/core/search/FilterManager.cpp b/src/core/search/FilterManager.cpp index d3c92601..c598bc46 100644 --- a/src/core/search/FilterManager.cpp +++ b/src/core/search/FilterManager.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,115 +10,102 @@ #include "Filter.h" #include "MiscUtils.h" -namespace Lucene -{ - /// The default maximum number of Filters in the cache - const int32_t FilterManager::DEFAULT_CACHE_CLEAN_SIZE = 100; - - /// The default frequency of cache cleanup - const int64_t FilterManager::DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10; - - FilterManager::FilterManager() - { - } - - FilterManager::~FilterManager() - { - } - - void FilterManager::initialize() - { - cache = MapIntFilterItem::newInstance(); - cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items - cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings - - filterCleaner = newLucene(shared_from_this()); - filterCleaner->start(); - } - - FilterManagerPtr FilterManager::getInstance() - { - static FilterManagerPtr manager; - if (!manager) - { - manager = newLucene(); - CycleCheck::addStatic(manager); - } - return manager; - } - - void FilterManager::setCacheSize(int32_t cacheCleanSize) - { - this->cacheCleanSize = 
cacheCleanSize; - } - - void FilterManager::setCleanThreadSleepTime(int64_t cleanSleepTime) - { - this->cleanSleepTime = cleanSleepTime; - } - - FilterPtr FilterManager::getFilter(FilterPtr filter) - { - SyncLock parentLock(&cache); - FilterItemPtr fi(cache.get(filter->hashCode())); - if (fi) - { - fi->timestamp = MiscUtils::currentTimeMillis(); - return fi->filter; - } - cache.put(filter->hashCode(), newLucene(filter)); - return filter; - } - - FilterItem::FilterItem(FilterPtr filter) - { - this->filter = filter; - this->timestamp = MiscUtils::currentTimeMillis(); - } - - FilterItem::~FilterItem() - { - } - - FilterCleaner::FilterCleaner(FilterManagerPtr manager) - { - _manager = manager; - running = true; - } - - FilterCleaner::~FilterCleaner() - { +namespace Lucene { + +/// The default maximum number of Filters in the cache +const int32_t FilterManager::DEFAULT_CACHE_CLEAN_SIZE = 100; + +/// The default frequency of cache cleanup +const int64_t FilterManager::DEFAULT_CACHE_SLEEP_TIME = 1000 * 60 * 10; + +FilterManager::FilterManager() { +} + +FilterManager::~FilterManager() { +} + +void FilterManager::initialize() { + cache = MapIntFilterItem::newInstance(); + cacheCleanSize = DEFAULT_CACHE_CLEAN_SIZE; // Let the cache get to 100 items + cleanSleepTime = DEFAULT_CACHE_SLEEP_TIME; // 10 minutes between cleanings + + filterCleaner = newLucene(shared_from_this()); + filterCleaner->start(); +} + +FilterManagerPtr FilterManager::getInstance() { + static FilterManagerPtr manager; + LUCENE_RUN_ONCE( + manager = newLucene(); + CycleCheck::addStatic(manager); + ); + return manager; +} + +void FilterManager::setCacheSize(int32_t cacheCleanSize) { + this->cacheCleanSize = cacheCleanSize; +} + +void FilterManager::setCleanThreadSleepTime(int64_t cleanSleepTime) { + this->cleanSleepTime = cleanSleepTime; +} + +FilterPtr FilterManager::getFilter(const FilterPtr& filter) { + SyncLock parentLock(&cache); + FilterItemPtr fi(cache.get(filter->hashCode())); + if (fi) { + 
fi->timestamp = MiscUtils::currentTimeMillis(); + return fi->filter; } - - void FilterCleaner::run() - { - while (running) - { - FilterManagerPtr manager(_manager); - - // sort items from oldest to newest we delete the oldest filters - if (manager->cache.size() > manager->cacheCleanSize) + cache.put(filter->hashCode(), newLucene(filter)); + return filter; +} + +FilterItem::FilterItem(const FilterPtr& filter) { + this->filter = filter; + this->timestamp = MiscUtils::currentTimeMillis(); +} + +FilterItem::~FilterItem() { +} + +FilterCleaner::FilterCleaner(const FilterManagerPtr& manager) { + _manager = manager; + running = true; +} + +FilterCleaner::~FilterCleaner() { +} + +void FilterCleaner::run() { + while (running) { + FilterManagerPtr manager(_manager); + + // sort items from oldest to newest we delete the oldest filters + if (manager->cache.size() > manager->cacheCleanSize) { + // empty the temporary set + sortedFilterItems.clear(); + { - // empty the temporary set - sortedFilterItems.clear(); - - { - SyncLock parentLock(&manager->cache); - for (MapIntFilterItem::iterator item = manager->cache.begin(); item != manager->cache.end(); ++item) - sortedFilterItems.put(item->second->timestamp, item->first); - int32_t numToDelete = (int32_t)((double)(sortedFilterItems.size() - manager->cacheCleanSize) * 1.5); - int32_t counter = 0; - // loop over the set and delete all of the cache entries not used in a while - for (MapLongInt::iterator item = sortedFilterItems.begin(); item != sortedFilterItems.end() && counter++ < numToDelete; ++item) - manager->cache.remove(item->second); + SyncLock parentLock(&manager->cache); + for (MapIntFilterItem::iterator item = manager->cache.begin(); item != manager->cache.end(); ++item) { + sortedFilterItems.put(item->second->timestamp, item->first); + } + int32_t numToDelete = (int32_t)((double)(sortedFilterItems.size() - manager->cacheCleanSize) * 1.5); + int32_t counter = 0; + // loop over the set and delete all of the cache entries not 
used in a while + for (MapLongInt::iterator item = sortedFilterItems.begin(); item != sortedFilterItems.end() && counter++ < numToDelete; ++item) { + manager->cache.remove(item->second); } - - // empty the set so we don't tie up the memory - sortedFilterItems.clear(); } - - // take a nap - LuceneThread::threadSleep(manager->cleanSleepTime); + + // empty the set so we don't tie up the memory + sortedFilterItems.clear(); } + + // take a nap + LuceneThread::threadSleep(manager->cleanSleepTime); } } + +} diff --git a/src/core/search/FilteredDocIdSet.cpp b/src/core/search/FilteredDocIdSet.cpp index 8970e5c9..3daba8d7 100644 --- a/src/core/search/FilteredDocIdSet.cpp +++ b/src/core/search/FilteredDocIdSet.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,38 +8,32 @@ #include "FilteredDocIdSet.h" #include "_FilteredDocIdSet.h" -namespace Lucene -{ - FilteredDocIdSet::FilteredDocIdSet(DocIdSetPtr innerSet) - { - this->innerSet = innerSet; - } - - FilteredDocIdSet::~FilteredDocIdSet() - { - } - - bool FilteredDocIdSet::isCacheable() - { - return innerSet->isCacheable(); - } - - DocIdSetIteratorPtr FilteredDocIdSet::iterator() - { - return newLucene(shared_from_this(), innerSet->iterator()); - } - - DefaultFilteredDocIdSetIterator::DefaultFilteredDocIdSetIterator(FilteredDocIdSetPtr filtered, DocIdSetIteratorPtr innerIter) : FilteredDocIdSetIterator(innerIter) - { - this->filtered = filtered; - } - - DefaultFilteredDocIdSetIterator::~DefaultFilteredDocIdSetIterator() - { - } - - bool DefaultFilteredDocIdSetIterator::match(int32_t docid) - { - return filtered->match(docid); - } +namespace Lucene { + +FilteredDocIdSet::FilteredDocIdSet(const DocIdSetPtr& innerSet) { + this->innerSet = innerSet; +} + +FilteredDocIdSet::~FilteredDocIdSet() { +} + +bool FilteredDocIdSet::isCacheable() { + return innerSet->isCacheable(); +} + +DocIdSetIteratorPtr FilteredDocIdSet::iterator() { + return newLucene(shared_from_this(), innerSet->iterator()); +} + +DefaultFilteredDocIdSetIterator::DefaultFilteredDocIdSetIterator(const FilteredDocIdSetPtr& filtered, const DocIdSetIteratorPtr& innerIter) : FilteredDocIdSetIterator(innerIter) { + this->filtered = filtered; +} + +DefaultFilteredDocIdSetIterator::~DefaultFilteredDocIdSetIterator() { +} + +bool DefaultFilteredDocIdSetIterator::match(int32_t docid) { + return filtered->match(docid); +} + } diff --git a/src/core/search/FilteredDocIdSetIterator.cpp b/src/core/search/FilteredDocIdSetIterator.cpp index 86f919fe..0d64ce76 100644 --- a/src/core/search/FilteredDocIdSetIterator.cpp +++ b/src/core/search/FilteredDocIdSetIterator.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,52 +7,47 @@ #include "LuceneInc.h" #include "FilteredDocIdSetIterator.h" -namespace Lucene -{ - FilteredDocIdSetIterator::FilteredDocIdSetIterator(DocIdSetIteratorPtr innerIter) - { - if (!innerIter) - boost::throw_exception(IllegalArgumentException(L"null iterator")); - this->innerIter = innerIter; - this->doc = -1; - } - - FilteredDocIdSetIterator::~FilteredDocIdSetIterator() - { - } - - int32_t FilteredDocIdSetIterator::docID() - { - return doc; +namespace Lucene { + +FilteredDocIdSetIterator::FilteredDocIdSetIterator(const DocIdSetIteratorPtr& innerIter) { + if (!innerIter) { + boost::throw_exception(IllegalArgumentException(L"null iterator")); } - - int32_t FilteredDocIdSetIterator::nextDoc() - { - while ((doc = innerIter->nextDoc()) != NO_MORE_DOCS) - { - if (match(doc)) - return doc; + this->innerIter = innerIter; + this->doc = -1; +} + +FilteredDocIdSetIterator::~FilteredDocIdSetIterator() { +} + +int32_t FilteredDocIdSetIterator::docID() { + return doc; +} + +int32_t FilteredDocIdSetIterator::nextDoc() { + while ((doc = innerIter->nextDoc()) != NO_MORE_DOCS) { + if (match(doc)) { + return doc; } - return doc; } - - int32_t FilteredDocIdSetIterator::advance(int32_t target) - { - doc = innerIter->advance(target); - if (doc != NO_MORE_DOCS) - { - if (match(doc)) - return doc; - else - { - while ((doc = innerIter->nextDoc()) != NO_MORE_DOCS) - { - if (match(doc)) - return doc; + return doc; +} + +int32_t FilteredDocIdSetIterator::advance(int32_t target) { + doc = innerIter->advance(target); + if (doc != NO_MORE_DOCS) { + if (match(doc)) { + return doc; + } else 
{ + while ((doc = innerIter->nextDoc()) != NO_MORE_DOCS) { + if (match(doc)) { + return doc; } - return doc; } + return doc; } - return doc; } + return doc; +} + } diff --git a/src/core/search/FilteredQuery.cpp b/src/core/search/FilteredQuery.cpp index 53f0970d..a6dbaf78 100644 --- a/src/core/search/FilteredQuery.cpp +++ b/src/core/search/FilteredQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,201 +12,178 @@ #include "DocIdSet.h" #include "MiscUtils.h" -namespace Lucene -{ - FilteredQuery::FilteredQuery(QueryPtr query, FilterPtr filter) - { - this->query = query; - this->filter = filter; - } - - FilteredQuery::~FilteredQuery() - { - } - - WeightPtr FilteredQuery::createWeight(SearcherPtr searcher) - { - WeightPtr weight(query->createWeight(searcher)); - SimilarityPtr similarity(query->getSimilarity(searcher)); - return newLucene(shared_from_this(), weight, similarity); - } - - QueryPtr FilteredQuery::rewrite(IndexReaderPtr reader) - { - QueryPtr rewritten(query->rewrite(reader)); - if (rewritten != query) - { - FilteredQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone())); - cloneQuery->query = rewritten; - return cloneQuery; - } - else - return shared_from_this(); - } - - QueryPtr FilteredQuery::getQuery() - { - return query; - } - - FilterPtr FilteredQuery::getFilter() - { - return filter; - } - - void FilteredQuery::extractTerms(SetTerm terms) - { - getQuery()->extractTerms(terms); - } - - String FilteredQuery::toString(const String& field) - { - StringStream buffer; - buffer << L"filtered(" << query->toString(field) << L")->" << filter->toString() << boostString(); - return 
buffer.str(); - } - - bool FilteredQuery::equals(LuceneObjectPtr other) - { - FilteredQueryPtr otherFilteredQuery(boost::dynamic_pointer_cast(other)); - if (!otherFilteredQuery) - return false; - return (Query::equals(other) && query->equals(otherFilteredQuery->query) && filter->equals(otherFilteredQuery->filter)); - } - - int32_t FilteredQuery::hashCode() - { - return query->hashCode() ^ filter->hashCode() + MiscUtils::doubleToIntBits(getBoost()); - } - - LuceneObjectPtr FilteredQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? other : newLucene(query, filter); - FilteredQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); - cloneQuery->query = query; - cloneQuery->filter = filter; +namespace Lucene { + +FilteredQuery::FilteredQuery(const QueryPtr& query, const FilterPtr& filter) { + this->query = query; + this->filter = filter; +} + +FilteredQuery::~FilteredQuery() { +} + +WeightPtr FilteredQuery::createWeight(const SearcherPtr& searcher) { + WeightPtr weight(query->createWeight(searcher)); + SimilarityPtr similarity(query->getSimilarity(searcher)); + return newLucene(shared_from_this(), weight, similarity); +} + +QueryPtr FilteredQuery::rewrite(const IndexReaderPtr& reader) { + QueryPtr rewritten(query->rewrite(reader)); + if (rewritten != query) { + FilteredQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone())); + cloneQuery->query = rewritten; return cloneQuery; + } else { + return shared_from_this(); } - - FilteredQueryWeight::FilteredQueryWeight(FilteredQueryPtr query, WeightPtr weight, SimilarityPtr similarity) - { - this->query = query; - this->weight = weight; - this->similarity = similarity; - value = 0.0; - } - - FilteredQueryWeight::~FilteredQueryWeight() - { - } - - double FilteredQueryWeight::getValue() - { - return value; - } - - double FilteredQueryWeight::sumOfSquaredWeights() - { - return weight->sumOfSquaredWeights() * query->getBoost() * query->getBoost(); - } - - void 
FilteredQueryWeight::normalize(double norm) - { - weight->normalize(norm); - value = weight->getValue() * query->getBoost(); - } - - ExplanationPtr FilteredQueryWeight::explain(IndexReaderPtr reader, int32_t doc) - { - ExplanationPtr inner(weight->explain(reader, doc)); - if (query->getBoost() !=1) - { - ExplanationPtr preBoost(inner); - inner = newLucene(inner->getValue() * query->getBoost(), L"product of:"); - inner->addDetail(newLucene(query->getBoost(), L"boost")); - inner->addDetail(preBoost); - } - FilterPtr f(query->filter); - DocIdSetPtr docIdSet(f->getDocIdSet(reader)); - DocIdSetIteratorPtr docIdSetIterator(!docIdSet ? DocIdSet::EMPTY_DOCIDSET()->iterator() : docIdSet->iterator()); - if (!docIdSetIterator) - docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); - if (docIdSetIterator->advance(doc) == doc) - return inner; - else - { - ExplanationPtr result(newLucene(0.0, L"failure to match filter: " + f->toString())); - result->addDetail(inner); - return result; - } +} + +QueryPtr FilteredQuery::getQuery() { + return query; +} + +FilterPtr FilteredQuery::getFilter() { + return filter; +} + +void FilteredQuery::extractTerms(SetTerm terms) { + getQuery()->extractTerms(terms); +} + +String FilteredQuery::toString(const String& field) { + StringStream buffer; + buffer << L"filtered(" << query->toString(field) << L")->" << filter->toString() << boostString(); + return buffer.str(); +} + +bool FilteredQuery::equals(const LuceneObjectPtr& other) { + FilteredQueryPtr otherFilteredQuery(boost::dynamic_pointer_cast(other)); + if (!otherFilteredQuery) { + return false; } - - QueryPtr FilteredQueryWeight::getQuery() - { - return query; + return (Query::equals(other) && query->equals(otherFilteredQuery->query) && filter->equals(otherFilteredQuery->filter)); +} + +int32_t FilteredQuery::hashCode() { + return query->hashCode() ^ filter->hashCode() + MiscUtils::doubleToIntBits(getBoost()); +} + +LuceneObjectPtr FilteredQuery::clone(const LuceneObjectPtr& other) { + 
LuceneObjectPtr clone = other ? other : newLucene(query, filter); + FilteredQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); + cloneQuery->query = query; + cloneQuery->filter = filter; + return cloneQuery; +} + +FilteredQueryWeight::FilteredQueryWeight(const FilteredQueryPtr& query, const WeightPtr& weight, const SimilarityPtr& similarity) { + this->query = query; + this->weight = weight; + this->similarity = similarity; + value = 0.0; +} + +FilteredQueryWeight::~FilteredQueryWeight() { +} + +double FilteredQueryWeight::getValue() { + return value; +} + +double FilteredQueryWeight::sumOfSquaredWeights() { + return weight->sumOfSquaredWeights() * query->getBoost() * query->getBoost(); +} + +void FilteredQueryWeight::normalize(double norm) { + weight->normalize(norm); + value = weight->getValue() * query->getBoost(); +} + +ExplanationPtr FilteredQueryWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + ExplanationPtr inner(weight->explain(reader, doc)); + if (query->getBoost() !=1) { + ExplanationPtr preBoost(inner); + inner = newLucene(inner->getValue() * query->getBoost(), L"product of:"); + inner->addDetail(newLucene(query->getBoost(), L"boost")); + inner->addDetail(preBoost); + } + FilterPtr f(query->filter); + DocIdSetPtr docIdSet(f->getDocIdSet(reader)); + DocIdSetIteratorPtr docIdSetIterator(!docIdSet ? 
DocIdSet::EMPTY_DOCIDSET()->iterator() : docIdSet->iterator()); + if (!docIdSetIterator) { + docIdSetIterator = DocIdSet::EMPTY_DOCIDSET()->iterator(); + } + if (docIdSetIterator->advance(doc) == doc) { + return inner; + } else { + ExplanationPtr result(newLucene(0.0, L"failure to match filter: " + f->toString())); + result->addDetail(inner); + return result; } - - ScorerPtr FilteredQueryWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - ScorerPtr scorer(weight->scorer(reader, true, false)); - if (!scorer) - return ScorerPtr(); - DocIdSetPtr docIdSet(query->filter->getDocIdSet(reader)); - if (!docIdSet) - return ScorerPtr(); - DocIdSetIteratorPtr docIdSetIterator(docIdSet->iterator()); - if (!docIdSetIterator) - return ScorerPtr(); - return newLucene(shared_from_this(), scorer, docIdSetIterator, similarity); +} + +QueryPtr FilteredQueryWeight::getQuery() { + return query; +} + +ScorerPtr FilteredQueryWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + ScorerPtr scorer(weight->scorer(reader, true, false)); + if (!scorer) { + return ScorerPtr(); } - - FilteredQueryWeightScorer::FilteredQueryWeightScorer(FilteredQueryWeightPtr weight, ScorerPtr scorer, DocIdSetIteratorPtr docIdSetIterator, SimilarityPtr similarity) : Scorer(similarity) - { - this->weight = weight; - this->scorer = scorer; - this->docIdSetIterator = docIdSetIterator; - doc = -1; + DocIdSetPtr docIdSet(query->filter->getDocIdSet(reader)); + if (!docIdSet) { + return ScorerPtr(); } - - FilteredQueryWeightScorer::~FilteredQueryWeightScorer() - { + DocIdSetIteratorPtr docIdSetIterator(docIdSet->iterator()); + if (!docIdSetIterator) { + return ScorerPtr(); } - - int32_t FilteredQueryWeightScorer::advanceToCommon(int32_t scorerDoc, int32_t disiDoc) - { - while (scorerDoc != disiDoc) - { - if (scorerDoc < disiDoc) - scorerDoc = scorer->advance(disiDoc); - else - disiDoc = docIdSetIterator->advance(scorerDoc); + return 
newLucene(shared_from_this(), scorer, docIdSetIterator, similarity); +} + +FilteredQueryWeightScorer::FilteredQueryWeightScorer(const FilteredQueryWeightPtr& weight, const ScorerPtr& scorer, const DocIdSetIteratorPtr& docIdSetIterator, const SimilarityPtr& similarity) : Scorer(similarity) { + this->weight = weight; + this->scorer = scorer; + this->docIdSetIterator = docIdSetIterator; + doc = -1; +} + +FilteredQueryWeightScorer::~FilteredQueryWeightScorer() { +} + +int32_t FilteredQueryWeightScorer::advanceToCommon(int32_t scorerDoc, int32_t disiDoc) { + while (scorerDoc != disiDoc) { + if (scorerDoc < disiDoc) { + scorerDoc = scorer->advance(disiDoc); + } else { + disiDoc = docIdSetIterator->advance(scorerDoc); } - return scorerDoc; - } - - int32_t FilteredQueryWeightScorer::nextDoc() - { - int32_t disiDoc = docIdSetIterator->nextDoc(); - int32_t scorerDoc = scorer->nextDoc(); - doc = (scorerDoc != NO_MORE_DOCS && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS) ? scorer->docID() : NO_MORE_DOCS; - return doc; - } - - int32_t FilteredQueryWeightScorer::docID() - { - return doc; - } - - int32_t FilteredQueryWeightScorer::advance(int32_t target) - { - int32_t disiDoc = docIdSetIterator->advance(target); - int32_t scorerDoc = scorer->advance(target); - doc = (scorerDoc != NO_MORE_DOCS && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS) ? scorer->docID() : NO_MORE_DOCS; - return doc; - } - - double FilteredQueryWeightScorer::score() - { - return weight->query->getBoost() * scorer->score(); } + return scorerDoc; +} + +int32_t FilteredQueryWeightScorer::nextDoc() { + int32_t disiDoc = docIdSetIterator->nextDoc(); + int32_t scorerDoc = scorer->nextDoc(); + doc = (scorerDoc != NO_MORE_DOCS && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS) ? 
scorer->docID() : NO_MORE_DOCS; + return doc; +} + +int32_t FilteredQueryWeightScorer::docID() { + return doc; +} + +int32_t FilteredQueryWeightScorer::advance(int32_t target) { + int32_t disiDoc = docIdSetIterator->advance(target); + int32_t scorerDoc = scorer->advance(target); + doc = (scorerDoc != NO_MORE_DOCS && advanceToCommon(scorerDoc, disiDoc) != NO_MORE_DOCS) ? scorer->docID() : NO_MORE_DOCS; + return doc; +} + +double FilteredQueryWeightScorer::score() { + return weight->query->getBoost() * scorer->score(); +} + } diff --git a/src/core/search/FilteredTermEnum.cpp b/src/core/search/FilteredTermEnum.cpp index afe12457..b4473ad8 100644 --- a/src/core/search/FilteredTermEnum.cpp +++ b/src/core/search/FilteredTermEnum.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,66 +7,63 @@ #include "LuceneInc.h" #include "FilteredTermEnum.h" -namespace Lucene -{ - FilteredTermEnum::~FilteredTermEnum() - { +namespace Lucene { + +FilteredTermEnum::~FilteredTermEnum() { +} + +void FilteredTermEnum::setEnum(const TermEnumPtr& actualEnum) { + this->actualEnum = actualEnum; + // Find the first term that matches + TermPtr term(actualEnum->term()); + if (term && termCompare(term)) { + currentTerm = term; + } else { + next(); } - - void FilteredTermEnum::setEnum(TermEnumPtr actualEnum) - { - this->actualEnum = actualEnum; - // Find the first term that matches - TermPtr term(actualEnum->term()); - if (term && termCompare(term)) - currentTerm = term; - else - next(); +} + +int32_t FilteredTermEnum::docFreq() { + if (!currentTerm) { + return -1; } - - int32_t FilteredTermEnum::docFreq() - { - if (!currentTerm) - return -1; - BOOST_ASSERT(actualEnum); - return actualEnum->docFreq(); + BOOST_ASSERT(actualEnum); + return actualEnum->docFreq(); +} + +bool FilteredTermEnum::next() { + if (!actualEnum) { + return false; // the actual enumerator is not initialized } - - bool FilteredTermEnum::next() - { - if (!actualEnum) - return false; // the actual enumerator is not initialized - currentTerm.reset(); - while (!currentTerm) - { - if (endEnum()) - return false; - if (actualEnum->next()) - { - TermPtr term(actualEnum->term()); - if (termCompare(term)) - { - currentTerm = term; - return true; - } + currentTerm.reset(); + while (!currentTerm) { + if (endEnum()) { + return false; + } + if (actualEnum->next()) { + TermPtr term(actualEnum->term()); + if (termCompare(term)) { + currentTerm = term; + return true; } - else - return false; + } else { + return false; } - currentTerm.reset(); - return false; } - - TermPtr FilteredTermEnum::term() - { - return currentTerm; - } - - void FilteredTermEnum::close() - { - if (actualEnum) - actualEnum->close(); - currentTerm.reset(); - 
actualEnum.reset(); + currentTerm.reset(); + return false; +} + +TermPtr FilteredTermEnum::term() { + return currentTerm; +} + +void FilteredTermEnum::close() { + if (actualEnum) { + actualEnum->close(); } + currentTerm.reset(); + actualEnum.reset(); +} + } diff --git a/src/core/search/FuzzyQuery.cpp b/src/core/search/FuzzyQuery.cpp index 13111f8b..38e1bf25 100644 --- a/src/core/search/FuzzyQuery.cpp +++ b/src/core/search/FuzzyQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,205 +14,192 @@ #include "BooleanClause.h" #include "MiscUtils.h" -namespace Lucene -{ - const int32_t FuzzyQuery::defaultPrefixLength = 0; - - FuzzyQuery::FuzzyQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength) - { - ConstructQuery(term, minimumSimilarity, prefixLength); - } - - FuzzyQuery::FuzzyQuery(TermPtr term, double minimumSimilarity) - { - ConstructQuery(term, minimumSimilarity, defaultPrefixLength); - } - - FuzzyQuery::FuzzyQuery(TermPtr term) - { - ConstructQuery(term, defaultMinSimilarity(), defaultPrefixLength); - } - - FuzzyQuery::~FuzzyQuery() - { - } - - void FuzzyQuery::ConstructQuery(TermPtr term, double minimumSimilarity, int32_t prefixLength) - { - this->term = term; - - if (minimumSimilarity >= 1.0) - boost::throw_exception(IllegalArgumentException(L"minimumSimilarity >= 1")); - else if (minimumSimilarity < 0.0) - boost::throw_exception(IllegalArgumentException(L"minimumSimilarity < 0")); - if (prefixLength < 0) - boost::throw_exception(IllegalArgumentException(L"prefixLength < 0")); - - this->termLongEnough = ((int32_t)term->text().length() > (int32_t)(1.0 / (1.0 - 
minimumSimilarity))); - - this->minimumSimilarity = minimumSimilarity; - this->prefixLength = prefixLength; - rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE(); +namespace Lucene { + +const int32_t FuzzyQuery::defaultPrefixLength = 0; + +FuzzyQuery::FuzzyQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength) { + ConstructQuery(term, minimumSimilarity, prefixLength); +} + +FuzzyQuery::FuzzyQuery(const TermPtr& term, double minimumSimilarity) { + ConstructQuery(term, minimumSimilarity, defaultPrefixLength); +} + +FuzzyQuery::FuzzyQuery(const TermPtr& term) { + ConstructQuery(term, defaultMinSimilarity(), defaultPrefixLength); +} + +FuzzyQuery::~FuzzyQuery() { +} + +void FuzzyQuery::ConstructQuery(const TermPtr& term, double minimumSimilarity, int32_t prefixLength) { + this->term = term; + + if (minimumSimilarity >= 1.0) { + boost::throw_exception(IllegalArgumentException(L"minimumSimilarity >= 1")); + } else if (minimumSimilarity < 0.0) { + boost::throw_exception(IllegalArgumentException(L"minimumSimilarity < 0")); } - - double FuzzyQuery::defaultMinSimilarity() - { - const double _defaultMinSimilarity = 0.5; - return _defaultMinSimilarity; + if (prefixLength < 0) { + boost::throw_exception(IllegalArgumentException(L"prefixLength < 0")); } - - double FuzzyQuery::getMinSimilarity() - { - return minimumSimilarity; + + this->termLongEnough = ((int32_t)term->text().length() > (int32_t)(1.0 / (1.0 - minimumSimilarity))); + + this->minimumSimilarity = minimumSimilarity; + this->prefixLength = prefixLength; + rewriteMethod = SCORING_BOOLEAN_QUERY_REWRITE(); +} + +double FuzzyQuery::defaultMinSimilarity() { + const double _defaultMinSimilarity = 0.5; + return _defaultMinSimilarity; +} + +double FuzzyQuery::getMinSimilarity() { + return minimumSimilarity; +} + +int32_t FuzzyQuery::getPrefixLength() { + return prefixLength; +} + +FilteredTermEnumPtr FuzzyQuery::getEnum(const IndexReaderPtr& reader) { + return newLucene(reader, getTerm(), minimumSimilarity, 
prefixLength); +} + +TermPtr FuzzyQuery::getTerm() { + return term; +} + +void FuzzyQuery::setRewriteMethod(const RewriteMethodPtr& method) { + boost::throw_exception(UnsupportedOperationException(L"FuzzyQuery cannot change rewrite method")); +} + +QueryPtr FuzzyQuery::rewrite(const IndexReaderPtr& reader) { + if (!termLongEnough) { // can only match if it's exact + return newLucene(term); } - - int32_t FuzzyQuery::getPrefixLength() - { - return prefixLength; + + int32_t maxSize = BooleanQuery::getMaxClauseCount(); + ScoreTermQueuePtr stQueue(newLucene(maxSize + 1)); + FilteredTermEnumPtr enumerator(getEnum(reader)); + LuceneException finally; + try { + ScoreTermPtr st = newLucene(); + do { + TermPtr t(enumerator->term()); + if (!t) { + break; + } + double score = enumerator->difference(); + // ignore uncompetitive hits + if (stQueue->size() >= maxSize && score <= stQueue->top()->score) { + continue; + } + // add new entry in PQ + st->term = t; + st->score = score; + stQueue->add(st); + // possibly drop entries from queue + st = (stQueue->size() > maxSize) ? stQueue->pop() : newLucene(); + } while (enumerator->next()); + } catch (LuceneException& e) { + finally = e; + } + enumerator->close(); + finally.throwException(); + + BooleanQueryPtr query(newLucene(true)); + int32_t size = stQueue->size(); + for (int32_t i = 0; i < size; ++i) { + ScoreTermPtr st(stQueue->pop()); + TermQueryPtr tq(newLucene(st->term)); // found a match + tq->setBoost(getBoost() * st->score); // set the boost + query->add(tq, BooleanClause::SHOULD); // add to query } - - FilteredTermEnumPtr FuzzyQuery::getEnum(IndexReaderPtr reader) - { - return newLucene(reader, getTerm(), minimumSimilarity, prefixLength); + + return query; +} + +LuceneObjectPtr FuzzyQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = MultiTermQuery::clone(other ? 
other : newLucene(term)); + FuzzyQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->minimumSimilarity = minimumSimilarity; + cloneQuery->prefixLength = prefixLength; + cloneQuery->termLongEnough = termLongEnough; + cloneQuery->term = term; + return cloneQuery; +} + +String FuzzyQuery::toString(const String& field) { + StringStream buffer; + if (term->field() != field) { + buffer << term->field() << L":"; } - - TermPtr FuzzyQuery::getTerm() - { - return term; + buffer << term->text() << L"~" << minimumSimilarity << boostString(); + return buffer.str(); +} + +int32_t FuzzyQuery::hashCode() { + int32_t prime = 31; + int32_t result = MultiTermQuery::hashCode(); + result = prime * result + MiscUtils::doubleToIntBits(minimumSimilarity); + result = prime * result + prefixLength; + result = prime * result + (term ? term->hashCode() : 0); + return result; +} + +bool FuzzyQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - void FuzzyQuery::setRewriteMethod(RewriteMethodPtr method) - { - boost::throw_exception(UnsupportedOperationException(L"FuzzyQuery cannot change rewrite method")); + if (!MultiTermQuery::equals(other)) { + return false; } - - QueryPtr FuzzyQuery::rewrite(IndexReaderPtr reader) - { - if (!termLongEnough) // can only match if it's exact - return newLucene(term); - - int32_t maxSize = BooleanQuery::getMaxClauseCount(); - ScoreTermQueuePtr stQueue(newLucene(1024)); - FilteredTermEnumPtr enumerator(getEnum(reader)); - LuceneException finally; - try - { - ScoreTermPtr st = newLucene(); - do - { - TermPtr t(enumerator->term()); - if (!t) - break; - double score = enumerator->difference(); - // ignore uncompetitive hits - if (stQueue->size() >= maxSize && score <= stQueue->top()->score) - continue; - // add new entry in PQ - st->term = t; - st->score = score; - stQueue->add(st); - // possibly drop entries from queue - st = (stQueue->size() > maxSize) ? 
stQueue->pop() : newLucene(); - } - while (enumerator->next()); - } - catch (LuceneException& e) - { - finally = e; - } - enumerator->close(); - finally.throwException(); - - BooleanQueryPtr query(newLucene(true)); - int32_t size = stQueue->size(); - for (int32_t i = 0; i < size; ++i) - { - ScoreTermPtr st(stQueue->pop()); - TermQueryPtr tq(newLucene(st->term)); // found a match - tq->setBoost(getBoost() * st->score); // set the boost - query->add(tq, BooleanClause::SHOULD); // add to query - } - - return query; + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - LuceneObjectPtr FuzzyQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(term)); - FuzzyQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->minimumSimilarity = minimumSimilarity; - cloneQuery->prefixLength = prefixLength; - cloneQuery->termLongEnough = termLongEnough; - cloneQuery->term = term; - return cloneQuery; + FuzzyQueryPtr otherFuzzyQuery(boost::dynamic_pointer_cast(other)); + if (!otherFuzzyQuery) { + return false; } - - String FuzzyQuery::toString(const String& field) - { - StringStream buffer; - if (term->field() != field) - buffer << term->field() << L":"; - buffer << term->text() << L"~" << minimumSimilarity << boostString(); - return buffer.str(); + if (MiscUtils::doubleToIntBits(minimumSimilarity) != MiscUtils::doubleToIntBits(otherFuzzyQuery->minimumSimilarity)) { + return false; } - - int32_t FuzzyQuery::hashCode() - { - int32_t prime = 31; - int32_t result = MultiTermQuery::hashCode(); - result = prime * result + MiscUtils::doubleToIntBits(minimumSimilarity); - result = prime * result + prefixLength; - result = prime * result + (term ? 
term->hashCode() : 0); - return result; + if (prefixLength != otherFuzzyQuery->prefixLength) { + return false; } - - bool FuzzyQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!MultiTermQuery::equals(other)) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) + if (!term) { + if (otherFuzzyQuery->term) { return false; - FuzzyQueryPtr otherFuzzyQuery(boost::dynamic_pointer_cast(other)); - if (!otherFuzzyQuery) - return false; - if (MiscUtils::doubleToIntBits(minimumSimilarity) != MiscUtils::doubleToIntBits(otherFuzzyQuery->minimumSimilarity)) - return false; - if (prefixLength != otherFuzzyQuery->prefixLength) - return false; - if (!term) - { - if (otherFuzzyQuery->term) - return false; } - else if (!term->equals(otherFuzzyQuery->term)) - return false; - return true; - } - - ScoreTerm::~ScoreTerm() - { - } - - int32_t ScoreTerm::compareTo(ScoreTermPtr other) - { - if (this->score == other->score) - return other->term->compareTo(this->term); - else - return this->score < other->score ? -1 : (this->score > other->score ? 1 : 0); - } - - ScoreTermQueue::ScoreTermQueue(int32_t size) : PriorityQueue(size) - { - } - - ScoreTermQueue::~ScoreTermQueue() - { + } else if (!term->equals(otherFuzzyQuery->term)) { + return false; } - - bool ScoreTermQueue::lessThan(const ScoreTermPtr& first, const ScoreTermPtr& second) - { - return (first->compareTo(second) < 0); + return true; +} + +ScoreTerm::~ScoreTerm() { +} + +int32_t ScoreTerm::compareTo(const ScoreTermPtr& other) { + if (this->score == other->score) { + return other->term->compareTo(this->term); + } else { + return this->score < other->score ? -1 : (this->score > other->score ? 
1 : 0); } } + +ScoreTermQueue::ScoreTermQueue(int32_t size) : PriorityQueue(size) { +} + +ScoreTermQueue::~ScoreTermQueue() { +} + +bool ScoreTermQueue::lessThan(const ScoreTermPtr& first, const ScoreTermPtr& second) { + return (first->compareTo(second) < 0); +} + +} diff --git a/src/core/search/FuzzyTermEnum.cpp b/src/core/search/FuzzyTermEnum.cpp index 2cc37ea6..36e66aab 100644 --- a/src/core/search/FuzzyTermEnum.cpp +++ b/src/core/search/FuzzyTermEnum.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,155 +11,144 @@ #include "Term.h" #include "IndexReader.h" -namespace Lucene -{ - FuzzyTermEnum::FuzzyTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity, int32_t prefixLength) - { - ConstructTermEnum(reader, term, minSimilarity, prefixLength); - } - - FuzzyTermEnum::FuzzyTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity) - { - ConstructTermEnum(reader, term, minSimilarity, FuzzyQuery::defaultPrefixLength); +namespace Lucene { + +FuzzyTermEnum::FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity, int32_t prefixLength) { + ConstructTermEnum(reader, term, minSimilarity, prefixLength); +} + +FuzzyTermEnum::FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity) { + ConstructTermEnum(reader, term, minSimilarity, FuzzyQuery::defaultPrefixLength); +} + +FuzzyTermEnum::FuzzyTermEnum(const IndexReaderPtr& reader, const TermPtr& term) { + ConstructTermEnum(reader, term, FuzzyQuery::defaultMinSimilarity(), FuzzyQuery::defaultPrefixLength); +} + +FuzzyTermEnum::~FuzzyTermEnum() { +} + +void 
FuzzyTermEnum::ConstructTermEnum(const IndexReaderPtr& reader, const TermPtr& term, double minSimilarity, int32_t prefixLength) { + if (minSimilarity >= 1.0) { + boost::throw_exception(IllegalArgumentException(L"minimumSimilarity cannot be greater than or equal to 1")); + } else if (minSimilarity < 0.0) { + boost::throw_exception(IllegalArgumentException(L"minimumSimilarity cannot be less than 0")); } - - FuzzyTermEnum::FuzzyTermEnum(IndexReaderPtr reader, TermPtr term) - { - ConstructTermEnum(reader, term, FuzzyQuery::defaultMinSimilarity(), FuzzyQuery::defaultPrefixLength); + if (prefixLength < 0) { + boost::throw_exception(IllegalArgumentException(L"prefixLength cannot be less than 0")); } - - FuzzyTermEnum::~FuzzyTermEnum() - { + + this->minimumSimilarity = minSimilarity; + this->scale_factor = 1.0 / (1.0 - minimumSimilarity); + this->searchTerm = term; + this->field = searchTerm->field(); + this->_endEnum = false; + this->_similarity = 0.0; + + // The prefix could be longer than the word. + // It's kind of silly though. It means we must match the entire word. + int32_t fullSearchTermLength = searchTerm->text().length(); + int32_t realPrefixLength = prefixLength > fullSearchTermLength ? 
fullSearchTermLength : prefixLength; + + this->text = searchTerm->text().substr(realPrefixLength); + this->prefix = searchTerm->text().substr(0, realPrefixLength); + + this->p = Collection::newInstance(this->text.length() + 1); + this->d = Collection::newInstance(this->text.length() + 1); + + setEnum(reader->terms(newLucene(searchTerm->field(), prefix))); +} + +bool FuzzyTermEnum::termCompare(const TermPtr& term) { + if (field == term->field() && boost::starts_with(term->text(), prefix)) { + String target(term->text().substr(prefix.length())); + this->_similarity = similarity(target); + return (_similarity > minimumSimilarity); } - - void FuzzyTermEnum::ConstructTermEnum(IndexReaderPtr reader, TermPtr term, double minSimilarity, int32_t prefixLength) - { - if (minSimilarity >= 1.0) - boost::throw_exception(IllegalArgumentException(L"minimumSimilarity cannot be greater than or equal to 1")); - else if (minSimilarity < 0.0) - boost::throw_exception(IllegalArgumentException(L"minimumSimilarity cannot be less than 0")); - if (prefixLength < 0) - boost::throw_exception(IllegalArgumentException(L"prefixLength cannot be less than 0")); - - this->minimumSimilarity = minSimilarity; - this->scale_factor = 1.0 / (1.0 - minimumSimilarity); - this->searchTerm = term; - this->field = searchTerm->field(); - this->_endEnum = false; - this->_similarity = 0.0; - - // The prefix could be longer than the word. - // It's kind of silly though. It means we must match the entire word. - int32_t fullSearchTermLength = searchTerm->text().length(); - int32_t realPrefixLength = prefixLength > fullSearchTermLength ? 
fullSearchTermLength : prefixLength; - - this->text = searchTerm->text().substr(realPrefixLength); - this->prefix = searchTerm->text().substr(0, realPrefixLength); - - this->p = Collection::newInstance(this->text.length() + 1); - this->d = Collection::newInstance(this->text.length() + 1); - - setEnum(reader->terms(newLucene(searchTerm->field(), prefix))); + _endEnum = true; + return false; +} + +double FuzzyTermEnum::difference() { + return (_similarity - minimumSimilarity) * scale_factor; +} + +bool FuzzyTermEnum::endEnum() { + return _endEnum; +} + +double FuzzyTermEnum::similarity(const String& target) { + int32_t m = target.length(); + int32_t n = text.length(); + if (n == 0) { + // We don't have anything to compare. That means if we just add the letters for m we get the new word + return prefix.empty() ? 0.0 : 1.0 - ((double)m / (double)prefix.length()); } - - bool FuzzyTermEnum::termCompare(TermPtr term) - { - if (field == term->field() && boost::starts_with(term->text(), prefix)) - { - String target(term->text().substr(prefix.length())); - this->_similarity = similarity(target); - return (_similarity > minimumSimilarity); - } - _endEnum = true; - return false; + if (m == 0) { + return prefix.empty() ? 0.0 : 1.0 - ((double)n / (double)prefix.length()); } - - double FuzzyTermEnum::difference() - { - return (_similarity - minimumSimilarity) * scale_factor; + + int32_t maxDistance = calculateMaxDistance(m); + + if (maxDistance < std::abs(m - n)) { + // Just adding the characters of m to n or vice-versa results in too many edits for example "pre" length + // is 3 and "prefixes" length is 8. We can see that given this optimal circumstance, the edit distance + // cannot be less than 5. which is 8-3 or more precisely std::abs(3 - 8). if our maximum edit distance + // is 4, then we can discard this word without looking at it. 
+ return 0.0; } - - bool FuzzyTermEnum::endEnum() - { - return _endEnum; + + // init matrix d + for (int32_t i = 0; i <= n; ++i) { + p[i] = i; } - - double FuzzyTermEnum::similarity(const String& target) - { - int32_t m = target.length(); - int32_t n = text.length(); - if (n == 0) - { - // We don't have anything to compare. That means if we just add the letters for m we get the new word - return prefix.empty() ? 0.0 : 1.0 - ((double)m / (double)prefix.length()); + + // start computing edit distance + for (int32_t j = 1; j <= m; ++j) { // iterates through target + int32_t bestPossibleEditDistance = m; + wchar_t t_j = target[j - 1]; // jth character of t + d[0] = j; + + for (int32_t i = 1; i <= n; ++i) { // iterates through text + // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1) + if (t_j != text[i - 1]) { + d[i] = std::min(std::min(d[i - 1], p[i]), p[i - 1]) + 1; + } else { + d[i] = std::min(std::min(d[i - 1] + 1, p[i] + 1), p[i - 1]); + } + bestPossibleEditDistance = std::min(bestPossibleEditDistance, d[i]); } - if (m == 0) - return prefix.empty() ? 0.0 : 1.0 - ((double)n / (double)prefix.length()); - - int32_t maxDistance = calculateMaxDistance(m); - - if (maxDistance < std::abs(m - n)) - { - // Just adding the characters of m to n or vice-versa results in too many edits for example "pre" length - // is 3 and "prefixes" length is 8. We can see that given this optimal circumstance, the edit distance - // cannot be less than 5. which is 8-3 or more precisely std::abs(3 - 8). if our maximum edit distance - // is 4, then we can discard this word without looking at it. + + // After calculating row i, the best possible edit distance can be found by found by finding the smallest + // value in a given column. If the bestPossibleEditDistance is greater than the max distance, abort. 
+ + if (j > maxDistance && bestPossibleEditDistance > maxDistance) { // equal is okay, but not greater + // The closest the target can be to the text is just too far away. + // This target is leaving the party early. return 0.0; } - - // init matrix d - for (int32_t i = 0; i <= n; ++i) - p[i] = i; - - // start computing edit distance - for (int32_t j = 1; j <= m; ++j) // iterates through target - { - int32_t bestPossibleEditDistance = m; - wchar_t t_j = target[j - 1]; // jth character of t - d[0] = j; - - for (int32_t i = 1; i <= n; ++i) // iterates through text - { - // minimum of cell to the left+1, to the top+1, diagonally left and up +(0|1) - if (t_j != text[i - 1]) - d[i] = std::min(std::min(d[i - 1], p[i]), p[i - 1]) + 1; - else - d[i] = std::min(std::min(d[i - 1] + 1, p[i] + 1), p[i - 1]); - bestPossibleEditDistance = std::min(bestPossibleEditDistance, d[i]); - } - - // After calculating row i, the best possible edit distance can be found by found by finding the smallest - // value in a given column. If the bestPossibleEditDistance is greater than the max distance, abort. - - if (j > maxDistance && bestPossibleEditDistance > maxDistance) // equal is okay, but not greater - { - // The closest the target can be to the text is just too far away. - // This target is leaving the party early. - return 0.0; - } - - // copy current distance counts to 'previous row' distance counts: swap p and d - std::swap(p, d); - } - - // Our last action in the above loop was to switch d and p, so p now actually has the most recent cost counts - - // This will return less than 0.0 when the edit distance is greater than the number of characters in the shorter - // word. 
But this was the formula that was previously used in FuzzyTermEnum, so it has not been changed (even - // though minimumSimilarity must be greater than 0.0) - return 1.0 - ((double)p[n] / (double)(prefix.length() + std::min(n, m))); - } - - int32_t FuzzyTermEnum::calculateMaxDistance(int32_t m) - { - return (int32_t)((1.0 - minimumSimilarity) * (double)(std::min((int32_t)text.length(), m) + prefix.length())); - } - - void FuzzyTermEnum::close() - { - p.reset(); - d.reset(); - searchTerm.reset(); - FilteredTermEnum::close(); // call FilteredTermEnum::close() and let the garbage collector do its work. + + // copy current distance counts to 'previous row' distance counts: swap p and d + std::swap(p, d); } + + // Our last action in the above loop was to switch d and p, so p now actually has the most recent cost counts + + // This will return less than 0.0 when the edit distance is greater than the number of characters in the shorter + // word. But this was the formula that was previously used in FuzzyTermEnum, so it has not been changed (even + // though minimumSimilarity must be greater than 0.0) + return 1.0 - ((double)p[n] / (double)(prefix.length() + std::min(n, m))); +} + +int32_t FuzzyTermEnum::calculateMaxDistance(int32_t m) { + return (int32_t)((1.0 - minimumSimilarity) * (double)(std::min((int32_t)text.length(), m) + prefix.length())); +} + +void FuzzyTermEnum::close() { + p.reset(); + d.reset(); + searchTerm.reset(); + FilteredTermEnum::close(); // call FilteredTermEnum::close() and let the garbage collector do its work. +} + } diff --git a/src/core/search/HitQueue.cpp b/src/core/search/HitQueue.cpp index 5e255674..d215b93a 100644 --- a/src/core/search/HitQueue.cpp +++ b/src/core/search/HitQueue.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,29 +8,27 @@ #include "HitQueue.h" #include "ScoreDoc.h" -namespace Lucene -{ - HitQueue::HitQueue(int32_t size, bool prePopulate) : HitQueueBase(size) - { - this->prePopulate = prePopulate; - } - - HitQueue::~HitQueue() - { - } - - bool HitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) - { - if (first->score == second->score) - return (first->doc > second->doc); - else - return (first->score < second->score); - } - - ScoreDocPtr HitQueue::getSentinelObject() - { - // Always set the doc Id to MAX_VALUE so that it won't be favored by lessThan. This generally should - // not happen since if score is not NEG_INF, TopScoreDocCollector will always add the object to the queue. - return !prePopulate ? ScoreDocPtr() : newLucene(INT_MAX, -std::numeric_limits::infinity()); +namespace Lucene { + +HitQueue::HitQueue(int32_t size, bool prePopulate) : HitQueueBase(size) { + this->prePopulate = prePopulate; +} + +HitQueue::~HitQueue() { +} + +bool HitQueue::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { + if (first->score == second->score) { + return (first->doc > second->doc); + } else { + return (first->score < second->score); } } + +ScoreDocPtr HitQueue::getSentinelObject() { + // Always set the doc Id to MAX_VALUE so that it won't be favored by lessThan. This generally should + // not happen since if score is not NEG_INF, TopScoreDocCollector will always add the object to the queue. + return !prePopulate ? 
ScoreDocPtr() : newLucene(INT_MAX, -std::numeric_limits::infinity()); +} + +} diff --git a/src/core/search/HitQueueBase.cpp b/src/core/search/HitQueueBase.cpp index 46181df8..4d6c1161 100644 --- a/src/core/search/HitQueueBase.cpp +++ b/src/core/search/HitQueueBase.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,83 +8,68 @@ #include "HitQueueBase.h" #include "ScoreDoc.h" -namespace Lucene -{ - HitQueueBase::HitQueueBase(int32_t size) - { - queueSize = size; - } - - HitQueueBase::~HitQueueBase() - { - } - - void HitQueueBase::initialize() - { - queue = newLucene(shared_from_this(), queueSize); - } - - ScoreDocPtr HitQueueBase::add(ScoreDocPtr scoreDoc) - { - return queue->add(scoreDoc); - } - - ScoreDocPtr HitQueueBase::addOverflow(ScoreDocPtr scoreDoc) - { - return queue->addOverflow(scoreDoc); - } - - ScoreDocPtr HitQueueBase::top() - { - return queue->top(); - } - - ScoreDocPtr HitQueueBase::pop() - { - return queue->pop(); - } - - ScoreDocPtr HitQueueBase::updateTop() - { - return queue->updateTop(); - } - - int32_t HitQueueBase::size() - { - return queue->size(); - } - - bool HitQueueBase::empty() - { - return queue->empty(); - } - - void HitQueueBase::clear() - { - queue->clear(); - } - - ScoreDocPtr HitQueueBase::getSentinelObject() - { - return ScoreDocPtr(); - } - - PriorityQueueScoreDocs::PriorityQueueScoreDocs(HitQueueBasePtr hitQueue, int32_t size) : PriorityQueue(size) - { - _hitQueue = hitQueue; - } - - PriorityQueueScoreDocs::~PriorityQueueScoreDocs() - { - } - - bool PriorityQueueScoreDocs::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) - { - return 
HitQueueBasePtr(_hitQueue)->lessThan(first, second); - } - - ScoreDocPtr PriorityQueueScoreDocs::getSentinelObject() - { - return HitQueueBasePtr(_hitQueue)->getSentinelObject(); - } +namespace Lucene { + +HitQueueBase::HitQueueBase(int32_t size) { + queueSize = size; +} + +HitQueueBase::~HitQueueBase() { +} + +void HitQueueBase::initialize() { + queue = newLucene(shared_from_this(), queueSize); +} + +ScoreDocPtr HitQueueBase::add(const ScoreDocPtr& scoreDoc) { + return queue->add(scoreDoc); +} + +ScoreDocPtr HitQueueBase::addOverflow(const ScoreDocPtr& scoreDoc) { + return queue->addOverflow(scoreDoc); +} + +ScoreDocPtr HitQueueBase::top() { + return queue->top(); +} + +ScoreDocPtr HitQueueBase::pop() { + return queue->pop(); +} + +ScoreDocPtr HitQueueBase::updateTop() { + return queue->updateTop(); +} + +int32_t HitQueueBase::size() { + return queue->size(); +} + +bool HitQueueBase::empty() { + return queue->empty(); +} + +void HitQueueBase::clear() { + queue->clear(); +} + +ScoreDocPtr HitQueueBase::getSentinelObject() { + return ScoreDocPtr(); +} + +PriorityQueueScoreDocs::PriorityQueueScoreDocs(const HitQueueBasePtr& hitQueue, int32_t size) : PriorityQueue(size) { + _hitQueue = hitQueue; +} + +PriorityQueueScoreDocs::~PriorityQueueScoreDocs() { +} + +bool PriorityQueueScoreDocs::lessThan(const ScoreDocPtr& first, const ScoreDocPtr& second) { + return HitQueueBasePtr(_hitQueue)->lessThan(first, second); +} + +ScoreDocPtr PriorityQueueScoreDocs::getSentinelObject() { + return HitQueueBasePtr(_hitQueue)->getSentinelObject(); +} + } diff --git a/src/core/search/IndexSearcher.cpp b/src/core/search/IndexSearcher.cpp index b0a49d37..293b258d 100644 --- a/src/core/search/IndexSearcher.cpp +++ b/src/core/search/IndexSearcher.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -17,195 +17,172 @@ #include "Query.h" #include "ReaderUtil.h" -namespace Lucene -{ - IndexSearcher::IndexSearcher(DirectoryPtr path, bool readOnly) - { - ConstructSearcher(IndexReader::open(path, readOnly), true); - } - - IndexSearcher::IndexSearcher(IndexReaderPtr reader) - { - ConstructSearcher(reader, false); +namespace Lucene { + +IndexSearcher::IndexSearcher(const DirectoryPtr& path, bool readOnly) { + ConstructSearcher(IndexReader::open(path, readOnly), true); +} + +IndexSearcher::IndexSearcher(const IndexReaderPtr& reader) { + ConstructSearcher(reader, false); +} + +IndexSearcher::IndexSearcher(const IndexReaderPtr& reader, Collection subReaders, Collection docStarts) { + this->fieldSortDoTrackScores = false; + this->fieldSortDoMaxScore = false; + this->reader = reader; + this->subReaders = subReaders; + this->docStarts = docStarts; + closeReader = false; +} + +IndexSearcher::~IndexSearcher() { +} + +void IndexSearcher::ConstructSearcher(const IndexReaderPtr& reader, bool closeReader) { + this->fieldSortDoTrackScores = false; + this->fieldSortDoMaxScore = false; + this->reader = reader; + this->closeReader = closeReader; + + Collection subReadersList(Collection::newInstance()); + gatherSubReaders(subReadersList, reader); + subReaders = subReadersList; + docStarts = Collection::newInstance(subReaders.size()); + int32_t maxDoc = 0; + for (int32_t i = 0; i < subReaders.size(); ++i) { + docStarts[i] = maxDoc; + maxDoc += subReaders[i]->maxDoc(); } - - IndexSearcher::IndexSearcher(IndexReaderPtr reader, Collection subReaders, Collection docStarts) - { - this->fieldSortDoTrackScores = false; - this->fieldSortDoMaxScore = false; - this->reader = reader; - this->subReaders = subReaders; - this->docStarts = docStarts; - closeReader = false; +} + +void 
IndexSearcher::gatherSubReaders(Collection allSubReaders, const IndexReaderPtr& reader) { + ReaderUtil::gatherSubReaders(allSubReaders, reader); +} + +IndexReaderPtr IndexSearcher::getIndexReader() { + return reader; +} + +void IndexSearcher::close() { + if (closeReader) { + reader->close(); } - - IndexSearcher::~IndexSearcher() - { +} + +int32_t IndexSearcher::docFreq(const TermPtr& term) { + return reader->docFreq(term); +} + +DocumentPtr IndexSearcher::doc(int32_t n) { + return reader->document(n); +} + +DocumentPtr IndexSearcher::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { + return reader->document(n, fieldSelector); +} + +int32_t IndexSearcher::maxDoc() { + return reader->maxDoc(); +} + +TopDocsPtr IndexSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { + if (n <= 0) { + boost::throw_exception(IllegalArgumentException(L"n must be > 0")); } - - void IndexSearcher::ConstructSearcher(IndexReaderPtr reader, bool closeReader) - { - this->fieldSortDoTrackScores = false; - this->fieldSortDoMaxScore = false; - this->reader = reader; - this->closeReader = closeReader; - - Collection subReadersList(Collection::newInstance()); - gatherSubReaders(subReadersList, reader); - subReaders = subReadersList; - docStarts = Collection::newInstance(subReaders.size()); - int32_t maxDoc = 0; - for (int32_t i = 0; i < subReaders.size(); ++i) - { - docStarts[i] = maxDoc; - maxDoc += subReaders[i]->maxDoc(); + TopScoreDocCollectorPtr collector(TopScoreDocCollector::create(std::min(n, reader->maxDoc()), !weight->scoresDocsOutOfOrder())); + search(weight, filter, collector); + return collector->topDocs(); +} + +TopFieldDocsPtr IndexSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { + return search(weight, filter, n, sort, true); +} + +TopFieldDocsPtr IndexSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort, bool fillFields) { + TopFieldCollectorPtr 
collector(TopFieldCollector::create(sort, std::min(n, reader->maxDoc()), fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight->scoresDocsOutOfOrder())); + search(weight, filter, collector); + return boost::dynamic_pointer_cast(collector->topDocs()); +} + +void IndexSearcher::search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results) { + if (!filter) { + for (int32_t i = 0; i < subReaders.size(); ++i) { // search each subreader + results->setNextReader(subReaders[i], docStarts[i]); + ScorerPtr scorer(weight->scorer(subReaders[i], !results->acceptsDocsOutOfOrder(), true)); + if (scorer) { + scorer->score(results); + } + } + } else { + for (int32_t i = 0; i < subReaders.size(); ++i) { // search each subreader + results->setNextReader(subReaders[i], docStarts[i]); + searchWithFilter(subReaders[i], weight, filter, results); } } - - void IndexSearcher::gatherSubReaders(Collection allSubReaders, IndexReaderPtr reader) - { - ReaderUtil::gatherSubReaders(allSubReaders, reader); - } - - IndexReaderPtr IndexSearcher::getIndexReader() - { - return reader; - } - - void IndexSearcher::close() - { - if (closeReader) - reader->close(); - } - - int32_t IndexSearcher::docFreq(TermPtr term) - { - return reader->docFreq(term); - } - - DocumentPtr IndexSearcher::doc(int32_t n) - { - return reader->document(n); - } - - DocumentPtr IndexSearcher::doc(int32_t n, FieldSelectorPtr fieldSelector) - { - return reader->document(n, fieldSelector); - } - - int32_t IndexSearcher::maxDoc() - { - return reader->maxDoc(); - } - - TopDocsPtr IndexSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n) - { - if (n <= 0) - boost::throw_exception(IllegalArgumentException(L"n must be > 0")); - TopScoreDocCollectorPtr collector(TopScoreDocCollector::create(std::min(n, reader->maxDoc()), !weight->scoresDocsOutOfOrder())); - search(weight, filter, collector); - return collector->topDocs(); - } - - TopFieldDocsPtr IndexSearcher::search(WeightPtr weight, 
FilterPtr filter, int32_t n, SortPtr sort) - { - return search(weight, filter, n, sort, true); +} + +void IndexSearcher::searchWithFilter(const IndexReaderPtr& reader, const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& collector) { + BOOST_ASSERT(filter); + + ScorerPtr scorer(weight->scorer(reader, true, false)); + if (!scorer) { + return; } - - TopFieldDocsPtr IndexSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort, bool fillFields) - { - TopFieldCollectorPtr collector(TopFieldCollector::create(sort, std::min(n, reader->maxDoc()), fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight->scoresDocsOutOfOrder())); - search(weight, filter, collector); - return boost::dynamic_pointer_cast(collector->topDocs()); + + int32_t docID = scorer->docID(); + BOOST_ASSERT(docID == -1 || docID == DocIdSetIterator::NO_MORE_DOCS); + + DocIdSetPtr filterDocIdSet(filter->getDocIdSet(reader)); + if (!filterDocIdSet) { + // this means the filter does not accept any documents. + return; } - - void IndexSearcher::search(WeightPtr weight, FilterPtr filter, CollectorPtr results) - { - if (!filter) - { - for (int32_t i = 0; i < subReaders.size(); ++i) // search each subreader - { - results->setNextReader(subReaders[i], docStarts[i]); - ScorerPtr scorer(weight->scorer(subReaders[i], !results->acceptsDocsOutOfOrder(), true)); - if (scorer) - scorer->score(results); - } - } - else - { - for (int32_t i = 0; i < subReaders.size(); ++i) // search each subreader - { - results->setNextReader(subReaders[i], docStarts[i]); - searchWithFilter(subReaders[i], weight, filter, results); - } - } + + DocIdSetIteratorPtr filterIter(filterDocIdSet->iterator()); + if (!filterIter) { + // this means the filter does not accept any documents. 
+ return; } - - void IndexSearcher::searchWithFilter(IndexReaderPtr reader, WeightPtr weight, FilterPtr filter, CollectorPtr collector) - { - BOOST_ASSERT(filter); - - ScorerPtr scorer(weight->scorer(reader, true, false)); - if (!scorer) - return; - - int32_t docID = scorer->docID(); - BOOST_ASSERT(docID == -1 || docID == DocIdSetIterator::NO_MORE_DOCS); - - DocIdSetPtr filterDocIdSet(filter->getDocIdSet(reader)); - if (!filterDocIdSet) - { - // this means the filter does not accept any documents. - return; - } - - DocIdSetIteratorPtr filterIter(filterDocIdSet->iterator()); - if (!filterIter) - { - // this means the filter does not accept any documents. - return; - } - - int32_t filterDoc = filterIter->nextDoc(); - int32_t scorerDoc = scorer->advance(filterDoc); - - collector->setScorer(scorer); - while (true) - { - if (scorerDoc == filterDoc) - { - // Check if scorer has exhausted, only before collecting. - if (scorerDoc == DocIdSetIterator::NO_MORE_DOCS) - break; - collector->collect(scorerDoc); - filterDoc = filterIter->nextDoc(); - scorerDoc = scorer->advance(filterDoc); + + int32_t filterDoc = filterIter->nextDoc(); + int32_t scorerDoc = scorer->advance(filterDoc); + + collector->setScorer(scorer); + while (true) { + if (scorerDoc == filterDoc) { + // Check if scorer has exhausted, only before collecting. 
+ if (scorerDoc == DocIdSetIterator::NO_MORE_DOCS) { + break; } - else if (scorerDoc > filterDoc) - filterDoc = filterIter->advance(scorerDoc); - else - scorerDoc = scorer->advance(filterDoc); + collector->collect(scorerDoc); + filterDoc = filterIter->nextDoc(); + scorerDoc = scorer->advance(filterDoc); + } else if (scorerDoc > filterDoc) { + filterDoc = filterIter->advance(scorerDoc); + } else { + scorerDoc = scorer->advance(filterDoc); } } - - QueryPtr IndexSearcher::rewrite(QueryPtr original) - { - QueryPtr query(original); - for (QueryPtr rewrittenQuery(query->rewrite(reader)); rewrittenQuery != query; rewrittenQuery = query->rewrite(reader)) - query = rewrittenQuery; - return query; - } - - ExplanationPtr IndexSearcher::explain(WeightPtr weight, int32_t doc) - { - int32_t n = ReaderUtil::subIndex(doc, docStarts); - int32_t deBasedDoc = doc - docStarts[n]; - return weight->explain(subReaders[n], deBasedDoc); - } - - void IndexSearcher::setDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore) - { - fieldSortDoTrackScores = doTrackScores; - fieldSortDoMaxScore = doMaxScore; +} + +QueryPtr IndexSearcher::rewrite(const QueryPtr& original) { + QueryPtr query(original); + for (QueryPtr rewrittenQuery(query->rewrite(reader)); rewrittenQuery != query; rewrittenQuery = query->rewrite(reader)) { + query = rewrittenQuery; } + return query; +} + +ExplanationPtr IndexSearcher::explain(const WeightPtr& weight, int32_t doc) { + int32_t n = ReaderUtil::subIndex(doc, docStarts); + int32_t deBasedDoc = doc - docStarts[n]; + return weight->explain(subReaders[n], deBasedDoc); +} + +void IndexSearcher::setDefaultFieldSortScoring(bool doTrackScores, bool doMaxScore) { + fieldSortDoTrackScores = doTrackScores; + fieldSortDoMaxScore = doMaxScore; +} + } diff --git a/src/core/search/MatchAllDocsQuery.cpp b/src/core/search/MatchAllDocsQuery.cpp index 0b5e7e15..8b0364ce 100644 --- a/src/core/search/MatchAllDocsQuery.cpp +++ b/src/core/search/MatchAllDocsQuery.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,139 +14,118 @@ #include "Searcher.h" #include "MiscUtils.h" -namespace Lucene -{ - MatchAllDocsQuery::MatchAllDocsQuery(const String& normsField) - { - this->normsField = normsField; - } - - MatchAllDocsQuery::~MatchAllDocsQuery() - { - } - - WeightPtr MatchAllDocsQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } - - void MatchAllDocsQuery::extractTerms(SetTerm terms) - { - } - - String MatchAllDocsQuery::toString(const String& field) - { - StringStream buffer; - buffer << L"*:*" << boostString(); - return buffer.str(); - } - - bool MatchAllDocsQuery::equals(LuceneObjectPtr other) - { - return Query::equals(other); - } - - int32_t MatchAllDocsQuery::hashCode() - { - return MiscUtils::doubleToIntBits(getBoost()) ^ 0x1aa71190; - } - - LuceneObjectPtr MatchAllDocsQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - MatchAllDocsQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); - cloneQuery->normsField = normsField; - return cloneQuery; - } - - MatchAllDocsWeight::MatchAllDocsWeight(MatchAllDocsQueryPtr query, SearcherPtr searcher) - { - this->query = query; - this->similarity = searcher->getSimilarity(); - this->queryWeight = 0.0; - this->queryNorm = 0.0; - } - - MatchAllDocsWeight::~MatchAllDocsWeight() - { - } - - String MatchAllDocsWeight::toString() - { - StringStream buffer; - buffer << L"weight(" << queryWeight << L", " << queryNorm << L")"; - return buffer.str(); - } - - QueryPtr MatchAllDocsWeight::getQuery() - { - return query; - } - - double MatchAllDocsWeight::getValue() - { - return queryWeight; - } - - double MatchAllDocsWeight::sumOfSquaredWeights() - { - queryWeight = getQuery()->getBoost(); - return queryWeight * queryWeight; - } - - void MatchAllDocsWeight::normalize(double norm) - { - this->queryNorm = norm; - queryWeight *= this->queryNorm; - } - - ScorerPtr MatchAllDocsWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - return newLucene(query, reader, similarity, shared_from_this(), !query->normsField.empty() ? 
reader->norms(query->normsField) : ByteArray()); - } - - ExplanationPtr MatchAllDocsWeight::explain(IndexReaderPtr reader, int32_t doc) - { - // explain query weight - ExplanationPtr queryExpl(newLucene(true, getValue(), L"MatchAllDocsQuery, product of:")); - if (getQuery()->getBoost() != 1.0) - queryExpl->addDetail(newLucene(getQuery()->getBoost(), L"boost")); - queryExpl->addDetail(newLucene(queryNorm, L"queryNorm")); - return queryExpl; - } - - MatchAllScorer::MatchAllScorer(MatchAllDocsQueryPtr query, IndexReaderPtr reader, SimilarityPtr similarity, WeightPtr weight, ByteArray norms) : Scorer(similarity) - { - this->query = query; - this->termDocs = reader->termDocs(TermPtr()); - this->_score = weight->getValue(); - this->norms = norms; - this->doc = -1; - } - - MatchAllScorer::~MatchAllScorer() - { - } - - int32_t MatchAllScorer::docID() - { - return doc; - } - - int32_t MatchAllScorer::nextDoc() - { - doc = termDocs->next() ? termDocs->doc() : NO_MORE_DOCS; - return doc; - } - - double MatchAllScorer::score() - { - return norms ? _score * Similarity::decodeNorm(norms[docID()]) : _score; - } - - int32_t MatchAllScorer::advance(int32_t target) - { - doc = termDocs->skipTo(target) ? 
termDocs->doc() : NO_MORE_DOCS; - return doc; - } +namespace Lucene { + +MatchAllDocsQuery::MatchAllDocsQuery(const String& normsField) { + this->normsField = normsField; +} + +MatchAllDocsQuery::~MatchAllDocsQuery() { +} + +WeightPtr MatchAllDocsQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +void MatchAllDocsQuery::extractTerms(SetTerm terms) { +} + +String MatchAllDocsQuery::toString(const String& field) { + StringStream buffer; + buffer << L"*:*" << boostString(); + return buffer.str(); +} + +bool MatchAllDocsQuery::equals(const LuceneObjectPtr& other) { + return Query::equals(other); +} + +int32_t MatchAllDocsQuery::hashCode() { + return MiscUtils::doubleToIntBits(getBoost()) ^ 0x1aa71190; +} + +LuceneObjectPtr MatchAllDocsQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + MatchAllDocsQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); + cloneQuery->normsField = normsField; + return cloneQuery; +} + +MatchAllDocsWeight::MatchAllDocsWeight(const MatchAllDocsQueryPtr& query, const SearcherPtr& searcher) { + this->query = query; + this->similarity = searcher->getSimilarity(); + this->queryWeight = 0.0; + this->queryNorm = 0.0; +} + +MatchAllDocsWeight::~MatchAllDocsWeight() { +} + +String MatchAllDocsWeight::toString() { + StringStream buffer; + buffer << L"weight(" << queryWeight << L", " << queryNorm << L")"; + return buffer.str(); +} + +QueryPtr MatchAllDocsWeight::getQuery() { + return query; +} + +double MatchAllDocsWeight::getValue() { + return queryWeight; +} + +double MatchAllDocsWeight::sumOfSquaredWeights() { + queryWeight = getQuery()->getBoost(); + return queryWeight * queryWeight; +} + +void MatchAllDocsWeight::normalize(double norm) { + this->queryNorm = norm; + queryWeight *= this->queryNorm; +} + +ScorerPtr MatchAllDocsWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + return 
newLucene(query, reader, similarity, shared_from_this(), !query->normsField.empty() ? reader->norms(query->normsField) : ByteArray()); +} + +ExplanationPtr MatchAllDocsWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + // explain query weight + ExplanationPtr queryExpl(newLucene(true, getValue(), L"MatchAllDocsQuery, product of:")); + if (getQuery()->getBoost() != 1.0) { + queryExpl->addDetail(newLucene(getQuery()->getBoost(), L"boost")); + } + queryExpl->addDetail(newLucene(queryNorm, L"queryNorm")); + return queryExpl; +} + +MatchAllScorer::MatchAllScorer(const MatchAllDocsQueryPtr& query, const IndexReaderPtr& reader, const SimilarityPtr& similarity, const WeightPtr& weight, ByteArray norms) : Scorer(similarity) { + this->query = query; + this->termDocs = reader->termDocs(TermPtr()); + this->_score = weight->getValue(); + this->norms = norms; + this->doc = -1; +} + +MatchAllScorer::~MatchAllScorer() { +} + +int32_t MatchAllScorer::docID() { + return doc; +} + +int32_t MatchAllScorer::nextDoc() { + doc = termDocs->next() ? termDocs->doc() : NO_MORE_DOCS; + return doc; +} + +double MatchAllScorer::score() { + return norms ? _score * Similarity::decodeNorm(norms[docID()]) : _score; +} + +int32_t MatchAllScorer::advance(int32_t target) { + doc = termDocs->skipTo(target) ? termDocs->doc() : NO_MORE_DOCS; + return doc; +} + } diff --git a/src/core/search/MultiPhraseQuery.cpp b/src/core/search/MultiPhraseQuery.cpp index 5a04e786..e8968241 100644 --- a/src/core/search/MultiPhraseQuery.cpp +++ b/src/core/search/MultiPhraseQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -20,313 +20,299 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - MultiPhraseQuery::MultiPhraseQuery() - { - termArrays = Collection< Collection >::newInstance(); - positions = Collection::newInstance(); - slop = 0; - } - - MultiPhraseQuery::~MultiPhraseQuery() - { - } - - void MultiPhraseQuery::setSlop(int32_t s) - { - slop = s; - } - - int32_t MultiPhraseQuery::getSlop() - { - return slop; - } - - void MultiPhraseQuery::add(TermPtr term) - { - add(newCollection(term)); +namespace Lucene { + +MultiPhraseQuery::MultiPhraseQuery() { + termArrays = Collection< Collection >::newInstance(); + positions = Collection::newInstance(); + slop = 0; +} + +MultiPhraseQuery::~MultiPhraseQuery() { +} + +void MultiPhraseQuery::setSlop(int32_t s) { + slop = s; +} + +int32_t MultiPhraseQuery::getSlop() { + return slop; +} + +void MultiPhraseQuery::add(const TermPtr& term) { + add(newCollection(term)); +} + +void MultiPhraseQuery::add(Collection terms) { + int32_t position = 0; + if (!positions.empty()) { + position = positions[positions.size() - 1] + 1; } - - void MultiPhraseQuery::add(Collection terms) - { - int32_t position = 0; - if (!positions.empty()) - position = positions[positions.size() - 1] + 1; - add(terms, position); + add(terms, position); +} + +void MultiPhraseQuery::add(Collection terms, int32_t position) { + if (termArrays.empty()) { + field = terms[0]->field(); } - - void MultiPhraseQuery::add(Collection terms, int32_t position) - { - if (termArrays.empty()) - field = terms[0]->field(); - for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) - { - if ((*term)->field() != field) - boost::throw_exception(IllegalArgumentException(L"All phrase terms must be in the same field (" + field + L"): " + (*term)->toString())); + for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { + if ((*term)->field() != field) { + 
boost::throw_exception(IllegalArgumentException(L"All phrase terms must be in the same field (" + field + L"): " + (*term)->toString())); } - termArrays.add(terms); - positions.add(position); - } - - Collection< Collection > MultiPhraseQuery::getTermArrays() - { - return termArrays; - } - - Collection MultiPhraseQuery::getPositions() - { - return positions; } - - void MultiPhraseQuery::extractTerms(SetTerm terms) - { - for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) - { - for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) - terms.add(*term); + termArrays.add(terms); + positions.add(position); +} + +Collection< Collection > MultiPhraseQuery::getTermArrays() { + return termArrays; +} + +Collection MultiPhraseQuery::getPositions() { + return positions; +} + +void MultiPhraseQuery::extractTerms(SetTerm terms) { + for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) { + for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) { + terms.add(*term); } } - - QueryPtr MultiPhraseQuery::rewrite(IndexReaderPtr reader) - { - if (termArrays.size() == 1) // optimize one-term case - { - Collection terms(termArrays[0]); - BooleanQueryPtr boq(newLucene(true)); - for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) - boq->add(newLucene(*term), BooleanClause::SHOULD); - boq->setBoost(getBoost()); - return boq; +} + +QueryPtr MultiPhraseQuery::rewrite(const IndexReaderPtr& reader) { + if (termArrays.size() == 1) { // optimize one-term case + Collection terms(termArrays[0]); + BooleanQueryPtr boq(newLucene(true)); + for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { + boq->add(newLucene(*term), BooleanClause::SHOULD); } - else - return shared_from_this(); + boq->setBoost(getBoost()); + return boq; + } else { + return shared_from_this(); } - - WeightPtr MultiPhraseQuery::createWeight(SearcherPtr 
searcher) - { - return newLucene(shared_from_this(), searcher); +} + +WeightPtr MultiPhraseQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +String MultiPhraseQuery::toString(const String& field) { + StringStream buffer; + if (this->field != field) { + buffer << this->field << L":"; } - - String MultiPhraseQuery::toString(const String& field) - { - StringStream buffer; - if (this->field != field) - buffer << this->field << L":"; - buffer << L"\""; - for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) - { - if (arr != termArrays.begin()) - buffer << L" "; - if (arr->size() > 1) - { - buffer << L"("; - for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) - { - if (term != arr->begin()) - buffer << L" "; - buffer << (*term)->text(); + buffer << L"\""; + for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) { + if (arr != termArrays.begin()) { + buffer << L" "; + } + if (arr->size() > 1) { + buffer << L"("; + for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) { + if (term != arr->begin()) { + buffer << L" "; } - buffer << L")"; + buffer << (*term)->text(); } - else if (!arr->empty()) - buffer << (*arr)[0]->text(); + buffer << L")"; + } else if (!arr->empty()) { + buffer << (*arr)[0]->text(); } - buffer << L"\""; - - if (slop != 0) - buffer << L"~" << slop; - - buffer << boostString(); - - return buffer.str(); - } - - bool MultiPhraseQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - MultiPhraseQueryPtr otherMultiPhraseQuery(boost::dynamic_pointer_cast(other)); - if (!otherMultiPhraseQuery) - return false; - - return (getBoost() == otherMultiPhraseQuery->getBoost() && slop == otherMultiPhraseQuery->slop && - termArraysEquals(termArrays, otherMultiPhraseQuery->termArrays) && - positions.equals(otherMultiPhraseQuery->positions)); } - - 
int32_t MultiPhraseQuery::hashCode() - { - return MiscUtils::doubleToIntBits(getBoost()) ^ slop ^ termArraysHashCode() ^ MiscUtils::hashCode(positions.begin(), positions.end(), MiscUtils::hashNumeric) ^ 0x4ac65113; + buffer << L"\""; + + if (slop != 0) { + buffer << L"~" << slop; } - - int32_t MultiPhraseQuery::termArraysHashCode() - { - int32_t hashCode = 1; - for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) - hashCode = 31 * hashCode + MiscUtils::hashCode(arr->begin(), arr->end(), MiscUtils::hashLucene); - return hashCode; + + buffer << boostString(); + + return buffer.str(); +} + +bool MultiPhraseQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - struct equalTermArrays - { - inline bool operator()(const Collection& first, const Collection& second) const - { - if (first.size() != second.size()) - return false; - return first.equals(second, luceneEquals()); - } - }; - - bool MultiPhraseQuery::termArraysEquals(Collection< Collection > first, Collection< Collection > second) - { - return first.equals(second, equalTermArrays()); + + MultiPhraseQueryPtr otherMultiPhraseQuery(boost::dynamic_pointer_cast(other)); + if (!otherMultiPhraseQuery) { + return false; } - - LuceneObjectPtr MultiPhraseQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - MultiPhraseQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); - cloneQuery->field = field; - cloneQuery->termArrays = termArrays; - cloneQuery->positions = positions; - cloneQuery->slop = slop; - return cloneQuery; + + return (getBoost() == otherMultiPhraseQuery->getBoost() && slop == otherMultiPhraseQuery->slop && + termArraysEquals(termArrays, otherMultiPhraseQuery->termArrays) && + positions.equals(otherMultiPhraseQuery->positions)); +} + +int32_t MultiPhraseQuery::hashCode() { + return MiscUtils::doubleToIntBits(getBoost()) ^ slop ^ termArraysHashCode() ^ MiscUtils::hashCode(positions.begin(), positions.end(), MiscUtils::hashNumeric) ^ 0x4ac65113; +} + +int32_t MultiPhraseQuery::termArraysHashCode() { + int32_t hashCode = 1; + for (Collection< Collection >::iterator arr = termArrays.begin(); arr != termArrays.end(); ++arr) { + hashCode = 31 * hashCode + MiscUtils::hashCode(arr->begin(), arr->end(), MiscUtils::hashLucene); } - - MultiPhraseWeight::MultiPhraseWeight(MultiPhraseQueryPtr query, SearcherPtr searcher) - { - this->query = query; - this->similarity = query->getSimilarity(searcher); - this->value = 0.0; - this->idf = 0.0; - this->queryNorm = 0.0; - this->queryWeight = 0.0; - - // compute idf - int32_t maxDoc = searcher->maxDoc(); - for (Collection< Collection >::iterator arr = query->termArrays.begin(); arr != query->termArrays.end(); ++arr) - { - for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) - idf += this->similarity->idf(searcher->docFreq(*term), maxDoc); + return hashCode; +} + +struct equalTermArrays { + inline bool operator()(const Collection& first, const Collection& second) const { + if (first.size() != second.size()) { + return false; } + return first.equals(second, luceneEquals()); } - - MultiPhraseWeight::~MultiPhraseWeight() - { +}; + +bool MultiPhraseQuery::termArraysEquals(Collection< Collection > first, Collection< Collection > second) { + return first.equals(second, 
equalTermArrays()); +} + +LuceneObjectPtr MultiPhraseQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + MultiPhraseQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); + cloneQuery->field = field; + cloneQuery->termArrays = termArrays; + cloneQuery->positions = positions; + cloneQuery->slop = slop; + return cloneQuery; +} + +MultiPhraseWeight::MultiPhraseWeight(const MultiPhraseQueryPtr& query, const SearcherPtr& searcher) { + this->query = query; + this->similarity = query->getSimilarity(searcher); + this->value = 0.0; + this->idf = 0.0; + this->queryNorm = 0.0; + this->queryWeight = 0.0; + + // compute idf + int32_t maxDoc = searcher->maxDoc(); + for (Collection< Collection >::iterator arr = query->termArrays.begin(); arr != query->termArrays.end(); ++arr) { + for (Collection::iterator term = arr->begin(); term != arr->end(); ++term) { + idf += this->similarity->idf(searcher->docFreq(*term), maxDoc); + } } - - QueryPtr MultiPhraseWeight::getQuery() - { - return query; +} + +MultiPhraseWeight::~MultiPhraseWeight() { +} + +QueryPtr MultiPhraseWeight::getQuery() { + return query; +} + +double MultiPhraseWeight::getValue() { + return value; +} + +double MultiPhraseWeight::sumOfSquaredWeights() { + queryWeight = idf * getQuery()->getBoost(); // compute query weight + return queryWeight * queryWeight; // square it +} + +void MultiPhraseWeight::normalize(double norm) { + queryNorm = norm; + queryWeight *= queryNorm; // normalize query weight + value = queryWeight * idf; // idf for document +} + +ScorerPtr MultiPhraseWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + if (query->termArrays.empty()) { // optimize zero-term case + return ScorerPtr(); } - - double MultiPhraseWeight::getValue() - { - return value; + + Collection tps(Collection::newInstance(query->termArrays.size())); + for (int32_t i = 0; i < tps.size(); ++i) { + Collection terms(query->termArrays[i]); + 
+ TermPositionsPtr p; + if (terms.size() > 1) { + p = newLucene(reader, terms); + } else { + p = reader->termPositions(terms[0]); + } + + if (!p) { + return ScorerPtr(); + } + + tps[i] = p; } - - double MultiPhraseWeight::sumOfSquaredWeights() - { - queryWeight = idf * getQuery()->getBoost(); // compute query weight - return queryWeight * queryWeight; // square it + + if (query->slop == 0) { // optimize exact case + return newLucene(shared_from_this(), tps, query->getPositions(), similarity, reader->norms(query->field)); + } else { + return newLucene(shared_from_this(), tps, query->getPositions(), similarity, query->slop, reader->norms(query->field)); } - - void MultiPhraseWeight::normalize(double norm) - { - queryNorm = norm; - queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document +} + +ExplanationPtr MultiPhraseWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + ComplexExplanationPtr result(newLucene()); + result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); + + ExplanationPtr idfExpl(newLucene(idf, L"idf(" + query->toString() + L")")); + + // explain query weight + ExplanationPtr queryExpl(newLucene()); + queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); + + ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); + if (query->getBoost() != 1.0) { + queryExpl->addDetail(boostExpl); } - - ScorerPtr MultiPhraseWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - if (query->termArrays.empty()) // optimize zero-term case - return ScorerPtr(); - - Collection tps(Collection::newInstance(query->termArrays.size())); - for (int32_t i = 0; i < tps.size(); ++i) - { - Collection terms(query->termArrays[i]); - - TermPositionsPtr p; - if (terms.size() > 1) - p = newLucene(reader, terms); - else - p = reader->termPositions(terms[0]); - - if (!p) - return ScorerPtr(); - - tps[i] = p; - } 
- - if (query->slop == 0) // optimize exact case - return newLucene(shared_from_this(), tps, query->getPositions(), similarity, reader->norms(query->field)); - else - return newLucene(shared_from_this(), tps, query->getPositions(), similarity, query->slop, reader->norms(query->field)); + + queryExpl->addDetail(idfExpl); + + ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); + queryExpl->addDetail(queryNormExpl); + + queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); + result->addDetail(queryExpl); + + // explain field weight + ComplexExplanationPtr fieldExpl(newLucene()); + fieldExpl->setDescription(L"fieldWeight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); + + PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast(scorer(reader, true, false))); + if (!phraseScorer) { + return newLucene(0.0, L"no matching docs"); } - - ExplanationPtr MultiPhraseWeight::explain(IndexReaderPtr reader, int32_t doc) - { - ComplexExplanationPtr result(newLucene()); - result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); - - ExplanationPtr idfExpl(newLucene(idf, L"idf(" + query->toString() + L")")); - - // explain query weight - ExplanationPtr queryExpl(newLucene()); - queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); - - ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); - if (query->getBoost() != 1.0) - queryExpl->addDetail(boostExpl); - - queryExpl->addDetail(idfExpl); - - ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); - queryExpl->addDetail(queryNormExpl); - - queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); - result->addDetail(queryExpl); - - // explain field weight - ComplexExplanationPtr fieldExpl(newLucene()); - fieldExpl->setDescription(L"fieldWeight(" + query->toString() + L" in " + StringUtils::toString(doc) + 
L"), product of:"); - - PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast(scorer(reader, true, false))); - if (!phraseScorer) - return newLucene(0.0, L"no matching docs"); - - ExplanationPtr tfExplanation(newLucene()); - int32_t d = phraseScorer->advance(doc); - double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0; - tfExplanation->setValue(similarity->tf(phraseFreq)); - tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); - - fieldExpl->addDetail(tfExplanation); - fieldExpl->addDetail(idfExpl); - - ExplanationPtr fieldNormExpl(newLucene()); - ByteArray fieldNorms(reader->norms(query->field)); - double fieldNorm = fieldNorms ? Similarity::decodeNorm(fieldNorms[doc]) : 1.0; - fieldNormExpl->setValue(fieldNorm); - fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")"); - fieldExpl->addDetail(fieldNormExpl); - - fieldExpl->setMatch(tfExplanation->isMatch()); - fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); - - result->addDetail(fieldExpl); - result->setMatch(fieldExpl->getMatch()); - - // combine them - result->setValue(queryExpl->getValue() * fieldExpl->getValue()); - - if (queryExpl->getValue() == 1.0) - return fieldExpl; - - return result; + + ExplanationPtr tfExplanation(newLucene()); + int32_t d = phraseScorer->advance(doc); + double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0; + tfExplanation->setValue(similarity->tf(phraseFreq)); + tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); + + fieldExpl->addDetail(tfExplanation); + fieldExpl->addDetail(idfExpl); + + ExplanationPtr fieldNormExpl(newLucene()); + ByteArray fieldNorms(reader->norms(query->field)); + double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; + fieldNormExpl->setValue(fieldNorm); + fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")"); + fieldExpl->addDetail(fieldNormExpl); + + fieldExpl->setMatch(tfExplanation->isMatch()); + fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); + + result->addDetail(fieldExpl); + result->setMatch(fieldExpl->getMatch()); + + // combine them + result->setValue(queryExpl->getValue() * fieldExpl->getValue()); + + if (queryExpl->getValue() == 1.0) { + return fieldExpl; } + + return result; +} + } diff --git a/src/core/search/MultiSearcher.cpp b/src/core/search/MultiSearcher.cpp index 6bf1b48a..d3276902 100644 --- a/src/core/search/MultiSearcher.cpp +++ b/src/core/search/MultiSearcher.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -20,369 +20,329 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - MultiSearcher::MultiSearcher(Collection searchables) - { - this->searchables = searchables; - this->_maxDoc = 0; - - this->starts = Collection::newInstance(searchables.size() + 1); // build starts array - for (int32_t i = 0; i < searchables.size(); ++i) - { - starts[i] = _maxDoc; - _maxDoc += searchables[i]->maxDoc(); // compute maxDocs - } - starts[searchables.size()] = _maxDoc; - } - - MultiSearcher::~MultiSearcher() - { - } - - Collection MultiSearcher::getSearchables() - { - return searchables; - } - - Collection MultiSearcher::getStarts() - { - return starts; - } - - void MultiSearcher::close() - { - for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) - (*searchable)->close(); - } - - int32_t MultiSearcher::docFreq(TermPtr term) - { - int32_t docFreq = 0; - for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) - docFreq += (*searchable)->docFreq(term); - return docFreq; - } - - DocumentPtr MultiSearcher::doc(int32_t n) - { - int32_t i = subSearcher(n); // find searcher index - return searchables[i]->doc(n - starts[i]); // dispatch to searcher - } - - DocumentPtr MultiSearcher::doc(int32_t n, FieldSelectorPtr fieldSelector) - { - int32_t i = subSearcher(n); // find searcher index - return searchables[i]->doc(n - starts[i], fieldSelector); // dispatch to searcher +namespace Lucene { + +MultiSearcher::MultiSearcher(Collection searchables) { + this->searchables = searchables; + this->_maxDoc = 0; + + this->starts = Collection::newInstance(searchables.size() + 1); // build starts array + for (int32_t i = 0; i < searchables.size(); ++i) { + starts[i] = _maxDoc; + _maxDoc += searchables[i]->maxDoc(); // compute maxDocs } - - int32_t MultiSearcher::subSearcher(int32_t n) - { - return 
ReaderUtil::subIndex(n, starts); + starts[searchables.size()] = _maxDoc; +} + +MultiSearcher::~MultiSearcher() { +} + +Collection MultiSearcher::getSearchables() { + return searchables; +} + +Collection MultiSearcher::getStarts() { + return starts; +} + +void MultiSearcher::close() { + for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) { + (*searchable)->close(); } - - int32_t MultiSearcher::subDoc(int32_t n) - { - return n - starts[subSearcher(n)]; +} + +int32_t MultiSearcher::docFreq(const TermPtr& term) { + int32_t docFreq = 0; + for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) { + docFreq += (*searchable)->docFreq(term); } - - int32_t MultiSearcher::maxDoc() - { - return _maxDoc; + return docFreq; +} + +DocumentPtr MultiSearcher::doc(int32_t n) { + int32_t i = subSearcher(n); // find searcher index + return searchables[i]->doc(n - starts[i]); // dispatch to searcher +} + +DocumentPtr MultiSearcher::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { + int32_t i = subSearcher(n); // find searcher index + return searchables[i]->doc(n - starts[i], fieldSelector); // dispatch to searcher +} + +int32_t MultiSearcher::subSearcher(int32_t n) { + return ReaderUtil::subIndex(n, starts); +} + +int32_t MultiSearcher::subDoc(int32_t n) { + return n - starts[subSearcher(n)]; +} + +int32_t MultiSearcher::maxDoc() { + return _maxDoc; +} + +TopDocsPtr MultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { + HitQueuePtr hq(newLucene(n, false)); + int32_t totalHits = 0; + + for (int32_t i = 0; i < searchables.size(); ++i) { // search each searcher + TopDocsPtr docs(newLucene(SynchronizePtr(), searchables[i], weight, filter, n, hq, i, starts)->call()); + totalHits += docs->totalHits; // update totalHits } - - TopDocsPtr MultiSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n) - { - HitQueuePtr hq(newLucene(n, false)); - 
int32_t totalHits = 0; - - for (int32_t i = 0; i < searchables.size(); ++i) // search each searcher - { - TopDocsPtr docs(newLucene(SynchronizePtr(), searchables[i], weight, filter, n, hq, i, starts)->call()); - totalHits += docs->totalHits; // update totalHits - } - - Collection scoreDocs(Collection::newInstance(hq->size())); - for (int32_t i = hq->size() - 1; i >= 0; --i) // put docs in array - scoreDocs[i] = hq->pop(); - - double maxScore = totalHits == 0 ? -std::numeric_limits::infinity() : scoreDocs[0]->score; - - return newLucene(totalHits, scoreDocs, maxScore); + + Collection scoreDocs(Collection::newInstance(hq->size())); + for (int32_t i = hq->size() - 1; i >= 0; --i) { // put docs in array + scoreDocs[i] = hq->pop(); } - - TopFieldDocsPtr MultiSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) - { - FieldDocSortedHitQueuePtr hq(newLucene(n)); - int32_t totalHits = 0; - - double maxScore = -std::numeric_limits::infinity(); - - for (int32_t i = 0; i < searchables.size(); ++i) // search each searcher - { - TopFieldDocsPtr docs(newLucene(SynchronizePtr(), searchables[i], weight, filter, n, hq, sort, i, starts)->call()); - totalHits += docs->totalHits; // update totalHits - maxScore = std::max(maxScore, docs->maxScore); - } - - Collection scoreDocs(Collection::newInstance(hq->size())); - for (int32_t i = hq->size() - 1; i >= 0; --i) // put docs in array - scoreDocs[i] = hq->pop(); - - return newLucene(totalHits, scoreDocs, hq->getFields(), maxScore); + + double maxScore = totalHits == 0 ? 
-std::numeric_limits::infinity() : scoreDocs[0]->score; + + return newLucene(totalHits, scoreDocs, maxScore); +} + +TopFieldDocsPtr MultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { + FieldDocSortedHitQueuePtr hq(newLucene(n)); + int32_t totalHits = 0; + + double maxScore = -std::numeric_limits::infinity(); + + for (int32_t i = 0; i < searchables.size(); ++i) { // search each searcher + TopFieldDocsPtr docs(newLucene(SynchronizePtr(), searchables[i], weight, filter, n, hq, sort, i, starts)->call()); + totalHits += docs->totalHits; // update totalHits + maxScore = std::max(maxScore, docs->maxScore); } - - void MultiSearcher::search(WeightPtr weight, FilterPtr filter, CollectorPtr results) - { - for (int32_t i = 0; i < searchables.size(); ++i) - { - int32_t start = starts[i]; - CollectorPtr hc = newLucene(results, start); - searchables[i]->search(weight, filter, hc); - } + + Collection scoreDocs(Collection::newInstance(hq->size())); + for (int32_t i = hq->size() - 1; i >= 0; --i) { // put docs in array + scoreDocs[i] = hq->pop(); } - - QueryPtr MultiSearcher::rewrite(QueryPtr query) - { - Collection queries(Collection::newInstance(searchables.size())); - for (int32_t i = 0; i < searchables.size(); ++i) - queries[i] = searchables[i]->rewrite(query); - return queries[0]->combine(queries); + + return newLucene(totalHits, scoreDocs, hq->getFields(), maxScore); +} + +void MultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results) { + for (int32_t i = 0; i < searchables.size(); ++i) { + int32_t start = starts[i]; + CollectorPtr hc = newLucene(results, start); + searchables[i]->search(weight, filter, hc); } - - ExplanationPtr MultiSearcher::explain(WeightPtr weight, int32_t doc) - { - int32_t i = subSearcher(doc); // find searcher index - return searchables[i]->explain(weight, doc - starts[i]); // dispatch to searcher +} + +QueryPtr MultiSearcher::rewrite(const QueryPtr& query) 
{ + Collection queries(Collection::newInstance(searchables.size())); + for (int32_t i = 0; i < searchables.size(); ++i) { + queries[i] = searchables[i]->rewrite(query); } - - WeightPtr MultiSearcher::createWeight(QueryPtr query) - { - // step 1 - QueryPtr rewrittenQuery(rewrite(query)); - - // step 2 - SetTerm terms(SetTerm::newInstance()); - rewrittenQuery->extractTerms(terms); - - // step3 - Collection allTermsArray(Collection::newInstance(terms.begin(), terms.end())); - Collection aggregatedDfs(Collection::newInstance(terms.size())); - for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) - { - Collection dfs((*searchable)->docFreqs(allTermsArray)); - for (int32_t j = 0; j < aggregatedDfs.size(); ++j) - aggregatedDfs[j] += dfs[j]; + return queries[0]->combine(queries); +} + +ExplanationPtr MultiSearcher::explain(const WeightPtr& weight, int32_t doc) { + int32_t i = subSearcher(doc); // find searcher index + return searchables[i]->explain(weight, doc - starts[i]); // dispatch to searcher +} + +WeightPtr MultiSearcher::createWeight(const QueryPtr& query) { + // step 1 + QueryPtr rewrittenQuery(rewrite(query)); + + // step 2 + SetTerm terms(SetTerm::newInstance()); + rewrittenQuery->extractTerms(terms); + + // step3 + Collection allTermsArray(Collection::newInstance(terms.begin(), terms.end())); + Collection aggregatedDfs(Collection::newInstance(terms.size())); + for (Collection::iterator searchable = searchables.begin(); searchable != searchables.end(); ++searchable) { + Collection dfs((*searchable)->docFreqs(allTermsArray)); + for (int32_t j = 0; j < aggregatedDfs.size(); ++j) { + aggregatedDfs[j] += dfs[j]; } - - MapTermInt dfMap(MapTermInt::newInstance()); - for (int32_t i = 0; i < allTermsArray.size(); ++i) - dfMap.put(allTermsArray[i], aggregatedDfs[i]); - - // step4 - int32_t numDocs = maxDoc(); - CachedDfSourcePtr cacheSim(newLucene(dfMap, numDocs, getSimilarity())); - - return 
rewrittenQuery->weight(cacheSim); - } - - CachedDfSource::CachedDfSource(MapTermInt dfMap, int32_t maxDoc, SimilarityPtr similarity) - { - this->dfMap = dfMap; - this->_maxDoc = maxDoc; - setSimilarity(similarity); - } - - CachedDfSource::~CachedDfSource() - { - } - - int32_t CachedDfSource::docFreq(TermPtr term) - { - MapTermInt::iterator df = dfMap.find(term); - if (df == dfMap.end()) - boost::throw_exception(IllegalArgumentException(L"df for term " + term->text() + L" not available")); - return df->second; } - - Collection CachedDfSource::docFreqs(Collection terms) - { - Collection result(Collection::newInstance(terms.size())); - for (int32_t i = 0; i < terms.size(); ++i) - result[i] = docFreq(terms[i]); - return result; - } - - int32_t CachedDfSource::maxDoc() - { - return _maxDoc; - } - - QueryPtr CachedDfSource::rewrite(QueryPtr query) - { - // This is a bit of a hack. We know that a query which creates a Weight based on this Dummy-Searcher is - // always already rewritten (see preparedWeight()). Therefore we just return the unmodified query here. 
- return query; - } - - void CachedDfSource::close() - { - boost::throw_exception(UnsupportedOperationException()); - } - - DocumentPtr CachedDfSource::doc(int32_t n) - { - boost::throw_exception(UnsupportedOperationException()); - return DocumentPtr(); - } - - DocumentPtr CachedDfSource::doc(int32_t n, FieldSelectorPtr fieldSelector) - { - boost::throw_exception(UnsupportedOperationException()); - return DocumentPtr(); - } - - ExplanationPtr CachedDfSource::explain(WeightPtr weight, int32_t doc) - { - boost::throw_exception(UnsupportedOperationException()); - return ExplanationPtr(); - } - - void CachedDfSource::search(WeightPtr weight, FilterPtr filter, CollectorPtr results) - { - boost::throw_exception(UnsupportedOperationException()); - } - - TopDocsPtr CachedDfSource::search(WeightPtr weight, FilterPtr filter, int32_t n) - { - boost::throw_exception(UnsupportedOperationException()); - return TopDocsPtr(); - } - - TopFieldDocsPtr CachedDfSource::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) - { - boost::throw_exception(UnsupportedOperationException()); - return TopFieldDocsPtr(); + + MapTermInt dfMap(MapTermInt::newInstance()); + for (int32_t i = 0; i < allTermsArray.size(); ++i) { + dfMap.put(allTermsArray[i], aggregatedDfs[i]); } - - MultiSearcherCallableNoSort::MultiSearcherCallableNoSort(SynchronizePtr lock, SearchablePtr searchable, WeightPtr weight, - FilterPtr filter, int32_t nDocs, HitQueuePtr hq, int32_t i, - Collection starts) - { - this->lock = lock; - this->searchable = searchable; - this->weight = weight; - this->filter = filter; - this->nDocs = nDocs; - this->hq = hq; - this->i = i; - this->starts = starts; + + // step4 + int32_t numDocs = maxDoc(); + CachedDfSourcePtr cacheSim(newLucene(dfMap, numDocs, getSimilarity())); + + return rewrittenQuery->weight(cacheSim); +} + +CachedDfSource::CachedDfSource(MapTermInt dfMap, int32_t maxDoc, const SimilarityPtr& similarity) { + this->dfMap = dfMap; + this->_maxDoc = maxDoc; + 
setSimilarity(similarity); +} + +CachedDfSource::~CachedDfSource() { +} + +int32_t CachedDfSource::docFreq(const TermPtr& term) { + MapTermInt::iterator df = dfMap.find(term); + if (df == dfMap.end()) { + boost::throw_exception(IllegalArgumentException(L"df for term " + term->text() + L" not available")); } - - MultiSearcherCallableNoSort::~MultiSearcherCallableNoSort() - { + return df->second; +} + +Collection CachedDfSource::docFreqs(Collection terms) { + Collection result(Collection::newInstance(terms.size())); + for (int32_t i = 0; i < terms.size(); ++i) { + result[i] = docFreq(terms[i]); } - - TopDocsPtr MultiSearcherCallableNoSort::call() - { - TopDocsPtr docs(searchable->search(weight, filter, nDocs)); - Collection scoreDocs(docs->scoreDocs); - for (int32_t j = 0; j < scoreDocs.size(); ++j) // merge scoreDocs into hq - { - ScoreDocPtr scoreDoc(scoreDocs[j]); - scoreDoc->doc += starts[i]; // convert doc - - SyncLock syncLock(lock); - if (scoreDoc == hq->addOverflow(scoreDoc)) - break; + return result; +} + +int32_t CachedDfSource::maxDoc() { + return _maxDoc; +} + +QueryPtr CachedDfSource::rewrite(const QueryPtr& query) { + // This is a bit of a hack. We know that a query which creates a Weight based on this Dummy-Searcher is + // always already rewritten (see preparedWeight()). Therefore we just return the unmodified query here. 
+ return query; +} + +void CachedDfSource::close() { + boost::throw_exception(UnsupportedOperationException()); +} + +DocumentPtr CachedDfSource::doc(int32_t n) { + boost::throw_exception(UnsupportedOperationException()); + return DocumentPtr(); +} + +DocumentPtr CachedDfSource::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { + boost::throw_exception(UnsupportedOperationException()); + return DocumentPtr(); +} + +ExplanationPtr CachedDfSource::explain(const WeightPtr& weight, int32_t doc) { + boost::throw_exception(UnsupportedOperationException()); + return ExplanationPtr(); +} + +void CachedDfSource::search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& results) { + boost::throw_exception(UnsupportedOperationException()); +} + +TopDocsPtr CachedDfSource::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { + boost::throw_exception(UnsupportedOperationException()); + return TopDocsPtr(); +} + +TopFieldDocsPtr CachedDfSource::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { + boost::throw_exception(UnsupportedOperationException()); + return TopFieldDocsPtr(); +} + +MultiSearcherCallableNoSort::MultiSearcherCallableNoSort(const SynchronizePtr& lock, const SearchablePtr& searchable, const WeightPtr& weight, + const FilterPtr& filter, int32_t nDocs, const HitQueuePtr& hq, int32_t i, + Collection starts) { + this->lock = lock; + this->searchable = searchable; + this->weight = weight; + this->filter = filter; + this->nDocs = nDocs; + this->hq = hq; + this->i = i; + this->starts = starts; +} + +MultiSearcherCallableNoSort::~MultiSearcherCallableNoSort() { +} + +TopDocsPtr MultiSearcherCallableNoSort::call() { + TopDocsPtr docs(searchable->search(weight, filter, nDocs)); + Collection scoreDocs(docs->scoreDocs); + for (int32_t j = 0; j < scoreDocs.size(); ++j) { // merge scoreDocs into hq + ScoreDocPtr scoreDoc(scoreDocs[j]); + scoreDoc->doc += starts[i]; // convert doc + + SyncLock 
syncLock(lock); + if (scoreDoc == hq->addOverflow(scoreDoc)) { + break; } - return docs; } - - MultiSearcherCallableWithSort::MultiSearcherCallableWithSort(SynchronizePtr lock, SearchablePtr searchable, WeightPtr weight, - FilterPtr filter, int32_t nDocs, FieldDocSortedHitQueuePtr hq, - SortPtr sort, int32_t i, Collection starts) - { - this->lock = lock; - this->searchable = searchable; - this->weight = weight; - this->filter = filter; - this->nDocs = nDocs; - this->hq = hq; - this->i = i; - this->starts = starts; - this->sort = sort; - } - - MultiSearcherCallableWithSort::~MultiSearcherCallableWithSort() - { - } - - TopFieldDocsPtr MultiSearcherCallableWithSort::call() - { - TopFieldDocsPtr docs(searchable->search(weight, filter, nDocs, sort)); - // If one of the Sort fields is FIELD_DOC, need to fix its values, so that it will break ties by doc Id - // properly. Otherwise, it will compare to 'relative' doc Ids, that belong to two different searchables. - for (int32_t j = 0; j < docs->fields.size(); ++j) - { - if (docs->fields[j]->getType() == SortField::DOC) - { - // iterate over the score docs and change their fields value - for (int32_t j2 = 0; j2 < docs->scoreDocs.size(); ++j2) - { - FieldDocPtr fd(boost::dynamic_pointer_cast(docs->scoreDocs[j2])); - fd->fields[j] = VariantUtils::get(fd->fields[j]) + starts[i]; - } - break; + return docs; +} + +MultiSearcherCallableWithSort::MultiSearcherCallableWithSort(const SynchronizePtr& lock, const SearchablePtr& searchable, const WeightPtr& weight, + const FilterPtr& filter, int32_t nDocs, const FieldDocSortedHitQueuePtr& hq, + const SortPtr& sort, int32_t i, Collection starts) { + this->lock = lock; + this->searchable = searchable; + this->weight = weight; + this->filter = filter; + this->nDocs = nDocs; + this->hq = hq; + this->i = i; + this->starts = starts; + this->sort = sort; +} + +MultiSearcherCallableWithSort::~MultiSearcherCallableWithSort() { +} + +TopFieldDocsPtr MultiSearcherCallableWithSort::call() { + 
TopFieldDocsPtr docs(searchable->search(weight, filter, nDocs, sort)); + // If one of the Sort fields is FIELD_DOC, need to fix its values, so that it will break ties by doc Id + // properly. Otherwise, it will compare to 'relative' doc Ids, that belong to two different searchables. + for (int32_t j = 0; j < docs->fields.size(); ++j) { + if (docs->fields[j]->getType() == SortField::DOC) { + // iterate over the score docs and change their fields value + for (int32_t j2 = 0; j2 < docs->scoreDocs.size(); ++j2) { + FieldDocPtr fd(boost::dynamic_pointer_cast(docs->scoreDocs[j2])); + fd->fields[j] = VariantUtils::get(fd->fields[j]) + starts[i]; } + break; } - - { - SyncLock syncLock(lock); - hq->setFields(docs->fields); - } - - Collection scoreDocs(docs->scoreDocs); - for (int32_t j = 0; j < scoreDocs.size(); ++j) // merge scoreDocs into hq - { - FieldDocPtr fieldDoc(boost::dynamic_pointer_cast(scoreDocs[j])); - fieldDoc->doc += starts[i]; // convert doc - - SyncLock syncLock(lock); - if (fieldDoc == hq->addOverflow(fieldDoc)) - break; - } - - return docs; - } - - MultiSearcherCollector::MultiSearcherCollector(CollectorPtr collector, int32_t start) - { - this->collector = collector; - this->start = start; - } - - MultiSearcherCollector::~MultiSearcherCollector() - { - } - - void MultiSearcherCollector::setScorer(ScorerPtr scorer) - { - collector->setScorer(scorer); } - - void MultiSearcherCollector::collect(int32_t doc) - { - collector->collect(doc); - } - - void MultiSearcherCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) + { - collector->setNextReader(reader, start + docBase); + SyncLock syncLock(lock); + hq->setFields(docs->fields); } - - bool MultiSearcherCollector::acceptsDocsOutOfOrder() - { - return collector->acceptsDocsOutOfOrder(); + + Collection scoreDocs(docs->scoreDocs); + for (int32_t j = 0; j < scoreDocs.size(); ++j) { // merge scoreDocs into hq + FieldDocPtr fieldDoc(boost::dynamic_pointer_cast(scoreDocs[j])); + fieldDoc->doc += 
starts[i]; // convert doc + + SyncLock syncLock(lock); + if (fieldDoc == hq->addOverflow(fieldDoc)) { + break; + } } + + return docs; +} + +MultiSearcherCollector::MultiSearcherCollector(const CollectorPtr& collector, int32_t start) { + this->collector = collector; + this->start = start; +} + +MultiSearcherCollector::~MultiSearcherCollector() { +} + +void MultiSearcherCollector::setScorer(const ScorerPtr& scorer) { + collector->setScorer(scorer); +} + +void MultiSearcherCollector::collect(int32_t doc) { + collector->collect(doc); +} + +void MultiSearcherCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + collector->setNextReader(reader, start + docBase); +} + +bool MultiSearcherCollector::acceptsDocsOutOfOrder() { + return collector->acceptsDocsOutOfOrder(); +} + } diff --git a/src/core/search/MultiTermQuery.cpp b/src/core/search/MultiTermQuery.cpp index 5f350204..cb004e3e 100644 --- a/src/core/search/MultiTermQuery.cpp +++ b/src/core/search/MultiTermQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -18,326 +18,286 @@ #include "IndexReader.h" #include "MiscUtils.h" -namespace Lucene -{ - MultiTermQuery::MultiTermQuery() - { - numberOfTerms = 0; - rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); - } - - MultiTermQuery::~MultiTermQuery() - { - } - - RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE() - { - static RewriteMethodPtr _CONSTANT_SCORE_FILTER_REWRITE; - if (!_CONSTANT_SCORE_FILTER_REWRITE) - { - _CONSTANT_SCORE_FILTER_REWRITE = newLucene(); - CycleCheck::addStatic(_CONSTANT_SCORE_FILTER_REWRITE); - } - return _CONSTANT_SCORE_FILTER_REWRITE; - } - - RewriteMethodPtr MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE() - { - static RewriteMethodPtr _SCORING_BOOLEAN_QUERY_REWRITE; - if (!_SCORING_BOOLEAN_QUERY_REWRITE) - { - _SCORING_BOOLEAN_QUERY_REWRITE = newLucene(); - CycleCheck::addStatic(_SCORING_BOOLEAN_QUERY_REWRITE); - } - return _SCORING_BOOLEAN_QUERY_REWRITE; - } - - RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE() - { - static RewriteMethodPtr _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; - if (!_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE) - { - _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = newLucene(); - CycleCheck::addStatic(_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); - } - return _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; - } - - RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT() - { - static RewriteMethodPtr _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; - if (!_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT) - { - _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = newLucene(); - CycleCheck::addStatic(_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); - } - return _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; - } - - int32_t MultiTermQuery::getTotalNumberOfTerms() - { - return numberOfTerms; - } - - void MultiTermQuery::clearTotalNumberOfTerms() - { - numberOfTerms = 0; - } - - void MultiTermQuery::incTotalNumberOfTerms(int32_t inc) - { - numberOfTerms += inc; - } - - QueryPtr 
MultiTermQuery::rewrite(IndexReaderPtr reader) - { - return rewriteMethod->rewrite(reader, shared_from_this()); - } - - RewriteMethodPtr MultiTermQuery::getRewriteMethod() - { - return rewriteMethod; - } - - void MultiTermQuery::setRewriteMethod(RewriteMethodPtr method) - { - rewriteMethod = method; - } - - LuceneObjectPtr MultiTermQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = Query::clone(other); - MultiTermQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->rewriteMethod = rewriteMethod; - cloneQuery->numberOfTerms = numberOfTerms; - return cloneQuery; - } - - int32_t MultiTermQuery::hashCode() - { - int32_t prime = 31; - int32_t result = 1; - result = prime * result + MiscUtils::doubleToIntBits(getBoost()); - result = prime * result; - result += rewriteMethod->hashCode(); - return result; - } - - bool MultiTermQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!other) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - MultiTermQueryPtr otherMultiTermQuery(boost::dynamic_pointer_cast(other)); - if (!otherMultiTermQuery) - return false; - if (MiscUtils::doubleToIntBits(getBoost()) != MiscUtils::doubleToIntBits(otherMultiTermQuery->getBoost())) - return false; - if (!rewriteMethod->equals(otherMultiTermQuery->rewriteMethod)) - return false; +namespace Lucene { + +MultiTermQuery::MultiTermQuery() { + numberOfTerms = 0; + rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT(); +} + +MultiTermQuery::~MultiTermQuery() { +} + +RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_FILTER_REWRITE() { + static RewriteMethodPtr _CONSTANT_SCORE_FILTER_REWRITE; + LUCENE_RUN_ONCE( + _CONSTANT_SCORE_FILTER_REWRITE = newLucene(); + CycleCheck::addStatic(_CONSTANT_SCORE_FILTER_REWRITE); + ); + return _CONSTANT_SCORE_FILTER_REWRITE; +} + +RewriteMethodPtr MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE() { + static RewriteMethodPtr _SCORING_BOOLEAN_QUERY_REWRITE; + 
LUCENE_RUN_ONCE( + _SCORING_BOOLEAN_QUERY_REWRITE = newLucene(); + CycleCheck::addStatic(_SCORING_BOOLEAN_QUERY_REWRITE); + ); + return _SCORING_BOOLEAN_QUERY_REWRITE; +} + +RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE() { + static RewriteMethodPtr _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; + LUCENE_RUN_ONCE( + _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = newLucene(); + CycleCheck::addStatic(_CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE); + ); + return _CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; +} + +RewriteMethodPtr MultiTermQuery::CONSTANT_SCORE_AUTO_REWRITE_DEFAULT() { + static RewriteMethodPtr _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; + LUCENE_RUN_ONCE( + _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = newLucene(); + CycleCheck::addStatic(_CONSTANT_SCORE_AUTO_REWRITE_DEFAULT); + ); + return _CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; +} + +int32_t MultiTermQuery::getTotalNumberOfTerms() { + return numberOfTerms; +} + +void MultiTermQuery::clearTotalNumberOfTerms() { + numberOfTerms = 0; +} + +void MultiTermQuery::incTotalNumberOfTerms(int32_t inc) { + numberOfTerms += inc; +} + +QueryPtr MultiTermQuery::rewrite(const IndexReaderPtr& reader) { + return rewriteMethod->rewrite(reader, shared_from_this()); +} + +RewriteMethodPtr MultiTermQuery::getRewriteMethod() { + return rewriteMethod; +} + +void MultiTermQuery::setRewriteMethod(const RewriteMethodPtr& method) { + rewriteMethod = method; +} + +LuceneObjectPtr MultiTermQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = Query::clone(other); + MultiTermQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->rewriteMethod = rewriteMethod; + cloneQuery->numberOfTerms = numberOfTerms; + return cloneQuery; +} + +int32_t MultiTermQuery::hashCode() { + int32_t prime = 31; + int32_t result = 1; + result = prime * result + MiscUtils::doubleToIntBits(getBoost()); + result = prime * result; + result += rewriteMethod->hashCode(); + return result; +} + +bool MultiTermQuery::equals(const LuceneObjectPtr& 
other) { + if (LuceneObject::equals(other)) { return true; } - - RewriteMethod::~RewriteMethod() - { - } - - ConstantScoreFilterRewrite::~ConstantScoreFilterRewrite() - { - } - - QueryPtr ConstantScoreFilterRewrite::rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) - { - QueryPtr result(newLucene(newLucene(query))); - result->setBoost(query->getBoost()); - return result; - } - - ScoringBooleanQueryRewrite::~ScoringBooleanQueryRewrite() - { - } - - QueryPtr ScoringBooleanQueryRewrite::rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) - { - FilteredTermEnumPtr enumerator(query->getEnum(reader)); - BooleanQueryPtr result(newLucene(true)); - int32_t count = 0; - LuceneException finally; - try - { - do - { - TermPtr t(enumerator->term()); - if (t) - { - TermQueryPtr tq(newLucene(t)); // found a match - tq->setBoost(query->getBoost() * enumerator->difference()); // set the boost - result->add(tq, BooleanClause::SHOULD); // add to query - ++count; - } - } - while (enumerator->next()); - } - catch (LuceneException& e) - { - finally = e; - } - enumerator->close(); - finally.throwException(); - query->incTotalNumberOfTerms(count); - return result; - } - - ConstantScoreBooleanQueryRewrite::~ConstantScoreBooleanQueryRewrite() - { - } - - QueryPtr ConstantScoreBooleanQueryRewrite::rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) - { - // strip the scores off - QueryPtr result(newLucene(newLucene(ScoringBooleanQueryRewrite::rewrite(reader, query)))); - result->setBoost(query->getBoost()); - return result; - } - - // Defaults derived from rough tests with a 20.0 million doc Wikipedia index. 
With more than 350 terms - // in the query, the filter method is fastest - const int32_t ConstantScoreAutoRewrite::DEFAULT_TERM_COUNT_CUTOFF = 350; - - // If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest - const double ConstantScoreAutoRewrite::DEFAULT_DOC_COUNT_PERCENT = 0.1; - - ConstantScoreAutoRewrite::ConstantScoreAutoRewrite() - { - termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF; - docCountPercent = DEFAULT_DOC_COUNT_PERCENT; + if (!other) { + return false; } - - ConstantScoreAutoRewrite::~ConstantScoreAutoRewrite() - { + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - void ConstantScoreAutoRewrite::setTermCountCutoff(int32_t count) - { - termCountCutoff = count; + MultiTermQueryPtr otherMultiTermQuery(boost::dynamic_pointer_cast(other)); + if (!otherMultiTermQuery) { + return false; } - - int32_t ConstantScoreAutoRewrite::getTermCountCutoff() - { - return termCountCutoff; + if (MiscUtils::doubleToIntBits(getBoost()) != MiscUtils::doubleToIntBits(otherMultiTermQuery->getBoost())) { + return false; } - - void ConstantScoreAutoRewrite::setDocCountPercent(double percent) - { - docCountPercent = percent; + if (!rewriteMethod->equals(otherMultiTermQuery->rewriteMethod)) { + return false; } - - double ConstantScoreAutoRewrite::getDocCountPercent() - { - return docCountPercent; - } - - QueryPtr ConstantScoreAutoRewrite::rewrite(IndexReaderPtr reader, MultiTermQueryPtr query) - { - // Get the enum and start visiting terms. 
If we exhaust the enum before hitting either of the - // cutoffs, we use ConstantBooleanQueryRewrite; else ConstantFilterRewrite - Collection pendingTerms(Collection::newInstance()); - int32_t docCountCutoff = (int32_t)((docCountPercent / 100.0) * (double)reader->maxDoc()); - int32_t termCountLimit = std::min(BooleanQuery::getMaxClauseCount(), termCountCutoff); - int32_t docVisitCount = 0; - - FilteredTermEnumPtr enumerator(query->getEnum(reader)); - QueryPtr result; - LuceneException finally; - try - { - while (true) - { - TermPtr t(enumerator->term()); - if (t) - { - pendingTerms.add(t); - // Loading the TermInfo from the terms dict here should not be costly, because 1) the - // query/filter will load the TermInfo when it runs, and 2) the terms dict has a cache - docVisitCount += reader->docFreq(t); - } - - if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) - { - // Too many terms -- make a filter. - result = newLucene(newLucene(query)); - result->setBoost(query->getBoost()); - break; - } - else if (!enumerator->next()) - { - // Enumeration is done, and we hit a small enough number of terms and docs - - // just make a BooleanQuery, now - BooleanQueryPtr bq(newLucene(true)); - for (Collection::iterator term = pendingTerms.begin(); term != pendingTerms.end(); ++ term) - { - TermQueryPtr tq(newLucene(*term)); - bq->add(tq, BooleanClause::SHOULD); - } - // Strip scores - result = newLucene(newLucene(bq)); - result->setBoost(query->getBoost()); - query->incTotalNumberOfTerms(pendingTerms.size()); - break; + return true; +} + +RewriteMethod::~RewriteMethod() { +} + +ConstantScoreFilterRewrite::~ConstantScoreFilterRewrite() { +} + +QueryPtr ConstantScoreFilterRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) { + QueryPtr result(newLucene(newLucene(query))); + result->setBoost(query->getBoost()); + return result; +} + +ScoringBooleanQueryRewrite::~ScoringBooleanQueryRewrite() { +} + +QueryPtr 
ScoringBooleanQueryRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) { + FilteredTermEnumPtr enumerator(query->getEnum(reader)); + BooleanQueryPtr result(newLucene(true)); + int32_t count = 0; + LuceneException finally; + try { + do { + TermPtr t(enumerator->term()); + if (t) { + TermQueryPtr tq(newLucene(t)); // found a match + tq->setBoost(query->getBoost() * enumerator->difference()); // set the boost + result->add(tq, BooleanClause::SHOULD); // add to query + ++count; + } + } while (enumerator->next()); + } catch (LuceneException& e) { + finally = e; + } + enumerator->close(); + finally.throwException(); + query->incTotalNumberOfTerms(count); + return result; +} + +ConstantScoreBooleanQueryRewrite::~ConstantScoreBooleanQueryRewrite() { +} + +QueryPtr ConstantScoreBooleanQueryRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) { + // strip the scores off + QueryPtr result(newLucene(newLucene(ScoringBooleanQueryRewrite::rewrite(reader, query)))); + result->setBoost(query->getBoost()); + return result; +} + +// Defaults derived from rough tests with a 20.0 million doc Wikipedia index. 
With more than 350 terms +// in the query, the filter method is fastest +const int32_t ConstantScoreAutoRewrite::DEFAULT_TERM_COUNT_CUTOFF = 350; + +// If the query will hit more than 1 in 1000 of the docs in the index (0.1%), the filter method is fastest +const double ConstantScoreAutoRewrite::DEFAULT_DOC_COUNT_PERCENT = 0.1; + +ConstantScoreAutoRewrite::ConstantScoreAutoRewrite() { + termCountCutoff = DEFAULT_TERM_COUNT_CUTOFF; + docCountPercent = DEFAULT_DOC_COUNT_PERCENT; +} + +ConstantScoreAutoRewrite::~ConstantScoreAutoRewrite() { +} + +void ConstantScoreAutoRewrite::setTermCountCutoff(int32_t count) { + termCountCutoff = count; +} + +int32_t ConstantScoreAutoRewrite::getTermCountCutoff() { + return termCountCutoff; +} + +void ConstantScoreAutoRewrite::setDocCountPercent(double percent) { + docCountPercent = percent; +} + +double ConstantScoreAutoRewrite::getDocCountPercent() { + return docCountPercent; +} + +QueryPtr ConstantScoreAutoRewrite::rewrite(const IndexReaderPtr& reader, const MultiTermQueryPtr& query) { + // Get the enum and start visiting terms. 
If we exhaust the enum before hitting either of the + // cutoffs, we use ConstantBooleanQueryRewrite; else ConstantFilterRewrite + Collection pendingTerms(Collection::newInstance()); + int32_t docCountCutoff = (int32_t)((docCountPercent / 100.0) * (double)reader->maxDoc()); + int32_t termCountLimit = std::min(BooleanQuery::getMaxClauseCount(), termCountCutoff); + int32_t docVisitCount = 0; + + FilteredTermEnumPtr enumerator(query->getEnum(reader)); + QueryPtr result; + LuceneException finally; + try { + while (true) { + TermPtr t(enumerator->term()); + if (t) { + pendingTerms.add(t); + // Loading the TermInfo from the terms dict here should not be costly, because 1) the + // query/filter will load the TermInfo when it runs, and 2) the terms dict has a cache + docVisitCount += reader->docFreq(t); + } + + if (pendingTerms.size() >= termCountLimit || docVisitCount >= docCountCutoff) { + // Too many terms -- make a filter. + result = newLucene(newLucene(query)); + result->setBoost(query->getBoost()); + break; + } else if (!enumerator->next()) { + // Enumeration is done, and we hit a small enough number of terms and docs - + // just make a BooleanQuery, now + BooleanQueryPtr bq(newLucene(true)); + for (Collection::iterator term = pendingTerms.begin(); term != pendingTerms.end(); ++ term) { + TermQueryPtr tq(newLucene(*term)); + bq->add(tq, BooleanClause::SHOULD); } + // Strip scores + result = newLucene(newLucene(bq)); + result->setBoost(query->getBoost()); + query->incTotalNumberOfTerms(pendingTerms.size()); + break; } } - catch (LuceneException& e) - { - finally = e; - } - enumerator->close(); - finally.throwException(); - return result; + } catch (LuceneException& e) { + finally = e; } - - int32_t ConstantScoreAutoRewrite::hashCode() - { - int32_t prime = 1279; - return (int32_t)(prime * termCountCutoff + MiscUtils::doubleToLongBits(docCountPercent)); - } - - bool ConstantScoreAutoRewrite::equals(LuceneObjectPtr other) - { - if (RewriteMethod::equals(other)) - return 
true; - if (!other) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - - ConstantScoreAutoRewritePtr otherConstantScoreAutoRewrite(boost::dynamic_pointer_cast(other)); - if (!otherConstantScoreAutoRewrite) - return false; - - if (termCountCutoff != otherConstantScoreAutoRewrite->termCountCutoff) - return false; - - if (MiscUtils::doubleToLongBits(docCountPercent) != MiscUtils::doubleToLongBits(otherConstantScoreAutoRewrite->docCountPercent)) - return false; - + enumerator->close(); + finally.throwException(); + return result; +} + +int32_t ConstantScoreAutoRewrite::hashCode() { + int32_t prime = 1279; + return (int32_t)(prime * termCountCutoff + MiscUtils::doubleToLongBits(docCountPercent)); +} + +bool ConstantScoreAutoRewrite::equals(const LuceneObjectPtr& other) { + if (RewriteMethod::equals(other)) { return true; } - - ConstantScoreAutoRewriteDefault::~ConstantScoreAutoRewriteDefault() - { + if (!other) { + return false; } - - void ConstantScoreAutoRewriteDefault::setTermCountCutoff(int32_t count) - { - boost::throw_exception(UnsupportedOperationException(L"Please create a private instance")); + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - void ConstantScoreAutoRewriteDefault::setDocCountPercent(double percent) - { - boost::throw_exception(UnsupportedOperationException(L"Please create a private instance")); + + ConstantScoreAutoRewritePtr otherConstantScoreAutoRewrite(boost::dynamic_pointer_cast(other)); + if (!otherConstantScoreAutoRewrite) { + return false; + } + + if (termCountCutoff != otherConstantScoreAutoRewrite->termCountCutoff) { + return false; } + + if (MiscUtils::doubleToLongBits(docCountPercent) != MiscUtils::doubleToLongBits(otherConstantScoreAutoRewrite->docCountPercent)) { + return false; + } + + return true; +} + +ConstantScoreAutoRewriteDefault::~ConstantScoreAutoRewriteDefault() { +} + +void ConstantScoreAutoRewriteDefault::setTermCountCutoff(int32_t count) { + 
boost::throw_exception(UnsupportedOperationException(L"Please create a private instance")); +} + +void ConstantScoreAutoRewriteDefault::setDocCountPercent(double percent) { + boost::throw_exception(UnsupportedOperationException(L"Please create a private instance")); +} + } diff --git a/src/core/search/MultiTermQueryWrapperFilter.cpp b/src/core/search/MultiTermQueryWrapperFilter.cpp index 261640b3..4b6c203f 100644 --- a/src/core/search/MultiTermQueryWrapperFilter.cpp +++ b/src/core/search/MultiTermQueryWrapperFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -16,105 +16,95 @@ #include "OpenBitSet.h" #include "MiscUtils.h" -namespace Lucene -{ - MultiTermQueryWrapperFilter::MultiTermQueryWrapperFilter(MultiTermQueryPtr query) - { - this->query = query; - } - - MultiTermQueryWrapperFilter::~MultiTermQueryWrapperFilter() - { - } - - String MultiTermQueryWrapperFilter::toString() - { - // query->toString should be ok for the filter, too, if the query boost is 1.0 - return query->toString(); +namespace Lucene { + +MultiTermQueryWrapperFilter::MultiTermQueryWrapperFilter(const MultiTermQueryPtr& query) { + this->query = query; +} + +MultiTermQueryWrapperFilter::~MultiTermQueryWrapperFilter() { +} + +String MultiTermQueryWrapperFilter::toString() { + // query->toString should be ok for the filter, too, if the query boost is 1.0 + return query->toString(); +} + +bool MultiTermQueryWrapperFilter::equals(const LuceneObjectPtr& other) { + if (Filter::equals(other)) { + return true; } - - bool MultiTermQueryWrapperFilter::equals(LuceneObjectPtr other) - { - if (Filter::equals(other)) - return true; - if 
(!other) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - MultiTermQueryWrapperFilterPtr otherMultiTermQueryWrapperFilter(boost::dynamic_pointer_cast(other)); - if (otherMultiTermQueryWrapperFilter) - return query->equals(otherMultiTermQueryWrapperFilter->query); + if (!other) { return false; } - - int32_t MultiTermQueryWrapperFilter::hashCode() - { - return query->hashCode(); - } - - int32_t MultiTermQueryWrapperFilter::getTotalNumberOfTerms() - { - return query->getTotalNumberOfTerms(); + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - void MultiTermQueryWrapperFilter::clearTotalNumberOfTerms() - { - query->clearTotalNumberOfTerms(); + MultiTermQueryWrapperFilterPtr otherMultiTermQueryWrapperFilter(boost::dynamic_pointer_cast(other)); + if (otherMultiTermQueryWrapperFilter) { + return query->equals(otherMultiTermQueryWrapperFilter->query); } - - DocIdSetPtr MultiTermQueryWrapperFilter::getDocIdSet(IndexReaderPtr reader) - { - TermEnumPtr enumerator(query->getEnum(reader)); - OpenBitSetPtr bitSet; - LuceneException finally; - try - { - // if current term in enum is null, the enum is empty -> shortcut - if (!enumerator->term()) - return DocIdSet::EMPTY_DOCIDSET(); - // else fill into a OpenBitSet - bitSet = newLucene(reader->maxDoc()); - Collection docs(Collection::newInstance(32)); - Collection freqs(Collection::newInstance(32)); - TermDocsPtr termDocs(reader->termDocs()); - try - { - int32_t termCount = 0; - do - { - TermPtr term(enumerator->term()); - if (!term) - break; - ++termCount; - termDocs->seek(term); - while (true) - { - int32_t count = termDocs->read(docs, freqs); - if (count != 0) - { - for (int32_t i = 0; i < count; ++i) - bitSet->set(docs[i]); + return false; +} + +int32_t MultiTermQueryWrapperFilter::hashCode() { + return query->hashCode(); +} + +int32_t MultiTermQueryWrapperFilter::getTotalNumberOfTerms() { + return query->getTotalNumberOfTerms(); +} + +void 
MultiTermQueryWrapperFilter::clearTotalNumberOfTerms() { + query->clearTotalNumberOfTerms(); +} + +DocIdSetPtr MultiTermQueryWrapperFilter::getDocIdSet(const IndexReaderPtr& reader) { + TermEnumPtr enumerator(query->getEnum(reader)); + OpenBitSetPtr bitSet; + LuceneException finally; + try { + // if current term in enum is null, the enum is empty -> shortcut + if (!enumerator->term()) { + return DocIdSet::EMPTY_DOCIDSET(); + } + // else fill into a OpenBitSet + bitSet = newLucene(reader->maxDoc()); + Collection docs(Collection::newInstance(32)); + Collection freqs(Collection::newInstance(32)); + TermDocsPtr termDocs(reader->termDocs()); + try { + int32_t termCount = 0; + do { + TermPtr term(enumerator->term()); + if (!term) { + break; + } + ++termCount; + termDocs->seek(term); + while (true) { + int32_t count = termDocs->read(docs, freqs); + if (count != 0) { + for (int32_t i = 0; i < count; ++i) { + bitSet->set(docs[i]); } - else - break; + } else { + break; } } - while (enumerator->next()); - - query->incTotalNumberOfTerms(termCount); - } - catch (LuceneException& e) - { - finally = e; - } - termDocs->close(); - } - catch (LuceneException& e) - { + } while (enumerator->next()); + + query->incTotalNumberOfTerms(termCount); + } catch (LuceneException& e) { finally = e; } - enumerator->close(); - finally.throwException(); - return bitSet; + termDocs->close(); + } catch (LuceneException& e) { + finally = e; } + enumerator->close(); + finally.throwException(); + return bitSet; +} + } diff --git a/src/core/search/NumericRangeFilter.cpp b/src/core/search/NumericRangeFilter.cpp index 7fef003d..562f8d1e 100644 --- a/src/core/search/NumericRangeFilter.cpp +++ b/src/core/search/NumericRangeFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,78 +8,64 @@ #include "NumericRangeFilter.h" #include "NumericRangeQuery.h" -namespace Lucene -{ - NumericRangeFilter::NumericRangeFilter(NumericRangeQueryPtr query) : MultiTermQueryWrapperFilter(query) - { - } - - NumericRangeFilter::~NumericRangeFilter() - { - } - - NumericRangeFilterPtr NumericRangeFilter::newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); - } - - NumericRangeFilterPtr NumericRangeFilter::newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, min, max, minInclusive, maxInclusive); - } - - NumericRangeFilterPtr NumericRangeFilter::newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); - } - - NumericRangeFilterPtr NumericRangeFilter::newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, min, max, minInclusive, maxInclusive); - } - - NumericRangeFilterPtr NumericRangeFilter::newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); - } - - NumericRangeFilterPtr NumericRangeFilter::newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, min, max, minInclusive, maxInclusive); - } - - NumericRangeFilterPtr NumericRangeFilter::newNumericRange(const String& 
field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) - { - return newLucene(NumericRangeQuery::newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive)); - } - - NumericRangeFilterPtr NumericRangeFilter::newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) - { - return newLucene(NumericRangeQuery::newNumericRange(field, min, max, minInclusive, maxInclusive)); - } - - String NumericRangeFilter::getField() - { - return boost::static_pointer_cast(query)->field; - } - - bool NumericRangeFilter::includesMin() - { - return boost::static_pointer_cast(query)->minInclusive; - } - - bool NumericRangeFilter::includesMax() - { - return boost::static_pointer_cast(query)->maxInclusive; - } - - NumericValue NumericRangeFilter::getMin() - { - return boost::static_pointer_cast(query)->min; - } - - NumericValue NumericRangeFilter::getMax() - { - return boost::static_pointer_cast(query)->min; - } +namespace Lucene { + +NumericRangeFilter::NumericRangeFilter(const NumericRangeQueryPtr& query) : MultiTermQueryWrapperFilter(query) { +} + +NumericRangeFilter::~NumericRangeFilter() { +} + +NumericRangeFilterPtr NumericRangeFilter::newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); +} + +NumericRangeFilterPtr NumericRangeFilter::newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, min, max, minInclusive, maxInclusive); +} + +NumericRangeFilterPtr NumericRangeFilter::newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); +} + +NumericRangeFilterPtr 
NumericRangeFilter::newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) {
+    return newNumericRange(field, min, max, minInclusive, maxInclusive);
+}
+
+NumericRangeFilterPtr NumericRangeFilter::newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive) {
+    return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive);
+}
+
+NumericRangeFilterPtr NumericRangeFilter::newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive) {
+    return newNumericRange(field, min, max, minInclusive, maxInclusive);
+}
+
+NumericRangeFilterPtr NumericRangeFilter::newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) {
+    return newLucene(NumericRangeQuery::newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive));
+}
+
+NumericRangeFilterPtr NumericRangeFilter::newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) {
+    return newLucene(NumericRangeQuery::newNumericRange(field, min, max, minInclusive, maxInclusive));
+}
+
+String NumericRangeFilter::getField() {
+    return boost::static_pointer_cast(query)->field;
+}
+
+bool NumericRangeFilter::includesMin() {
+    return boost::static_pointer_cast(query)->minInclusive;
+}
+
+bool NumericRangeFilter::includesMax() {
+    return boost::static_pointer_cast(query)->maxInclusive;
+}
+
+NumericValue NumericRangeFilter::getMin() {
+    return boost::static_pointer_cast(query)->min;
+}
+
+NumericValue NumericRangeFilter::getMax() {
+    return boost::static_pointer_cast(query)->max;
+}
+
 }
diff --git a/src/core/search/NumericRangeQuery.cpp b/src/core/search/NumericRangeQuery.cpp
index bb3d1893..384abd27 100644
--- a/src/core/search/NumericRangeQuery.cpp
+++ b/src/core/search/NumericRangeQuery.cpp
@@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,364 +13,340 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - NumericRangeQuery::NumericRangeQuery(const String& field, int32_t precisionStep, int32_t valSize, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) - { - BOOST_ASSERT(valSize == 32 || valSize == 64); - if (precisionStep < 1) - boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); - this->field = field; - this->precisionStep = precisionStep; - this->valSize = valSize; - this->min = min; - this->max = max; - this->minInclusive = minInclusive; - this->maxInclusive = maxInclusive; - - // For bigger precisionSteps this query likely hits too many terms, so set to CONSTANT_SCORE_FILTER - // right off (especially as the FilteredTermEnum is costly if wasted only for AUTO tests because it - // creates new enums from IndexReader for each sub-range) - switch (valSize) - { - case 64: - setRewriteMethod(precisionStep > 6 ? CONSTANT_SCORE_FILTER_REWRITE() : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); - break; - case 32: - setRewriteMethod(precisionStep > 8 ? 
CONSTANT_SCORE_FILTER_REWRITE() : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); - break; - default: - // should never happen - boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); - } - - // shortcut if upper bound == lower bound - if (!VariantUtils::isNull(min) && min == max) - setRewriteMethod(CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); - } - - NumericRangeQuery::~NumericRangeQuery() - { - } - - NumericRangeQueryPtr NumericRangeQuery::newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); - } - - NumericRangeQueryPtr NumericRangeQuery::newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, min, max, minInclusive, maxInclusive); - } - - NumericRangeQueryPtr NumericRangeQuery::newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); - } - - NumericRangeQueryPtr NumericRangeQuery::newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, min, max, minInclusive, maxInclusive); - } - - NumericRangeQueryPtr NumericRangeQuery::newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); - } - - NumericRangeQueryPtr NumericRangeQuery::newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, min, max, minInclusive, maxInclusive); - } - - NumericRangeQueryPtr NumericRangeQuery::newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool 
minInclusive, bool maxInclusive) - { - if (!VariantUtils::equalsType(min, max)) - boost::throw_exception(IllegalArgumentException(L"min/max must be of the same type")); - int32_t valSize = VariantUtils::typeOf(min) ? 32 : 64; - return newLucene(field, precisionStep, valSize, min, max, minInclusive, maxInclusive); - } - - NumericRangeQueryPtr NumericRangeQuery::newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) - { - return newNumericRange(field, NumericUtils::PRECISION_STEP_DEFAULT, min, max, minInclusive, maxInclusive); - } - - FilteredTermEnumPtr NumericRangeQuery::getEnum(IndexReaderPtr reader) - { - return newLucene(shared_from_this(), reader); - } - - String NumericRangeQuery::getField() - { - return field; - } - - bool NumericRangeQuery::includesMin() - { - return minInclusive; +namespace Lucene { + +NumericRangeQuery::NumericRangeQuery(const String& field, int32_t precisionStep, int32_t valSize, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) { + BOOST_ASSERT(valSize == 32 || valSize == 64); + if (precisionStep < 1) { + boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); + } + this->field = field; + this->precisionStep = precisionStep; + this->valSize = valSize; + this->min = min; + this->max = max; + this->minInclusive = minInclusive; + this->maxInclusive = maxInclusive; + + // For bigger precisionSteps this query likely hits too many terms, so set to CONSTANT_SCORE_FILTER + // right off (especially as the FilteredTermEnum is costly if wasted only for AUTO tests because it + // creates new enums from IndexReader for each sub-range) + switch (valSize) { + case 64: + setRewriteMethod(precisionStep > 6 ? CONSTANT_SCORE_FILTER_REWRITE() : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); + break; + case 32: + setRewriteMethod(precisionStep > 8 ? 
CONSTANT_SCORE_FILTER_REWRITE() : CONSTANT_SCORE_AUTO_REWRITE_DEFAULT()); + break; + default: + // should never happen + boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } - - bool NumericRangeQuery::includesMax() - { - return maxInclusive; + + // shortcut if upper bound == lower bound + if (!VariantUtils::isNull(min) && min == max) { + setRewriteMethod(CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE()); } - - NumericValue NumericRangeQuery::getMin() - { - return min; +} + +NumericRangeQuery::~NumericRangeQuery() { +} + +NumericRangeQueryPtr NumericRangeQuery::newLongRange(const String& field, int32_t precisionStep, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); +} + +NumericRangeQueryPtr NumericRangeQuery::newLongRange(const String& field, int64_t min, int64_t max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, min, max, minInclusive, maxInclusive); +} + +NumericRangeQueryPtr NumericRangeQuery::newIntRange(const String& field, int32_t precisionStep, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); +} + +NumericRangeQueryPtr NumericRangeQuery::newIntRange(const String& field, int32_t min, int32_t max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, min, max, minInclusive, maxInclusive); +} + +NumericRangeQueryPtr NumericRangeQuery::newDoubleRange(const String& field, int32_t precisionStep, double min, double max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, precisionStep, min, max, minInclusive, maxInclusive); +} + +NumericRangeQueryPtr NumericRangeQuery::newDoubleRange(const String& field, double min, double max, bool minInclusive, bool maxInclusive) { + return newNumericRange(field, min, max, minInclusive, maxInclusive); +} + +NumericRangeQueryPtr 
NumericRangeQuery::newNumericRange(const String& field, int32_t precisionStep, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) {
+    if (!VariantUtils::equalsType(min, max)) {
+        boost::throw_exception(IllegalArgumentException(L"min/max must be of the same type"));
    }
-
-    NumericValue NumericRangeQuery::getMax()
-    {
-        return min;
+    int32_t valSize = VariantUtils::typeOf(min) ? 32 : 64;
+    return newLucene(field, precisionStep, valSize, min, max, minInclusive, maxInclusive);
+}
+
+NumericRangeQueryPtr NumericRangeQuery::newNumericRange(const String& field, NumericValue min, NumericValue max, bool minInclusive, bool maxInclusive) {
+    return newNumericRange(field, NumericUtils::PRECISION_STEP_DEFAULT, min, max, minInclusive, maxInclusive);
+}
+
+FilteredTermEnumPtr NumericRangeQuery::getEnum(const IndexReaderPtr& reader) {
+    return newLucene(shared_from_this(), reader);
+}
+
+String NumericRangeQuery::getField() {
+    return field;
+}
+
+bool NumericRangeQuery::includesMin() {
+    return minInclusive;
+}
+
+bool NumericRangeQuery::includesMax() {
+    return maxInclusive;
+}
+
+NumericValue NumericRangeQuery::getMin() {
+    return min;
+}
+
+NumericValue NumericRangeQuery::getMax() {
+    return max;
+}
+
+LuceneObjectPtr NumericRangeQuery::clone(const LuceneObjectPtr& other) {
+    LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(field, precisionStep, valSize, min, max, minInclusive, maxInclusive));
+    NumericRangeQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone));
+    cloneQuery->field = field;
+    cloneQuery->precisionStep = precisionStep;
+    cloneQuery->valSize = valSize;
+    cloneQuery->min = min;
+    cloneQuery->max = max;
+    cloneQuery->minInclusive = minInclusive;
+    cloneQuery->maxInclusive = maxInclusive;
+    return cloneQuery;
+}
+
+String NumericRangeQuery::toString(const String& field) {
+    StringStream buffer;
+    if (this->field != field) {
+        buffer << this->field << L":";
+    }
+    buffer << (minInclusive ? 
L"[" : L"{"); + if (VariantUtils::isNull(min)) { + buffer << L"*"; + } else { + buffer << min; + } + buffer << L" TO "; + if (VariantUtils::isNull(max)) { + buffer << L"*"; + } else { + buffer << max; + } + buffer << (maxInclusive ? L"]" : L"}"); + buffer << boostString(); + return buffer.str(); +} + +bool NumericRangeQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - LuceneObjectPtr NumericRangeQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(field, precisionStep, valSize, min, max, minInclusive, maxInclusive)); - NumericRangeQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->field = field; - cloneQuery->precisionStep = precisionStep; - cloneQuery->valSize = valSize; - cloneQuery->min = min; - cloneQuery->max = max; - cloneQuery->minInclusive = minInclusive; - cloneQuery->maxInclusive = maxInclusive; - return cloneQuery; + if (!MultiTermQuery::equals(other)) { + return false; } - - String NumericRangeQuery::toString(const String& field) - { - StringStream buffer; - if (this->field != field) - buffer << this->field << L":"; - buffer << (minInclusive ? L"[" : L"{"); - if (VariantUtils::isNull(min)) - buffer << L"*"; - else - buffer << min; - buffer << L" TO "; - if (VariantUtils::isNull(max)) - buffer << L"*"; - else - buffer << max; - buffer << (maxInclusive ? 
L"]" : L"}"); - buffer << boostString(); - return buffer.str(); + + NumericRangeQueryPtr otherNumericRangeQuery(boost::dynamic_pointer_cast(other)); + if (!otherNumericRangeQuery) { + return false; } - - bool NumericRangeQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!MultiTermQuery::equals(other)) - return false; - - NumericRangeQueryPtr otherNumericRangeQuery(boost::dynamic_pointer_cast(other)); - if (!otherNumericRangeQuery) - return false; - - return (field == otherNumericRangeQuery->field && - min == otherNumericRangeQuery->min && - max == otherNumericRangeQuery->max && - minInclusive == otherNumericRangeQuery->minInclusive && - maxInclusive == otherNumericRangeQuery->maxInclusive && - precisionStep == otherNumericRangeQuery->precisionStep); + + return (field == otherNumericRangeQuery->field && + min == otherNumericRangeQuery->min && + max == otherNumericRangeQuery->max && + minInclusive == otherNumericRangeQuery->minInclusive && + maxInclusive == otherNumericRangeQuery->maxInclusive && + precisionStep == otherNumericRangeQuery->precisionStep); +} + +int32_t NumericRangeQuery::hashCode() { + int32_t hash = MultiTermQuery::hashCode(); + hash += StringUtils::hashCode(field) ^ 0x4565fd66 + precisionStep ^ 0x64365465; + if (!VariantUtils::isNull(min)) { + hash += VariantUtils::hashCode(min) ^ 0x14fa55fb; } - - int32_t NumericRangeQuery::hashCode() - { - int32_t hash = MultiTermQuery::hashCode(); - hash += StringUtils::hashCode(field) ^ 0x4565fd66 + precisionStep ^ 0x64365465; - if (!VariantUtils::isNull(min)) - hash += VariantUtils::hashCode(min) ^ 0x14fa55fb; - if (!VariantUtils::isNull(max)) - hash += VariantUtils::hashCode(max) ^ 0x733fa5fe; - return hash + (MiscUtils::hashCode(minInclusive) ^ 0x14fa55fb) + (MiscUtils::hashCode(maxInclusive) ^ 0x733fa5fe); + if (!VariantUtils::isNull(max)) { + hash += VariantUtils::hashCode(max) ^ 0x733fa5fe; } - - NumericRangeTermEnum::NumericRangeTermEnum(NumericRangeQueryPtr 
query, IndexReaderPtr reader) - { - this->_query = query; - this->reader = reader; - this->rangeBounds = Collection::newInstance(); - this->termTemplate = newLucene(query->field); - - switch (query->valSize) - { - case 64: - { - // lower - int64_t minBound = LLONG_MIN; - if (VariantUtils::typeOf(query->min)) - minBound = VariantUtils::get(query->min); - else if (VariantUtils::typeOf(query->min)) - minBound = NumericUtils::doubleToSortableLong(VariantUtils::get(query->min)); - if (!query->minInclusive && !VariantUtils::isNull(query->min)) - { - if (minBound == LLONG_MAX) - break; - ++minBound; - } - - // upper - int64_t maxBound = LLONG_MAX; - if (VariantUtils::typeOf(query->max)) - maxBound = VariantUtils::get(query->max); - else if (VariantUtils::typeOf(query->max)) - maxBound = NumericUtils::doubleToSortableLong(VariantUtils::get(query->max)); - if (!query->maxInclusive && !VariantUtils::isNull(query->max)) - { - if (maxBound == LLONG_MIN) - break; - --maxBound; - } - - NumericUtils::splitLongRange(newLucene(rangeBounds), query->precisionStep, minBound, maxBound); - + return hash + (MiscUtils::hashCode(minInclusive) ^ 0x14fa55fb) + (MiscUtils::hashCode(maxInclusive) ^ 0x733fa5fe); +} + +NumericRangeTermEnum::NumericRangeTermEnum(const NumericRangeQueryPtr& query, const IndexReaderPtr& reader) { + this->_query = query; + this->reader = reader; + this->rangeBounds = Collection::newInstance(); + this->termTemplate = newLucene(query->field); + + switch (query->valSize) { + case 64: { + // lower + int64_t minBound = std::numeric_limits::min(); + if (VariantUtils::typeOf(query->min)) { + minBound = VariantUtils::get(query->min); + } else if (VariantUtils::typeOf(query->min)) { + minBound = NumericUtils::doubleToSortableLong(VariantUtils::get(query->min)); + } + if (!query->minInclusive && !VariantUtils::isNull(query->min)) { + if (minBound == std::numeric_limits::max()) { break; } - - case 32: - { - // lower - int32_t minBound = INT_MIN; - if 
(VariantUtils::typeOf(query->min)) - minBound = VariantUtils::get(query->min); - if (!query->minInclusive && !VariantUtils::isNull(query->min)) - { - if (minBound == INT_MAX) - break; - ++minBound; - } - - // upper - int32_t maxBound = INT_MAX; - if (VariantUtils::typeOf(query->max)) - maxBound = VariantUtils::get(query->max); - if (!query->maxInclusive && !VariantUtils::isNull(query->max)) - { - if (maxBound == INT_MIN) - break; - --maxBound; - } - - NumericUtils::splitIntRange(newLucene(rangeBounds), query->precisionStep, minBound, maxBound); - + ++minBound; + } + + // upper + int64_t maxBound = std::numeric_limits::max(); + if (VariantUtils::typeOf(query->max)) { + maxBound = VariantUtils::get(query->max); + } else if (VariantUtils::typeOf(query->max)) { + maxBound = NumericUtils::doubleToSortableLong(VariantUtils::get(query->max)); + } + if (!query->maxInclusive && !VariantUtils::isNull(query->max)) { + if (maxBound == std::numeric_limits::min()) { break; } - - default: - // should never happen - boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); + --maxBound; } - - // seek to first term - next(); - } - - NumericRangeTermEnum::~NumericRangeTermEnum() - { - } - - double NumericRangeTermEnum::difference() - { - return 1.0; - } - - bool NumericRangeTermEnum::endEnum() - { - boost::throw_exception(UnsupportedOperationException(L"not implemented")); - return false; - } - - void NumericRangeTermEnum::setEnum(TermEnumPtr actualEnum) - { - boost::throw_exception(UnsupportedOperationException(L"not implemented")); - } - - bool NumericRangeTermEnum::termCompare(TermPtr term) - { - return (term->field() == NumericRangeQueryPtr(_query)->field && term->text().compare(currentUpperBound) <= 0); + + NumericUtils::splitLongRange(newLucene(rangeBounds), query->precisionStep, minBound, maxBound); + + break; } - - bool NumericRangeTermEnum::next() - { - // if a current term exists, the actual enum is initialized: try change to next term, if no - // such 
term exists, fall-through - if (currentTerm) - { - BOOST_ASSERT(actualEnum); - if (actualEnum->next()) - { - currentTerm = actualEnum->term(); - if (termCompare(currentTerm)) - return true; + + case 32: { + // lower + int32_t minBound = INT_MIN; + if (VariantUtils::typeOf(query->min)) { + minBound = VariantUtils::get(query->min); + } + if (!query->minInclusive && !VariantUtils::isNull(query->min)) { + if (minBound == INT_MAX) { + break; } + ++minBound; } - - // if all above fails, we go forward to the next enum, if one is available - currentTerm.reset(); - while (rangeBounds.size() >= 2) - { - BOOST_ASSERT(rangeBounds.size() % 2 == 0); - // close the current enum and read next bounds - if (actualEnum) - { - actualEnum->close(); - actualEnum.reset(); + + // upper + int32_t maxBound = INT_MAX; + if (VariantUtils::typeOf(query->max)) { + maxBound = VariantUtils::get(query->max); + } + if (!query->maxInclusive && !VariantUtils::isNull(query->max)) { + if (maxBound == INT_MIN) { + break; } - String lowerBound(rangeBounds.removeFirst()); - currentUpperBound = rangeBounds.removeFirst(); - // create a new enum - actualEnum = reader->terms(termTemplate->createTerm(lowerBound)); - currentTerm = actualEnum->term(); - if (currentTerm && termCompare(currentTerm)) - return true; - // clear the current term for next iteration - currentTerm.reset(); + --maxBound; } - - // no more sub-range enums available - BOOST_ASSERT(rangeBounds.empty() && !currentTerm); - return false; - } - - void NumericRangeTermEnum::close() - { - rangeBounds.clear(); - currentUpperBound.clear(); - FilteredTermEnum::close(); - } - - NumericLongRangeBuilder::NumericLongRangeBuilder(Collection rangeBounds) - { - this->rangeBounds = rangeBounds; - } - - NumericLongRangeBuilder::~NumericLongRangeBuilder() - { - } - - void NumericLongRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) - { - rangeBounds.add(minPrefixCoded); - rangeBounds.add(maxPrefixCoded); + + 
NumericUtils::splitIntRange(newLucene(rangeBounds), query->precisionStep, minBound, maxBound); + + break; } - - NumericIntRangeBuilder::NumericIntRangeBuilder(Collection rangeBounds) - { - this->rangeBounds = rangeBounds; + + default: + // should never happen + boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64")); } - - NumericIntRangeBuilder::~NumericIntRangeBuilder() - { + + // seek to first term + next(); +} + +NumericRangeTermEnum::~NumericRangeTermEnum() { +} + +double NumericRangeTermEnum::difference() { + return 1.0; +} + +bool NumericRangeTermEnum::endEnum() { + boost::throw_exception(UnsupportedOperationException(L"not implemented")); + return false; +} + +void NumericRangeTermEnum::setEnum(const TermEnumPtr& actualEnum) { + boost::throw_exception(UnsupportedOperationException(L"not implemented")); +} + +bool NumericRangeTermEnum::termCompare(const TermPtr& term) { + return (term->field() == NumericRangeQueryPtr(_query)->field && term->text().compare(currentUpperBound) <= 0); +} + +bool NumericRangeTermEnum::next() { + // if a current term exists, the actual enum is initialized: try change to next term, if no + // such term exists, fall-through + if (currentTerm) { + BOOST_ASSERT(actualEnum); + if (actualEnum->next()) { + currentTerm = actualEnum->term(); + if (termCompare(currentTerm)) { + return true; + } + } } - - void NumericIntRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) - { - rangeBounds.add(minPrefixCoded); - rangeBounds.add(maxPrefixCoded); + + // if all above fails, we go forward to the next enum, if one is available + currentTerm.reset(); + while (rangeBounds.size() >= 2) { + BOOST_ASSERT(rangeBounds.size() % 2 == 0); + // close the current enum and read next bounds + if (actualEnum) { + actualEnum->close(); + actualEnum.reset(); + } + String lowerBound(rangeBounds.removeFirst()); + currentUpperBound = rangeBounds.removeFirst(); + // create a new enum + actualEnum = 
reader->terms(termTemplate->createTerm(lowerBound)); + currentTerm = actualEnum->term(); + if (currentTerm && termCompare(currentTerm)) { + return true; + } + // clear the current term for next iteration + currentTerm.reset(); } + + // no more sub-range enums available + BOOST_ASSERT(rangeBounds.empty() && !currentTerm); + return false; +} + +void NumericRangeTermEnum::close() { + rangeBounds.clear(); + currentUpperBound.clear(); + FilteredTermEnum::close(); +} + +NumericLongRangeBuilder::NumericLongRangeBuilder(Collection rangeBounds) { + this->rangeBounds = rangeBounds; +} + +NumericLongRangeBuilder::~NumericLongRangeBuilder() { +} + +void NumericLongRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { + rangeBounds.add(minPrefixCoded); + rangeBounds.add(maxPrefixCoded); +} + +NumericIntRangeBuilder::NumericIntRangeBuilder(Collection rangeBounds) { + this->rangeBounds = rangeBounds; +} + +NumericIntRangeBuilder::~NumericIntRangeBuilder() { +} + +void NumericIntRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { + rangeBounds.add(minPrefixCoded); + rangeBounds.add(maxPrefixCoded); +} + } diff --git a/src/core/search/ParallelMultiSearcher.cpp b/src/core/search/ParallelMultiSearcher.cpp index 7c1722a9..f3bf4af0 100644 --- a/src/core/search/ParallelMultiSearcher.cpp +++ b/src/core/search/ParallelMultiSearcher.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,87 +15,84 @@ #include "TopFieldDocs.h" #include "ThreadPool.h" -namespace Lucene -{ - ParallelMultiSearcher::ParallelMultiSearcher(Collection searchables) : MultiSearcher(searchables) - { +namespace Lucene { + +ParallelMultiSearcher::ParallelMultiSearcher(Collection searchables) : MultiSearcher(searchables) { +} + +ParallelMultiSearcher::~ParallelMultiSearcher() { +} + +int32_t ParallelMultiSearcher::docFreq(const TermPtr& term) { + ThreadPoolPtr threadPool(ThreadPool::getInstance()); + Collection searchThreads(Collection::newInstance(searchables.size())); + for (int32_t i = 0; i < searchables.size(); ++i) { + searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&Searchable::docFreq), searchables[i], term))); } - - ParallelMultiSearcher::~ParallelMultiSearcher() - { + int32_t docFreq = 0; + for (int32_t i = 0; i < searchThreads.size(); ++i) { + docFreq += searchThreads[i]->get(); } - - int32_t ParallelMultiSearcher::docFreq(TermPtr term) - { - ThreadPoolPtr threadPool(ThreadPool::getInstance()); - Collection searchThreads(Collection::newInstance(searchables.size())); - for (int32_t i = 0; i < searchables.size(); ++i) - searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&Searchable::docFreq), searchables[i], term))); - int32_t docFreq = 0; - for (int32_t i = 0; i < searchThreads.size(); ++i) - docFreq += searchThreads[i]->get(); - return docFreq; + return docFreq; +} + +TopDocsPtr ParallelMultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { + HitQueuePtr hq(newLucene(n, false)); + SynchronizePtr lock(newInstance()); + ThreadPoolPtr threadPool(ThreadPool::getInstance()); + Collection searchThreads(Collection::newInstance(searchables.size())); + Collection multiSearcher(Collection::newInstance(searchables.size())); + for (int32_t i = 0; i < searchables.size(); ++i) { // search each 
searchable + multiSearcher[i] = newLucene(lock, searchables[i], weight, filter, n, hq, i, starts); + searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&MultiSearcherCallableNoSort::call), multiSearcher[i]))); } - - TopDocsPtr ParallelMultiSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n) - { - HitQueuePtr hq(newLucene(n, false)); - SynchronizePtr lock(newInstance()); - ThreadPoolPtr threadPool(ThreadPool::getInstance()); - Collection searchThreads(Collection::newInstance(searchables.size())); - Collection multiSearcher(Collection::newInstance(searchables.size())); - for (int32_t i = 0; i < searchables.size(); ++i) // search each searchable - { - multiSearcher[i] = newLucene(lock, searchables[i], weight, filter, n, hq, i, starts); - searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&MultiSearcherCallableNoSort::call), multiSearcher[i]))); - } - - int32_t totalHits = 0; - double maxScore = -std::numeric_limits::infinity(); - - for (int32_t i = 0; i < searchThreads.size(); ++i) - { - TopDocsPtr topDocs(searchThreads[i]->get()); - totalHits += topDocs->totalHits; - maxScore = std::max(maxScore, topDocs->maxScore); - } - - Collection scoreDocs(Collection::newInstance(hq->size())); - for (int32_t i = hq->size() - 1; i >= 0; --i) // put docs in array - scoreDocs[i] = hq->pop(); - - return newLucene(totalHits, scoreDocs, maxScore); + + int32_t totalHits = 0; + double maxScore = -std::numeric_limits::infinity(); + + for (int32_t i = 0; i < searchThreads.size(); ++i) { + TopDocsPtr topDocs(searchThreads[i]->get()); + totalHits += topDocs->totalHits; + maxScore = std::max(maxScore, topDocs->maxScore); } - - TopFieldDocsPtr ParallelMultiSearcher::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) - { - if (!sort) - boost::throw_exception(NullPointerException(L"sort must not be null")); - FieldDocSortedHitQueuePtr hq(newLucene(n)); - SynchronizePtr lock(newInstance()); - 
ThreadPoolPtr threadPool(ThreadPool::getInstance()); - Collection searchThreads(Collection::newInstance(searchables.size())); - Collection multiSearcher(Collection::newInstance(searchables.size())); - for (int32_t i = 0; i < searchables.size(); ++i) // search each searchable - { - multiSearcher[i] = newLucene(lock, searchables[i], weight, filter, n, hq, sort, i, starts); - searchThreads[i] = threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&MultiSearcherCallableWithSort::call), multiSearcher[i]))); - } - - int32_t totalHits = 0; - double maxScore = -std::numeric_limits::infinity(); - - for (int32_t i = 0; i < searchThreads.size(); ++i) - { - TopFieldDocsPtr topDocs(searchThreads[i]->get()); - totalHits += topDocs->totalHits; - maxScore = std::max(maxScore, topDocs->maxScore); - } - - Collection scoreDocs(Collection::newInstance(hq->size())); - for (int32_t i = hq->size() - 1; i >= 0; --i) // put docs in array - scoreDocs[i] = hq->pop(); - - return newLucene(totalHits, scoreDocs, hq->getFields(), maxScore); + + Collection scoreDocs(Collection::newInstance(hq->size())); + for (int32_t i = hq->size() - 1; i >= 0; --i) { // put docs in array + scoreDocs[i] = hq->pop(); } + + return newLucene(totalHits, scoreDocs, maxScore); +} + +TopFieldDocsPtr ParallelMultiSearcher::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { + if (!sort) { + boost::throw_exception(NullPointerException(L"sort must not be null")); + } + FieldDocSortedHitQueuePtr hq(newLucene(n)); + SynchronizePtr lock(newInstance()); + ThreadPoolPtr threadPool(ThreadPool::getInstance()); + Collection searchThreads(Collection::newInstance(searchables.size())); + Collection multiSearcher(Collection::newInstance(searchables.size())); + for (int32_t i = 0; i < searchables.size(); ++i) { // search each searchable + multiSearcher[i] = newLucene(lock, searchables[i], weight, filter, n, hq, sort, i, starts); + searchThreads[i] = 
threadPool->scheduleTask(boost::protect(boost::bind(boost::mem_fn(&MultiSearcherCallableWithSort::call), multiSearcher[i]))); + } + + int32_t totalHits = 0; + double maxScore = -std::numeric_limits::infinity(); + + for (int32_t i = 0; i < searchThreads.size(); ++i) { + TopFieldDocsPtr topDocs(searchThreads[i]->get()); + totalHits += topDocs->totalHits; + maxScore = std::max(maxScore, topDocs->maxScore); + } + + Collection scoreDocs(Collection::newInstance(hq->size())); + for (int32_t i = hq->size() - 1; i >= 0; --i) { // put docs in array + scoreDocs[i] = hq->pop(); + } + + return newLucene(totalHits, scoreDocs, hq->getFields(), maxScore); +} + } diff --git a/src/core/search/PhrasePositions.cpp b/src/core/search/PhrasePositions.cpp index 002976fa..6415f770 100644 --- a/src/core/search/PhrasePositions.cpp +++ b/src/core/search/PhrasePositions.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,63 +8,55 @@ #include "PhrasePositions.h" #include "TermPositions.h" -namespace Lucene -{ - PhrasePositions::PhrasePositions(TermPositionsPtr t, int32_t o) - { - doc = 0; - position = 0; - count = 0; - repeats = false; - - tp = t; - offset = o; - } - - PhrasePositions::~PhrasePositions() - { +namespace Lucene { + +PhrasePositions::PhrasePositions(const TermPositionsPtr& t, int32_t o) { + doc = 0; + position = 0; + count = 0; + repeats = false; + + tp = t; + offset = o; +} + +PhrasePositions::~PhrasePositions() { +} + +bool PhrasePositions::next() { + if (!tp->next()) { + tp->close(); // close stream + doc = INT_MAX; // sentinel value + return false; } - - bool PhrasePositions::next() - { - if (!tp->next()) - { - tp->close(); // close stream - doc = INT_MAX; // sentinel value - return false; - } - doc = tp->doc(); - position = 0; - return true; + doc = tp->doc(); + position = 0; + return true; +} + +bool PhrasePositions::skipTo(int32_t target) { + if (!tp->skipTo(target)) { + tp->close(); // close stream + doc = INT_MAX; // sentinel value + return false; } - - bool PhrasePositions::skipTo(int32_t target) - { - if (!tp->skipTo(target)) - { - tp->close(); // close stream - doc = INT_MAX; // sentinel value - return false; - } - doc = tp->doc(); - position = 0; + doc = tp->doc(); + position = 0; + return true; +} + +void PhrasePositions::firstPosition() { + count = tp->freq(); // read first pos + nextPosition(); +} + +bool PhrasePositions::nextPosition() { + if (count-- > 0) { // read subsequent pos's + position = tp->nextPosition() - offset; return true; + } else { + return false; } - - void PhrasePositions::firstPosition() - { - count = tp->freq(); // read first pos - nextPosition(); - } - - bool PhrasePositions::nextPosition() - { - if (count-- > 0) // read subsequent pos's - { - position = tp->nextPosition() - offset; - return true; - } - else - return false; - } +} + } diff --git 
a/src/core/search/PhraseQuery.cpp b/src/core/search/PhraseQuery.cpp index ba7e49d2..d31c02a1 100644 --- a/src/core/search/PhraseQuery.cpp +++ b/src/core/search/PhraseQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -18,275 +18,264 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - PhraseQuery::PhraseQuery() - { - terms = Collection::newInstance(); - positions = Collection::newInstance(); - maxPosition = 0; - slop = 0; - } - - PhraseQuery::~PhraseQuery() - { - } - - void PhraseQuery::setSlop(int32_t slop) - { - this->slop = slop; - } - - int32_t PhraseQuery::getSlop() - { - return slop; +namespace Lucene { + +PhraseQuery::PhraseQuery() { + terms = Collection::newInstance(); + positions = Collection::newInstance(); + maxPosition = 0; + slop = 0; +} + +PhraseQuery::~PhraseQuery() { +} + +void PhraseQuery::setSlop(int32_t slop) { + this->slop = slop; +} + +int32_t PhraseQuery::getSlop() { + return slop; +} + +void PhraseQuery::add(const TermPtr& term) { + int32_t position = 0; + if (!positions.empty()) { + position = positions[positions.size() - 1] + 1; } - - void PhraseQuery::add(TermPtr term) - { - int32_t position = 0; - if (!positions.empty()) - position = positions[positions.size() - 1] + 1; - add(term, position); + add(term, position); +} + +void PhraseQuery::add(const TermPtr& term, int32_t position) { + if (terms.empty()) { + field = term->field(); + } else if (term->field() != field) { + boost::throw_exception(IllegalArgumentException(L"All phrase terms must be in the same field: " + term->toString())); } - - void PhraseQuery::add(TermPtr term, int32_t position) - 
{ - if (terms.empty()) - field = term->field(); - else if (term->field() != field) - boost::throw_exception(IllegalArgumentException(L"All phrase terms must be in the same field: " + term->toString())); - - terms.add(term); - positions.add(position); - if (position > maxPosition) - maxPosition = position; + + terms.add(term); + positions.add(position); + if (position > maxPosition) { + maxPosition = position; } - - Collection PhraseQuery::getTerms() - { - return terms; +} + +Collection PhraseQuery::getTerms() { + return terms; +} + +Collection PhraseQuery::getPositions() { + return positions; +} + +WeightPtr PhraseQuery::createWeight(const SearcherPtr& searcher) { + if (terms.size() == 1) { // optimize one-term case + QueryPtr termQuery(newLucene(terms[0])); + termQuery->setBoost(getBoost()); + return termQuery->createWeight(searcher); } - - Collection PhraseQuery::getPositions() - { - return positions; + return newLucene(shared_from_this(), searcher); +} + +void PhraseQuery::extractTerms(SetTerm terms) { + terms.addAll(this->terms.begin(), this->terms.end()); +} + +String PhraseQuery::toString(const String& field) { + StringStream buffer; + if (this->field != field) { + buffer << this->field << L":"; } - - WeightPtr PhraseQuery::createWeight(SearcherPtr searcher) - { - if (terms.size() == 1) // optimize one-term case - { - QueryPtr termQuery(newLucene(terms[0])); - termQuery->setBoost(getBoost()); - return termQuery->createWeight(searcher); + buffer << L"\""; + Collection pieces(Collection::newInstance(maxPosition + 1)); + for (int32_t i = 0; i < terms.size(); ++i) { + int32_t pos = positions[i]; + String s(pieces[pos]); + if (!s.empty()) { + s += L"|"; } - return newLucene(shared_from_this(), searcher); + s += terms[i]->text(); + pieces[pos] = s; } - - void PhraseQuery::extractTerms(SetTerm terms) - { - terms.addAll(this->terms.begin(), this->terms.end()); - } - - String PhraseQuery::toString(const String& field) - { - StringStream buffer; - if (this->field != 
field) - buffer << this->field << L":"; - buffer << L"\""; - Collection pieces(Collection::newInstance(maxPosition + 1)); - for (int32_t i = 0; i < terms.size(); ++i) - { - int32_t pos = positions[i]; - String s(pieces[pos]); - if (!s.empty()) - s += L"|"; - s += terms[i]->text(); - pieces[pos] = s; - } - for (int32_t i = 0; i < pieces.size(); ++i) - { - if (i > 0) - buffer << L" "; - String s(pieces[i]); - buffer << (s.empty() ? L"?" : s); + for (int32_t i = 0; i < pieces.size(); ++i) { + if (i > 0) { + buffer << L" "; } - buffer << L"\""; - - if (slop != 0) - buffer << L"~" << slop; - - buffer << boostString(); - - return buffer.str(); - } - - bool PhraseQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - PhraseQueryPtr otherPhraseQuery(boost::dynamic_pointer_cast(other)); - if (!otherPhraseQuery) - return false; - - return (getBoost() == otherPhraseQuery->getBoost() && slop == otherPhraseQuery->slop && - terms.equals(otherPhraseQuery->terms, luceneEquals()) && positions.equals(otherPhraseQuery->positions)); + String s(pieces[i]); + buffer << (s.empty() ? L"?" : s); } - - int32_t PhraseQuery::hashCode() - { - return MiscUtils::doubleToIntBits(getBoost()) ^ slop ^ - MiscUtils::hashCode(terms.begin(), terms.end(), MiscUtils::hashLucene) ^ - MiscUtils::hashCode(positions.begin(), positions.end(), MiscUtils::hashNumeric); - } - - LuceneObjectPtr PhraseQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - PhraseQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); - cloneQuery->field = field; - cloneQuery->terms = terms; - cloneQuery->positions = positions; - cloneQuery->maxPosition = maxPosition; - cloneQuery->slop = slop; - return cloneQuery; + buffer << L"\""; + + if (slop != 0) { + buffer << L"~" << slop; } - - PhraseWeight::PhraseWeight(PhraseQueryPtr query, SearcherPtr searcher) - { - this->query = query; - this->similarity = query->getSimilarity(searcher); - this->value = 0.0; - this->idf = 0.0; - this->queryNorm = 0.0; - this->queryWeight = 0.0; - - this->idfExp = similarity->idfExplain(query->terms, searcher); - idf = idfExp->getIdf(); + + buffer << boostString(); + + return buffer.str(); +} + +bool PhraseQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - PhraseWeight::~PhraseWeight() - { + + PhraseQueryPtr otherPhraseQuery(boost::dynamic_pointer_cast(other)); + if (!otherPhraseQuery) { + return false; } - - String PhraseWeight::toString() - { - return L"weight(" + query->toString() + L")"; + + return (getBoost() == otherPhraseQuery->getBoost() && slop == otherPhraseQuery->slop && + terms.equals(otherPhraseQuery->terms, luceneEquals()) && positions.equals(otherPhraseQuery->positions)); +} + +int32_t PhraseQuery::hashCode() { + return MiscUtils::doubleToIntBits(getBoost()) ^ slop ^ + MiscUtils::hashCode(terms.begin(), terms.end(), MiscUtils::hashLucene) ^ + MiscUtils::hashCode(positions.begin(), positions.end(), MiscUtils::hashNumeric); +} + +LuceneObjectPtr PhraseQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? 
other : newLucene(); + PhraseQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); + cloneQuery->field = field; + cloneQuery->terms = terms; + cloneQuery->positions = positions; + cloneQuery->maxPosition = maxPosition; + cloneQuery->slop = slop; + return cloneQuery; +} + +PhraseWeight::PhraseWeight(const PhraseQueryPtr& query, const SearcherPtr& searcher) { + this->query = query; + this->similarity = query->getSimilarity(searcher); + this->value = 0.0; + this->idf = 0.0; + this->queryNorm = 0.0; + this->queryWeight = 0.0; + + this->idfExp = similarity->idfExplain(query->terms, searcher); + idf = idfExp->getIdf(); +} + +PhraseWeight::~PhraseWeight() { +} + +String PhraseWeight::toString() { + return L"weight(" + query->toString() + L")"; +} + +QueryPtr PhraseWeight::getQuery() { + return query; +} + +double PhraseWeight::getValue() { + return value; +} + +double PhraseWeight::sumOfSquaredWeights() { + queryWeight = idf * getQuery()->getBoost(); // compute query weight + return queryWeight * queryWeight; // square it +} + +void PhraseWeight::normalize(double norm) { + queryNorm = norm; + queryWeight *= queryNorm; // normalize query weight + value = queryWeight * idf; // idf for document +} + +ScorerPtr PhraseWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + if (query->terms.empty()) { // optimize zero-term case + return ScorerPtr(); } - - QueryPtr PhraseWeight::getQuery() - { - return query; + + Collection tps(Collection::newInstance(query->terms.size())); + for (int32_t i = 0; i < tps.size(); ++i) { + TermPositionsPtr p(reader->termPositions(query->terms[i])); + if (!p) { + return ScorerPtr(); + } + tps[i] = p; } - - double PhraseWeight::getValue() - { - return value; + + if (query->slop == 0) { // optimize exact case + return newLucene(shared_from_this(), tps, query->getPositions(), similarity, reader->norms(query->field)); + } else { + return newLucene(shared_from_this(), tps, query->getPositions(), 
similarity, query->slop, reader->norms(query->field)); } - - double PhraseWeight::sumOfSquaredWeights() - { - queryWeight = idf * getQuery()->getBoost(); // compute query weight - return queryWeight * queryWeight; // square it +} + +ExplanationPtr PhraseWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + ExplanationPtr result(newLucene()); + result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); + + StringStream docFreqsBuffer; + StringStream queryBuffer; + queryBuffer << L"\""; + docFreqsBuffer << idfExp->explain(); + for (Collection::iterator term = query->terms.begin(); term != query->terms.end(); ++term) { + if (term != query->terms.begin()) { + queryBuffer << L" "; + } + queryBuffer << (*term)->text(); } - - void PhraseWeight::normalize(double norm) - { - queryNorm = norm; - queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + queryBuffer << L"\""; + + ExplanationPtr idfExpl(newLucene(idf, L"idf(" + query->field + L":" + docFreqsBuffer.str() + L")")); + + // explain query weight + ExplanationPtr queryExpl(newLucene()); + queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); + + ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); + if (query->getBoost() != 1.0) { + queryExpl->addDetail(boostExpl); } - - ScorerPtr PhraseWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - if (query->terms.empty()) // optimize zero-term case - return ScorerPtr(); - - Collection tps(Collection::newInstance(query->terms.size())); - for (int32_t i = 0; i < tps.size(); ++i) - { - TermPositionsPtr p(reader->termPositions(query->terms[i])); - if (!p) - return ScorerPtr(); - tps[i] = p; - } - - if (query->slop == 0) // optimize exact case - return newLucene(shared_from_this(), tps, query->getPositions(), similarity, reader->norms(query->field)); - else - return newLucene(shared_from_this(), tps, 
query->getPositions(), similarity, query->slop, reader->norms(query->field)); + queryExpl->addDetail(idfExpl); + + ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); + queryExpl->addDetail(queryNormExpl); + + queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); + result->addDetail(queryExpl); + + // explain field weight + ExplanationPtr fieldExpl(newLucene()); + fieldExpl->setDescription(L"fieldWeight(" + query->field + L":" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); + + PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast(scorer(reader, true, false))); + if (!phraseScorer) { + return newLucene(0.0, L"no matching docs"); } - - ExplanationPtr PhraseWeight::explain(IndexReaderPtr reader, int32_t doc) - { - ExplanationPtr result(newLucene()); - result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); - - StringStream docFreqsBuffer; - StringStream queryBuffer; - queryBuffer << L"\""; - docFreqsBuffer << idfExp->explain(); - for (Collection::iterator term = query->terms.begin(); term != query->terms.end(); ++term) - { - if (term != query->terms.begin()) - queryBuffer << L" "; - queryBuffer << (*term)->text(); - } - queryBuffer << L"\""; - - ExplanationPtr idfExpl(newLucene(idf, L"idf(" + query->field + L":" + docFreqsBuffer.str() + L")")); - - // explain query weight - ExplanationPtr queryExpl(newLucene()); - queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); - - ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); - if (query->getBoost() != 1.0) - queryExpl->addDetail(boostExpl); - queryExpl->addDetail(idfExpl); - - ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); - queryExpl->addDetail(queryNormExpl); - - queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); - result->addDetail(queryExpl); - - // explain field 
weight - ExplanationPtr fieldExpl(newLucene()); - fieldExpl->setDescription(L"fieldWeight(" + query->field + L":" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); - - PhraseScorerPtr phraseScorer(boost::dynamic_pointer_cast(scorer(reader, true, false))); - if (!phraseScorer) - return newLucene(0.0, L"no matching docs"); - - ExplanationPtr tfExplanation(newLucene()); - int32_t d = phraseScorer->advance(doc); - double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0; - tfExplanation->setValue(similarity->tf(phraseFreq)); - tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); - - fieldExpl->addDetail(tfExplanation); - fieldExpl->addDetail(idfExpl); - - ExplanationPtr fieldNormExpl(newLucene()); - ByteArray fieldNorms(reader->norms(query->field)); - double fieldNorm = fieldNorms ? Similarity::decodeNorm(fieldNorms[doc]) : 1.0; - fieldNormExpl->setValue(fieldNorm); - fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")"); - fieldExpl->addDetail(fieldNormExpl); - - fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); - - result->addDetail(fieldExpl); - - // combine them - result->setValue(queryExpl->getValue() * fieldExpl->getValue()); - - if (queryExpl->getValue() == 1.0) - return fieldExpl; - - return result; + + ExplanationPtr tfExplanation(newLucene()); + int32_t d = phraseScorer->advance(doc); + double phraseFreq = d == doc ? phraseScorer->currentFreq() : 0.0; + tfExplanation->setValue(similarity->tf(phraseFreq)); + tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); + + fieldExpl->addDetail(tfExplanation); + fieldExpl->addDetail(idfExpl); + + ExplanationPtr fieldNormExpl(newLucene()); + ByteArray fieldNorms(reader->norms(query->field)); + double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; + fieldNormExpl->setValue(fieldNorm); + fieldNormExpl->setDescription(L"fieldNorm(field=" + query->field + L", doc=" + StringUtils::toString(doc) + L")"); + fieldExpl->addDetail(fieldNormExpl); + + fieldExpl->setValue(tfExplanation->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); + + result->addDetail(fieldExpl); + + // combine them + result->setValue(queryExpl->getValue() * fieldExpl->getValue()); + + if (queryExpl->getValue() == 1.0) { + return fieldExpl; } + + return result; +} + } diff --git a/src/core/search/PhraseQueue.cpp b/src/core/search/PhraseQueue.cpp index c74e5e79..b88e1432 100644 --- a/src/core/search/PhraseQueue.cpp +++ b/src/core/search/PhraseQueue.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,30 +8,29 @@ #include "PhraseQueue.h" #include "PhrasePositions.h" -namespace Lucene -{ - PhraseQueue::PhraseQueue(int32_t size) : PriorityQueue(size) - { - } - - PhraseQueue::~PhraseQueue() - { - } - - bool PhraseQueue::lessThan(const PhrasePositionsPtr& first, const PhrasePositionsPtr& second) - { - if (first->doc == second->doc) - { - if (first->position == second->position) - { - // same doc and pp.position, so decide by actual term positions. - // rely on: pp.position == tp.position - offset. 
+namespace Lucene { + +PhraseQueue::PhraseQueue(int32_t size) : PriorityQueue(size) { +} + +PhraseQueue::~PhraseQueue() { +} + +inline bool PhraseQueue::lessThan(const PhrasePositionsStar& first, const PhrasePositionsStar& second) { + if (first && second) { + if (first->doc == second->doc) { + if (first->position == second->position) { + // same doc and pp.position, so decide by actual term positions. + // rely on: pp.position == tp.position - offset. return first->offset < second->offset; - } - else + } else { return first->position < second->position; - } - else + } + } else { return first->doc < second->doc; + } } + return first ? false : true; +} + } diff --git a/src/core/search/PhraseScorer.cpp b/src/core/search/PhraseScorer.cpp index 78c1f9c6..15ec1428 100644 --- a/src/core/search/PhraseScorer.cpp +++ b/src/core/search/PhraseScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,146 +11,140 @@ #include "Weight.h" #include "Similarity.h" -namespace Lucene -{ - PhraseScorer::PhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, ByteArray norms) : Scorer(similarity) - { - this->firstTime = true; - this->more = true; - this->freq = 0.0; - - this->norms = norms; - this->weight = weight; - this->value = weight->getValue(); - - // convert tps to a list of phrase positions. - // Note: phrase-position differs from term-position in that its position reflects the phrase offset: pp.pos = tp.pos - offset. - // This allows to easily identify a matching (exact) phrase when all PhrasePositions have exactly the same position. 
- for (int32_t i = 0; i < tps.size(); ++i) - { - PhrasePositionsPtr pp(newLucene(tps[i], offsets[i])); - if (last) // add next to end of list - last->_next = pp; - else - first = pp; - last = pp; +namespace Lucene { + +PhraseScorer::PhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, ByteArray norms) : Scorer(similarity) { + this->firstTime = true; + this->more = true; + this->freq = 0.0; + + this->norms = norms; + this->weight = weight; + this->value = weight->getValue(); + + // convert tps to a list of phrase positions. + // Note: phrase-position differs from term-position in that its position reflects the phrase offset: pp.pos = tp.pos - offset. + // This allows to easily identify a matching (exact) phrase when all PhrasePositions have exactly the same position. + for (int32_t i = 0; i < tps.size(); ++i) { + PhrasePositionsPtr pp(newLucene(tps[i], offsets[i])); + auto* __pp = pp.get(); + if (__last) { // add next to end of list + __last->__next = __pp; + } else { + __first = __pp; } - - pq = newLucene(tps.size()); // construct empty pq - first->doc = -1; + __last = __pp; + _holds.emplace_back(pp); } - - PhraseScorer::~PhraseScorer() - { + + pq = newLucene(tps.size()); // construct empty pq + __first->doc = -1; +} + +PhraseScorer::~PhraseScorer() { +} + +int32_t PhraseScorer::docID() { + return __first->doc; +} + +int32_t PhraseScorer::nextDoc() { + if (firstTime) { + init(); + firstTime = false; + } else if (more) { + more = __last->next(); // trigger further scanning } - - int32_t PhraseScorer::docID() - { - return first->doc; + if (!doNext()) { + __first->doc = NO_MORE_DOCS; } - - int32_t PhraseScorer::nextDoc() - { - if (firstTime) - { - init(); - firstTime = false; + return __first->doc; +} + +bool PhraseScorer::doNext() { + while (more) { + while (more && __first->doc < __last->doc) { // find doc with all the terms + more = __first->skipTo(__last->doc); // skip first upto last and move it to the end + 
firstToLast(); } - else if (more) - more = last->next(); // trigger further scanning - if (!doNext()) - first->doc = NO_MORE_DOCS; - return first->doc; - } - - bool PhraseScorer::doNext() - { - while (more) - { - while (more && first->doc < last->doc) // find doc with all the terms - { - more = first->skipTo(last->doc); // skip first upto last and move it to the end - firstToLast(); - } - - if (more) - { - // found a doc with all of the terms - freq = phraseFreq(); // check for phrase - if (freq == 0.0) // no match - more = last->next(); // trigger further scanning - else - return true; + + if (more) { + // found a doc with all of the terms + freq = phraseFreq(); // check for phrase + if (freq == 0.0) { // no match + more = __last->next(); // trigger further scanning + } else { + return true; } } - return false; // no more matches - } - - double PhraseScorer::score() - { - double raw = getSimilarity()->tf(freq) * value; // raw score - return !norms ? raw : raw * Similarity::decodeNorm(norms[first->doc]); // normalize } - - int32_t PhraseScorer::advance(int32_t target) - { - firstTime = false; - for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) - more = pp->skipTo(target); - if (more) - sort(); // re-sort - if (!doNext()) - first->doc = NO_MORE_DOCS; - return first->doc; + return false; // no more matches +} + +double PhraseScorer::score() { + double raw = getSimilarity()->tf(freq) * value; // raw score + return !norms ? 
raw : raw * Similarity::decodeNorm(norms[__first->doc]); // normalize +} + +int32_t PhraseScorer::advance(int32_t target) { + firstTime = false; + for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { + more = __pp->skipTo(target); } - - double PhraseScorer::currentFreq() - { - return freq; + if (more) { + sort(); // re-sort } - - void PhraseScorer::init() - { - for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) - more = pp->next(); - if (more) - sort(); + if (!doNext()) { + __first->doc = NO_MORE_DOCS; } - - void PhraseScorer::sort() - { - pq->clear(); - for (PhrasePositionsPtr pp(first); more && pp; pp = pp->_next) - pq->add(pp); - pqToList(); + return __first->doc; +} + +double PhraseScorer::currentFreq() { + return freq; +} + +void PhraseScorer::init() { + for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { + more = __pp->next(); } - - void PhraseScorer::pqToList() - { - last.reset(); - first.reset(); - while (pq->top()) - { - PhrasePositionsPtr pp(pq->pop()); - if (last) // add next to end of list - last->_next = pp; - else - first = pp; - last = pp; - pp->_next.reset(); - } + if (more) { + sort(); } - - void PhraseScorer::firstToLast() - { - last->_next = first; // move first to end of list - last = first; - first = first->_next; - last->_next.reset(); +} + +void PhraseScorer::sort() { + pq->clear(); + for (auto* __pp = __first; more && __pp; __pp = __pp->__next) { + pq->add(__pp); } - - String PhraseScorer::toString() - { - return L"scorer(" + weight->toString() + L")"; + pqToList(); +} + +void PhraseScorer::pqToList() { + __last = nullptr; + __first = nullptr; + while (pq->top()) { + auto* __pp = pq->pop(); + if (__last) { // add next to end of list + __last->__next = __pp; + } else { + __first = __pp; + } + __last = __pp; + __pp->__next = nullptr; } } + +void PhraseScorer::firstToLast() { + __last->__next = __first; // move first to end of list + __last = __first; + __first = __first->__next; + __last->__next = nullptr; +} 
+ +String PhraseScorer::toString() { + return L"scorer(" + weight->toString() + L")"; +} + +} diff --git a/src/core/search/PositiveScoresOnlyCollector.cpp b/src/core/search/PositiveScoresOnlyCollector.cpp index 0a61e982..b9adbb46 100644 --- a/src/core/search/PositiveScoresOnlyCollector.cpp +++ b/src/core/search/PositiveScoresOnlyCollector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,37 +8,33 @@ #include "PositiveScoresOnlyCollector.h" #include "ScoreCachingWrappingScorer.h" -namespace Lucene -{ - PositiveScoresOnlyCollector::PositiveScoresOnlyCollector(CollectorPtr collector) - { - this->collector = collector; - } - - PositiveScoresOnlyCollector::~PositiveScoresOnlyCollector() - { - } - - void PositiveScoresOnlyCollector::collect(int32_t doc) - { - if (scorer->score() > 0) - collector->collect(doc); - } - - void PositiveScoresOnlyCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - collector->setNextReader(reader, docBase); - } - - void PositiveScoresOnlyCollector::setScorer(ScorerPtr scorer) - { - // Set a ScoreCachingWrappingScorer in case the wrapped Collector will call score() also. 
- this->scorer = newLucene(scorer); - collector->setScorer(this->scorer); - } - - bool PositiveScoresOnlyCollector::acceptsDocsOutOfOrder() - { - return collector->acceptsDocsOutOfOrder(); +namespace Lucene { + +PositiveScoresOnlyCollector::PositiveScoresOnlyCollector(const CollectorPtr& collector) { + this->collector = collector; +} + +PositiveScoresOnlyCollector::~PositiveScoresOnlyCollector() { +} + +void PositiveScoresOnlyCollector::collect(int32_t doc) { + if (scorer->score() > 0) { + collector->collect(doc); } } + +void PositiveScoresOnlyCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + collector->setNextReader(reader, docBase); +} + +void PositiveScoresOnlyCollector::setScorer(const ScorerPtr& scorer) { + // Set a ScoreCachingWrappingScorer in case the wrapped Collector will call score() also. + this->scorer = newLucene(scorer); + collector->setScorer(this->scorer); +} + +bool PositiveScoresOnlyCollector::acceptsDocsOutOfOrder() { + return collector->acceptsDocsOutOfOrder(); +} + +} diff --git a/src/core/search/PrefixFilter.cpp b/src/core/search/PrefixFilter.cpp index 17f41c97..72a31ae4 100644 --- a/src/core/search/PrefixFilter.cpp +++ b/src/core/search/PrefixFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,25 +9,22 @@ #include "PrefixQuery.h" #include "Term.h" -namespace Lucene -{ - PrefixFilter::PrefixFilter(TermPtr prefix) : MultiTermQueryWrapperFilter(newLucene(prefix)) - { - } - - PrefixFilter::~PrefixFilter() - { - } - - TermPtr PrefixFilter::getPrefix() - { - return boost::static_pointer_cast(query)->getPrefix(); - } - - String PrefixFilter::toString() - { - StringStream buffer; - buffer << L"PrefixFilter(" << getPrefix()->toString() << L")"; - return buffer.str(); - } +namespace Lucene { + +PrefixFilter::PrefixFilter(const TermPtr& prefix) : MultiTermQueryWrapperFilter(newLucene(prefix)) { +} + +PrefixFilter::~PrefixFilter() { +} + +TermPtr PrefixFilter::getPrefix() { + return boost::static_pointer_cast(query)->getPrefix(); +} + +String PrefixFilter::toString() { + StringStream buffer; + buffer << L"PrefixFilter(" << getPrefix()->toString() << L")"; + return buffer.str(); +} + } diff --git a/src/core/search/PrefixQuery.cpp b/src/core/search/PrefixQuery.cpp index 226a08f7..430b57e9 100644 --- a/src/core/search/PrefixQuery.cpp +++ b/src/core/search/PrefixQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,70 +10,68 @@ #include "Term.h" #include "MiscUtils.h" -namespace Lucene -{ - PrefixQuery::PrefixQuery(TermPtr prefix) - { - this->prefix = prefix; - } - - PrefixQuery::~PrefixQuery() - { - } - - TermPtr PrefixQuery::getPrefix() - { - return prefix; +namespace Lucene { + +PrefixQuery::PrefixQuery(const TermPtr& prefix) { + this->prefix = prefix; +} + +PrefixQuery::~PrefixQuery() { +} + +TermPtr PrefixQuery::getPrefix() { + return prefix; +} + +FilteredTermEnumPtr PrefixQuery::getEnum(const IndexReaderPtr& reader) { + return newLucene(reader, prefix); +} + +String PrefixQuery::toString(const String& field) { + StringStream buffer; + if (prefix->field() != field) { + buffer << prefix->field() << L":"; } - - FilteredTermEnumPtr PrefixQuery::getEnum(IndexReaderPtr reader) - { - return newLucene(reader, prefix); + buffer << prefix->text() << L"*" << boostString(); + return buffer.str(); +} + +LuceneObjectPtr PrefixQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(prefix)); + PrefixQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->prefix = prefix; + return cloneQuery; +} + +int32_t PrefixQuery::hashCode() { + int32_t prime = 31; + int32_t result = MultiTermQuery::hashCode(); + result = prime * result + (prefix ? prefix->hashCode() : 0); + return result; +} + +bool PrefixQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - String PrefixQuery::toString(const String& field) - { - StringStream buffer; - if (prefix->field() != field) - buffer << prefix->field() << L":"; - buffer << prefix->text() << L"*" << boostString(); - return buffer.str(); + if (!MultiTermQuery::equals(other)) { + return false; } - - LuceneObjectPtr PrefixQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = MultiTermQuery::clone(other ? 
other : newLucene(prefix)); - PrefixQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->prefix = prefix; - return cloneQuery; + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - int32_t PrefixQuery::hashCode() - { - int32_t prime = 31; - int32_t result = MultiTermQuery::hashCode(); - result = prime * result + (prefix ? prefix->hashCode() : 0); - return result; + PrefixQueryPtr otherPrefixQuery(boost::dynamic_pointer_cast(other)); + if (!otherPrefixQuery) { + return false; } - - bool PrefixQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!MultiTermQuery::equals(other)) + if (!prefix) { + if (otherPrefixQuery->prefix) { return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - PrefixQueryPtr otherPrefixQuery(boost::dynamic_pointer_cast(other)); - if (!otherPrefixQuery) - return false; - if (!prefix) - { - if (otherPrefixQuery->prefix) - return false; } - else if (!prefix->equals(otherPrefixQuery->prefix)) - return false; - return true; + } else if (!prefix->equals(otherPrefixQuery->prefix)) { + return false; } + return true; +} + } diff --git a/src/core/search/PrefixTermEnum.cpp b/src/core/search/PrefixTermEnum.cpp index fbc0458b..fa599f3f 100644 --- a/src/core/search/PrefixTermEnum.cpp +++ b/src/core/search/PrefixTermEnum.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,40 +10,36 @@ #include "IndexReader.h" #include "Term.h" -namespace Lucene -{ - PrefixTermEnum::PrefixTermEnum(IndexReaderPtr reader, TermPtr prefix) - { - this->_endEnum = false; - this->prefix = prefix; - - setEnum(reader->terms(newLucene(prefix->field(), prefix->text()))); - } - - PrefixTermEnum::~PrefixTermEnum() - { - } - - double PrefixTermEnum::difference() - { - return 1.0; - } - - bool PrefixTermEnum::endEnum() - { - return _endEnum; - } - - TermPtr PrefixTermEnum::getPrefixTerm() - { - return prefix; - } - - bool PrefixTermEnum::termCompare(TermPtr term) - { - if (term->field() == prefix->field() && boost::starts_with(term->text(), prefix->text())) - return true; - _endEnum = true; - return false; +namespace Lucene { + +PrefixTermEnum::PrefixTermEnum(const IndexReaderPtr& reader, const TermPtr& prefix) { + this->_endEnum = false; + this->prefix = prefix; + + setEnum(reader->terms(newLucene(prefix->field(), prefix->text()))); +} + +PrefixTermEnum::~PrefixTermEnum() { +} + +double PrefixTermEnum::difference() { + return 1.0; +} + +bool PrefixTermEnum::endEnum() { + return _endEnum; +} + +TermPtr PrefixTermEnum::getPrefixTerm() { + return prefix; +} + +bool PrefixTermEnum::termCompare(const TermPtr& term) { + if (term->field() == prefix->field() && boost::starts_with(term->text(), prefix->text())) { + return true; } + _endEnum = true; + return false; +} + } diff --git a/src/core/search/Query.cpp b/src/core/search/Query.cpp index 33aac962..e9d27145 100644 --- a/src/core/search/Query.cpp +++ b/src/core/search/Query.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,159 +11,151 @@ #include "Similarity.h" #include "MiscUtils.h" -namespace Lucene -{ - Query::Query() - { - boost = 1.0; - } - - Query::~Query() - { - } - - void Query::setBoost(double boost) - { - this->boost = boost; - } - - double Query::getBoost() - { - return boost; - } - - String Query::toString(const String& field) - { - return L""; // override - } - - String Query::toString() - { - return toString(L""); - } - - WeightPtr Query::createWeight(SearcherPtr searcher) - { - boost::throw_exception(UnsupportedOperationException()); - return WeightPtr(); - } - - WeightPtr Query::weight(SearcherPtr searcher) - { - QueryPtr query(searcher->rewrite(shared_from_this())); - WeightPtr weight(query->createWeight(searcher)); - double sum = weight->sumOfSquaredWeights(); - double norm = getSimilarity(searcher)->queryNorm(sum); - if (MiscUtils::isInfinite(norm) || MiscUtils::isNaN(norm)) - norm = 1.0; - weight->normalize(norm); - return weight; - } - - QueryPtr Query::rewrite(IndexReaderPtr reader) - { - return shared_from_this(); - } - - QueryPtr Query::combine(Collection queries) - { - SetQuery uniques(SetQuery::newInstance()); - for (Collection::iterator query = queries.begin(); query != queries.end(); ++query) - { - Collection clauses; - BooleanQueryPtr bq(boost::dynamic_pointer_cast(*query)); - // check if we can split the query into clauses - bool splittable = bq; - if (splittable) - { - splittable = bq->isCoordDisabled(); - clauses = bq->getClauses(); - for (Collection::iterator clause = clauses.begin(); splittable && clause != clauses.end(); ++clause) - splittable = ((*clause)->getOccur() == BooleanClause::SHOULD); +namespace Lucene { + +Query::Query() { + boost = 1.0; +} + +Query::~Query() { +} + +void Query::setBoost(double boost) { + this->boost = boost; +} + +double Query::getBoost() { + return boost; +} + +String Query::toString(const String& field) { + return L""; // override +} + 
+String Query::toString() { + return toString(L""); +} + +WeightPtr Query::createWeight(const SearcherPtr& searcher) { + boost::throw_exception(UnsupportedOperationException()); + return WeightPtr(); +} + +WeightPtr Query::weight(const SearcherPtr& searcher) { + QueryPtr query(searcher->rewrite(shared_from_this())); + WeightPtr weight(query->createWeight(searcher)); + double sum = weight->sumOfSquaredWeights(); + double norm = getSimilarity(searcher)->queryNorm(sum); + if (MiscUtils::isInfinite(norm) || MiscUtils::isNaN(norm)) { + norm = 1.0; + } + weight->normalize(norm); + return weight; +} + +QueryPtr Query::rewrite(const IndexReaderPtr& reader) { + return shared_from_this(); +} + +QueryPtr Query::combine(Collection queries) { + SetQuery uniques(SetQuery::newInstance()); + for (Collection::iterator query = queries.begin(); query != queries.end(); ++query) { + Collection clauses; + BooleanQueryPtr bq(boost::dynamic_pointer_cast(*query)); + // check if we can split the query into clauses + bool splittable = bq.get() != NULL; + if (splittable) { + splittable = bq->isCoordDisabled(); + clauses = bq->getClauses(); + for (Collection::iterator clause = clauses.begin(); splittable && clause != clauses.end(); ++clause) { + splittable = ((*clause)->getOccur() == BooleanClause::SHOULD); } - if (splittable) - { - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - uniques.add((*clause)->getQuery()); + } + if (splittable) { + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + uniques.add((*clause)->getQuery()); } - else - uniques.add(*query); + } else { + uniques.add(*query); } - // optimization: if we have just one query, just return it - if (uniques.size() == 1) - return *uniques.begin(); - BooleanQueryPtr result(newLucene(true)); - for (SetQuery::iterator query = uniques.begin(); query != uniques.end(); ++query) - result->add(*query, BooleanClause::SHOULD); - return result; } - - void 
Query::extractTerms(SetTerm terms) - { - // needs to be implemented by query subclasses - boost::throw_exception(UnsupportedOperationException()); + // optimization: if we have just one query, just return it + if (uniques.size() == 1) { + return *uniques.begin(); + } + BooleanQueryPtr result(newLucene(true)); + for (SetQuery::iterator query = uniques.begin(); query != uniques.end(); ++query) { + result->add(*query, BooleanClause::SHOULD); } - - QueryPtr Query::mergeBooleanQueries(Collection queries) - { - SetBooleanClause allClauses(SetBooleanClause::newInstance()); - for (Collection::iterator booleanQuery = queries.begin(); booleanQuery != queries.end(); ++booleanQuery) - { - for (Collection::iterator clause = (*booleanQuery)->begin(); clause != (*booleanQuery)->end(); ++clause) - allClauses.add(*clause); + return result; +} + +void Query::extractTerms(SetTerm terms) { + // needs to be implemented by query subclasses + boost::throw_exception(UnsupportedOperationException()); +} + +QueryPtr Query::mergeBooleanQueries(Collection queries) { + SetBooleanClause allClauses(SetBooleanClause::newInstance()); + for (Collection::iterator booleanQuery = queries.begin(); booleanQuery != queries.end(); ++booleanQuery) { + for (Collection::iterator clause = (*booleanQuery)->begin(); clause != (*booleanQuery)->end(); ++clause) { + allClauses.add(*clause); } - - bool coordDisabled = queries.empty() ? false : queries[0]->isCoordDisabled(); - BooleanQueryPtr result(newLucene(coordDisabled)); - for (SetBooleanClause::iterator clause2 = allClauses.begin(); clause2 != allClauses.end(); ++clause2) - result->add(*clause2); - return result; } - - SimilarityPtr Query::getSimilarity(SearcherPtr searcher) - { - return searcher->getSimilarity(); + + bool coordDisabled = queries.empty() ? 
false : queries[0]->isCoordDisabled(); + BooleanQueryPtr result(newLucene(coordDisabled)); + for (SetBooleanClause::iterator clause2 = allClauses.begin(); clause2 != allClauses.end(); ++clause2) { + result->add(*clause2); } - - LuceneObjectPtr Query::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene()); - QueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->boost = boost; - return cloneQuery; + return result; +} + +SimilarityPtr Query::getSimilarity(const SearcherPtr& searcher) { + return searcher->getSimilarity(); +} + +LuceneObjectPtr Query::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = LuceneObject::clone(other ? other : newLucene()); + QueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->boost = boost; + return cloneQuery; +} + +int32_t Query::hashCode() { + int32_t prime = 31; + int32_t result = 1; + result = prime * result + MiscUtils::doubleToIntBits(boost); + return result; +} + +bool Query::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t Query::hashCode() - { - int32_t prime = 31; - int32_t result = 1; - result = prime * result + MiscUtils::doubleToIntBits(boost); - return result; + if (!other) { + return false; } - - bool Query::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!other) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - QueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - return (boost == otherQuery->boost); + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - String Query::boostString() - { - double boost = getBoost(); - if (boost == 1.0) - return L""; - StringStream boostString; - boostString.precision(1); - boostString.setf(std::ios::fixed); - boostString << L"^" << boost; - return boostString.str(); + QueryPtr 
otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } + return (boost == otherQuery->boost); +} + +String Query::boostString() { + double boost = getBoost(); + if (boost == 1.0) { + return L""; + } + StringStream boostString; + boostString.precision(1); + boostString.setf(std::ios::fixed); + boostString << L"^" << boost; + return boostString.str(); +} + } diff --git a/src/core/search/QueryTermVector.cpp b/src/core/search/QueryTermVector.cpp index afa7d3ec..763d6a1c 100644 --- a/src/core/search/QueryTermVector.cpp +++ b/src/core/search/QueryTermVector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,126 +11,108 @@ #include "StringReader.h" #include "TermAttribute.h" -namespace Lucene -{ - QueryTermVector::QueryTermVector(Collection queryTerms) - { - terms = Collection::newInstance(); - termFreqs = Collection::newInstance(); - processTerms(queryTerms); - } - - QueryTermVector::QueryTermVector(const String& queryString, AnalyzerPtr analyzer) - { - terms = Collection::newInstance(); - termFreqs = Collection::newInstance(); - if (analyzer) - { - TokenStreamPtr stream(analyzer->tokenStream(L"", newLucene(queryString))); - if (stream) - { - Collection terms = Collection::newInstance(); - try - { - bool hasMoreTokens = false; - - stream->reset(); - TermAttributePtr termAtt(stream->addAttribute()); - +namespace Lucene { + +QueryTermVector::QueryTermVector(Collection queryTerms) { + terms = Collection::newInstance(); + termFreqs = Collection::newInstance(); + processTerms(queryTerms); +} + +QueryTermVector::QueryTermVector(const String& queryString, const AnalyzerPtr& 
analyzer) { + terms = Collection::newInstance(); + termFreqs = Collection::newInstance(); + if (analyzer) { + TokenStreamPtr stream(analyzer->tokenStream(L"", newLucene(queryString))); + if (stream) { + Collection terms = Collection::newInstance(); + try { + bool hasMoreTokens = false; + + stream->reset(); + TermAttributePtr termAtt(stream->addAttribute()); + + hasMoreTokens = stream->incrementToken(); + while (hasMoreTokens) { + terms.add(termAtt->term()); hasMoreTokens = stream->incrementToken(); - while (hasMoreTokens) - { - terms.add(termAtt->term()); - hasMoreTokens = stream->incrementToken(); - } - processTerms(terms); - } - catch (IOException&) - { } + processTerms(terms); + } catch (IOException&) { } } } - - QueryTermVector::~QueryTermVector() - { - } - - void QueryTermVector::processTerms(Collection queryTerms) - { - if (queryTerms) - { - std::sort(queryTerms.begin(), queryTerms.end()); - MapStringInt tmpSet(MapStringInt::newInstance()); - - // filter out duplicates - Collection tmpList(Collection::newInstance()); - Collection tmpFreqs(Collection::newInstance()); - int32_t j = 0; - for (int32_t i = 0; i < queryTerms.size(); ++i) - { - String term(queryTerms[i]); - MapStringInt::iterator position = tmpSet.find(term); - if (position == tmpSet.end()) - { - tmpSet.put(term, j++); - tmpList.add(term); - tmpFreqs.add(1); - } - else - { - int32_t freq = tmpFreqs[position->second]; - tmpFreqs[position->second] = freq + 1; - } +} + +QueryTermVector::~QueryTermVector() { +} + +void QueryTermVector::processTerms(Collection queryTerms) { + if (queryTerms) { + std::sort(queryTerms.begin(), queryTerms.end()); + MapStringInt tmpSet(MapStringInt::newInstance()); + + // filter out duplicates + Collection tmpList(Collection::newInstance()); + Collection tmpFreqs(Collection::newInstance()); + int32_t j = 0; + for (int32_t i = 0; i < queryTerms.size(); ++i) { + String term(queryTerms[i]); + MapStringInt::iterator position = tmpSet.find(term); + if (position == tmpSet.end()) { 
+ tmpSet.put(term, j++); + tmpList.add(term); + tmpFreqs.add(1); + } else { + int32_t freq = tmpFreqs[position->second]; + tmpFreqs[position->second] = freq + 1; } - terms = tmpList; - termFreqs = Collection::newInstance(tmpFreqs.size()); - int32_t i = 0; - for (Collection::iterator freq = tmpFreqs.begin(); freq != tmpFreqs.end(); ++freq) - termFreqs[i++] = *freq; } - } - - String QueryTermVector::toString() - { - StringStream buffer; - buffer << L"{"; - for (int32_t i = 0; i < terms.size(); ++i) - { - if (i > 0) - buffer << L", "; - buffer << terms[i] << L'/' << termFreqs[i]; + terms = tmpList; + termFreqs = Collection::newInstance(tmpFreqs.size()); + int32_t i = 0; + for (Collection::iterator freq = tmpFreqs.begin(); freq != tmpFreqs.end(); ++freq) { + termFreqs[i++] = *freq; } - buffer << L"}"; - return buffer.str(); - } - - int32_t QueryTermVector::size() - { - return terms.size(); } - - Collection QueryTermVector::getTerms() - { - return terms; - } - - Collection QueryTermVector::getTermFrequencies() - { - return termFreqs; - } - - int32_t QueryTermVector::indexOf(const String& term) - { - Collection::iterator search = std::lower_bound(terms.begin(), terms.end(), term); - return (search == terms.end() || term < *search) ? 
-1 : std::distance(terms.begin(), search); +} + +String QueryTermVector::toString() { + StringStream buffer; + buffer << L"{"; + for (int32_t i = 0; i < terms.size(); ++i) { + if (i > 0) { + buffer << L", "; + } + buffer << terms[i] << L'/' << termFreqs[i]; } - - Collection QueryTermVector::indexesOf(Collection terms, int32_t start, int32_t length) - { - Collection res(Collection::newInstance(length)); - for (int32_t i = 0; i < length; ++i) - res[i] = indexOf(terms[i]); - return res; + buffer << L"}"; + return buffer.str(); +} + +int32_t QueryTermVector::size() { + return terms.size(); +} + +Collection QueryTermVector::getTerms() { + return terms; +} + +Collection QueryTermVector::getTermFrequencies() { + return termFreqs; +} + +int32_t QueryTermVector::indexOf(const String& term) { + Collection::iterator search = std::lower_bound(terms.begin(), terms.end(), term); + return (search == terms.end() || term < *search) ? -1 : std::distance(terms.begin(), search); +} + +Collection QueryTermVector::indexesOf(Collection terms, int32_t start, int32_t length) { + Collection res(Collection::newInstance(length)); + for (int32_t i = 0; i < length; ++i) { + res[i] = indexOf(terms[i]); } + return res; +} + } diff --git a/src/core/search/QueryWrapperFilter.cpp b/src/core/search/QueryWrapperFilter.cpp index fb5b7c08..ef717a5b 100644 --- a/src/core/search/QueryWrapperFilter.cpp +++ b/src/core/search/QueryWrapperFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,58 +12,50 @@ #include "Scorer.h" #include "IndexSearcher.h" -namespace Lucene -{ - QueryWrapperFilter::QueryWrapperFilter(QueryPtr query) - { - this->query = query; - } - - QueryWrapperFilter::~QueryWrapperFilter() - { - } - - DocIdSetPtr QueryWrapperFilter::getDocIdSet(IndexReaderPtr reader) - { - WeightPtr weight(query->weight(newLucene(reader))); - return newLucene(reader, weight); - } - - String QueryWrapperFilter::toString() - { - return L"QueryWrapperFilter(" + query->toString() + L")"; - } - - bool QueryWrapperFilter::equals(LuceneObjectPtr other) - { - QueryWrapperFilterPtr otherQueryWrapperFilter(boost::dynamic_pointer_cast(other)); - if (!otherQueryWrapperFilter) - return false; - return this->query->equals(otherQueryWrapperFilter->query); - } - - int32_t QueryWrapperFilter::hashCode() - { - return query->hashCode() ^ 0x923F64B9; - } - - QueryWrapperFilterDocIdSet::QueryWrapperFilterDocIdSet(IndexReaderPtr reader, WeightPtr weight) - { - this->reader = reader; - this->weight = weight; - } - - QueryWrapperFilterDocIdSet::~QueryWrapperFilterDocIdSet() - { - } - - DocIdSetIteratorPtr QueryWrapperFilterDocIdSet::iterator() - { - return weight->scorer(reader, true, false); - } - - bool QueryWrapperFilterDocIdSet::isCacheable() - { +namespace Lucene { + +QueryWrapperFilter::QueryWrapperFilter(const QueryPtr& query) { + this->query = query; +} + +QueryWrapperFilter::~QueryWrapperFilter() { +} + +DocIdSetPtr QueryWrapperFilter::getDocIdSet(const IndexReaderPtr& reader) { + WeightPtr weight(query->weight(newLucene(reader))); + return newLucene(reader, weight); +} + +String QueryWrapperFilter::toString() { + return L"QueryWrapperFilter(" + query->toString() + L")"; +} + +bool QueryWrapperFilter::equals(const LuceneObjectPtr& other) { + QueryWrapperFilterPtr otherQueryWrapperFilter(boost::dynamic_pointer_cast(other)); + if (!otherQueryWrapperFilter) { return false; } + return 
this->query->equals(otherQueryWrapperFilter->query); +} + +int32_t QueryWrapperFilter::hashCode() { + return query->hashCode() ^ 0x923F64B9; +} + +QueryWrapperFilterDocIdSet::QueryWrapperFilterDocIdSet(const IndexReaderPtr& reader, const WeightPtr& weight) { + this->reader = reader; + this->weight = weight; +} + +QueryWrapperFilterDocIdSet::~QueryWrapperFilterDocIdSet() { +} + +DocIdSetIteratorPtr QueryWrapperFilterDocIdSet::iterator() { + return weight->scorer(reader, true, false); +} + +bool QueryWrapperFilterDocIdSet::isCacheable() { + return false; +} + } diff --git a/src/core/search/ReqExclScorer.cpp b/src/core/search/ReqExclScorer.cpp index ee257591..04a2034e 100644 --- a/src/core/search/ReqExclScorer.cpp +++ b/src/core/search/ReqExclScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,89 +7,78 @@ #include "LuceneInc.h" #include "ReqExclScorer.h" -namespace Lucene -{ - ReqExclScorer::ReqExclScorer(ScorerPtr reqScorer, DocIdSetIteratorPtr exclDisi) : Scorer(SimilarityPtr()) // No similarity used. - { - this->reqScorer = reqScorer; - this->exclDisi = exclDisi; - this->doc = -1; +namespace Lucene { + +ReqExclScorer::ReqExclScorer(const ScorerPtr& reqScorer, const DocIdSetIteratorPtr& exclDisi) : Scorer(SimilarityPtr()) { // No similarity used. 
+ this->reqScorer = reqScorer; + this->exclDisi = exclDisi; + this->doc = -1; +} + +ReqExclScorer::~ReqExclScorer() { +} + +int32_t ReqExclScorer::nextDoc() { + if (!reqScorer) { + return doc; } - - ReqExclScorer::~ReqExclScorer() - { + doc = reqScorer->nextDoc(); + if (doc == NO_MORE_DOCS) { + reqScorer.reset(); // exhausted, nothing left + return doc; } - - int32_t ReqExclScorer::nextDoc() - { - if (!reqScorer) - return doc; - doc = reqScorer->nextDoc(); - if (doc == NO_MORE_DOCS) - { - reqScorer.reset(); // exhausted, nothing left - return doc; - } - if (!exclDisi) - return doc; - doc = toNonExcluded(); + if (!exclDisi) { return doc; } - - int32_t ReqExclScorer::toNonExcluded() - { - int32_t exclDoc = exclDisi->docID(); - int32_t reqDoc = reqScorer->docID(); // may be excluded - do - { - if (reqDoc < exclDoc) - return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded - else if (reqDoc > exclDoc) - { - exclDoc = exclDisi->advance(reqDoc); - if (exclDoc == NO_MORE_DOCS) - { - exclDisi.reset(); // exhausted, no more exclusions - return reqDoc; - } - if (exclDoc > reqDoc) - return reqDoc; // not excluded + doc = toNonExcluded(); + return doc; +} + +int32_t ReqExclScorer::toNonExcluded() { + int32_t exclDoc = exclDisi->docID(); + int32_t reqDoc = reqScorer->docID(); // may be excluded + do { + if (reqDoc < exclDoc) { + return reqDoc; // reqScorer advanced to before exclScorer, ie. 
not excluded + } else if (reqDoc > exclDoc) { + exclDoc = exclDisi->advance(reqDoc); + if (exclDoc == NO_MORE_DOCS) { + exclDisi.reset(); // exhausted, no more exclusions + return reqDoc; + } + if (exclDoc > reqDoc) { + return reqDoc; // not excluded } } - while ((reqDoc = reqScorer->nextDoc()) != NO_MORE_DOCS); - reqScorer.reset(); // exhausted, nothing left - return NO_MORE_DOCS; - } - - int32_t ReqExclScorer::docID() - { + } while ((reqDoc = reqScorer->nextDoc()) != NO_MORE_DOCS); + reqScorer.reset(); // exhausted, nothing left + return NO_MORE_DOCS; +} + +int32_t ReqExclScorer::docID() { + return doc; +} + +double ReqExclScorer::score() { + return reqScorer->score(); // reqScorer may be null when next() or skipTo() already return false +} + +int32_t ReqExclScorer::advance(int32_t target) { + if (!reqScorer) { + doc = NO_MORE_DOCS; return doc; } - - double ReqExclScorer::score() - { - return reqScorer->score(); // reqScorer may be null when next() or skipTo() already return false + if (!exclDisi) { + doc = reqScorer->advance(target); + return doc; } - - int32_t ReqExclScorer::advance(int32_t target) - { - if (!reqScorer) - { - doc = NO_MORE_DOCS; - return doc; - } - if (!exclDisi) - { - doc = reqScorer->advance(target); - return doc; - } - if (reqScorer->advance(target) == NO_MORE_DOCS) - { - reqScorer.reset(); - doc = NO_MORE_DOCS; - return doc; - } - doc = toNonExcluded(); + if (reqScorer->advance(target) == NO_MORE_DOCS) { + reqScorer.reset(); + doc = NO_MORE_DOCS; return doc; } + doc = toNonExcluded(); + return doc; +} + } diff --git a/src/core/search/ReqOptSumScorer.cpp b/src/core/search/ReqOptSumScorer.cpp index af235b53..fb5c0a01 100644 --- a/src/core/search/ReqOptSumScorer.cpp +++ b/src/core/search/ReqOptSumScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,47 +7,42 @@ #include "LuceneInc.h" #include "ReqOptSumScorer.h" -namespace Lucene -{ - ReqOptSumScorer::ReqOptSumScorer(ScorerPtr reqScorer, ScorerPtr optScorer) : Scorer(SimilarityPtr()) // No similarity used. - { - this->reqScorer = reqScorer; - this->optScorer = optScorer; - } - - ReqOptSumScorer::~ReqOptSumScorer() - { - } - - int32_t ReqOptSumScorer::nextDoc() - { - return reqScorer->nextDoc(); - } - - int32_t ReqOptSumScorer::advance(int32_t target) - { - return reqScorer->advance(target); - } - - int32_t ReqOptSumScorer::docID() - { - return reqScorer->docID(); +namespace Lucene { + +ReqOptSumScorer::ReqOptSumScorer(const ScorerPtr& reqScorer, const ScorerPtr& optScorer) : Scorer(SimilarityPtr()) { // No similarity used. + this->reqScorer = reqScorer; + this->optScorer = optScorer; +} + +ReqOptSumScorer::~ReqOptSumScorer() { +} + +int32_t ReqOptSumScorer::nextDoc() { + return reqScorer->nextDoc(); +} + +int32_t ReqOptSumScorer::advance(int32_t target) { + return reqScorer->advance(target); +} + +int32_t ReqOptSumScorer::docID() { + return reqScorer->docID(); +} + +double ReqOptSumScorer::score() { + int32_t curDoc = reqScorer->docID(); + double reqScore = reqScorer->score(); + if (!optScorer) { + return reqScore; } - - double ReqOptSumScorer::score() - { - int32_t curDoc = reqScorer->docID(); - double reqScore = reqScorer->score(); - if (!optScorer) - return reqScore; - - int32_t optScorerDoc = optScorer->docID(); - if (optScorerDoc < curDoc && (optScorerDoc = optScorer->advance(curDoc)) == NO_MORE_DOCS) - { - optScorer.reset(); - return reqScore; - } - - return optScorerDoc == curDoc ? 
reqScore + optScorer->score() : reqScore; + + int32_t optScorerDoc = optScorer->docID(); + if (optScorerDoc < curDoc && (optScorerDoc = optScorer->advance(curDoc)) == NO_MORE_DOCS) { + optScorer.reset(); + return reqScore; } + + return optScorerDoc == curDoc ? reqScore + optScorer->score() : reqScore; +} + } diff --git a/src/core/search/ScoreCachingWrappingScorer.cpp b/src/core/search/ScoreCachingWrappingScorer.cpp index 30fa519c..a0d4dc2e 100644 --- a/src/core/search/ScoreCachingWrappingScorer.cpp +++ b/src/core/search/ScoreCachingWrappingScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,58 +7,49 @@ #include "LuceneInc.h" #include "ScoreCachingWrappingScorer.h" -namespace Lucene -{ - ScoreCachingWrappingScorer::ScoreCachingWrappingScorer(ScorerPtr scorer) : Scorer(scorer->getSimilarity()) - { - this->curDoc = -1; - this->curScore = 0.0; - this->_scorer = scorer; - } - - ScoreCachingWrappingScorer::~ScoreCachingWrappingScorer() - { - } - - bool ScoreCachingWrappingScorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) - { - return ScorerPtr(_scorer)->score(collector, max, firstDocID); - } - - SimilarityPtr ScoreCachingWrappingScorer::getSimilarity() - { - return ScorerPtr(_scorer)->getSimilarity(); - } - - double ScoreCachingWrappingScorer::score() - { - ScorerPtr scorer(_scorer); - int32_t doc = scorer->docID(); - if (doc != curDoc) - { - curScore = scorer->score(); - curDoc = doc; - } - return curScore; - } - - int32_t ScoreCachingWrappingScorer::docID() - { - return ScorerPtr(_scorer)->docID(); - } - - int32_t ScoreCachingWrappingScorer::nextDoc() - { - return 
ScorerPtr(_scorer)->nextDoc(); - } - - void ScoreCachingWrappingScorer::score(CollectorPtr collector) - { - ScorerPtr(_scorer)->score(collector); - } - - int32_t ScoreCachingWrappingScorer::advance(int32_t target) - { - return ScorerPtr(_scorer)->advance(target); - } +namespace Lucene { + +ScoreCachingWrappingScorer::ScoreCachingWrappingScorer(const ScorerPtr& scorer) : Scorer(scorer->getSimilarity()) { + this->curDoc = -1; + this->curScore = 0.0; + this->_scorer = scorer; +} + +ScoreCachingWrappingScorer::~ScoreCachingWrappingScorer() { +} + +bool ScoreCachingWrappingScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { + return ScorerPtr(_scorer)->score(collector, max, firstDocID); +} + +SimilarityPtr ScoreCachingWrappingScorer::getSimilarity() { + return ScorerPtr(_scorer)->getSimilarity(); +} + +double ScoreCachingWrappingScorer::score() { + ScorerPtr scorer(_scorer); + int32_t doc = scorer->docID(); + if (doc != curDoc) { + curScore = scorer->score(); + curDoc = doc; + } + return curScore; +} + +int32_t ScoreCachingWrappingScorer::docID() { + return ScorerPtr(_scorer)->docID(); +} + +int32_t ScoreCachingWrappingScorer::nextDoc() { + return ScorerPtr(_scorer)->nextDoc(); +} + +void ScoreCachingWrappingScorer::score(const CollectorPtr& collector) { + ScorerPtr(_scorer)->score(collector); +} + +int32_t ScoreCachingWrappingScorer::advance(int32_t target) { + return ScorerPtr(_scorer)->advance(target); +} + } diff --git a/src/core/search/ScoreDoc.cpp b/src/core/search/ScoreDoc.cpp index 2d1a7d69..ba19a7cd 100644 --- a/src/core/search/ScoreDoc.cpp +++ b/src/core/search/ScoreDoc.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,22 +7,20 @@ #include "LuceneInc.h" #include "ScoreDoc.h" -namespace Lucene -{ - ScoreDoc::ScoreDoc(int32_t doc, double score) - { - this->doc = doc; - this->score = score; - } - - ScoreDoc::~ScoreDoc() - { - } - - String ScoreDoc::toString() - { - StringStream buffer; - buffer << L"doc=" << doc << L" score=" << score; - return buffer.str(); - } +namespace Lucene { + +ScoreDoc::ScoreDoc(int32_t doc, double score) { + this->doc = doc; + this->score = score; +} + +ScoreDoc::~ScoreDoc() { +} + +String ScoreDoc::toString() { + StringStream buffer; + buffer << L"doc=" << doc << L" score=" << score; + return buffer.str(); +} + } diff --git a/src/core/search/Scorer.cpp b/src/core/search/Scorer.cpp index 15f48fd0..36a0517a 100644 --- a/src/core/search/Scorer.cpp +++ b/src/core/search/Scorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,39 +8,63 @@ #include "Scorer.h" #include "Collector.h" -namespace Lucene -{ - Scorer::Scorer(SimilarityPtr similarity) - { +namespace Lucene { + + Scorer::Scorer(const SimilarityPtr& similarity) { this->similarity = similarity; } - Scorer::~Scorer() - { + Scorer::Scorer(const WeightPtr& weight) { + this->weight = weight; } - SimilarityPtr Scorer::getSimilarity() - { - return similarity; + Scorer::~Scorer() { } - void Scorer::score(CollectorPtr collector) - { + SimilarityPtr Scorer::getSimilarity() { + return similarity; + } + + void Scorer::score(const CollectorPtr& collector) { collector->setScorer(shared_from_this()); int32_t doc; - while ((doc = nextDoc()) != NO_MORE_DOCS) + while ((doc = nextDoc()) != NO_MORE_DOCS) { collector->collect(doc); + } } - bool Scorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) - { + bool Scorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { collector->setScorer(shared_from_this()); int32_t doc = firstDocID; - while (doc < max) - { + while (doc < max) { collector->collect(doc); doc = nextDoc(); } return (doc != NO_MORE_DOCS); } + + void Scorer::visitSubScorers(QueryPtr parent, BooleanClause::Occur relationship, + ScorerVisitor *visitor){ + QueryPtr q = weight->getQuery(); + switch (relationship) { + case BooleanClause::MUST: + visitor->visitRequired(parent, q, shared_from_this()); + break; + case BooleanClause::MUST_NOT: + visitor->visitProhibited(parent, q, shared_from_this()); + break; + case BooleanClause::SHOULD: + visitor->visitOptional(parent, q, shared_from_this()); + break; + } + } + + void Scorer::visitScorers(ScorerVisitor *visitor) { + boost::shared_ptr s_obj; + + visitSubScorers(s_obj, BooleanClause::MUST/*must id default*/, visitor); + } + + + } diff --git a/src/core/search/Searchable.cpp b/src/core/search/Searchable.cpp index 7b636277..a62a45d6 100644 --- a/src/core/search/Searchable.cpp +++ 
b/src/core/search/Searchable.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,71 +7,61 @@ #include "LuceneInc.h" #include "Searchable.h" -namespace Lucene -{ - void Searchable::search(WeightPtr weight, FilterPtr filter, CollectorPtr collector) - { - BOOST_ASSERT(false); - // override - } - - void Searchable::close() - { - BOOST_ASSERT(false); - // override - } - - int32_t Searchable::docFreq(TermPtr term) - { - BOOST_ASSERT(false); - return 0; // override - } - - Collection Searchable::docFreqs(Collection terms) - { - BOOST_ASSERT(false); - return Collection(); // override - } - - int32_t Searchable::maxDoc() - { - BOOST_ASSERT(false); - return 0; // override - } - - TopDocsPtr Searchable::search(WeightPtr weight, FilterPtr filter, int32_t n) - { - BOOST_ASSERT(false); - return TopDocsPtr(); // override - } - - DocumentPtr Searchable::doc(int32_t n) - { - BOOST_ASSERT(false); - return DocumentPtr(); // override - } - - DocumentPtr Searchable::doc(int32_t n, FieldSelectorPtr fieldSelector) - { - BOOST_ASSERT(false); - return DocumentPtr(); // override - } - - QueryPtr Searchable::rewrite(QueryPtr query) - { - BOOST_ASSERT(false); - return QueryPtr(); // override - } - - ExplanationPtr Searchable::explain(WeightPtr weight, int32_t doc) - { - BOOST_ASSERT(false); - return ExplanationPtr(); // override - } - - TopFieldDocsPtr Searchable::search(WeightPtr weight, FilterPtr filter, int32_t n, SortPtr sort) - { - BOOST_ASSERT(false); - return TopFieldDocsPtr(); // override - } +namespace Lucene { + +void Searchable::search(const WeightPtr& weight, const FilterPtr& filter, const CollectorPtr& 
collector) { + BOOST_ASSERT(false); + // override +} + +void Searchable::close() { + BOOST_ASSERT(false); + // override +} + +int32_t Searchable::docFreq(const TermPtr& term) { + BOOST_ASSERT(false); + return 0; // override +} + +Collection Searchable::docFreqs(Collection terms) { + BOOST_ASSERT(false); + return Collection(); // override +} + +int32_t Searchable::maxDoc() { + BOOST_ASSERT(false); + return 0; // override +} + +TopDocsPtr Searchable::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n) { + BOOST_ASSERT(false); + return TopDocsPtr(); // override +} + +DocumentPtr Searchable::doc(int32_t n) { + BOOST_ASSERT(false); + return DocumentPtr(); // override +} + +DocumentPtr Searchable::doc(int32_t n, const FieldSelectorPtr& fieldSelector) { + BOOST_ASSERT(false); + return DocumentPtr(); // override +} + +QueryPtr Searchable::rewrite(const QueryPtr& query) { + BOOST_ASSERT(false); + return QueryPtr(); // override +} + +ExplanationPtr Searchable::explain(const WeightPtr& weight, int32_t doc) { + BOOST_ASSERT(false); + return ExplanationPtr(); // override +} + +TopFieldDocsPtr Searchable::search(const WeightPtr& weight, const FilterPtr& filter, int32_t n, const SortPtr& sort) { + BOOST_ASSERT(false); + return TopFieldDocsPtr(); // override +} + } diff --git a/src/core/search/Searcher.cpp b/src/core/search/Searcher.cpp index 684233dd..71c89050 100644 --- a/src/core/search/Searcher.cpp +++ b/src/core/search/Searcher.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,67 +10,57 @@ #include "Query.h" #include "Collector.h" -namespace Lucene -{ - Searcher::Searcher() - { - similarity = Similarity::getDefault(); - } - - Searcher::~Searcher() - { - } - - TopFieldDocsPtr Searcher::search(QueryPtr query, FilterPtr filter, int32_t n, SortPtr sort) - { - return search(createWeight(query), filter, n, sort); - } - - void Searcher::search(QueryPtr query, CollectorPtr results) - { - search(createWeight(query), FilterPtr(), results); - } - - void Searcher::search(QueryPtr query, FilterPtr filter, CollectorPtr results) - { - search(createWeight(query), filter, results); - } - - TopDocsPtr Searcher::search(QueryPtr query, FilterPtr filter, int32_t n) - { - return search(createWeight(query), filter, n); - } - - TopDocsPtr Searcher::search(QueryPtr query, int32_t n) - { - return search(query, FilterPtr(), n); - } - - ExplanationPtr Searcher::explain(QueryPtr query, int32_t doc) - { - return explain(createWeight(query), doc); - } - - void Searcher::setSimilarity(SimilarityPtr similarity) - { - this->similarity = similarity; - } - - SimilarityPtr Searcher::getSimilarity() - { - return this->similarity; - } - - WeightPtr Searcher::createWeight(QueryPtr query) - { - return query->weight(shared_from_this()); - } - - Collection Searcher::docFreqs(Collection terms) - { - Collection result(Collection::newInstance(terms.size())); - for (int32_t i = 0; i < terms.size(); ++i) - result[i] = docFreq(terms[i]); - return result; +namespace Lucene { + +Searcher::Searcher() { + similarity = Similarity::getDefault(); +} + +Searcher::~Searcher() { +} + +TopFieldDocsPtr Searcher::search(const QueryPtr& query, const FilterPtr& filter, int32_t n, const SortPtr& sort) { + return search(createWeight(query), filter, n, sort); +} + +void Searcher::search(const QueryPtr& query, const CollectorPtr& results) { + search(createWeight(query), FilterPtr(), results); +} + +void 
Searcher::search(const QueryPtr& query, const FilterPtr& filter, const CollectorPtr& results) { + search(createWeight(query), filter, results); +} + +TopDocsPtr Searcher::search(const QueryPtr& query, const FilterPtr& filter, int32_t n) { + return search(createWeight(query), filter, n); +} + +TopDocsPtr Searcher::search(const QueryPtr& query, int32_t n) { + return search(query, FilterPtr(), n); +} + +ExplanationPtr Searcher::explain(const QueryPtr& query, int32_t doc) { + return explain(createWeight(query), doc); +} + +void Searcher::setSimilarity(const SimilarityPtr& similarity) { + this->similarity = similarity; +} + +SimilarityPtr Searcher::getSimilarity() { + return this->similarity; +} + +WeightPtr Searcher::createWeight(const QueryPtr& query) { + return query->weight(shared_from_this()); +} + +Collection Searcher::docFreqs(Collection terms) { + Collection result(Collection::newInstance(terms.size())); + for (int32_t i = 0; i < terms.size(); ++i) { + result[i] = docFreq(terms[i]); } + return result; +} + } diff --git a/src/core/search/Similarity.cpp b/src/core/search/Similarity.cpp index b53a0a25..3df64e52 100644 --- a/src/core/search/Similarity.cpp +++ b/src/core/search/Similarity.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -14,117 +14,102 @@ #include "SmallDouble.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t Similarity::NO_DOC_ID_PROVIDED = -1; - - Similarity::Similarity() - { - } - - Similarity::~Similarity() - { - } - - SimilarityPtr Similarity::getDefault() - { - static SimilarityPtr defaultImpl; - if (!defaultImpl) - { - defaultImpl = newLucene(); - CycleCheck::addStatic(defaultImpl); - } - return defaultImpl; - } - - const Collection Similarity::NORM_TABLE() - { - static Collection _NORM_TABLE; - if (!_NORM_TABLE) - { - _NORM_TABLE = Collection::newInstance(256); - for (int32_t i = 0; i < 256; ++i) - _NORM_TABLE[i] = SmallDouble::byteToDouble((uint8_t)i); - } - return _NORM_TABLE; - } - - double Similarity::decodeNorm(uint8_t b) - { - return NORM_TABLE()[b & 0xff]; // & 0xff maps negative bytes to positive above 127 - } - - const Collection Similarity::getNormDecoder() - { - return NORM_TABLE(); - } - - double Similarity::computeNorm(const String& fieldName, FieldInvertStatePtr state) - { - return (double)(state->getBoost() * lengthNorm(fieldName, state->getLength())); - } - - uint8_t Similarity::encodeNorm(double f) - { - return SmallDouble::doubleToByte(f); - } - - double Similarity::tf(int32_t freq) - { - return tf((double)freq); - } - - IDFExplanationPtr Similarity::idfExplain(TermPtr term, SearcherPtr searcher) - { - int32_t df = searcher->docFreq(term); - int32_t max = searcher->maxDoc(); - double _idf = idf(df, max); - return newLucene(df, max, _idf); - } - - IDFExplanationPtr Similarity::idfExplain(Collection terms, SearcherPtr searcher) - { - int32_t max = searcher->maxDoc(); - double _idf = 0.0; - String exp; - for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) - { - int32_t df = searcher->docFreq(*term); - _idf += idf(df, max); - exp += L" " + (*term)->text() + L"=" + StringUtils::toString(df); +namespace Lucene { + +const int32_t 
Similarity::NO_DOC_ID_PROVIDED = -1; + +Similarity::Similarity() { +} + +Similarity::~Similarity() { +} + +SimilarityPtr Similarity::getDefault() { + // race condition? + static SimilarityPtr defaultImpl; + LUCENE_RUN_ONCE( + defaultImpl = newLucene(); + CycleCheck::addStatic(defaultImpl); + ); + return defaultImpl; +} + +static const Collection GEN_NORM_TABLE() { + static Collection _NORM_TABLE; + LUCENE_RUN_ONCE( + _NORM_TABLE = Collection::newInstance(256); + for (int32_t i = 0; i < 256; ++i) { + _NORM_TABLE[i] = SmallDouble::byteToDouble((uint8_t)i); } - return newLucene(exp, _idf); - } - - double Similarity::scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) - { - return 1.0; - } - - SimilarityIDFExplanation::SimilarityIDFExplanation(int32_t df, int32_t max, double idf) - { - this->df = df; - this->max = max; - this->idf = idf; - } - - SimilarityIDFExplanation::SimilarityIDFExplanation(const String& exp, double idf) - { - this->exp = exp; - this->idf = idf; - } - - SimilarityIDFExplanation::~SimilarityIDFExplanation() - { - } - - String SimilarityIDFExplanation::explain() - { - return !exp.empty() ? 
exp : L"idf(docFreq=" + StringUtils::toString(df) + L", maxDocs=" + StringUtils::toString(max) + L")"; - } - - double SimilarityIDFExplanation::getIdf() - { - return idf; - } + ); + return _NORM_TABLE; +} + +const Collection Similarity::NORM_TABLE = GEN_NORM_TABLE(); + +double Similarity::decodeNorm(uint8_t b) { + return NORM_TABLE[b & 0xff]; // & 0xff maps negative bytes to positive above 127 +} + +const Collection& Similarity::getNormDecoder() { + return NORM_TABLE; +} + +double Similarity::computeNorm(const String& fieldName, const FieldInvertStatePtr& state) { + return (double)(state->getBoost() * lengthNorm(fieldName, state->getLength())); +} + +uint8_t Similarity::encodeNorm(double f) { + return SmallDouble::doubleToByte(f); +} + +double Similarity::tf(int32_t freq) { + return tf((double)freq); +} + +IDFExplanationPtr Similarity::idfExplain(const TermPtr& term, const SearcherPtr& searcher) { + int32_t df = searcher->docFreq(term); + int32_t max = searcher->maxDoc(); + double _idf = idf(df, max); + return newLucene(df, max, _idf); +} + +IDFExplanationPtr Similarity::idfExplain(Collection terms, const SearcherPtr& searcher) { + int32_t max = searcher->maxDoc(); + double _idf = 0.0; + String exp; + for (Collection::iterator term = terms.begin(); term != terms.end(); ++term) { + int32_t df = searcher->docFreq(*term); + _idf += idf(df, max); + exp += L" " + (*term)->text() + L"=" + StringUtils::toString(df); + } + return newLucene(exp, _idf); +} + +double Similarity::scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { + return 1.0; +} + +SimilarityIDFExplanation::SimilarityIDFExplanation(int32_t df, int32_t max, double idf) { + this->df = df; + this->max = max; + this->idf = idf; +} + +SimilarityIDFExplanation::SimilarityIDFExplanation(const String& exp, double idf) { + this->exp = exp; + this->idf = idf; +} + +SimilarityIDFExplanation::~SimilarityIDFExplanation() { +} + +String 
SimilarityIDFExplanation::explain() { + return !exp.empty() ? exp : L"idf(docFreq=" + StringUtils::toString(df) + L", maxDocs=" + StringUtils::toString(max) + L")"; +} + +double SimilarityIDFExplanation::getIdf() { + return idf; +} + } diff --git a/src/core/search/SimilarityDelegator.cpp b/src/core/search/SimilarityDelegator.cpp index 47cfaa23..50aac0b9 100644 --- a/src/core/search/SimilarityDelegator.cpp +++ b/src/core/search/SimilarityDelegator.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,54 +7,45 @@ #include "LuceneInc.h" #include "SimilarityDelegator.h" -namespace Lucene -{ - SimilarityDelegator::SimilarityDelegator(SimilarityPtr delegee) - { - this->delegee = delegee; - } - - SimilarityDelegator::~SimilarityDelegator() - { - } - - double SimilarityDelegator::computeNorm(const String& fieldName, FieldInvertStatePtr state) - { - return delegee->computeNorm(fieldName, state); - } - - double SimilarityDelegator::lengthNorm(const String& fieldName, int32_t numTokens) - { - return delegee->lengthNorm(fieldName, numTokens); - } - - double SimilarityDelegator::queryNorm(double sumOfSquaredWeights) - { - return delegee->queryNorm(sumOfSquaredWeights); - } - - double SimilarityDelegator::tf(double freq) - { - return delegee->tf(freq); - } - - double SimilarityDelegator::sloppyFreq(int32_t distance) - { - return delegee->sloppyFreq(distance); - } - - double SimilarityDelegator::idf(int32_t docFreq, int32_t numDocs) - { - return delegee->idf(docFreq, numDocs); - } - - double SimilarityDelegator::coord(int32_t overlap, int32_t maxOverlap) - { - return delegee->coord(overlap, maxOverlap); - } - - 
double SimilarityDelegator::scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) - { - return delegee->scorePayload(docId, fieldName, start, end, payload, offset, length); - } +namespace Lucene { + +SimilarityDelegator::SimilarityDelegator(const SimilarityPtr& delegee) { + this->delegee = delegee; +} + +SimilarityDelegator::~SimilarityDelegator() { +} + +double SimilarityDelegator::computeNorm(const String& fieldName, const FieldInvertStatePtr& state) { + return delegee->computeNorm(fieldName, state); +} + +double SimilarityDelegator::lengthNorm(const String& fieldName, int32_t numTokens) { + return delegee->lengthNorm(fieldName, numTokens); +} + +double SimilarityDelegator::queryNorm(double sumOfSquaredWeights) { + return delegee->queryNorm(sumOfSquaredWeights); +} + +double SimilarityDelegator::tf(double freq) { + return delegee->tf(freq); +} + +double SimilarityDelegator::sloppyFreq(int32_t distance) { + return delegee->sloppyFreq(distance); +} + +double SimilarityDelegator::idf(int32_t docFreq, int32_t numDocs) { + return delegee->idf(docFreq, numDocs); +} + +double SimilarityDelegator::coord(int32_t overlap, int32_t maxOverlap) { + return delegee->coord(overlap, maxOverlap); +} + +double SimilarityDelegator::scorePayload(int32_t docId, const String& fieldName, int32_t start, int32_t end, ByteArray payload, int32_t offset, int32_t length) { + return delegee->scorePayload(docId, fieldName, start, end, payload, offset, length); +} + } diff --git a/src/core/search/SingleTermEnum.cpp b/src/core/search/SingleTermEnum.cpp index b81a6e33..8db9be73 100644 --- a/src/core/search/SingleTermEnum.cpp +++ b/src/core/search/SingleTermEnum.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,34 +9,31 @@ #include "IndexReader.h" #include "Term.h" -namespace Lucene -{ - SingleTermEnum::SingleTermEnum(IndexReaderPtr reader, TermPtr singleTerm) - { - this->_endEnum = false; - this->singleTerm = singleTerm; - setEnum(reader->terms(singleTerm)); - } - - SingleTermEnum::~SingleTermEnum() - { - } - - double SingleTermEnum::difference() - { - return 1.0; - } - - bool SingleTermEnum::endEnum() - { - return _endEnum; - } - - bool SingleTermEnum::termCompare(TermPtr term) - { - if (term->equals(singleTerm)) - return true; - _endEnum = true; - return false; +namespace Lucene { + +SingleTermEnum::SingleTermEnum(const IndexReaderPtr& reader, const TermPtr& singleTerm) { + this->_endEnum = false; + this->singleTerm = singleTerm; + setEnum(reader->terms(singleTerm)); +} + +SingleTermEnum::~SingleTermEnum() { +} + +double SingleTermEnum::difference() { + return 1.0; +} + +bool SingleTermEnum::endEnum() { + return _endEnum; +} + +bool SingleTermEnum::termCompare(const TermPtr& term) { + if (term->equals(singleTerm)) { + return true; } + _endEnum = true; + return false; +} + } diff --git a/src/core/search/SloppyPhraseScorer.cpp b/src/core/search/SloppyPhraseScorer.cpp index 62233e08..5c4283cc 100644 --- a/src/core/search/SloppyPhraseScorer.cpp +++ b/src/core/search/SloppyPhraseScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,172 +10,175 @@ #include "PhraseQueue.h" #include "Similarity.h" -namespace Lucene -{ - SloppyPhraseScorer::SloppyPhraseScorer(WeightPtr weight, Collection tps, Collection offsets, SimilarityPtr similarity, int32_t slop, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) - { - this->slop = slop; - this->checkedRepeats = false; - } - - SloppyPhraseScorer::~SloppyPhraseScorer() - { +namespace Lucene { + +struct __luceneEquals { + inline bool operator()(const PhrasePositions* __first, const PhrasePositions* __second) const { + return __first ? (__second && __first == __second) : (!__first && !__second); } - - double SloppyPhraseScorer::phraseFreq() - { - int32_t end = initPhrasePositions(); - - double freq = 0.0; - bool done = (end < 0); - while (!done) - { - PhrasePositionsPtr pp(pq->pop()); - int32_t start = pp->position; - int32_t next = pq->top()->position; - - bool tpsDiffer = true; - for (int32_t pos = start; pos <= next || !tpsDiffer; pos = pp->position) - { - if (pos<=next && tpsDiffer) - start = pos; // advance pp to min window - if (!pp->nextPosition()) - { - done = true; // ran out of a term - done - break; - } - - PhrasePositionsPtr pp2; - tpsDiffer = (!pp->repeats || !(pp2 = termPositionsDiffer(pp))); - if (pp2 && pp2 != pp) - pp = flip(pp, pp2); // flip pp to pp2 +}; + +typedef HashMap< PhrasePositions*, LuceneObjectPtr, luceneHash, __luceneEquals > __MapPhrasePositionsLuceneObject; + +SloppyPhraseScorer::SloppyPhraseScorer(const WeightPtr& weight, Collection tps, Collection offsets, const SimilarityPtr& similarity, int32_t slop, ByteArray norms) : PhraseScorer(weight, tps, offsets, similarity, norms) { + this->slop = slop; + this->checkedRepeats = false; +} + +SloppyPhraseScorer::~SloppyPhraseScorer() { +} + +double SloppyPhraseScorer::phraseFreq() { + int32_t end = initPhrasePositions(); + + double freq = 0.0; + bool done = (end < 0); + while 
(!done) { + auto* __pp = pq->pop(); + int32_t start = __pp->position; + int32_t next = pq->top()->position; + + bool tpsDiffer = true; + for (int32_t pos = start; pos <= next || !tpsDiffer; pos = __pp->position) { + if (pos<=next && tpsDiffer) { + start = pos; // advance pp to min window + } + if (!__pp->nextPosition()) { + done = true; // ran out of a term - done + break; } - - int32_t matchLength = end - start; - if (matchLength <= slop) - freq += getSimilarity()->sloppyFreq(matchLength); // score match - - if (pp->position > end) - end = pp->position; - pq->add(pp); // restore pq + + PhrasePositions* __pp2 = nullptr; + tpsDiffer = (!__pp->repeats || !(__pp2 = termPositionsDiffer(__pp))); + if (__pp2 && __pp2 != __pp) { + __pp = flip(__pp, __pp2); // flip pp to pp2 + } + } + + int32_t matchLength = end - start; + if (matchLength <= slop) { + freq += getSimilarity()->sloppyFreq(matchLength); // score match + } + + if (__pp->position > end) { + end = __pp->position; } - - return freq; + pq->add(__pp); // restore pq } - - PhrasePositionsPtr SloppyPhraseScorer::flip(PhrasePositionsPtr pp, PhrasePositionsPtr pp2) - { - int32_t n = 0; - PhrasePositionsPtr pp3; - // pop until finding pp2 - while ((pp3 = pq->pop()) != pp2) - tmpPos[n++] = pp3; - // insert back all but pp2 - for (n--; n >= 0; --n) - pq->addOverflow(tmpPos[n]); - // insert pp back - pq->add(pp); - return pp2; + + return freq; +} + +PhrasePositions* SloppyPhraseScorer::flip(PhrasePositions* __pp, PhrasePositions* __pp2) { + int32_t n = 0; + PhrasePositions* __pp3; + // pop until finding pp2 + while ((__pp3 = pq->pop()) != __pp2) { + tmpPos[n++] = __pp3; } - - int32_t SloppyPhraseScorer::initPhrasePositions() - { - int32_t end = 0; - - // no repeats at all (most common case is also the simplest one) - if (checkedRepeats && !repeats) - { - // build queue from list - pq->clear(); - for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) - { - pp->firstPosition(); - if (pp->position > end) - end = pp->position; 
- pq->add(pp); // build pq from list + // insert back all but pp2 + for (n--; n >= 0; --n) { + pq->addOverflow(tmpPos[n]); + } + // insert pp back + pq->add(__pp); + return __pp2; +} + +int32_t SloppyPhraseScorer::initPhrasePositions() { + int32_t end = 0; + + // no repeats at all (most common case is also the simplest one) + if (checkedRepeats && !repeats) { + // build queue from list + pq->clear(); + for (auto* __pp = __first; __pp; __pp = __pp->__next) { + __pp->firstPosition(); + if (__pp->position > end) { + end = __pp->position; } - return end; + pq->add(__pp); // build pq from list } - - // position the pp's - for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) - pp->firstPosition(); - - // one time initialization for this scorer - if (!checkedRepeats) - { - checkedRepeats = true; - // check for repeats - MapPhrasePositionsLuceneObject m; - for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) - { - int32_t tpPos = pp->position + pp->offset; - for (PhrasePositionsPtr pp2(pp->_next); pp2; pp2 = pp2->_next) - { - int32_t tpPos2 = pp2->position + pp2->offset; - if (tpPos2 == tpPos) - { - if (!m) - m = MapPhrasePositionsLuceneObject::newInstance(); - pp->repeats = true; - pp2->repeats = true; - m.put(pp, LuceneObjectPtr()); - m.put(pp2, LuceneObjectPtr()); + return end; + } + + // position the pp's + for (PhrasePositions* __pp = __first; __pp; __pp = __pp->__next) { + __pp->firstPosition(); + } + + // one time initialization for this scorer + if (!checkedRepeats) { + checkedRepeats = true; + // check for repeats + __MapPhrasePositionsLuceneObject m; + for (auto* __pp = __first; __pp; __pp = __pp->__next) { + int32_t tpPos = __pp->position + __pp->offset; + for (auto* __pp2 = __pp->__next; __pp2; __pp2 = __pp2->__next) { + int32_t tpPos2 = __pp2->position + __pp2->offset; + if (tpPos2 == tpPos) { + if (!m) { + m = __MapPhrasePositionsLuceneObject::newInstance(); } + __pp->repeats = true; + __pp2->repeats = true; + m.put(__pp, LuceneObjectPtr()); + 
m.put(__pp2, LuceneObjectPtr()); } } - if (m) - { - repeats = Collection::newInstance(); - for (MapPhrasePositionsLuceneObject::iterator key = m.begin(); key != m.end(); ++key) - repeats.add(key->first); + } + if (m) { + repeats = Collection::newInstance(); + for (__MapPhrasePositionsLuceneObject::iterator key = m.begin(); key != m.end(); ++key) { + repeats.add(key->first); } } - - // with repeats must advance some repeating pp's so they all start with differing tp's - if (repeats) - { - for (Collection::iterator pp = repeats.begin(); pp != repeats.end(); ++pp) - { - PhrasePositionsPtr pp2; - while (pp2 = termPositionsDiffer(*pp)) - { - if (!pp2->nextPosition()) // out of pps that do not differ, advance the pp with higher offset - return -1; // ran out of a term - done + } + + // with repeats must advance some repeating pp's so they all start with differing tp's + if (repeats) { + for (Collection::iterator pp = repeats.begin(); pp != repeats.end(); ++pp) { + PhrasePositions* pp2 = nullptr; + while ((pp2 = termPositionsDiffer(*pp))) { + if (!pp2->nextPosition()) { // out of pps that do not differ, advance the pp with higher offset + return -1; // ran out of a term - done } } } - - // build queue from list - pq->clear(); - for (PhrasePositionsPtr pp(first); pp; pp = pp->_next) - { - if (pp->position > end) - end = pp->position; - pq->add(pp); // build pq from list + } + + // build queue from list + pq->clear(); + for (auto* __pp = __first; __pp; __pp = __pp->__next) { + if (__pp->position > end) { + end = __pp->position; } - - if (repeats) - tmpPos = Collection::newInstance(pq->size()); - - return end; + pq->add(__pp); // build pq from list + } + + if (repeats) { + tmpPos = Collection::newInstance(pq->size()); } - - PhrasePositionsPtr SloppyPhraseScorer::termPositionsDiffer(PhrasePositionsPtr pp) - { - // Efficiency note: a more efficient implementation could keep a map between repeating pp's, so that if - // pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are 
repeats of term2, pp2a would only be checked - // against pp2b but not against pp1a, pp1b, pp1c. However this would complicate code, for a rather rare - // case, so choice is to compromise here. - int32_t tpPos = pp->position + pp->offset; - for (Collection::iterator pp2 = repeats.begin(); pp2 != repeats.end(); ++pp2) - { - if (*pp2 == pp) - continue; - int32_t tpPos2 = (*pp2)->position + (*pp2)->offset; - if (tpPos2 == tpPos) - return pp->offset > (*pp2)->offset ? pp : *pp2; // do not differ: return the one with higher offset. + + return end; +} + +PhrasePositions* SloppyPhraseScorer::termPositionsDiffer(PhrasePositions* __pp) { + // Efficiency note: a more efficient implementation could keep a map between repeating pp's, so that if + // pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are repeats of term2, pp2a would only be checked + // against pp2b but not against pp1a, pp1b, pp1c. However this would complicate code, for a rather rare + // case, so choice is to compromise here. + int32_t tpPos = __pp->position + __pp->offset; + for (Collection::iterator pp2 = repeats.begin(); pp2 != repeats.end(); ++pp2) { + if (*pp2 == __pp) { + continue; + } + int32_t tpPos2 = (*pp2)->position + (*pp2)->offset; + if (tpPos2 == tpPos) { + return __pp->offset > (*pp2)->offset ? __pp : *pp2; // do not differ: return the one with higher offset. } - return PhrasePositionsPtr(); } + return nullptr; +} + } diff --git a/src/core/search/Sort.cpp b/src/core/search/Sort.cpp index 69f2577d..fe2f25d5 100644 --- a/src/core/search/Sort.cpp +++ b/src/core/search/Sort.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,89 +9,78 @@ #include "SortField.h" #include "MiscUtils.h" -namespace Lucene -{ - Sort::Sort() - { - setSort(SortField::FIELD_SCORE()); - } - - Sort::Sort(SortFieldPtr field) - { - setSort(field); - } - - Sort::Sort(Collection fields) - { - setSort(fields); - } - - Sort::~Sort() - { - } - - SortPtr Sort::RELEVANCE() - { - static SortPtr _RELEVANCE; - if (!_RELEVANCE) - { - _RELEVANCE = newLucene(); - CycleCheck::addStatic(_RELEVANCE); - } - return _RELEVANCE; - } - - SortPtr Sort::INDEXORDER() - { - static SortPtr _INDEXORDER; - if (!_INDEXORDER) - { - _INDEXORDER = newLucene(SortField::FIELD_DOC()); - CycleCheck::addStatic(_INDEXORDER); - } - return _INDEXORDER; - } - - void Sort::setSort(SortFieldPtr field) - { - this->fields = newCollection(field); - } - - void Sort::setSort(Collection fields) - { - this->fields = fields; - } - - Collection Sort::getSort() - { - return fields; - } - - String Sort::toString() - { - StringStream buffer; - for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) - { - if (field != fields.begin()) - buffer << L","; - buffer << (*field)->toString(); +namespace Lucene { + +Sort::Sort() { + setSort(SortField::FIELD_SCORE()); +} + +Sort::Sort(const SortFieldPtr& field) { + setSort(field); +} + +Sort::Sort(Collection fields) { + setSort(fields); +} + +Sort::~Sort() { +} + +SortPtr Sort::RELEVANCE() { + static SortPtr _RELEVANCE; + LUCENE_RUN_ONCE( + _RELEVANCE = newLucene(); + CycleCheck::addStatic(_RELEVANCE); + ); + return _RELEVANCE; +} + +SortPtr Sort::INDEXORDER() { + static SortPtr _INDEXORDER; + LUCENE_RUN_ONCE( + _INDEXORDER = newLucene(SortField::FIELD_DOC()); + CycleCheck::addStatic(_INDEXORDER); + ); + return _INDEXORDER; +} + +void Sort::setSort(const SortFieldPtr& field) { + this->fields = newCollection(field); +} + +void Sort::setSort(Collection fields) { + this->fields = fields; +} + +Collection Sort::getSort() { + 
return fields; +} + +String Sort::toString() { + StringStream buffer; + for (Collection::iterator field = fields.begin(); field != fields.end(); ++field) { + if (field != fields.begin()) { + buffer << L","; } - return buffer.str(); + buffer << (*field)->toString(); } - - bool Sort::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - SortPtr otherSort(boost::dynamic_pointer_cast(other)); - if (!otherSort) - return false; - return fields.equals(otherSort->fields); + return buffer.str(); +} + +bool Sort::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t Sort::hashCode() - { - return 0x45aaf665 + MiscUtils::hashCode(fields.begin(), fields.end(), MiscUtils::hashLucene); + + SortPtr otherSort(boost::dynamic_pointer_cast(other)); + if (!otherSort) { + return false; } + return fields.equals(otherSort->fields); +} + +int32_t Sort::hashCode() { + return 0x45aaf665 + MiscUtils::hashCode(fields.begin(), fields.end(), MiscUtils::hashLucene); +} + } diff --git a/src/core/search/SortField.cpp b/src/core/search/SortField.cpp index 3ef8ec2d..9937b32e 100644 --- a/src/core/search/SortField.cpp +++ b/src/core/search/SortField.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,254 +11,243 @@ #include "FieldComparatorSource.h" #include "StringUtils.h" -namespace Lucene -{ - /// Sort by document score (relevancy). Sort values are Double and higher values are at the front. - const int32_t SortField::SCORE = 0; +namespace Lucene { - /// Sort by document number (index order). 
Sort values are Integer and lower values are at the front. - const int32_t SortField::DOC = 1; +/// Sort by document score (relevancy). Sort values are Double and higher values are at the front. +const int32_t SortField::SCORE = 0; - /// Sort using term values as Strings. Sort values are String and lower values are at the front. - const int32_t SortField::STRING = 3; +/// Sort by document number (index order). Sort values are Integer and lower values are at the front. +const int32_t SortField::DOC = 1; - /// Sort using term values as Integers. Sort values are Integer and lower values are at the front. - const int32_t SortField::INT = 4; +/// Sort using term values as Strings. Sort values are String and lower values are at the front. +const int32_t SortField::STRING = 3; - /// Sort using term values as Floats. Sort values are Float and lower values are at the front. - const int32_t SortField::FLOAT = 5; +/// Sort using term values as Integers. Sort values are Integer and lower values are at the front. +const int32_t SortField::INT = 4; - /// Sort using term values as Longs. Sort values are Long and lower values are at the front. - const int32_t SortField::LONG = 6; +/// Sort using term values as Floats. Sort values are Float and lower values are at the front. +const int32_t SortField::FLOAT = 5; - /// Sort using term values as Doubles. Sort values are Double and lower values are at the front. - const int32_t SortField::DOUBLE = 7; +/// Sort using term values as Longs. Sort values are Long and lower values are at the front. +const int32_t SortField::LONG = 6; - /// Sort using term values as Shorts. Sort values are Short and lower values are at the front. - const int32_t SortField::SHORT = 8; +/// Sort using term values as Doubles. Sort values are Double and lower values are at the front. +const int32_t SortField::DOUBLE = 7; - /// Sort using a custom Comparator. Sort values are any ComparableValue and sorting is done according - /// to natural order. 
- const int32_t SortField::CUSTOM = 9; +/// Sort using term values as Shorts. Sort values are Short and lower values are at the front. +const int32_t SortField::SHORT = 8; - /// Sort using term values as Bytes. Sort values are Byte and lower values are at the front. - const int32_t SortField::BYTE = 10; +/// Sort using a custom Comparator. Sort values are any ComparableValue and sorting is done according +/// to natural order. +const int32_t SortField::CUSTOM = 9; - /// Sort using term values as Strings, but comparing by value (using String::compare) for all comparisons. - /// This is typically slower than {@link #STRING}, which uses ordinals to do the sorting. - const int32_t SortField::STRING_VAL = 11; +/// Sort using term values as Bytes. Sort values are Byte and lower values are at the front. +const int32_t SortField::BYTE = 10; - SortField::SortField(const String& field, int32_t type, bool reverse) - { - initFieldType(field, type); - this->reverse = reverse; - } - - SortField::SortField(const String& field, ParserPtr parser, bool reverse) - { - if (boost::dynamic_pointer_cast(parser)) - initFieldType(field, INT); - else if (boost::dynamic_pointer_cast(parser)) - initFieldType(field, BYTE); - else if (boost::dynamic_pointer_cast(parser)) - initFieldType(field, LONG); - else if (boost::dynamic_pointer_cast(parser)) - initFieldType(field, DOUBLE); - else - boost::throw_exception(IllegalArgumentException(L"Parser instance does not subclass existing numeric parser from FieldCache")); - this->reverse = reverse; - this->parser = parser; - } - - SortField::SortField(const String& field, const std::locale& locale, bool reverse) - { - initFieldType(field, STRING); - this->locale = newInstance(locale); - this->reverse = reverse; - } - - SortField::SortField(const String& field, FieldComparatorSourcePtr comparator, bool reverse) - { - initFieldType(field, CUSTOM); - this->comparatorSource = comparator; - this->reverse = reverse; - } - - SortField::~SortField() - { - } - - 
SortFieldPtr SortField::FIELD_SCORE() - { - static SortFieldPtr _FIELD_SCORE; - if (!_FIELD_SCORE) - { - _FIELD_SCORE = newLucene(L"", SCORE); - CycleCheck::addStatic(_FIELD_SCORE); - } - return _FIELD_SCORE; - } - - SortFieldPtr SortField::FIELD_DOC() - { - static SortFieldPtr _FIELD_DOC; - if (!_FIELD_DOC) - { - _FIELD_DOC = newLucene(L"", DOC); - CycleCheck::addStatic(_FIELD_DOC); - } - return _FIELD_DOC; - } - - void SortField::initFieldType(const String& field, int32_t type) - { - this->type = type; - if (field.empty() && type != SCORE && type != DOC) - boost::throw_exception(IllegalArgumentException(L"Field can only be null when type is SCORE or DOC")); - this->field = field; +/// Sort using term values as Strings, but comparing by value (using String::compare) for all comparisons. +/// This is typically slower than {@link #STRING}, which uses ordinals to do the sorting. +const int32_t SortField::STRING_VAL = 11; + +SortField::SortField(const String& field, int32_t type, bool reverse) { + initFieldType(field, type); + this->reverse = reverse; +} + +SortField::SortField(const String& field, const ParserPtr& parser, bool reverse) { + if (boost::dynamic_pointer_cast(parser)) { + initFieldType(field, INT); + } else if (boost::dynamic_pointer_cast(parser)) { + initFieldType(field, BYTE); + } else if (boost::dynamic_pointer_cast(parser)) { + initFieldType(field, LONG); + } else if (boost::dynamic_pointer_cast(parser)) { + initFieldType(field, DOUBLE); + } else { + boost::throw_exception(IllegalArgumentException(L"Parser instance does not subclass existing numeric parser from FieldCache")); + } + this->reverse = reverse; + this->parser = parser; +} + +SortField::SortField(const String& field, const std::locale& locale, bool reverse) { + initFieldType(field, STRING); + this->locale = newInstance(locale); + this->reverse = reverse; +} + +SortField::SortField(const String& field, const FieldComparatorSourcePtr& comparator, bool reverse) { + initFieldType(field, 
CUSTOM); + this->comparatorSource = comparator; + this->reverse = reverse; +} + +SortField::~SortField() { +} + +SortFieldPtr SortField::FIELD_SCORE() { + static SortFieldPtr _FIELD_SCORE; + LUCENE_RUN_ONCE( + _FIELD_SCORE = newLucene(L"", SCORE); + CycleCheck::addStatic(_FIELD_SCORE); + ); + return _FIELD_SCORE; +} + +SortFieldPtr SortField::FIELD_DOC() { + static SortFieldPtr _FIELD_DOC; + LUCENE_RUN_ONCE( + _FIELD_DOC = newLucene(L"", DOC); + CycleCheck::addStatic(_FIELD_DOC); + ); + return _FIELD_DOC; +} + +void SortField::initFieldType(const String& field, int32_t type) { + this->type = type; + if (field.empty() && type != SCORE && type != DOC) { + boost::throw_exception(IllegalArgumentException(L"Field can only be null when type is SCORE or DOC")); } - - String SortField::getField() - { - return field; + this->field = field; +} + +String SortField::getField() { + return field; +} + +int32_t SortField::getType() { + return type; +} + +localePtr SortField::getLocale() { + return locale; +} + +ParserPtr SortField::getParser() { + return parser; +} + +bool SortField::getReverse() { + return reverse; +} + +FieldComparatorSourcePtr SortField::getComparatorSource() { + return comparatorSource; +} + +String SortField::toString() { + StringStream buffer; + switch (type) { + case SCORE: + buffer << L""; + break; + case DOC: + buffer << L""; + break; + case STRING: + buffer << L""; + break; + case STRING_VAL: + buffer << L""; + break; + case BYTE: + buffer << L""; + break; + case SHORT: + buffer << L""; + break; + case INT: + buffer << L""; + break; + case LONG: + buffer << L""; + break; + case FLOAT: + buffer << L""; + break; + case DOUBLE: + buffer << L""; + break; + case CUSTOM: + buffer << L"toString() << L">"; + break; + default: + buffer << L""; + break; } - - int32_t SortField::getType() - { - return type; + + if (parser) { + buffer << L"(" << parser->toString() << L")"; } - - localePtr SortField::getLocale() - { - return locale; + if (reverse) { + buffer << 
L"!"; } - - ParserPtr SortField::getParser() - { - return parser; + + return buffer.str(); +} + +bool SortField::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - bool SortField::getReverse() - { - return reverse; + + SortFieldPtr otherSortField(boost::dynamic_pointer_cast(other)); + if (!otherSortField) { + return false; } - - FieldComparatorSourcePtr SortField::getComparatorSource() - { - return comparatorSource; + + return (field == otherSortField->field && type == otherSortField->type && + reverse == otherSortField->reverse && + ((locale && otherSortField->locale && *locale == *otherSortField->locale) || (!locale && !otherSortField->locale)) && + (comparatorSource ? comparatorSource->equals(otherSortField->comparatorSource) : !otherSortField->comparatorSource) && + (parser ? parser->equals(otherSortField->parser) : !otherSortField->parser)); +} + +int32_t SortField::hashCode() { + int32_t hash = type ^ 0x346565dd + (reverse ? 1 : 0) ^ 0xaf5998bb; + hash += StringUtils::hashCode(field) ^ 0xff5685dd; + if (locale) { + hash += StringUtils::hashCode(StringUtils::toUnicode(locale->name().c_str())) ^ 0xff5685dd; } - - String SortField::toString() - { - StringStream buffer; - switch (type) - { - case SCORE: - buffer << L""; - break; - case DOC: - buffer << L""; - break; - case STRING: - buffer << L""; - break; - case STRING_VAL: - buffer << L""; - break; - case BYTE: - buffer << L""; - break; - case SHORT: - buffer << L""; - break; - case INT: - buffer << L""; - break; - case LONG: - buffer << L""; - break; - case FLOAT: - buffer << L""; - break; - case DOUBLE: - buffer << L""; - break; - case CUSTOM: - buffer << L"toString() << L">"; - break; - default: - buffer << L""; - break; - } - - if (parser) - buffer << L"(" << parser->toString() << L")"; - if (reverse) - buffer << L"!"; - - return buffer.str(); + if (comparatorSource) { + hash += comparatorSource->hashCode(); } - - bool SortField::equals(LuceneObjectPtr other) - 
{ - if (LuceneObject::equals(other)) - return true; - - SortFieldPtr otherSortField(boost::dynamic_pointer_cast(other)); - if (!otherSortField) - return false; - - return (field == otherSortField->field && type == otherSortField->type && - reverse == otherSortField->reverse && - ((locale && otherSortField->locale && *locale == *otherSortField->locale) || (!locale && !otherSortField->locale)) && - (comparatorSource ? comparatorSource->equals(otherSortField->comparatorSource) : !otherSortField->comparatorSource) && - (parser ? parser->equals(otherSortField->parser) : !otherSortField->parser)); + if (parser) { + hash += parser->hashCode() ^ 0x3aaf56ff; } - - int32_t SortField::hashCode() - { - int32_t hash = type ^ 0x346565dd + (reverse ? 1 : 0) ^ 0xaf5998bb; - hash += StringUtils::hashCode(field) ^ 0xff5685dd; - if (locale) - hash += StringUtils::hashCode(StringUtils::toUnicode(locale->name().c_str())) ^ 0xff5685dd; - if (comparatorSource) - hash += comparatorSource->hashCode(); - if (parser) - hash += parser->hashCode() ^ 0x3aaf56ff; - return hash; + return hash; +} + +FieldComparatorPtr SortField::getComparator(int32_t numHits, int32_t sortPos) { + if (locale) { + return newLucene(numHits, field, *locale); } - - FieldComparatorPtr SortField::getComparator(int32_t numHits, int32_t sortPos) - { - if (locale) - return newLucene(numHits, field, *locale); - - switch (type) - { - case SCORE: - return newLucene(numHits); - case DOC: - return newLucene(numHits); - case SHORT: - case INT: - return newLucene(numHits, field, parser); - case FLOAT: - case DOUBLE: - return newLucene(numHits, field, parser); - case LONG: - return newLucene(numHits, field, parser); - case BYTE: - return newLucene(numHits, field, parser); - case CUSTOM: - BOOST_ASSERT(comparatorSource); - return comparatorSource->newComparator(field, numHits, sortPos, reverse); - case STRING: - return newLucene(numHits, field, sortPos, reverse); - case STRING_VAL: - return newLucene(numHits, field); - default: - 
boost::throw_exception(IllegalStateException(L"Illegal sort type: " + StringUtils::toString(type))); - return FieldComparatorPtr(); - } + + switch (type) { + case SCORE: + return newLucene(numHits); + case DOC: + return newLucene(numHits); + case SHORT: + case INT: + return newLucene(numHits, field, parser); + case FLOAT: + case DOUBLE: + return newLucene(numHits, field, parser); + case LONG: + return newLucene(numHits, field, parser); + case BYTE: + return newLucene(numHits, field, parser); + case CUSTOM: + BOOST_ASSERT(comparatorSource); + return comparatorSource->newComparator(field, numHits, sortPos, reverse); + case STRING: + return newLucene(numHits, field, sortPos, reverse); + case STRING_VAL: + return newLucene(numHits, field); + default: + boost::throw_exception(IllegalStateException(L"Illegal sort type: " + StringUtils::toString(type))); + return FieldComparatorPtr(); } } + +} diff --git a/src/core/search/SpanFilter.cpp b/src/core/search/SpanFilter.cpp index 72092c3b..0a9771d5 100644 --- a/src/core/search/SpanFilter.cpp +++ b/src/core/search/SpanFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "SpanFilter.h" -namespace Lucene -{ - SpanFilter::~SpanFilter() - { - } +namespace Lucene { + +SpanFilter::~SpanFilter() { +} + } diff --git a/src/core/search/SpanFilterResult.cpp b/src/core/search/SpanFilterResult.cpp index 845d1431..2603e161 100644 --- a/src/core/search/SpanFilterResult.cpp +++ b/src/core/search/SpanFilterResult.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,70 +7,58 @@ #include "LuceneInc.h" #include "SpanFilterResult.h" -namespace Lucene -{ - SpanFilterResult::SpanFilterResult(DocIdSetPtr docIdSet, Collection positions) - { - this->docIdSet = docIdSet; - this->positions = positions; - } - - SpanFilterResult::~SpanFilterResult() - { - } - - Collection SpanFilterResult::getPositions() - { - return positions; - } - - DocIdSetPtr SpanFilterResult::getDocIdSet() - { - return docIdSet; - } - - PositionInfo::PositionInfo(int32_t doc) - { - this->doc = doc; - this->positions = Collection::newInstance(); - } - - PositionInfo::~PositionInfo() - { - } - - void PositionInfo::addPosition(int32_t start, int32_t end) - { - positions.add(newLucene(start, end)); - } - - int32_t PositionInfo::getDoc() - { - return doc; - } - - Collection PositionInfo::getPositions() - { - return positions; - } - - StartEnd::StartEnd(int32_t start, int32_t end) - { - this->start = start; - this->end = end; - } - - StartEnd::~StartEnd() - { - } - - int32_t StartEnd::getEnd() - { - return end; - } - - int32_t StartEnd::getStart() - { - return start; - } +namespace Lucene { + 
+SpanFilterResult::SpanFilterResult(const DocIdSetPtr& docIdSet, Collection positions) { + this->docIdSet = docIdSet; + this->positions = positions; +} + +SpanFilterResult::~SpanFilterResult() { +} + +Collection SpanFilterResult::getPositions() { + return positions; +} + +DocIdSetPtr SpanFilterResult::getDocIdSet() { + return docIdSet; +} + +PositionInfo::PositionInfo(int32_t doc) { + this->doc = doc; + this->positions = Collection::newInstance(); +} + +PositionInfo::~PositionInfo() { +} + +void PositionInfo::addPosition(int32_t start, int32_t end) { + positions.add(newLucene(start, end)); +} + +int32_t PositionInfo::getDoc() { + return doc; +} + +Collection PositionInfo::getPositions() { + return positions; +} + +StartEnd::StartEnd(int32_t start, int32_t end) { + this->start = start; + this->end = end; +} + +StartEnd::~StartEnd() { +} + +int32_t StartEnd::getEnd() { + return end; +} + +int32_t StartEnd::getStart() { + return start; +} + } diff --git a/src/core/search/SpanQueryFilter.cpp b/src/core/search/SpanQueryFilter.cpp index 9a965741..f147f889 100644 --- a/src/core/search/SpanQueryFilter.cpp +++ b/src/core/search/SpanQueryFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,68 +12,61 @@ #include "OpenBitSet.h" #include "IndexReader.h" -namespace Lucene -{ - SpanQueryFilter::SpanQueryFilter(SpanQueryPtr query) - { - this->query = query; - } - - SpanQueryFilter::~SpanQueryFilter() - { - } - - DocIdSetPtr SpanQueryFilter::getDocIdSet(IndexReaderPtr reader) - { - SpanFilterResultPtr result(bitSpans(reader)); - return result->getDocIdSet(); - } - - SpanFilterResultPtr SpanQueryFilter::bitSpans(IndexReaderPtr reader) - { - OpenBitSetPtr bits(newLucene(reader->maxDoc())); - SpansPtr spans(query->getSpans(reader)); - Collection tmp(Collection::newInstance()); - int32_t currentDoc = -1; - PositionInfoPtr currentInfo; - while (spans->next()) - { - int32_t doc = spans->doc(); - bits->set(doc); - if (currentDoc != doc) - { - currentInfo = newLucene(doc); - tmp.add(currentInfo); - currentDoc = doc; - } - currentInfo->addPosition(spans->start(), spans->end()); +namespace Lucene { + +SpanQueryFilter::SpanQueryFilter(const SpanQueryPtr& query) { + this->query = query; +} + +SpanQueryFilter::~SpanQueryFilter() { +} + +DocIdSetPtr SpanQueryFilter::getDocIdSet(const IndexReaderPtr& reader) { + SpanFilterResultPtr result(bitSpans(reader)); + return result->getDocIdSet(); +} + +SpanFilterResultPtr SpanQueryFilter::bitSpans(const IndexReaderPtr& reader) { + OpenBitSetPtr bits(newLucene(reader->maxDoc())); + SpansPtr spans(query->getSpans(reader)); + Collection tmp(Collection::newInstance()); + int32_t currentDoc = -1; + PositionInfoPtr currentInfo; + while (spans->next()) { + int32_t doc = spans->doc(); + bits->set(doc); + if (currentDoc != doc) { + currentInfo = newLucene(doc); + tmp.add(currentInfo); + currentDoc = doc; } - return newLucene(bits, tmp); - } - - SpanQueryPtr SpanQueryFilter::getQuery() - { - return query; - } - - String SpanQueryFilter::toString() - { - return L"SpanQueryFilter(" + query->toString() + L")"; + currentInfo->addPosition(spans->start(), 
spans->end()); } - - bool SpanQueryFilter::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - SpanQueryFilterPtr otherSpanQueryFilter(boost::dynamic_pointer_cast(other)); - if (!otherSpanQueryFilter) - return false; - return query->equals(otherSpanQueryFilter->query); + return newLucene(bits, tmp); +} + +SpanQueryPtr SpanQueryFilter::getQuery() { + return query; +} + +String SpanQueryFilter::toString() { + return L"SpanQueryFilter(" + query->toString() + L")"; +} + +bool SpanQueryFilter::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t SpanQueryFilter::hashCode() - { - return query->hashCode() ^ 0x923f64b9; + + SpanQueryFilterPtr otherSpanQueryFilter(boost::dynamic_pointer_cast(other)); + if (!otherSpanQueryFilter) { + return false; } + return query->equals(otherSpanQueryFilter->query); +} + +int32_t SpanQueryFilter::hashCode() { + return query->hashCode() ^ 0x923f64b9; +} + } diff --git a/src/core/search/TermQuery.cpp b/src/core/search/TermQuery.cpp index 2c0e1492..1d0d3138 100644 --- a/src/core/search/TermQuery.cpp +++ b/src/core/search/TermQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -16,192 +16,175 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - TermQuery::TermQuery(TermPtr term) - { - this->term = term; - } - - TermQuery::~TermQuery() - { - } - - TermPtr TermQuery::getTerm() - { - return term; - } - - WeightPtr TermQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } - - void TermQuery::extractTerms(SetTerm terms) - { - terms.add(getTerm()); - } - - String TermQuery::toString(const String& field) - { - StringStream buffer; - if (term->field() != field) - buffer << term->field() << L":"; - buffer << term->text() << boostString(); - return buffer.str(); - } - - bool TermQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - TermQueryPtr otherTermQuery(boost::dynamic_pointer_cast(other)); - if (!otherTermQuery) - return false; - - return (getBoost() == otherTermQuery->getBoost() && term->equals(otherTermQuery->term)); - } - - int32_t TermQuery::hashCode() - { - return MiscUtils::doubleToIntBits(getBoost()) ^ term->hashCode(); - } - - LuceneObjectPtr TermQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(term); - TermQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); - cloneQuery->term = term; - return cloneQuery; - } - - TermWeight::TermWeight(TermQueryPtr query, SearcherPtr searcher) - { - this->query = query; - this->similarity = query->getSimilarity(searcher); - this->value = 0.0; - this->idf = 0.0; - this->queryNorm = 0.0; - this->queryWeight = 0.0; - - this->idfExp = similarity->idfExplain(query->term, searcher); - idf = idfExp->getIdf(); - } - - TermWeight::~TermWeight() - { - } - - String TermWeight::toString() - { - return L"weight(" + query->toString() + L")"; - } - - QueryPtr TermWeight::getQuery() - { - return query; - } - - double TermWeight::getValue() - { - return value; +namespace Lucene { + +TermQuery::TermQuery(const TermPtr& term) { + this->term = term; +} + +TermQuery::~TermQuery() { +} + +TermPtr TermQuery::getTerm() { + return term; +} + +WeightPtr TermQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +void TermQuery::extractTerms(SetTerm terms) { + terms.add(getTerm()); +} + +String TermQuery::toString(const String& field) { + StringStream buffer; + if (term->field() != field) { + buffer << term->field() << L":"; } - - double TermWeight::sumOfSquaredWeights() - { - queryWeight = idf * getQuery()->getBoost(); // compute query weight - return queryWeight * queryWeight; // square it + buffer << term->text() << boostString(); + return buffer.str(); +} + +bool TermQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - void TermWeight::normalize(double norm) - { - queryNorm = norm; - queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + + TermQueryPtr otherTermQuery(boost::dynamic_pointer_cast(other)); + if (!otherTermQuery) { + return false; } - - ScorerPtr TermWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - TermDocsPtr 
termDocs(reader->termDocs(query->term)); - return termDocs ? newLucene(shared_from_this(), termDocs, similarity, reader->norms(query->term->field())) : ScorerPtr(); + + return (getBoost() == otherTermQuery->getBoost() && term->equals(otherTermQuery->term)); +} + +int32_t TermQuery::hashCode() { + return MiscUtils::doubleToIntBits(getBoost()) ^ term->hashCode(); +} + +LuceneObjectPtr TermQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(term); + TermQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); + cloneQuery->term = term; + return cloneQuery; +} + +TermWeight::TermWeight(const TermQueryPtr& query, const SearcherPtr& searcher) { + this->query = query; + this->similarity = query->getSimilarity(searcher); + this->value = 0.0; + this->idf = 0.0; + this->queryNorm = 0.0; + this->queryWeight = 0.0; + + this->idfExp = similarity->idfExplain(query->term, searcher); + idf = idfExp->getIdf(); +} + +TermWeight::~TermWeight() { +} + +String TermWeight::toString() { + return L"weight(" + query->toString() + L")"; +} + +QueryPtr TermWeight::getQuery() { + return query; +} + +double TermWeight::getValue() { + return value; +} + +double TermWeight::sumOfSquaredWeights() { + queryWeight = idf * getQuery()->getBoost(); // compute query weight + return queryWeight * queryWeight; // square it +} + +void TermWeight::normalize(double norm) { + queryNorm = norm; + queryWeight *= queryNorm; // normalize query weight + value = queryWeight * idf; // idf for document +} + +ScorerPtr TermWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + TermDocsPtr termDocs(reader->termDocs(query->term)); + return termDocs ? 
newLucene(shared_from_this(), termDocs, similarity, reader->norms(query->term->field())) : ScorerPtr(); +} + +ExplanationPtr TermWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + ComplexExplanationPtr result(newLucene()); + result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); + + ExplanationPtr expl(newLucene(idf, idfExp->explain())); + + // explain query weight + ExplanationPtr queryExpl(newLucene()); + queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); + + ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); + if (query->getBoost() != 1.0) { + queryExpl->addDetail(boostExpl); } - - ExplanationPtr TermWeight::explain(IndexReaderPtr reader, int32_t doc) - { - ComplexExplanationPtr result(newLucene()); - result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); - - ExplanationPtr expl(newLucene(idf, idfExp->explain())); - - // explain query weight - ExplanationPtr queryExpl(newLucene()); - queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); - - ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); - if (query->getBoost() != 1.0) - queryExpl->addDetail(boostExpl); - queryExpl->addDetail(expl); - - ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); - queryExpl->addDetail(queryNormExpl); - - queryExpl->setValue(boostExpl->getValue() * expl->getValue() * queryNormExpl->getValue()); - result->addDetail(queryExpl); - - // explain field weight - String field(query->term->field()); - ComplexExplanationPtr fieldExpl(newLucene()); - fieldExpl->setDescription(L"fieldWeight(" + query->term->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); - - ExplanationPtr tfExplanation(newLucene()); - int32_t tf = 0; - TermDocsPtr termDocs(reader->termDocs(query->term)); - if (termDocs) - { - LuceneException finally; - try - { - if 
(termDocs->skipTo(doc) && termDocs->doc() == doc) - tf = termDocs->freq(); - } - catch (LuceneException& e) - { - finally = e; + queryExpl->addDetail(expl); + + ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); + queryExpl->addDetail(queryNormExpl); + + queryExpl->setValue(boostExpl->getValue() * expl->getValue() * queryNormExpl->getValue()); + result->addDetail(queryExpl); + + // explain field weight + String field(query->term->field()); + ComplexExplanationPtr fieldExpl(newLucene()); + fieldExpl->setDescription(L"fieldWeight(" + query->term->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); + + ExplanationPtr tfExplanation(newLucene()); + int32_t tf = 0; + TermDocsPtr termDocs(reader->termDocs(query->term)); + if (termDocs) { + LuceneException finally; + try { + if (termDocs->skipTo(doc) && termDocs->doc() == doc) { + tf = termDocs->freq(); } - termDocs->close(); - finally.throwException(); - tfExplanation->setValue(similarity->tf(tf)); - tfExplanation->setDescription(L"tf(termFreq(" + query->term->toString() + L")=" + StringUtils::toString(tf) + L")"); - } - else - { - tfExplanation->setValue(0.0); - tfExplanation->setDescription(L"no matching term"); + } catch (LuceneException& e) { + finally = e; } - - fieldExpl->addDetail(tfExplanation); - fieldExpl->addDetail(expl); - - ExplanationPtr fieldNormExpl(newLucene()); - ByteArray fieldNorms(reader->norms(field)); - double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; - fieldNormExpl->setValue(fieldNorm); - fieldNormExpl->setDescription(L"fieldNorm(field=" + field + L", doc=" + StringUtils::toString(doc) + L")"); - fieldExpl->addDetail(fieldNormExpl); - - fieldExpl->setMatch(tfExplanation->isMatch()); - fieldExpl->setValue(tfExplanation->getValue() * expl->getValue() * fieldNormExpl->getValue()); - - result->addDetail(fieldExpl); - result->setMatch(fieldExpl->getMatch()); - - // combine them - result->setValue(queryExpl->getValue() * fieldExpl->getValue()); - - if (queryExpl->getValue() == 1.0) - return fieldExpl; - - return result; + termDocs->close(); + finally.throwException(); + tfExplanation->setValue(similarity->tf(tf)); + tfExplanation->setDescription(L"tf(termFreq(" + query->term->toString() + L")=" + StringUtils::toString(tf) + L")"); + } else { + tfExplanation->setValue(0.0); + tfExplanation->setDescription(L"no matching term"); } + + fieldExpl->addDetail(tfExplanation); + fieldExpl->addDetail(expl); + + ExplanationPtr fieldNormExpl(newLucene()); + ByteArray fieldNorms(reader->norms(field)); + double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; + fieldNormExpl->setValue(fieldNorm); + fieldNormExpl->setDescription(L"fieldNorm(field=" + field + L", doc=" + StringUtils::toString(doc) + L")"); + fieldExpl->addDetail(fieldNormExpl); + + fieldExpl->setMatch(tfExplanation->isMatch()); + fieldExpl->setValue(tfExplanation->getValue() * expl->getValue() * fieldNormExpl->getValue()); + + result->addDetail(fieldExpl); + result->setMatch(fieldExpl->getMatch()); + + // combine them + result->setValue(queryExpl->getValue() * fieldExpl->getValue()); + + if (queryExpl->getValue() == 1.0) { + return fieldExpl; + } + + return result; +} + } diff --git a/src/core/search/TermRangeFilter.cpp b/src/core/search/TermRangeFilter.cpp index 6c5ce852..b214e9a5 100644 --- a/src/core/search/TermRangeFilter.cpp +++ b/src/core/search/TermRangeFilter.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,56 +10,47 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - TermRangeFilter::TermRangeFilter(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, - bool includeUpper, CollatorPtr collator) : - MultiTermQueryWrapperFilter(newLucene(fieldName, lowerTerm, upperTerm, - includeLower, includeUpper, collator)) - { - } - - TermRangeFilter::~TermRangeFilter() - { - } - - TermRangeFilterPtr TermRangeFilter::Less(const String& fieldName, StringValue upperTerm) - { - return newLucene(fieldName, VariantUtils::null(), upperTerm, false, true); - } - - TermRangeFilterPtr TermRangeFilter::More(const String& fieldName, StringValue lowerTerm) - { - return newLucene(fieldName, lowerTerm, VariantUtils::null(), true, false); - } - - String TermRangeFilter::getField() - { - return boost::static_pointer_cast(query)->getField(); - } - - String TermRangeFilter::getLowerTerm() - { - return boost::static_pointer_cast(query)->getLowerTerm(); - } - - String TermRangeFilter::getUpperTerm() - { - return boost::static_pointer_cast(query)->getUpperTerm(); - } - - bool TermRangeFilter::includesLower() - { - return boost::static_pointer_cast(query)->includesLower(); - } - - bool TermRangeFilter::includesUpper() - { - return boost::static_pointer_cast(query)->includesUpper(); - } - - CollatorPtr TermRangeFilter::getCollator() - { - return boost::static_pointer_cast(query)->getCollator(); - } +namespace Lucene { + +TermRangeFilter::TermRangeFilter(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, + bool includeUpper, CollatorPtr collator) : + MultiTermQueryWrapperFilter(newLucene(fieldName, lowerTerm, upperTerm, + includeLower, includeUpper, collator)) { +} + +TermRangeFilter::~TermRangeFilter() { +} + +TermRangeFilterPtr TermRangeFilter::Less(const String& fieldName, StringValue upperTerm) { + return 
newLucene(fieldName, VariantUtils::null(), upperTerm, false, true); +} + +TermRangeFilterPtr TermRangeFilter::More(const String& fieldName, StringValue lowerTerm) { + return newLucene(fieldName, lowerTerm, VariantUtils::null(), true, false); +} + +String TermRangeFilter::getField() { + return boost::static_pointer_cast(query)->getField(); +} + +String TermRangeFilter::getLowerTerm() { + return boost::static_pointer_cast(query)->getLowerTerm(); +} + +String TermRangeFilter::getUpperTerm() { + return boost::static_pointer_cast(query)->getUpperTerm(); +} + +bool TermRangeFilter::includesLower() { + return boost::static_pointer_cast(query)->includesLower(); +} + +bool TermRangeFilter::includesUpper() { + return boost::static_pointer_cast(query)->includesUpper(); +} + +CollatorPtr TermRangeFilter::getCollator() { + return boost::static_pointer_cast(query)->getCollator(); +} + } diff --git a/src/core/search/TermRangeQuery.cpp b/src/core/search/TermRangeQuery.cpp index 4643b9c1..14276cc5 100644 --- a/src/core/search/TermRangeQuery.cpp +++ b/src/core/search/TermRangeQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,142 +11,140 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - TermRangeQuery::TermRangeQuery(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, - bool includeUpper, CollatorPtr collator) - { - this->field = fieldName; - this->lowerTerm = lowerTerm; - this->upperTerm = upperTerm; - this->includeLower = includeLower; - this->includeUpper = includeUpper; - this->collator = collator; +namespace Lucene { + +TermRangeQuery::TermRangeQuery(const String& fieldName, StringValue lowerTerm, StringValue upperTerm, bool includeLower, + bool includeUpper, CollatorPtr collator) { + this->field = fieldName; + this->lowerTerm = lowerTerm; + this->upperTerm = upperTerm; + this->includeLower = includeLower; + this->includeUpper = includeUpper; + this->collator = collator; +} + +TermRangeQuery::~TermRangeQuery() { +} + +String TermRangeQuery::getField() { + return field; +} + +String TermRangeQuery::getLowerTerm() { + return VariantUtils::get(lowerTerm); +} + +String TermRangeQuery::getUpperTerm() { + return VariantUtils::get(upperTerm); +} + +bool TermRangeQuery::includesLower() { + return includeLower; +} + +bool TermRangeQuery::includesUpper() { + return includeUpper; +} + +CollatorPtr TermRangeQuery::getCollator() { + return collator; +} + +FilteredTermEnumPtr TermRangeQuery::getEnum(const IndexReaderPtr& reader) { + return newLucene(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); +} + +LuceneObjectPtr TermRangeQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = MultiTermQuery::clone(other ? 
other : newLucene(field, lowerTerm, upperTerm, includeLower, includeUpper, collator)); + TermRangeQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->lowerTerm = lowerTerm; + cloneQuery->upperTerm = upperTerm; + cloneQuery->collator = collator; + cloneQuery->field = field; + cloneQuery->includeLower = includeLower; + cloneQuery->includeUpper = includeUpper; + return cloneQuery; +} + +String TermRangeQuery::toString(const String& field) { + StringStream buffer; + if (getField() != field) { + buffer << getField() << L":"; } - - TermRangeQuery::~TermRangeQuery() - { + buffer << (includeLower ? L"[" : L"{"); + if (VariantUtils::isNull(lowerTerm)) { + buffer << L"*"; + } else { + buffer << lowerTerm; } - - String TermRangeQuery::getField() - { - return field; + buffer << L" TO "; + if (VariantUtils::isNull(upperTerm)) { + buffer << L"*"; + } else { + buffer << upperTerm; } - - String TermRangeQuery::getLowerTerm() - { - return VariantUtils::get(lowerTerm); + buffer << (includeUpper ? 
L"]" : L"}"); + buffer << boostString(); + return buffer.str(); +} + +bool TermRangeQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - String TermRangeQuery::getUpperTerm() - { - return VariantUtils::get(upperTerm); + if (!MultiTermQuery::equals(other)) { + return false; } - - bool TermRangeQuery::includesLower() - { - return includeLower; + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - bool TermRangeQuery::includesUpper() - { - return includeUpper; + TermRangeQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - CollatorPtr TermRangeQuery::getCollator() - { - return collator; + if (!collator) { + if (otherQuery->collator) { + return false; + } + } else if (!collator->equals(otherQuery->collator)) { + return false; } - - FilteredTermEnumPtr TermRangeQuery::getEnum(IndexReaderPtr reader) - { - return newLucene(reader, field, lowerTerm, upperTerm, includeLower, includeUpper, collator); + if (field != otherQuery->field) { + return false; } - - LuceneObjectPtr TermRangeQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(field, lowerTerm, upperTerm, includeLower, includeUpper, collator)); - TermRangeQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->lowerTerm = lowerTerm; - cloneQuery->upperTerm = upperTerm; - cloneQuery->collator = collator; - cloneQuery->field = field; - cloneQuery->includeLower = includeLower; - cloneQuery->includeUpper = includeUpper; - return cloneQuery; + if (includeLower != otherQuery->includeLower) { + return false; } - - String TermRangeQuery::toString(const String& field) - { - StringStream buffer; - if (getField() != field) - buffer << getField() << L":"; - buffer << (includeLower ? 
L"[" : L"{"); - if (VariantUtils::isNull(lowerTerm)) - buffer << L"*"; - else - buffer << lowerTerm; - buffer << L" TO "; - if (VariantUtils::isNull(upperTerm)) - buffer << L"*"; - else - buffer << upperTerm; - buffer << (includeUpper ? L"]" : L"}"); - buffer << boostString(); - return buffer.str(); + if (includeUpper != otherQuery->includeUpper) { + return false; } - - bool TermRangeQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!MultiTermQuery::equals(other)) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - TermRangeQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - if (!collator) - { - if (otherQuery->collator) - return false; - } - else if (!collator->equals(otherQuery->collator)) - return false; - if (field != otherQuery->field) + if (VariantUtils::isNull(lowerTerm)) { + if (!VariantUtils::isNull(otherQuery->lowerTerm)) { return false; - if (includeLower != otherQuery->includeLower) - return false; - if (includeUpper != otherQuery->includeUpper) - return false; - if (VariantUtils::isNull(lowerTerm)) - { - if (!VariantUtils::isNull(otherQuery->lowerTerm)) - return false; } - else if (!VariantUtils::equals(lowerTerm, otherQuery->lowerTerm)) + } else if (!VariantUtils::equals(lowerTerm, otherQuery->lowerTerm)) { + return false; + } + if (VariantUtils::isNull(upperTerm)) { + if (!VariantUtils::isNull(otherQuery->upperTerm)) { return false; - if (VariantUtils::isNull(upperTerm)) - { - if (!VariantUtils::isNull(otherQuery->upperTerm)) - return false; } - else if (!VariantUtils::equals(upperTerm, otherQuery->upperTerm)) - return false; - return true; - } - - int32_t TermRangeQuery::hashCode() - { - int32_t prime = 31; - int32_t result = MultiTermQuery::hashCode(); - result = prime * result + (collator ? collator->hashCode() : 0); - result = prime * result + (field.empty() ? 
0 : StringUtils::hashCode(field)); - result = prime * result + (includeLower ? 1231 : 1237); - result = prime * result + (includeUpper ? 1231 : 1237); - result = prime * result + (VariantUtils::isNull(lowerTerm) ? 0 : StringUtils::hashCode(VariantUtils::get(lowerTerm))); - result = prime * result + (VariantUtils::isNull(upperTerm) ? 0 : StringUtils::hashCode(VariantUtils::get(upperTerm))); - return result; + } else if (!VariantUtils::equals(upperTerm, otherQuery->upperTerm)) { + return false; } + return true; +} + +int32_t TermRangeQuery::hashCode() { + int32_t prime = 31; + int32_t result = MultiTermQuery::hashCode(); + result = prime * result + (collator ? collator->hashCode() : 0); + result = prime * result + (field.empty() ? 0 : StringUtils::hashCode(field)); + result = prime * result + (includeLower ? 1231 : 1237); + result = prime * result + (includeUpper ? 1231 : 1237); + result = prime * result + (VariantUtils::isNull(lowerTerm) ? 0 : StringUtils::hashCode(VariantUtils::get(lowerTerm))); + result = prime * result + (VariantUtils::isNull(upperTerm) ? 0 : StringUtils::hashCode(VariantUtils::get(upperTerm))); + return result; +} + } diff --git a/src/core/search/TermRangeTermEnum.cpp b/src/core/search/TermRangeTermEnum.cpp index 58d40bbc..077f300a 100644 --- a/src/core/search/TermRangeTermEnum.cpp +++ b/src/core/search/TermRangeTermEnum.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,93 +12,83 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - TermRangeTermEnum::TermRangeTermEnum(IndexReaderPtr reader, const String& field, StringValue lowerTermText, - StringValue upperTermText, bool includeLower, bool includeUpper, CollatorPtr collator) - { - this->collator = collator; - this->_endEnum = false; - this->upperTermText = upperTermText; - this->lowerTermText = lowerTermText; - this->includeLower = includeLower; - this->includeUpper = includeUpper; - this->field = field; - - // do a little bit of normalization: open ended range queries should always be inclusive. - if (VariantUtils::isNull(this->lowerTermText)) - this->includeLower = true; - - if (VariantUtils::isNull(this->upperTermText)) - this->includeUpper = true; - - String startTermText(collator ? L"" : VariantUtils::get(this->lowerTermText)); - setEnum(reader->terms(newLucene(this->field, startTermText))); - } - - TermRangeTermEnum::~TermRangeTermEnum() - { - } - - double TermRangeTermEnum::difference() - { - return 1.0; +namespace Lucene { + +TermRangeTermEnum::TermRangeTermEnum(const IndexReaderPtr& reader, const String& field, StringValue lowerTermText, + StringValue upperTermText, bool includeLower, bool includeUpper, const CollatorPtr& collator) { + this->collator = collator; + this->_endEnum = false; + this->upperTermText = upperTermText; + this->lowerTermText = lowerTermText; + this->includeLower = includeLower; + this->includeUpper = includeUpper; + this->field = field; + + // do a little bit of normalization: open ended range queries should always be inclusive. 
+ if (VariantUtils::isNull(this->lowerTermText)) { + this->includeLower = true; } - - bool TermRangeTermEnum::endEnum() - { - return _endEnum; + + if (VariantUtils::isNull(this->upperTermText)) { + this->includeUpper = true; } - - bool TermRangeTermEnum::termCompare(TermPtr term) - { - if (!collator) - { - // Use Unicode code point ordering - bool checkLower = false; - if (!includeLower) // make adjustments to set to exclusive - checkLower = true; - if (term && term->field() == field) - { - if (!checkLower || VariantUtils::isNull(lowerTermText) || term->text().compare(VariantUtils::get(lowerTermText)) > 0) - { - checkLower = false; - if (!VariantUtils::isNull(upperTermText)) - { - int32_t compare = VariantUtils::get(upperTermText).compare(term->text()); - // if beyond the upper term, or is exclusive and this is equal to the upper term, break out - if (compare < 0 || (!includeUpper && compare == 0)) - { - _endEnum = true; - return false; - } + + String startTermText(collator ? L"" : VariantUtils::get(this->lowerTermText)); + setEnum(reader->terms(newLucene(this->field, startTermText))); +} + +TermRangeTermEnum::~TermRangeTermEnum() { +} + +double TermRangeTermEnum::difference() { + return 1.0; +} + +bool TermRangeTermEnum::endEnum() { + return _endEnum; +} + +bool TermRangeTermEnum::termCompare(const TermPtr& term) { + if (!collator) { + // Use Unicode code point ordering + bool checkLower = false; + if (!includeLower) { // make adjustments to set to exclusive + checkLower = true; + } + if (term && term->field() == field) { + if (!checkLower || VariantUtils::isNull(lowerTermText) || term->text().compare(VariantUtils::get(lowerTermText)) > 0) { + checkLower = false; + if (!VariantUtils::isNull(upperTermText)) { + int32_t compare = VariantUtils::get(upperTermText).compare(term->text()); + // if beyond the upper term, or is exclusive and this is equal to the upper term, break out + if (compare < 0 || (!includeUpper && compare == 0)) { + _endEnum = true; + return false; 
} - return true; } + return true; } - else - { - // break - _endEnum = true; - return false; - } + } else { + // break + _endEnum = true; return false; } - else - { - if (term && term->field() == field) - { - if ((VariantUtils::isNull(lowerTermText) || - (includeLower ? collator->compare(term->text(), VariantUtils::get(lowerTermText)) >= 0 : - collator->compare(term->text(), VariantUtils::get(lowerTermText)) > 0)) && + return false; + } else { + if (term && term->field() == field) { + if ((VariantUtils::isNull(lowerTermText) || + (includeLower ? collator->compare(term->text(), VariantUtils::get(lowerTermText)) >= 0 : + collator->compare(term->text(), VariantUtils::get(lowerTermText)) > 0)) && (VariantUtils::isNull(upperTermText) || - (includeUpper ? collator->compare(term->text(), VariantUtils::get(upperTermText)) <= 0 : - collator->compare(term->text(), VariantUtils::get(upperTermText)) < 0))) - return true; - return false; + (includeUpper ? collator->compare(term->text(), VariantUtils::get(upperTermText)) <= 0 : + collator->compare(term->text(), VariantUtils::get(upperTermText)) < 0))) { + return true; } - _endEnum = true; return false; } + _endEnum = true; + return false; } } + +} diff --git a/src/core/search/TermScorer.cpp b/src/core/search/TermScorer.cpp index e33cf8f2..d2623e3c 100644 --- a/src/core/search/TermScorer.cpp +++ b/src/core/search/TermScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,127 +11,119 @@ #include "Weight.h" #include "Collector.h" -namespace Lucene -{ - const int32_t TermScorer::SCORE_CACHE_SIZE = 32; - - TermScorer::TermScorer(WeightPtr weight, TermDocsPtr td, SimilarityPtr similarity, ByteArray norms) : Scorer(similarity) - { - this->weight = weight; - this->termDocs = td; - this->norms = norms; - this->weightValue = weight->getValue(); - this->doc = -1; - this->docs = Collection::newInstance(32); - this->freqs = Collection::newInstance(32); - this->pointer = 0; - this->pointerMax = 0; - this->scoreCache = Collection::newInstance(SCORE_CACHE_SIZE); - - for (int32_t i = 0; i < SCORE_CACHE_SIZE; ++i) - scoreCache[i] = getSimilarity()->tf(i) * weightValue; - } - - TermScorer::~TermScorer() - { - } - - const Collection TermScorer::SIM_NORM_DECODER() - { - return Similarity::getNormDecoder(); - } - - void TermScorer::score(CollectorPtr collector) - { - score(collector, INT_MAX, nextDoc()); - } - - bool TermScorer::score(CollectorPtr collector, int32_t max, int32_t firstDocID) - { - // firstDocID is ignored since nextDoc() sets 'doc' - collector->setScorer(shared_from_this()); - while (doc < max) // for docs in window - { - collector->collect(doc); - - if (++pointer >= pointerMax) - { - pointerMax = termDocs->read(docs, freqs); // refill buffers - if (pointerMax != 0) - pointer = 0; - else - { - termDocs->close(); // close stream - doc = INT_MAX; // set to sentinel value - return false; - } - } - doc = docs[pointer]; - } - return true; - } - - int32_t TermScorer::docID() - { - return doc; +namespace Lucene { + +const int32_t TermScorer::SCORE_CACHE_SIZE = 32; + +TermScorer::TermScorer(const WeightPtr& weight, const TermDocsPtr& td, const SimilarityPtr& similarity, ByteArray norms) : Scorer(similarity) { + this->weight = weight; + this->termDocs = td; + this->__termDocs = this->termDocs.get(); + this->norms = norms; + this->weightValue = 
weight->getValue(); + this->doc = -1; + this->docs = Collection::newInstance(123); + this->__docs = this->docs.get(); + this->freqs = Collection::newInstance(128); + this->__freqs = this->freqs.get(); + this->pointer = 0; + this->pointerMax = 0; + this->scoreCache = Collection::newInstance(SCORE_CACHE_SIZE); + + for (int32_t i = 0; i < SCORE_CACHE_SIZE; ++i) { + scoreCache[i] = similarity->tf(i) * weightValue; } - - int32_t TermScorer::nextDoc() - { - ++pointer; - if (pointer >= pointerMax) - { - pointerMax = termDocs->read(docs, freqs); // refill buffer - if (pointerMax != 0) +} + +TermScorer::~TermScorer() { +} + +inline const Collection& TermScorer::SIM_NORM_DECODER() { + return Similarity::NORM_TABLE; +} + +void TermScorer::score(const CollectorPtr& collector) { + score(collector, INT_MAX, nextDoc()); +} + +bool TermScorer::score(const CollectorPtr& collector, int32_t max, int32_t firstDocID) { + // firstDocID is ignored since nextDoc() sets 'doc' + auto* __collector = collector.get(); + __collector->setScorer(shared_from_this()); + while (doc < max) { // for docs in window + __collector->collect(doc); + + if (++pointer >= pointerMax) { + pointerMax = __termDocs->read(docs, freqs); // refill buffers + if (pointerMax != 0) { pointer = 0; - else - { - termDocs->close(); // close stream - doc = NO_MORE_DOCS; - return doc; + } else { + __termDocs->close(); // close stream + doc = INT_MAX; // set to sentinel value + return false; } } - doc = docs[pointer]; - return doc; - } - - double TermScorer::score() - { - BOOST_ASSERT(doc != -1); - int32_t f = freqs[pointer]; - double raw = f < SCORE_CACHE_SIZE ? scoreCache[f] : getSimilarity()->tf(f) * weightValue; // compute tf(f) * weight - return norms ? 
raw * SIM_NORM_DECODER()[norms[doc] & 0xff] : raw; // normalize for field + doc = __docs->operator[](pointer); + freq = __freqs->operator[](pointer); } - - int32_t TermScorer::advance(int32_t target) - { - // first scan in cache - for (++pointer; pointer < pointerMax; ++pointer) - { - if (docs[pointer] >= target) - { - doc = docs[pointer]; - return doc; - } - } - - // not found in cache, seek underlying stream - bool result = termDocs->skipTo(target); - if (result) - { - pointerMax = 1; + return true; +} + +int32_t TermScorer::docID() { + return doc; +} + +int32_t TermScorer::nextDoc() { + ++pointer; + if (pointer >= pointerMax) { + pointerMax = __termDocs->read(docs, freqs); // refill buffer + if (pointerMax != 0) { pointer = 0; - doc = termDocs->doc(); - docs[pointer] = doc; - freqs[pointer] = termDocs->freq(); - } - else + } else { + __termDocs->close(); // close stream doc = NO_MORE_DOCS; - return doc; + return doc; + } + } + doc = __docs->operator[](pointer); + freq = __freqs->operator[](pointer); + + return doc; +} + +double TermScorer::score() { + BOOST_ASSERT(doc != -1); + double raw = freq < SCORE_CACHE_SIZE ? scoreCache[freq] : similarity->tf(freq) * weightValue; // compute tf(f) * weight + return norms ? 
raw * SIM_NORM_DECODER()[norms[doc] & 0xff] : raw; // normalize for field +} + +int32_t TermScorer::advance(int32_t target) { + // first scan in cache + for (++pointer; pointer < pointerMax; ++pointer) { + if (__docs->operator[](pointer) >= target) { + doc = __docs->operator[](pointer); + freq = __freqs->operator[](pointer); + return doc; + } } - - String TermScorer::toString() - { - return L"scorer(" + weight->toString() + L")"; + + // not found in cache, seek underlying stream + bool result = __termDocs->skipTo(target); + if (result) { + pointerMax = 1; + pointer = 0; + doc = __termDocs->doc(); + __docs->operator[](pointer) = doc; + freq = __termDocs->freq(); + __freqs->operator[](pointer) = freq; + } else { + doc = NO_MORE_DOCS; } + return doc; +} + +String TermScorer::toString() { + return L"term scorer(" + weight->toString() + L")"; +} + } diff --git a/src/core/search/TimeLimitingCollector.cpp b/src/core/search/TimeLimitingCollector.cpp index 7a23a1ba..39174f49 100644 --- a/src/core/search/TimeLimitingCollector.cpp +++ b/src/core/search/TimeLimitingCollector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,137 +9,118 @@ #include "_TimeLimitingCollector.h" #include "StringUtils.h" -namespace Lucene -{ - /// Default timer resolution. 
- const int32_t TimeLimitingCollector::DEFAULT_RESOLUTION = 20; - - int64_t TimeLimitingCollector::resolution = TimeLimitingCollector::DEFAULT_RESOLUTION; - - TimeLimitingCollector::TimeLimitingCollector(CollectorPtr collector, int64_t timeAllowed) - { - this->DEFAULT_GREEDY = false; - this->greedy = DEFAULT_GREEDY; - this->collector = collector; - this->t0 = TIMER_THREAD()->getMilliseconds(); - this->timeout = t0 + timeAllowed; - this->docBase = 0; - } - - TimeLimitingCollector::~TimeLimitingCollector() - { - } - - TimerThreadPtr TimeLimitingCollector::TIMER_THREAD() - { - static TimerThreadPtr _TIMER_THREAD; - if (!_TIMER_THREAD) - { - _TIMER_THREAD = newLucene(); - CycleCheck::addStatic(_TIMER_THREAD); - } - if (!_TIMER_THREAD->isAlive()) - _TIMER_THREAD->start(); // start single thread instance - return _TIMER_THREAD; - } - - int64_t TimeLimitingCollector::getResolution() - { - return resolution; - } - - void TimeLimitingCollector::setResolution(int64_t newResolution) - { - resolution = std::max(newResolution, (int64_t)5); // 5 milliseconds is about the minimum reasonable time for a wait call. - } - - void TimeLimitingCollector::stopTimer() - { - if (TIMER_THREAD()->isAlive()) - { - TIMER_THREAD()->stopThread(); - TIMER_THREAD()->join(); - } - } - - bool TimeLimitingCollector::isGreedy() - { - return greedy; - } - - void TimeLimitingCollector::setGreedy(bool greedy) - { - this->greedy = greedy; +namespace Lucene { + +/// Default timer resolution. 
+const int32_t TimeLimitingCollector::DEFAULT_RESOLUTION = 20; + +int64_t TimeLimitingCollector::resolution = TimeLimitingCollector::DEFAULT_RESOLUTION; + +TimeLimitingCollector::TimeLimitingCollector(const CollectorPtr& collector, int64_t timeAllowed) { + this->DEFAULT_GREEDY = false; + this->greedy = DEFAULT_GREEDY; + this->collector = collector; + this->t0 = TIMER_THREAD()->getMilliseconds(); + this->timeout = t0 + timeAllowed; + this->docBase = 0; +} + +TimeLimitingCollector::~TimeLimitingCollector() { +} + +TimerThreadPtr TimeLimitingCollector::TIMER_THREAD() { + static TimerThreadPtr _TIMER_THREAD; + LUCENE_RUN_ONCE( + _TIMER_THREAD = newLucene(); + CycleCheck::addStatic(_TIMER_THREAD); + ); + if (!_TIMER_THREAD->isAlive()) { + _TIMER_THREAD->start(); // start single thread instance + } + return _TIMER_THREAD; +} + +int64_t TimeLimitingCollector::getResolution() { + return resolution; +} + +void TimeLimitingCollector::setResolution(int64_t newResolution) { + resolution = std::max(newResolution, (int64_t)5); // 5 milliseconds is about the minimum reasonable time for a wait call. +} + +void TimeLimitingCollector::stopTimer() { + if (TIMER_THREAD()->isAlive()) { + TIMER_THREAD()->stopThread(); + TIMER_THREAD()->join(); } - - void TimeLimitingCollector::collect(int32_t doc) - { - int64_t time = TIMER_THREAD()->getMilliseconds(); - if (timeout < time) - { - if (greedy) - collector->collect(doc); - boost::throw_exception(TimeExceededException(L"Elapsed time:" + StringUtils::toString(timeout - t0) + L" ms. " + - L"Exceeded allowed search time:" + StringUtils::toString(time - t0) + L" ms. 
" + - L"Last doc:" + StringUtils::toString(docBase + doc))); +} + +bool TimeLimitingCollector::isGreedy() { + return greedy; +} + +void TimeLimitingCollector::setGreedy(bool greedy) { + this->greedy = greedy; +} + +void TimeLimitingCollector::collect(int32_t doc) { + int64_t time = TIMER_THREAD()->getMilliseconds(); + if (timeout < time) { + if (greedy) { + collector->collect(doc); } - collector->collect(doc); - } - - void TimeLimitingCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - collector->setNextReader(reader, docBase); - this->docBase = docBase; - } - - void TimeLimitingCollector::setScorer(ScorerPtr scorer) - { - collector->setScorer(scorer); - } - - bool TimeLimitingCollector::acceptsDocsOutOfOrder() - { - return collector->acceptsDocsOutOfOrder(); + boost::throw_exception(TimeExceededException(L"Elapsed time:" + StringUtils::toString(timeout - t0) + L" ms. " + + L"Exceeded allowed search time:" + StringUtils::toString(time - t0) + L" ms. " + + L"Last doc:" + StringUtils::toString(docBase + doc))); } - - TimerThread::TimerThread() - { - time = 0; - _stopThread = false; - } - - TimerThread::~TimerThread() - { - } - - void TimerThread::start() - { - _stopThread = false; - LuceneThread::start(); - } - - void TimerThread::run() - { - while (!_stopThread) + collector->collect(doc); +} + +void TimeLimitingCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + collector->setNextReader(reader, docBase); + this->docBase = docBase; +} + +void TimeLimitingCollector::setScorer(const ScorerPtr& scorer) { + collector->setScorer(scorer); +} + +bool TimeLimitingCollector::acceptsDocsOutOfOrder() { + return collector->acceptsDocsOutOfOrder(); +} + +TimerThread::TimerThread() { + time = 0; + _stopThread = false; +} + +TimerThread::~TimerThread() { +} + +void TimerThread::start() { + _stopThread = false; + LuceneThread::start(); +} + +void TimerThread::run() { + while (!_stopThread) { + int64_t resolution; { - int64_t resolution; - { 
- SyncLock syncLock(this); - resolution = TimeLimitingCollector::resolution; - time += resolution; - } - LuceneThread::threadSleep(resolution); + SyncLock syncLock(this); + resolution = TimeLimitingCollector::resolution; + time += resolution; } + LuceneThread::threadSleep(resolution); } - - int64_t TimerThread::getMilliseconds() - { - SyncLock syncLock(this); - return time; - } - - void TimerThread::stopThread() - { - _stopThread = true; - } +} + +int64_t TimerThread::getMilliseconds() { + SyncLock syncLock(this); + return time; +} + +void TimerThread::stopThread() { + _stopThread = true; +} + } diff --git a/src/core/search/TopDocs.cpp b/src/core/search/TopDocs.cpp index e79d4ae1..75a3e4be 100644 --- a/src/core/search/TopDocs.cpp +++ b/src/core/search/TopDocs.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,33 +7,29 @@ #include "LuceneInc.h" #include "TopDocs.h" -namespace Lucene -{ - TopDocs::TopDocs(int32_t totalHits, Collection scoreDocs) - { - this->totalHits = totalHits; - this->scoreDocs = scoreDocs; - this->maxScore = std::numeric_limits::quiet_NaN(); - } - - TopDocs::TopDocs(int32_t totalHits, Collection scoreDocs, double maxScore) - { - this->totalHits = totalHits; - this->scoreDocs = scoreDocs; - this->maxScore = maxScore; - } - - TopDocs::~TopDocs() - { - } - - double TopDocs::getMaxScore() - { - return maxScore; - } - - void TopDocs::setMaxScore(double maxScore) - { - this->maxScore = maxScore; - } +namespace Lucene { + +TopDocs::TopDocs(int32_t totalHits, Collection scoreDocs) { + this->totalHits = totalHits; + this->scoreDocs = scoreDocs; + this->maxScore = std::numeric_limits::quiet_NaN(); +} + +TopDocs::TopDocs(int32_t totalHits, Collection scoreDocs, double maxScore) { + this->totalHits = totalHits; + this->scoreDocs = scoreDocs; + this->maxScore = maxScore; +} + +TopDocs::~TopDocs() { +} + +double TopDocs::getMaxScore() { + return maxScore; +} + +void TopDocs::setMaxScore(double maxScore) { + this->maxScore = maxScore; +} + } diff --git a/src/core/search/TopDocsCollector.cpp b/src/core/search/TopDocsCollector.cpp index 9356d02e..c16c3cd4 100644 --- a/src/core/search/TopDocsCollector.cpp +++ b/src/core/search/TopDocsCollector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,84 +9,78 @@ #include "TopDocs.h" #include "HitQueueBase.h" -namespace Lucene -{ - TopDocsCollector::TopDocsCollector(HitQueueBasePtr pq) - { - this->pq = pq; - this->totalHits = 0; - } - - TopDocsCollector::~TopDocsCollector() - { - } - - TopDocsPtr TopDocsCollector::EMPTY_TOPDOCS() - { - static TopDocsPtr _EMPTY_TOPDOCS; - if (!_EMPTY_TOPDOCS) - { - _EMPTY_TOPDOCS = newLucene(0, Collection::newInstance(), std::numeric_limits::quiet_NaN()); - CycleCheck::addStatic(_EMPTY_TOPDOCS); - } - return _EMPTY_TOPDOCS; - } - - void TopDocsCollector::populateResults(Collection results, int32_t howMany) - { - for (int32_t i = howMany - 1; i >= 0; --i) - results[i] = pq->pop(); - } - - TopDocsPtr TopDocsCollector::newTopDocs(Collection results, int32_t start) - { - return results ? newLucene(totalHits, results) : EMPTY_TOPDOCS(); - } - - int32_t TopDocsCollector::getTotalHits() - { - return totalHits; - } - - TopDocsPtr TopDocsCollector::topDocs() - { - // In case pq was populated with sentinel values, there might be less results than pq.size(). - // Therefore return all results until either pq.size() or totalHits. - return topDocs(0, totalHits < pq->size() ? 
totalHits : pq->size()); +namespace Lucene { + +TopDocsCollector::TopDocsCollector(const HitQueueBasePtr& pq) { + this->pq = pq; + this->totalHits = 0; +} + +TopDocsCollector::~TopDocsCollector() { +} + +TopDocsPtr TopDocsCollector::EMPTY_TOPDOCS() { + static TopDocsPtr _EMPTY_TOPDOCS; + LUCENE_RUN_ONCE( + _EMPTY_TOPDOCS = newLucene(0, Collection::newInstance(), std::numeric_limits::quiet_NaN()); + CycleCheck::addStatic(_EMPTY_TOPDOCS); + ); + return _EMPTY_TOPDOCS; +} + +void TopDocsCollector::populateResults(Collection results, int32_t howMany) { + for (int32_t i = howMany - 1; i >= 0; --i) { + results[i] = pq->pop(); } - - TopDocsPtr TopDocsCollector::topDocs(int32_t start) - { - // In case pq was populated with sentinel values, there might be less results than pq.size(). - // Therefore return all results until either pq.size() or totalHits. - return topDocs(start, totalHits < pq->size() ? totalHits : pq->size()); +} + +TopDocsPtr TopDocsCollector::newTopDocs(Collection results, int32_t start) { + return results ? newLucene(totalHits, results) : EMPTY_TOPDOCS(); +} + +int32_t TopDocsCollector::getTotalHits() { + return totalHits; +} + +TopDocsPtr TopDocsCollector::topDocs() { + // In case pq was populated with sentinel values, there might be less results than pq.size(). + // Therefore return all results until either pq.size() or totalHits. + return topDocs(0, totalHits < pq->size() ? totalHits : pq->size()); +} + +TopDocsPtr TopDocsCollector::topDocs(int32_t start) { + // In case pq was populated with sentinel values, there might be less results than pq.size(). + // Therefore return all results until either pq.size() or totalHits. + return topDocs(start, totalHits < pq->size() ? totalHits : pq->size()); +} + +TopDocsPtr TopDocsCollector::topDocs(int32_t start, int32_t howMany) { + // In case pq was populated with sentinel values, there might be less results than pq.size(). + // Therefore return all results until either pq.size() or totalHits. 
+ int32_t size = totalHits < pq->size() ? totalHits : pq->size(); + + // Don't bother to throw an exception, just return an empty TopDocs in case the parameters are + // invalid or out of range. + if (start < 0 || start >= size || howMany <= 0) { + return newTopDocs(Collection(), start); } - - TopDocsPtr TopDocsCollector::topDocs(int32_t start, int32_t howMany) - { - // In case pq was populated with sentinel values, there might be less results than pq.size(). - // Therefore return all results until either pq.size() or totalHits. - int32_t size = totalHits < pq->size() ? totalHits : pq->size(); - - // Don't bother to throw an exception, just return an empty TopDocs in case the parameters are - // invalid or out of range. - if (start < 0 || start >= size || howMany <= 0) - return newTopDocs(Collection(), start); - - // We know that start < pq.size, so just fix howMany. - howMany = std::min(size - start, howMany); - Collection results = Collection::newInstance(howMany); - - // pq's pop() returns the 'least' element in the queue, therefore need to discard the first ones, - // until we reach the requested range. Note that this loop will usually not be executed, since the - // common usage should be that the caller asks for the last howMany results. However it's needed - // here for completeness. - for (int32_t i = pq->size() - start - howMany; i > 0; --i) - pq->pop(); - - // Get the requested results from pq. - populateResults(results, howMany); - - return newTopDocs(results, start); + + // We know that start < pq.size, so just fix howMany. + howMany = std::min(size - start, howMany); + Collection results = Collection::newInstance(howMany); + + // pq's pop() returns the 'least' element in the queue, therefore need to discard the first ones, + // until we reach the requested range. Note that this loop will usually not be executed, since the + // common usage should be that the caller asks for the last howMany results. However it's needed + // here for completeness. 
+ for (int32_t i = pq->size() - start - howMany; i > 0; --i) { + pq->pop(); } + + // Get the requested results from pq. + populateResults(results, howMany); + + return newTopDocs(results, start); +} + } diff --git a/src/core/search/TopFieldCollector.cpp b/src/core/search/TopFieldCollector.cpp index 6987d9c9..188232f2 100644 --- a/src/core/search/TopFieldCollector.cpp +++ b/src/core/search/TopFieldCollector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,889 +14,773 @@ #include "Sort.h" #include "TopFieldDocs.h" -namespace Lucene -{ - TopFieldCollector::TopFieldCollector(HitQueueBasePtr pq, int32_t numHits, bool fillFields) : TopDocsCollector(pq) - { - this->numHits = numHits; - this->fillFields = fillFields; - this->maxScore = std::numeric_limits::quiet_NaN(); - this->queueFull = false; - this->docBase = 0; - } - - TopFieldCollector::~TopFieldCollector() - { - } - - const Collection TopFieldCollector::EMPTY_SCOREDOCS() - { - static Collection _EMPTY_SCOREDOCS; - if (!_EMPTY_SCOREDOCS) - _EMPTY_SCOREDOCS = Collection::newInstance(); - return _EMPTY_SCOREDOCS; +namespace Lucene { + +TopFieldCollector::TopFieldCollector(const HitQueueBasePtr& pq, int32_t numHits, bool fillFields) : TopDocsCollector(pq) { + this->numHits = numHits; + this->fillFields = fillFields; + this->maxScore = std::numeric_limits::quiet_NaN(); + this->queueFull = false; + this->docBase = 0; +} + +TopFieldCollector::~TopFieldCollector() { +} + +const Collection TopFieldCollector::EMPTY_SCOREDOCS() { + static Collection _EMPTY_SCOREDOCS; + LUCENE_RUN_ONCE( + _EMPTY_SCOREDOCS = Collection::newInstance(); + ); + return 
_EMPTY_SCOREDOCS; +} + +TopFieldCollectorPtr TopFieldCollector::create(const SortPtr& sort, int32_t numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder) { + if (sort->fields.empty()) { + boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); } - - TopFieldCollectorPtr TopFieldCollector::create(SortPtr sort, int32_t numHits, bool fillFields, bool trackDocScores, bool trackMaxScore, bool docsScoredInOrder) - { - if (sort->fields.empty()) - boost::throw_exception(IllegalArgumentException(L"Sort must contain at least one field")); - - FieldValueHitQueuePtr queue(FieldValueHitQueue::create(sort->fields, numHits)); - if (queue->getComparators().size() == 1) - { - if (docsScoredInOrder) - { - if (trackMaxScore) - return newLucene(queue, numHits, fillFields); - else if (trackDocScores) - return newLucene(queue, numHits, fillFields); - else - return newLucene(queue, numHits, fillFields); + + FieldValueHitQueuePtr queue(FieldValueHitQueue::create(sort->fields, numHits)); + if (queue->getComparators().size() == 1) { + if (docsScoredInOrder) { + if (trackMaxScore) { + return newLucene(queue, numHits, fillFields); + } else if (trackDocScores) { + return newLucene(queue, numHits, fillFields); + } else { + return newLucene(queue, numHits, fillFields); } - else - { - if (trackMaxScore) - return newLucene(queue, numHits, fillFields); - else if (trackDocScores) - return newLucene(queue, numHits, fillFields); - else - return newLucene(queue, numHits, fillFields); + } else { + if (trackMaxScore) { + return newLucene(queue, numHits, fillFields); + } else if (trackDocScores) { + return newLucene(queue, numHits, fillFields); + } else { + return newLucene(queue, numHits, fillFields); } } - - // multiple comparators - if (docsScoredInOrder) - { - if (trackMaxScore) - return newLucene(queue, numHits, fillFields); - else if (trackDocScores) - return newLucene(queue, numHits, fillFields); - else - return 
newLucene(queue, numHits, fillFields); - } - else - { - if (trackMaxScore) - return newLucene(queue, numHits, fillFields); - else if (trackDocScores) - return newLucene(queue, numHits, fillFields); - else - return newLucene(queue, numHits, fillFields); - } - } - - void TopFieldCollector::add(int32_t slot, int32_t doc, double score) - { - bottom = boost::static_pointer_cast(pq->add(newLucene(slot, docBase + doc, score))); - queueFull = (totalHits == numHits); } - - void TopFieldCollector::populateResults(Collection results, int32_t howMany) - { - if (fillFields) - { - FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); - for (int32_t i = howMany - 1; i >= 0; --i) - results[i] = queue->fillFields(boost::static_pointer_cast(queue->pop())); - } - else - { - for (int32_t i = howMany - 1; i >= 0; --i) - { - FieldValueHitQueueEntryPtr entry(boost::static_pointer_cast(pq->pop())); - results[i] = newLucene(entry->doc, entry->score); - } + + // multiple comparators + if (docsScoredInOrder) { + if (trackMaxScore) { + return newLucene(queue, numHits, fillFields); + } else if (trackDocScores) { + return newLucene(queue, numHits, fillFields); + } else { + return newLucene(queue, numHits, fillFields); + } + } else { + if (trackMaxScore) { + return newLucene(queue, numHits, fillFields); + } else if (trackDocScores) { + return newLucene(queue, numHits, fillFields); + } else { + return newLucene(queue, numHits, fillFields); } } - - TopDocsPtr TopFieldCollector::newTopDocs(Collection results, int32_t start) - { - if (!results) - { - results = EMPTY_SCOREDOCS(); - // Set maxScore to NaN, in case this is a maxScore tracking collector - maxScore = std::numeric_limits::quiet_NaN(); - } - - // If this is a maxScoring tracking collector and there were no results - return newLucene(totalHits, results, boost::static_pointer_cast(pq)->getFields(), maxScore); - } - - bool TopFieldCollector::acceptsDocsOutOfOrder() - { - return false; - } - - 
OneComparatorNonScoringCollector::OneComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : TopFieldCollector(queue, numHits, fillFields) - { - } - - OneComparatorNonScoringCollector::~OneComparatorNonScoringCollector() - { - } - - void OneComparatorNonScoringCollector::initialize() - { - TopFieldCollector::initialize(); +} + +void TopFieldCollector::add(int32_t slot, int32_t doc, double score) { + bottom = boost::static_pointer_cast(pq->add(newLucene(slot, docBase + doc, score))); + queueFull = (totalHits == numHits); +} + +void TopFieldCollector::populateResults(Collection results, int32_t howMany) { + if (fillFields) { FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); - comparator = queue->getComparators()[0]; - reverseMul = queue->getReverseMul()[0]; - } - - void OneComparatorNonScoringCollector::updateBottom(int32_t doc) - { - // bottom.score is already set to NaN in add(). - bottom->doc = docBase + doc; - bottom = boost::static_pointer_cast(pq->updateTop()); - } - - void OneComparatorNonScoringCollector::collect(int32_t doc) - { - ++totalHits; - if (queueFull) - { - if ((reverseMul * comparator->compareBottom(doc)) <= 0) - { - // since docs are visited in doc Id order, if compare is 0, it means this document is largest - // than anything else in the queue, and therefore not competitive. 
- return; - } - - // This hit is competitive - replace bottom element in queue and adjustTop - comparator->copy(bottom->slot, doc); - updateBottom(doc); - comparator->setBottom(bottom->slot); + for (int32_t i = howMany - 1; i >= 0; --i) { + results[i] = queue->fillFields(boost::static_pointer_cast(queue->pop())); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - comparator->copy(slot, doc); - add(slot, doc, std::numeric_limits::quiet_NaN()); - if (queueFull) - comparator->setBottom(bottom->slot); + } else { + for (int32_t i = howMany - 1; i >= 0; --i) { + FieldValueHitQueueEntryPtr entry(boost::static_pointer_cast(pq->pop())); + results[i] = newLucene(entry->doc, entry->score); } } - - void OneComparatorNonScoringCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - this->docBase = docBase; - comparator->setNextReader(reader, docBase); - } - - void OneComparatorNonScoringCollector::setScorer(ScorerPtr scorer) - { - comparator->setScorer(scorer); - } - - OutOfOrderOneComparatorNonScoringCollector::OutOfOrderOneComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) - { - } - - OutOfOrderOneComparatorNonScoringCollector::~OutOfOrderOneComparatorNonScoringCollector() - { +} + +TopDocsPtr TopFieldCollector::newTopDocs(Collection results, int32_t start) { + if (!results) { + results = EMPTY_SCOREDOCS(); + // Set maxScore to NaN, in case this is a maxScore tracking collector + maxScore = std::numeric_limits::quiet_NaN(); } - - void OutOfOrderOneComparatorNonScoringCollector::collect(int32_t doc) - { - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - int32_t cmp = reverseMul * comparator->compareBottom(doc); - if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) - return; - - // This hit is competitive - replace bottom element in 
queue and adjustTop - comparator->copy(bottom->slot, doc); - updateBottom(doc); - comparator->setBottom(bottom->slot); - } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - comparator->copy(slot, doc); - add(slot, doc, std::numeric_limits::quiet_NaN()); - if (queueFull) - comparator->setBottom(bottom->slot); + + // If this is a maxScoring tracking collector and there were no results + return newLucene(totalHits, results, boost::static_pointer_cast(pq)->getFields(), maxScore); +} + +bool TopFieldCollector::acceptsDocsOutOfOrder() { + return false; +} + +OneComparatorNonScoringCollector::OneComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : TopFieldCollector(queue, numHits, fillFields) { +} + +OneComparatorNonScoringCollector::~OneComparatorNonScoringCollector() { +} + +void OneComparatorNonScoringCollector::initialize() { + TopFieldCollector::initialize(); + FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); + comparator = queue->getComparators()[0]; + reverseMul = queue->getReverseMul()[0]; +} + +void OneComparatorNonScoringCollector::updateBottom(int32_t doc) { + // bottom.score is already set to NaN in add(). + bottom->doc = docBase + doc; + bottom = boost::static_pointer_cast(pq->updateTop()); +} + +void OneComparatorNonScoringCollector::collect(int32_t doc) { + ++totalHits; + if (queueFull) { + if ((reverseMul * comparator->compareBottom(doc)) <= 0) { + // since docs are visited in doc Id order, if compare is 0, it means this document is largest + // than anything else in the queue, and therefore not competitive. 
+ return; } - } - - bool OutOfOrderOneComparatorNonScoringCollector::acceptsDocsOutOfOrder() - { - return true; - } - - OneComparatorScoringNoMaxScoreCollector::OneComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) - { - } - - OneComparatorScoringNoMaxScoreCollector::~OneComparatorScoringNoMaxScoreCollector() - { - } - - void OneComparatorScoringNoMaxScoreCollector::updateBottom(int32_t doc, double score) - { - bottom->doc = docBase + doc; - bottom->score = score; - bottom = boost::static_pointer_cast(pq->updateTop()); - } - - void OneComparatorScoringNoMaxScoreCollector::collect(int32_t doc) - { - ++totalHits; - if (queueFull) - { - if ((reverseMul * comparator->compareBottom(doc)) <= 0) - { - // since docs are visited in doc Id order, if compare is 0, it means this document is largest - // than anything else in the queue, and therefore not competitive. - return; - } - - // Compute the score only if the hit is competitive. - double score = scorer->score(); - - // This hit is competitive - replace bottom element in queue and adjustTop - comparator->copy(bottom->slot, doc); - updateBottom(doc, score); + + // This hit is competitive - replace bottom element in queue and adjustTop + comparator->copy(bottom->slot, doc); + updateBottom(doc); + comparator->setBottom(bottom->slot); + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + comparator->copy(slot, doc); + add(slot, doc, std::numeric_limits::quiet_NaN()); + if (queueFull) { comparator->setBottom(bottom->slot); } - else - { - // Compute the score only if the hit is competitive. 
- double score = scorer->score(); - - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - comparator->copy(slot, doc); - add(slot, doc, score); - if (queueFull) - comparator->setBottom(bottom->slot); - } - } - - void OneComparatorScoringNoMaxScoreCollector::setScorer(ScorerPtr scorer) - { - this->scorer = scorer; - comparator->setScorer(scorer); - } - - OutOfOrderOneComparatorScoringNoMaxScoreCollector::OutOfOrderOneComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields) - { } - - OutOfOrderOneComparatorScoringNoMaxScoreCollector::~OutOfOrderOneComparatorScoringNoMaxScoreCollector() - { - } - - void OutOfOrderOneComparatorScoringNoMaxScoreCollector::collect(int32_t doc) - { - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - int32_t cmp = reverseMul * comparator->compareBottom(doc); - if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) - return; - - // Compute the score only if the hit is competitive. 
- double score = scorer->score(); - - // This hit is competitive - replace bottom element in queue and adjustTop - comparator->copy(bottom->slot, doc); - updateBottom(doc, score); - comparator->setBottom(bottom->slot); +} + +void OneComparatorNonScoringCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + this->docBase = docBase; + comparator->setNextReader(reader, docBase); +} + +void OneComparatorNonScoringCollector::setScorer(const ScorerPtr& scorer) { + comparator->setScorer(scorer); +} + +OutOfOrderOneComparatorNonScoringCollector::OutOfOrderOneComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { +} + +OutOfOrderOneComparatorNonScoringCollector::~OutOfOrderOneComparatorNonScoringCollector() { +} + +void OutOfOrderOneComparatorNonScoringCollector::collect(int32_t doc) { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + int32_t cmp = reverseMul * comparator->compareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) { + return; } - else - { - // Compute the score only if the hit is competitive. 
- double score = scorer->score(); - - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - comparator->copy(slot, doc); - add(slot, doc, score); - if (queueFull) - comparator->setBottom(bottom->slot); + + // This hit is competitive - replace bottom element in queue and adjustTop + comparator->copy(bottom->slot, doc); + updateBottom(doc); + comparator->setBottom(bottom->slot); + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + comparator->copy(slot, doc); + add(slot, doc, std::numeric_limits::quiet_NaN()); + if (queueFull) { + comparator->setBottom(bottom->slot); } } - - bool OutOfOrderOneComparatorScoringNoMaxScoreCollector::acceptsDocsOutOfOrder() - { - return true; - } - - OneComparatorScoringMaxScoreCollector::OneComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) - { - // Must set maxScore to NEG_INF, or otherwise std::max always returns NaN. 
- this->maxScore = -std::numeric_limits::infinity(); - } - - OneComparatorScoringMaxScoreCollector::~OneComparatorScoringMaxScoreCollector() - { - } - - void OneComparatorScoringMaxScoreCollector::updateBottom(int32_t doc, double score) - { - bottom->doc = docBase + doc; - bottom->score = score; - bottom = boost::static_pointer_cast(pq->updateTop()); - } - - void OneComparatorScoringMaxScoreCollector::collect(int32_t doc) - { +} + +bool OutOfOrderOneComparatorNonScoringCollector::acceptsDocsOutOfOrder() { + return true; +} + +OneComparatorScoringNoMaxScoreCollector::OneComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { +} + +OneComparatorScoringNoMaxScoreCollector::~OneComparatorScoringNoMaxScoreCollector() { +} + +void OneComparatorScoringNoMaxScoreCollector::updateBottom(int32_t doc, double score) { + bottom->doc = docBase + doc; + bottom->score = score; + bottom = boost::static_pointer_cast(pq->updateTop()); +} + +void OneComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { + ++totalHits; + if (queueFull) { + if ((reverseMul * comparator->compareBottom(doc)) <= 0) { + // since docs are visited in doc Id order, if compare is 0, it means this document is largest + // than anything else in the queue, and therefore not competitive. + return; + } + + // Compute the score only if the hit is competitive. double score = scorer->score(); - if (score > maxScore) - maxScore = score; - ++totalHits; - if (queueFull) - { - if ((reverseMul * comparator->compareBottom(doc)) <= 0) - { - // since docs are visited in doc Id order, if compare is 0, it means this document is largest - // than anything else in the queue, and therefore not competitive. 
- return; - } - - // This hit is competitive - replace bottom element in queue and adjustTop - comparator->copy(bottom->slot, doc); - updateBottom(doc, score); + + // This hit is competitive - replace bottom element in queue and adjustTop + comparator->copy(bottom->slot, doc); + updateBottom(doc, score); + comparator->setBottom(bottom->slot); + } else { + // Compute the score only if the hit is competitive. + double score = scorer->score(); + + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + comparator->copy(slot, doc); + add(slot, doc, score); + if (queueFull) { comparator->setBottom(bottom->slot); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - comparator->copy(slot, doc); - add(slot, doc, score); - if (queueFull) - comparator->setBottom(bottom->slot); - } - } - - void OneComparatorScoringMaxScoreCollector::setScorer(ScorerPtr scorer) - { - this->scorer = scorer; - OneComparatorNonScoringCollector::setScorer(scorer); - } - - OutOfOrderOneComparatorScoringMaxScoreCollector::OutOfOrderOneComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields) - { } - - OutOfOrderOneComparatorScoringMaxScoreCollector::~OutOfOrderOneComparatorScoringMaxScoreCollector() - { - } - - void OutOfOrderOneComparatorScoringMaxScoreCollector::collect(int32_t doc) - { +} + +void OneComparatorScoringNoMaxScoreCollector::setScorer(const ScorerPtr& scorer) { + this->scorer = scorer; + comparator->setScorer(scorer); +} + +OutOfOrderOneComparatorScoringNoMaxScoreCollector::OutOfOrderOneComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields) { +} + 
+OutOfOrderOneComparatorScoringNoMaxScoreCollector::~OutOfOrderOneComparatorScoringNoMaxScoreCollector() { +} + +void OutOfOrderOneComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + int32_t cmp = reverseMul * comparator->compareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) { + return; + } + + // Compute the score only if the hit is competitive. double score = scorer->score(); - if (score > maxScore) - maxScore = score; - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - int32_t cmp = reverseMul * comparator->compareBottom(doc); - if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) - return; - - // This hit is competitive - replace bottom element in queue and adjustTop - comparator->copy(bottom->slot, doc); - updateBottom(doc, score); + + // This hit is competitive - replace bottom element in queue and adjustTop + comparator->copy(bottom->slot, doc); + updateBottom(doc, score); + comparator->setBottom(bottom->slot); + } else { + // Compute the score only if the hit is competitive. 
+ double score = scorer->score(); + + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + comparator->copy(slot, doc); + add(slot, doc, score); + if (queueFull) { comparator->setBottom(bottom->slot); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - comparator->copy(slot, doc); - add(slot, doc, score); - if (queueFull) - comparator->setBottom(bottom->slot); - } - } - - bool OutOfOrderOneComparatorScoringMaxScoreCollector::acceptsDocsOutOfOrder() - { - return true; - } - - MultiComparatorNonScoringCollector::MultiComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : TopFieldCollector(queue, numHits, fillFields) - { - } - - MultiComparatorNonScoringCollector::~MultiComparatorNonScoringCollector() - { } - - void MultiComparatorNonScoringCollector::initialize() - { - TopFieldCollector::initialize(); - FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); - comparators = queue->getComparators(); - reverseMul = queue->getReverseMul(); +} + +bool OutOfOrderOneComparatorScoringNoMaxScoreCollector::acceptsDocsOutOfOrder() { + return true; +} + +OneComparatorScoringMaxScoreCollector::OneComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorNonScoringCollector(queue, numHits, fillFields) { + // Must set maxScore to NEG_INF, or otherwise std::max always returns NaN. 
+ this->maxScore = -std::numeric_limits::infinity(); +} + +OneComparatorScoringMaxScoreCollector::~OneComparatorScoringMaxScoreCollector() { +} + +void OneComparatorScoringMaxScoreCollector::updateBottom(int32_t doc, double score) { + bottom->doc = docBase + doc; + bottom->score = score; + bottom = boost::static_pointer_cast(pq->updateTop()); +} + +void OneComparatorScoringMaxScoreCollector::collect(int32_t doc) { + double score = scorer->score(); + if (score > maxScore) { + maxScore = score; + } + ++totalHits; + if (queueFull) { + if ((reverseMul * comparator->compareBottom(doc)) <= 0) { + // since docs are visited in doc Id order, if compare is 0, it means this document is largest + // than anything else in the queue, and therefore not competitive. + return; + } + + // This hit is competitive - replace bottom element in queue and adjustTop + comparator->copy(bottom->slot, doc); + updateBottom(doc, score); + comparator->setBottom(bottom->slot); + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + comparator->copy(slot, doc); + add(slot, doc, score); + if (queueFull) { + comparator->setBottom(bottom->slot); + } } - - void MultiComparatorNonScoringCollector::updateBottom(int32_t doc) - { - // bottom.score is already set to NaN in add(). 
- bottom->doc = docBase + doc; - bottom = boost::static_pointer_cast(pq->updateTop()); +} + +void OneComparatorScoringMaxScoreCollector::setScorer(const ScorerPtr& scorer) { + this->scorer = scorer; + OneComparatorNonScoringCollector::setScorer(scorer); +} + +OutOfOrderOneComparatorScoringMaxScoreCollector::OutOfOrderOneComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields) { +} + +OutOfOrderOneComparatorScoringMaxScoreCollector::~OutOfOrderOneComparatorScoringMaxScoreCollector() { +} + +void OutOfOrderOneComparatorScoringMaxScoreCollector::collect(int32_t doc) { + double score = scorer->score(); + if (score > maxScore) { + maxScore = score; + } + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + int32_t cmp = reverseMul * comparator->compareBottom(doc); + if (cmp < 0 || (cmp == 0 && doc + docBase > bottom->doc)) { + return; + } + + // This hit is competitive - replace bottom element in queue and adjustTop + comparator->copy(bottom->slot, doc); + updateBottom(doc, score); + comparator->setBottom(bottom->slot); + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + comparator->copy(slot, doc); + add(slot, doc, score); + if (queueFull) { + comparator->setBottom(bottom->slot); + } } - - void MultiComparatorNonScoringCollector::collect(int32_t doc) - { - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - for (int32_t i = 0; ; ++i) - { - int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); - if (c < 0) - { - // Definitely not competitive. - return; - } - else if (c > 0) - { - // Definitely competitive. - break; - } - else if (i == comparators.size() - 1) - { - // Here c=0. 
If we're at the last comparator, this doc is not competitive, since docs are - // visited in doc Id order, which means this doc cannot compete with any other document - // in the queue. - return; - } +} + +bool OutOfOrderOneComparatorScoringMaxScoreCollector::acceptsDocsOutOfOrder() { + return true; +} + +MultiComparatorNonScoringCollector::MultiComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : TopFieldCollector(queue, numHits, fillFields) { +} + +MultiComparatorNonScoringCollector::~MultiComparatorNonScoringCollector() { +} + +void MultiComparatorNonScoringCollector::initialize() { + TopFieldCollector::initialize(); + FieldValueHitQueuePtr queue(boost::static_pointer_cast(pq)); + comparators = queue->getComparators(); + reverseMul = queue->getReverseMul(); +} + +void MultiComparatorNonScoringCollector::updateBottom(int32_t doc) { + // bottom.score is already set to NaN in add(). + bottom->doc = docBase + doc; + bottom = boost::static_pointer_cast(pq->updateTop()); +} + +void MultiComparatorNonScoringCollector::collect(int32_t doc) { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int32_t i = 0; ; ++i) { + int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); + if (c < 0) { + // Definitely not competitive. + return; + } else if (c > 0) { + // Definitely competitive. + break; + } else if (i == comparators.size() - 1) { + // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are + // visited in doc Id order, which means this doc cannot compete with any other document + // in the queue. 
+ return; } - - // This hit is competitive - replace bottom element in queue and adjustTop - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(bottom->slot, doc); - - updateBottom(doc); - - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(slot, doc); - add(slot, doc, std::numeric_limits::quiet_NaN()); - if (queueFull) - { - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); + + // This hit is competitive - replace bottom element in queue and adjustTop + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(bottom->slot, doc); + } + + updateBottom(doc); + + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(slot, doc); + } + add(slot, doc, std::numeric_limits::quiet_NaN()); + if (queueFull) { + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); } } } - - void MultiComparatorNonScoringCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - this->docBase = docBase; - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setNextReader(reader, docBase); - } - - void MultiComparatorNonScoringCollector::setScorer(ScorerPtr scorer) - { - // set the scorer on all comparators - for 
(Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setScorer(scorer); - } - - OutOfOrderMultiComparatorNonScoringCollector::OutOfOrderMultiComparatorNonScoringCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) - { +} + +void MultiComparatorNonScoringCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + this->docBase = docBase; + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setNextReader(reader, docBase); } - - OutOfOrderMultiComparatorNonScoringCollector::~OutOfOrderMultiComparatorNonScoringCollector() - { +} + +void MultiComparatorNonScoringCollector::setScorer(const ScorerPtr& scorer) { + // set the scorer on all comparators + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setScorer(scorer); } - - void OutOfOrderMultiComparatorNonScoringCollector::collect(int32_t doc) - { - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - for (int32_t i = 0; ; ++i) - { - int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); - if (c < 0) - { - // Definitely not competitive. +} + +OutOfOrderMultiComparatorNonScoringCollector::OutOfOrderMultiComparatorNonScoringCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { +} + +OutOfOrderMultiComparatorNonScoringCollector::~OutOfOrderMultiComparatorNonScoringCollector() { +} + +void OutOfOrderMultiComparatorNonScoringCollector::collect(int32_t doc) { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int32_t i = 0; ; ++i) { + int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); + if (c < 0) { + // Definitely not competitive. + return; + } else if (c > 0) { + // Definitely competitive. 
+ break; + } else if (i == comparators.size() - 1) { + // This is the equals case. + if (doc + docBase > bottom->doc) { + // Definitely not competitive return; } - else if (c > 0) - { - // Definitely competitive. - break; - } - else if (i == comparators.size() - 1) - { - // This is the equals case. - if (doc + docBase > bottom->doc) - { - // Definitely not competitive - return; - } - break; - } + break; } - - // This hit is competitive - replace bottom element in queue and adjustTop - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(bottom->slot, doc); - - updateBottom(doc); - - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(slot, doc); - add(slot, doc, std::numeric_limits::quiet_NaN()); - if (queueFull) - { - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); + + // This hit is competitive - replace bottom element in queue and adjustTop + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(bottom->slot, doc); + } + + updateBottom(doc); + + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(slot, doc); + } + add(slot, doc, std::numeric_limits::quiet_NaN()); + if (queueFull) { + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + 
(*cmp)->setBottom(bottom->slot); } } } - - bool OutOfOrderMultiComparatorNonScoringCollector::acceptsDocsOutOfOrder() - { - return true; - } - - MultiComparatorScoringMaxScoreCollector::MultiComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) - { - // Must set maxScore to NEG_INF, or otherwise std::max always returns NaN. - this->maxScore = -std::numeric_limits::infinity(); - } - - MultiComparatorScoringMaxScoreCollector::~MultiComparatorScoringMaxScoreCollector() - { - } - - void MultiComparatorScoringMaxScoreCollector::updateBottom(int32_t doc, double score) - { - bottom->doc = docBase + doc; - bottom->score = score; - bottom = boost::static_pointer_cast(pq->updateTop()); - } - - void MultiComparatorScoringMaxScoreCollector::collect(int32_t doc) - { - double score = ScorerPtr(_scorer)->score(); - if (score > maxScore) - maxScore = score; - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - for (int32_t i = 0; ; ++i) - { - int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); - if (c < 0) - { - // Definitely not competitive. - return; - } - else if (c > 0) - { - // Definitely competitive. - break; - } - else if (i == comparators.size() - 1) - { - // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are - // visited in doc Id order, which means this doc cannot compete with any other document - // in the queue. - return; - } +} + +bool OutOfOrderMultiComparatorNonScoringCollector::acceptsDocsOutOfOrder() { + return true; +} + +MultiComparatorScoringMaxScoreCollector::MultiComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { + // Must set maxScore to NEG_INF, or otherwise std::max always returns NaN. 
+ this->maxScore = -std::numeric_limits::infinity(); +} + +MultiComparatorScoringMaxScoreCollector::~MultiComparatorScoringMaxScoreCollector() { +} + +void MultiComparatorScoringMaxScoreCollector::updateBottom(int32_t doc, double score) { + bottom->doc = docBase + doc; + bottom->score = score; + bottom = boost::static_pointer_cast(pq->updateTop()); +} + +void MultiComparatorScoringMaxScoreCollector::collect(int32_t doc) { + double score = ScorerPtr(_scorer)->score(); + if (score > maxScore) { + maxScore = score; + } + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int32_t i = 0; ; ++i) { + int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); + if (c < 0) { + // Definitely not competitive. + return; + } else if (c > 0) { + // Definitely competitive. + break; + } else if (i == comparators.size() - 1) { + // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are + // visited in doc Id order, which means this doc cannot compete with any other document + // in the queue. 
+ return; } - - // This hit is competitive - replace bottom element in queue and adjustTop - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(bottom->slot, doc); - - updateBottom(doc, score); - - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(slot, doc); - add(slot, doc, score); - if (queueFull) - { - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); + + // This hit is competitive - replace bottom element in queue and adjustTop + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(bottom->slot, doc); + } + + updateBottom(doc, score); + + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(slot, doc); + } + add(slot, doc, score); + if (queueFull) { + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); } } } - - void MultiComparatorScoringMaxScoreCollector::setScorer(ScorerPtr scorer) - { - this->_scorer = scorer; - MultiComparatorNonScoringCollector::setScorer(scorer); - } - - OutOfOrderMultiComparatorScoringMaxScoreCollector::OutOfOrderMultiComparatorScoringMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields) - { - } - - 
OutOfOrderMultiComparatorScoringMaxScoreCollector::~OutOfOrderMultiComparatorScoringMaxScoreCollector() - { - } - - void OutOfOrderMultiComparatorScoringMaxScoreCollector::collect(int32_t doc) - { - double score = ScorerPtr(_scorer)->score(); - if (score > maxScore) - maxScore = score; - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - for (int32_t i = 0; ; ++i) - { - int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); - if (c < 0) - { - // Definitely not competitive. +} + +void MultiComparatorScoringMaxScoreCollector::setScorer(const ScorerPtr& scorer) { + this->_scorer = scorer; + MultiComparatorNonScoringCollector::setScorer(scorer); +} + +OutOfOrderMultiComparatorScoringMaxScoreCollector::OutOfOrderMultiComparatorScoringMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields) { +} + +OutOfOrderMultiComparatorScoringMaxScoreCollector::~OutOfOrderMultiComparatorScoringMaxScoreCollector() { +} + +void OutOfOrderMultiComparatorScoringMaxScoreCollector::collect(int32_t doc) { + double score = ScorerPtr(_scorer)->score(); + if (score > maxScore) { + maxScore = score; + } + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int32_t i = 0; ; ++i) { + int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); + if (c < 0) { + // Definitely not competitive. + return; + } else if (c > 0) { + // Definitely competitive. + break; + } else if (i == comparators.size() - 1) { + // This is the equals case. + if (doc + docBase > bottom->doc) { + // Definitely not competitive return; } - else if (c > 0) - { - // Definitely competitive. - break; - } - else if (i == comparators.size() - 1) - { - // This is the equals case. 
- if (doc + docBase > bottom->doc) - { - // Definitely not competitive - return; - } - break; - } + break; } - - // This hit is competitive - replace bottom element in queue and adjustTop - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(bottom->slot, doc); - - updateBottom(doc, score); - - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(slot, doc); - add(slot, doc, score); - if (queueFull) - { - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); + + // This hit is competitive - replace bottom element in queue and adjustTop + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(bottom->slot, doc); + } + + updateBottom(doc, score); + + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(slot, doc); + } + add(slot, doc, score); + if (queueFull) { + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); } } } - - bool OutOfOrderMultiComparatorScoringMaxScoreCollector::acceptsDocsOutOfOrder() - { - return true; - } - - MultiComparatorScoringNoMaxScoreCollector::MultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) - { - 
} - - MultiComparatorScoringNoMaxScoreCollector::~MultiComparatorScoringNoMaxScoreCollector() - { - } - - void MultiComparatorScoringNoMaxScoreCollector::updateBottom(int32_t doc, double score) - { - bottom->doc = docBase + doc; - bottom->score = score; - bottom = boost::static_pointer_cast(pq->updateTop()); - } - - void MultiComparatorScoringNoMaxScoreCollector::collect(int32_t doc) - { - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - for (int32_t i = 0; ; ++i) - { - int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); - if (c < 0) - { - // Definitely not competitive. - return; - } - else if (c > 0) - { - // Definitely competitive. - break; - } - else if (i == comparators.size() - 1) - { - // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are - // visited in doc Id order, which means this doc cannot compete with any other document - // in the queue. - return; - } +} + +bool OutOfOrderMultiComparatorScoringMaxScoreCollector::acceptsDocsOutOfOrder() { + return true; +} + +MultiComparatorScoringNoMaxScoreCollector::MultiComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorNonScoringCollector(queue, numHits, fillFields) { +} + +MultiComparatorScoringNoMaxScoreCollector::~MultiComparatorScoringNoMaxScoreCollector() { +} + +void MultiComparatorScoringNoMaxScoreCollector::updateBottom(int32_t doc, double score) { + bottom->doc = docBase + doc; + bottom->score = score; + bottom = boost::static_pointer_cast(pq->updateTop()); +} + +void MultiComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int32_t i = 0; ; ++i) { + int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); + if (c < 0) { + // Definitely not competitive. + return; + } else if (c > 0) { + // Definitely competitive. 
+ break; + } else if (i == comparators.size() - 1) { + // Here c=0. If we're at the last comparator, this doc is not competitive, since docs are + // visited in doc Id order, which means this doc cannot compete with any other document + // in the queue. + return; } - - // This hit is competitive - replace bottom element in queue and adjustTop - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(bottom->slot, doc); - - // Compute score only if it is competitive. - double score = ScorerPtr(_scorer)->score(); - updateBottom(doc, score); - - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(slot, doc); - - // Compute score only if it is competitive. - double score = ScorerPtr(_scorer)->score(); - add(slot, doc, score); - if (queueFull) - { - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); + + // This hit is competitive - replace bottom element in queue and adjustTop + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(bottom->slot, doc); + } + + // Compute score only if it is competitive. + double score = ScorerPtr(_scorer)->score(); + updateBottom(doc, score); + + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(slot, doc); + } + + // Compute score only if it is competitive. 
+ double score = ScorerPtr(_scorer)->score(); + add(slot, doc, score); + if (queueFull) { + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); } } } - - void MultiComparatorScoringNoMaxScoreCollector::setScorer(ScorerPtr scorer) - { - this->_scorer = scorer; - MultiComparatorNonScoringCollector::setScorer(scorer); - } - - OutOfOrderMultiComparatorScoringNoMaxScoreCollector::OutOfOrderMultiComparatorScoringNoMaxScoreCollector(FieldValueHitQueuePtr queue, int32_t numHits, bool fillFields) : MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields) - { - } - - OutOfOrderMultiComparatorScoringNoMaxScoreCollector::~OutOfOrderMultiComparatorScoringNoMaxScoreCollector() - { - } - - void OutOfOrderMultiComparatorScoringNoMaxScoreCollector::collect(int32_t doc) - { - ++totalHits; - if (queueFull) - { - // Fastmatch: return if this hit is not competitive - for (int32_t i = 0; ; ++i) - { - int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); - if (c < 0) - { - // Definitely not competitive. +} + +void MultiComparatorScoringNoMaxScoreCollector::setScorer(const ScorerPtr& scorer) { + this->_scorer = scorer; + MultiComparatorNonScoringCollector::setScorer(scorer); +} + +OutOfOrderMultiComparatorScoringNoMaxScoreCollector::OutOfOrderMultiComparatorScoringNoMaxScoreCollector(const FieldValueHitQueuePtr& queue, int32_t numHits, bool fillFields) : MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields) { +} + +OutOfOrderMultiComparatorScoringNoMaxScoreCollector::~OutOfOrderMultiComparatorScoringNoMaxScoreCollector() { +} + +void OutOfOrderMultiComparatorScoringNoMaxScoreCollector::collect(int32_t doc) { + ++totalHits; + if (queueFull) { + // Fastmatch: return if this hit is not competitive + for (int32_t i = 0; ; ++i) { + int32_t c = reverseMul[i] * comparators[i]->compareBottom(doc); + if (c < 0) { + // Definitely not competitive. 
+ return; + } else if (c > 0) { + // Definitely competitive. + break; + } else if (i == comparators.size() - 1) { + // This is the equals case. + if (doc + docBase > bottom->doc) { + // Definitely not competitive return; } - else if (c > 0) - { - // Definitely competitive. - break; - } - else if (i == comparators.size() - 1) - { - // This is the equals case. - if (doc + docBase > bottom->doc) - { - // Definitely not competitive - return; - } - break; - } + break; } - - // This hit is competitive - replace bottom element in queue and adjustTop - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(bottom->slot, doc); - - // Compute score only if it is competitive. - double score = ScorerPtr(_scorer)->score(); - updateBottom(doc, score); - - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); } - else - { - // Startup transient: queue hasn't gathered numHits yet - int32_t slot = totalHits - 1; - // Copy hit into queue - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->copy(slot, doc); - - // Compute score only if it is competitive. - double score = ScorerPtr(_scorer)->score(); - add(slot, doc, score); - if (queueFull) - { - for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) - (*cmp)->setBottom(bottom->slot); + + // This hit is competitive - replace bottom element in queue and adjustTop + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(bottom->slot, doc); + } + + // Compute score only if it is competitive. 
+ double score = ScorerPtr(_scorer)->score(); + updateBottom(doc, score); + + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); + } + } else { + // Startup transient: queue hasn't gathered numHits yet + int32_t slot = totalHits - 1; + // Copy hit into queue + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->copy(slot, doc); + } + + // Compute score only if it is competitive. + double score = ScorerPtr(_scorer)->score(); + add(slot, doc, score); + if (queueFull) { + for (Collection::iterator cmp = comparators.begin(); cmp != comparators.end(); ++cmp) { + (*cmp)->setBottom(bottom->slot); } } } - - void OutOfOrderMultiComparatorScoringNoMaxScoreCollector::setScorer(ScorerPtr scorer) - { - this->_scorer = scorer; - MultiComparatorScoringNoMaxScoreCollector::setScorer(scorer); - } - - bool OutOfOrderMultiComparatorScoringNoMaxScoreCollector::acceptsDocsOutOfOrder() - { - return true; - } +} + +void OutOfOrderMultiComparatorScoringNoMaxScoreCollector::setScorer(const ScorerPtr& scorer) { + this->_scorer = scorer; + MultiComparatorScoringNoMaxScoreCollector::setScorer(scorer); +} + +bool OutOfOrderMultiComparatorScoringNoMaxScoreCollector::acceptsDocsOutOfOrder() { + return true; +} + } diff --git a/src/core/search/TopFieldDocs.cpp b/src/core/search/TopFieldDocs.cpp index 5a22e38f..674d2905 100644 --- a/src/core/search/TopFieldDocs.cpp +++ b/src/core/search/TopFieldDocs.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,14 +7,13 @@ #include "LuceneInc.h" #include "TopFieldDocs.h" -namespace Lucene -{ - TopFieldDocs::TopFieldDocs(int32_t totalHits, Collection scoreDocs, Collection fields, double maxScore) : TopDocs(totalHits, scoreDocs, maxScore) - { - this->fields = fields; - } - - TopFieldDocs::~TopFieldDocs() - { - } +namespace Lucene { + +TopFieldDocs::TopFieldDocs(int32_t totalHits, Collection scoreDocs, Collection fields, double maxScore) : TopDocs(totalHits, scoreDocs, maxScore) { + this->fields = fields; +} + +TopFieldDocs::~TopFieldDocs() { +} + } diff --git a/src/core/search/TopScoreDocCollector.cpp b/src/core/search/TopScoreDocCollector.cpp index e29c6b22..15b16103 100644 --- a/src/core/search/TopScoreDocCollector.cpp +++ b/src/core/search/TopScoreDocCollector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,119 +13,109 @@ #include "TopDocs.h" #include "MiscUtils.h" -namespace Lucene -{ - TopScoreDocCollector::TopScoreDocCollector(int32_t numHits) : TopDocsCollector(newLucene(numHits, true)) - { - // HitQueue implements getSentinelObject to return a ScoreDoc, so we know that at this point top() - // is already initialized. 
- pqTop = pq->top(); - docBase = 0; - } - - TopScoreDocCollector::~TopScoreDocCollector() - { - } - - TopScoreDocCollectorPtr TopScoreDocCollector::create(int32_t numHits, bool docsScoredInOrder) - { - if (docsScoredInOrder) - return newLucene(numHits); - else - return newLucene(numHits); - } - - TopDocsPtr TopScoreDocCollector::newTopDocs(Collection results, int32_t start) - { - if (!results) - return EMPTY_TOPDOCS(); - - // We need to compute maxScore in order to set it in TopDocs. If start == 0, it means the largest element - // is already in results, use its score as maxScore. Otherwise pop everything else, until the largest - // element is extracted and use its score as maxScore. - double maxScore = std::numeric_limits::quiet_NaN(); - if (start == 0) - maxScore = results[0]->score; - else - { - for (int32_t i = pq->size(); i > 1; --i) - pq->pop(); - maxScore = pq->pop()->score; - } - - return newLucene(totalHits, results, maxScore); - } - - void TopScoreDocCollector::setNextReader(IndexReaderPtr reader, int32_t docBase) - { - this->docBase = docBase; - } - - void TopScoreDocCollector::setScorer(ScorerPtr scorer) - { - this->_scorer = scorer; - } - - InOrderTopScoreDocCollector::InOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) - { +namespace Lucene { + +TopScoreDocCollector::TopScoreDocCollector(int32_t numHits) : TopDocsCollector(newLucene(numHits, true)) { + // HitQueue implements getSentinelObject to return a ScoreDoc, so we know that at this point top() + // is already initialized. 
+ pqTop = pq->top(); + docBase = 0; +} + +TopScoreDocCollector::~TopScoreDocCollector() { +} + +TopScoreDocCollectorPtr TopScoreDocCollector::create(int32_t numHits, bool docsScoredInOrder) { + if (docsScoredInOrder) { + return newLucene(numHits); + } else { + return newLucene(numHits); } - - InOrderTopScoreDocCollector::~InOrderTopScoreDocCollector() - { +} + +TopDocsPtr TopScoreDocCollector::newTopDocs(Collection results, int32_t start) { + if (!results) { + return EMPTY_TOPDOCS(); } - - void InOrderTopScoreDocCollector::collect(int32_t doc) - { - double score = ScorerPtr(_scorer)->score(); - - // This collector cannot handle these scores - BOOST_ASSERT(score != -std::numeric_limits::infinity()); - BOOST_ASSERT(!MiscUtils::isNaN(score)); - - ++totalHits; - if (score <= pqTop->score) - { - // Since docs are returned in-order (ie., increasing doc Id), a document with equal score to - // pqTop.score cannot compete since HitQueue favours documents with lower doc Ids. Therefore - // reject those docs too. - return; + + // We need to compute maxScore in order to set it in TopDocs. If start == 0, it means the largest element + // is already in results, use its score as maxScore. Otherwise pop everything else, until the largest + // element is extracted and use its score as maxScore. 
+ double maxScore = std::numeric_limits::quiet_NaN(); + if (start == 0) { + maxScore = results[0]->score; + } else { + for (int32_t i = pq->size(); i > 1; --i) { + pq->pop(); } - pqTop->doc = doc + docBase; - pqTop->score = score; - pqTop = pq->updateTop(); - } - - bool InOrderTopScoreDocCollector::acceptsDocsOutOfOrder() - { - return false; - } - - OutOfOrderTopScoreDocCollector::OutOfOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) - { - } - - OutOfOrderTopScoreDocCollector::~OutOfOrderTopScoreDocCollector() - { + maxScore = pq->pop()->score; } - - void OutOfOrderTopScoreDocCollector::collect(int32_t doc) - { - double score = ScorerPtr(_scorer)->score(); - - // This collector cannot handle NaN - BOOST_ASSERT(!MiscUtils::isNaN(score)); - - ++totalHits; - doc += docBase; - if (score < pqTop->score || (score == pqTop->score && doc > pqTop->doc)) - return; - pqTop->doc = doc; - pqTop->score = score; - pqTop = pq->updateTop(); + + return newLucene(totalHits, results, maxScore); +} + +void TopScoreDocCollector::setNextReader(const IndexReaderPtr& reader, int32_t docBase) { + this->docBase = docBase; +} + +void TopScoreDocCollector::setScorer(const ScorerPtr& scorer) { + this->_scorer = scorer; + this->__scorer = scorer.get(); +} + +InOrderTopScoreDocCollector::InOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) { +} + +InOrderTopScoreDocCollector::~InOrderTopScoreDocCollector() { +} + +void InOrderTopScoreDocCollector::collect(int32_t doc) { + double score = __scorer->score(); + + // This collector cannot handle these scores + BOOST_ASSERT(score != -std::numeric_limits::infinity()); + BOOST_ASSERT(!MiscUtils::isNaN(score)); + + ++totalHits; + if (score <= pqTop->score) { + // Since docs are returned in-order (ie., increasing doc Id), a document with equal score to + // pqTop.score cannot compete since HitQueue favours documents with lower doc Ids. Therefore + // reject those docs too. 
+ return; } - - bool OutOfOrderTopScoreDocCollector::acceptsDocsOutOfOrder() - { - return true; + pqTop->doc = doc + docBase; + pqTop->score = score; + pqTop = pq->updateTop(); +} + +bool InOrderTopScoreDocCollector::acceptsDocsOutOfOrder() { + return false; +} + +OutOfOrderTopScoreDocCollector::OutOfOrderTopScoreDocCollector(int32_t numHits) : TopScoreDocCollector(numHits) { +} + +OutOfOrderTopScoreDocCollector::~OutOfOrderTopScoreDocCollector() { +} + +void OutOfOrderTopScoreDocCollector::collect(int32_t doc) { + double score = __scorer->score(); + + // This collector cannot handle NaN + BOOST_ASSERT(!MiscUtils::isNaN(score)); + + ++totalHits; + doc += docBase; + if (score < pqTop->score || (score == pqTop->score && doc > pqTop->doc)) { + return; } + pqTop->doc = doc; + pqTop->score = score; + pqTop = pq->updateTop(); +} + +bool OutOfOrderTopScoreDocCollector::acceptsDocsOutOfOrder() { + return true; +} + } diff --git a/src/core/search/Weight.cpp b/src/core/search/Weight.cpp index fdc7aa63..80126de3 100644 --- a/src/core/search/Weight.cpp +++ b/src/core/search/Weight.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,14 +7,13 @@ #include "LuceneInc.h" #include "Weight.h" -namespace Lucene -{ - Weight::~Weight() - { - } - - bool Weight::scoresDocsOutOfOrder() - { - return false; - } +namespace Lucene { + +Weight::~Weight() { +} + +bool Weight::scoresDocsOutOfOrder() { + return false; +} + } diff --git a/src/core/search/WildcardQuery.cpp b/src/core/search/WildcardQuery.cpp index 3bc7dbb6..3fce507f 100644 --- a/src/core/search/WildcardQuery.cpp +++ b/src/core/search/WildcardQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,93 +13,90 @@ #include "SingleTermEnum.h" #include "MiscUtils.h" -namespace Lucene -{ - WildcardQuery::WildcardQuery(TermPtr term) - { - this->term = term; - String text(term->text()); - this->termContainsWildcard = boost::contains(text, L"*") || boost::contains(text, L"?"); - this->termIsPrefix = termContainsWildcard && - !boost::contains(text, L"?") && - text.find_first_of(L"*") == text.length() - 1; - } - - WildcardQuery::~WildcardQuery() - { +namespace Lucene { + +WildcardQuery::WildcardQuery(const TermPtr& term) { + this->term = term; + String text(term->text()); + this->termContainsWildcard = boost::contains(text, L"*") || boost::contains(text, L"?"); + this->termIsPrefix = termContainsWildcard && + !boost::contains(text, L"?") && + text.find_first_of(L"*") == text.length() - 1; +} + +WildcardQuery::~WildcardQuery() { +} + +FilteredTermEnumPtr WildcardQuery::getEnum(const IndexReaderPtr& reader) { + if (termContainsWildcard) { + return newLucene(reader, getTerm()); + } else { + return 
newLucene(reader, getTerm()); } - - FilteredTermEnumPtr WildcardQuery::getEnum(IndexReaderPtr reader) - { - if (termContainsWildcard) - return newLucene(reader, getTerm()); - else - return newLucene(reader, getTerm()); +} + +TermPtr WildcardQuery::getTerm() { + return term; +} + +QueryPtr WildcardQuery::rewrite(const IndexReaderPtr& reader) { + if (termIsPrefix) { + MultiTermQueryPtr rewritten(newLucene(term->createTerm(term->text().substr(0, term->text().find('*'))))); + rewritten->setBoost(getBoost()); + rewritten->setRewriteMethod(getRewriteMethod()); + return rewritten; + } else { + return MultiTermQuery::rewrite(reader); } - - TermPtr WildcardQuery::getTerm() - { - return term; +} + +String WildcardQuery::toString(const String& field) { + StringStream buffer; + if (term->field() != field) { + buffer << term->field() << L":"; } - - QueryPtr WildcardQuery::rewrite(IndexReaderPtr reader) - { - if (termIsPrefix) - { - MultiTermQueryPtr rewritten(newLucene(term->createTerm(term->text().substr(0, term->text().find('*'))))); - rewritten->setBoost(getBoost()); - rewritten->setRewriteMethod(getRewriteMethod()); - return rewritten; - } - else - return MultiTermQuery::rewrite(reader); + buffer << term->text() << boostString(); + return buffer.str(); +} + +LuceneObjectPtr WildcardQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(term)); + WildcardQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->termContainsWildcard = termContainsWildcard; + cloneQuery->termIsPrefix = termIsPrefix; + cloneQuery->term = term; + return cloneQuery; +} + +int32_t WildcardQuery::hashCode() { + int32_t prime = 31; + int32_t result = MultiTermQuery::hashCode(); + result = prime * result + (term ? 
term->hashCode() : 0); + return result; +} + +bool WildcardQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - String WildcardQuery::toString(const String& field) - { - StringStream buffer; - if (term->field() != field) - buffer << term->field() << L":"; - buffer << term->text() << boostString(); - return buffer.str(); + if (!MultiTermQuery::equals(other)) { + return false; } - - LuceneObjectPtr WildcardQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = MultiTermQuery::clone(other ? other : newLucene(term)); - WildcardQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->termContainsWildcard = termContainsWildcard; - cloneQuery->termIsPrefix = termIsPrefix; - cloneQuery->term = term; - return cloneQuery; + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - int32_t WildcardQuery::hashCode() - { - int32_t prime = 31; - int32_t result = MultiTermQuery::hashCode(); - result = prime * result + (term ? 
term->hashCode() : 0); - return result; + WildcardQueryPtr otherWildcardQuery(boost::dynamic_pointer_cast(other)); + if (!otherWildcardQuery) { + return false; } - - bool WildcardQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!MultiTermQuery::equals(other)) + if (!term) { + if (otherWildcardQuery->term) { return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - WildcardQueryPtr otherWildcardQuery(boost::dynamic_pointer_cast(other)); - if (!otherWildcardQuery) - return false; - if (!term) - { - if (otherWildcardQuery->term) - return false; } - else if (!term->equals(otherWildcardQuery->term)) - return false; - return true; + } else if (!term->equals(otherWildcardQuery->term)) { + return false; } + return true; +} + } diff --git a/src/core/search/WildcardTermEnum.cpp b/src/core/search/WildcardTermEnum.cpp index 78a7d602..d8e7a88c 100644 --- a/src/core/search/WildcardTermEnum.cpp +++ b/src/core/search/WildcardTermEnum.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,125 +10,122 @@ #include "Term.h" #include "IndexReader.h" -namespace Lucene -{ - const wchar_t WildcardTermEnum::WILDCARD_STRING = L'*'; - const wchar_t WildcardTermEnum::WILDCARD_CHAR = L'?'; - - WildcardTermEnum::WildcardTermEnum(IndexReaderPtr reader, TermPtr term) - { - _endEnum = false; - searchTerm = term; - field = searchTerm->field(); - String searchTermText(searchTerm->text()); - - String::size_type sidx = searchTermText.find(WILDCARD_STRING); - String::size_type cidx = searchTermText.find(WILDCARD_CHAR); - String::size_type idx = sidx; - if (idx == String::npos) - idx = cidx; - else if (cidx != String::npos) - idx = std::min(idx, cidx); - pre = idx != String::npos ? searchTerm->text().substr(0, idx) : L""; - - preLen = pre.length(); - text = searchTermText.substr(preLen); - setEnum(reader->terms(newLucene(searchTerm->field(), pre))); - } - - WildcardTermEnum::~WildcardTermEnum() - { +namespace Lucene { + +const wchar_t WildcardTermEnum::WILDCARD_STRING = L'*'; +const wchar_t WildcardTermEnum::WILDCARD_CHAR = L'?'; + +WildcardTermEnum::WildcardTermEnum(const IndexReaderPtr& reader, const TermPtr& term) { + _endEnum = false; + searchTerm = term; + field = searchTerm->field(); + String searchTermText(searchTerm->text()); + + String::size_type sidx = searchTermText.find(WILDCARD_STRING); + String::size_type cidx = searchTermText.find(WILDCARD_CHAR); + String::size_type idx = sidx; + if (idx == String::npos) { + idx = cidx; + } else if (cidx != String::npos) { + idx = std::min(idx, cidx); } - - bool WildcardTermEnum::termCompare(TermPtr term) - { - if (field == term->field()) - { - String searchText(term->text()); - if (boost::starts_with(searchText, pre)) - return wildcardEquals(text, 0, searchText, preLen); + pre = idx != String::npos ? 
searchTerm->text().substr(0, idx) : L""; + + preLen = pre.length(); + text = searchTermText.substr(preLen); + setEnum(reader->terms(newLucene(searchTerm->field(), pre))); +} + +WildcardTermEnum::~WildcardTermEnum() { +} + +bool WildcardTermEnum::termCompare(const TermPtr& term) { + if (field == term->field()) { + String searchText(term->text()); + if (boost::starts_with(searchText, pre)) { + return wildcardEquals(text, 0, searchText, preLen); } - _endEnum = true; - return false; } - - double WildcardTermEnum::difference() - { - return 1.0; - } - - bool WildcardTermEnum::endEnum() - { - return _endEnum; - } - - bool WildcardTermEnum::wildcardEquals(const String& pattern, int32_t patternIdx, const String& string, int32_t stringIdx) - { - int32_t p = patternIdx; - for (int32_t s = stringIdx; ; ++p, ++s) - { - // End of string yet? - bool sEnd = (s >= (int32_t)string.length()); - // End of pattern yet? - bool pEnd = (p >= (int32_t)pattern.length()); - - // If we're looking at the end of the string - if (sEnd) - { - // Assume the only thing left on the pattern is/are wildcards - bool justWildcardsLeft = true; - - // Current wildcard position - int32_t wildcardSearchPos = p; - - // While we haven't found the end of the pattern, and haven't encountered any non-wildcard characters - while (wildcardSearchPos < (int32_t)pattern.length() && justWildcardsLeft) - { - // Check the character at the current position - wchar_t wildchar = pattern[wildcardSearchPos]; - - // If it's not a wildcard character, then there is more pattern information after this/these wildcards. - if (wildchar != WILDCARD_CHAR && wildchar != WILDCARD_STRING) - justWildcardsLeft = false; - else - { - // to prevent "cat" matches "ca??" 
- if (wildchar == WILDCARD_CHAR) - return false; - // Look at the next character - ++wildcardSearchPos; + _endEnum = true; + return false; +} + +double WildcardTermEnum::difference() { + return 1.0; +} + +bool WildcardTermEnum::endEnum() { + return _endEnum; +} + +bool WildcardTermEnum::wildcardEquals(const String& pattern, int32_t patternIdx, const String& string, int32_t stringIdx) { + int32_t p = patternIdx; + for (int32_t s = stringIdx; ; ++p, ++s) { + // End of string yet? + bool sEnd = (s >= (int32_t)string.length()); + // End of pattern yet? + bool pEnd = (p >= (int32_t)pattern.length()); + + // If we're looking at the end of the string + if (sEnd) { + // Assume the only thing left on the pattern is/are wildcards + bool justWildcardsLeft = true; + + // Current wildcard position + int32_t wildcardSearchPos = p; + + // While we haven't found the end of the pattern, and haven't encountered any non-wildcard characters + while (wildcardSearchPos < (int32_t)pattern.length() && justWildcardsLeft) { + // Check the character at the current position + wchar_t wildchar = pattern[wildcardSearchPos]; + + // If it's not a wildcard character, then there is more pattern information after this/these wildcards. + if (wildchar != WILDCARD_CHAR && wildchar != WILDCARD_STRING) { + justWildcardsLeft = false; + } else { + // to prevent "cat" matches "ca??" + if (wildchar == WILDCARD_CHAR) { + return false; } + // Look at the next character + ++wildcardSearchPos; } - - // This was a prefix wildcard search, and we've matched, so return true. - if (justWildcardsLeft) - return true; } - - // If we've gone past the end of the string, or the pattern, return false. - if (sEnd || pEnd) - break; - - // Match a single character, so continue. - if (pattern[p] == WILDCARD_CHAR) - continue; - - if (pattern[p] == WILDCARD_STRING) - { - // Look at the character beyond the '*' characters. 
- while (p < (int32_t)pattern.length() && pattern[p] == WILDCARD_STRING) - ++p; - // Examine the string, starting at the last character. - for (int32_t i = string.length(); i >= s; --i) - { - if (wildcardEquals(pattern, p, string, i)) - return true; + + // This was a prefix wildcard search, and we've matched, so return true. + if (justWildcardsLeft) { + return true; + } + } + + // If we've gone past the end of the string, or the pattern, return false. + if (sEnd || pEnd) { + break; + } + + // Match a single character, so continue. + if (pattern[p] == WILDCARD_CHAR) { + continue; + } + + if (pattern[p] == WILDCARD_STRING) { + // Look at the character beyond the '*' characters. + while (p < (int32_t)pattern.length() && pattern[p] == WILDCARD_STRING) { + ++p; + } + // Examine the string, starting at the last character. + for (int32_t i = string.length(); i >= s; --i) { + if (wildcardEquals(pattern, p, string, i)) { + return true; } - break; } - if (pattern[p] != string[s]) - break; + break; + } + if (pattern[p] != string[s]) { + break; } - return false; } + return false; +} + } diff --git a/src/core/search/function/ByteFieldSource.cpp b/src/core/search/function/ByteFieldSource.cpp index 7a646005..f2b98651 100644 --- a/src/core/search/function/ByteFieldSource.cpp +++ b/src/core/search/function/ByteFieldSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,74 +11,67 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - ByteFieldSource::ByteFieldSource(const String& field, ByteParserPtr parser) : FieldCacheSource(field) - { - this->parser = parser; - } - - ByteFieldSource::~ByteFieldSource() - { - } - - String ByteFieldSource::description() - { - return L"byte(" + FieldCacheSource::description() + L")"; - } - - DocValuesPtr ByteFieldSource::getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader) - { - Collection arr(cache->getBytes(reader, field, parser)); - return newLucene(shared_from_this(), arr); - } - - bool ByteFieldSource::cachedFieldSourceEquals(FieldCacheSourcePtr other) - { - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - ByteFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); - if (!otherSource) - return false; - return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; - } - - int32_t ByteFieldSource::cachedFieldSourceHashCode() - { - return StringUtils::hashCode(parser ? 
ByteParser::_getClassName() : ByteFieldSource::_getClassName()); - } - - ByteDocValues::ByteDocValues(ByteFieldSourcePtr source, Collection arr) - { - this->_source = source; - this->arr = arr; - } - - ByteDocValues::~ByteDocValues() - { - } - - double ByteDocValues::doubleVal(int32_t doc) - { - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return (double)arr[doc]; +namespace Lucene { + +ByteFieldSource::ByteFieldSource(const String& field, const ByteParserPtr& parser) : FieldCacheSource(field) { + this->parser = parser; +} + +ByteFieldSource::~ByteFieldSource() { +} + +String ByteFieldSource::description() { + return L"byte(" + FieldCacheSource::description() + L")"; +} + +DocValuesPtr ByteFieldSource::getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader) { + Collection arr(cache->getBytes(reader, field, parser)); + return newLucene(shared_from_this(), arr); +} + +bool ByteFieldSource::cachedFieldSourceEquals(const FieldCacheSourcePtr& other) { + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - int32_t ByteDocValues::intVal(int32_t doc) - { - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return (int32_t)arr[doc]; + ByteFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); + if (!otherSource) { + return false; } - - String ByteDocValues::toString(int32_t doc) - { - return ByteFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); + return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; +} + +int32_t ByteFieldSource::cachedFieldSourceHashCode() { + return StringUtils::hashCode(parser ? 
ByteParser::_getClassName() : ByteFieldSource::_getClassName()); +} + +ByteDocValues::ByteDocValues(const ByteFieldSourcePtr& source, Collection arr) { + this->_source = source; + this->arr = arr; +} + +ByteDocValues::~ByteDocValues() { +} + +double ByteDocValues::doubleVal(int32_t doc) { + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } - - CollectionValue ByteDocValues::getInnerArray() - { - return arr; + return (double)arr[doc]; +} + +int32_t ByteDocValues::intVal(int32_t doc) { + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } + return (int32_t)arr[doc]; +} + +String ByteDocValues::toString(int32_t doc) { + return ByteFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); +} + +CollectionValue ByteDocValues::getInnerArray() { + return arr; +} + } diff --git a/src/core/search/function/CustomScoreProvider.cpp b/src/core/search/function/CustomScoreProvider.cpp index c46cb84c..7f8805f9 100644 --- a/src/core/search/function/CustomScoreProvider.cpp +++ b/src/core/search/function/CustomScoreProvider.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,58 +8,61 @@ #include "CustomScoreProvider.h" #include "Explanation.h" -namespace Lucene -{ - CustomScoreProvider::CustomScoreProvider(IndexReaderPtr reader) - { - this->reader = reader; +namespace Lucene { + +CustomScoreProvider::CustomScoreProvider(const IndexReaderPtr& reader) { + this->reader = reader; +} + +CustomScoreProvider::~CustomScoreProvider() { +} + +double CustomScoreProvider::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { + if (valSrcScores.size() == 1) { + return customScore(doc, subQueryScore, valSrcScores[0]); + } + if (valSrcScores.empty()) { + return customScore(doc, subQueryScore, 1); } - - CustomScoreProvider::~CustomScoreProvider() - { + double score = subQueryScore; + for (Collection::iterator srcScore = valSrcScores.begin(); srcScore != valSrcScores.end(); ++srcScore) { + score *= *srcScore; } + return score; +} - double CustomScoreProvider::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) - { - if (valSrcScores.size() == 1) - return customScore(doc, subQueryScore, valSrcScores[0]); - if (valSrcScores.empty()) - return customScore(doc, subQueryScore, 1); - double score = subQueryScore; - for (Collection::iterator srcScore = valSrcScores.begin(); srcScore != valSrcScores.end(); ++srcScore) - score *= *srcScore; - return score; +double CustomScoreProvider::customScore(int32_t doc, double subQueryScore, double valSrcScore) { + return subQueryScore * valSrcScore; +} + +ExplanationPtr CustomScoreProvider::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls) { + if (valSrcExpls.size() == 1) { + return customExplain(doc, subQueryExpl, valSrcExpls[0]); } - - double CustomScoreProvider::customScore(int32_t doc, double subQueryScore, double valSrcScore) - { - return subQueryScore * valSrcScore; + if (valSrcExpls.empty()) { + return subQueryExpl; } - - ExplanationPtr 
CustomScoreProvider::customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls) - { - if (valSrcExpls.size() == 1) - return customExplain(doc, subQueryExpl, valSrcExpls[0]); - if (valSrcExpls.empty()) - return subQueryExpl; - double valSrcScore = 1; - for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) - valSrcScore *= (*srcExpl)->getValue(); - ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); - exp->addDetail(subQueryExpl); - for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) - exp->addDetail(*srcExpl); - return exp; + double valSrcScore = 1; + for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) { + valSrcScore *= (*srcExpl)->getValue(); } - - ExplanationPtr CustomScoreProvider::customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl) - { - double valSrcScore = 1; - if (valSrcExpl) - valSrcScore *= valSrcExpl->getValue(); - ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); - exp->addDetail(subQueryExpl); - exp->addDetail(valSrcExpl); - return exp; + ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); + exp->addDetail(subQueryExpl); + for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) { + exp->addDetail(*srcExpl); } + return exp; +} + +ExplanationPtr CustomScoreProvider::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl) { + double valSrcScore = 1; + if (valSrcExpl) { + valSrcScore *= valSrcExpl->getValue(); + } + ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); + exp->addDetail(subQueryExpl); + exp->addDetail(valSrcExpl); + return exp; +} + } diff --git 
a/src/core/search/function/CustomScoreQuery.cpp b/src/core/search/function/CustomScoreQuery.cpp index 3ccf4a60..ff0869d5 100644 --- a/src/core/search/function/CustomScoreQuery.cpp +++ b/src/core/search/function/CustomScoreQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,348 +12,326 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - CustomScoreQuery::CustomScoreQuery(QueryPtr subQuery) - { - ConstructQuery(subQuery, Collection::newInstance()); - } - - CustomScoreQuery::CustomScoreQuery(QueryPtr subQuery, ValueSourceQueryPtr valSrcQuery) - { - Collection valSrcQueries(Collection::newInstance()); - if (valSrcQuery) - valSrcQueries.add(valSrcQuery); - ConstructQuery(subQuery, valSrcQueries); - } - - CustomScoreQuery::CustomScoreQuery(QueryPtr subQuery, Collection valSrcQueries) - { - ConstructQuery(subQuery, valSrcQueries); +namespace Lucene { + +CustomScoreQuery::CustomScoreQuery(const QueryPtr& subQuery) { + ConstructQuery(subQuery, Collection::newInstance()); +} + +CustomScoreQuery::CustomScoreQuery(const QueryPtr& subQuery, const ValueSourceQueryPtr& valSrcQuery) { + Collection valSrcQueries(Collection::newInstance()); + if (valSrcQuery) { + valSrcQueries.add(valSrcQuery); } - - CustomScoreQuery::~CustomScoreQuery() - { + ConstructQuery(subQuery, valSrcQueries); +} + +CustomScoreQuery::CustomScoreQuery(const QueryPtr& subQuery, Collection valSrcQueries) { + ConstructQuery(subQuery, valSrcQueries); +} + +CustomScoreQuery::~CustomScoreQuery() { +} + +void CustomScoreQuery::ConstructQuery(const QueryPtr& subQuery, Collection valSrcQueries) { + this->strict = false; + 
this->subQuery = subQuery; + this->valSrcQueries = valSrcQueries ? valSrcQueries : Collection::newInstance(); + if (!subQuery) { + boost::throw_exception(IllegalArgumentException(L" must not be null!")); } - - void CustomScoreQuery::ConstructQuery(QueryPtr subQuery, Collection valSrcQueries) - { - this->strict = false; - this->subQuery = subQuery; - this->valSrcQueries = valSrcQueries ? valSrcQueries : Collection::newInstance(); - if (!subQuery) - boost::throw_exception(IllegalArgumentException(L" must not be null!")); +} + +QueryPtr CustomScoreQuery::rewrite(const IndexReaderPtr& reader) { + CustomScoreQueryPtr cloneQuery; + + QueryPtr sq = subQuery->rewrite(reader); + if (sq != subQuery) { + cloneQuery = boost::static_pointer_cast(clone()); + cloneQuery->subQuery = sq; } - - QueryPtr CustomScoreQuery::rewrite(IndexReaderPtr reader) - { - CustomScoreQueryPtr cloneQuery; - - QueryPtr sq = subQuery->rewrite(reader); - if (sq != subQuery) - { - cloneQuery = boost::static_pointer_cast(clone()); - cloneQuery->subQuery = sq; - } - - for (int32_t i = 0; i < valSrcQueries.size(); ++i) - { - ValueSourceQueryPtr v = boost::dynamic_pointer_cast(valSrcQueries[i]->rewrite(reader)); - if (v != valSrcQueries[i]) - { - if (!cloneQuery) - cloneQuery = boost::static_pointer_cast(clone()); - cloneQuery->valSrcQueries[i] = v; + + for (int32_t i = 0; i < valSrcQueries.size(); ++i) { + ValueSourceQueryPtr v = boost::dynamic_pointer_cast(valSrcQueries[i]->rewrite(reader)); + if (v != valSrcQueries[i]) { + if (!cloneQuery) { + cloneQuery = boost::static_pointer_cast(clone()); } + cloneQuery->valSrcQueries[i] = v; } - - return cloneQuery ? 
cloneQuery : shared_from_this(); - } - - void CustomScoreQuery::extractTerms(SetTerm terms) - { - subQuery->extractTerms(terms); - for (Collection::iterator srcQuery = valSrcQueries.begin(); srcQuery != valSrcQueries.end(); ++srcQuery) - (*srcQuery)->extractTerms(terms); - } - - LuceneObjectPtr CustomScoreQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = Query::clone(other ? other : newLucene(subQuery)); - CustomScoreQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->strict = strict; - cloneQuery->subQuery = boost::dynamic_pointer_cast(subQuery->clone()); - cloneQuery->valSrcQueries = Collection::newInstance(valSrcQueries.size()); - for (int32_t i = 0; i < valSrcQueries.size(); ++i) - cloneQuery->valSrcQueries[i] = boost::dynamic_pointer_cast(valSrcQueries[i]->clone()); - return cloneQuery; - } - - String CustomScoreQuery::toString(const String& field) - { - StringStream buffer; - buffer << name() << L"(" << subQuery->toString(field); - for (Collection::iterator srcQuery = valSrcQueries.begin(); srcQuery != valSrcQueries.end(); ++srcQuery) - buffer << L", " << (*srcQuery)->toString(field); - buffer << L")" << (strict ? L" STRICT" : L"") << boostString(); - return buffer.str(); - } - - bool CustomScoreQuery::equals(LuceneObjectPtr other) - { - CustomScoreQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - if (getBoost() != otherQuery->getBoost() || !subQuery->equals(otherQuery->subQuery) || strict != otherQuery->strict) - return false; - return valSrcQueries.equals(otherQuery->valSrcQueries, luceneEquals()); - } - - int32_t CustomScoreQuery::hashCode() - { - return (StringUtils::hashCode(CustomScoreQuery::_getClassName()) + StringUtils::hashCode(Query::_getClassName()) + - MiscUtils::hashCode(valSrcQueries.begin(), valSrcQueries.end(), MiscUtils::hashLucene)) ^ - MiscUtils::doubleToIntBits(getBoost()) ^ (strict ? 
1234 : 4321); - } - - CustomScoreProviderPtr CustomScoreQuery::getCustomScoreProvider(IndexReaderPtr reader) - { - // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider - return newLucene(shared_from_this(), reader); - } - - double CustomScoreQuery::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) - { - if (valSrcScores.size() == 1) - return customScore(doc, subQueryScore, valSrcScores[0]); - if (valSrcScores.empty()) - return customScore(doc, subQueryScore, 1); - double score = subQueryScore; - for (Collection::iterator srcScore = valSrcScores.begin(); srcScore != valSrcScores.end(); ++srcScore) - score *= *srcScore; - return score; - } - - double CustomScoreQuery::customScore(int32_t doc, double subQueryScore, double valSrcScore) - { - return subQueryScore * valSrcScore; - } - - ExplanationPtr CustomScoreQuery::customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls) - { - if (valSrcExpls.size() == 1) - return customExplain(doc, subQueryExpl, valSrcExpls[0]); - if (valSrcExpls.empty()) - return subQueryExpl; - double valSrcScore = 1; - for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) - valSrcScore *= (*srcExpl)->getValue(); - ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); - exp->addDetail(subQueryExpl); - for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) - exp->addDetail(*srcExpl); - return exp; - } - - ExplanationPtr CustomScoreQuery::customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl) - { - double valSrcScore = 1; - if (valSrcExpl) - valSrcScore *= valSrcExpl->getValue(); - ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); - exp->addDetail(subQueryExpl); - exp->addDetail(valSrcExpl); - return exp; - } - - WeightPtr 
CustomScoreQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } - - bool CustomScoreQuery::isStrict() - { - return strict; - } - - void CustomScoreQuery::setStrict(bool strict) - { - this->strict = strict; } - - String CustomScoreQuery::name() - { - return L"custom"; - } - - DefaultCustomScoreProvider::DefaultCustomScoreProvider(CustomScoreQueryPtr customQuery, IndexReaderPtr reader) : CustomScoreProvider(reader) - { - _customQuery = customQuery; + + return cloneQuery ? cloneQuery : shared_from_this(); +} + +void CustomScoreQuery::extractTerms(SetTerm terms) { + subQuery->extractTerms(terms); + for (Collection::iterator srcQuery = valSrcQueries.begin(); srcQuery != valSrcQueries.end(); ++srcQuery) { + (*srcQuery)->extractTerms(terms); } - - DefaultCustomScoreProvider::~DefaultCustomScoreProvider() - { +} + +LuceneObjectPtr CustomScoreQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = Query::clone(other ? other : newLucene(subQuery)); + CustomScoreQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->strict = strict; + cloneQuery->subQuery = boost::dynamic_pointer_cast(subQuery->clone()); + cloneQuery->valSrcQueries = Collection::newInstance(valSrcQueries.size()); + for (int32_t i = 0; i < valSrcQueries.size(); ++i) { + cloneQuery->valSrcQueries[i] = boost::dynamic_pointer_cast(valSrcQueries[i]->clone()); + } + return cloneQuery; +} + +String CustomScoreQuery::toString(const String& field) { + StringStream buffer; + buffer << name() << L"(" << subQuery->toString(field); + for (Collection::iterator srcQuery = valSrcQueries.begin(); srcQuery != valSrcQueries.end(); ++srcQuery) { + buffer << L", " << (*srcQuery)->toString(field); + } + buffer << L")" << (strict ? 
L" STRICT" : L"") << boostString(); + return buffer.str(); +} + +bool CustomScoreQuery::equals(const LuceneObjectPtr& other) { + CustomScoreQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - double DefaultCustomScoreProvider::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) - { - return CustomScoreQueryPtr(_customQuery)->customScore(doc, subQueryScore, valSrcScores); + if (getBoost() != otherQuery->getBoost() || !subQuery->equals(otherQuery->subQuery) || strict != otherQuery->strict) { + return false; } - - double DefaultCustomScoreProvider::customScore(int32_t doc, double subQueryScore, double valSrcScore) - { - return CustomScoreQueryPtr(_customQuery)->customScore(doc, subQueryScore, valSrcScore); + return valSrcQueries.equals(otherQuery->valSrcQueries, luceneEquals()); +} + +int32_t CustomScoreQuery::hashCode() { + return (StringUtils::hashCode(CustomScoreQuery::_getClassName()) + StringUtils::hashCode(Query::_getClassName()) + + MiscUtils::hashCode(valSrcQueries.begin(), valSrcQueries.end(), MiscUtils::hashLucene)) ^ + MiscUtils::doubleToIntBits(getBoost()) ^ (strict ? 
1234 : 4321); +} + +CustomScoreProviderPtr CustomScoreQuery::getCustomScoreProvider(const IndexReaderPtr& reader) { + // when deprecated methods are removed, do not extend class here, just return new default CustomScoreProvider + return newLucene(shared_from_this(), reader); +} + +double CustomScoreQuery::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { + if (valSrcScores.size() == 1) { + return customScore(doc, subQueryScore, valSrcScores[0]); } - - ExplanationPtr DefaultCustomScoreProvider::customExplain(int32_t doc, ExplanationPtr subQueryExpl, Collection valSrcExpls) - { - return CustomScoreQueryPtr(_customQuery)->customExplain(doc, subQueryExpl, valSrcExpls); + if (valSrcScores.empty()) { + return customScore(doc, subQueryScore, 1); } - - ExplanationPtr DefaultCustomScoreProvider::customExplain(int32_t doc, ExplanationPtr subQueryExpl, ExplanationPtr valSrcExpl) - { - return CustomScoreQueryPtr(_customQuery)->customExplain(doc, subQueryExpl, valSrcExpl); + double score = subQueryScore; + for (Collection::iterator srcScore = valSrcScores.begin(); srcScore != valSrcScores.end(); ++srcScore) { + score *= *srcScore; } - - CustomWeight::CustomWeight(CustomScoreQueryPtr query, SearcherPtr searcher) - { - this->query = query; - this->similarity = query->getSimilarity(searcher); - this->subQueryWeight = query->subQuery->weight(searcher); - this->valSrcWeights = Collection::newInstance(query->valSrcQueries.size()); - for (int32_t i = 0; i < query->valSrcQueries.size(); ++i) - this->valSrcWeights[i] = query->valSrcQueries[i]->createWeight(searcher); - this->qStrict = query->strict; + return score; +} + +double CustomScoreQuery::customScore(int32_t doc, double subQueryScore, double valSrcScore) { + return subQueryScore * valSrcScore; +} + +ExplanationPtr CustomScoreQuery::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls) { + if (valSrcExpls.size() == 1) { + return customExplain(doc, subQueryExpl, 
valSrcExpls[0]); } - - CustomWeight::~CustomWeight() - { + if (valSrcExpls.empty()) { + return subQueryExpl; } - - QueryPtr CustomWeight::getQuery() - { - return query; + double valSrcScore = 1; + for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) { + valSrcScore *= (*srcExpl)->getValue(); } - - double CustomWeight::getValue() - { - return query->getBoost(); + ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); + exp->addDetail(subQueryExpl); + for (Collection::iterator srcExpl = valSrcExpls.begin(); srcExpl != valSrcExpls.end(); ++srcExpl) { + exp->addDetail(*srcExpl); } - - double CustomWeight::sumOfSquaredWeights() - { - double sum = subQueryWeight->sumOfSquaredWeights(); - for (int32_t i = 0; i < valSrcWeights.size(); ++i) - { - if (qStrict) - valSrcWeights[i]->sumOfSquaredWeights(); // do not include ValueSource part in the query normalization - else - sum += valSrcWeights[i]->sumOfSquaredWeights(); + return exp; +} + +ExplanationPtr CustomScoreQuery::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl) { + double valSrcScore = 1; + if (valSrcExpl) { + valSrcScore *= valSrcExpl->getValue(); + } + ExplanationPtr exp(newLucene(valSrcScore * subQueryExpl->getValue(), L"custom score: product of:")); + exp->addDetail(subQueryExpl); + exp->addDetail(valSrcExpl); + return exp; +} + +WeightPtr CustomScoreQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +bool CustomScoreQuery::isStrict() { + return strict; +} + +void CustomScoreQuery::setStrict(bool strict) { + this->strict = strict; +} + +String CustomScoreQuery::name() { + return L"custom"; +} + +DefaultCustomScoreProvider::DefaultCustomScoreProvider(const CustomScoreQueryPtr& customQuery, const IndexReaderPtr& reader) : CustomScoreProvider(reader) { + _customQuery = customQuery; +} + 
+DefaultCustomScoreProvider::~DefaultCustomScoreProvider() { +} + +double DefaultCustomScoreProvider::customScore(int32_t doc, double subQueryScore, Collection valSrcScores) { + return CustomScoreQueryPtr(_customQuery)->customScore(doc, subQueryScore, valSrcScores); +} + +double DefaultCustomScoreProvider::customScore(int32_t doc, double subQueryScore, double valSrcScore) { + return CustomScoreQueryPtr(_customQuery)->customScore(doc, subQueryScore, valSrcScore); +} + +ExplanationPtr DefaultCustomScoreProvider::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, Collection valSrcExpls) { + return CustomScoreQueryPtr(_customQuery)->customExplain(doc, subQueryExpl, valSrcExpls); +} + +ExplanationPtr DefaultCustomScoreProvider::customExplain(int32_t doc, const ExplanationPtr& subQueryExpl, const ExplanationPtr& valSrcExpl) { + return CustomScoreQueryPtr(_customQuery)->customExplain(doc, subQueryExpl, valSrcExpl); +} + +CustomWeight::CustomWeight(const CustomScoreQueryPtr& query, const SearcherPtr& searcher) { + this->query = query; + this->similarity = query->getSimilarity(searcher); + this->subQueryWeight = query->subQuery->weight(searcher); + this->valSrcWeights = Collection::newInstance(query->valSrcQueries.size()); + for (int32_t i = 0; i < query->valSrcQueries.size(); ++i) { + this->valSrcWeights[i] = query->valSrcQueries[i]->createWeight(searcher); + } + this->qStrict = query->strict; +} + +CustomWeight::~CustomWeight() { +} + +QueryPtr CustomWeight::getQuery() { + return query; +} + +double CustomWeight::getValue() { + return query->getBoost(); +} + +double CustomWeight::sumOfSquaredWeights() { + double sum = subQueryWeight->sumOfSquaredWeights(); + for (int32_t i = 0; i < valSrcWeights.size(); ++i) { + if (qStrict) { + valSrcWeights[i]->sumOfSquaredWeights(); // do not include ValueSource part in the query normalization + } else { + sum += valSrcWeights[i]->sumOfSquaredWeights(); } - sum *= query->getBoost() * query->getBoost(); // boost each 
sub-weight - return sum; } - - void CustomWeight::normalize(double norm) - { - norm *= query->getBoost(); // incorporate boost - subQueryWeight->normalize(norm); - for (int32_t i = 0; i < valSrcWeights.size(); ++i) - { - if (qStrict) - valSrcWeights[i]->normalize(1.0); // do not normalize the ValueSource part - else - valSrcWeights[i]->normalize(norm); + sum *= query->getBoost() * query->getBoost(); // boost each sub-weight + return sum; +} + +void CustomWeight::normalize(double norm) { + norm *= query->getBoost(); // incorporate boost + subQueryWeight->normalize(norm); + for (int32_t i = 0; i < valSrcWeights.size(); ++i) { + if (qStrict) { + valSrcWeights[i]->normalize(1.0); // do not normalize the ValueSource part + } else { + valSrcWeights[i]->normalize(norm); } } - - ScorerPtr CustomWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - // Pass true for "scoresDocsInOrder", because we require in-order scoring, even if caller does not, - // since we call advance on the valSrcScorers. Pass false for "topScorer" because we will not invoke - // score(Collector) on these scorers - ScorerPtr subQueryScorer(subQueryWeight->scorer(reader, true, false)); - if (!subQueryScorer) - return ScorerPtr(); - Collection valSrcScorers(Collection::newInstance(valSrcWeights.size())); - for (int32_t i = 0; i < valSrcScorers.size(); ++i) - valSrcScorers[i] = valSrcWeights[i]->scorer(reader, true, topScorer); - return newLucene(similarity, reader, shared_from_this(), subQueryScorer, valSrcScorers); - } - - ExplanationPtr CustomWeight::explain(IndexReaderPtr reader, int32_t doc) - { - ExplanationPtr explain(doExplain(reader, doc)); - return explain ? 
explain : newLucene(0.0, L"no matching docs"); - } - - ExplanationPtr CustomWeight::doExplain(IndexReaderPtr reader, int32_t doc) - { - ExplanationPtr subQueryExpl(subQueryWeight->explain(reader, doc)); - if (!subQueryExpl->isMatch()) - return subQueryExpl; - // match - Collection valSrcExpls(Collection::newInstance(valSrcWeights.size())); - for (int32_t i = 0; i < valSrcWeights.size(); ++i) - valSrcExpls[i] = valSrcWeights[i]->explain(reader, doc); - ExplanationPtr customExp(query->getCustomScoreProvider(reader)->customExplain(doc, subQueryExpl, valSrcExpls)); - double sc = getValue() * customExp->getValue(); - ExplanationPtr res(newLucene(true, sc, query->toString() + L", product of:")); - res->addDetail(customExp); - res->addDetail(newLucene(getValue(), L"queryBoost")); // actually using the q boost as q weight (== weight value) - return res; - } - - bool CustomWeight::scoresDocsOutOfOrder() - { - return false; - } - - CustomScorer::CustomScorer(SimilarityPtr similarity, IndexReaderPtr reader, CustomWeightPtr weight, ScorerPtr subQueryScorer, Collection valSrcScorers) : Scorer(similarity) - { - this->qWeight = weight->getValue(); - this->subQueryScorer = subQueryScorer; - this->valSrcScorers = valSrcScorers; - this->reader = reader; - this->vScores = Collection::newInstance(valSrcScorers.size()); - this->provider = weight->query->getCustomScoreProvider(reader); - } - - CustomScorer::~CustomScorer() - { - } - - int32_t CustomScorer::nextDoc() - { - int32_t doc = subQueryScorer->nextDoc(); - if (doc != NO_MORE_DOCS) - { - for (int32_t i = 0; i < valSrcScorers.size(); ++i) - valSrcScorers[i]->advance(doc); +} + +ScorerPtr CustomWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + // Pass true for "scoresDocsInOrder", because we require in-order scoring, even if caller does not, + // since we call advance on the valSrcScorers. 
Pass false for "topScorer" because we will not invoke + // score(Collector) on these scorers + ScorerPtr subQueryScorer(subQueryWeight->scorer(reader, true, false)); + if (!subQueryScorer) { + return ScorerPtr(); + } + Collection valSrcScorers(Collection::newInstance(valSrcWeights.size())); + for (int32_t i = 0; i < valSrcScorers.size(); ++i) { + valSrcScorers[i] = valSrcWeights[i]->scorer(reader, true, topScorer); + } + return newLucene(similarity, reader, shared_from_this(), subQueryScorer, valSrcScorers); +} + +ExplanationPtr CustomWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + ExplanationPtr explain(doExplain(reader, doc)); + return explain ? explain : newLucene(0.0, L"no matching docs"); +} + +ExplanationPtr CustomWeight::doExplain(const IndexReaderPtr& reader, int32_t doc) { + ExplanationPtr subQueryExpl(subQueryWeight->explain(reader, doc)); + if (!subQueryExpl->isMatch()) { + return subQueryExpl; + } + // match + Collection valSrcExpls(Collection::newInstance(valSrcWeights.size())); + for (int32_t i = 0; i < valSrcWeights.size(); ++i) { + valSrcExpls[i] = valSrcWeights[i]->explain(reader, doc); + } + ExplanationPtr customExp(query->getCustomScoreProvider(reader)->customExplain(doc, subQueryExpl, valSrcExpls)); + double sc = getValue() * customExp->getValue(); + ExplanationPtr res(newLucene(true, sc, query->toString() + L", product of:")); + res->addDetail(customExp); + res->addDetail(newLucene(getValue(), L"queryBoost")); // actually using the q boost as q weight (== weight value) + return res; +} + +bool CustomWeight::scoresDocsOutOfOrder() { + return false; +} + +CustomScorer::CustomScorer(const SimilarityPtr& similarity, const IndexReaderPtr& reader, const CustomWeightPtr& weight, const ScorerPtr& subQueryScorer, Collection valSrcScorers) : Scorer(similarity) { + this->qWeight = weight->getValue(); + this->subQueryScorer = subQueryScorer; + this->valSrcScorers = valSrcScorers; + this->reader = reader; + this->vScores = 
Collection::newInstance(valSrcScorers.size()); + this->provider = weight->query->getCustomScoreProvider(reader); +} + +CustomScorer::~CustomScorer() { +} + +int32_t CustomScorer::nextDoc() { + int32_t doc = subQueryScorer->nextDoc(); + if (doc != NO_MORE_DOCS) { + for (int32_t i = 0; i < valSrcScorers.size(); ++i) { + valSrcScorers[i]->advance(doc); } - return doc; - } - - int32_t CustomScorer::docID() - { - return subQueryScorer->docID(); } - - double CustomScorer::score() - { - for (int32_t i = 0; i < valSrcScorers.size(); ++i) - vScores[i] = valSrcScorers[i]->score(); - return qWeight * provider->customScore(subQueryScorer->docID(), subQueryScorer->score(), vScores); + return doc; +} + +int32_t CustomScorer::docID() { + return subQueryScorer->docID(); +} + +double CustomScorer::score() { + for (int32_t i = 0; i < valSrcScorers.size(); ++i) { + vScores[i] = valSrcScorers[i]->score(); } - - int32_t CustomScorer::advance(int32_t target) - { - int32_t doc = subQueryScorer->advance(target); - if (doc != NO_MORE_DOCS) - { - for (int32_t i = 0; i < valSrcScorers.size(); ++i) - valSrcScorers[i]->advance(doc); + return qWeight * provider->customScore(subQueryScorer->docID(), subQueryScorer->score(), vScores); +} + +int32_t CustomScorer::advance(int32_t target) { + int32_t doc = subQueryScorer->advance(target); + if (doc != NO_MORE_DOCS) { + for (int32_t i = 0; i < valSrcScorers.size(); ++i) { + valSrcScorers[i]->advance(doc); } - return doc; } + return doc; +} + } diff --git a/src/core/search/function/DocValues.cpp b/src/core/search/function/DocValues.cpp index 94ac8716..a95858b6 100644 --- a/src/core/search/function/DocValues.cpp +++ b/src/core/search/function/DocValues.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,88 +11,75 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - DocValues::DocValues() - { - minVal = std::numeric_limits::quiet_NaN(); - maxVal = std::numeric_limits::quiet_NaN(); - avgVal = std::numeric_limits::quiet_NaN(); - computed = false; - } - - DocValues::~DocValues() - { - } - - int32_t DocValues::intVal(int32_t doc) - { - return (int32_t)doubleVal(doc); - } - - int64_t DocValues::longVal(int32_t doc) - { - return (int64_t)doubleVal(doc); - } - - String DocValues::strVal(int32_t doc) - { - return StringUtils::toString(doubleVal(doc)); - } - - ExplanationPtr DocValues::explain(int32_t doc) - { - return newLucene(doubleVal(doc), toString(doc)); - } - - CollectionValue DocValues::getInnerArray() - { - boost::throw_exception(UnsupportedOperationException(L"This optional method is for test purposes only")); - return VariantUtils::null(); +namespace Lucene { + +DocValues::DocValues() { + minVal = std::numeric_limits::quiet_NaN(); + maxVal = std::numeric_limits::quiet_NaN(); + avgVal = std::numeric_limits::quiet_NaN(); + computed = false; +} + +DocValues::~DocValues() { +} + +int32_t DocValues::intVal(int32_t doc) { + return (int32_t)doubleVal(doc); +} + +int64_t DocValues::longVal(int32_t doc) { + return (int64_t)doubleVal(doc); +} + +String DocValues::strVal(int32_t doc) { + return StringUtils::toString(doubleVal(doc)); +} + +ExplanationPtr DocValues::explain(int32_t doc) { + return newLucene(doubleVal(doc), toString(doc)); +} + +CollectionValue DocValues::getInnerArray() { + boost::throw_exception(UnsupportedOperationException(L"This optional method is for test purposes only")); + return VariantUtils::null(); +} + +void DocValues::compute() { + if (computed) { + return; } - - void DocValues::compute() - { - if (computed) - return; - 
double sum = 0; - int32_t n = 0; - while (true) - { - double val; - try - { - val = doubleVal(n); - } - catch (IndexOutOfBoundsException&) - { - break; - } - sum += val; - minVal = MiscUtils::isNaN(minVal) ? val : std::min(minVal, val); - maxVal = MiscUtils::isNaN(maxVal) ? val : std::max(maxVal, val); - ++n; + double sum = 0; + int32_t n = 0; + while (true) { + double val; + try { + val = doubleVal(n); + } catch (IndexOutOfBoundsException&) { + break; } - - avgVal = n == 0 ? std::numeric_limits::quiet_NaN() : sum / (double)n; - computed = true; - } - - double DocValues::getMinValue() - { - compute(); - return minVal; - } - - double DocValues::getMaxValue() - { - compute(); - return maxVal; - } - - double DocValues::getAverageValue() - { - compute(); - return avgVal; + sum += val; + minVal = MiscUtils::isNaN(minVal) ? val : std::min(minVal, val); + maxVal = MiscUtils::isNaN(maxVal) ? val : std::max(maxVal, val); + ++n; } + + avgVal = n == 0 ? std::numeric_limits::quiet_NaN() : sum / (double)n; + computed = true; +} + +double DocValues::getMinValue() { + compute(); + return minVal; +} + +double DocValues::getMaxValue() { + compute(); + return maxVal; +} + +double DocValues::getAverageValue() { + compute(); + return avgVal; +} + } diff --git a/src/core/search/function/DoubleFieldSource.cpp b/src/core/search/function/DoubleFieldSource.cpp index ead5194e..39127345 100644 --- a/src/core/search/function/DoubleFieldSource.cpp +++ b/src/core/search/function/DoubleFieldSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,67 +10,60 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - DoubleFieldSource::DoubleFieldSource(const String& field, DoubleParserPtr parser) : FieldCacheSource(field) - { - this->parser = parser; - } - - DoubleFieldSource::~DoubleFieldSource() - { - } - - String DoubleFieldSource::description() - { - return L"double(" + FieldCacheSource::description() + L")"; - } - - DocValuesPtr DoubleFieldSource::getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader) - { - Collection arr(cache->getDoubles(reader, field, parser)); - return newLucene(shared_from_this(), arr); - } - - bool DoubleFieldSource::cachedFieldSourceEquals(FieldCacheSourcePtr other) - { - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - DoubleFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); - if (!otherSource) - return false; - return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; - } - - int32_t DoubleFieldSource::cachedFieldSourceHashCode() - { - return StringUtils::hashCode(parser ? 
DoubleParser::_getClassName() : DoubleFieldSource::_getClassName()); - } - - DoubleDocValues::DoubleDocValues(DoubleFieldSourcePtr source, Collection arr) - { - this->_source = source; - this->arr = arr; - } - - DoubleDocValues::~DoubleDocValues() - { - } - - double DoubleDocValues::doubleVal(int32_t doc) - { - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return arr[doc]; +namespace Lucene { + +DoubleFieldSource::DoubleFieldSource(const String& field, const DoubleParserPtr& parser) : FieldCacheSource(field) { + this->parser = parser; +} + +DoubleFieldSource::~DoubleFieldSource() { +} + +String DoubleFieldSource::description() { + return L"double(" + FieldCacheSource::description() + L")"; +} + +DocValuesPtr DoubleFieldSource::getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader) { + Collection arr(cache->getDoubles(reader, field, parser)); + return newLucene(shared_from_this(), arr); +} + +bool DoubleFieldSource::cachedFieldSourceEquals(const FieldCacheSourcePtr& other) { + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - String DoubleDocValues::toString(int32_t doc) - { - return DoubleFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(doubleVal(doc)); + DoubleFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); + if (!otherSource) { + return false; } - - CollectionValue DoubleDocValues::getInnerArray() - { - return arr; + return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; +} + +int32_t DoubleFieldSource::cachedFieldSourceHashCode() { + return StringUtils::hashCode(parser ? 
DoubleParser::_getClassName() : DoubleFieldSource::_getClassName()); +} + +DoubleDocValues::DoubleDocValues(const DoubleFieldSourcePtr& source, Collection arr) { + this->_source = source; + this->arr = arr; +} + +DoubleDocValues::~DoubleDocValues() { +} + +double DoubleDocValues::doubleVal(int32_t doc) { + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } + return arr[doc]; +} + +String DoubleDocValues::toString(int32_t doc) { + return DoubleFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(doubleVal(doc)); +} + +CollectionValue DoubleDocValues::getInnerArray() { + return arr; +} + } diff --git a/src/core/search/function/FieldCacheSource.cpp b/src/core/search/function/FieldCacheSource.cpp index d9122ff2..4350e1ec 100644 --- a/src/core/search/function/FieldCacheSource.cpp +++ b/src/core/search/function/FieldCacheSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,37 +9,33 @@ #include "FieldCache.h" #include "StringUtils.h" -namespace Lucene -{ - FieldCacheSource::FieldCacheSource(const String& field) - { - this->field = field; - } - - FieldCacheSource::~FieldCacheSource() - { - } - - DocValuesPtr FieldCacheSource::getValues(IndexReaderPtr reader) - { - return getCachedFieldValues(FieldCache::DEFAULT(), field, reader); - } - - String FieldCacheSource::description() - { - return field; - } - - bool FieldCacheSource::equals(LuceneObjectPtr other) - { - FieldCacheSourcePtr otherSource(boost::dynamic_pointer_cast(other)); - if (!otherSource) - return false; - return field == otherSource->field && cachedFieldSourceEquals(otherSource); - } - - int32_t FieldCacheSource::hashCode() - { - return StringUtils::hashCode(field) + cachedFieldSourceHashCode(); +namespace Lucene { + +FieldCacheSource::FieldCacheSource(const String& field) { + this->field = field; +} + +FieldCacheSource::~FieldCacheSource() { +} + +DocValuesPtr FieldCacheSource::getValues(const IndexReaderPtr& reader) { + return getCachedFieldValues(FieldCache::DEFAULT(), field, reader); +} + +String FieldCacheSource::description() { + return field; +} + +bool FieldCacheSource::equals(const LuceneObjectPtr& other) { + FieldCacheSourcePtr otherSource(boost::dynamic_pointer_cast(other)); + if (!otherSource) { + return false; } + return field == otherSource->field && cachedFieldSourceEquals(otherSource); +} + +int32_t FieldCacheSource::hashCode() { + return StringUtils::hashCode(field) + cachedFieldSourceHashCode(); +} + } diff --git a/src/core/search/function/FieldScoreQuery.cpp b/src/core/search/function/FieldScoreQuery.cpp index 677a4740..4a5d4acb 100644 --- a/src/core/search/function/FieldScoreQuery.cpp +++ b/src/core/search/function/FieldScoreQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,29 +10,26 @@ #include "IntFieldSource.h" #include "DoubleFieldSource.h" -namespace Lucene -{ - FieldScoreQuery::FieldScoreQuery(const String& field, Type type) : ValueSourceQuery(getValueSource(field,type)) - { - } - - FieldScoreQuery::~FieldScoreQuery() - { - } - - ValueSourcePtr FieldScoreQuery::getValueSource(const String& field, Type type) - { - switch (type) - { - case BYTE: - return newLucene(field); - case INT: - return newLucene(field); - case DOUBLE: - return newLucene(field); - default: - boost::throw_exception(IllegalArgumentException(L"not a known Field Score Query Type")); - return ValueSourcePtr(); - } +namespace Lucene { + +FieldScoreQuery::FieldScoreQuery(const String& field, Type type) : ValueSourceQuery(getValueSource(field,type)) { +} + +FieldScoreQuery::~FieldScoreQuery() { +} + +ValueSourcePtr FieldScoreQuery::getValueSource(const String& field, Type type) { + switch (type) { + case BYTE: + return newLucene(field); + case INT: + return newLucene(field); + case DOUBLE: + return newLucene(field); + default: + boost::throw_exception(IllegalArgumentException(L"not a known Field Score Query Type")); + return ValueSourcePtr(); } } + +} diff --git a/src/core/search/function/IntFieldSource.cpp b/src/core/search/function/IntFieldSource.cpp index 64f12a11..0af3425f 100644 --- a/src/core/search/function/IntFieldSource.cpp +++ b/src/core/search/function/IntFieldSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,74 +11,67 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - IntFieldSource::IntFieldSource(const String& field, IntParserPtr parser) : FieldCacheSource(field) - { - this->parser = parser; - } - - IntFieldSource::~IntFieldSource() - { - } - - String IntFieldSource::description() - { - return L"int(" + FieldCacheSource::description() + L")"; - } - - DocValuesPtr IntFieldSource::getCachedFieldValues(FieldCachePtr cache, const String& field, IndexReaderPtr reader) - { - Collection arr(cache->getInts(reader, field, parser)); - return newLucene(shared_from_this(), arr); - } - - bool IntFieldSource::cachedFieldSourceEquals(FieldCacheSourcePtr other) - { - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - IntFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); - if (!otherSource) - return false; - return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; - } - - int32_t IntFieldSource::cachedFieldSourceHashCode() - { - return StringUtils::hashCode(parser ? 
IntParser::_getClassName() : IntFieldSource::_getClassName()); - } - - IntDocValues::IntDocValues(IntFieldSourcePtr source, Collection arr) - { - this->_source = source; - this->arr = arr; - } - - IntDocValues::~IntDocValues() - { - } - - double IntDocValues::doubleVal(int32_t doc) - { - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return (double)arr[doc]; +namespace Lucene { + +IntFieldSource::IntFieldSource(const String& field, const IntParserPtr& parser) : FieldCacheSource(field) { + this->parser = parser; +} + +IntFieldSource::~IntFieldSource() { +} + +String IntFieldSource::description() { + return L"int(" + FieldCacheSource::description() + L")"; +} + +DocValuesPtr IntFieldSource::getCachedFieldValues(const FieldCachePtr& cache, const String& field, const IndexReaderPtr& reader) { + Collection arr(cache->getInts(reader, field, parser)); + return newLucene(shared_from_this(), arr); +} + +bool IntFieldSource::cachedFieldSourceEquals(const FieldCacheSourcePtr& other) { + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - int32_t IntDocValues::intVal(int32_t doc) - { - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return arr[doc]; + IntFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); + if (!otherSource) { + return false; } - - String IntDocValues::toString(int32_t doc) - { - return IntFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); + return parser ? MiscUtils::equalTypes(parser, otherSource->parser) : !otherSource->parser; +} + +int32_t IntFieldSource::cachedFieldSourceHashCode() { + return StringUtils::hashCode(parser ? 
IntParser::_getClassName() : IntFieldSource::_getClassName()); +} + +IntDocValues::IntDocValues(const IntFieldSourcePtr& source, Collection arr) { + this->_source = source; + this->arr = arr; +} + +IntDocValues::~IntDocValues() { +} + +double IntDocValues::doubleVal(int32_t doc) { + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } - - CollectionValue IntDocValues::getInnerArray() - { - return arr; + return (double)arr[doc]; +} + +int32_t IntDocValues::intVal(int32_t doc) { + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } + return arr[doc]; +} + +String IntDocValues::toString(int32_t doc) { + return IntFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); +} + +CollectionValue IntDocValues::getInnerArray() { + return arr; +} + } diff --git a/src/core/search/function/OrdFieldSource.cpp b/src/core/search/function/OrdFieldSource.cpp index 5f76a18c..66332b78 100644 --- a/src/core/search/function/OrdFieldSource.cpp +++ b/src/core/search/function/OrdFieldSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,75 +11,68 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - OrdFieldSource::OrdFieldSource(const String& field) - { - this->field = field; - } - - OrdFieldSource::~OrdFieldSource() - { - } - - String OrdFieldSource::description() - { - return L"ord(" + field + L")"; - } - - DocValuesPtr OrdFieldSource::getValues(IndexReaderPtr reader) - { - Collection arr(FieldCache::DEFAULT()->getStringIndex(reader, field)->order); - return newLucene(shared_from_this(), arr); - } - - bool OrdFieldSource::equals(LuceneObjectPtr other) - { - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - OrdFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); - if (!otherSource) - return false; - return field == otherSource->field; - } - - int32_t OrdFieldSource::hashCode() - { - return StringUtils::hashCode(OrdFieldSource::_getClassName()) + StringUtils::hashCode(field); - } - - OrdDocValues::OrdDocValues(OrdFieldSourcePtr source, Collection arr) - { - this->_source = source; - this->arr = arr; - } - - OrdDocValues::~OrdDocValues() - { - } - - double OrdDocValues::doubleVal(int32_t doc) - { - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return (double)arr[doc]; +namespace Lucene { + +OrdFieldSource::OrdFieldSource(const String& field) { + this->field = field; +} + +OrdFieldSource::~OrdFieldSource() { +} + +String OrdFieldSource::description() { + return L"ord(" + field + L")"; +} + +DocValuesPtr OrdFieldSource::getValues(const IndexReaderPtr& reader) { + Collection arr(FieldCache::DEFAULT()->getStringIndex(reader, field)->order); + return newLucene(shared_from_this(), arr); +} + +bool OrdFieldSource::equals(const LuceneObjectPtr& other) { + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - String OrdDocValues::strVal(int32_t doc) - { - // the string value of the ordinal, not the 
string itself - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return StringUtils::toString(arr[doc]); + OrdFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); + if (!otherSource) { + return false; } - - String OrdDocValues::toString(int32_t doc) - { - return OrdFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); + return field == otherSource->field; +} + +int32_t OrdFieldSource::hashCode() { + return StringUtils::hashCode(OrdFieldSource::_getClassName()) + StringUtils::hashCode(field); +} + +OrdDocValues::OrdDocValues(const OrdFieldSourcePtr& source, Collection arr) { + this->_source = source; + this->arr = arr; +} + +OrdDocValues::~OrdDocValues() { +} + +double OrdDocValues::doubleVal(int32_t doc) { + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } - - CollectionValue OrdDocValues::getInnerArray() - { - return arr; + return (double)arr[doc]; +} + +String OrdDocValues::strVal(int32_t doc) { + // the string value of the ordinal, not the string itself + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } + return StringUtils::toString(arr[doc]); +} + +String OrdDocValues::toString(int32_t doc) { + return OrdFieldSourcePtr(_source)->description() + L"=" + StringUtils::toString(intVal(doc)); +} + +CollectionValue OrdDocValues::getInnerArray() { + return arr; +} + } diff --git a/src/core/search/function/ReverseOrdFieldSource.cpp b/src/core/search/function/ReverseOrdFieldSource.cpp index 0464a9e6..e7f7f8dd 100644 --- a/src/core/search/function/ReverseOrdFieldSource.cpp +++ b/src/core/search/function/ReverseOrdFieldSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,83 +11,75 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - ReverseOrdFieldSource::ReverseOrdFieldSource(const String& field) - { - this->field = field; - } - - ReverseOrdFieldSource::~ReverseOrdFieldSource() - { - } - - String ReverseOrdFieldSource::description() - { - return L"rord(" + field + L")"; - } - - DocValuesPtr ReverseOrdFieldSource::getValues(IndexReaderPtr reader) - { - StringIndexPtr sindex(FieldCache::DEFAULT()->getStringIndex(reader, field)); - Collection arr(sindex->order); - int32_t end = sindex->lookup.size(); - return newLucene(shared_from_this(), arr, end); - } - - bool ReverseOrdFieldSource::equals(LuceneObjectPtr other) - { - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - ReverseOrdFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); - if (!otherSource) - return false; - return field == otherSource->field; - } - - int32_t ReverseOrdFieldSource::hashCode() - { - return StringUtils::hashCode(ReverseOrdFieldSource::_getClassName()) + StringUtils::hashCode(field); - } - - ReverseOrdDocValues::ReverseOrdDocValues(ReverseOrdFieldSourcePtr source, Collection arr, int32_t end) - { - this->_source = source; - this->arr = arr; - this->end = end; - } - - ReverseOrdDocValues::~ReverseOrdDocValues() - { - } - - double ReverseOrdDocValues::doubleVal(int32_t doc) - { - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return (double)(end - arr[doc]); - } - - int32_t ReverseOrdDocValues::intVal(int32_t doc) - { - if (doc < 0 || doc >= arr.size()) - boost::throw_exception(IndexOutOfBoundsException()); - return (end - arr[doc]); +namespace Lucene { + +ReverseOrdFieldSource::ReverseOrdFieldSource(const String& field) { + this->field = field; +} + 
+ReverseOrdFieldSource::~ReverseOrdFieldSource() { +} + +String ReverseOrdFieldSource::description() { + return L"rord(" + field + L")"; +} + +DocValuesPtr ReverseOrdFieldSource::getValues(const IndexReaderPtr& reader) { + StringIndexPtr sindex(FieldCache::DEFAULT()->getStringIndex(reader, field)); + Collection arr(sindex->order); + int32_t end = sindex->lookup.size(); + return newLucene(shared_from_this(), arr, end); +} + +bool ReverseOrdFieldSource::equals(const LuceneObjectPtr& other) { + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - String ReverseOrdDocValues::strVal(int32_t doc) - { - // the string value of the ordinal, not the string itself - return StringUtils::toString(intVal(doc)); + ReverseOrdFieldSourcePtr otherSource(boost::dynamic_pointer_cast(other)); + if (!otherSource) { + return false; } - - String ReverseOrdDocValues::toString(int32_t doc) - { - return ReverseOrdFieldSourcePtr(_source)->description() + L"=" + strVal(doc); + return field == otherSource->field; +} + +int32_t ReverseOrdFieldSource::hashCode() { + return StringUtils::hashCode(ReverseOrdFieldSource::_getClassName()) + StringUtils::hashCode(field); +} + +ReverseOrdDocValues::ReverseOrdDocValues(const ReverseOrdFieldSourcePtr& source, Collection arr, int32_t end) { + this->_source = source; + this->arr = arr; + this->end = end; +} + +ReverseOrdDocValues::~ReverseOrdDocValues() { +} + +double ReverseOrdDocValues::doubleVal(int32_t doc) { + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } - - CollectionValue ReverseOrdDocValues::getInnerArray() - { - return arr; + return (double)(end - arr[doc]); +} + +int32_t ReverseOrdDocValues::intVal(int32_t doc) { + if (doc < 0 || doc >= arr.size()) { + boost::throw_exception(IndexOutOfBoundsException()); } + return (end - arr[doc]); +} + +String ReverseOrdDocValues::strVal(int32_t doc) { + // the string value of the ordinal, not the string itself + return 
StringUtils::toString(intVal(doc)); +} + +String ReverseOrdDocValues::toString(int32_t doc) { + return ReverseOrdFieldSourcePtr(_source)->description() + L"=" + strVal(doc); +} + +CollectionValue ReverseOrdDocValues::getInnerArray() { + return arr; +} + } diff --git a/src/core/search/function/ValueSource.cpp b/src/core/search/function/ValueSource.cpp index 2c80cf3f..fa4b30c7 100644 --- a/src/core/search/function/ValueSource.cpp +++ b/src/core/search/function/ValueSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,14 +7,13 @@ #include "LuceneInc.h" #include "ValueSource.h" -namespace Lucene -{ - ValueSource::~ValueSource() - { - } - - String ValueSource::toString() - { - return description(); - } +namespace Lucene { + +ValueSource::~ValueSource() { +} + +String ValueSource::toString() { + return description(); +} + } diff --git a/src/core/search/function/ValueSourceQuery.cpp b/src/core/search/function/ValueSourceQuery.cpp index 5ad06e2c..07f6e3fb 100644 --- a/src/core/search/function/ValueSourceQuery.cpp +++ b/src/core/search/function/ValueSourceQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,141 +15,120 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - ValueSourceQuery::ValueSourceQuery(ValueSourcePtr valSrc) - { - this->valSrc = valSrc; - } - - ValueSourceQuery::~ValueSourceQuery() - { - } - - QueryPtr ValueSourceQuery::rewrite(IndexReaderPtr reader) - { - return shared_from_this(); - } - - void ValueSourceQuery::extractTerms(SetTerm terms) - { - // no terms involved here - } - - WeightPtr ValueSourceQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } - - String ValueSourceQuery::toString(const String& field) - { - return valSrc->toString() + boostString(); - } - - bool ValueSourceQuery::equals(LuceneObjectPtr other) - { - ValueSourceQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - return (getBoost() == otherQuery->getBoost() && valSrc->equals(otherQuery->valSrc)); - } - - int32_t ValueSourceQuery::hashCode() - { - return (StringUtils::hashCode(ValueSourceQuery::_getClassName()) + valSrc->hashCode()) ^ MiscUtils::doubleToIntBits(getBoost()); - } - - LuceneObjectPtr ValueSourceQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(valSrc); - ValueSourceQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); - cloneQuery->valSrc = valSrc; - return cloneQuery; - } - - ValueSourceWeight::ValueSourceWeight(ValueSourceQueryPtr query, SearcherPtr searcher) - { - this->query = query; - this->similarity = query->getSimilarity(searcher); - } - - ValueSourceWeight::~ValueSourceWeight() - { - } - - QueryPtr ValueSourceWeight::getQuery() - { - return query; - } - - double ValueSourceWeight::getValue() - { - return queryWeight; - } - - double ValueSourceWeight::sumOfSquaredWeights() - { - queryWeight = query->getBoost(); - return queryWeight * queryWeight; - } - - void ValueSourceWeight::normalize(double norm) - { - queryNorm = norm; - queryWeight *= queryNorm; - } - - ScorerPtr ValueSourceWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - return newLucene(similarity, reader, shared_from_this()); - } - - ExplanationPtr ValueSourceWeight::explain(IndexReaderPtr reader, int32_t doc) - { - DocValuesPtr vals(query->valSrc->getValues(reader)); - double sc = queryWeight * vals->doubleVal(doc); - - ExplanationPtr result(newLucene(true, sc, query->toString() + L", product of:")); - - result->addDetail(vals->explain(doc)); - result->addDetail(newLucene(query->getBoost(), L"boost")); - result->addDetail(newLucene(queryNorm, L"queryNorm")); - return result; - } - - ValueSourceScorer::ValueSourceScorer(SimilarityPtr similarity, IndexReaderPtr reader, ValueSourceWeightPtr weight) : Scorer(similarity) - { - this->weight = weight; - this->qWeight = weight->getValue(); - this->doc = -1; - // this is when/where the values are first created. - vals = weight->query->valSrc->getValues(reader); - termDocs = reader->termDocs(TermPtr()); - } - - ValueSourceScorer::~ValueSourceScorer() - { - } - - int32_t ValueSourceScorer::nextDoc() - { - doc = termDocs->next() ? 
termDocs->doc() : NO_MORE_DOCS; - return doc; - } - - int32_t ValueSourceScorer::docID() - { - return doc; - } - - int32_t ValueSourceScorer::advance(int32_t target) - { - doc = termDocs->skipTo(target) ? termDocs->doc() : NO_MORE_DOCS; - return doc; - } - - double ValueSourceScorer::score() - { - return qWeight * vals->doubleVal(termDocs->doc()); +namespace Lucene { + +ValueSourceQuery::ValueSourceQuery(const ValueSourcePtr& valSrc) { + this->valSrc = valSrc; +} + +ValueSourceQuery::~ValueSourceQuery() { +} + +QueryPtr ValueSourceQuery::rewrite(const IndexReaderPtr& reader) { + return shared_from_this(); +} + +void ValueSourceQuery::extractTerms(SetTerm terms) { + // no terms involved here +} + +WeightPtr ValueSourceQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +String ValueSourceQuery::toString(const String& field) { + return valSrc->toString() + boostString(); +} + +bool ValueSourceQuery::equals(const LuceneObjectPtr& other) { + ValueSourceQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } + return (getBoost() == otherQuery->getBoost() && valSrc->equals(otherQuery->valSrc)); +} + +int32_t ValueSourceQuery::hashCode() { + return (StringUtils::hashCode(ValueSourceQuery::_getClassName()) + valSrc->hashCode()) ^ MiscUtils::doubleToIntBits(getBoost()); +} + +LuceneObjectPtr ValueSourceQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? 
other : newLucene(valSrc); + ValueSourceQueryPtr cloneQuery(boost::dynamic_pointer_cast(Query::clone(clone))); + cloneQuery->valSrc = valSrc; + return cloneQuery; +} + +ValueSourceWeight::ValueSourceWeight(const ValueSourceQueryPtr& query, const SearcherPtr& searcher) { + this->query = query; + this->similarity = query->getSimilarity(searcher); +} + +ValueSourceWeight::~ValueSourceWeight() { +} + +QueryPtr ValueSourceWeight::getQuery() { + return query; +} + +double ValueSourceWeight::getValue() { + return queryWeight; +} + +double ValueSourceWeight::sumOfSquaredWeights() { + queryWeight = query->getBoost(); + return queryWeight * queryWeight; +} + +void ValueSourceWeight::normalize(double norm) { + queryNorm = norm; + queryWeight *= queryNorm; +} + +ScorerPtr ValueSourceWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + return newLucene(similarity, reader, shared_from_this()); +} + +ExplanationPtr ValueSourceWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + DocValuesPtr vals(query->valSrc->getValues(reader)); + double sc = queryWeight * vals->doubleVal(doc); + + ExplanationPtr result(newLucene(true, sc, query->toString() + L", product of:")); + + result->addDetail(vals->explain(doc)); + result->addDetail(newLucene(query->getBoost(), L"boost")); + result->addDetail(newLucene(queryNorm, L"queryNorm")); + return result; +} + +ValueSourceScorer::ValueSourceScorer(const SimilarityPtr& similarity, const IndexReaderPtr& reader, const ValueSourceWeightPtr& weight) : Scorer(similarity) { + this->weight = weight; + this->qWeight = weight->getValue(); + this->doc = -1; + // this is when/where the values are first created. + vals = weight->query->valSrc->getValues(reader); + termDocs = reader->termDocs(TermPtr()); +} + +ValueSourceScorer::~ValueSourceScorer() { +} + +int32_t ValueSourceScorer::nextDoc() { + doc = termDocs->next() ? 
termDocs->doc() : NO_MORE_DOCS; + return doc; +} + +int32_t ValueSourceScorer::docID() { + return doc; +} + +int32_t ValueSourceScorer::advance(int32_t target) { + doc = termDocs->skipTo(target) ? termDocs->doc() : NO_MORE_DOCS; + return doc; +} + +double ValueSourceScorer::score() { + return qWeight * vals->doubleVal(termDocs->doc()); +} + } diff --git a/src/core/search/payloads/AveragePayloadFunction.cpp b/src/core/search/payloads/AveragePayloadFunction.cpp index b98ad9d4..f34ba229 100644 --- a/src/core/search/payloads/AveragePayloadFunction.cpp +++ b/src/core/search/payloads/AveragePayloadFunction.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,39 +9,38 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - AveragePayloadFunction::~AveragePayloadFunction() - { - } - - double AveragePayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, - int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) - { - return currentPayloadScore + currentScore; - } - - double AveragePayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) - { - return numPayloadsSeen > 0 ? 
(payloadScore / (double)numPayloadsSeen) : 1.0; +namespace Lucene { + +AveragePayloadFunction::~AveragePayloadFunction() { +} + +double AveragePayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, + int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { + return currentPayloadScore + currentScore; +} + +double AveragePayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { + return numPayloadsSeen > 0 ? (payloadScore / (double)numPayloadsSeen) : 1.0; +} + +int32_t AveragePayloadFunction::hashCode() { + int32_t prime = 31; + int32_t result = 1; + result = prime * result + StringUtils::hashCode(getClassName()); + return result; +} + +bool AveragePayloadFunction::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t AveragePayloadFunction::hashCode() - { - int32_t prime = 31; - int32_t result = 1; - result = prime * result + StringUtils::hashCode(getClassName()); - return result; + if (!other) { + return false; } - - bool AveragePayloadFunction::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!other) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - return true; + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } + return true; +} + } diff --git a/src/core/search/payloads/MaxPayloadFunction.cpp b/src/core/search/payloads/MaxPayloadFunction.cpp index a3bf6648..ff7db7ce 100644 --- a/src/core/search/payloads/MaxPayloadFunction.cpp +++ b/src/core/search/payloads/MaxPayloadFunction.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,42 +9,42 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - MaxPayloadFunction::~MaxPayloadFunction() - { - } - - double MaxPayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, - int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) - { - if (numPayloadsSeen == 0) - return currentPayloadScore; - else - return std::max(currentPayloadScore, currentScore); +namespace Lucene { + +MaxPayloadFunction::~MaxPayloadFunction() { +} + +double MaxPayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, + int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { + if (numPayloadsSeen == 0) { + return currentPayloadScore; + } else { + return std::max(currentPayloadScore, currentScore); } - - double MaxPayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) - { - return numPayloadsSeen > 0 ? payloadScore : 1.0; +} + +double MaxPayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { + return numPayloadsSeen > 0 ? 
payloadScore : 1.0; +} + +int32_t MaxPayloadFunction::hashCode() { + int32_t prime = 31; + int32_t result = 1; + result = prime * result + StringUtils::hashCode(getClassName()); + return result; +} + +bool MaxPayloadFunction::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t MaxPayloadFunction::hashCode() - { - int32_t prime = 31; - int32_t result = 1; - result = prime * result + StringUtils::hashCode(getClassName()); - return result; + if (!other) { + return false; } - - bool MaxPayloadFunction::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!other) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - return true; + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } + return true; +} + } diff --git a/src/core/search/payloads/MinPayloadFunction.cpp b/src/core/search/payloads/MinPayloadFunction.cpp index d6ce41eb..4dec9aef 100644 --- a/src/core/search/payloads/MinPayloadFunction.cpp +++ b/src/core/search/payloads/MinPayloadFunction.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,42 +9,42 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - MinPayloadFunction::~MinPayloadFunction() - { - } - - double MinPayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, - int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) - { - if (numPayloadsSeen == 0) - return currentPayloadScore; - else - return std::min(currentPayloadScore, currentScore); +namespace Lucene { + +MinPayloadFunction::~MinPayloadFunction() { +} + +double MinPayloadFunction::currentScore(int32_t docId, const String& field, int32_t start, int32_t end, + int32_t numPayloadsSeen, double currentScore, double currentPayloadScore) { + if (numPayloadsSeen == 0) { + return currentPayloadScore; + } else { + return std::min(currentPayloadScore, currentScore); } - - double MinPayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) - { - return numPayloadsSeen > 0 ? payloadScore : 1.0; +} + +double MinPayloadFunction::docScore(int32_t docId, const String& field, int32_t numPayloadsSeen, double payloadScore) { + return numPayloadsSeen > 0 ? 
payloadScore : 1.0; +} + +int32_t MinPayloadFunction::hashCode() { + int32_t prime = 31; + int32_t result = 1; + result = prime * result + StringUtils::hashCode(getClassName()); + return result; +} + +bool MinPayloadFunction::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - int32_t MinPayloadFunction::hashCode() - { - int32_t prime = 31; - int32_t result = 1; - result = prime * result + StringUtils::hashCode(getClassName()); - return result; + if (!other) { + return false; } - - bool MinPayloadFunction::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!other) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - return true; + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } + return true; +} + } diff --git a/src/core/search/payloads/PayloadFunction.cpp b/src/core/search/payloads/PayloadFunction.cpp index ea1d07ff..05aeec73 100644 --- a/src/core/search/payloads/PayloadFunction.cpp +++ b/src/core/search/payloads/PayloadFunction.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,13 +7,12 @@ #include "LuceneInc.h" #include "PayloadFunction.h" -namespace Lucene -{ - PayloadFunction::PayloadFunction() - { - } +namespace Lucene { + +PayloadFunction::PayloadFunction() { +} + +PayloadFunction::~PayloadFunction() { +} - PayloadFunction::~PayloadFunction() - { - } } diff --git a/src/core/search/payloads/PayloadNearQuery.cpp b/src/core/search/payloads/PayloadNearQuery.cpp index 64467eee..7283a021 100644 --- a/src/core/search/payloads/PayloadNearQuery.cpp +++ b/src/core/search/payloads/PayloadNearQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,178 +14,165 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - PayloadNearQuery::PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder) : SpanNearQuery(clauses, slop, inOrder) - { - fieldName = clauses[0]->getField(); // all clauses must have same field - this->function = newLucene(); - } - - PayloadNearQuery::PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder, PayloadFunctionPtr function) : SpanNearQuery(clauses, slop, inOrder) - { - fieldName = clauses[0]->getField(); // all clauses must have same field - this->function = function; - } - - PayloadNearQuery::~PayloadNearQuery() - { - } - - WeightPtr PayloadNearQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } - - LuceneObjectPtr PayloadNearQuery::clone(LuceneObjectPtr other) - { - int32_t sz = clauses.size(); - Collection newClauses(Collection::newInstance(sz)); - - for (int32_t i = 0; i < sz; ++i) - 
newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); - - PayloadNearQueryPtr payloadNearQuery(newLucene(newClauses, slop, inOrder)); - payloadNearQuery->setBoost(getBoost()); - return payloadNearQuery; +namespace Lucene { + +PayloadNearQuery::PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder) : SpanNearQuery(clauses, slop, inOrder) { + fieldName = clauses[0]->getField(); // all clauses must have same field + this->function = newLucene(); +} + +PayloadNearQuery::PayloadNearQuery(Collection clauses, int32_t slop, bool inOrder, const PayloadFunctionPtr& function) : SpanNearQuery(clauses, slop, inOrder) { + fieldName = clauses[0]->getField(); // all clauses must have same field + this->function = function; +} + +PayloadNearQuery::~PayloadNearQuery() { +} + +WeightPtr PayloadNearQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + +LuceneObjectPtr PayloadNearQuery::clone(const LuceneObjectPtr& other) { + int32_t sz = clauses.size(); + Collection newClauses(Collection::newInstance(sz)); + + for (int32_t i = 0; i < sz; ++i) { + newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); } - - String PayloadNearQuery::toString(const String& field) - { - StringStream buffer; - buffer << L"payloadNear(["; - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - { - if (clause != clauses.begin()) - buffer << L", "; - buffer << (*clause)->toString(field); + + PayloadNearQueryPtr payloadNearQuery(newLucene(newClauses, slop, inOrder)); + payloadNearQuery->setBoost(getBoost()); + return payloadNearQuery; +} + +String PayloadNearQuery::toString(const String& field) { + StringStream buffer; + buffer << L"payloadNear(["; + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + if (clause != clauses.begin()) { + buffer << L", "; } - buffer << L"], " << slop << L", " << inOrder << L")" << boostString(); - return buffer.str(); + 
buffer << (*clause)->toString(field); } - - bool PayloadNearQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!SpanNearQuery::equals(other)) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - PayloadNearQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - if (fieldName != otherQuery->fieldName) - return false; - if (!function) - { - if (otherQuery->function) - return false; - } - else if (!function->equals(otherQuery->function)) - return false; + buffer << L"], " << slop << L", " << inOrder << L")" << boostString(); + return buffer.str(); +} + +bool PayloadNearQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { return true; } - - int32_t PayloadNearQuery::hashCode() - { - int32_t prime = 31; - int32_t result = SpanNearQuery::hashCode(); - result = prime * result + (fieldName.empty() ? 0 : StringUtils::hashCode(fieldName)); - result = prime * result + (!function ? 
0 : function->hashCode()); - return result; + if (!SpanNearQuery::equals(other)) { + return false; } - - PayloadNearSpanWeight::PayloadNearSpanWeight(SpanQueryPtr query, SearcherPtr searcher) : SpanWeight(query, searcher) - { + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - PayloadNearSpanWeight::~PayloadNearSpanWeight() - { + PayloadNearQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - ScorerPtr PayloadNearSpanWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - return newLucene(query->getSpans(reader), shared_from_this(), similarity, reader->norms(query->getField())); + if (fieldName != otherQuery->fieldName) { + return false; } - - PayloadNearSpanScorer::PayloadNearSpanScorer(SpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms) : SpanScorer(spans, weight, similarity, norms) - { - this->spans = spans; - this->payloadScore = 0.0; - this->payloadsSeen = 0; - this->similarity = getSimilarity(); - } - - PayloadNearSpanScorer::~PayloadNearSpanScorer() - { + if (!function) { + if (otherQuery->function) { + return false; + } + } else if (!function->equals(otherQuery->function)) { + return false; } - - void PayloadNearSpanScorer::getPayloads(Collection subSpans) - { - for (Collection::iterator span = subSpans.begin(); span != subSpans.end(); ++span) - { - if (MiscUtils::typeOf(*span)) - { - NearSpansOrderedPtr ordered(boost::static_pointer_cast(*span)); - if (ordered->isPayloadAvailable()) - processPayloads(ordered->getPayload(), ordered->start(), ordered->end()); - getPayloads(ordered->getSubSpans()); + return true; +} + +int32_t PayloadNearQuery::hashCode() { + int32_t prime = 31; + int32_t result = SpanNearQuery::hashCode(); + result = prime * result + (fieldName.empty() ? 0 : StringUtils::hashCode(fieldName)); + result = prime * result + (!function ? 
0 : function->hashCode()); + return result; +} + +PayloadNearSpanWeight::PayloadNearSpanWeight(const SpanQueryPtr& query, const SearcherPtr& searcher) : SpanWeight(query, searcher) { +} + +PayloadNearSpanWeight::~PayloadNearSpanWeight() { +} + +ScorerPtr PayloadNearSpanWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + return newLucene(query->getSpans(reader), shared_from_this(), similarity, reader->norms(query->getField())); +} + +PayloadNearSpanScorer::PayloadNearSpanScorer(const SpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms) : SpanScorer(spans, weight, similarity, norms) { + this->spans = spans; + this->payloadScore = 0.0; + this->payloadsSeen = 0; + this->similarity = getSimilarity(); +} + +PayloadNearSpanScorer::~PayloadNearSpanScorer() { +} + +void PayloadNearSpanScorer::getPayloads(Collection subSpans) { + for (Collection::iterator span = subSpans.begin(); span != subSpans.end(); ++span) { + if (MiscUtils::typeOf(*span)) { + NearSpansOrderedPtr ordered(boost::static_pointer_cast(*span)); + if (ordered->isPayloadAvailable()) { + processPayloads(ordered->getPayload(), ordered->start(), ordered->end()); } - else if (MiscUtils::typeOf(*span)) - { - NearSpansUnorderedPtr unordered(boost::static_pointer_cast(*span)); - if (unordered->isPayloadAvailable()) - processPayloads(unordered->getPayload(), unordered->start(), unordered->end()); - getPayloads(unordered->getSubSpans()); + getPayloads(ordered->getSubSpans()); + } else if (MiscUtils::typeOf(*span)) { + NearSpansUnorderedPtr unordered(boost::static_pointer_cast(*span)); + if (unordered->isPayloadAvailable()) { + processPayloads(unordered->getPayload(), unordered->start(), unordered->end()); } + getPayloads(unordered->getSubSpans()); } } - - void PayloadNearSpanScorer::processPayloads(Collection payLoads, int32_t start, int32_t end) - { - PayloadNearSpanWeightPtr spanWeight(boost::static_pointer_cast(weight)); - PayloadNearQueryPtr 
nearQuery(boost::static_pointer_cast(spanWeight->query)); - - for (Collection::iterator payload = payLoads.begin(); payload != payLoads.end(); ++payload) - { - payloadScore = nearQuery->function->currentScore(doc, nearQuery->fieldName, start, end, payloadsSeen, payloadScore, - similarity->scorePayload(doc, nearQuery->fieldName, spans->start(), - spans->end(), *payload, 0, payload->size())); - ++payloadsSeen; - } - } - - bool PayloadNearSpanScorer::setFreqCurrentDoc() - { - if (!more) - return false; - Collection spansArr(newCollection(spans)); - payloadScore = 0.0; - payloadsSeen = 0; - getPayloads(spansArr); - return SpanScorer::setFreqCurrentDoc(); - } - - double PayloadNearSpanScorer::score() - { - PayloadNearSpanWeightPtr spanWeight(boost::static_pointer_cast(weight)); - PayloadNearQueryPtr nearQuery(boost::static_pointer_cast(spanWeight->query)); - return SpanScorer::score() * nearQuery->function->docScore(doc, nearQuery->fieldName, payloadsSeen, payloadScore); - } - - ExplanationPtr PayloadNearSpanScorer::explain(int32_t doc) - { - ExplanationPtr result(newLucene()); - ExplanationPtr nonPayloadExpl(SpanScorer::explain(doc)); - result->addDetail(nonPayloadExpl); - ExplanationPtr payloadBoost(newLucene()); - result->addDetail(payloadBoost); - double avgPayloadScore = (payloadsSeen > 0 ? 
(payloadScore / (double)payloadsSeen) : 1.0); - payloadBoost->setValue(avgPayloadScore); - payloadBoost->setDescription(L"scorePayload(...)"); - result->setValue(nonPayloadExpl->getValue() * avgPayloadScore); - result->setDescription(L"bnq, product of:"); - return result; +} + +void PayloadNearSpanScorer::processPayloads(Collection payLoads, int32_t start, int32_t end) { + PayloadNearSpanWeightPtr spanWeight(boost::static_pointer_cast(weight)); + PayloadNearQueryPtr nearQuery(boost::static_pointer_cast(spanWeight->query)); + + for (Collection::iterator payload = payLoads.begin(); payload != payLoads.end(); ++payload) { + payloadScore = nearQuery->function->currentScore(doc, nearQuery->fieldName, start, end, payloadsSeen, payloadScore, + similarity->scorePayload(doc, nearQuery->fieldName, spans->start(), + spans->end(), *payload, 0, payload->size())); + ++payloadsSeen; } } + +bool PayloadNearSpanScorer::setFreqCurrentDoc() { + if (!more) { + return false; + } + Collection spansArr(newCollection(spans)); + payloadScore = 0.0; + payloadsSeen = 0; + getPayloads(spansArr); + return SpanScorer::setFreqCurrentDoc(); +} + +double PayloadNearSpanScorer::score() { + PayloadNearSpanWeightPtr spanWeight(boost::static_pointer_cast(weight)); + PayloadNearQueryPtr nearQuery(boost::static_pointer_cast(spanWeight->query)); + return SpanScorer::score() * nearQuery->function->docScore(doc, nearQuery->fieldName, payloadsSeen, payloadScore); +} + +ExplanationPtr PayloadNearSpanScorer::explain(int32_t doc) { + ExplanationPtr result(newLucene()); + ExplanationPtr nonPayloadExpl(SpanScorer::explain(doc)); + result->addDetail(nonPayloadExpl); + ExplanationPtr payloadBoost(newLucene()); + result->addDetail(payloadBoost); + double avgPayloadScore = (payloadsSeen > 0 ? 
(payloadScore / (double)payloadsSeen) : 1.0); + payloadBoost->setValue(avgPayloadScore); + payloadBoost->setDescription(L"scorePayload(...)"); + result->setValue(nonPayloadExpl->getValue() * avgPayloadScore); + result->setDescription(L"bnq, product of:"); + return result; +} + +} diff --git a/src/core/search/payloads/PayloadSpanUtil.cpp b/src/core/search/payloads/PayloadSpanUtil.cpp index 510d46a3..751f3924 100644 --- a/src/core/search/payloads/PayloadSpanUtil.cpp +++ b/src/core/search/payloads/PayloadSpanUtil.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -20,147 +20,129 @@ #include "Spans.h" #include "MiscUtils.h" -namespace Lucene -{ - PayloadSpanUtil::PayloadSpanUtil(IndexReaderPtr reader) - { - this->reader = reader; - } - - PayloadSpanUtil::~PayloadSpanUtil() - { - } - - Collection PayloadSpanUtil::getPayloadsForQuery(QueryPtr query) - { - Collection payloads(Collection::newInstance()); - queryToSpanQuery(query, payloads); - return payloads; - } - - void PayloadSpanUtil::queryToSpanQuery(QueryPtr query, Collection payloads) - { - if (MiscUtils::typeOf(query)) - { - BooleanQueryPtr booleanQuery(boost::dynamic_pointer_cast(query)); - Collection queryClauses(booleanQuery->getClauses()); - for (Collection::iterator clause = queryClauses.begin(); clause != queryClauses.end(); ++clause) - { - if (!(*clause)->isProhibited()) - queryToSpanQuery((*clause)->getQuery(), payloads); +namespace Lucene { + +PayloadSpanUtil::PayloadSpanUtil(const IndexReaderPtr& reader) { + this->reader = reader; +} + +PayloadSpanUtil::~PayloadSpanUtil() { +} + +Collection PayloadSpanUtil::getPayloadsForQuery(const 
QueryPtr& query) { + Collection payloads(Collection::newInstance()); + queryToSpanQuery(query, payloads); + return payloads; +} + +void PayloadSpanUtil::queryToSpanQuery(const QueryPtr& query, Collection payloads) { + if (MiscUtils::typeOf(query)) { + BooleanQueryPtr booleanQuery(boost::dynamic_pointer_cast(query)); + Collection queryClauses(booleanQuery->getClauses()); + for (Collection::iterator clause = queryClauses.begin(); clause != queryClauses.end(); ++clause) { + if (!(*clause)->isProhibited()) { + queryToSpanQuery((*clause)->getQuery(), payloads); } } - else if (MiscUtils::typeOf(query)) - { - PhraseQueryPtr phraseQuery(boost::dynamic_pointer_cast(query)); - Collection phraseQueryTerms(phraseQuery->getTerms()); - Collection clauses(Collection::newInstance(phraseQueryTerms.size())); - for (int32_t i = 0; i < phraseQueryTerms.size(); ++i) - clauses[i] = newLucene(phraseQueryTerms[i]); - - int32_t slop = phraseQuery->getSlop(); - bool inorder = false; - - if (slop == 0) - inorder = true; - - SpanNearQueryPtr sp(newLucene(clauses, slop, inorder)); - sp->setBoost(query->getBoost()); - getPayloads(payloads, sp); - } - else if (MiscUtils::typeOf(query)) - { - TermQueryPtr termQuery(boost::dynamic_pointer_cast(query)); - SpanTermQueryPtr stq(newLucene(termQuery->getTerm())); - stq->setBoost(query->getBoost()); - getPayloads(payloads, stq); + } else if (MiscUtils::typeOf(query)) { + PhraseQueryPtr phraseQuery(boost::dynamic_pointer_cast(query)); + Collection phraseQueryTerms(phraseQuery->getTerms()); + Collection clauses(Collection::newInstance(phraseQueryTerms.size())); + for (int32_t i = 0; i < phraseQueryTerms.size(); ++i) { + clauses[i] = newLucene(phraseQueryTerms[i]); } - else if (MiscUtils::typeOf(query)) - { - SpanQueryPtr spanQuery(boost::dynamic_pointer_cast(query)); - getPayloads(payloads, spanQuery); - } - else if (MiscUtils::typeOf(query)) - { - FilteredQueryPtr filteredQuery(boost::dynamic_pointer_cast(query)); - 
queryToSpanQuery(filteredQuery->getQuery(), payloads); + + int32_t slop = phraseQuery->getSlop(); + bool inorder = false; + + if (slop == 0) { + inorder = true; } - else if (MiscUtils::typeOf(query)) - { - DisjunctionMaxQueryPtr maxQuery(boost::dynamic_pointer_cast(query)); - for (Collection::iterator disjunct = maxQuery->begin(); disjunct != maxQuery->end(); ++disjunct) - queryToSpanQuery(*disjunct, payloads); + + SpanNearQueryPtr sp(newLucene(clauses, slop, inorder)); + sp->setBoost(query->getBoost()); + getPayloads(payloads, sp); + } else if (MiscUtils::typeOf(query)) { + TermQueryPtr termQuery(boost::dynamic_pointer_cast(query)); + SpanTermQueryPtr stq(newLucene(termQuery->getTerm())); + stq->setBoost(query->getBoost()); + getPayloads(payloads, stq); + } else if (MiscUtils::typeOf(query)) { + SpanQueryPtr spanQuery(boost::dynamic_pointer_cast(query)); + getPayloads(payloads, spanQuery); + } else if (MiscUtils::typeOf(query)) { + FilteredQueryPtr filteredQuery(boost::dynamic_pointer_cast(query)); + queryToSpanQuery(filteredQuery->getQuery(), payloads); + } else if (MiscUtils::typeOf(query)) { + DisjunctionMaxQueryPtr maxQuery(boost::dynamic_pointer_cast(query)); + for (Collection::iterator disjunct = maxQuery->begin(); disjunct != maxQuery->end(); ++disjunct) { + queryToSpanQuery(*disjunct, payloads); } - else if (MiscUtils::typeOf(query)) - { - MultiPhraseQueryPtr multiphraseQuery(boost::dynamic_pointer_cast(query)); - Collection< Collection > termArrays(multiphraseQuery->getTermArrays()); - Collection positions(multiphraseQuery->getPositions()); - if (!positions.empty()) - { - int32_t maxPosition = positions[positions.size() - 1]; - for (int32_t i = 0; i < positions.size() - 1; ++i) - { - if (positions[i] > maxPosition) - maxPosition = positions[i]; + } else if (MiscUtils::typeOf(query)) { + MultiPhraseQueryPtr multiphraseQuery(boost::dynamic_pointer_cast(query)); + Collection< Collection > termArrays(multiphraseQuery->getTermArrays()); + Collection 
positions(multiphraseQuery->getPositions()); + if (!positions.empty()) { + int32_t maxPosition = positions[positions.size() - 1]; + for (int32_t i = 0; i < positions.size() - 1; ++i) { + if (positions[i] > maxPosition) { + maxPosition = positions[i]; } - - Collection< Collection > disjunctLists(Collection< Collection >::newInstance(maxPosition + 1)); - int32_t distinctPositions = 0; - - for (int32_t i = 0; i < termArrays.size(); ++i) - { - Collection termArray(termArrays[i]); - Collection disjuncts(disjunctLists[positions[i]]); - if (!disjuncts) - { - disjuncts = Collection::newInstance(); - disjunctLists[positions[i]] = disjuncts; - ++distinctPositions; - } - for (Collection::iterator term = termArray.begin(); term != termArray.end(); ++term) - disjuncts.add(newLucene(*term)); + } + + Collection< Collection > disjunctLists(Collection< Collection >::newInstance(maxPosition + 1)); + int32_t distinctPositions = 0; + + for (int32_t i = 0; i < termArrays.size(); ++i) { + Collection termArray(termArrays[i]); + Collection disjuncts(disjunctLists[positions[i]]); + if (!disjuncts) { + disjuncts = Collection::newInstance(); + disjunctLists[positions[i]] = disjuncts; + ++distinctPositions; } - - int32_t positionGaps = 0; - int32_t position = 0; - Collection clauses(Collection::newInstance(distinctPositions)); - for (int32_t i = 0; i < disjunctLists.size(); ++i) - { - Collection disjuncts(disjunctLists[i]); - if (disjuncts) - { - Collection spanDisjuncts(Collection::newInstance(disjuncts.size())); - for (int32_t j = 0; j < disjuncts.size(); ++j) - spanDisjuncts[j] = boost::dynamic_pointer_cast(disjuncts[j]); - clauses[position++] = newLucene(spanDisjuncts); + for (Collection::iterator term = termArray.begin(); term != termArray.end(); ++term) { + disjuncts.add(newLucene(*term)); + } + } + + int32_t positionGaps = 0; + int32_t position = 0; + Collection clauses(Collection::newInstance(distinctPositions)); + for (int32_t i = 0; i < disjunctLists.size(); ++i) { + Collection 
disjuncts(disjunctLists[i]); + if (disjuncts) { + Collection spanDisjuncts(Collection::newInstance(disjuncts.size())); + for (int32_t j = 0; j < disjuncts.size(); ++j) { + spanDisjuncts[j] = boost::dynamic_pointer_cast(disjuncts[j]); } - else - ++positionGaps; + clauses[position++] = newLucene(spanDisjuncts); + } else { + ++positionGaps; } - - int32_t slop = multiphraseQuery->getSlop(); - bool inorder = (slop == 0); - - SpanNearQueryPtr sp(newLucene(clauses, slop + positionGaps, inorder)); - sp->setBoost(query->getBoost()); - getPayloads(payloads, sp); } + + int32_t slop = multiphraseQuery->getSlop(); + bool inorder = (slop == 0); + + SpanNearQueryPtr sp(newLucene(clauses, slop + positionGaps, inorder)); + sp->setBoost(query->getBoost()); + getPayloads(payloads, sp); } } - - void PayloadSpanUtil::getPayloads(Collection payloads, SpanQueryPtr query) - { - SpansPtr spans(query->getSpans(reader)); - - while (spans->next()) - { - if (spans->isPayloadAvailable()) - { - Collection payload(spans->getPayload()); - for (Collection::iterator bytes = payload.begin(); bytes != payload.end(); ++bytes) - payloads.add(*bytes); +} + +void PayloadSpanUtil::getPayloads(Collection payloads, const SpanQueryPtr& query) { + SpansPtr spans(query->getSpans(reader)); + + while (spans->next()) { + if (spans->isPayloadAvailable()) { + Collection payload(spans->getPayload()); + for (Collection::iterator bytes = payload.begin(); bytes != payload.end(); ++bytes) { + payloads.add(*bytes); } } } } + +} diff --git a/src/core/search/payloads/PayloadTermQuery.cpp b/src/core/search/payloads/PayloadTermQuery.cpp index 6ef2beb6..9d8e4af5 100644 --- a/src/core/search/payloads/PayloadTermQuery.cpp +++ b/src/core/search/payloads/PayloadTermQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -16,166 +16,152 @@ #include "PayloadFunction.h" #include "MiscUtils.h" -namespace Lucene -{ - PayloadTermQuery::PayloadTermQuery(TermPtr term, PayloadFunctionPtr function, bool includeSpanScore) : SpanTermQuery(term) - { - this->function = function; - this->includeSpanScore = includeSpanScore; - } - - PayloadTermQuery::~PayloadTermQuery() - { - } - - WeightPtr PayloadTermQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } - - LuceneObjectPtr PayloadTermQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(term, function, includeSpanScore)); - PayloadTermQueryPtr termQuery(boost::dynamic_pointer_cast(clone)); - termQuery->function = function; - termQuery->includeSpanScore = includeSpanScore; - return termQuery; - } - - bool PayloadTermQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!SpanTermQuery::equals(other)) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) - return false; - PayloadTermQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - if (!function) - { - if (otherQuery->function) - return false; - } - else if (!function->equals(otherQuery->function)) - return false; - if (includeSpanScore != otherQuery->includeSpanScore) - return false; +namespace Lucene { + +PayloadTermQuery::PayloadTermQuery(const TermPtr& term, const PayloadFunctionPtr& function, bool includeSpanScore) : SpanTermQuery(term) { + this->function = function; + this->includeSpanScore = includeSpanScore; +} + +PayloadTermQuery::~PayloadTermQuery() { +} + +WeightPtr PayloadTermQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + 
+LuceneObjectPtr PayloadTermQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(term, function, includeSpanScore)); + PayloadTermQueryPtr termQuery(boost::dynamic_pointer_cast(clone)); + termQuery->function = function; + termQuery->includeSpanScore = includeSpanScore; + return termQuery; +} + +bool PayloadTermQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { return true; } - - int32_t PayloadTermQuery::hashCode() - { - int32_t prime = 31; - int32_t result = SpanTermQuery::hashCode(); - result = prime * result + (function ? function->hashCode() : 0); - result = prime * result + (includeSpanScore ? 1231 : 1237); - return result; - } - - PayloadTermWeight::PayloadTermWeight(PayloadTermQueryPtr query, SearcherPtr searcher) : SpanWeight(query, searcher) - { + if (!SpanTermQuery::equals(other)) { + return false; } - - PayloadTermWeight::~PayloadTermWeight() - { + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - ScorerPtr PayloadTermWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - { - return newLucene(boost::dynamic_pointer_cast(query->getSpans(reader)), shared_from_this(), similarity, reader->norms(query->getField())); + PayloadTermQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - PayloadTermSpanScorer::PayloadTermSpanScorer(TermSpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms) : SpanScorer(spans, weight, similarity, norms) - { - positions = spans->getPositions(); - payload = ByteArray::newInstance(256); - payloadScore = 0.0; - payloadsSeen = 0; - } - - PayloadTermSpanScorer::~PayloadTermSpanScorer() - { - } - - bool PayloadTermSpanScorer::setFreqCurrentDoc() - { - if (!more) + if (!function) { + if (otherQuery->function) { return false; - doc = spans->doc(); - freq = 0.0; - payloadScore = 0.0; - payloadsSeen = 0; - SimilarityPtr 
similarity1(getSimilarity()); - while (more && doc == spans->doc()) - { - int32_t matchLength = spans->end() - spans->start(); - - freq += similarity1->sloppyFreq(matchLength); - processPayload(similarity1); - - more = spans->next(); // this moves positions to the next match in this document } - return more || (freq != 0); + } else if (!function->equals(otherQuery->function)) { + return false; } - - void PayloadTermSpanScorer::processPayload(SimilarityPtr similarity) - { - if (positions->isPayloadAvailable()) - { - PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); - PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); - - payload = positions->getPayload(payload, 0); - payloadScore = payloadQuery->function->currentScore(doc, payloadQuery->term->field(), spans->start(), spans->end(), - payloadsSeen, payloadScore, similarity->scorePayload(doc, - payloadQuery->term->field(), spans->start(), spans->end(), - payload, 0, positions->getPayloadLength())); - ++payloadsSeen; - } - else - { - // zero out the payload? - } + if (includeSpanScore != otherQuery->includeSpanScore) { + return false; } - - double PayloadTermSpanScorer::score() - { - PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); - PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); - return payloadQuery->includeSpanScore ? getSpanScore() * getPayloadScore() : getPayloadScore(); - } - - double PayloadTermSpanScorer::getSpanScore() - { - return SpanScorer::score(); + return true; +} + +int32_t PayloadTermQuery::hashCode() { + int32_t prime = 31; + int32_t result = SpanTermQuery::hashCode(); + result = prime * result + (function ? function->hashCode() : 0); + result = prime * result + (includeSpanScore ? 
1231 : 1237); + return result; +} + +PayloadTermWeight::PayloadTermWeight(const PayloadTermQueryPtr& query, const SearcherPtr& searcher) : SpanWeight(query, searcher) { +} + +PayloadTermWeight::~PayloadTermWeight() { +} + +ScorerPtr PayloadTermWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + return newLucene(boost::dynamic_pointer_cast(query->getSpans(reader)), shared_from_this(), similarity, reader->norms(query->getField())); +} + +PayloadTermSpanScorer::PayloadTermSpanScorer(const TermSpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms) : SpanScorer(spans, weight, similarity, norms) { + positions = spans->getPositions(); + payload = ByteArray::newInstance(256); + payloadScore = 0.0; + payloadsSeen = 0; +} + +PayloadTermSpanScorer::~PayloadTermSpanScorer() { +} + +bool PayloadTermSpanScorer::setFreqCurrentDoc() { + if (!more) { + return false; + } + doc = spans->doc(); + freq = 0.0; + payloadScore = 0.0; + payloadsSeen = 0; + SimilarityPtr similarity1(getSimilarity()); + while (more && doc == spans->doc()) { + int32_t matchLength = spans->end() - spans->start(); + + freq += similarity1->sloppyFreq(matchLength); + processPayload(similarity1); + + more = spans->next(); // this moves positions to the next match in this document } - - double PayloadTermSpanScorer::getPayloadScore() - { + return more || (freq != 0); +} + +void PayloadTermSpanScorer::processPayload(const SimilarityPtr& similarity) { + if (positions->isPayloadAvailable()) { PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); - return payloadQuery->function->docScore(doc, payloadQuery->term->field(), payloadsSeen, payloadScore); - } - - ExplanationPtr PayloadTermSpanScorer::explain(int32_t doc) - { - ComplexExplanationPtr result(newLucene()); - ExplanationPtr nonPayloadExpl(SpanScorer::explain(doc)); - 
result->addDetail(nonPayloadExpl); - - ExplanationPtr payloadBoost(newLucene()); - result->addDetail(payloadBoost); - - double payloadScore = getPayloadScore(); - payloadBoost->setValue(payloadScore); - payloadBoost->setDescription(L"scorePayload(...)"); - - result->setValue(nonPayloadExpl->getValue() * payloadScore); - result->setDescription(L"btq, product of:"); - result->setMatch(nonPayloadExpl->getValue() != 0.0); - - return result; + + payload = positions->getPayload(payload, 0); + payloadScore = payloadQuery->function->currentScore(doc, payloadQuery->term->field(), spans->start(), spans->end(), + payloadsSeen, payloadScore, similarity->scorePayload(doc, + payloadQuery->term->field(), spans->start(), spans->end(), + payload, 0, positions->getPayloadLength())); + ++payloadsSeen; + } else { + // zero out the payload? } } + +double PayloadTermSpanScorer::score() { + PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); + PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); + return payloadQuery->includeSpanScore ? 
getSpanScore() * getPayloadScore() : getPayloadScore(); +} + +double PayloadTermSpanScorer::getSpanScore() { + return SpanScorer::score(); +} + +double PayloadTermSpanScorer::getPayloadScore() { + PayloadTermWeightPtr payloadWeight(boost::static_pointer_cast(weight)); + PayloadTermQueryPtr payloadQuery(boost::static_pointer_cast(payloadWeight->query)); + return payloadQuery->function->docScore(doc, payloadQuery->term->field(), payloadsSeen, payloadScore); +} + +ExplanationPtr PayloadTermSpanScorer::explain(int32_t doc) { + ComplexExplanationPtr result(newLucene()); + ExplanationPtr nonPayloadExpl(SpanScorer::explain(doc)); + result->addDetail(nonPayloadExpl); + + ExplanationPtr payloadBoost(newLucene()); + result->addDetail(payloadBoost); + + double payloadScore = getPayloadScore(); + payloadBoost->setValue(payloadScore); + payloadBoost->setDescription(L"scorePayload(...)"); + + result->setValue(nonPayloadExpl->getValue() * payloadScore); + result->setDescription(L"btq, product of:"); + result->setMatch(nonPayloadExpl->getValue() != 0.0); + + return result; +} + +} diff --git a/src/core/search/spans/FieldMaskingSpanQuery.cpp b/src/core/search/spans/FieldMaskingSpanQuery.cpp index f1b902d1..920dcccf 100644 --- a/src/core/search/spans/FieldMaskingSpanQuery.cpp +++ b/src/core/search/spans/FieldMaskingSpanQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,100 +9,90 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - FieldMaskingSpanQuery::FieldMaskingSpanQuery(SpanQueryPtr maskedQuery, const String& maskedField) - { - this->maskedQuery = maskedQuery; - this->field = maskedField; - } - - FieldMaskingSpanQuery::~FieldMaskingSpanQuery() - { - } - - String FieldMaskingSpanQuery::getField() - { - return field; - } - - SpanQueryPtr FieldMaskingSpanQuery::getMaskedQuery() - { - return maskedQuery; - } - - // :NOTE: getBoost and setBoost are not proxied to the maskedQuery - // ...this is done to be more consistent with things like SpanFirstQuery - - SpansPtr FieldMaskingSpanQuery::getSpans(IndexReaderPtr reader) - { - return maskedQuery->getSpans(reader); - } - - void FieldMaskingSpanQuery::extractTerms(SetTerm terms) - { - maskedQuery->extractTerms(terms); - } - - WeightPtr FieldMaskingSpanQuery::createWeight(SearcherPtr searcher) - { - return maskedQuery->createWeight(searcher); - } - - SimilarityPtr FieldMaskingSpanQuery::getSimilarity(SearcherPtr searcher) - { - return maskedQuery->getSimilarity(searcher); - } - - QueryPtr FieldMaskingSpanQuery::rewrite(IndexReaderPtr reader) - { - FieldMaskingSpanQueryPtr clone; - - SpanQueryPtr rewritten(boost::dynamic_pointer_cast(maskedQuery->rewrite(reader))); - if (rewritten != maskedQuery) - { - clone = boost::dynamic_pointer_cast(this->clone()); - clone->maskedQuery = rewritten; - } - - if (clone) - return clone; - else - return shared_from_this(); - } - - String FieldMaskingSpanQuery::toString(const String& field) - { - StringStream buffer; - buffer << L"mask(" << maskedQuery->toString(field) << L")"; - buffer << boostString() << L" as " << this->field; - return buffer.str(); +namespace Lucene { + +FieldMaskingSpanQuery::FieldMaskingSpanQuery(const SpanQueryPtr& maskedQuery, const String& maskedField) { + this->maskedQuery = maskedQuery; + this->field = maskedField; +} + 
+FieldMaskingSpanQuery::~FieldMaskingSpanQuery() { +} + +String FieldMaskingSpanQuery::getField() { + return field; +} + +SpanQueryPtr FieldMaskingSpanQuery::getMaskedQuery() { + return maskedQuery; +} + +// :NOTE: getBoost and setBoost are not proxied to the maskedQuery +// ...this is done to be more consistent with things like SpanFirstQuery + +SpansPtr FieldMaskingSpanQuery::getSpans(const IndexReaderPtr& reader) { + return maskedQuery->getSpans(reader); +} + +void FieldMaskingSpanQuery::extractTerms(SetTerm terms) { + maskedQuery->extractTerms(terms); +} + +WeightPtr FieldMaskingSpanQuery::createWeight(const SearcherPtr& searcher) { + return maskedQuery->createWeight(searcher); +} + +SimilarityPtr FieldMaskingSpanQuery::getSimilarity(const SearcherPtr& searcher) { + return maskedQuery->getSimilarity(searcher); +} + +QueryPtr FieldMaskingSpanQuery::rewrite(const IndexReaderPtr& reader) { + FieldMaskingSpanQueryPtr clone; + + SpanQueryPtr rewritten(boost::dynamic_pointer_cast(maskedQuery->rewrite(reader))); + if (rewritten != maskedQuery) { + clone = boost::dynamic_pointer_cast(this->clone()); + clone->maskedQuery = rewritten; } - - bool FieldMaskingSpanQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - FieldMaskingSpanQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - - return (getField() == otherQuery->getField() && getBoost() == otherQuery->getBoost() && - getMaskedQuery()->equals(otherQuery->getMaskedQuery())); + + if (clone) { + return clone; + } else { + return shared_from_this(); } - - int32_t FieldMaskingSpanQuery::hashCode() - { - return getMaskedQuery()->hashCode() ^ StringUtils::hashCode(getField()) ^ MiscUtils::doubleToRawIntBits(getBoost()); +} + +String FieldMaskingSpanQuery::toString(const String& field) { + StringStream buffer; + buffer << L"mask(" << maskedQuery->toString(field) << L")"; + buffer << boostString() << L" as " << this->field; + return 
buffer.str(); +} + +bool FieldMaskingSpanQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - LuceneObjectPtr FieldMaskingSpanQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(maskedQuery, field)); - FieldMaskingSpanQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); - cloneQuery->maskedQuery = maskedQuery; - cloneQuery->field = field; - return cloneQuery; + + FieldMaskingSpanQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } + + return (getField() == otherQuery->getField() && getBoost() == otherQuery->getBoost() && + getMaskedQuery()->equals(otherQuery->getMaskedQuery())); +} + +int32_t FieldMaskingSpanQuery::hashCode() { + return getMaskedQuery()->hashCode() ^ StringUtils::hashCode(getField()) ^ MiscUtils::doubleToRawIntBits(getBoost()); +} + +LuceneObjectPtr FieldMaskingSpanQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(maskedQuery, field)); + FieldMaskingSpanQueryPtr cloneQuery(boost::dynamic_pointer_cast(clone)); + cloneQuery->maskedQuery = maskedQuery; + cloneQuery->field = field; + return cloneQuery; +} + } diff --git a/src/core/search/spans/NearSpansOrdered.cpp b/src/core/search/spans/NearSpansOrdered.cpp index e53e470c..2e86e540 100644 --- a/src/core/search/spans/NearSpansOrdered.cpp +++ b/src/core/search/spans/NearSpansOrdered.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,291 +8,251 @@ #include "NearSpansOrdered.h" #include "SpanNearQuery.h" -namespace Lucene -{ - NearSpansOrdered::NearSpansOrdered(SpanNearQueryPtr spanNearQuery, IndexReaderPtr reader, bool collectPayloads) - { - if (spanNearQuery->getClauses().size() < 2) - boost::throw_exception(IllegalArgumentException(L"Less than 2 clauses: " + spanNearQuery->toString())); - this->firstTime = true; - this->more = false; - this->inSameDoc = false; - this->matchDoc = -1; - this->matchStart = -1; - this->matchEnd = -1; - this->collectPayloads = collectPayloads; - this->allowedSlop = spanNearQuery->getSlop(); - Collection clauses(spanNearQuery->getClauses()); - this->subSpans = Collection::newInstance(clauses.size()); - this->matchPayload = Collection::newInstance(); - this->subSpansByDoc = Collection::newInstance(clauses.size()); - for (int32_t i = 0; i < clauses.size(); ++i) - { - subSpans[i] = clauses[i]->getSpans(reader); - subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() - } - this->query = spanNearQuery; // kept for toString() only. 
- } - - NearSpansOrdered::~NearSpansOrdered() - { - } - - int32_t NearSpansOrdered::doc() - { - return matchDoc; - } - - int32_t NearSpansOrdered::start() - { - return matchStart; - } - - int32_t NearSpansOrdered::end() - { - return matchEnd; - } - - Collection NearSpansOrdered::getSubSpans() - { - return subSpans; - } - - Collection NearSpansOrdered::getPayload() - { - return matchPayload; +namespace Lucene { + +NearSpansOrdered::NearSpansOrdered(const SpanNearQueryPtr& spanNearQuery, const IndexReaderPtr& reader, bool collectPayloads) { + if (spanNearQuery->getClauses().size() < 2) { + boost::throw_exception(IllegalArgumentException(L"Less than 2 clauses: " + spanNearQuery->toString())); } - - bool NearSpansOrdered::isPayloadAvailable() - { - return !matchPayload.empty(); + this->firstTime = true; + this->more = false; + this->inSameDoc = false; + this->matchDoc = -1; + this->matchStart = -1; + this->matchEnd = -1; + this->collectPayloads = collectPayloads; + this->allowedSlop = spanNearQuery->getSlop(); + Collection clauses(spanNearQuery->getClauses()); + this->subSpans = Collection::newInstance(clauses.size()); + this->matchPayload = Collection::newInstance(); + this->subSpansByDoc = Collection::newInstance(clauses.size()); + for (int32_t i = 0; i < clauses.size(); ++i) { + subSpans[i] = clauses[i]->getSpans(reader); + subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() } - - bool NearSpansOrdered::next() - { - if (firstTime) - { - firstTime = false; - for (int32_t i = 0; i < subSpans.size(); ++i) - { - if (!subSpans[i]->next()) - { - more = false; - return false; - } - } - more = true; - } - if (collectPayloads) - matchPayload.clear(); - return advanceAfterOrdered(); - } - - bool NearSpansOrdered::skipTo(int32_t target) - { - if (firstTime) - { - firstTime = false; - for (int32_t i = 0; i < subSpans.size(); ++i) - { - if (!subSpans[i]->skipTo(target)) - { - more = false; - return false; - } - } - more = true; - } - else if (more && (subSpans[0]->doc() < 
target)) - { - if (subSpans[0]->skipTo(target)) - inSameDoc = false; - else - { + this->query = spanNearQuery; // kept for toString() only. +} + +NearSpansOrdered::~NearSpansOrdered() { +} + +int32_t NearSpansOrdered::doc() { + return matchDoc; +} + +int32_t NearSpansOrdered::start() { + return matchStart; +} + +int32_t NearSpansOrdered::end() { + return matchEnd; +} + +Collection NearSpansOrdered::getSubSpans() { + return subSpans; +} + +Collection NearSpansOrdered::getPayload() { + return matchPayload; +} + +bool NearSpansOrdered::isPayloadAvailable() { + return !matchPayload.empty(); +} + +bool NearSpansOrdered::next() { + if (firstTime) { + firstTime = false; + for (int32_t i = 0; i < subSpans.size(); ++i) { + if (!subSpans[i]->next()) { more = false; return false; } } - if (collectPayloads) - matchPayload.clear(); - return advanceAfterOrdered(); + more = true; } - - bool NearSpansOrdered::advanceAfterOrdered() - { - while (more && (inSameDoc || toSameDoc())) - { - if (stretchToOrder() && shrinkToAfterShortestMatch()) - return true; - } - return false; // no more matches + if (collectPayloads) { + matchPayload.clear(); } - - struct lessSpanDoc - { - inline bool operator()(const SpansPtr& first, const SpansPtr& second) const - { - return ((first->doc() - second->doc()) < 0); - } - }; - - bool NearSpansOrdered::toSameDoc() - { - std::sort(subSpansByDoc.begin(), subSpansByDoc.end(), lessSpanDoc()); - int32_t firstIndex = 0; - int32_t maxDoc = subSpansByDoc[subSpansByDoc.size() - 1]->doc(); - while (subSpansByDoc[firstIndex]->doc() != maxDoc) - { - if (!subSpansByDoc[firstIndex]->skipTo(maxDoc)) - { + return advanceAfterOrdered(); +} + +bool NearSpansOrdered::skipTo(int32_t target) { + if (firstTime) { + firstTime = false; + for (int32_t i = 0; i < subSpans.size(); ++i) { + if (!subSpans[i]->skipTo(target)) { more = false; - inSameDoc = false; return false; } - maxDoc = subSpansByDoc[firstIndex]->doc(); - if (++firstIndex == subSpansByDoc.size()) - firstIndex = 0; 
} - for (int32_t i = 0; i < subSpansByDoc.size(); ++i) - { - BOOST_ASSERT(subSpansByDoc[i]->doc() == maxDoc); + more = true; + } else if (more && (subSpans[0]->doc() < target)) { + if (subSpans[0]->skipTo(target)) { + inSameDoc = false; + } else { + more = false; + return false; } - inSameDoc = true; - return true; } - - bool NearSpansOrdered::docSpansOrdered(SpansPtr spans1, SpansPtr spans2) - { - BOOST_ASSERT(spans1->doc() == spans2->doc()); - int32_t start1 = spans1->start(); - int32_t start2 = spans2->start(); - // Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() - return start1 == start2 ? (spans1->end() < spans2->end()) : (start1 < start2); + if (collectPayloads) { + matchPayload.clear(); } - - bool NearSpansOrdered::docSpansOrdered(int32_t start1, int32_t end1, int32_t start2, int32_t end2) - { - return start1 == start2 ? (end1 < end2) : (start1 < start2); + return advanceAfterOrdered(); +} + +bool NearSpansOrdered::advanceAfterOrdered() { + while (more && (inSameDoc || toSameDoc())) { + if (stretchToOrder() && shrinkToAfterShortestMatch()) { + return true; + } } - - bool NearSpansOrdered::stretchToOrder() - { - matchDoc = subSpans[0]->doc(); - for (int32_t i = 1; inSameDoc && (i < subSpans.size()); ++i) - { - while (!docSpansOrdered(subSpans[i - 1], subSpans[i])) - { - if (!subSpans[i]->next()) - { - inSameDoc = false; - more = false; - break; - } - else if (matchDoc != subSpans[i]->doc()) - { - inSameDoc = false; - break; - } + return false; // no more matches +} + +struct lessSpanDoc { + inline bool operator()(const SpansPtr& first, const SpansPtr& second) const { + return ((first->doc() - second->doc()) < 0); + } +}; + +bool NearSpansOrdered::toSameDoc() { + std::sort(subSpansByDoc.begin(), subSpansByDoc.end(), lessSpanDoc()); + int32_t firstIndex = 0; + int32_t maxDoc = subSpansByDoc[subSpansByDoc.size() - 1]->doc(); + while (subSpansByDoc[firstIndex]->doc() != maxDoc) { + if (!subSpansByDoc[firstIndex]->skipTo(maxDoc)) { + more = 
false; + inSameDoc = false; + return false; + } + maxDoc = subSpansByDoc[firstIndex]->doc(); + if (++firstIndex == subSpansByDoc.size()) { + firstIndex = 0; + } + } + for (int32_t i = 0; i < subSpansByDoc.size(); ++i) { + BOOST_ASSERT(subSpansByDoc[i]->doc() == maxDoc); + } + inSameDoc = true; + return true; +} + +bool NearSpansOrdered::docSpansOrdered(const SpansPtr& spans1, const SpansPtr& spans2) { + BOOST_ASSERT(spans1->doc() == spans2->doc()); + int32_t start1 = spans1->start(); + int32_t start2 = spans2->start(); + // Do not call docSpansOrdered(int,int,int,int) to avoid invoking .end() + return start1 == start2 ? (spans1->end() < spans2->end()) : (start1 < start2); +} + +bool NearSpansOrdered::docSpansOrdered(int32_t start1, int32_t end1, int32_t start2, int32_t end2) { + return start1 == start2 ? (end1 < end2) : (start1 < start2); +} + +bool NearSpansOrdered::stretchToOrder() { + matchDoc = subSpans[0]->doc(); + for (int32_t i = 1; inSameDoc && (i < subSpans.size()); ++i) { + while (!docSpansOrdered(subSpans[i - 1], subSpans[i])) { + if (!subSpans[i]->next()) { + inSameDoc = false; + more = false; + break; + } else if (matchDoc != subSpans[i]->doc()) { + inSameDoc = false; + break; } } - return inSameDoc; } - - bool NearSpansOrdered::shrinkToAfterShortestMatch() - { - SpansPtr subSpan(subSpans[subSpans.size() - 1]); - matchStart = subSpan->start(); - matchEnd = subSpan->end(); - SetByteArray possibleMatchPayloads(SetByteArray::newInstance()); - if (subSpan->isPayloadAvailable()) - { - Collection payload(subSpan->getPayload()); - possibleMatchPayloads.addAll(payload.begin(), payload.end()); + return inSameDoc; +} + +bool NearSpansOrdered::shrinkToAfterShortestMatch() { + SpansPtr subSpan(subSpans[subSpans.size() - 1]); + matchStart = subSpan->start(); + matchEnd = subSpan->end(); + SetByteArray possibleMatchPayloads(SetByteArray::newInstance()); + if (subSpan->isPayloadAvailable()) { + Collection payload(subSpan->getPayload()); + 
possibleMatchPayloads.addAll(payload.begin(), payload.end()); + } + + Collection possiblePayload; + + int32_t matchSlop = 0; + int32_t lastStart = matchStart; + int32_t lastEnd = matchEnd; + for (int32_t i = subSpans.size() - 2; i >= 0; --i) { + SpansPtr prevSpans(subSpans[i]); + if (collectPayloads && prevSpans->isPayloadAvailable()) { + Collection payload(prevSpans->getPayload()); + possiblePayload = Collection::newInstance(payload.begin(), payload.end()); } - - Collection possiblePayload; - - int32_t matchSlop = 0; - int32_t lastStart = matchStart; - int32_t lastEnd = matchEnd; - for (int32_t i = subSpans.size() - 2; i >= 0; --i) - { - SpansPtr prevSpans(subSpans[i]); - if (collectPayloads && prevSpans->isPayloadAvailable()) - { - Collection payload(prevSpans->getPayload()); - possiblePayload = Collection::newInstance(payload.begin(), payload.end()); - } - - int32_t prevStart = prevSpans->start(); - int32_t prevEnd = prevSpans->end(); - while (true) // Advance prevSpans until after (lastStart, lastEnd) - { - if (!prevSpans->next()) - { - inSameDoc = false; - more = false; - break; // Check remaining subSpans for final match. - } - else if (matchDoc != prevSpans->doc()) - { - inSameDoc = false; // The last subSpans is not advanced here. - break; // Check remaining subSpans for last match in this document. - } - else - { - int32_t ppStart = prevSpans->start(); - int32_t ppEnd = prevSpans->end(); // Cannot avoid invoking .end() - if (!docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) - break; // Check remaining subSpans. 
- else - { - prevStart = ppStart; - prevEnd = ppEnd; - if (collectPayloads && prevSpans->isPayloadAvailable()) - { - Collection payload(prevSpans->getPayload()); - possiblePayload = Collection::newInstance(payload.begin(), payload.end()); - } + + int32_t prevStart = prevSpans->start(); + int32_t prevEnd = prevSpans->end(); + while (true) { // Advance prevSpans until after (lastStart, lastEnd) + if (!prevSpans->next()) { + inSameDoc = false; + more = false; + break; // Check remaining subSpans for final match. + } else if (matchDoc != prevSpans->doc()) { + inSameDoc = false; // The last subSpans is not advanced here. + break; // Check remaining subSpans for last match in this document. + } else { + int32_t ppStart = prevSpans->start(); + int32_t ppEnd = prevSpans->end(); // Cannot avoid invoking .end() + if (!docSpansOrdered(ppStart, ppEnd, lastStart, lastEnd)) { + break; // Check remaining subSpans. + } else { + prevStart = ppStart; + prevEnd = ppEnd; + if (collectPayloads && prevSpans->isPayloadAvailable()) { + Collection payload(prevSpans->getPayload()); + possiblePayload = Collection::newInstance(payload.begin(), payload.end()); } } } - - if (collectPayloads && possiblePayload) - possibleMatchPayloads.addAll(possiblePayload.begin(), possiblePayload.end()); - - BOOST_ASSERT(prevStart <= matchStart); - if (matchStart > prevEnd) // Only non overlapping spans add to slop. - matchSlop += (matchStart - prevEnd); - - // Do not break on (matchSlop > allowedSlop) here to make sure that subSpans[0] is - // advanced after the match, if any. 
- matchStart = prevStart; - lastStart = prevStart; - lastEnd = prevEnd; } - - bool match = (matchSlop <= allowedSlop); - - if (collectPayloads && match && !possibleMatchPayloads.empty()) - matchPayload.addAll(possibleMatchPayloads.begin(), possibleMatchPayloads.end()); - - return match; // ordered and allowed slop + + if (collectPayloads && possiblePayload) { + possibleMatchPayloads.addAll(possiblePayload.begin(), possiblePayload.end()); + } + + BOOST_ASSERT(prevStart <= matchStart); + if (matchStart > prevEnd) { // Only non overlapping spans add to slop. + matchSlop += (matchStart - prevEnd); + } + + // Do not break on (matchSlop > allowedSlop) here to make sure that subSpans[0] is + // advanced after the match, if any. + matchStart = prevStart; + lastStart = prevStart; + lastEnd = prevEnd; + } + + bool match = (matchSlop <= allowedSlop); + + if (collectPayloads && match && !possibleMatchPayloads.empty()) { + matchPayload.addAll(possibleMatchPayloads.begin(), possibleMatchPayloads.end()); } - - String NearSpansOrdered::toString() - { - StringStream buffer; - buffer << getClassName() << L"(" << query->toString() << L")@"; - if (firstTime) - buffer << L"START"; - else - { - if (more) - buffer << doc() << L":" << start() << L"-" << end(); - else - buffer << L"END"; + + return match; // ordered and allowed slop +} + +String NearSpansOrdered::toString() { + StringStream buffer; + buffer << getClassName() << L"(" << query->toString() << L")@"; + if (firstTime) { + buffer << L"START"; + } else { + if (more) { + buffer << doc() << L":" << start() << L"-" << end(); + } else { + buffer << L"END"; } - return buffer.str(); } + return buffer.str(); +} + } diff --git a/src/core/search/spans/NearSpansUnordered.cpp b/src/core/search/spans/NearSpansUnordered.cpp index fd494e55..0bb752ce 100644 --- a/src/core/search/spans/NearSpansUnordered.cpp +++ b/src/core/search/spans/NearSpansUnordered.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,317 +11,284 @@ #include "SpanNearQuery.h" #include "StringUtils.h" -namespace Lucene -{ - NearSpansUnordered::NearSpansUnordered(SpanNearQueryPtr query, IndexReaderPtr reader) - { - this->query = query; - this->reader = reader; - } - - NearSpansUnordered::~NearSpansUnordered() - { +namespace Lucene { + +NearSpansUnordered::NearSpansUnordered(const SpanNearQueryPtr& query, const IndexReaderPtr& reader) { + this->query = query; + this->reader = reader; +} + +NearSpansUnordered::~NearSpansUnordered() { +} + +void NearSpansUnordered::initialize() { + this->slop = query->getSlop(); + this->totalLength = 0; + this->more = true; + this->firstTime = true; + + Collection clauses(query->getClauses()); + queue = newLucene(clauses.size()); + subSpans = Collection::newInstance(clauses.size()); + ordered = Collection::newInstance(); + + for (int32_t i = 0; i < clauses.size(); ++i) { + SpansCellPtr cell(newLucene(shared_from_this(), clauses[i]->getSpans(reader), i)); + ordered.add(cell); + subSpans[i] = cell->spans; } - - void NearSpansUnordered::initialize() - { - this->slop = query->getSlop(); - this->totalLength = 0; - this->more = true; - this->firstTime = true; - - Collection clauses(query->getClauses()); - queue = newLucene(clauses.size()); - subSpans = Collection::newInstance(clauses.size()); - ordered = Collection::newInstance(); - - for (int32_t i = 0; i < clauses.size(); ++i) - { - SpansCellPtr cell(newLucene(shared_from_this(), clauses[i]->getSpans(reader), i)); - ordered.add(cell); - subSpans[i] = cell->spans; +} + +Collection NearSpansUnordered::getSubSpans() { + 
return subSpans; +} + +bool NearSpansUnordered::next() { + if (firstTime) { + initList(true); + listToQueue(); // initialize queue + firstTime = false; + } else if (more) { + if (min()->next()) { // trigger further scanning + queue->updateTop(); // maintain queue + } else { + more = false; } } - - Collection NearSpansUnordered::getSubSpans() - { - return subSpans; - } - - bool NearSpansUnordered::next() - { - if (firstTime) - { - initList(true); - listToQueue(); // initialize queue - firstTime = false; + + while (more) { + bool queueStale = false; + + if (min()->doc() != max->doc()) { // maintain list + queueToList(); + queueStale = true; } - else if (more) - { - if (min()->next()) // trigger further scanning - queue->updateTop(); // maintain queue - else - more = false; + + // skip to doc with all clauses + + while (more && first->doc() < last->doc()) { + more = first->skipTo(last->doc()); // skip first upto last + firstToLast(); // and move it to the end + queueStale = true; } - - while (more) - { - bool queueStale = false; - - if (min()->doc() != max->doc()) // maintain list - { - queueToList(); - queueStale = true; - } - - // skip to doc with all clauses - - while (more && first->doc() < last->doc()) - { - more = first->skipTo(last->doc()); // skip first upto last - firstToLast(); // and move it to the end - queueStale = true; - } - - if (!more) - return false; - - // found doc with all clauses - - if (queueStale) // maintain the queue - { - listToQueue(); - queueStale = false; - } - - if (atMatch()) - return true; - - more = min()->next(); - if (more) - queue->updateTop(); // maintain queue + + if (!more) { + return false; } - return false; // no more matches - } - - bool NearSpansUnordered::skipTo(int32_t target) - { - if (firstTime) // initialize - { - initList(false); - for (SpansCellPtr cell(first); more && cell; cell = cell->_next) - more = cell->skipTo(target); // skip all - if (more) - listToQueue(); - firstTime = false; + + // found doc with all 
clauses + + if (queueStale) { // maintain the queue + listToQueue(); + queueStale = false; } - else // normal case - { - while (more && min()->doc() < target) // skip as needed - { - if (min()->skipTo(target)) - queue->updateTop(); - else - more = false; - } + + if (atMatch()) { + return true; + } + + more = min()->next(); + if (more) { + queue->updateTop(); // maintain queue } - return (more && (atMatch() || next())); - } - - SpansCellPtr NearSpansUnordered::min() - { - return queue->top(); - } - - int32_t NearSpansUnordered::doc() - { - return min()->doc(); - } - - int32_t NearSpansUnordered::start() - { - return min()->start(); - } - - int32_t NearSpansUnordered::end() - { - return max->end(); } - - Collection NearSpansUnordered::getPayload() - { - SetByteArray matchPayload(SetByteArray::newInstance()); - for (SpansCellPtr cell(first); cell; cell = cell->_next) - { - if (cell->isPayloadAvailable()) - { - Collection payload(cell->getPayload()); - matchPayload.addAll(payload.begin(), payload.end()); + return false; // no more matches +} + +bool NearSpansUnordered::skipTo(int32_t target) { + if (firstTime) { // initialize + initList(false); + for (SpansCellPtr cell(first); more && cell; cell = cell->_next) { + more = cell->skipTo(target); // skip all + } + if (more) { + listToQueue(); + } + firstTime = false; + } else { // normal case + while (more && min()->doc() < target) { // skip as needed + if (min()->skipTo(target)) { + queue->updateTop(); + } else { + more = false; } } - return Collection::newInstance(matchPayload.begin(), matchPayload.end()); } - - bool NearSpansUnordered::isPayloadAvailable() - { - SpansCellPtr pointer(min()); - while (pointer) - { - if (pointer->isPayloadAvailable()) - return true; - pointer = pointer->_next; + return (more && (atMatch() || next())); +} + +SpansCellPtr NearSpansUnordered::min() { + return queue->top(); +} + +int32_t NearSpansUnordered::doc() { + return min()->doc(); +} + +int32_t NearSpansUnordered::start() { + return 
min()->start(); +} + +int32_t NearSpansUnordered::end() { + return max->end(); +} + +Collection NearSpansUnordered::getPayload() { + SetByteArray matchPayload(SetByteArray::newInstance()); + for (SpansCellPtr cell(first); cell; cell = cell->_next) { + if (cell->isPayloadAvailable()) { + Collection payload(cell->getPayload()); + matchPayload.addAll(payload.begin(), payload.end()); } - return false; } - - String NearSpansUnordered::toString() - { - StringStream buffer; - buffer << getClassName() << L"(" << query->toString() << L")@"; - if (firstTime) - buffer << L"START"; - else - { - if (more) - buffer << doc() << L":" << start() << L"-" << end(); - else - buffer << L"END"; + return Collection::newInstance(matchPayload.begin(), matchPayload.end()); +} + +bool NearSpansUnordered::isPayloadAvailable() { + SpansCellPtr pointer(min()); + while (pointer) { + if (pointer->isPayloadAvailable()) { + return true; } - return buffer.str(); + pointer = pointer->_next; } - - void NearSpansUnordered::initList(bool next) - { - for (Collection::iterator cell = ordered.begin(); more && cell != ordered.end(); ++cell) - { - if (next) - more = (*cell)->next(); // move to first entry - if (more) - addToList(*cell); // add to list + return false; +} + +String NearSpansUnordered::toString() { + StringStream buffer; + buffer << getClassName() << L"(" << query->toString() << L")@"; + if (firstTime) { + buffer << L"START"; + } else { + if (more) { + buffer << doc() << L":" << start() << L"-" << end(); + } else { + buffer << L"END"; } } - - void NearSpansUnordered::addToList(SpansCellPtr cell) - { - if (last) // add next to end of list - last->_next = cell; - else - first = cell; - last = cell; - cell->_next.reset(); - } - - void NearSpansUnordered::firstToLast() - { - last->_next = first; // move first to end of list - last = first; - first = first->_next; - last->_next.reset(); - } - - void NearSpansUnordered::queueToList() - { - first.reset(); - last.reset(); - while (queue->top()) - 
addToList(queue->pop()); - } - - void NearSpansUnordered::listToQueue() - { - queue->clear(); // rebuild queue - for (SpansCellPtr cell(first); cell; cell = cell->_next) - queue->add(cell); // add to queue from list - } - - bool NearSpansUnordered::atMatch() - { - return ((min()->doc() == max->doc()) && ((max->end() - min()->start() - totalLength) <= slop)); - } - - SpansCell::SpansCell(NearSpansUnorderedPtr unordered, SpansPtr spans, int32_t index) - { - this->_unordered = unordered; - this->spans = spans; - this->index = index; - this->length = -1; - } - - SpansCell::~SpansCell() - { - } - - bool SpansCell::next() - { - return adjust(spans->next()); - } - - bool SpansCell::skipTo(int32_t target) - { - return adjust(spans->skipTo(target)); - } - - bool SpansCell::adjust(bool condition) - { - NearSpansUnorderedPtr unordered(_unordered); - if (length != -1) - unordered->totalLength -= length; // subtract old length - if (condition) - { - length = end() - start(); - unordered->totalLength += length; // add new length - - if (!unordered->max || doc() > unordered->max->doc() || (doc() == unordered->max->doc()) && (end() > unordered->max->end())) - unordered->max = shared_from_this(); + return buffer.str(); +} + +void NearSpansUnordered::initList(bool next) { + for (Collection::iterator cell = ordered.begin(); more && cell != ordered.end(); ++cell) { + if (next) { + more = (*cell)->next(); // move to first entry + } + if (more) { + addToList(*cell); // add to list } - unordered->more = condition; - return condition; - } - - int32_t SpansCell::doc() - { - return spans->doc(); - } - - int32_t SpansCell::start() - { - return spans->start(); - } - - int32_t SpansCell::end() - { - return spans->end(); - } - - Collection SpansCell::getPayload() - { - Collection payload(spans->getPayload()); - return Collection::newInstance(payload.begin(), payload.end()); - } - - bool SpansCell::isPayloadAvailable() - { - return spans->isPayloadAvailable(); } - - String SpansCell::toString() 
- { - return spans->toString() + L"#" + StringUtils::toString(index); +} + +void NearSpansUnordered::addToList(const SpansCellPtr& cell) { + if (last) { // add next to end of list + last->_next = cell; + } else { + first = cell; + } + last = cell; + cell->_next.reset(); +} + +void NearSpansUnordered::firstToLast() { + last->_next = first; // move first to end of list + last = first; + first = first->_next; + last->_next.reset(); +} + +void NearSpansUnordered::queueToList() { + first.reset(); + last.reset(); + while (queue->top()) { + addToList(queue->pop()); } - - CellQueue::CellQueue(int32_t size) : PriorityQueue(size) - { +} + +void NearSpansUnordered::listToQueue() { + queue->clear(); // rebuild queue + for (SpansCellPtr cell(first); cell; cell = cell->_next) { + queue->add(cell); // add to queue from list } - - CellQueue::~CellQueue() - { +} + +bool NearSpansUnordered::atMatch() { + return ((min()->doc() == max->doc()) && ((max->end() - min()->start() - totalLength) <= slop)); +} + +SpansCell::SpansCell(const NearSpansUnorderedPtr& unordered, const SpansPtr& spans, int32_t index) { + this->_unordered = unordered; + this->spans = spans; + this->index = index; + this->length = -1; +} + +SpansCell::~SpansCell() { +} + +bool SpansCell::next() { + return adjust(spans->next()); +} + +bool SpansCell::skipTo(int32_t target) { + return adjust(spans->skipTo(target)); +} + +bool SpansCell::adjust(bool condition) { + NearSpansUnorderedPtr unordered(_unordered); + if (length != -1) { + unordered->totalLength -= length; // subtract old length + } + if (condition) { + length = end() - start(); + unordered->totalLength += length; // add new length + + if (!unordered->max || doc() > unordered->max->doc() || ((doc() == unordered->max->doc()) && (end() > unordered->max->end()))) { + unordered->max = shared_from_this(); + } } - - bool CellQueue::lessThan(const SpansCellPtr& first, const SpansCellPtr& second) - { - if (first->doc() == second->doc()) - return 
NearSpansOrdered::docSpansOrdered(first, second); - else - return (first->doc() < second->doc()); + unordered->more = condition; + return condition; +} + +int32_t SpansCell::doc() { + return spans->doc(); +} + +int32_t SpansCell::start() { + return spans->start(); +} + +int32_t SpansCell::end() { + return spans->end(); +} + +Collection SpansCell::getPayload() { + Collection payload(spans->getPayload()); + return Collection::newInstance(payload.begin(), payload.end()); +} + +bool SpansCell::isPayloadAvailable() { + return spans->isPayloadAvailable(); +} + +String SpansCell::toString() { + return spans->toString() + L"#" + StringUtils::toString(index); +} + +CellQueue::CellQueue(int32_t size) : PriorityQueue(size) { +} + +CellQueue::~CellQueue() { +} + +bool CellQueue::lessThan(const SpansCellPtr& first, const SpansCellPtr& second) { + if (first->doc() == second->doc()) { + return NearSpansOrdered::docSpansOrdered(first, second); + } else { + return (first->doc() < second->doc()); } } + +} diff --git a/src/core/search/spans/SpanFirstQuery.cpp b/src/core/search/spans/SpanFirstQuery.cpp index 562d3b65..b195e634 100644 --- a/src/core/search/spans/SpanFirstQuery.cpp +++ b/src/core/search/spans/SpanFirstQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,151 +10,133 @@ #include "SpanQuery.h" #include "MiscUtils.h" -namespace Lucene -{ - SpanFirstQuery::SpanFirstQuery(SpanQueryPtr match, int32_t end) - { - this->match = match; - this->end = end; - } - - SpanFirstQuery::~SpanFirstQuery() - { - } - - SpanQueryPtr SpanFirstQuery::getMatch() - { - return match; - } - - int32_t SpanFirstQuery::getEnd() - { - return end; - } - - String SpanFirstQuery::getField() - { - return match->getField(); - } - - String SpanFirstQuery::toString(const String& field) - { - StringStream buffer; - buffer << L"spanFirst(" << match->toString(field) << L", " << end << L")" << boostString(); - return buffer.str(); - } - - LuceneObjectPtr SpanFirstQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(boost::dynamic_pointer_cast(match->clone()), end)); - SpanFirstQueryPtr spanFirstQuery(boost::dynamic_pointer_cast(clone)); - spanFirstQuery->match = match; - spanFirstQuery->end = end; - spanFirstQuery->setBoost(getBoost()); - return spanFirstQuery; +namespace Lucene { + +SpanFirstQuery::SpanFirstQuery(const SpanQueryPtr& match, int32_t end) { + this->match = match; + this->end = end; +} + +SpanFirstQuery::~SpanFirstQuery() { +} + +SpanQueryPtr SpanFirstQuery::getMatch() { + return match; +} + +int32_t SpanFirstQuery::getEnd() { + return end; +} + +String SpanFirstQuery::getField() { + return match->getField(); +} + +String SpanFirstQuery::toString(const String& field) { + StringStream buffer; + buffer << L"spanFirst(" << match->toString(field) << L", " << end << L")" << boostString(); + return buffer.str(); +} + +LuceneObjectPtr SpanFirstQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = SpanQuery::clone(other ? 
other : newLucene(boost::dynamic_pointer_cast(match->clone()), end)); + SpanFirstQueryPtr spanFirstQuery(boost::dynamic_pointer_cast(clone)); + spanFirstQuery->match = match; + spanFirstQuery->end = end; + spanFirstQuery->setBoost(getBoost()); + return spanFirstQuery; +} + +void SpanFirstQuery::extractTerms(SetTerm terms) { + match->extractTerms(terms); +} + +SpansPtr SpanFirstQuery::getSpans(const IndexReaderPtr& reader) { + return newLucene(shared_from_this(), match->getSpans(reader)); +} + +QueryPtr SpanFirstQuery::rewrite(const IndexReaderPtr& reader) { + SpanFirstQueryPtr clone; + SpanQueryPtr rewritten(boost::dynamic_pointer_cast(match->rewrite(reader))); + if (rewritten != match) { + clone = boost::dynamic_pointer_cast(this->clone()); + clone->match = rewritten; } - - void SpanFirstQuery::extractTerms(SetTerm terms) - { - match->extractTerms(terms); + + if (clone) { + return clone; // some clauses rewrote + } else { + return shared_from_this(); // no clauses rewrote } - - SpansPtr SpanFirstQuery::getSpans(IndexReaderPtr reader) - { - return newLucene(shared_from_this(), match->getSpans(reader)); +} + +bool SpanFirstQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - QueryPtr SpanFirstQuery::rewrite(IndexReaderPtr reader) - { - SpanFirstQueryPtr clone; - SpanQueryPtr rewritten(boost::dynamic_pointer_cast(match->rewrite(reader))); - if (rewritten != match) - { - clone = boost::dynamic_pointer_cast(this->clone()); - clone->match = rewritten; - } - - if (clone) - return clone; // some clauses rewrote - else - return shared_from_this(); // no clauses rewrote + + SpanFirstQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - bool SpanFirstQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) + + return (end == otherQuery->end && match->equals(otherQuery->match) && getBoost() == otherQuery->getBoost()); +} + +int32_t SpanFirstQuery::hashCode() { + 
int32_t result = match->hashCode(); + result ^= (result << 8) | MiscUtils::unsignedShift(result, 25); // reversible + result ^= MiscUtils::doubleToRawIntBits(getBoost()) ^ end; + return result; +} + +FirstSpans::FirstSpans(const SpanFirstQueryPtr& query, const SpansPtr& spans) { + this->query = query; + this->spans = spans; +} + +FirstSpans::~FirstSpans() { +} + +bool FirstSpans::next() { + while (spans->next()) { // scan to next match + if (end() <= query->end) { return true; - - SpanFirstQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - - return (end == otherQuery->end && match->equals(otherQuery->match) && getBoost() == otherQuery->getBoost()); - } - - int32_t SpanFirstQuery::hashCode() - { - int32_t result = match->hashCode(); - result ^= (result << 8) | MiscUtils::unsignedShift(result, 25); // reversible - result ^= MiscUtils::doubleToRawIntBits(getBoost()) ^ end; - return result; - } - - FirstSpans::FirstSpans(SpanFirstQueryPtr query, SpansPtr spans) - { - this->query = query; - this->spans = spans; - } - - FirstSpans::~FirstSpans() - { - } - - bool FirstSpans::next() - { - while (spans->next()) // scan to next match - { - if (end() <= query->end) - return true; } - return false; - } - - bool FirstSpans::skipTo(int32_t target) - { - if (!spans->skipTo(target)) - return false; - return (spans->end() <= query->end || next()); - } - - int32_t FirstSpans::doc() - { - return spans->doc(); - } - - int32_t FirstSpans::start() - { - return spans->start(); - } - - int32_t FirstSpans::end() - { - return spans->end(); } - - Collection FirstSpans::getPayload() - { - Collection result; - if (spans->isPayloadAvailable()) - { - Collection payload(spans->getPayload()); - result = Collection::newInstance(payload.begin(), payload.end()); - } - return result; + return false; +} + +bool FirstSpans::skipTo(int32_t target) { + if (!spans->skipTo(target)) { + return false; } - - bool FirstSpans::isPayloadAvailable() - { - return 
spans->isPayloadAvailable(); + return (spans->end() <= query->end || next()); +} + +int32_t FirstSpans::doc() { + return spans->doc(); +} + +int32_t FirstSpans::start() { + return spans->start(); +} + +int32_t FirstSpans::end() { + return spans->end(); +} + +Collection FirstSpans::getPayload() { + Collection result; + if (spans->isPayloadAvailable()) { + Collection payload(spans->getPayload()); + result = Collection::newInstance(payload.begin(), payload.end()); } + return result; +} + +bool FirstSpans::isPayloadAvailable() { + return spans->isPayloadAvailable(); +} + } diff --git a/src/core/search/spans/SpanNearQuery.cpp b/src/core/search/spans/SpanNearQuery.cpp index 14654a55..0c95d8d1 100644 --- a/src/core/search/spans/SpanNearQuery.cpp +++ b/src/core/search/spans/SpanNearQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,143 +12,140 @@ #include "NearSpansUnordered.h" #include "MiscUtils.h" -namespace Lucene -{ - SpanNearQuery::SpanNearQuery(Collection clauses, int32_t slop, bool inOrder, bool collectPayloads) - { - this->clauses = Collection::newInstance(); - for (int32_t i = 0; i < clauses.size(); ++i) - { - SpanQueryPtr clause(clauses[i]); - if (i == 0) // check field - field = clause->getField(); - else if (clause->getField() != field) - boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); - this->clauses.add(clause); +namespace Lucene { + +SpanNearQuery::SpanNearQuery(Collection clauses, int32_t slop, bool inOrder, bool collectPayloads) { + this->clauses = Collection::newInstance(); + for (int32_t i = 0; i < clauses.size(); ++i) { + SpanQueryPtr clause(clauses[i]); + if (i == 0) { // check field + field = clause->getField(); + } else if (clause->getField() != field) { + boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); } - this->collectPayloads = collectPayloads; - this->slop = slop; - this->inOrder = inOrder; + this->clauses.add(clause); } - - SpanNearQuery::~SpanNearQuery() - { + this->collectPayloads = collectPayloads; + this->slop = slop; + this->inOrder = inOrder; +} + +SpanNearQuery::~SpanNearQuery() { +} + +Collection SpanNearQuery::getClauses() { + return clauses; +} + +int32_t SpanNearQuery::getSlop() { + return slop; +} + +bool SpanNearQuery::isInOrder() { + return inOrder; +} + +String SpanNearQuery::getField() { + return field; +} + +void SpanNearQuery::extractTerms(SetTerm terms) { + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + (*clause)->extractTerms(terms); } - - Collection SpanNearQuery::getClauses() - { - return clauses; +} + +String SpanNearQuery::toString(const String& field) { + StringStream buffer; + buffer << L"spanNear(["; + for (Collection::iterator clause = 
clauses.begin(); clause != clauses.end(); ++clause) { + if (clause != clauses.begin()) { + buffer << L", "; + } + buffer << (*clause)->toString(field); } - - int32_t SpanNearQuery::getSlop() - { - return slop; + buffer << L"], " << slop << L", " << inOrder << L")" << boostString(); + return buffer.str(); +} + +SpansPtr SpanNearQuery::getSpans(const IndexReaderPtr& reader) { + if (clauses.empty()) { // optimize 0-clause case + return newLucene(getClauses())->getSpans(reader); } - - bool SpanNearQuery::isInOrder() - { - return inOrder; + + if (clauses.size() == 1) { // optimize 1-clause case + return clauses[0]->getSpans(reader); } - - String SpanNearQuery::getField() - { - return field; + + return inOrder + ? boost::static_pointer_cast(newLucene(shared_from_this(), reader, collectPayloads)) + : boost::static_pointer_cast(newLucene(shared_from_this(), reader)); +} + +QueryPtr SpanNearQuery::rewrite(const IndexReaderPtr& reader) { + SpanNearQueryPtr clone; + for (int32_t i = 0; i < clauses.size(); ++i) { + SpanQueryPtr clause(clauses[i]); + SpanQueryPtr query(boost::dynamic_pointer_cast(clause->rewrite(reader))); + if (query != clause) { // clause rewrote: must clone + if (!clone) { + clone = boost::dynamic_pointer_cast(this->clone()); + } + clone->clauses[i] = query; + } } - - void SpanNearQuery::extractTerms(SetTerm terms) - { - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - (*clause)->extractTerms(terms); + if (clone) { + return clone; // some clauses rewrote + } else { + return shared_from_this(); // no clauses rewrote } - - String SpanNearQuery::toString(const String& field) - { - StringStream buffer; - buffer << L"spanNear(["; - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - { - if (clause != clauses.begin()) - buffer << L", "; - buffer << (*clause)->toString(field); - } - buffer << L"], " << slop << L", " << inOrder << L")" << boostString(); - return buffer.str(); +} + 
+LuceneObjectPtr SpanNearQuery::clone(const LuceneObjectPtr& other) { + int32_t sz = clauses.size(); + Collection newClauses(Collection::newInstance(sz)); + + for (int32_t i = 0; i < sz; ++i) { + newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); } - - SpansPtr SpanNearQuery::getSpans(IndexReaderPtr reader) - { - if (clauses.empty()) // optimize 0-clause case - return newLucene(getClauses())->getSpans(reader); - - if (clauses.size() == 1) // optimize 1-clause case - return clauses[0]->getSpans(reader); - - return inOrder - ? boost::static_pointer_cast(newLucene(shared_from_this(), reader, collectPayloads)) - : boost::static_pointer_cast(newLucene(shared_from_this(), reader)); + + SpanNearQueryPtr spanNearQuery(newLucene(newClauses, slop, inOrder)); + spanNearQuery->setBoost(getBoost()); + return spanNearQuery; +} + +bool SpanNearQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - QueryPtr SpanNearQuery::rewrite(IndexReaderPtr reader) - { - SpanNearQueryPtr clone; - for (int32_t i = 0; i < clauses.size(); ++i) - { - SpanQueryPtr clause(clauses[i]); - SpanQueryPtr query(boost::dynamic_pointer_cast(clause->rewrite(reader))); - if (query != clause) // clause rewrote: must clone - { - if (!clone) - clone = boost::dynamic_pointer_cast(this->clone()); - clone->clauses[i] = query; - } - } - if (clone) - return clone; // some clauses rewrote - else - return shared_from_this(); // no clauses rewrote + + SpanNearQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - LuceneObjectPtr SpanNearQuery::clone(LuceneObjectPtr other) - { - int32_t sz = clauses.size(); - Collection newClauses(Collection::newInstance(sz)); - - for (int32_t i = 0; i < sz; ++i) - newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); - - SpanNearQueryPtr spanNearQuery(newLucene(newClauses, slop, inOrder)); - spanNearQuery->setBoost(getBoost()); - return spanNearQuery; + + if (inOrder 
!= otherQuery->inOrder) { + return false; } - - bool SpanNearQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - SpanNearQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - - if (inOrder != otherQuery->inOrder) - return false; - if (slop != otherQuery->slop) - return false; - if (!clauses.equals(otherQuery->clauses, luceneEquals())) - return false; - - return (getBoost() == otherQuery->getBoost()); + if (slop != otherQuery->slop) { + return false; } - - int32_t SpanNearQuery::hashCode() - { - int32_t result = MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene); - // Mix bits before folding in things like boost, since it could cancel the last element of clauses. - // This particular mix also serves to differentiate SpanNearQuery hashcodes from others. - result ^= (result << 14) | MiscUtils::unsignedShift(result, 19); // reversible - result += MiscUtils::doubleToRawIntBits(getBoost()); - result += slop; - result ^= (inOrder ? 0x99afd3bd : 0); - return result; + if (!clauses.equals(otherQuery->clauses, luceneEquals())) { + return false; } + + return (getBoost() == otherQuery->getBoost()); +} + +int32_t SpanNearQuery::hashCode() { + int32_t result = MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene); + // Mix bits before folding in things like boost, since it could cancel the last element of clauses. + // This particular mix also serves to differentiate SpanNearQuery hashcodes from others. + result ^= (result << 14) | MiscUtils::unsignedShift(result, 19); // reversible + result += MiscUtils::doubleToRawIntBits(getBoost()); + result += slop; + result ^= (inOrder ? 
0x99afd3bd : 0); + return result; +} + } diff --git a/src/core/search/spans/SpanNotQuery.cpp b/src/core/search/spans/SpanNotQuery.cpp index 7986b5e6..064999f8 100644 --- a/src/core/search/spans/SpanNotQuery.cpp +++ b/src/core/search/spans/SpanNotQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,200 +9,189 @@ #include "_SpanNotQuery.h" #include "MiscUtils.h" -namespace Lucene -{ - SpanNotQuery::SpanNotQuery(SpanQueryPtr include, SpanQueryPtr exclude) - { - this->include = include; - this->exclude = exclude; - - if (include->getField() != exclude->getField()) - boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); - } - - SpanNotQuery::~SpanNotQuery() - { - } - - SpanQueryPtr SpanNotQuery::getInclude() - { - return include; - } - - SpanQueryPtr SpanNotQuery::getExclude() - { - return exclude; - } - - String SpanNotQuery::getField() - { - return include->getField(); - } - - void SpanNotQuery::extractTerms(SetTerm terms) - { - include->extractTerms(terms); - } - - String SpanNotQuery::toString(const String& field) - { - StringStream buffer; - buffer << L"spanNot(" << include->toString(field) << L", " << exclude->toString(field) << L")"; - buffer << boostString(); - return buffer.str(); - } - - LuceneObjectPtr SpanNotQuery::clone(LuceneObjectPtr other) - { - SpanNotQueryPtr spanNotQuery(newLucene(boost::dynamic_pointer_cast(include->clone()), - boost::dynamic_pointer_cast(exclude->clone()))); - spanNotQuery->setBoost(getBoost()); - return spanNotQuery; +namespace Lucene { + +SpanNotQuery::SpanNotQuery(const SpanQueryPtr& include, const SpanQueryPtr& exclude) { + 
this->include = include; + this->exclude = exclude; + + if (include->getField() != exclude->getField()) { + boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); } - - SpansPtr SpanNotQuery::getSpans(IndexReaderPtr reader) - { - return newLucene(shared_from_this(), include->getSpans(reader), exclude->getSpans(reader)); +} + +SpanNotQuery::~SpanNotQuery() { +} + +SpanQueryPtr SpanNotQuery::getInclude() { + return include; +} + +SpanQueryPtr SpanNotQuery::getExclude() { + return exclude; +} + +String SpanNotQuery::getField() { + return include->getField(); +} + +void SpanNotQuery::extractTerms(SetTerm terms) { + include->extractTerms(terms); +} + +String SpanNotQuery::toString(const String& field) { + StringStream buffer; + buffer << L"spanNot(" << include->toString(field) << L", " << exclude->toString(field) << L")"; + buffer << boostString(); + return buffer.str(); +} + +LuceneObjectPtr SpanNotQuery::clone(const LuceneObjectPtr& other) { + SpanNotQueryPtr spanNotQuery(newLucene(boost::dynamic_pointer_cast(include->clone()), + boost::dynamic_pointer_cast(exclude->clone()))); + spanNotQuery->setBoost(getBoost()); + return spanNotQuery; +} + +SpansPtr SpanNotQuery::getSpans(const IndexReaderPtr& reader) { + return newLucene(shared_from_this(), include->getSpans(reader), exclude->getSpans(reader)); +} + +QueryPtr SpanNotQuery::rewrite(const IndexReaderPtr& reader) { + SpanNotQueryPtr clone; + SpanQueryPtr rewrittenInclude(boost::dynamic_pointer_cast(include->rewrite(reader))); + if (rewrittenInclude != include) { + clone = boost::dynamic_pointer_cast(this->clone()); + clone->include = rewrittenInclude; } - - QueryPtr SpanNotQuery::rewrite(IndexReaderPtr reader) - { - SpanNotQueryPtr clone; - SpanQueryPtr rewrittenInclude(boost::dynamic_pointer_cast(include->rewrite(reader))); - if (rewrittenInclude != include) - { + + SpanQueryPtr rewrittenExclude(boost::dynamic_pointer_cast(exclude->rewrite(reader))); + if (rewrittenExclude != exclude) { 
+ if (!clone) { clone = boost::dynamic_pointer_cast(this->clone()); - clone->include = rewrittenInclude; } - - SpanQueryPtr rewrittenExclude(boost::dynamic_pointer_cast(exclude->rewrite(reader))); - if (rewrittenExclude != exclude) - { - if (!clone) - clone = boost::dynamic_pointer_cast(this->clone()); - clone->exclude = rewrittenExclude; - } - - if (clone) - return clone; // some clauses rewrote - else - return shared_from_this(); // no clauses rewrote - } - - bool SpanNotQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - SpanNotQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - - return (include->equals(otherQuery->include) && exclude->equals(otherQuery->exclude) && getBoost() == otherQuery->getBoost()); + clone->exclude = rewrittenExclude; } - - int32_t SpanNotQuery::hashCode() - { - int32_t result = include->hashCode(); - result = (result << 1) | MiscUtils::unsignedShift(result, 31); // rotate left - result ^= exclude->hashCode(); - result = (result << 1) | MiscUtils::unsignedShift(result, 31); // rotate left - result ^= MiscUtils::doubleToRawIntBits(getBoost()); - return result; + + if (clone) { + return clone; // some clauses rewrote + } else { + return shared_from_this(); // no clauses rewrote } - - NotSpans::NotSpans(SpanNotQueryPtr query, SpansPtr includeSpans, SpansPtr excludeSpans) - { - this->query = query; - this->includeSpans = includeSpans; - this->moreInclude = true; - this->excludeSpans = excludeSpans; - this->moreExclude = excludeSpans->next(); +} + +bool SpanNotQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - NotSpans::~NotSpans() - { + + SpanNotQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - bool NotSpans::next() - { - if (moreInclude) // move to next include - moreInclude = includeSpans->next(); - - while (moreInclude && moreExclude) - { - if 
(includeSpans->doc() > excludeSpans->doc()) // skip exclude - moreExclude = excludeSpans->skipTo(includeSpans->doc()); - - // while exclude is before - while (moreExclude && includeSpans->doc() == excludeSpans->doc() && excludeSpans->end() <= includeSpans->start()) - moreExclude = excludeSpans->next(); // increment exclude - - // if no intersection - if (!moreExclude || includeSpans->doc() != excludeSpans->doc() || includeSpans->end() <= excludeSpans->start()) - break; // we found a match - - moreInclude = includeSpans->next(); // intersected: keep scanning - } - return moreInclude; + + return (include->equals(otherQuery->include) && exclude->equals(otherQuery->exclude) && getBoost() == otherQuery->getBoost()); +} + +int32_t SpanNotQuery::hashCode() { + int32_t result = include->hashCode(); + result = (result << 1) | MiscUtils::unsignedShift(result, 31); // rotate left + result ^= exclude->hashCode(); + result = (result << 1) | MiscUtils::unsignedShift(result, 31); // rotate left + result ^= MiscUtils::doubleToRawIntBits(getBoost()); + return result; +} + +NotSpans::NotSpans(const SpanNotQueryPtr& query, const SpansPtr& includeSpans, const SpansPtr& excludeSpans) { + this->query = query; + this->includeSpans = includeSpans; + this->moreInclude = true; + this->excludeSpans = excludeSpans; + this->moreExclude = excludeSpans->next(); +} + +NotSpans::~NotSpans() { +} + +bool NotSpans::next() { + if (moreInclude) { // move to next include + moreInclude = includeSpans->next(); } - - bool NotSpans::skipTo(int32_t target) - { - if (moreInclude) // skip include - moreInclude = includeSpans->skipTo(target); - - if (!moreInclude) - return false; - - // skip exclude - if (moreExclude && includeSpans->doc() > excludeSpans->doc()) + + while (moreInclude && moreExclude) { + if (includeSpans->doc() > excludeSpans->doc()) { // skip exclude moreExclude = excludeSpans->skipTo(includeSpans->doc()); - + } + // while exclude is before - while (moreExclude && includeSpans->doc() == 
excludeSpans->doc() && excludeSpans->end() <= includeSpans->start()) - moreExclude = excludeSpans->next(); // increment exclude - + while (moreExclude && includeSpans->doc() == excludeSpans->doc() && excludeSpans->end() <= includeSpans->start()) { + moreExclude = excludeSpans->next(); // increment exclude + } + // if no intersection - if (!moreExclude || includeSpans->doc() != excludeSpans->doc() || includeSpans->end() <= excludeSpans->start()) - return true; // we found a match - - return next(); // scan to next match + if (!moreExclude || includeSpans->doc() != excludeSpans->doc() || includeSpans->end() <= excludeSpans->start()) { + break; // we found a match + } + + moreInclude = includeSpans->next(); // intersected: keep scanning } - - int32_t NotSpans::doc() - { - return includeSpans->doc(); + return moreInclude; +} + +bool NotSpans::skipTo(int32_t target) { + if (moreInclude) { // skip include + moreInclude = includeSpans->skipTo(target); } - - int32_t NotSpans::start() - { - return includeSpans->start(); + + if (!moreInclude) { + return false; } - - int32_t NotSpans::end() - { - return includeSpans->end(); + + // skip exclude + if (moreExclude && includeSpans->doc() > excludeSpans->doc()) { + moreExclude = excludeSpans->skipTo(includeSpans->doc()); } - - Collection NotSpans::getPayload() - { - Collection result; - if (includeSpans->isPayloadAvailable()) - { - Collection payload(includeSpans->getPayload()); - result = Collection::newInstance(payload.begin(), payload.end()); - } - return result; + + // while exclude is before + while (moreExclude && includeSpans->doc() == excludeSpans->doc() && excludeSpans->end() <= includeSpans->start()) { + moreExclude = excludeSpans->next(); // increment exclude } - - bool NotSpans::isPayloadAvailable() - { - return includeSpans->isPayloadAvailable(); + + // if no intersection + if (!moreExclude || includeSpans->doc() != excludeSpans->doc() || includeSpans->end() <= excludeSpans->start()) { + return true; // we found a 
match } - - String NotSpans::toString() - { - return L"spans(" + query->toString() + L")"; + + return next(); // scan to next match +} + +int32_t NotSpans::doc() { + return includeSpans->doc(); +} + +int32_t NotSpans::start() { + return includeSpans->start(); +} + +int32_t NotSpans::end() { + return includeSpans->end(); +} + +Collection NotSpans::getPayload() { + Collection result; + if (includeSpans->isPayloadAvailable()) { + Collection payload(includeSpans->getPayload()); + result = Collection::newInstance(payload.begin(), payload.end()); } + return result; +} + +bool NotSpans::isPayloadAvailable() { + return includeSpans->isPayloadAvailable(); +} + +String NotSpans::toString() { + return L"spans(" + query->toString() + L")"; +} + } diff --git a/src/core/search/spans/SpanOrQuery.cpp b/src/core/search/spans/SpanOrQuery.cpp index e7a655cc..26349b1b 100644 --- a/src/core/search/spans/SpanOrQuery.cpp +++ b/src/core/search/spans/SpanOrQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,254 +9,238 @@ #include "_SpanOrQuery.h" #include "MiscUtils.h" -namespace Lucene -{ - SpanOrQuery::SpanOrQuery(Collection clauses) - { - // copy clauses array into an ArrayList - this->clauses = Collection::newInstance(); - for (int32_t i = 0; i < clauses.size(); ++i) - { - SpanQueryPtr clause(clauses[i]); - if (i == 0) // check field - field = clause->getField(); - else if (clause->getField() != field) - boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); - this->clauses.add(clause); +namespace Lucene { + +SpanOrQuery::SpanOrQuery(Collection clauses) { + // copy clauses array into an ArrayList + this->clauses = Collection::newInstance(); + for (int32_t i = 0; i < clauses.size(); ++i) { + SpanQueryPtr clause(clauses[i]); + if (i == 0) { // check field + field = clause->getField(); + } else if (clause->getField() != field) { + boost::throw_exception(IllegalArgumentException(L"Clauses must have same field.")); } + this->clauses.add(clause); } - - SpanOrQuery::~SpanOrQuery() - { - } - - Collection SpanOrQuery::getClauses() - { - return clauses; - } - - String SpanOrQuery::getField() - { - return field; - } - - void SpanOrQuery::extractTerms(SetTerm terms) - { - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - (*clause)->extractTerms(terms); +} + +SpanOrQuery::~SpanOrQuery() { +} + +Collection SpanOrQuery::getClauses() { + return clauses; +} + +String SpanOrQuery::getField() { + return field; +} + +void SpanOrQuery::extractTerms(SetTerm terms) { + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + (*clause)->extractTerms(terms); } - - LuceneObjectPtr SpanOrQuery::clone(LuceneObjectPtr other) - { - int32_t sz = clauses.size(); - Collection newClauses(Collection::newInstance(sz)); - - for (int32_t i = 0; i < sz; ++i) - newClauses[i] = 
boost::dynamic_pointer_cast(clauses[i]->clone()); - - SpanOrQueryPtr spanOrQuery(newLucene(newClauses)); - spanOrQuery->setBoost(getBoost()); - return spanOrQuery; +} + +LuceneObjectPtr SpanOrQuery::clone(const LuceneObjectPtr& other) { + int32_t sz = clauses.size(); + Collection newClauses(Collection::newInstance(sz)); + + for (int32_t i = 0; i < sz; ++i) { + newClauses[i] = boost::dynamic_pointer_cast(clauses[i]->clone()); } - - QueryPtr SpanOrQuery::rewrite(IndexReaderPtr reader) - { - SpanOrQueryPtr clone; - for (int32_t i = 0; i < clauses.size(); ++i) - { - SpanQueryPtr clause(clauses[i]); - SpanQueryPtr query(boost::dynamic_pointer_cast(clause->rewrite(reader))); - if (query != clause) // clause rewrote: must clone - { - if (!clone) - clone = boost::dynamic_pointer_cast(this->clone()); - clone->clauses[i] = query; + + SpanOrQueryPtr spanOrQuery(newLucene(newClauses)); + spanOrQuery->setBoost(getBoost()); + return spanOrQuery; +} + +QueryPtr SpanOrQuery::rewrite(const IndexReaderPtr& reader) { + SpanOrQueryPtr clone; + for (int32_t i = 0; i < clauses.size(); ++i) { + SpanQueryPtr clause(clauses[i]); + SpanQueryPtr query(boost::dynamic_pointer_cast(clause->rewrite(reader))); + if (query != clause) { // clause rewrote: must clone + if (!clone) { + clone = boost::dynamic_pointer_cast(this->clone()); } + clone->clauses[i] = query; } - if (clone) - return clone; // some clauses rewrote - else - return shared_from_this(); // no clauses rewrote - } - - String SpanOrQuery::toString(const String& field) - { - StringStream buffer; - buffer << L"SpanOr(["; - for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) - { - if (clause != clauses.begin()) - buffer << L", "; - buffer << (*clause)->toString(field); - } - buffer << L"])" << boostString(); - return buffer.str(); } - - bool SpanOrQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - SpanOrQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); 
- if (!otherQuery) - return false; - - if (!clauses.equals(otherQuery->clauses, luceneEquals())) - return false; - if (!clauses.empty() && field != otherQuery->field) - return false; - - return (getBoost() == otherQuery->getBoost()); + if (clone) { + return clone; // some clauses rewrote + } else { + return shared_from_this(); // no clauses rewrote } - - int32_t SpanOrQuery::hashCode() - { - int32_t result = MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene); - result ^= (result << 10) | MiscUtils::unsignedShift(result, 23); - result ^= MiscUtils::doubleToRawIntBits(getBoost()); - return result; - } - - SpansPtr SpanOrQuery::getSpans(IndexReaderPtr reader) - { - if (clauses.size() == 1) // optimize 1-clause case - return clauses[0]->getSpans(reader); - return newLucene(shared_from_this(), reader); +} + +String SpanOrQuery::toString(const String& field) { + StringStream buffer; + buffer << L"SpanOr(["; + for (Collection::iterator clause = clauses.begin(); clause != clauses.end(); ++clause) { + if (clause != clauses.begin()) { + buffer << L", "; + } + buffer << (*clause)->toString(field); } - - SpanQueue::SpanQueue(int32_t size) : PriorityQueue(size) - { + buffer << L"])" << boostString(); + return buffer.str(); +} + +bool SpanOrQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - SpanQueue::~SpanQueue() - { + + SpanOrQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - bool SpanQueue::lessThan(const SpansPtr& first, const SpansPtr& second) - { - if (first->doc() == second->doc()) - { - if (first->start() == second->start()) - return (first->end() < second->end()); - else - return (first->start() < second->start()); - } - else - return (first->doc() < second->doc()); + + if (!clauses.equals(otherQuery->clauses, luceneEquals())) { + return false; } - - OrSpans::OrSpans(SpanOrQueryPtr query, IndexReaderPtr reader) - { - this->query = query; - 
this->reader = reader; + if (!clauses.empty() && field != otherQuery->field) { + return false; } - - OrSpans::~OrSpans() - { + + return (getBoost() == otherQuery->getBoost()); +} + +int32_t SpanOrQuery::hashCode() { + int32_t result = MiscUtils::hashCode(clauses.begin(), clauses.end(), MiscUtils::hashLucene); + result ^= (result << 10) | MiscUtils::unsignedShift(result, 23); + result ^= MiscUtils::doubleToRawIntBits(getBoost()); + return result; +} + +SpansPtr SpanOrQuery::getSpans(const IndexReaderPtr& reader) { + if (clauses.size() == 1) { // optimize 1-clause case + return clauses[0]->getSpans(reader); } - - bool OrSpans::initSpanQueue(int32_t target) - { - queue = newLucene(query->clauses.size()); - for (Collection::iterator clause = query->clauses.begin(); clause != query->clauses.end(); ++clause) - { - SpansPtr spans((*clause)->getSpans(reader)); - if ((target == -1 && spans->next()) || (target != -1 && spans->skipTo(target))) - queue->add(spans); + return newLucene(shared_from_this(), reader); +} + +SpanQueue::SpanQueue(int32_t size) : PriorityQueue(size) { +} + +SpanQueue::~SpanQueue() { +} + +bool SpanQueue::lessThan(const SpansPtr& first, const SpansPtr& second) { + if (first->doc() == second->doc()) { + if (first->start() == second->start()) { + return (first->end() < second->end()); + } else { + return (first->start() < second->start()); } - return !queue->empty(); + } else { + return (first->doc() < second->doc()); } - - bool OrSpans::next() - { - if (!queue) - return initSpanQueue(-1); - - if (queue->empty()) // all done - return false; - - if (top()->next()) // move to next - { - queue->updateTop(); - return true; +} + +OrSpans::OrSpans(const SpanOrQueryPtr& query, const IndexReaderPtr& reader) { + this->query = query; + this->reader = reader; +} + +OrSpans::~OrSpans() { +} + +bool OrSpans::initSpanQueue(int32_t target) { + queue = newLucene(query->clauses.size()); + for (Collection::iterator clause = query->clauses.begin(); clause != 
query->clauses.end(); ++clause) { + SpansPtr spans((*clause)->getSpans(reader)); + if ((target == -1 && spans->next()) || (target != -1 && spans->skipTo(target))) { + queue->add(spans); } - - queue->pop(); // exhausted a clause - return !queue->empty(); } - - SpansPtr OrSpans::top() - { - return queue->top(); - } - - bool OrSpans::skipTo(int32_t target) - { - if (!queue) - return initSpanQueue(target); - - bool skipCalled = false; - while (!queue->empty() && top()->doc() < target) - { - if (top()->skipTo(target)) - queue->updateTop(); - else - queue->pop(); - skipCalled = true; - } - - if (skipCalled) - return !queue->empty(); - return next(); + return !queue->empty(); +} + +bool OrSpans::next() { + if (!queue) { + return initSpanQueue(-1); } - - int32_t OrSpans::doc() - { - return top()->doc(); + + if (queue->empty()) { // all done + return false; } - - int32_t OrSpans::start() - { - return top()->start(); + + if (top()->next()) { // move to next + queue->updateTop(); + return true; } - - int32_t OrSpans::end() - { - return top()->end(); + + queue->pop(); // exhausted a clause + return !queue->empty(); +} + +SpansPtr OrSpans::top() { + return queue->top(); +} + +bool OrSpans::skipTo(int32_t target) { + if (!queue) { + return initSpanQueue(target); } - - Collection OrSpans::getPayload() - { - Collection result; - SpansPtr theTop(top()); - if (theTop && theTop->isPayloadAvailable()) - { - Collection payload(theTop->getPayload()); - result = Collection::newInstance(payload.begin(), payload.end()); + + bool skipCalled = false; + while (!queue->empty() && top()->doc() < target) { + if (top()->skipTo(target)) { + queue->updateTop(); + } else { + queue->pop(); } - return result; + skipCalled = true; } - - bool OrSpans::isPayloadAvailable() - { - SpansPtr theTop(top()); - return (theTop && theTop->isPayloadAvailable()); + + if (skipCalled) { + return !queue->empty(); } - - String OrSpans::toString() - { - StringStream buffer; - buffer << L"spans(" << query->toString() << 
L")@"; - if (!queue) - buffer << L"START"; - else - { - if (!queue->empty()) - buffer << doc() << L":" << start() << L"-" << end(); - else - buffer << L"END"; + return next(); +} + +int32_t OrSpans::doc() { + return top()->doc(); +} + +int32_t OrSpans::start() { + return top()->start(); +} + +int32_t OrSpans::end() { + return top()->end(); +} + +Collection OrSpans::getPayload() { + Collection result; + SpansPtr theTop(top()); + if (theTop && theTop->isPayloadAvailable()) { + Collection payload(theTop->getPayload()); + result = Collection::newInstance(payload.begin(), payload.end()); + } + return result; +} + +bool OrSpans::isPayloadAvailable() { + SpansPtr theTop(top()); + return (theTop && theTop->isPayloadAvailable()); +} + +String OrSpans::toString() { + StringStream buffer; + buffer << L"spans(" << query->toString() << L")@"; + if (!queue) { + buffer << L"START"; + } else { + if (!queue->empty()) { + buffer << doc() << L":" << start() << L"-" << end(); + } else { + buffer << L"END"; } - return buffer.str(); } + return buffer.str(); +} + } diff --git a/src/core/search/spans/SpanQuery.cpp b/src/core/search/spans/SpanQuery.cpp index a6e20748..bef1fb72 100644 --- a/src/core/search/spans/SpanQuery.cpp +++ b/src/core/search/spans/SpanQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,14 +8,13 @@ #include "SpanQuery.h" #include "SpanWeight.h" -namespace Lucene -{ - SpanQuery::~SpanQuery() - { - } - - WeightPtr SpanQuery::createWeight(SearcherPtr searcher) - { - return newLucene(shared_from_this(), searcher); - } +namespace Lucene { + +SpanQuery::~SpanQuery() { +} + +WeightPtr SpanQuery::createWeight(const SearcherPtr& searcher) { + return newLucene(shared_from_this(), searcher); +} + } diff --git a/src/core/search/spans/SpanScorer.cpp b/src/core/search/spans/SpanScorer.cpp index e0be498d..0e39b957 100644 --- a/src/core/search/spans/SpanScorer.cpp +++ b/src/core/search/spans/SpanScorer.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,89 +12,80 @@ #include "Spans.h" #include "StringUtils.h" -namespace Lucene -{ - SpanScorer::SpanScorer(SpansPtr spans, WeightPtr weight, SimilarityPtr similarity, ByteArray norms) : Scorer(similarity) - { - this->spans = spans; - this->norms = norms; - this->weight = weight; - this->value = weight->getValue(); - this->freq = 0.0; - if (this->spans->next()) - { - doc = -1; - more = true; - } - else - { - doc = NO_MORE_DOCS; - more = false; - } - } - - SpanScorer::~SpanScorer() - { +namespace Lucene { + +SpanScorer::SpanScorer(const SpansPtr& spans, const WeightPtr& weight, const SimilarityPtr& similarity, ByteArray norms) : Scorer(similarity) { + this->spans = spans; + this->norms = norms; + this->weight = weight; + this->value = weight->getValue(); + this->freq = 0.0; + if (this->spans->next()) { + doc = -1; + more = true; + } else { + doc = NO_MORE_DOCS; + more = false; } - - int32_t SpanScorer::nextDoc() - { - if (!setFreqCurrentDoc()) - doc = NO_MORE_DOCS; - return doc; +} + +SpanScorer::~SpanScorer() { +} + +int32_t SpanScorer::nextDoc() { + if (!setFreqCurrentDoc()) { + doc = NO_MORE_DOCS; } - - int32_t SpanScorer::advance(int32_t target) - { - if (!more) - { - doc = NO_MORE_DOCS; - return doc; - } - if (spans->doc() < target) // setFreqCurrentDoc() leaves spans->doc() ahead - more = spans->skipTo(target); - if (!setFreqCurrentDoc()) - doc = NO_MORE_DOCS; + return doc; +} + +int32_t SpanScorer::advance(int32_t target) { + if (!more) { + doc = NO_MORE_DOCS; return doc; } - - bool SpanScorer::setFreqCurrentDoc() - { - if (!more) - return false; - doc = spans->doc(); - freq = 0.0; - do - { - int32_t matchLength = spans->end() - spans->start(); - freq += getSimilarity()->sloppyFreq(matchLength); - more = spans->next(); - } - while (more && (doc == spans->doc())); - return true; + if (spans->doc() < target) { // setFreqCurrentDoc() leaves spans->doc() ahead + more = 
spans->skipTo(target); } - - int32_t SpanScorer::docID() - { - return doc; - } - - double SpanScorer::score() - { - double raw = getSimilarity()->tf(freq) * value; // raw score - return norms ? raw * Similarity::decodeNorm(norms[doc]) : raw; // normalize + if (!setFreqCurrentDoc()) { + doc = NO_MORE_DOCS; } - - ExplanationPtr SpanScorer::explain(int32_t doc) - { - ExplanationPtr tfExplanation(newLucene()); - - int32_t expDoc = advance(doc); - - double phraseFreq = expDoc == doc ? freq : 0.0; - tfExplanation->setValue(getSimilarity()->tf(phraseFreq)); - tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); - - return tfExplanation; + return doc; +} + +bool SpanScorer::setFreqCurrentDoc() { + if (!more) { + return false; } + doc = spans->doc(); + freq = 0.0; + do { + int32_t matchLength = spans->end() - spans->start(); + freq += getSimilarity()->sloppyFreq(matchLength); + more = spans->next(); + } while (more && (doc == spans->doc())); + return true; +} + +int32_t SpanScorer::docID() { + return doc; +} + +double SpanScorer::score() { + double raw = getSimilarity()->tf(freq) * value; // raw score + return norms ? raw * Similarity::decodeNorm(norms[doc]) : raw; // normalize +} + +ExplanationPtr SpanScorer::explain(int32_t doc) { + ExplanationPtr tfExplanation(newLucene()); + + int32_t expDoc = advance(doc); + + double phraseFreq = expDoc == doc ? freq : 0.0; + tfExplanation->setValue(getSimilarity()->tf(phraseFreq)); + tfExplanation->setDescription(L"tf(phraseFreq=" + StringUtils::toString(phraseFreq) + L")"); + + return tfExplanation; +} + } diff --git a/src/core/search/spans/SpanTermQuery.cpp b/src/core/search/spans/SpanTermQuery.cpp index 47294341..3510a05d 100644 --- a/src/core/search/spans/SpanTermQuery.cpp +++ b/src/core/search/spans/SpanTermQuery.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,82 +11,78 @@ #include "IndexReader.h" #include "MiscUtils.h" -namespace Lucene -{ - SpanTermQuery::SpanTermQuery(TermPtr term) - { - this->term = term; - } - - SpanTermQuery::~SpanTermQuery() - { - } - - TermPtr SpanTermQuery::getTerm() - { - return term; +namespace Lucene { + +SpanTermQuery::SpanTermQuery(const TermPtr& term) { + this->term = term; +} + +SpanTermQuery::~SpanTermQuery() { +} + +TermPtr SpanTermQuery::getTerm() { + return term; +} + +String SpanTermQuery::getField() { + return term->field(); +} + +void SpanTermQuery::extractTerms(SetTerm terms) { + terms.add(term); +} + +String SpanTermQuery::toString(const String& field) { + StringStream buffer; + if (term->field() == field) { + buffer << term->text(); + } else { + buffer << term->toString(); } - - String SpanTermQuery::getField() - { - return term->field(); + buffer << boostString(); + return buffer.str(); +} + +int32_t SpanTermQuery::hashCode() { + int32_t prime = 31; + int32_t result = SpanQuery::hashCode(); + result = prime * result + (term ? 
term->hashCode() : 0); + return result; +} + +bool SpanTermQuery::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - void SpanTermQuery::extractTerms(SetTerm terms) - { - terms.add(term); + if (!SpanQuery::equals(other)) { + return false; } - - String SpanTermQuery::toString(const String& field) - { - StringStream buffer; - if (term->field() == field) - buffer << term->text(); - else - buffer << term->toString(); - buffer << boostString(); - return buffer.str(); + if (!MiscUtils::equalTypes(shared_from_this(), other)) { + return false; } - - int32_t SpanTermQuery::hashCode() - { - int32_t prime = 31; - int32_t result = SpanQuery::hashCode(); - result = prime * result + (term ? term->hashCode() : 0); - return result; + SpanTermQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); + if (!otherQuery) { + return false; } - - bool SpanTermQuery::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - if (!SpanQuery::equals(other)) - return false; - if (!MiscUtils::equalTypes(shared_from_this(), other)) + if (!term) { + if (otherQuery->term) { return false; - SpanTermQueryPtr otherQuery(boost::dynamic_pointer_cast(other)); - if (!otherQuery) - return false; - if (!term) - { - if (otherQuery->term) - return false; } - else if (!term->equals(otherQuery->term)) - return false; - return true; - } - - LuceneObjectPtr SpanTermQuery::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = SpanQuery::clone(other ? other : newLucene(term)); - SpanTermQueryPtr spanFirstQuery(boost::dynamic_pointer_cast(clone)); - spanFirstQuery->term = term; - return spanFirstQuery; - } - - SpansPtr SpanTermQuery::getSpans(IndexReaderPtr reader) - { - return newLucene(reader->termPositions(term), term); + } else if (!term->equals(otherQuery->term)) { + return false; } + return true; +} + +LuceneObjectPtr SpanTermQuery::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = SpanQuery::clone(other ? 
other : newLucene(term)); + SpanTermQueryPtr spanFirstQuery(boost::dynamic_pointer_cast(clone)); + spanFirstQuery->term = term; + return spanFirstQuery; +} + +SpansPtr SpanTermQuery::getSpans(const IndexReaderPtr& reader) { + return newLucene(reader->termPositions(term), term); +} + } diff --git a/src/core/search/spans/SpanWeight.cpp b/src/core/search/spans/SpanWeight.cpp index 44805e28..e8374d0e 100644 --- a/src/core/search/spans/SpanWeight.cpp +++ b/src/core/search/spans/SpanWeight.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,105 +13,100 @@ #include "Similarity.h" #include "StringUtils.h" -namespace Lucene -{ - SpanWeight::SpanWeight(SpanQueryPtr query, SearcherPtr searcher) - { - this->similarity = query->getSimilarity(searcher); - this->query = query; - - terms = SetTerm::newInstance(); - query->extractTerms(terms); - - idfExp = similarity->idfExplain(Collection::newInstance(terms.begin(), terms.end()), searcher); - idf = idfExp->getIdf(); - value = 0.0; - queryNorm = 0.0; - queryWeight = 0.0; - } - - SpanWeight::~SpanWeight() - { - } - - QueryPtr SpanWeight::getQuery() - { - return query; - } - - double SpanWeight::getValue() - { - return value; - } - - double SpanWeight::sumOfSquaredWeights() - { - queryWeight = idf * getQuery()->getBoost(); // compute query weight - return queryWeight * queryWeight; // square it - } - - void SpanWeight::normalize(double norm) - { - queryNorm = norm; - queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document - } - - ScorerPtr SpanWeight::scorer(IndexReaderPtr reader, bool scoreDocsInOrder, bool topScorer) - 
{ - return newLucene(query->getSpans(reader), shared_from_this(), similarity, reader->norms(query->getField())); +namespace Lucene { + +SpanWeight::SpanWeight(const SpanQueryPtr& query, const SearcherPtr& searcher) { + this->similarity = query->getSimilarity(searcher); + this->query = query; + + terms = SetTerm::newInstance(); + query->extractTerms(terms); + + idfExp = similarity->idfExplain(Collection::newInstance(terms.begin(), terms.end()), searcher); + idf = idfExp->getIdf(); + value = 0.0; + queryNorm = 0.0; + queryWeight = 0.0; +} + +SpanWeight::~SpanWeight() { +} + +QueryPtr SpanWeight::getQuery() { + return query; +} + +double SpanWeight::getValue() { + return value; +} + +double SpanWeight::sumOfSquaredWeights() { + queryWeight = idf * getQuery()->getBoost(); // compute query weight + return queryWeight * queryWeight; // square it +} + +void SpanWeight::normalize(double norm) { + queryNorm = norm; + queryWeight *= queryNorm; // normalize query weight + value = queryWeight * idf; // idf for document +} + +ScorerPtr SpanWeight::scorer(const IndexReaderPtr& reader, bool scoreDocsInOrder, bool topScorer) { + return newLucene(query->getSpans(reader), shared_from_this(), similarity, reader->norms(query->getField())); +} + +ExplanationPtr SpanWeight::explain(const IndexReaderPtr& reader, int32_t doc) { + ComplexExplanationPtr result(newLucene()); + result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); + String field(query->getField()); + + ExplanationPtr idfExpl(newLucene(idf, L"idf(" + field + L":" + idfExp->explain() + L")")); + + // explain query weight + ExplanationPtr queryExpl(newLucene()); + queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); + + ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); + if (query->getBoost() != 1.0) { + queryExpl->addDetail(boostExpl); } - - ExplanationPtr SpanWeight::explain(IndexReaderPtr reader, int32_t doc) - { - 
ComplexExplanationPtr result(newLucene()); - result->setDescription(L"weight(" + query->toString() + L" in " + StringUtils::toString(doc) + L"), product of:"); - String field(query->getField()); - - ExplanationPtr idfExpl(newLucene(idf, L"idf(" + field + L":" + idfExp->explain() + L")")); - - // explain query weight - ExplanationPtr queryExpl(newLucene()); - queryExpl->setDescription(L"queryWeight(" + query->toString() + L"), product of:"); - - ExplanationPtr boostExpl(newLucene(query->getBoost(), L"boost")); - if (query->getBoost() != 1.0) - queryExpl->addDetail(boostExpl); - queryExpl->addDetail(idfExpl); - - ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); - queryExpl->addDetail(queryNormExpl); - - queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); - result->addDetail(queryExpl); - - // explain field weight - ComplexExplanationPtr fieldExpl(newLucene()); - fieldExpl->setDescription(L"fieldWeight(" + field + L":" + query->toString(field) + L" in " + StringUtils::toString(doc) + L"), product of:"); - - ExplanationPtr tfExpl(boost::dynamic_pointer_cast(scorer(reader, true, false))->explain(doc)); - fieldExpl->addDetail(tfExpl); - fieldExpl->addDetail(idfExpl); - - ExplanationPtr fieldNormExpl(newLucene()); - ByteArray fieldNorms(reader->norms(field)); - double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; - fieldNormExpl->setValue(fieldNorm); - fieldNormExpl->setDescription(L"fieldNorm(field=" + field + L", doc=" + StringUtils::toString(doc) + L")"); - fieldExpl->addDetail(fieldNormExpl); - - fieldExpl->setMatch(tfExpl->isMatch()); - fieldExpl->setValue(tfExpl->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); - - result->addDetail(fieldExpl); - result->setMatch(fieldExpl->getMatch()); - - // combine them - result->setValue(queryExpl->getValue() * fieldExpl->getValue()); - - if (queryExpl->getValue() == 1.0) - return fieldExpl; - - return result; + queryExpl->addDetail(idfExpl); + + ExplanationPtr queryNormExpl(newLucene(queryNorm, L"queryNorm")); + queryExpl->addDetail(queryNormExpl); + + queryExpl->setValue(boostExpl->getValue() * idfExpl->getValue() * queryNormExpl->getValue()); + result->addDetail(queryExpl); + + // explain field weight + ComplexExplanationPtr fieldExpl(newLucene()); + fieldExpl->setDescription(L"fieldWeight(" + field + L":" + query->toString(field) + L" in " + StringUtils::toString(doc) + L"), product of:"); + + ExplanationPtr tfExpl(boost::dynamic_pointer_cast(scorer(reader, true, false))->explain(doc)); + fieldExpl->addDetail(tfExpl); + fieldExpl->addDetail(idfExpl); + + ExplanationPtr fieldNormExpl(newLucene()); + ByteArray fieldNorms(reader->norms(field)); + double fieldNorm = fieldNorms ? 
Similarity::decodeNorm(fieldNorms[doc]) : 1.0; + fieldNormExpl->setValue(fieldNorm); + fieldNormExpl->setDescription(L"fieldNorm(field=" + field + L", doc=" + StringUtils::toString(doc) + L")"); + fieldExpl->addDetail(fieldNormExpl); + + fieldExpl->setMatch(tfExpl->isMatch()); + fieldExpl->setValue(tfExpl->getValue() * idfExpl->getValue() * fieldNormExpl->getValue()); + + result->addDetail(fieldExpl); + result->setMatch(fieldExpl->getMatch()); + + // combine them + result->setValue(queryExpl->getValue() * fieldExpl->getValue()); + + if (queryExpl->getValue() == 1.0) { + return fieldExpl; } + + return result; +} + } diff --git a/src/core/search/spans/Spans.cpp b/src/core/search/spans/Spans.cpp index b0d114b2..7991d50a 100644 --- a/src/core/search/spans/Spans.cpp +++ b/src/core/search/spans/Spans.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,9 +7,9 @@ #include "LuceneInc.h" #include "Spans.h" -namespace Lucene -{ - Spans::~Spans() - { - } +namespace Lucene { + +Spans::~Spans() { +} + } diff --git a/src/core/search/spans/TermSpans.cpp b/src/core/search/spans/TermSpans.cpp index a2988f52..09a30842 100644 --- a/src/core/search/spans/TermSpans.cpp +++ b/src/core/search/spans/TermSpans.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,100 +9,88 @@ #include "TermPositions.h" #include "Term.h" -namespace Lucene -{ - TermSpans::TermSpans(TermPositionsPtr positions, TermPtr term) - { - this->positions = positions; - this->term = term; - this->_doc = -1; - this->freq = 0; - this->count = 0; - this->position = 0; - } - - TermSpans::~TermSpans() - { - } - - bool TermSpans::next() - { - if (count == freq) - { - if (!positions->next()) - { - _doc = INT_MAX; - return false; - } - _doc = positions->doc(); - freq = positions->freq(); - count = 0; - } - position = positions->nextPosition(); - ++count; - return true; - } - - bool TermSpans::skipTo(int32_t target) - { - if (!positions->skipTo(target)) - { +namespace Lucene { + +TermSpans::TermSpans(const TermPositionsPtr& positions, const TermPtr& term) { + this->positions = positions; + this->term = term; + this->_doc = -1; + this->freq = 0; + this->count = 0; + this->position = 0; +} + +TermSpans::~TermSpans() { +} + +bool TermSpans::next() { + if (count == freq) { + if (!positions->next()) { _doc = INT_MAX; return false; } - _doc = positions->doc(); freq = positions->freq(); count = 0; - - position = positions->nextPosition(); - ++count; - - return true; } - - int32_t TermSpans::doc() - { - return _doc; - } - - int32_t TermSpans::start() - { - return position; - } - - int32_t TermSpans::end() - { - return position + 1; - } - - Collection TermSpans::getPayload() - { - Collection payload(newCollection(ByteArray::newInstance(positions->getPayloadLength()))); - payload[0] = positions->getPayload(payload[0], 0); - return payload; - } - - bool TermSpans::isPayloadAvailable() - { - return positions->isPayloadAvailable(); - } - - String TermSpans::toString() - { - StringStream buffer; - buffer << L"spans(" << term->toString() << L")@"; - if (_doc == -1) - buffer << L"START"; - else if (_doc == INT_MAX) - buffer << L"END"; - else - buffer << _doc << L"-" << position; - return 
buffer.str(); + position = positions->nextPosition(); + ++count; + return true; +} + +bool TermSpans::skipTo(int32_t target) { + if (!positions->skipTo(target)) { + _doc = INT_MAX; + return false; } - - TermPositionsPtr TermSpans::getPositions() - { - return positions; + + _doc = positions->doc(); + freq = positions->freq(); + count = 0; + + position = positions->nextPosition(); + ++count; + + return true; +} + +int32_t TermSpans::doc() { + return _doc; +} + +int32_t TermSpans::start() { + return position; +} + +int32_t TermSpans::end() { + return position + 1; +} + +Collection TermSpans::getPayload() { + Collection payload(newCollection(ByteArray::newInstance(positions->getPayloadLength()))); + payload[0] = positions->getPayload(payload[0], 0); + return payload; +} + +bool TermSpans::isPayloadAvailable() { + return positions->isPayloadAvailable(); +} + +String TermSpans::toString() { + StringStream buffer; + buffer << L"spans(" << term->toString() << L")@"; + if (_doc == -1) { + buffer << L"START"; + } else if (_doc == INT_MAX) { + buffer << L"END"; + } else { + buffer << _doc << L"-" << position; } + return buffer.str(); +} + +TermPositionsPtr TermSpans::getPositions() { + return positions; +} + } diff --git a/src/core/store/BufferedIndexInput.cpp b/src/core/store/BufferedIndexInput.cpp index 7b6d7e10..96d03ddb 100644 --- a/src/core/store/BufferedIndexInput.cpp +++ b/src/core/store/BufferedIndexInput.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,183 +9,187 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// Default buffer size. 
- const int32_t BufferedIndexInput::BUFFER_SIZE = 1024; - - BufferedIndexInput::BufferedIndexInput(int32_t bufferSize) - { - this->bufferSize = bufferSize; - bufferStart = 0; - bufferLength = 0; - bufferPosition = 0; - } - - BufferedIndexInput::~BufferedIndexInput() - { - } - - uint8_t BufferedIndexInput::readByte() - { - if (bufferPosition >= bufferLength) - refill(); - return buffer[bufferPosition++]; +namespace Lucene { + +/// Default buffer size. +const int32_t BufferedIndexInput::BUFFER_SIZE = 1024 * 2; + +BufferedIndexInput::BufferedIndexInput(int32_t bufferSize) { + this->bufferSize = bufferSize; + bufferStart = 0; + bufferLength = 0; + bufferPosition = 0; +} + +BufferedIndexInput::~BufferedIndexInput() { +} + +uint8_t BufferedIndexInput::readByte() { + if (bufferPosition < bufferLength) { + return __buffer[bufferPosition++]; } - - void BufferedIndexInput::setBufferSize(int32_t newSize) - { - if (newSize != bufferSize) - { - bufferSize = newSize; - if (buffer) - { - // Resize the existing buffer and carefully save as many bytes as possible starting from the current bufferPosition - ByteArray _newBuffer(ByteArray::newInstance(newSize)); - int32_t leftInBuffer = bufferLength - bufferPosition; - int32_t numToCopy = leftInBuffer > newSize ? 
newSize : leftInBuffer; - - MiscUtils::arrayCopy(buffer.get(), bufferPosition, _newBuffer.get(), 0, numToCopy); - bufferStart += bufferPosition; - bufferPosition = 0; - bufferLength = numToCopy; - newBuffer(_newBuffer); - } + refill(); + return __buffer[bufferPosition++]; +} + +static const int MAX_VARINT32_LENGHT = 5; + +int32_t BufferedIndexInput::readVInt() { + if (bufferPosition + MAX_VARINT32_LENGHT < bufferLength) { + uint8_t b = __buffer[bufferPosition++]; + int32_t i = (b & 0x7f); + for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { + b = __buffer[bufferPosition++]; + i |= (b & 0x7f) << shift; } + return i; } - - void BufferedIndexInput::newBuffer(ByteArray newBuffer) - { - // Subclasses can do something here - buffer = newBuffer; - } - - int32_t BufferedIndexInput::getBufferSize() - { - return bufferSize; + else { + uint8_t b = readByte(); + int32_t i = (b & 0x7f); + for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { + b = readByte(); + i |= (b & 0x7f) << shift; + } + return i; } - - void BufferedIndexInput::checkBufferSize(int32_t bufferSize) - { - if (bufferSize <= 0) - boost::throw_exception(IllegalArgumentException(L"bufferSize must be greater than 0 (got " + StringUtils::toString(bufferSize) + L")")); +} + +void BufferedIndexInput::setBufferSize(int32_t newSize) { + if (newSize != bufferSize) { + bufferSize = newSize; + if (buffer) { + // Resize the existing buffer and carefully save as many bytes as possible starting from the current bufferPosition + ByteArray _newBuffer(ByteArray::newInstance(newSize)); + int32_t leftInBuffer = bufferLength - bufferPosition; + int32_t numToCopy = leftInBuffer > newSize ? 
newSize : leftInBuffer; + + MiscUtils::arrayCopy(buffer.get(), bufferPosition, _newBuffer.get(), 0, numToCopy); + bufferStart += bufferPosition; + bufferPosition = 0; + bufferLength = numToCopy; + newBuffer(_newBuffer); + } } - - void BufferedIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) - { - readBytes(b, offset, length, true); +} + +void BufferedIndexInput::newBuffer(ByteArray newBuffer) { + // Subclasses can do something here + buffer = newBuffer; + __buffer = newBuffer.get(); +} + +int32_t BufferedIndexInput::getBufferSize() { + return bufferSize; +} + +void BufferedIndexInput::checkBufferSize(int32_t bufferSize) { + if (bufferSize <= 0) { + boost::throw_exception(IllegalArgumentException(L"bufferSize must be greater than 0 (got " + StringUtils::toString(bufferSize) + L")")); } - - void BufferedIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer) - { - if (length <= (bufferLength - bufferPosition)) - { - // the buffer contains enough data to satisfy this request - if (length > 0) // to allow b to be null if length is 0 - MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, length); - bufferPosition += length; +} + +void BufferedIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { + readBytes(b, offset, length, true); +} + +void BufferedIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer) { + if (length <= (bufferLength - bufferPosition)) { + // the buffer contains enough data to satisfy this request + if (length > 0) { // to allow b to be null if length is 0 + MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, length); } - else - { - // the buffer does not have enough data, first serve all we've got - int32_t available = bufferLength - bufferPosition; - if (available > 0) - { - MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, available); - offset += available; - length -= available; - bufferPosition += available; - } - - // and now, read 
the remaining 'length' bytes - if (useBuffer && length < bufferSize) - { - // If the amount left to read is small enough, and we are allowed to use our buffer, - // do it in the usual buffered way: fill the buffer and copy from it - refill(); - if (bufferLength < length) - { - // throw an exception when refill() could not read length bytes - MiscUtils::arrayCopy(buffer.get(), 0, b, offset, bufferLength); - boost::throw_exception(IOException(L"Read past EOF")); - } - else - { - MiscUtils::arrayCopy(buffer.get(), 0, b, offset, length); - bufferPosition = length; - } + bufferPosition += length; + } else { + // the buffer does not have enough data, first serve all we've got + int32_t available = bufferLength - bufferPosition; + if (available > 0) { + MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, available); + offset += available; + length -= available; + bufferPosition += available; + } + + // and now, read the remaining 'length' bytes + if (useBuffer && length < bufferSize) { + // If the amount left to read is small enough, and we are allowed to use our buffer, + // do it in the usual buffered way: fill the buffer and copy from it + refill(); + if (bufferLength < length) { + // throw an exception when refill() could not read length bytes + MiscUtils::arrayCopy(buffer.get(), 0, b, offset, bufferLength); + boost::throw_exception(IOException(L"Read past EOF")); + } else { + MiscUtils::arrayCopy(buffer.get(), 0, b, offset, length); + bufferPosition = length; } - else - { - // The amount left to read is larger than the buffer or we've been asked to not use - // our buffer - there's no performance reason not to read it all at once. - // Note that unlike the previous code of this function, there is no need to do a seek - // here, because there's no need to reread what we had in the buffer. 
- int64_t after = bufferStart + bufferPosition + length; - if (after > this->length()) - boost::throw_exception(IOException(L"Read past EOF")); - readInternal(b, offset, length); - bufferStart = after; - bufferPosition = 0; - bufferLength = 0; // trigger refill() on read + } else { + // The amount left to read is larger than the buffer or we've been asked to not use + // our buffer - there's no performance reason not to read it all at once. + // Note that unlike the previous code of this function, there is no need to do a seek + // here, because there's no need to reread what we had in the buffer. + int64_t after = bufferStart + bufferPosition + length; + if (after > this->length()) { + boost::throw_exception(IOException(L"Read past EOF")); } + readInternal(b, offset, length); + bufferStart = after; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read } } - - void BufferedIndexInput::refill() - { - int64_t start = bufferStart + bufferPosition; - int64_t end = start + bufferSize; - if (end > length()) // don't read past EOF - end = length(); - int32_t newLength = (int32_t)(end - start); - if (newLength <= 0) - boost::throw_exception(IOException(L"Read past EOF")); - - if (!buffer) - { - newBuffer(ByteArray::newInstance(bufferSize)); // allocate buffer lazily - seekInternal(bufferStart); - } - readInternal(buffer.get(), 0, newLength); - bufferLength = newLength; - bufferStart = start; - bufferPosition = 0; - } - - void BufferedIndexInput::close() - { - bufferStart = 0; - bufferLength = 0; - bufferPosition = 0; +} + +void BufferedIndexInput::refill() { + int64_t start = bufferStart + bufferPosition; + int64_t end = start + bufferSize; + if (end > length()) { // don't read past EOF + end = length(); } - - int64_t BufferedIndexInput::getFilePointer() - { - return bufferStart + bufferPosition; + int32_t newLength = (int32_t)(end - start); + if (newLength <= 0) { + boost::throw_exception(IOException(L"Read past EOF")); } - - void 
BufferedIndexInput::seek(int64_t pos) - { - if (pos >= bufferStart && pos < (bufferStart + bufferLength)) - bufferPosition = (int32_t)(pos - bufferStart); // seek within buffer - else - { - bufferStart = pos; - bufferPosition = 0; - bufferLength = 0; // trigger refill() on read() - seekInternal(pos); - } + + if (!buffer) { + newBuffer(ByteArray::newInstance(bufferSize)); // allocate buffer lazily + seekInternal(bufferStart); } - - LuceneObjectPtr BufferedIndexInput::clone(LuceneObjectPtr other) - { - BufferedIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(IndexInput::clone(other))); - cloneIndexInput->bufferSize = bufferSize; - cloneIndexInput->buffer.reset(); - cloneIndexInput->bufferLength = 0; - cloneIndexInput->bufferPosition = 0; - cloneIndexInput->bufferStart = getFilePointer(); - return cloneIndexInput; + readInternal(__buffer, 0, newLength); + bufferLength = newLength; + bufferStart = start; + bufferPosition = 0; +} + +void BufferedIndexInput::close() { + bufferStart = 0; + bufferLength = 0; + bufferPosition = 0; +} + +int64_t BufferedIndexInput::getFilePointer() { + return bufferStart + bufferPosition; +} + +void BufferedIndexInput::seek(int64_t pos) { + if (pos >= bufferStart && pos < (bufferStart + bufferLength)) { + bufferPosition = (int32_t)(pos - bufferStart); // seek within buffer + } else { + bufferStart = pos; + bufferPosition = 0; + bufferLength = 0; // trigger refill() on read() + seekInternal(pos); } } + +LuceneObjectPtr BufferedIndexInput::clone(const LuceneObjectPtr& other) { + BufferedIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(IndexInput::clone(other))); + cloneIndexInput->bufferSize = bufferSize; + cloneIndexInput->buffer.reset(); + cloneIndexInput->bufferLength = 0; + cloneIndexInput->bufferPosition = 0; + cloneIndexInput->bufferStart = getFilePointer(); + return cloneIndexInput; +} + +} diff --git a/src/core/store/BufferedIndexOutput.cpp b/src/core/store/BufferedIndexOutput.cpp index e4771c7b..b033b2fb 100644 --- 
a/src/core/store/BufferedIndexOutput.cpp +++ b/src/core/store/BufferedIndexOutput.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,101 +8,88 @@ #include "BufferedIndexOutput.h" #include "MiscUtils.h" -namespace Lucene -{ - const int32_t BufferedIndexOutput::BUFFER_SIZE = 16384; - - BufferedIndexOutput::BufferedIndexOutput() - { - bufferStart = 0; - bufferPosition = 0; - buffer = ByteArray::newInstance(BUFFER_SIZE); - } - - BufferedIndexOutput::~BufferedIndexOutput() - { +namespace Lucene { + +const int32_t BufferedIndexOutput::BUFFER_SIZE = 16384; + +BufferedIndexOutput::BufferedIndexOutput() { + bufferStart = 0; + bufferPosition = 0; + buffer = ByteArray::newInstance(BUFFER_SIZE); +} + +BufferedIndexOutput::~BufferedIndexOutput() { +} + +void BufferedIndexOutput::writeByte(uint8_t b) { + if (bufferPosition >= BUFFER_SIZE) { + flush(); } - - void BufferedIndexOutput::writeByte(uint8_t b) - { - if (bufferPosition >= BUFFER_SIZE) + buffer[bufferPosition++] = b; +} + +void BufferedIndexOutput::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { + int32_t bytesLeft = BUFFER_SIZE - bufferPosition; + if (bytesLeft >= length) { + // we add the data to the end of the buffer + MiscUtils::arrayCopy(b, offset, buffer.get(), bufferPosition, length); + bufferPosition += length; + // if the buffer is full, flush it + if (BUFFER_SIZE - bufferPosition == 0) { flush(); - buffer[bufferPosition++] = b; - } - - void BufferedIndexOutput::writeBytes(const uint8_t* b, int32_t offset, int32_t length) - { - int32_t bytesLeft = BUFFER_SIZE - bufferPosition; - if (bytesLeft >= length) - { - // we add the data 
to the end of the buffer - MiscUtils::arrayCopy(b, offset, buffer.get(), bufferPosition, length); - bufferPosition += length; - // if the buffer is full, flush it - if (BUFFER_SIZE - bufferPosition == 0) - flush(); } - else if (length > BUFFER_SIZE) - { - // we flush the buffer - if (bufferPosition > 0) - flush(); - // and write data at once - flushBuffer(b, offset, length); - bufferStart += length; + } else if (length > BUFFER_SIZE) { + // we flush the buffer + if (bufferPosition > 0) { + flush(); } - else - { - // we fill/flush the buffer (until the input is written) - int32_t pos = 0; // position in the input data - int32_t pieceLength; - while (pos < length) - { - pieceLength = (length - pos < bytesLeft) ? length - pos : bytesLeft; - MiscUtils::arrayCopy(b, pos + offset, buffer.get(), bufferPosition, pieceLength); - pos += pieceLength; - bufferPosition += pieceLength; - // if the buffer is full, flush it - bytesLeft = BUFFER_SIZE - bufferPosition; - if (bytesLeft == 0) - { - flush(); - bytesLeft = BUFFER_SIZE; - } + // and write data at once + flushBuffer(b, offset, length); + bufferStart += length; + } else { + // we fill/flush the buffer (until the input is written) + int32_t pos = 0; // position in the input data + int32_t pieceLength; + while (pos < length) { + pieceLength = (length - pos < bytesLeft) ? 
length - pos : bytesLeft; + MiscUtils::arrayCopy(b, pos + offset, buffer.get(), bufferPosition, pieceLength); + pos += pieceLength; + bufferPosition += pieceLength; + // if the buffer is full, flush it + bytesLeft = BUFFER_SIZE - bufferPosition; + if (bytesLeft == 0) { + flush(); + bytesLeft = BUFFER_SIZE; } } } - - void BufferedIndexOutput::flush() - { - flushBuffer(buffer.get(), bufferPosition); - bufferStart += bufferPosition; - bufferPosition = 0; - } - - void BufferedIndexOutput::flushBuffer(const uint8_t* b, int32_t length) - { - flushBuffer(b, 0, length); - } - - void BufferedIndexOutput::flushBuffer(const uint8_t* b, int32_t offset, int32_t length) - { - // override - } - - void BufferedIndexOutput::close() - { - flush(); - } - - int64_t BufferedIndexOutput::getFilePointer() - { - return bufferStart + bufferPosition; - } - - void BufferedIndexOutput::seek(int64_t pos) - { - flush(); - bufferStart = pos; - } +} + +void BufferedIndexOutput::flush() { + flushBuffer(buffer.get(), bufferPosition); + bufferStart += bufferPosition; + bufferPosition = 0; +} + +void BufferedIndexOutput::flushBuffer(const uint8_t* b, int32_t length) { + flushBuffer(b, 0, length); +} + +void BufferedIndexOutput::flushBuffer(const uint8_t* b, int32_t offset, int32_t length) { + // override +} + +void BufferedIndexOutput::close() { + flush(); +} + +int64_t BufferedIndexOutput::getFilePointer() { + return bufferStart + bufferPosition; +} + +void BufferedIndexOutput::seek(int64_t pos) { + flush(); + bufferStart = pos; +} + } diff --git a/src/core/store/ChecksumIndexInput.cpp b/src/core/store/ChecksumIndexInput.cpp index 3711767a..d01e2181 100644 --- a/src/core/store/ChecksumIndexInput.cpp +++ b/src/core/store/ChecksumIndexInput.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,61 +7,52 @@ #include "LuceneInc.h" #include "ChecksumIndexInput.h" -namespace Lucene -{ - ChecksumIndexInput::ChecksumIndexInput(IndexInputPtr main) - { - this->main = main; - } - - ChecksumIndexInput::~ChecksumIndexInput() - { - } - - uint8_t ChecksumIndexInput::readByte() - { - uint8_t b = main->readByte(); - checksum.process_byte(b); - return b; - } - - void ChecksumIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) - { - main->readBytes(b, offset, length); - checksum.process_bytes(b + offset, length); - } - - int64_t ChecksumIndexInput::getChecksum() - { - return checksum.checksum(); - } - - void ChecksumIndexInput::close() - { - main->close(); - } - - int64_t ChecksumIndexInput::getFilePointer() - { - return main->getFilePointer(); - } - - void ChecksumIndexInput::seek(int64_t pos) - { - boost::throw_exception(RuntimeException(L"Seek not allowed")); - } - - int64_t ChecksumIndexInput::length() - { - return main->length(); - } - - LuceneObjectPtr ChecksumIndexInput::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = IndexInput::clone(other ? 
other : newLucene(main)); - ChecksumIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); - cloneIndexInput->main = main; - cloneIndexInput->checksum = checksum; - return cloneIndexInput; - } +namespace Lucene { + +ChecksumIndexInput::ChecksumIndexInput(const IndexInputPtr& main) { + this->main = main; +} + +ChecksumIndexInput::~ChecksumIndexInput() { +} + +uint8_t ChecksumIndexInput::readByte() { + uint8_t b = main->readByte(); + checksum.process_byte(b); + return b; +} + +void ChecksumIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { + main->readBytes(b, offset, length); + checksum.process_bytes(b + offset, length); +} + +int64_t ChecksumIndexInput::getChecksum() { + return checksum.checksum(); +} + +void ChecksumIndexInput::close() { + main->close(); +} + +int64_t ChecksumIndexInput::getFilePointer() { + return main->getFilePointer(); +} + +void ChecksumIndexInput::seek(int64_t pos) { + boost::throw_exception(RuntimeException(L"Seek not allowed")); +} + +int64_t ChecksumIndexInput::length() { + return main->length(); +} + +LuceneObjectPtr ChecksumIndexInput::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene(main)); + ChecksumIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); + cloneIndexInput->main = main; + cloneIndexInput->checksum = checksum; + return cloneIndexInput; +} + } diff --git a/src/core/store/ChecksumIndexOutput.cpp b/src/core/store/ChecksumIndexOutput.cpp index b56a1526..62f4171a 100644 --- a/src/core/store/ChecksumIndexOutput.cpp +++ b/src/core/store/ChecksumIndexOutput.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,74 +7,63 @@ #include "LuceneInc.h" #include "ChecksumIndexOutput.h" -namespace Lucene -{ - ChecksumIndexOutput::ChecksumIndexOutput(IndexOutputPtr main) - { - this->main = main; - } - - ChecksumIndexOutput::~ChecksumIndexOutput() - { - } - - void ChecksumIndexOutput::writeByte(uint8_t b) - { - checksum.process_byte(b); - main->writeByte(b); - } - - void ChecksumIndexOutput::writeBytes(const uint8_t* b, int32_t offset, int32_t length) - { - checksum.process_bytes(b + offset, length); - main->writeBytes(b, offset, length); - } - - int64_t ChecksumIndexOutput::getChecksum() - { - return checksum.checksum(); - } - - void ChecksumIndexOutput::flush() - { - main->flush(); - } - - void ChecksumIndexOutput::close() - { - main->close(); - } - - int64_t ChecksumIndexOutput::getFilePointer() - { - return main->getFilePointer(); - } - - void ChecksumIndexOutput::seek(int64_t pos) - { - boost::throw_exception(RuntimeException(L"Seek not allowed")); - } - - void ChecksumIndexOutput::prepareCommit() - { - int64_t checksum = getChecksum(); - - // Intentionally write a mismatched checksum. This is because we want to 1) test, as best we can, that we - // are able to write a long to the file, but 2) not actually "commit" the file yet. This (prepare commit) - // is phase 1 of a two-phase commit. 
- int64_t pos = main->getFilePointer(); - main->writeLong(checksum - 1); - main->flush(); - main->seek(pos); - } - - void ChecksumIndexOutput::finishCommit() - { - main->writeLong(getChecksum()); - } - - int64_t ChecksumIndexOutput::length() - { - return main->length(); - } +namespace Lucene { + +ChecksumIndexOutput::ChecksumIndexOutput(const IndexOutputPtr& main) { + this->main = main; +} + +ChecksumIndexOutput::~ChecksumIndexOutput() { +} + +void ChecksumIndexOutput::writeByte(uint8_t b) { + checksum.process_byte(b); + main->writeByte(b); +} + +void ChecksumIndexOutput::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { + checksum.process_bytes(b + offset, length); + main->writeBytes(b, offset, length); +} + +int64_t ChecksumIndexOutput::getChecksum() { + return checksum.checksum(); +} + +void ChecksumIndexOutput::flush() { + main->flush(); +} + +void ChecksumIndexOutput::close() { + main->close(); +} + +int64_t ChecksumIndexOutput::getFilePointer() { + return main->getFilePointer(); +} + +void ChecksumIndexOutput::seek(int64_t pos) { + boost::throw_exception(RuntimeException(L"Seek not allowed")); +} + +void ChecksumIndexOutput::prepareCommit() { + int64_t checksum = getChecksum(); + + // Intentionally write a mismatched checksum. This is because we want to 1) test, as best we can, that we + // are able to write a long to the file, but 2) not actually "commit" the file yet. This (prepare commit) + // is phase 1 of a two-phase commit. 
+ int64_t pos = main->getFilePointer(); + main->writeLong(checksum - 1); + main->flush(); + main->seek(pos); +} + +void ChecksumIndexOutput::finishCommit() { + main->writeLong(getChecksum()); +} + +int64_t ChecksumIndexOutput::length() { + return main->length(); +} + } diff --git a/src/core/store/Directory.cpp b/src/core/store/Directory.cpp index 6730e0af..f7a6c4b6 100644 --- a/src/core/store/Directory.cpp +++ b/src/core/store/Directory.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,126 +12,109 @@ #include "IndexInput.h" #include "IndexOutput.h" -namespace Lucene -{ - Directory::Directory() - { - isOpen = true; - } - - Directory::~Directory() - { - } - - void Directory::close() - { - // override - } - - void Directory::sync(const String& name) - { - } - - IndexInputPtr Directory::openInput(const String& name, int32_t bufferSize) - { - return openInput(name); - } - - LockPtr Directory::makeLock(const String& name) - { - return lockFactory->makeLock(name); - } - - void Directory::clearLock(const String& name) - { - if (lockFactory) - lockFactory->clearLock(name); - } - - void Directory::setLockFactory(LockFactoryPtr lockFactory) - { - BOOST_ASSERT(lockFactory); - this->lockFactory = lockFactory; - this->lockFactory->setLockPrefix(getLockID()); - } - - LockFactoryPtr Directory::getLockFactory() - { - return lockFactory; - } - - String Directory::getLockID() - { - return toString(); - } - - String Directory::toString() - { - return LuceneObject::toString() + L" lockFactory=" + getLockFactory()->toString(); +namespace Lucene { + +Directory::Directory() { + isOpen = true; +} + 
+Directory::~Directory() { +} + +void Directory::close() { + // override +} + +void Directory::sync(const String& name) { +} + +IndexInputPtr Directory::openInput(const String& name, int32_t bufferSize) { + return openInput(name); +} + +LockPtr Directory::makeLock(const String& name) { + return lockFactory->makeLock(name); +} + +void Directory::clearLock(const String& name) { + if (lockFactory) { + lockFactory->clearLock(name); } - - void Directory::copy(DirectoryPtr src, DirectoryPtr dest, bool closeDirSrc) - { - HashSet files(src->listAll()); - - ByteArray buf(ByteArray::newInstance(BufferedIndexOutput::BUFFER_SIZE)); - - for (HashSet::iterator file = files.begin(); file != files.end(); ++file) - { - if (!IndexFileNameFilter::accept(L"", *file)) - continue; - - IndexOutputPtr os; - IndexInputPtr is; - - LuceneException finally; - try - { - // create file in dest directory - os = dest->createOutput(*file); - // read current file - is = src->openInput(*file); - // and copy to dest directory - int64_t len = is->length(); - int64_t readCount = 0; - while (readCount < len) - { - int32_t toRead = readCount + BufferedIndexOutput::BUFFER_SIZE > len ? (int32_t)(len - readCount) : BufferedIndexOutput::BUFFER_SIZE; - is->readBytes(buf.get(), 0, toRead); - os->writeBytes(buf.get(), toRead); - readCount += toRead; - } - } - catch (LuceneException& e) - { - finally = e; - } - // graceful cleanup - try - { - if (os) - os->close(); - } - catch (...) 
- { +} + +void Directory::setLockFactory(const LockFactoryPtr& lockFactory) { + BOOST_ASSERT(lockFactory); + this->lockFactory = lockFactory; + this->lockFactory->setLockPrefix(getLockID()); +} + +LockFactoryPtr Directory::getLockFactory() { + return lockFactory; +} + +String Directory::getLockID() { + return toString(); +} + +String Directory::toString() { + return LuceneObject::toString() + L" lockFactory=" + getLockFactory()->toString(); +} + +void Directory::copy(const DirectoryPtr& src, const DirectoryPtr& dest, bool closeDirSrc) { + HashSet files(src->listAll()); + + ByteArray buf(ByteArray::newInstance(BufferedIndexOutput::BUFFER_SIZE)); + + for (HashSet::iterator file = files.begin(); file != files.end(); ++file) { + if (!IndexFileNameFilter::accept(L"", *file)) { + continue; + } + + IndexOutputPtr os; + IndexInputPtr is; + + LuceneException finally; + try { + // create file in dest directory + os = dest->createOutput(*file); + // read current file + is = src->openInput(*file); + // and copy to dest directory + int64_t len = is->length(); + int64_t readCount = 0; + while (readCount < len) { + int32_t toRead = readCount + BufferedIndexOutput::BUFFER_SIZE > len ? (int32_t)(len - readCount) : BufferedIndexOutput::BUFFER_SIZE; + is->readBytes(buf.get(), 0, toRead); + os->writeBytes(buf.get(), toRead); + readCount += toRead; } - try - { - if (is) - is->close(); + } catch (LuceneException& e) { + finally = e; + } + // graceful cleanup + try { + if (os) { + os->close(); } - catch (...) - { + } catch (...) { + } + try { + if (is) { + is->close(); } - finally.throwException(); + } catch (...) 
{ } - if (closeDirSrc) - src->close(); + finally.throwException(); + } + if (closeDirSrc) { + src->close(); } - - void Directory::ensureOpen() - { - if (!isOpen) - boost::throw_exception(AlreadyClosedException(L"This directory is closed")); +} + +void Directory::ensureOpen() { + if (!isOpen) { + boost::throw_exception(AlreadyClosedException(L"This directory is closed")); } } + +} diff --git a/src/core/store/FSDirectory.cpp b/src/core/store/FSDirectory.cpp index 61655e41..2cb257ac 100644 --- a/src/core/store/FSDirectory.cpp +++ b/src/core/store/FSDirectory.cpp @@ -1,11 +1,10 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" -#include #include "FSDirectory.h" #include "NativeFSLockFactory.h" #include "SimpleFSDirectory.h" @@ -14,232 +13,229 @@ #include "FileUtils.h" #include "StringUtils.h" +#if defined(_WIN32) + #include +#elif defined(__APPLE__) + #include +#else + #include +#endif +#include + extern "C" { #include "../util/md5/md5.h" } -namespace Lucene -{ - /// Default read chunk size. This is a conditional default based on operating system. 
- #ifdef LPP_BUILD_64 - const int32_t FSDirectory::DEFAULT_READ_CHUNK_SIZE = INT_MAX; - #else - const int32_t FSDirectory::DEFAULT_READ_CHUNK_SIZE = 100 * 1024 * 1024; // 100mb - #endif - - FSDirectory::FSDirectory(const String& path, LockFactoryPtr lockFactory) - { - checked = false; - chunkSize = DEFAULT_READ_CHUNK_SIZE; - - // new ctors use always NativeFSLockFactory as default - if (!lockFactory) - lockFactory = newLucene(); - directory = path; - - if (FileUtils::fileExists(directory) && !FileUtils::isDirectory(directory)) - boost::throw_exception(NoSuchDirectoryException(L"File '" + directory + L"' exists but is not a directory")); - - setLockFactory(lockFactory); - - // for filesystem based LockFactory, delete the lockPrefix if the locks are placed - // in index dir. if no index dir is given, set ourselves - FSLockFactoryPtr lf(boost::dynamic_pointer_cast(lockFactory)); - - if (lf) - { - if (lf->getLockDir().empty()) - { - lf->setLockDir(directory); - lf->setLockPrefix(L""); - } - else if (lf->getLockDir() == directory) - lf->setLockPrefix(L""); - } - } - - FSDirectory::~FSDirectory() - { - } - - FSDirectoryPtr FSDirectory::open(const String& path) - { - return open(path, LockFactoryPtr()); +namespace Lucene { + +/// Default read chunk size. This is a conditional default based on operating system. 
+#ifdef LPP_BUILD_64 +const int32_t FSDirectory::DEFAULT_READ_CHUNK_SIZE = INT_MAX; +#else +const int32_t FSDirectory::DEFAULT_READ_CHUNK_SIZE = 100 * 1024 * 1024; // 100mb +#endif + +FSDirectory::FSDirectory(const String& path, const LockFactoryPtr& lockFactory) { + checked = false; + chunkSize = DEFAULT_READ_CHUNK_SIZE; + + LockFactoryPtr _lockFactory(lockFactory); + + // new ctors use always NativeFSLockFactory as default + if (!_lockFactory) { + _lockFactory = newLucene(); } - - FSDirectoryPtr FSDirectory::open(const String& path, LockFactoryPtr lockFactory) - { - return newLucene(path, lockFactory); + directory = path; + + if (FileUtils::fileExists(directory) && !FileUtils::isDirectory(directory)) { + boost::throw_exception(NoSuchDirectoryException(L"File '" + directory + L"' exists but is not a directory")); } - - void FSDirectory::createDir() - { - if (!checked) - { - if (!FileUtils::fileExists(directory) && !FileUtils::createDirectory(directory)) - boost::throw_exception(IOException(L"Cannot create directory: " + directory)); - checked = true; + + setLockFactory(_lockFactory); + + // for filesystem based LockFactory, delete the lockPrefix if the locks are placed + // in index dir. 
if no index dir is given, set ourselves + FSLockFactoryPtr lf(boost::dynamic_pointer_cast(_lockFactory)); + + if (lf) { + if (lf->getLockDir().empty()) { + lf->setLockDir(directory); + lf->setLockPrefix(L""); + } else if (lf->getLockDir() == directory) { + lf->setLockPrefix(L""); } } - - void FSDirectory::initOutput(const String& name) - { - ensureOpen(); - createDir(); - String path(FileUtils::joinPath(directory, name)); - if (FileUtils::fileExists(path) && !FileUtils::removeFile(path)) // delete existing, if any - boost::throw_exception(IOException(L"Cannot overwrite: " + name)); - } +} - HashSet FSDirectory::listAll(const String& dir) - { - if (!FileUtils::fileExists(dir)) - boost::throw_exception(NoSuchDirectoryException(L"Directory '" + dir + L"' does not exist")); - else if (!FileUtils::isDirectory(dir)) - boost::throw_exception(NoSuchDirectoryException(L"File '" + dir + L"' exists but is not a directory")); - - HashSet result(HashSet::newInstance()); - - // Exclude subdirs - if (!FileUtils::listDirectory(dir, true, result)) - boost::throw_exception(IOException(L"Directory '" + dir + L"' exists and is a directory, but cannot be listed")); - - return result; - } - - HashSet FSDirectory::listAll() - { - ensureOpen(); - return listAll(directory); - } - - bool FSDirectory::fileExists(const String& name) - { - ensureOpen(); - return FileUtils::fileExists(FileUtils::joinPath(directory, name)); - } - - uint64_t FSDirectory::fileModified(const String& name) - { - ensureOpen(); - return FileUtils::fileModified(FileUtils::joinPath(directory, name)); +FSDirectory::~FSDirectory() { +} + +FSDirectoryPtr FSDirectory::open(const String& path) { + return open(path, LockFactoryPtr()); +} + +FSDirectoryPtr FSDirectory::open(const String& path, const LockFactoryPtr& lockFactory) { + return newLucene(path, lockFactory); +} + +void FSDirectory::createDir() { + if (!checked) { + if (!FileUtils::fileExists(directory) && !FileUtils::createDirectory(directory)) { + 
boost::throw_exception(IOException(L"Cannot create directory: " + directory)); + } + checked = true; } - - uint64_t FSDirectory::fileModified(const String& directory, const String& name) - { - return FileUtils::fileModified(FileUtils::joinPath(directory, name)); +} + +void FSDirectory::initOutput(const String& name) { + ensureOpen(); + createDir(); + String path(FileUtils::joinPath(directory, name)); + if (FileUtils::fileExists(path) && !FileUtils::removeFile(path)) { // delete existing, if any + boost::throw_exception(IOException(L"Cannot overwrite: " + name)); } - - void FSDirectory::touchFile(const String& name) - { - ensureOpen(); - FileUtils::touchFile(FileUtils::joinPath(directory, name)); +} + +HashSet FSDirectory::listAll(const String& dir) { + if (!FileUtils::fileExists(dir)) { + boost::throw_exception(NoSuchDirectoryException(L"Directory '" + dir + L"' does not exist")); + } else if (!FileUtils::isDirectory(dir)) { + boost::throw_exception(NoSuchDirectoryException(L"File '" + dir + L"' exists but is not a directory")); } - - void FSDirectory::deleteFile(const String& name) - { - ensureOpen(); - if (!FileUtils::removeFile(FileUtils::joinPath(directory, name))) - boost::throw_exception(IOException(L"Cannot delete: " + name)); + + HashSet result(HashSet::newInstance()); + + // Exclude subdirs + if (!FileUtils::listDirectory(dir, true, result)) { + boost::throw_exception(IOException(L"Directory '" + dir + L"' exists and is a directory, but cannot be listed")); } - - int64_t FSDirectory::fileLength(const String& name) - { - ensureOpen(); - return FileUtils::fileLength(FileUtils::joinPath(directory, name)); + + return result; +} + +HashSet FSDirectory::listAll() { + ensureOpen(); + return listAll(directory); +} + +bool FSDirectory::fileExists(const String& name) { + ensureOpen(); + return FileUtils::fileExists(FileUtils::joinPath(directory, name)); +} + +uint64_t FSDirectory::fileModified(const String& name) { + ensureOpen(); + return 
FileUtils::fileModified(FileUtils::joinPath(directory, name)); +} + +uint64_t FSDirectory::fileModified(const String& directory, const String& name) { + return FileUtils::fileModified(FileUtils::joinPath(directory, name)); +} + +void FSDirectory::touchFile(const String& name) { + ensureOpen(); + FileUtils::touchFile(FileUtils::joinPath(directory, name)); +} + +void FSDirectory::deleteFile(const String& name) { + ensureOpen(); + if (!FileUtils::removeFile(FileUtils::joinPath(directory, name))) { + boost::throw_exception(IOException(L"Cannot delete: " + name)); } - - void FSDirectory::sync(const String& name) - { - ensureOpen(); - String path(FileUtils::joinPath(directory, name)); - bool success = false; - - for (int32_t retryCount = 0; retryCount < 5; ++retryCount) - { - std::ofstream syncFile; - try - { - syncFile.open(StringUtils::toUTF8(path).c_str(), std::ios::binary | std::ios::in | std::ios::out); - } - catch (...) - { - } - - if (syncFile.is_open()) - { - syncFile.close(); - success = true; - break; - } - - LuceneThread::threadSleep(5); // pause 5 msec +} + +int64_t FSDirectory::fileLength(const String& name) { + ensureOpen(); + return FileUtils::fileLength(FileUtils::joinPath(directory, name)); +} + +void FSDirectory::sync(const String& name) { + ensureOpen(); + String path(FileUtils::joinPath(directory, name)); + bool success = false; + + for (int32_t retryCount = 0; retryCount < 5; ++retryCount) { + boost::iostreams::file_descriptor syncFile; + try { + syncFile.open(boost::filesystem::path(path)); + } catch (...) 
{ } - if (!success) - boost::throw_exception(IOException(L"Sync failure: " + path)); - } - - IndexInputPtr FSDirectory::openInput(const String& name) - { - ensureOpen(); - return openInput(name, BufferedIndexInput::BUFFER_SIZE); - } - - IndexInputPtr FSDirectory::openInput(const String& name, int32_t bufferSize) - { - return Directory::openInput(name, bufferSize); - } - - String FSDirectory::getLockID() - { - ensureOpen(); - md5_state_t state; - md5_byte_t digest[16]; - - md5_init(&state); - md5_append(&state, (const md5_byte_t *)StringUtils::toUTF8(directory).c_str(), directory.size()); - md5_finish(&state, digest); - - static const wchar_t* hexDigits = L"0123456789abcdef"; - - String lockID(L"lucene-"); - for (int32_t i = 0; i < 16; ++i) - { - lockID += hexDigits[(digest[i] >> 4) & 0x0f]; - lockID += hexDigits[digest[i] & 0x0f]; + if (syncFile.is_open()) { + boost::iostreams::file_descriptor::handle_type fd = syncFile.handle(); +#if defined(_WIN32) + bool ok = ::FlushFileBuffers(fd) != 0; +#elif defined(__APPLE__) + bool ok = fcntl(fd, F_FULLFSYNC) == 0; +#else + bool ok = fsync(fd) == 0; +#endif + syncFile.close(); + if (ok) + success = true; + break; } - - return lockID; - } - - void FSDirectory::close() - { - SyncLock syncLock(this); - isOpen = false; - } - - String FSDirectory::toString() - { - return getClassName() + L"@" + directory + L" lockFactory=" + getLockFactory()->toString(); - } - - String FSDirectory::getFile() - { - ensureOpen(); - return directory; + + LuceneThread::threadSleep(5); // pause 5 msec } - - void FSDirectory::setReadChunkSize(int32_t chunkSize) - { - #ifndef LPP_BUILD_64 - this->chunkSize = chunkSize; - #endif + + if (!success) { + boost::throw_exception(IOException(L"Sync failure: " + path)); } - - int32_t FSDirectory::getReadChunkSize() - { - return chunkSize; +} + +IndexInputPtr FSDirectory::openInput(const String& name) { + ensureOpen(); + return openInput(name, BufferedIndexInput::BUFFER_SIZE); +} + +IndexInputPtr 
FSDirectory::openInput(const String& name, int32_t bufferSize) { + return Directory::openInput(name, bufferSize); +} + +String FSDirectory::getLockID() { + ensureOpen(); + md5_state_t state; + md5_byte_t digest[16]; + + md5_init(&state); + md5_append(&state, (const md5_byte_t*)StringUtils::toUTF8(directory).c_str(), directory.size()); + md5_finish(&state, digest); + + static const wchar_t* hexDigits = L"0123456789abcdef"; + + String lockID(L"lucene-"); + for (int32_t i = 0; i < 16; ++i) { + lockID += hexDigits[(digest[i] >> 4) & 0x0f]; + lockID += hexDigits[digest[i] & 0x0f]; } + + return lockID; +} + +void FSDirectory::close() { + SyncLock syncLock(this); + isOpen = false; +} + +String FSDirectory::toString() { + return getClassName() + L"@" + directory + L" lockFactory=" + getLockFactory()->toString(); +} + +String FSDirectory::getFile() { + ensureOpen(); + return directory; +} + +void FSDirectory::setReadChunkSize(int32_t chunkSize) { +#ifndef LPP_BUILD_64 + this->chunkSize = chunkSize; +#endif +} + +int32_t FSDirectory::getReadChunkSize() { + return chunkSize; +} + } diff --git a/src/core/store/FSLockFactory.cpp b/src/core/store/FSLockFactory.cpp index 6f9e49d1..ba40de16 100644 --- a/src/core/store/FSLockFactory.cpp +++ b/src/core/store/FSLockFactory.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,25 +7,23 @@ #include "LuceneInc.h" #include "FSLockFactory.h" -namespace Lucene -{ - FSLockFactory::FSLockFactory() - { - } +namespace Lucene { - FSLockFactory::~FSLockFactory() - { - } - - void FSLockFactory::setLockDir(const String& lockDir) - { - if (!this->lockDir.empty()) - boost::throw_exception(IllegalStateException(L"You can set the lock directory for this factory only once.")); - this->lockDir = lockDir; - } - - String FSLockFactory::getLockDir() - { - return lockDir; +FSLockFactory::FSLockFactory() { +} + +FSLockFactory::~FSLockFactory() { +} + +void FSLockFactory::setLockDir(const String& lockDir) { + if (!this->lockDir.empty()) { + boost::throw_exception(IllegalStateException(L"You can set the lock directory for this factory only once.")); } + this->lockDir = lockDir; +} + +String FSLockFactory::getLockDir() { + return lockDir; +} + } diff --git a/src/core/store/FileSwitchDirectory.cpp b/src/core/store/FileSwitchDirectory.cpp index 54d441eb..081ce450 100644 --- a/src/core/store/FileSwitchDirectory.cpp +++ b/src/core/store/FileSwitchDirectory.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,107 +7,88 @@ #include "LuceneInc.h" #include "FileSwitchDirectory.h" -namespace Lucene -{ - FileSwitchDirectory::FileSwitchDirectory(HashSet primaryExtensions, DirectoryPtr primaryDir, DirectoryPtr secondaryDir, bool doClose) - { - this->primaryExtensions = primaryExtensions; - this->primaryDir = primaryDir; - this->secondaryDir = secondaryDir; - this->doClose = doClose; - this->lockFactory = primaryDir->getLockFactory(); - } - - FileSwitchDirectory::~FileSwitchDirectory() - { - } - - DirectoryPtr FileSwitchDirectory::getPrimaryDir() - { - return primaryDir; - } - - DirectoryPtr FileSwitchDirectory::getSecondaryDir() - { - return secondaryDir; - } - - void FileSwitchDirectory::close() - { - if (doClose) - { - LuceneException finally; - try - { - secondaryDir->close(); - } - catch (LuceneException& e) - { - finally = e; - } - doClose = false; - primaryDir->close(); - finally.throwException(); +namespace Lucene { + +FileSwitchDirectory::FileSwitchDirectory(HashSet primaryExtensions, const DirectoryPtr& primaryDir, const DirectoryPtr& secondaryDir, bool doClose) { + this->primaryExtensions = primaryExtensions; + this->primaryDir = primaryDir; + this->secondaryDir = secondaryDir; + this->doClose = doClose; + this->lockFactory = primaryDir->getLockFactory(); +} + +FileSwitchDirectory::~FileSwitchDirectory() { +} + +DirectoryPtr FileSwitchDirectory::getPrimaryDir() { + return primaryDir; +} + +DirectoryPtr FileSwitchDirectory::getSecondaryDir() { + return secondaryDir; +} + +void FileSwitchDirectory::close() { + if (doClose) { + LuceneException finally; + try { + secondaryDir->close(); + } catch (LuceneException& e) { + finally = e; } + doClose = false; + primaryDir->close(); + finally.throwException(); } - - HashSet FileSwitchDirectory::listAll() - { - HashSet primaryFiles(primaryDir->listAll()); - HashSet secondaryFiles(secondaryDir->listAll()); - HashSet 
files(HashSet::newInstance(primaryFiles.begin(), primaryFiles.end())); - files.addAll(secondaryFiles.begin(), secondaryFiles.end()); - return files; - } - - String FileSwitchDirectory::getExtension(const String& name) - { - String::size_type i = name.find_last_of(L'.'); - return i == String::npos ? L"" : name.substr(i + 1); - } - - DirectoryPtr FileSwitchDirectory::getDirectory(const String& name) - { - return primaryExtensions.contains(getExtension(name)) ? primaryDir : secondaryDir; - } - - bool FileSwitchDirectory::fileExists(const String& name) - { - return getDirectory(name)->fileExists(name); - } - - uint64_t FileSwitchDirectory::fileModified(const String& name) - { - return getDirectory(name)->fileModified(name); - } - - void FileSwitchDirectory::touchFile(const String& name) - { - getDirectory(name)->touchFile(name); - } - - void FileSwitchDirectory::deleteFile(const String& name) - { - getDirectory(name)->deleteFile(name); - } - - int64_t FileSwitchDirectory::fileLength(const String& name) - { - return getDirectory(name)->fileLength(name); - } - - IndexOutputPtr FileSwitchDirectory::createOutput(const String& name) - { - return getDirectory(name)->createOutput(name); - } - - void FileSwitchDirectory::sync(const String& name) - { - getDirectory(name)->sync(name); - } - - IndexInputPtr FileSwitchDirectory::openInput(const String& name) - { - return getDirectory(name)->openInput(name); - } +} + +HashSet FileSwitchDirectory::listAll() { + HashSet primaryFiles(primaryDir->listAll()); + HashSet secondaryFiles(secondaryDir->listAll()); + HashSet files(HashSet::newInstance(primaryFiles.begin(), primaryFiles.end())); + files.addAll(secondaryFiles.begin(), secondaryFiles.end()); + return files; +} + +String FileSwitchDirectory::getExtension(const String& name) { + String::size_type i = name.find_last_of(L'.'); + return i == String::npos ? 
L"" : name.substr(i + 1); +} + +DirectoryPtr FileSwitchDirectory::getDirectory(const String& name) { + return primaryExtensions.contains(getExtension(name)) ? primaryDir : secondaryDir; +} + +bool FileSwitchDirectory::fileExists(const String& name) { + return getDirectory(name)->fileExists(name); +} + +uint64_t FileSwitchDirectory::fileModified(const String& name) { + return getDirectory(name)->fileModified(name); +} + +void FileSwitchDirectory::touchFile(const String& name) { + getDirectory(name)->touchFile(name); +} + +void FileSwitchDirectory::deleteFile(const String& name) { + getDirectory(name)->deleteFile(name); +} + +int64_t FileSwitchDirectory::fileLength(const String& name) { + return getDirectory(name)->fileLength(name); +} + +IndexOutputPtr FileSwitchDirectory::createOutput(const String& name) { + return getDirectory(name)->createOutput(name); +} + +void FileSwitchDirectory::sync(const String& name) { + getDirectory(name)->sync(name); +} + +IndexInputPtr FileSwitchDirectory::openInput(const String& name) { + return getDirectory(name)->openInput(name); +} + } diff --git a/src/core/store/IndexInput.cpp b/src/core/store/IndexInput.cpp index 3828c6d6..319f3161 100644 --- a/src/core/store/IndexInput.cpp +++ b/src/core/store/IndexInput.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,146 +10,126 @@ #include "Reader.h" #include "StringUtils.h" -namespace Lucene -{ - IndexInput::IndexInput() - { - preUTF8Strings = false; - } - - IndexInput::~IndexInput() - { +namespace Lucene { + +IndexInput::IndexInput() { + preUTF8Strings = false; +} + +IndexInput::~IndexInput() { +} + +void IndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer) { + // default to ignoring useBuffer entirely + readBytes(b, offset, length); +} + +int32_t IndexInput::readInt() { + int32_t i = (readByte() & 0xff) << 24; + i |= (readByte() & 0xff) << 16; + i |= (readByte() & 0xff) << 8; + i |= (readByte() & 0xff); + return i; +} + +int32_t IndexInput::readVInt() { + uint8_t b = readByte(); + int32_t i = (b & 0x7f); + + for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { + b = readByte(); + i |= (b & 0x7f) << shift; } - - void IndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length, bool useBuffer) - { - // default to ignoring useBuffer entirely - readBytes(b, offset, length); + return i; +} + +int64_t IndexInput::readLong() { + int64_t i = (int64_t)readInt() << 32; + i |= (readInt() & 0xffffffffLL); + return i; +} + +int64_t IndexInput::readVLong() { + uint8_t b = readByte(); + int64_t i = (b & 0x7f); + + for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) { + b = readByte(); + i |= (int64_t)(b & 0x7f) << shift; } - - int32_t IndexInput::readInt() - { - int32_t i = (readByte() & 0xff) << 24; - i |= (readByte() & 0xff) << 16; - i |= (readByte() & 0xff) << 8; - i |= (readByte() & 0xff); - return i; + return i; +} + +void IndexInput::setModifiedUTF8StringsMode() { + preUTF8Strings = true; +} + +String IndexInput::readString() { + if (preUTF8Strings) { + return readModifiedUTF8String(); } - - int32_t IndexInput::readVInt() - { + int32_t length = readVInt(); + ByteArray bytes(ByteArray::newInstance(length)); + readBytes(bytes.get(), 0, length); + return 
StringUtils::toUnicode(bytes.get(), length); +} + +String IndexInput::readModifiedUTF8String() { + int32_t length = readVInt(); + CharArray chars(CharArray::newInstance(length)); + return String(chars.get(), readChars(chars.get(), 0, length)); +} + +int32_t IndexInput::readChars(wchar_t* buffer, int32_t start, int32_t length) { + Array chars(Array::newInstance(length)); + for (int32_t i = 0; i < length; ++i) { uint8_t b = readByte(); - int32_t i = (b & 0x7f); - - for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) - { - b = readByte(); - i |= (b & 0x7f) << shift; + if ((b & 0x80) == 0) { + chars[i] = (uint16_t)(b & 0x7f); + } else if ((b & 0xe0) != 0xe0) { + chars[i] = (uint16_t)(((b & 0x1f) << 6) | (readByte() & 0x3f)); + } else { + uint32_t ch = ((b & 0x0f) << 12); + ch |= (readByte() & 0x3f) << 6; + ch |= (readByte() & 0x3f); + chars[i] = (uint16_t)ch; } - return i; } - - int64_t IndexInput::readLong() - { - int64_t i = (int64_t)readInt() << 32; - i |= (readInt() & 0xffffffffLL); - return i; - } - - int64_t IndexInput::readVLong() - { + UTF16DecoderPtr utf16Decoder(newLucene(chars.get(), chars.get() + length)); + int32_t decodeLength = utf16Decoder->decode(buffer + start, length); + return decodeLength == Reader::READER_EOF ? 
0 : decodeLength; +} + +void IndexInput::skipChars(int32_t length) { + for (int32_t i = 0; i < length; ++i) { uint8_t b = readByte(); - int64_t i = (b & 0x7f); - - for (int32_t shift = 7; (b & 0x80) != 0; shift += 7) - { - b = readByte(); - i |= (b & 0x7f) << shift; - } - return i; - } - - void IndexInput::setModifiedUTF8StringsMode() - { - preUTF8Strings = true; - } - - String IndexInput::readString() - { - if (preUTF8Strings) - return readModifiedUTF8String(); - int32_t length = readVInt(); - ByteArray bytes(ByteArray::newInstance(length)); - readBytes(bytes.get(), 0, length); - return StringUtils::toUnicode(bytes.get(), length); - } - - String IndexInput::readModifiedUTF8String() - { - int32_t length = readVInt(); - CharArray chars(CharArray::newInstance(length)); - return String(chars.get(), readChars(chars.get(), 0, length)); - } - - int32_t IndexInput::readChars(wchar_t* buffer, int32_t start, int32_t length) - { - Array chars(Array::newInstance(length)); - for (int32_t i = 0; i < length; ++i) - { - uint8_t b = readByte(); - if ((b & 0x80) == 0) - chars[i] = (uint16_t)(b & 0x7f); - else if ((b & 0xe0) != 0xe0) - chars[i] = (uint16_t)(((b & 0x1f) << 6) | (readByte() & 0x3f)); - else - { - uint32_t ch = ((b & 0x0f) << 12); - ch |= (readByte() & 0x3f) << 6; - ch |= (readByte() & 0x3f); - chars[i] = (uint16_t)ch; - } + if ((b & 0x80) == 0) { + // do nothing, we only need one byte + } else if ((b & 0xe0) != 0xe0) { + readByte(); // read an additional byte + } else { + // read two additional bytes + readByte(); + readByte(); } - UTF16DecoderPtr utf16Decoder(newLucene(chars.get(), chars.get() + length)); - int32_t decodeLength = utf16Decoder->decode(buffer + start, length); - return decodeLength == Reader::READER_EOF ? 
0 : decodeLength; } - - void IndexInput::skipChars(int32_t length) - { - for (int32_t i = 0; i < length; ++i) - { - uint8_t b = readByte(); - if ((b & 0x80) == 0) - { // do nothing, we only need one byte - } - else if ((b & 0xe0) != 0xe0) - readByte(); // read an additional byte - else - { - // read two additional bytes - readByte(); - readByte(); - } - } - } - - MapStringString IndexInput::readStringStringMap() - { - MapStringString map(MapStringString::newInstance()); - int32_t count = readInt(); - for (int32_t i = 0; i < count; ++i) - { - String key(readString()); - String val(readString()); - map.put(key, val); - } - return map; - } - - LuceneObjectPtr IndexInput::clone(LuceneObjectPtr other) - { - IndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(LuceneObject::clone(other))); - cloneIndexInput->preUTF8Strings = preUTF8Strings; - return cloneIndexInput; +} + +MapStringString IndexInput::readStringStringMap() { + MapStringString map(MapStringString::newInstance()); + int32_t count = readInt(); + for (int32_t i = 0; i < count; ++i) { + String key(readString()); + String val(readString()); + map.put(key, val); } + return map; +} + +LuceneObjectPtr IndexInput::clone(const LuceneObjectPtr& other) { + IndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(LuceneObject::clone(other))); + cloneIndexInput->preUTF8Strings = preUTF8Strings; + return cloneIndexInput; +} + } diff --git a/src/core/store/IndexOutput.cpp b/src/core/store/IndexOutput.cpp index f7071a08..5245da0e 100644 --- a/src/core/store/IndexOutput.cpp +++ b/src/core/store/IndexOutput.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,114 +11,96 @@ #include "UnicodeUtils.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t IndexOutput::COPY_BUFFER_SIZE = 16384; - - IndexOutput::~IndexOutput() - { - } - - void IndexOutput::writeBytes(const uint8_t* b, int32_t length) - { - writeBytes(b, 0, length); - } - - void IndexOutput::writeInt(int32_t i) - { - writeByte((uint8_t)(i >> 24)); - writeByte((uint8_t)(i >> 16)); - writeByte((uint8_t)(i >> 8)); - writeByte((uint8_t)i); - } - - void IndexOutput::writeVInt(int32_t i) - { - while ((i & ~0x7f) != 0) - { - writeByte((uint8_t)((i & 0x7f) | 0x80)); - i = MiscUtils::unsignedShift(i, 7); - } - writeByte((uint8_t)i); - } - - void IndexOutput::writeLong(int64_t i) - { - writeInt((int32_t)(i >> 32)); - writeInt((int32_t)i); - } - - void IndexOutput::writeVLong(int64_t i) - { - while ((i & ~0x7f) != 0) - { - writeByte((uint8_t)((i & 0x7f) | 0x80)); - i = MiscUtils::unsignedShift(i, (int64_t)7); - } - writeByte((uint8_t)i); +namespace Lucene { + +const int32_t IndexOutput::COPY_BUFFER_SIZE = 16384; + +IndexOutput::~IndexOutput() { +} + +void IndexOutput::writeBytes(const uint8_t* b, int32_t length) { + writeBytes(b, 0, length); +} + +void IndexOutput::writeInt(int32_t i) { + writeByte((uint8_t)(i >> 24)); + writeByte((uint8_t)(i >> 16)); + writeByte((uint8_t)(i >> 8)); + writeByte((uint8_t)i); +} + +void IndexOutput::writeVInt(int32_t i) { + while ((i & ~0x7f) != 0) { + writeByte((uint8_t)((i & 0x7f) | 0x80)); + i = MiscUtils::unsignedShift(i, 7); } - - void IndexOutput::writeString(const String& s) - { - UTF8ResultPtr utf8Result(newLucene()); - StringUtils::toUTF8(s.c_str(), s.length(), utf8Result); - writeVInt(utf8Result->length); - writeBytes(utf8Result->result.get(), utf8Result->length); + writeByte((uint8_t)i); +} + +void IndexOutput::writeLong(int64_t i) { + writeInt((int32_t)(i >> 32)); + writeInt((int32_t)i); +} + +void IndexOutput::writeVLong(int64_t i) { + 
while ((i & ~0x7f) != 0) { + writeByte((uint8_t)((i & 0x7f) | 0x80)); + i = MiscUtils::unsignedShift(i, (int64_t)7); } - - void IndexOutput::writeChars(const String& s, int32_t start, int32_t length) - { - int32_t end = start + length; - for (int32_t i = start; i < end; ++i) - { - int32_t code = (int32_t)s[i]; - if (code >= 0x01 && code <= 0x7f) - writeByte((uint8_t)code); - else if (((code >= 0x80) && (code <= 0x7ff)) || code == 0) - { - writeByte((uint8_t)(0xc0 | (code >> 6))); - writeByte((uint8_t)(0x80 | (code & 0x3f))); - } - else - { - writeByte((uint8_t)(0xe0 | MiscUtils::unsignedShift(code, 12))); - writeByte((uint8_t)(0x80 | ((code >> 6) & 0x3f))); - writeByte((uint8_t)(0x80 | (code & 0x3f))); - } + writeByte((uint8_t)i); +} + +void IndexOutput::writeString(const String& s) { + UTF8ResultPtr utf8Result(newLucene()); + StringUtils::toUTF8(s.c_str(), s.length(), utf8Result); + writeVInt(utf8Result->length); + writeBytes(utf8Result->result.get(), utf8Result->length); +} + +void IndexOutput::writeChars(const String& s, int32_t start, int32_t length) { + int32_t end = start + length; + for (int32_t i = start; i < end; ++i) { + int32_t code = (int32_t)s[i]; + if (code >= 0x01 && code <= 0x7f) { + writeByte((uint8_t)code); + } else if (((code >= 0x80) && (code <= 0x7ff)) || code == 0) { + writeByte((uint8_t)(0xc0 | (code >> 6))); + writeByte((uint8_t)(0x80 | (code & 0x3f))); + } else { + writeByte((uint8_t)(0xe0 | MiscUtils::unsignedShift(code, 12))); + writeByte((uint8_t)(0x80 | ((code >> 6) & 0x3f))); + writeByte((uint8_t)(0x80 | (code & 0x3f))); } } - - void IndexOutput::copyBytes(IndexInputPtr input, int64_t numBytes) - { - BOOST_ASSERT(numBytes >= 0); - int64_t left = numBytes; - if (!copyBuffer) - copyBuffer = ByteArray::newInstance(COPY_BUFFER_SIZE); - while (left > 0) - { - int32_t toCopy = left > COPY_BUFFER_SIZE ? 
COPY_BUFFER_SIZE : (int32_t)left; - input->readBytes(copyBuffer.get(), 0, toCopy); - writeBytes(copyBuffer.get(), 0, toCopy); - left -= toCopy; - } +} + +void IndexOutput::copyBytes(const IndexInputPtr& input, int64_t numBytes) { + BOOST_ASSERT(numBytes >= 0); + int64_t left = numBytes; + if (!copyBuffer) { + copyBuffer = ByteArray::newInstance(COPY_BUFFER_SIZE); } - - void IndexOutput::setLength(int64_t length) - { + while (left > 0) { + int32_t toCopy = left > COPY_BUFFER_SIZE ? COPY_BUFFER_SIZE : (int32_t)left; + input->readBytes(copyBuffer.get(), 0, toCopy); + writeBytes(copyBuffer.get(), 0, toCopy); + left -= toCopy; } - - void IndexOutput::writeStringStringMap(MapStringString map) - { - if (!map) - writeInt(0); - else - { - writeInt(map.size()); - for (MapStringString::iterator entry = map.begin(); entry != map.end(); ++entry) - { - writeString(entry->first); - writeString(entry->second); - } +} + +void IndexOutput::setLength(int64_t length) { +} + +void IndexOutput::writeStringStringMap(MapStringString map) { + if (!map) { + writeInt(0); + } else { + writeInt(map.size()); + for (MapStringString::iterator entry = map.begin(); entry != map.end(); ++entry) { + writeString(entry->first); + writeString(entry->second); } } } + +} diff --git a/src/core/store/Lock.cpp b/src/core/store/Lock.cpp index 333dfb1a..04874160 100644 --- a/src/core/store/Lock.cpp +++ b/src/core/store/Lock.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,30 +8,29 @@ #include "Lock.h" #include "LuceneThread.h" -namespace Lucene -{ - /// How long {@link #obtain(int64_t)} waits, in milliseconds, in between attempts to acquire the lock. - const int32_t Lock::LOCK_OBTAIN_WAIT_FOREVER = -1; - - /// Pass this value to {@link #obtain(int64_t)} to try forever to obtain the lock. - const int32_t Lock::LOCK_POLL_INTERVAL = 1000; - - Lock::~Lock() - { - } - - bool Lock::obtain(int32_t lockWaitTimeout) - { - bool locked = obtain(); - int32_t maxSleepCount = lockWaitTimeout / LOCK_POLL_INTERVAL; - int32_t sleepCount = 0; - while (!locked) - { - if (lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER && sleepCount++ >= maxSleepCount) - boost::throw_exception(LockObtainFailedException(L"Lock obtain timed out")); - LuceneThread::threadSleep(LOCK_POLL_INTERVAL); - locked = obtain(); +namespace Lucene { + +/// How long {@link #obtain(int64_t)} waits, in milliseconds, in between attempts to acquire the lock. +const int32_t Lock::LOCK_OBTAIN_WAIT_FOREVER = -1; + +/// Pass this value to {@link #obtain(int64_t)} to try forever to obtain the lock. 
+const int32_t Lock::LOCK_POLL_INTERVAL = 1000; + +Lock::~Lock() { +} + +bool Lock::obtain(int32_t lockWaitTimeout) { + bool locked = obtain(); + int32_t maxSleepCount = lockWaitTimeout / LOCK_POLL_INTERVAL; + int32_t sleepCount = 0; + while (!locked) { + if (lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER && sleepCount++ >= maxSleepCount) { + boost::throw_exception(LockObtainFailedException(L"Lock obtain timed out")); } - return locked; + LuceneThread::threadSleep(LOCK_POLL_INTERVAL); + locked = obtain(); } + return locked; +} + } diff --git a/src/core/store/LockFactory.cpp b/src/core/store/LockFactory.cpp index 26e1cede..2c03fe6f 100644 --- a/src/core/store/LockFactory.cpp +++ b/src/core/store/LockFactory.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,19 +7,17 @@ #include "LuceneInc.h" #include "LockFactory.h" -namespace Lucene -{ - LockFactory::~LockFactory() - { - } - - void LockFactory::setLockPrefix(const String& lockPrefix) - { - this->lockPrefix = lockPrefix; - } - - String LockFactory::getLockPrefix() - { - return lockPrefix; - } +namespace Lucene { + +LockFactory::~LockFactory() { +} + +void LockFactory::setLockPrefix(const String& lockPrefix) { + this->lockPrefix = lockPrefix; +} + +String LockFactory::getLockPrefix() { + return lockPrefix; +} + } diff --git a/src/core/store/MMapDirectory.cpp b/src/core/store/MMapDirectory.cpp index 98cc0f23..46156e3a 100644 --- a/src/core/store/MMapDirectory.cpp +++ b/src/core/store/MMapDirectory.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,110 +13,90 @@ #include "FileUtils.h" #include "StringUtils.h" -namespace Lucene -{ - MMapDirectory::MMapDirectory(const String& path, LockFactoryPtr lockFactory) : FSDirectory(path, lockFactory) - { - } - - MMapDirectory::~MMapDirectory() - { - } - - IndexInputPtr MMapDirectory::openInput(const String& name, int32_t bufferSize) - { - ensureOpen(); - return newLucene(FileUtils::joinPath(directory, name)); - } - - IndexOutputPtr MMapDirectory::createOutput(const String& name) - { - initOutput(name); - return newLucene(FileUtils::joinPath(directory, name)); - } - - MMapIndexInput::MMapIndexInput(const String& path) - { - _length = path.empty() ? 0 : (int32_t)FileUtils::fileLength(path); - bufferPosition = 0; - if (!path.empty()) - { - try - { - file.open(StringUtils::toUTF8(path).c_str(), _length); - } - catch (...) - { - boost::throw_exception(FileNotFoundException(path)); - } - } - isClone = false; - } - - MMapIndexInput::~MMapIndexInput() - { - } - - uint8_t MMapIndexInput::readByte() - { - try - { - return file.data()[bufferPosition++]; - } - catch (...) - { - boost::throw_exception(IOException(L"Read past EOF")); - return 0; - } - } - - void MMapIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) - { - try - { - MiscUtils::arrayCopy(file.data(), bufferPosition, b, offset, length); - bufferPosition += length; - } - catch (...) 
- { - boost::throw_exception(IOException(L"Read past EOF")); +namespace Lucene { + +MMapDirectory::MMapDirectory(const String& path, const LockFactoryPtr& lockFactory) : FSDirectory(path, lockFactory) { +} + +MMapDirectory::~MMapDirectory() { +} + +IndexInputPtr MMapDirectory::openInput(const String& name, int32_t bufferSize) { + ensureOpen(); + return newLucene(FileUtils::joinPath(directory, name)); +} + +IndexOutputPtr MMapDirectory::createOutput(const String& name) { + initOutput(name); + return newLucene(FileUtils::joinPath(directory, name)); +} + +MMapIndexInput::MMapIndexInput(const String& path) { + _length = path.empty() ? 0 : (int32_t)FileUtils::fileLength(path); + bufferPosition = 0; + if (!path.empty()) { + try { + file.open(boost::filesystem::path(path), _length); + } catch (...) { + boost::throw_exception(FileNotFoundException(path)); } } - - int64_t MMapIndexInput::getFilePointer() - { - return bufferPosition; - } - - void MMapIndexInput::seek(int64_t pos) - { - bufferPosition = (int32_t)pos; + isClone = false; +} + +MMapIndexInput::~MMapIndexInput() { +} + +uint8_t MMapIndexInput::readByte() { + try { + return file.data()[bufferPosition++]; + } catch (...) { + boost::throw_exception(IOException(L"Read past EOF")); + return 0; } - - int64_t MMapIndexInput::length() - { - return (int64_t)_length; +} + +void MMapIndexInput::readBytes(uint8_t* b, int32_t offset, int32_t length) { + try { + MiscUtils::arrayCopy(file.data(), bufferPosition, b, offset, length); + bufferPosition += length; + } catch (...) 
{ + boost::throw_exception(IOException(L"Read past EOF")); } - - void MMapIndexInput::close() - { - if (isClone || !file.is_open()) - return; - _length = 0; - bufferPosition = 0; - file.close(); +} + +int64_t MMapIndexInput::getFilePointer() { + return bufferPosition; +} + +void MMapIndexInput::seek(int64_t pos) { + bufferPosition = (int32_t)pos; +} + +int64_t MMapIndexInput::length() { + return (int64_t)_length; +} + +void MMapIndexInput::close() { + if (isClone || !file.is_open()) { + return; } - - LuceneObjectPtr MMapIndexInput::clone(LuceneObjectPtr other) - { - if (!file.is_open()) - boost::throw_exception(AlreadyClosedException(L"MMapIndexInput already closed")); - LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene()); - MMapIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); - cloneIndexInput->_length = _length; - cloneIndexInput->file = file; - cloneIndexInput->bufferPosition = bufferPosition; - cloneIndexInput->isClone = true; - return cloneIndexInput; + _length = 0; + bufferPosition = 0; + file.close(); +} + +LuceneObjectPtr MMapIndexInput::clone(const LuceneObjectPtr& other) { + if (!file.is_open()) { + boost::throw_exception(AlreadyClosedException(L"MMapIndexInput already closed")); } + LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene()); + MMapIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); + cloneIndexInput->_length = _length; + cloneIndexInput->file = file; + cloneIndexInput->bufferPosition = bufferPosition; + cloneIndexInput->isClone = true; + return cloneIndexInput; +} + } diff --git a/src/core/store/NativeFSLockFactory.cpp b/src/core/store/NativeFSLockFactory.cpp index 06f9a474..2ba0471e 100644 --- a/src/core/store/NativeFSLockFactory.cpp +++ b/src/core/store/NativeFSLockFactory.cpp @@ -1,11 +1,11 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. 
All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" -#include +#include #include #include "NativeFSLockFactory.h" #include "_NativeFSLockFactory.h" @@ -13,212 +13,202 @@ #include "FileUtils.h" #include "StringUtils.h" -namespace Lucene -{ - NativeFSLockFactory::NativeFSLockFactory(const String& lockDirName) - { - setLockDir(lockDirName); +namespace Lucene { + +NativeFSLockFactory::NativeFSLockFactory(const String& lockDirName) { + setLockDir(lockDirName); +} + +NativeFSLockFactory::~NativeFSLockFactory() { +} + +LockPtr NativeFSLockFactory::makeLock(const String& lockName) { + SyncLock syncLock(this); + return newLucene(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName); +} + +void NativeFSLockFactory::clearLock(const String& lockName) { + // note that this isn't strictly required anymore because the existence of these files does not mean + // they are locked, but still do this in case people really want to see the files go away + + if (FileUtils::isDirectory(lockDir)) { + String lockPath(FileUtils::joinPath(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName)); + if (FileUtils::fileExists(lockPath) && !FileUtils::removeFile(lockPath)) { + boost::throw_exception(IOException(L"Failed to delete: " + lockPath)); + } } - - NativeFSLockFactory::~NativeFSLockFactory() - { +} + +NativeFSLock::NativeFSLock(const String& lockDir, const String& lockFileName) { + this->lockDir = lockDir; + path = FileUtils::joinPath(lockDir, lockFileName); +} + +NativeFSLock::~NativeFSLock() { + try { + release(); + } catch (...) 
{ } - - LockPtr NativeFSLockFactory::makeLock(const String& lockName) +} + +SynchronizePtr NativeFSLock::LOCK_HELD_LOCK() { + static SynchronizePtr _LOCK_HELD_LOCK; + LUCENE_RUN_ONCE( + _LOCK_HELD_LOCK = newInstance(); + ); + return _LOCK_HELD_LOCK; +} + +HashSet NativeFSLock::LOCK_HELD() { + static HashSet _LOCK_HELD; + LUCENE_RUN_ONCE( + _LOCK_HELD = HashSet::newInstance(); + ); + return _LOCK_HELD; +} + +bool NativeFSLock::lockExists() { + SyncLock syncLock(this); + return lock.get() != NULL; +} + +bool NativeFSLock::obtain() { + SyncLock syncLock(this); + + if (lockExists()) + // our instance is already locked { - SyncLock syncLock(this); - return newLucene(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName); + return false; } - - void NativeFSLockFactory::clearLock(const String& lockName) - { - // note that this isn't strictly required anymore because the existence of these files does not mean - // they are locked, but still do this in case people really want to see the files go away - - if (FileUtils::isDirectory(lockDir)) - { - String lockPath(FileUtils::joinPath(lockDir, lockPrefix.empty() ? 
lockName : lockPrefix + L"-" + lockName)); - if (FileUtils::fileExists(lockPath) && !FileUtils::removeFile(lockPath)) - boost::throw_exception(IOException(L"Failed to delete: " + lockPath)); + + // ensure that lockdir exists and is a directory + if (!FileUtils::fileExists(lockDir)) { + if (!FileUtils::createDirectory(lockDir)) { + boost::throw_exception(IOException(L"Cannot create directory: " + lockDir)); } + } else if (!FileUtils::isDirectory(lockDir)) { + boost::throw_exception(IOException(L"Found regular file where directory expected: " + lockDir)); } - - NativeFSLock::NativeFSLock(const String& lockDir, const String& lockFileName) - { - this->lockDir = lockDir; - path = FileUtils::joinPath(lockDir, lockFileName); - } - - NativeFSLock::~NativeFSLock() + + bool markedHeld = false; + + // make sure nobody else in-process has this lock held already and mark it held if not { - try + SyncLock heldLock(LOCK_HELD_LOCK()); + if (LOCK_HELD().contains(path)) + // someone else already has the lock { - release(); - } - catch (...) 
- { - } - } - - SynchronizePtr NativeFSLock::LOCK_HELD_LOCK() - { - static SynchronizePtr _LOCK_HELD_LOCK; - if (!_LOCK_HELD_LOCK) - _LOCK_HELD_LOCK = newInstance(); - return _LOCK_HELD_LOCK; - } - - HashSet NativeFSLock::LOCK_HELD() - { - static HashSet _LOCK_HELD; - if (!_LOCK_HELD) - _LOCK_HELD = HashSet::newInstance(); - return _LOCK_HELD; - } - - bool NativeFSLock::lockExists() - { - SyncLock syncLock(this); - return lock; - } - - bool NativeFSLock::obtain() - { - SyncLock syncLock(this); - - if (lockExists()) - // our instance is already locked return false; - - // ensure that lockdir exists and is a directory - if (!FileUtils::fileExists(lockDir)) - { - if (!FileUtils::createDirectory(lockDir)) - boost::throw_exception(IOException(L"Cannot create directory: " + lockDir)); + } else { + // this "reserves" the fact that we are the one thread trying to obtain this lock, so we own the + // only instance of a channel against this file + LOCK_HELD().add(path); + markedHeld = true; } - else if (!FileUtils::isDirectory(lockDir)) - boost::throw_exception(IOException(L"Found regular file where directory expected: " + lockDir)); - - bool markedHeld = false; - - // make sure nobody else in-process has this lock held already and mark it held if not - { - SyncLock heldLock(LOCK_HELD_LOCK()); - if (LOCK_HELD().contains(path)) - // someone else already has the lock - return false; - else - { - // this "reserves" the fact that we are the one thread trying to obtain this lock, so we own the - // only instance of a channel against this file - LOCK_HELD().add(path); - markedHeld = true; - } - } - - try - { - // we can get intermittent "access denied" here, so we treat this as failure to acquire the lock - std::ofstream f(StringUtils::toUTF8(path).c_str(), std::ios::binary | std::ios::out); - - if (f.is_open()) + } + + try { + // we can get intermittent "access denied" here, so we treat this as failure to acquire the lock + boost::filesystem::ofstream f(path, std::ios::binary | 
std::ios::out); + + if (f.is_open()) { + std::string lockpath; + + // file_lock only accepts char* filenames and we cannot losslessly convert Unicode paths to + // char*. The usual way to work around this is to use 8.3 short names. +#if defined(_WIN32) || defined(_WIN64) + wchar_t pathOut[MAX_PATH+1]; + if (::GetShortPathNameW(path.c_str(), pathOut, MAX_PATH+1) != 0) { + lockpath = boost::filesystem::path(pathOut).string(); + } else +#endif // Windows { - lock = newInstance(StringUtils::toUTF8(path).c_str()); - lock->lock(); + lockpath = boost::filesystem::path(path).string(); } + lock = newInstance(lockpath.c_str()); + lock->lock(); } - catch (...) - { + } catch (...) { + lock.reset(); + } + + if (markedHeld && !lockExists()) { + SyncLock heldLock(LOCK_HELD_LOCK()); + LOCK_HELD().remove(path); + } + + return lockExists(); +} + +void NativeFSLock::release() { + SyncLock syncLock(this); + + if (lockExists()) { + try { + lock->unlock(); lock.reset(); + } catch (...) { } - - if (markedHeld && !lockExists()) + { SyncLock heldLock(LOCK_HELD_LOCK()); LOCK_HELD().remove(path); } - - return lockExists(); - } - - void NativeFSLock::release() - { - SyncLock syncLock(this); - - if (lockExists()) - { - try - { - lock->unlock(); - lock.reset(); - } - catch (...) - { - } - - { - SyncLock heldLock(LOCK_HELD_LOCK()); - LOCK_HELD().remove(path); + + // we don't care anymore if the file cannot be deleted because it's held up by another process + // (eg. AntiVirus). NativeFSLock does not depend on the existence/absence of the lock file + FileUtils::removeFile(path); + } else { + // if we don't hold the lock, and somebody still called release(), for example as a result of + // calling IndexWriter.unlock(), we should attempt to obtain the lock and release it. If the + // obtain fails, it means the lock cannot be released, and we should throw a proper exception + // rather than silently failing/not doing anything. 
+ bool obtained = false; + LuceneException finally; + try { + obtained = obtain(); + if (!obtained) { + boost::throw_exception(LockReleaseFailedException(L"Cannot forcefully unlock a NativeFSLock which is held by another indexer component: " + path)); } - - // we don't care anymore if the file cannot be deleted because it's held up by another process - // (eg. AntiVirus). NativeFSLock does not depend on the existence/absence of the lock file - FileUtils::removeFile(path); + } catch (LuceneException& e) { + finally = e; } - else - { - // if we don't hold the lock, and somebody still called release(), for example as a result of - // calling IndexWriter.unlock(), we should attempt to obtain the lock and release it. If the - // obtain fails, it means the lock cannot be released, and we should throw a proper exception - // rather than silently failing/not doing anything. - bool obtained = false; - LuceneException finally; - try - { - obtained = obtain(); - if (!obtained) - boost::throw_exception(LockReleaseFailedException(L"Cannot forcefully unlock a NativeFSLock which is held by another indexer component: " + path)); - } - catch (LuceneException& e) - { - finally = e; - } - if (obtained) - release(); - finally.throwException(); + if (obtained) { + release(); } + finally.throwException(); } - - bool NativeFSLock::isLocked() - { - SyncLock syncLock(this); - - // the test for is islocked is not directly possible with native file locks - - // first a shortcut, if a lock reference in this instance is available - if (lockExists()) - return true; - - // look if lock file is present; if not, there can definitely be no lock! 
- if (!FileUtils::fileExists(path)) - return false; - - // try to obtain and release (if was locked) the lock - try - { - bool obtained = obtain(); - if (obtained) - release(); - return !obtained; - } - catch (LuceneException&) - { - return false; - } +} + +bool NativeFSLock::isLocked() { + SyncLock syncLock(this); + + // the test for is islocked is not directly possible with native file locks + + // first a shortcut, if a lock reference in this instance is available + if (lockExists()) { + return true; } - - String NativeFSLock::toString() - { - return getClassName() + L"@" + path; + + // look if lock file is present; if not, there can definitely be no lock! + if (!FileUtils::fileExists(path)) { + return false; + } + + // try to obtain and release (if was locked) the lock + try { + bool obtained = obtain(); + if (obtained) { + release(); + } + return !obtained; + } catch (LuceneException&) { + return false; } } + +String NativeFSLock::toString() { + return getClassName() + L"@" + path; +} + +} diff --git a/src/core/store/NoLockFactory.cpp b/src/core/store/NoLockFactory.cpp index be20b9f5..539369b7 100644 --- a/src/core/store/NoLockFactory.cpp +++ b/src/core/store/NoLockFactory.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,64 +8,53 @@ #include "NoLockFactory.h" #include "_NoLockFactory.h" -namespace Lucene -{ - NoLockFactory::~NoLockFactory() - { - } - - NoLockFactoryPtr NoLockFactory::getNoLockFactory() - { - static NoLockFactoryPtr singleton; - if (!singleton) - { - singleton = newLucene(); - CycleCheck::addStatic(singleton); - } - return singleton; - } - - NoLockPtr NoLockFactory::getSingletonLock() - { - // Single instance returned whenever makeLock is called. - static NoLockPtr singletonLock; - if (!singletonLock) - { - singletonLock = newLucene(); - CycleCheck::addStatic(singletonLock); - } - return singletonLock; - } - - LockPtr NoLockFactory::makeLock(const String& lockName) - { - return getSingletonLock(); - } - - void NoLockFactory::clearLock(const String& lockName) - { - } - - NoLock::~NoLock() - { - } - - bool NoLock::obtain() - { - return true; - } - - void NoLock::release() - { - } - - bool NoLock::isLocked() - { - return false; - } - - String NoLock::toString() - { - return getClassName(); - } +namespace Lucene { + +NoLockFactory::~NoLockFactory() { +} + +NoLockFactoryPtr NoLockFactory::getNoLockFactory() { + static NoLockFactoryPtr singleton; + LUCENE_RUN_ONCE( + singleton = newLucene(); + CycleCheck::addStatic(singleton); + ); + return singleton; +} + +NoLockPtr NoLockFactory::getSingletonLock() { + // Single instance returned whenever makeLock is called. 
+ static NoLockPtr singletonLock; + LUCENE_RUN_ONCE( + singletonLock = newLucene(); + CycleCheck::addStatic(singletonLock); + ); + return singletonLock; +} + +LockPtr NoLockFactory::makeLock(const String& lockName) { + return getSingletonLock(); +} + +void NoLockFactory::clearLock(const String& lockName) { +} + +NoLock::~NoLock() { +} + +bool NoLock::obtain() { + return true; +} + +void NoLock::release() { +} + +bool NoLock::isLocked() { + return false; +} + +String NoLock::toString() { + return getClassName(); +} + } diff --git a/src/core/store/RAMDirectory.cpp b/src/core/store/RAMDirectory.cpp index 021702d0..dbff24bd 100644 --- a/src/core/store/RAMDirectory.cpp +++ b/src/core/store/RAMDirectory.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,153 +13,146 @@ #include "LuceneThread.h" #include "MiscUtils.h" -namespace Lucene -{ - RAMDirectory::RAMDirectory() - { - this->fileMap = MapStringRAMFile::newInstance(); - this->_sizeInBytes = 0; - this->copyDirectory = false; - this->closeDir = false; - setLockFactory(newLucene()); - } - - RAMDirectory::RAMDirectory(DirectoryPtr dir) - { - this->fileMap = MapStringRAMFile::newInstance(); - this->_sizeInBytes = 0; - this->copyDirectory = true; - this->_dirSource = dir; - this->closeDir = false; - setLockFactory(newLucene()); - } - - RAMDirectory::RAMDirectory(DirectoryPtr dir, bool closeDir) - { - this->fileMap = MapStringRAMFile::newInstance(); - this->_sizeInBytes = 0; - this->copyDirectory = true; - this->_dirSource = dir; - this->closeDir = closeDir; - setLockFactory(newLucene()); - } - - RAMDirectory::~RAMDirectory() - { - } - - void RAMDirectory::initialize() - { - if (copyDirectory) - Directory::copy(DirectoryPtr(_dirSource), shared_from_this(), closeDir); +namespace Lucene { + +RAMDirectory::RAMDirectory() { + this->fileMap = MapStringRAMFile::newInstance(); + this->_sizeInBytes = 0; + this->copyDirectory = false; + this->closeDir = false; + setLockFactory(newLucene()); +} + +RAMDirectory::RAMDirectory(const DirectoryPtr& dir) { + this->fileMap = MapStringRAMFile::newInstance(); + this->_sizeInBytes = 0; + this->copyDirectory = true; + this->_dirSource = dir; + this->closeDir = false; + setLockFactory(newLucene()); +} + +RAMDirectory::RAMDirectory(const DirectoryPtr& dir, bool closeDir) { + this->fileMap = MapStringRAMFile::newInstance(); + this->_sizeInBytes = 0; + this->copyDirectory = true; + this->_dirSource = dir; + this->closeDir = closeDir; + setLockFactory(newLucene()); +} + +RAMDirectory::~RAMDirectory() { +} + +void RAMDirectory::initialize() { + if (copyDirectory) { + Directory::copy(DirectoryPtr(_dirSource), shared_from_this(), closeDir); } - - HashSet 
RAMDirectory::listAll() - { - SyncLock syncLock(this); - ensureOpen(); - HashSet result(HashSet::newInstance()); - for (MapStringRAMFile::iterator fileName = fileMap.begin(); fileName != fileMap.end(); ++fileName) - result.add(fileName->first); - return result; +} + +HashSet RAMDirectory::listAll() { + SyncLock syncLock(this); + ensureOpen(); + HashSet result(HashSet::newInstance()); + for (MapStringRAMFile::iterator fileName = fileMap.begin(); fileName != fileMap.end(); ++fileName) { + result.add(fileName->first); } - - bool RAMDirectory::fileExists(const String& name) - { - ensureOpen(); - SyncLock syncLock(this); - return fileMap.contains(name); + return result; +} + +bool RAMDirectory::fileExists(const String& name) { + ensureOpen(); + SyncLock syncLock(this); + return fileMap.contains(name); +} + +uint64_t RAMDirectory::fileModified(const String& name) { + ensureOpen(); + SyncLock syncLock(this); + MapStringRAMFile::iterator ramFile = fileMap.find(name); + if (ramFile == fileMap.end()) { + boost::throw_exception(FileNotFoundException(name)); } - - uint64_t RAMDirectory::fileModified(const String& name) + return ramFile->second->getLastModified(); +} + +void RAMDirectory::touchFile(const String& name) { + ensureOpen(); + RAMFilePtr file; { - ensureOpen(); SyncLock syncLock(this); MapStringRAMFile::iterator ramFile = fileMap.find(name); - if (ramFile == fileMap.end()) + if (ramFile == fileMap.end()) { boost::throw_exception(FileNotFoundException(name)); - return ramFile->second->getLastModified(); - } - - void RAMDirectory::touchFile(const String& name) - { - ensureOpen(); - RAMFilePtr file; - { - SyncLock syncLock(this); - MapStringRAMFile::iterator ramFile = fileMap.find(name); - if (ramFile == fileMap.end()) - boost::throw_exception(FileNotFoundException(name)); - file = ramFile->second; } - int64_t ts1 = MiscUtils::currentTimeMillis(); - while (ts1 == MiscUtils::currentTimeMillis()) - LuceneThread::threadSleep(1); - 
file->setLastModified(MiscUtils::currentTimeMillis()); + file = ramFile->second; } - - int64_t RAMDirectory::fileLength(const String& name) - { - ensureOpen(); - SyncLock syncLock(this); - MapStringRAMFile::iterator ramFile = fileMap.find(name); - if (ramFile == fileMap.end()) - boost::throw_exception(FileNotFoundException(name)); - return ramFile->second->getLength(); + int64_t ts1 = MiscUtils::currentTimeMillis(); + while (ts1 == MiscUtils::currentTimeMillis()) { + LuceneThread::threadSleep(1); } - - int64_t RAMDirectory::sizeInBytes() + file->setLastModified(MiscUtils::currentTimeMillis()); +} + +int64_t RAMDirectory::fileLength(const String& name) { + ensureOpen(); + SyncLock syncLock(this); + MapStringRAMFile::iterator ramFile = fileMap.find(name); + if (ramFile == fileMap.end()) { + boost::throw_exception(FileNotFoundException(name)); + } + return ramFile->second->getLength(); +} + +int64_t RAMDirectory::sizeInBytes() { + SyncLock syncLock(this); + ensureOpen(); + return _sizeInBytes; +} + +void RAMDirectory::deleteFile(const String& name) { + SyncLock syncLock(this); + ensureOpen(); + MapStringRAMFile::iterator ramFile = fileMap.find(name); + if (ramFile == fileMap.end()) { + boost::throw_exception(FileNotFoundException(name)); + } + _sizeInBytes -= ramFile->second->getSizeInBytes(); + fileMap.remove(name); +} + +IndexOutputPtr RAMDirectory::createOutput(const String& name) { + ensureOpen(); + RAMFilePtr file(newLucene(shared_from_this())); { SyncLock syncLock(this); - ensureOpen(); - return _sizeInBytes; + MapStringRAMFile::iterator existing = fileMap.find(name); + if (existing != fileMap.end()) { + _sizeInBytes -= existing->second->getSizeInBytes(); + existing->second->_directory.reset(); + } + fileMap.put(name, file); } - - void RAMDirectory::deleteFile(const String& name) + return newLucene(file); +} + +IndexInputPtr RAMDirectory::openInput(const String& name) { + ensureOpen(); + RAMFilePtr file; { SyncLock syncLock(this); - ensureOpen(); 
MapStringRAMFile::iterator ramFile = fileMap.find(name); - if (ramFile == fileMap.end()) + if (ramFile == fileMap.end()) { boost::throw_exception(FileNotFoundException(name)); - _sizeInBytes -= ramFile->second->getSizeInBytes(); - fileMap.remove(name); - } - - IndexOutputPtr RAMDirectory::createOutput(const String& name) - { - ensureOpen(); - RAMFilePtr file(newLucene(shared_from_this())); - { - SyncLock syncLock(this); - MapStringRAMFile::iterator existing = fileMap.find(name); - if (existing != fileMap.end()) - { - _sizeInBytes -= existing->second->getSizeInBytes(); - existing->second->_directory.reset(); - } - fileMap.put(name, file); } - return newLucene(file); - } - - IndexInputPtr RAMDirectory::openInput(const String& name) - { - ensureOpen(); - RAMFilePtr file; - { - SyncLock syncLock(this); - MapStringRAMFile::iterator ramFile = fileMap.find(name); - if (ramFile == fileMap.end()) - boost::throw_exception(FileNotFoundException(name)); - file = ramFile->second; - } - return newLucene(file); - } - - void RAMDirectory::close() - { - isOpen = false; - fileMap.reset(); + file = ramFile->second; } + return newLucene(file); +} + +void RAMDirectory::close() { + isOpen = false; + fileMap.reset(); +} + } diff --git a/src/core/store/RAMFile.cpp b/src/core/store/RAMFile.cpp index 6c3646d7..01f9724c 100644 --- a/src/core/store/RAMFile.cpp +++ b/src/core/store/RAMFile.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,91 +9,79 @@ #include "RAMDirectory.h" #include "MiscUtils.h" -namespace Lucene -{ - RAMFile::RAMFile() - { - this->buffers = Collection::newInstance(); - this->length = 0; - this->sizeInBytes = 0; - this->lastModified = MiscUtils::currentTimeMillis(); - } - - RAMFile::RAMFile(RAMDirectoryPtr directory) - { - this->buffers = Collection::newInstance(); - this->length = 0; - this->sizeInBytes = 0; - this->_directory = directory; - this->lastModified = MiscUtils::currentTimeMillis(); - } - - RAMFile::~RAMFile() - { - } - - int64_t RAMFile::getLength() - { - SyncLock syncLock(this); - return length; - } - - void RAMFile::setLength(int64_t length) - { - SyncLock syncLock(this); - this->length = length; - } - - int64_t RAMFile::getLastModified() - { - SyncLock syncLock(this); - return lastModified; - } - - void RAMFile::setLastModified(int64_t lastModified) - { - SyncLock syncLock(this); - this->lastModified = lastModified; - } - - ByteArray RAMFile::addBuffer(int32_t size) - { - ByteArray buffer(newBuffer(size)); - { - SyncLock syncLock(this); - buffers.add(buffer); - sizeInBytes += size; - } - - RAMDirectoryPtr directory(_directory.lock()); - if (directory) - { - SyncLock dirLock(directory); - directory->_sizeInBytes += size; - } - return buffer; - } - - ByteArray RAMFile::getBuffer(int32_t index) - { - SyncLock syncLock(this); - return buffers[index]; - } - - int32_t RAMFile::numBuffers() +namespace Lucene { + +RAMFile::RAMFile() { + this->buffers = Collection::newInstance(); + this->length = 0; + this->sizeInBytes = 0; + this->lastModified = MiscUtils::currentTimeMillis(); +} + +RAMFile::RAMFile(const RAMDirectoryPtr& directory) { + this->buffers = Collection::newInstance(); + this->length = 0; + this->sizeInBytes = 0; + this->_directory = directory; + this->lastModified = MiscUtils::currentTimeMillis(); +} + +RAMFile::~RAMFile() { +} + +int64_t RAMFile::getLength() { + SyncLock 
syncLock(this); + return length; +} + +void RAMFile::setLength(int64_t length) { + SyncLock syncLock(this); + this->length = length; +} + +int64_t RAMFile::getLastModified() { + SyncLock syncLock(this); + return lastModified; +} + +void RAMFile::setLastModified(int64_t lastModified) { + SyncLock syncLock(this); + this->lastModified = lastModified; +} + +ByteArray RAMFile::addBuffer(int32_t size) { + ByteArray buffer(newBuffer(size)); { SyncLock syncLock(this); - return buffers.size(); - } - - ByteArray RAMFile::newBuffer(int32_t size) - { - return ByteArray::newInstance(size); + buffers.add(buffer); + sizeInBytes += size; } - - int64_t RAMFile::getSizeInBytes() - { - SyncLock syncLock(this); - return sizeInBytes; + + RAMDirectoryPtr directory(_directory.lock()); + if (directory) { + SyncLock dirLock(directory); + directory->_sizeInBytes += size; } + return buffer; +} + +ByteArray RAMFile::getBuffer(int32_t index) { + SyncLock syncLock(this); + return buffers[index]; +} + +int32_t RAMFile::numBuffers() { + SyncLock syncLock(this); + return buffers.size(); +} + +ByteArray RAMFile::newBuffer(int32_t size) { + return ByteArray::newInstance(size); +} + +int64_t RAMFile::getSizeInBytes() { + SyncLock syncLock(this); + return sizeInBytes; +} + } diff --git a/src/core/store/RAMInputStream.cpp b/src/core/store/RAMInputStream.cpp index a2501bfa..0cf173e4 100644 --- a/src/core/store/RAMInputStream.cpp +++ b/src/core/store/RAMInputStream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,128 +11,111 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t RAMInputStream::BUFFER_SIZE = RAMOutputStream::BUFFER_SIZE; - - RAMInputStream::RAMInputStream() - { - _length = 0; - - // make sure that we switch to the first needed buffer lazily - currentBufferIndex = -1; - bufferPosition = 0; - bufferStart = 0; - bufferLength = 0; - } - - RAMInputStream::RAMInputStream(RAMFilePtr f) - { - file = f; - _length = file->length; - if (_length / BUFFER_SIZE >= INT_MAX) - boost::throw_exception(IOException(L"Too large RAMFile: " + StringUtils::toString(_length))); - - // make sure that we switch to the first needed buffer lazily - currentBufferIndex = -1; - bufferPosition = 0; - bufferStart = 0; - bufferLength = 0; - } - - RAMInputStream::~RAMInputStream() - { - } - - void RAMInputStream::close() - { - // nothing to do here +namespace Lucene { + +const int32_t RAMInputStream::BUFFER_SIZE = RAMOutputStream::BUFFER_SIZE; + +RAMInputStream::RAMInputStream() { + _length = 0; + + // make sure that we switch to the first needed buffer lazily + currentBufferIndex = -1; + bufferPosition = 0; + bufferStart = 0; + bufferLength = 0; +} + +RAMInputStream::RAMInputStream(const RAMFilePtr& f) { + file = f; + _length = file->length; + if (_length / BUFFER_SIZE >= INT_MAX) { + boost::throw_exception(IOException(L"Too large RAMFile: " + StringUtils::toString(_length))); } - - int64_t RAMInputStream::length() - { - return _length; + + // make sure that we switch to the first needed buffer lazily + currentBufferIndex = -1; + bufferPosition = 0; + bufferStart = 0; + bufferLength = 0; +} + +RAMInputStream::~RAMInputStream() { +} + +void RAMInputStream::close() { + // nothing to do here +} + +int64_t RAMInputStream::length() { + return _length; +} + +uint8_t RAMInputStream::readByte() { + if (bufferPosition >= bufferLength) { + ++currentBufferIndex; + 
switchCurrentBuffer(true); } - - uint8_t RAMInputStream::readByte() - { - if (bufferPosition >= bufferLength) - { + return currentBuffer[bufferPosition++]; +} + +void RAMInputStream::readBytes(uint8_t* b, int32_t offset, int32_t length) { + while (length > 0) { + if (bufferPosition >= bufferLength) { ++currentBufferIndex; switchCurrentBuffer(true); } - return currentBuffer[bufferPosition++]; - } - - void RAMInputStream::readBytes(uint8_t* b, int32_t offset, int32_t length) - { - while (length > 0) - { - if (bufferPosition >= bufferLength) - { - ++currentBufferIndex; - switchCurrentBuffer(true); - } - - int32_t remainInBuffer = bufferLength - bufferPosition; - int32_t bytesToCopy = length < remainInBuffer ? length : remainInBuffer; - MiscUtils::arrayCopy(currentBuffer.get(), bufferPosition, b, offset, bytesToCopy); - offset += bytesToCopy; - length -= bytesToCopy; - bufferPosition += bytesToCopy; - } - } - - void RAMInputStream::switchCurrentBuffer(bool enforceEOF) - { - if (currentBufferIndex >= file->numBuffers()) - { - // end of file reached, no more buffers left - if (enforceEOF) - boost::throw_exception(IOException(L"Read past EOF")); - else - { - // force eof if a read takes place at this position - --currentBufferIndex; - bufferPosition = BUFFER_SIZE; - } - } - else - { - currentBuffer = file->getBuffer(currentBufferIndex); - bufferPosition = 0; - bufferStart = (int64_t)BUFFER_SIZE * (int64_t)currentBufferIndex; - int64_t buflen = _length - bufferStart; - bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int32_t)buflen; - } - } - - int64_t RAMInputStream::getFilePointer() - { - return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; + + int32_t remainInBuffer = bufferLength - bufferPosition; + int32_t bytesToCopy = length < remainInBuffer ? 
length : remainInBuffer; + MiscUtils::arrayCopy(currentBuffer.get(), bufferPosition, b, offset, bytesToCopy); + offset += bytesToCopy; + length -= bytesToCopy; + bufferPosition += bytesToCopy; } - - void RAMInputStream::seek(int64_t pos) - { - if (!currentBuffer || (int32_t)pos < bufferStart || (int32_t)pos >= bufferStart + BUFFER_SIZE) - { - currentBufferIndex = (int32_t)(pos / BUFFER_SIZE); - switchCurrentBuffer(false); +} + +void RAMInputStream::switchCurrentBuffer(bool enforceEOF) { + if (currentBufferIndex >= file->numBuffers()) { + // end of file reached, no more buffers left + if (enforceEOF) { + boost::throw_exception(IOException(L"Read past EOF")); + } else { + // force eof if a read takes place at this position + --currentBufferIndex; + bufferPosition = BUFFER_SIZE; } - bufferPosition = (int32_t)(pos % BUFFER_SIZE); + } else { + currentBuffer = file->getBuffer(currentBufferIndex); + bufferPosition = 0; + bufferStart = (int64_t)BUFFER_SIZE * (int64_t)currentBufferIndex; + int64_t buflen = _length - bufferStart; + bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int32_t)buflen; } - - LuceneObjectPtr RAMInputStream::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene()); - RAMInputStreamPtr cloneInputStream(boost::dynamic_pointer_cast(clone)); - cloneInputStream->file = file; - cloneInputStream->_length = _length; - cloneInputStream->currentBuffer = currentBuffer; - cloneInputStream->currentBufferIndex = currentBufferIndex; - cloneInputStream->bufferPosition = bufferPosition; - cloneInputStream->bufferStart = bufferStart; - cloneInputStream->bufferLength = bufferLength; - return cloneInputStream; +} + +int64_t RAMInputStream::getFilePointer() { + return currentBufferIndex < 0 ? 
0 : bufferStart + bufferPosition; +} + +void RAMInputStream::seek(int64_t pos) { + if (!currentBuffer || (int32_t)pos < bufferStart || (int32_t)pos >= bufferStart + BUFFER_SIZE) { + currentBufferIndex = (int32_t)(pos / BUFFER_SIZE); + switchCurrentBuffer(false); } + bufferPosition = (int32_t)(pos % BUFFER_SIZE); +} + +LuceneObjectPtr RAMInputStream::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = IndexInput::clone(other ? other : newLucene()); + RAMInputStreamPtr cloneInputStream(boost::dynamic_pointer_cast(clone)); + cloneInputStream->file = file; + cloneInputStream->_length = _length; + cloneInputStream->currentBuffer = currentBuffer; + cloneInputStream->currentBufferIndex = currentBufferIndex; + cloneInputStream->bufferPosition = bufferPosition; + cloneInputStream->bufferStart = bufferStart; + cloneInputStream->bufferLength = bufferLength; + return cloneInputStream; +} + } diff --git a/src/core/store/RAMOutputStream.cpp b/src/core/store/RAMOutputStream.cpp index bed91d50..da52588b 100644 --- a/src/core/store/RAMOutputStream.cpp +++ b/src/core/store/RAMOutputStream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,146 +10,130 @@ #include "RAMDirectory.h" #include "MiscUtils.h" -namespace Lucene -{ - const int32_t RAMOutputStream::BUFFER_SIZE = 1024; - - RAMOutputStream::RAMOutputStream() - { - file = newLucene(RAMDirectoryPtr()); - - // make sure that we switch to the first needed buffer lazily - currentBufferIndex = -1; - bufferPosition = 0; - bufferStart = 0; - bufferLength = 0; - } - - RAMOutputStream::RAMOutputStream(RAMFilePtr f) - { - file = f; - - // make sure that we switch to the first needed buffer lazily - currentBufferIndex = -1; - bufferPosition = 0; - bufferStart = 0; - bufferLength = 0; - } - - RAMOutputStream::~RAMOutputStream() - { - } - - void RAMOutputStream::writeTo(IndexOutputPtr out) - { - flush(); - int64_t end = file->length; - int64_t pos = 0; - int32_t buffer = 0; - while (pos < end) - { - int32_t length = BUFFER_SIZE; - int64_t nextPos = pos + length; - if (nextPos > end) // at the last buffer - length = (int32_t)(end - pos); - out->writeBytes(file->getBuffer(buffer++).get(), length); - pos = nextPos; +namespace Lucene { + +const int32_t RAMOutputStream::BUFFER_SIZE = 1024; + +RAMOutputStream::RAMOutputStream() { + file = newLucene(RAMDirectoryPtr()); + + // make sure that we switch to the first needed buffer lazily + currentBufferIndex = -1; + bufferPosition = 0; + bufferStart = 0; + bufferLength = 0; +} + +RAMOutputStream::RAMOutputStream(const RAMFilePtr& f) { + file = f; + + // make sure that we switch to the first needed buffer lazily + currentBufferIndex = -1; + bufferPosition = 0; + bufferStart = 0; + bufferLength = 0; +} + +RAMOutputStream::~RAMOutputStream() { +} + +void RAMOutputStream::writeTo(const IndexOutputPtr& out) { + flush(); + int64_t end = file->length; + int64_t pos = 0; + int32_t buffer = 0; + while (pos < end) { + int32_t length = BUFFER_SIZE; + int64_t nextPos = pos + length; + if (nextPos > end) { // at the last buffer + length = 
(int32_t)(end - pos); } + out->writeBytes(file->getBuffer(buffer++).get(), length); + pos = nextPos; } - - void RAMOutputStream::reset() - { - currentBuffer.reset(); - currentBufferIndex = -1; - bufferPosition = 0; - bufferStart = 0; - bufferLength = 0; - file->setLength(0); - } - - void RAMOutputStream::close() - { - flush(); - } - - void RAMOutputStream::seek(int64_t pos) - { - // set the file length in case we seek back and flush() has not been called yet - setFileLength(); - if ((int64_t)pos < bufferStart || (int64_t)pos >= bufferStart + bufferLength) - { - currentBufferIndex = (int32_t)(pos / BUFFER_SIZE); - switchCurrentBuffer(); - } - bufferPosition = (int32_t)(pos % BUFFER_SIZE); +} + +void RAMOutputStream::reset() { + currentBuffer.reset(); + currentBufferIndex = -1; + bufferPosition = 0; + bufferStart = 0; + bufferLength = 0; + file->setLength(0); +} + +void RAMOutputStream::close() { + flush(); +} + +void RAMOutputStream::seek(int64_t pos) { + // set the file length in case we seek back and flush() has not been called yet + setFileLength(); + if ((int64_t)pos < bufferStart || (int64_t)pos >= bufferStart + bufferLength) { + currentBufferIndex = (int32_t)(pos / BUFFER_SIZE); + switchCurrentBuffer(); } - - int64_t RAMOutputStream::length() - { - return file->length; + bufferPosition = (int32_t)(pos % BUFFER_SIZE); +} + +int64_t RAMOutputStream::length() { + return file->length; +} + +void RAMOutputStream::writeByte(uint8_t b) { + if (bufferPosition == bufferLength) { + ++currentBufferIndex; + switchCurrentBuffer(); } - - void RAMOutputStream::writeByte(uint8_t b) - { - if (bufferPosition == bufferLength) - { + currentBuffer[bufferPosition++] = b; +} + +void RAMOutputStream::writeBytes(const uint8_t* b, int32_t offset, int32_t length) { + while (length > 0) { + BOOST_ASSERT(b != NULL); + if (bufferPosition == bufferLength) { ++currentBufferIndex; switchCurrentBuffer(); } - currentBuffer[bufferPosition++] = b; - } - - void RAMOutputStream::writeBytes(const 
uint8_t* b, int32_t offset, int32_t length) - { - while (length > 0) - { - BOOST_ASSERT(b != NULL); - if (bufferPosition == bufferLength) - { - ++currentBufferIndex; - switchCurrentBuffer(); - } - - int32_t remainInBuffer = currentBuffer.size() - bufferPosition; - int32_t bytesToCopy = length < remainInBuffer ? length : remainInBuffer; - MiscUtils::arrayCopy(b, offset, currentBuffer.get(), bufferPosition, bytesToCopy); - offset += bytesToCopy; - length -= bytesToCopy; - bufferPosition += bytesToCopy; - } - } - - void RAMOutputStream::switchCurrentBuffer() - { - if (currentBufferIndex == file->numBuffers()) - currentBuffer = file->addBuffer(BUFFER_SIZE); - else - currentBuffer = file->getBuffer(currentBufferIndex); - bufferPosition = 0; - bufferStart = (int64_t)BUFFER_SIZE * (int64_t)currentBufferIndex; - bufferLength = currentBuffer.size(); - } - - void RAMOutputStream::setFileLength() - { - int64_t pointer = bufferStart + bufferPosition; - if (pointer > file->length) - file->setLength(pointer); - } - - void RAMOutputStream::flush() - { - file->setLastModified(MiscUtils::currentTimeMillis()); - setFileLength(); + + int32_t remainInBuffer = currentBuffer.size() - bufferPosition; + int32_t bytesToCopy = length < remainInBuffer ? length : remainInBuffer; + MiscUtils::arrayCopy(b, offset, currentBuffer.get(), bufferPosition, bytesToCopy); + offset += bytesToCopy; + length -= bytesToCopy; + bufferPosition += bytesToCopy; } - - int64_t RAMOutputStream::getFilePointer() - { - return currentBufferIndex < 0 ? 
0 : bufferStart + bufferPosition; +} + +void RAMOutputStream::switchCurrentBuffer() { + if (currentBufferIndex == file->numBuffers()) { + currentBuffer = file->addBuffer(BUFFER_SIZE); + } else { + currentBuffer = file->getBuffer(currentBufferIndex); } - - int64_t RAMOutputStream::sizeInBytes() - { - return file->numBuffers() * BUFFER_SIZE; + bufferPosition = 0; + bufferStart = (int64_t)BUFFER_SIZE * (int64_t)currentBufferIndex; + bufferLength = currentBuffer.size(); +} + +void RAMOutputStream::setFileLength() { + int64_t pointer = bufferStart + bufferPosition; + if (pointer > file->length) { + file->setLength(pointer); } } + +void RAMOutputStream::flush() { + file->setLastModified(MiscUtils::currentTimeMillis()); + setFileLength(); +} + +int64_t RAMOutputStream::getFilePointer() { + return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; +} + +int64_t RAMOutputStream::sizeInBytes() { + return file->numBuffers() * BUFFER_SIZE; +} + +} diff --git a/src/core/store/SimpleFSDirectory.cpp b/src/core/store/SimpleFSDirectory.cpp index 4eaddc64..88d39498 100644 --- a/src/core/store/SimpleFSDirectory.cpp +++ b/src/core/store/SimpleFSDirectory.cpp @@ -1,11 +1,11 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" -#include +#include #include "SimpleFSDirectory.h" #include "_SimpleFSDirectory.h" #include "IndexOutput.h" @@ -13,262 +13,227 @@ #include "FileUtils.h" #include "StringUtils.h" -namespace Lucene -{ - SimpleFSDirectory::SimpleFSDirectory(const String& path, LockFactoryPtr lockFactory) : FSDirectory(path, lockFactory) - { - } - - SimpleFSDirectory::~SimpleFSDirectory() - { - } - - IndexOutputPtr SimpleFSDirectory::createOutput(const String& name) - { - initOutput(name); - return newLucene(FileUtils::joinPath(directory, name)); - } - - IndexInputPtr SimpleFSDirectory::openInput(const String& name) - { - return FSDirectory::openInput(name); - } - - IndexInputPtr SimpleFSDirectory::openInput(const String& name, int32_t bufferSize) - { - ensureOpen(); - return newLucene(FileUtils::joinPath(directory, name), bufferSize, getReadChunkSize()); - } - - const int32_t InputFile::FILE_EOF = FileReader::FILE_EOF; - const int32_t InputFile::FILE_ERROR = FileReader::FILE_ERROR; - - InputFile::InputFile(const String& path) - { - file = newInstance(StringUtils::toUTF8(path).c_str(), std::ios::binary | std::ios::in); - if (!file->is_open()) - boost::throw_exception(FileNotFoundException(path)); - position = 0; - length = FileUtils::fileLength(path); - } - - InputFile::~InputFile() - { - } - - void InputFile::setPosition(int64_t position) - { - this->position = position; - file->seekg((std::streamoff)position); - if (!file->good()) - boost::throw_exception(IOException()); - } - - int64_t InputFile::getPosition() - { - return position; - } - - int64_t InputFile::getLength() - { - return length; - } - - int32_t InputFile::read(uint8_t* b, int32_t offset, int32_t length) - { - try - { - if (file->eof()) - return FILE_EOF; - file->read((char*)b + offset, length); - int32_t readCount = file->gcount(); - position += readCount; - return readCount; - } - catch (...) 
- { - return FILE_ERROR; - } - } - - void InputFile::close() - { - if (file->is_open()) - file->close(); - } - - bool InputFile::isValid() - { - return (file && file->is_open() && file->good()); - } - - SimpleFSIndexInput::SimpleFSIndexInput() - { - this->chunkSize = 0; - this->isClone = false; - } - - SimpleFSIndexInput::SimpleFSIndexInput(const String& path, int32_t bufferSize, int32_t chunkSize) : BufferedIndexInput(bufferSize) - { - this->file = newLucene(path); - this->path = path; - this->chunkSize = chunkSize; - this->isClone = false; +namespace Lucene { + +SimpleFSDirectory::SimpleFSDirectory(const String& path, const LockFactoryPtr& lockFactory) : FSDirectory(path, lockFactory) { +} + +SimpleFSDirectory::~SimpleFSDirectory() { +} + +IndexOutputPtr SimpleFSDirectory::createOutput(const String& name) { + initOutput(name); + return newLucene(FileUtils::joinPath(directory, name)); +} + +IndexInputPtr SimpleFSDirectory::openInput(const String& name) { + return FSDirectory::openInput(name); +} + +IndexInputPtr SimpleFSDirectory::openInput(const String& name, int32_t bufferSize) { + ensureOpen(); + return newLucene(FileUtils::joinPath(directory, name), bufferSize, getReadChunkSize()); +} + +const int32_t InputFile::FILE_EOF = FileReader::FILE_EOF; +const int32_t InputFile::FILE_ERROR = FileReader::FILE_ERROR; + +InputFile::InputFile(const String& path) { + file = newInstance(path, std::ios::binary | std::ios::in); + if (!file->is_open()) { + boost::throw_exception(FileNotFoundException(path)); } - - SimpleFSIndexInput::~SimpleFSIndexInput() - { + position = 0; + length = FileUtils::fileLength(path); +} + +InputFile::~InputFile() { +} + +void InputFile::setPosition(int64_t position) { + this->position = position; + file->seekg((std::streamoff)position); + if (!file->good()) { + boost::throw_exception(IOException()); } - - void SimpleFSIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) - { - SyncLock fileLock(file); - - int64_t position = 
getFilePointer(); - if (position != file->getPosition()) - file->setPosition(position); - - int32_t total = 0; - - while (total < length) - { - int32_t readLength = total + chunkSize > length ? length - total : chunkSize; - - int32_t i = file->read(b, offset + total, readLength); - if (i == InputFile::FILE_EOF) - boost::throw_exception(IOException(L"Read past EOF")); - total += i; +} + +int64_t InputFile::getPosition() { + return position; +} + +int64_t InputFile::getLength() { + return length; +} + +int32_t InputFile::read(uint8_t* b, int32_t offset, int32_t length) { + try { + if (file->eof()) { + return FILE_EOF; } + file->read((char*)b + offset, length); + int32_t readCount = file->gcount(); + position += readCount; + return readCount; + } catch (...) { + return FILE_ERROR; } - - void SimpleFSIndexInput::seekInternal(int64_t pos) - { - } - - int64_t SimpleFSIndexInput::length() - { - return file->getLength(); - } - - void SimpleFSIndexInput::close() - { - if (!isClone) - file->close(); - } - - bool SimpleFSIndexInput::isValid() - { - return file->isValid(); - } - - LuceneObjectPtr SimpleFSIndexInput::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = BufferedIndexInput::clone(other ? 
other : newLucene()); - SimpleFSIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); - cloneIndexInput->path = path; - cloneIndexInput->file = file; - cloneIndexInput->chunkSize = chunkSize; - cloneIndexInput->isClone = true; - return cloneIndexInput; - } - - OutputFile::OutputFile(const String& path) - { - this->path = path; - file = newInstance(StringUtils::toUTF8(path).c_str(), std::ios::binary | std::ios::out); +} + +void InputFile::close() { + if (file->is_open()) { + file->close(); } - - OutputFile::~OutputFile() - { +} + +bool InputFile::isValid() { + return (file && file->is_open() && file->good()); +} + +SimpleFSIndexInput::SimpleFSIndexInput() { + this->chunkSize = 0; + this->isClone = false; +} + +SimpleFSIndexInput::SimpleFSIndexInput(const String& path, int32_t bufferSize, int32_t chunkSize) : BufferedIndexInput(bufferSize) { + this->file = newLucene(path); + this->path = path; + this->chunkSize = chunkSize; + this->isClone = false; +} + +SimpleFSIndexInput::~SimpleFSIndexInput() { +} + +void SimpleFSIndexInput::readInternal(uint8_t* b, int32_t offset, int32_t length) { + SyncLock fileLock(file); + + int64_t position = getFilePointer(); + if (position != file->getPosition()) { + file->setPosition(position); } - - bool OutputFile::write(const uint8_t* b, int32_t offset, int32_t length) - { - if (!file->is_open()) - return false; - try - { - file->write((char*)b + offset, length); - return file->good(); - } - catch (...) - { - return false; + + int32_t total = 0; + + while (total < length) { + int32_t readLength = total + chunkSize > length ? 
length - total : chunkSize; + + int32_t i = file->read(b, offset + total, readLength); + if (i == InputFile::FILE_EOF) { + boost::throw_exception(IOException(L"Read past EOF")); } + total += i; } - - void OutputFile::close() - { - file.reset(); - } - - void OutputFile::setPosition(int64_t position) - { - file->seekp((std::streamoff)position); - if (!file->good()) - boost::throw_exception(IOException()); - } - - int64_t OutputFile::getLength() - { - return FileUtils::fileLength(path); - } - - void OutputFile::setLength(int64_t length) - { - FileUtils::setFileLength(path, length); - } - - void OutputFile::flush() - { - if (file->is_open()) - file->flush(); +} + +void SimpleFSIndexInput::seekInternal(int64_t pos) { +} + +int64_t SimpleFSIndexInput::length() { + return file->getLength(); +} + +void SimpleFSIndexInput::close() { + if (!isClone) { + file->close(); } - - bool OutputFile::isValid() - { - return (file && file->is_open() && file->good()); +} + +bool SimpleFSIndexInput::isValid() { + return file->isValid(); +} + +LuceneObjectPtr SimpleFSIndexInput::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = BufferedIndexInput::clone(other ? other : newLucene()); + SimpleFSIndexInputPtr cloneIndexInput(boost::dynamic_pointer_cast(clone)); + cloneIndexInput->path = path; + cloneIndexInput->file = file; + cloneIndexInput->chunkSize = chunkSize; + cloneIndexInput->isClone = true; + return cloneIndexInput; +} + +OutputFile::OutputFile(const String& path) { + this->path = path; + file = newInstance(path, std::ios::binary | std::ios::out); +} + +OutputFile::~OutputFile() { +} + +bool OutputFile::write(const uint8_t* b, int32_t offset, int32_t length) { + if (!file->is_open()) { + return false; } - - SimpleFSIndexOutput::SimpleFSIndexOutput(const String& path) - { - file = newLucene(path); - isOpen = true; + try { + file->write((char*)b + offset, length); + return file->good(); + } catch (...) 
{ + return false; } - - SimpleFSIndexOutput::~SimpleFSIndexOutput() - { +} + +void OutputFile::close() { + file.reset(); +} + +void OutputFile::setPosition(int64_t position) { + file->seekp((std::streamoff)position); + if (!file->good()) { + boost::throw_exception(IOException()); } - - void SimpleFSIndexOutput::flushBuffer(const uint8_t* b, int32_t offset, int32_t length) - { - file->write(b, offset, length); +} + +int64_t OutputFile::getLength() { + return FileUtils::fileLength(path); +} + +void OutputFile::setLength(int64_t length) { + FileUtils::setFileLength(path, length); +} + +void OutputFile::flush() { + if (file->is_open()) { file->flush(); } - - void SimpleFSIndexOutput::close() - { - if (isOpen) - { - BufferedIndexOutput::close(); - file.reset(); - isOpen = false; - } - } - - void SimpleFSIndexOutput::seek(int64_t pos) - { - BufferedIndexOutput::seek(pos); - file->setPosition(pos); - } - - int64_t SimpleFSIndexOutput::length() - { - return file->getLength(); - } - - void SimpleFSIndexOutput::setLength(int64_t length) - { - file->setLength(length); +} + +bool OutputFile::isValid() { + return (file && file->is_open() && file->good()); +} + +SimpleFSIndexOutput::SimpleFSIndexOutput(const String& path) { + file = newLucene(path); + isOpen = true; +} + +SimpleFSIndexOutput::~SimpleFSIndexOutput() { +} + +void SimpleFSIndexOutput::flushBuffer(const uint8_t* b, int32_t offset, int32_t length) { + file->write(b, offset, length); + file->flush(); +} + +void SimpleFSIndexOutput::close() { + if (isOpen) { + BufferedIndexOutput::close(); + file.reset(); + isOpen = false; } } + +void SimpleFSIndexOutput::seek(int64_t pos) { + BufferedIndexOutput::seek(pos); + file->setPosition(pos); +} + +int64_t SimpleFSIndexOutput::length() { + return file->getLength(); +} + +void SimpleFSIndexOutput::setLength(int64_t length) { + file->setLength(length); +} + +} diff --git a/src/core/store/SimpleFSLockFactory.cpp b/src/core/store/SimpleFSLockFactory.cpp index 4bec1be5..fcbbc615 
100644 --- a/src/core/store/SimpleFSLockFactory.cpp +++ b/src/core/store/SimpleFSLockFactory.cpp @@ -1,91 +1,79 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" -#include +#include #include "SimpleFSLockFactory.h" #include "_SimpleFSLockFactory.h" #include "FileUtils.h" #include "StringUtils.h" -namespace Lucene -{ - SimpleFSLockFactory::SimpleFSLockFactory() - { - } - - SimpleFSLockFactory::SimpleFSLockFactory(const String& lockDir) - { - setLockDir(lockDir); - } - - SimpleFSLockFactory::~SimpleFSLockFactory() - { - } - - LockPtr SimpleFSLockFactory::makeLock(const String& lockName) - { - return newLucene(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName); - } - - void SimpleFSLockFactory::clearLock(const String& lockName) - { - if (FileUtils::isDirectory(lockDir)) - { - String lockPath(FileUtils::joinPath(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName)); - if (FileUtils::fileExists(lockPath) && !FileUtils::removeFile(lockPath)) - boost::throw_exception(IOException(L"Cannot delete " + lockPath)); +namespace Lucene { + +SimpleFSLockFactory::SimpleFSLockFactory() { +} + +SimpleFSLockFactory::SimpleFSLockFactory(const String& lockDir) { + setLockDir(lockDir); +} + +SimpleFSLockFactory::~SimpleFSLockFactory() { +} + +LockPtr SimpleFSLockFactory::makeLock(const String& lockName) { + return newLucene(lockDir, lockPrefix.empty() ? lockName : lockPrefix + L"-" + lockName); +} + +void SimpleFSLockFactory::clearLock(const String& lockName) { + if (FileUtils::isDirectory(lockDir)) { + String lockPath(FileUtils::joinPath(lockDir, lockPrefix.empty() ? 
lockName : lockPrefix + L"-" + lockName)); + if (FileUtils::fileExists(lockPath) && !FileUtils::removeFile(lockPath)) { + boost::throw_exception(IOException(L"Cannot delete " + lockPath)); } } - - SimpleFSLock::SimpleFSLock(const String& lockDir, const String& lockFileName) - { - this->lockDir = lockDir; - this->lockFile = lockFile; - } - - SimpleFSLock::~SimpleFSLock() - { - } - - bool SimpleFSLock::obtain() - { - // Ensure that lockDir exists and is a directory - if (!FileUtils::fileExists(lockDir)) - { - if (!FileUtils::createDirectory(lockDir)) - boost::throw_exception(RuntimeException(L"Cannot create directory: " + lockDir)); - } - else if (!FileUtils::isDirectory(lockDir)) - boost::throw_exception(RuntimeException(L"Found regular file where directory expected: " + lockDir)); - std::ofstream f; - try - { - f.open(StringUtils::toUTF8(FileUtils::joinPath(lockDir, lockFile)).c_str(), std::ios::binary | std::ios::out); - } - catch (...) - { +} + +SimpleFSLock::SimpleFSLock(const String& lockDir, const String& lockFileName) { + this->lockDir = lockDir; + this->lockFile = lockFileName; +} + +SimpleFSLock::~SimpleFSLock() { +} + +bool SimpleFSLock::obtain() { + // Ensure that lockDir exists and is a directory + if (!FileUtils::fileExists(lockDir)) { + if (!FileUtils::createDirectory(lockDir)) { + boost::throw_exception(RuntimeException(L"Cannot create directory: " + lockDir)); } - return f.is_open(); - } - - void SimpleFSLock::release() - { - String path(FileUtils::joinPath(lockDir, lockFile)); - if (FileUtils::fileExists(path) && !FileUtils::removeFile(path)) - boost::throw_exception(LockReleaseFailedException(L"failed to delete " + path)); + } else if (!FileUtils::isDirectory(lockDir)) { + boost::throw_exception(RuntimeException(L"Found regular file where directory expected: " + lockDir)); } - - bool SimpleFSLock::isLocked() - { - return FileUtils::fileExists(FileUtils::joinPath(lockDir, lockFile)); + boost::filesystem::ofstream f; + try { + 
f.open(FileUtils::joinPath(lockDir, lockFile), std::ios::binary | std::ios::out); + } catch (...) { } - - String SimpleFSLock::toString() - { - return getClassName() + L"@" + FileUtils::joinPath(lockDir, lockFile); + return f.is_open(); +} + +void SimpleFSLock::release() { + String path(FileUtils::joinPath(lockDir, lockFile)); + if (FileUtils::fileExists(path) && !FileUtils::removeFile(path)) { + boost::throw_exception(LockReleaseFailedException(L"failed to delete " + path)); } } + +bool SimpleFSLock::isLocked() { + return FileUtils::fileExists(FileUtils::joinPath(lockDir, lockFile)); +} + +String SimpleFSLock::toString() { + return getClassName() + L"@" + FileUtils::joinPath(lockDir, lockFile); +} + +} diff --git a/src/core/store/SingleInstanceLockFactory.cpp b/src/core/store/SingleInstanceLockFactory.cpp index 739b7ff3..f3f398ba 100644 --- a/src/core/store/SingleInstanceLockFactory.cpp +++ b/src/core/store/SingleInstanceLockFactory.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,60 +8,51 @@ #include "SingleInstanceLockFactory.h" #include "_SingleInstanceLockFactory.h" -namespace Lucene -{ - SingleInstanceLockFactory::SingleInstanceLockFactory() - { - locks = HashSet::newInstance(); - } - - SingleInstanceLockFactory::~SingleInstanceLockFactory() - { - } - - LockPtr SingleInstanceLockFactory::makeLock(const String& lockName) - { - // We do not use the LockPrefix at all, because the private HashSet instance - // effectively scopes the locking to this single Directory instance. 
- return newLucene(locks, lockName); - } - - void SingleInstanceLockFactory::clearLock(const String& lockName) - { - SyncLock syncLock(&locks); - locks.remove(lockName); - } - - SingleInstanceLock::SingleInstanceLock(HashSet locks, const String& lockName) - { - this->locks = locks; - this->lockName = lockName; - } - - SingleInstanceLock::~SingleInstanceLock() - { - } - - bool SingleInstanceLock::obtain() - { - SyncLock syncLock(&locks); - return locks.add(lockName); - } - - void SingleInstanceLock::release() - { - SyncLock syncLock(&locks); - locks.remove(lockName); - } - - bool SingleInstanceLock::isLocked() - { - SyncLock syncLock(&locks); - return locks.contains(lockName); - } - - String SingleInstanceLock::toString() - { - return lockName; - } +namespace Lucene { + +SingleInstanceLockFactory::SingleInstanceLockFactory() { + locks = HashSet::newInstance(); +} + +SingleInstanceLockFactory::~SingleInstanceLockFactory() { +} + +LockPtr SingleInstanceLockFactory::makeLock(const String& lockName) { + // We do not use the LockPrefix at all, because the private HashSet instance + // effectively scopes the locking to this single Directory instance. 
+ return newLucene(locks, lockName); +} + +void SingleInstanceLockFactory::clearLock(const String& lockName) { + SyncLock syncLock(&locks); + locks.remove(lockName); +} + +SingleInstanceLock::SingleInstanceLock(HashSet locks, const String& lockName) { + this->locks = locks; + this->lockName = lockName; +} + +SingleInstanceLock::~SingleInstanceLock() { +} + +bool SingleInstanceLock::obtain() { + SyncLock syncLock(&locks); + return locks.add(lockName); +} + +void SingleInstanceLock::release() { + SyncLock syncLock(&locks); + locks.remove(lockName); +} + +bool SingleInstanceLock::isLocked() { + SyncLock syncLock(&locks); + return locks.contains(lockName); +} + +String SingleInstanceLock::toString() { + return lockName; +} + } diff --git a/src/core/util/Allocator.cpp b/src/core/util/Allocator.cpp deleted file mode 100644 index 36f02c8e..00000000 --- a/src/core/util/Allocator.cpp +++ /dev/null @@ -1,67 +0,0 @@ -///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. -// Distributable under the terms of either the Apache License (Version 2.0) -// or the GNU Lesser General Public License. 
-///////////////////////////////////////////////////////////////////////////// - -#include "LuceneInc.h" -#include "Allocator.h" - -#ifdef LPP_USE_NEDMALLOC -extern "C" -{ -#include "nedmalloc/nedmalloc.h" -} -#endif - -namespace Lucene -{ - void* AllocMemory(size_t size) - { - #if defined(LPP_USE_NEDMALLOC) - return nedalloc::nedmalloc(size); - #elif (defined(_WIN32) || defined(_WIN64)) && !defined(NDEBUG) - return _malloc_dbg(size, _NORMAL_BLOCK, __FILE__, __LINE__); - #else - return malloc(size); - #endif - } - - void* ReallocMemory(void* memory, size_t size) - { - if (memory == NULL) - return AllocMemory(size); - if (size == 0) - { - FreeMemory(memory); - return NULL; - } - #if defined(LPP_USE_NEDMALLOC) - return nedalloc::nedrealloc(memory, size); - #elif defined(_WIN32) && !defined(NDEBUG) - return _realloc_dbg(memory, size, _NORMAL_BLOCK, __FILE__, __LINE__); - #else - return realloc(memory, size); - #endif - } - - void FreeMemory(void* memory) - { - if (memory == NULL) - return; - #if defined(LPP_USE_NEDMALLOC) - nedalloc::nedfree(memory); - #elif defined(_WIN32) && !defined(NDEBUG) - _free_dbg(memory, _NORMAL_BLOCK); - #else - free(memory); - #endif - } - - void ReleaseThreadCache() - { - #if defined(LPP_USE_NEDMALLOC) - nedalloc::neddisablethreadcache(0); - #endif - } -} diff --git a/src/core/util/Attribute.cpp b/src/core/util/Attribute.cpp index 5df3b2c7..ae45eadf 100644 --- a/src/core/util/Attribute.cpp +++ b/src/core/util/Attribute.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,24 +7,21 @@ #include "LuceneInc.h" #include "Attribute.h" -namespace Lucene -{ - Attribute::~Attribute() - { - } - - int32_t Attribute::hashCode() - { - return LuceneObject::hashCode(); - } - - bool Attribute::equals(LuceneObjectPtr other) - { - return LuceneObject::equals(other); - } - - LuceneObjectPtr Attribute::clone(LuceneObjectPtr other) - { - return LuceneObject::clone(other); - } +namespace Lucene { + +Attribute::~Attribute() { +} + +int32_t Attribute::hashCode() { + return LuceneObject::hashCode(); +} + +bool Attribute::equals(const LuceneObjectPtr& other) { + return LuceneObject::equals(other); +} + +LuceneObjectPtr Attribute::clone(const LuceneObjectPtr& other) { + return LuceneObject::clone(other); +} + } diff --git a/src/core/util/AttributeSource.cpp b/src/core/util/AttributeSource.cpp index 48ea4ffb..12e6b218 100644 --- a/src/core/util/AttributeSource.cpp +++ b/src/core/util/AttributeSource.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,254 +8,241 @@ #include "AttributeSource.h" #include "Attribute.h" -namespace Lucene -{ - AttributeFactory::AttributeFactory() - { - } +namespace Lucene { - AttributeFactory::~AttributeFactory() - { - } - - AttributePtr AttributeFactory::createAttributeInstance(const String& className) - { - return AttributePtr(); // override - } - - AttributeFactoryPtr AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY() - { - static AttributeFactoryPtr _DEFAULT_ATTRIBUTE_FACTORY; - if (!_DEFAULT_ATTRIBUTE_FACTORY) - { - _DEFAULT_ATTRIBUTE_FACTORY = newLucene(); - CycleCheck::addStatic(_DEFAULT_ATTRIBUTE_FACTORY); - } - return _DEFAULT_ATTRIBUTE_FACTORY; - } - - AttributeSource::AttributeSource() - { - this->attributes = MapStringAttribute::newInstance(); - this->factory = AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY(); - } - - AttributeSource::AttributeSource(AttributeSourcePtr input) - { - if (!input) - boost::throw_exception(IllegalArgumentException(L"input AttributeSource must not be null")); - this->attributes = input->attributes; - this->factory = input->factory; - } - - AttributeSource::AttributeSource(AttributeFactoryPtr factory) - { - this->attributes = MapStringAttribute::newInstance(); - this->factory = factory; - } - - AttributeSource::~AttributeSource() - { - } - - AttributeFactoryPtr AttributeSource::getAttributeFactory() - { - return this->factory; - } - - void AttributeSource::addAttribute(const String& className, AttributePtr attrImpl) - { - // invalidate state to force recomputation in captureState() - currentState.reset(); - attributes.put(className, attrImpl); - } - - bool AttributeSource::hasAttributes() - { - return !attributes.empty(); - } - - AttributePtr AttributeSource::getAttribute(const String& className) - { - return attributes.get(className); - } +AttributeFactory::AttributeFactory() { +} + +AttributeFactory::~AttributeFactory() { +} + +AttributePtr 
AttributeFactory::createAttributeInstance(const String& className) { + return AttributePtr(); // override +} + +AttributeFactoryPtr AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY() { + static AttributeFactoryPtr _DEFAULT_ATTRIBUTE_FACTORY; + + LUCENE_RUN_ONCE( + _DEFAULT_ATTRIBUTE_FACTORY = newLucene(); + CycleCheck::addStatic(_DEFAULT_ATTRIBUTE_FACTORY); + ); - bool AttributeSource::hasAttribute(const String& className) - { - return attributes.contains(className); + return _DEFAULT_ATTRIBUTE_FACTORY; +} + +AttributeSource::AttributeSource() { + this->attributes = MapStringAttribute::newInstance(); + this->factory = AttributeFactory::DEFAULT_ATTRIBUTE_FACTORY(); +} + +AttributeSource::AttributeSource(const AttributeSourcePtr& input) { + if (!input) { + boost::throw_exception(IllegalArgumentException(L"input AttributeSource must not be null")); } - - void AttributeSource::computeCurrentState() - { - currentState = newLucene(); - AttributeSourceStatePtr c(currentState); - MapStringAttribute::iterator attrImpl = attributes.begin(); + this->attributes = input->attributes; + this->factory = input->factory; +} + +AttributeSource::AttributeSource(const AttributeFactoryPtr& factory) { + this->attributes = MapStringAttribute::newInstance(); + this->factory = factory; +} + +AttributeSource::~AttributeSource() { +} + +AttributeFactoryPtr AttributeSource::getAttributeFactory() { + return this->factory; +} + +void AttributeSource::addAttribute(const String& className, const AttributePtr& attrImpl) { + // invalidate state to force recomputation in captureState() + currentState.reset(); + attributes.put(className, attrImpl); +} + +bool AttributeSource::hasAttributes() { + return !attributes.empty(); +} + +AttributePtr AttributeSource::getAttribute(const String& className) { + return attributes.get(className); +} + +bool AttributeSource::hasAttribute(const String& className) { + return attributes.contains(className); +} + +void AttributeSource::computeCurrentState() { + currentState 
= newLucene(); + AttributeSourceStatePtr c(currentState); + MapStringAttribute::iterator attrImpl = attributes.begin(); + c->attribute = attrImpl->second; + ++attrImpl; + while (attrImpl != attributes.end()) { + c->next = newLucene(); + c = c->next; c->attribute = attrImpl->second; ++attrImpl; - while (attrImpl != attributes.end()) - { - c->next = newLucene(); - c = c->next; - c->attribute = attrImpl->second; - ++attrImpl; - } } - - void AttributeSource::clearAttributes() - { - if (hasAttributes()) - { - if (!currentState) - computeCurrentState(); - for (MapStringAttribute::iterator attrImpl = attributes.begin(); attrImpl != attributes.end(); ++attrImpl) - attrImpl->second->clear(); +} + +void AttributeSource::clearAttributes() { + if (hasAttributes()) { + if (!currentState) { + computeCurrentState(); + } + for (MapStringAttribute::iterator attrImpl = attributes.begin(); attrImpl != attributes.end(); ++attrImpl) { + attrImpl->second->clear(); } } - - AttributeSourceStatePtr AttributeSource::captureState() - { - if (!hasAttributes()) - return AttributeSourceStatePtr(); - - if (!currentState) - computeCurrentState(); - - return boost::dynamic_pointer_cast(currentState->clone()); +} + +AttributeSourceStatePtr AttributeSource::captureState() { + if (!hasAttributes()) { + return AttributeSourceStatePtr(); } - - void AttributeSource::restoreState(AttributeSourceStatePtr state) - { - if (!state) - return; - - do - { - MapStringAttribute::iterator attrImpl = attributes.find(state->attribute->getClassName()); - if (attrImpl == attributes.end()) - boost::throw_exception(IllegalArgumentException(L"State contains an AttributeImpl that is not in this AttributeSource")); - state->attribute->copyTo(attrImpl->second); - state = state->next; + + if (!currentState) { + computeCurrentState(); + } + + return boost::dynamic_pointer_cast(currentState->clone()); +} + +void AttributeSource::restoreState(const AttributeSourceStatePtr& state) { + AttributeSourceStatePtr _state(state); + if 
(!_state) { + return; + } + + do { + MapStringAttribute::iterator attrImpl = attributes.find(_state->attribute->getClassName()); + if (attrImpl == attributes.end()) { + boost::throw_exception(IllegalArgumentException(L"State contains an AttributeImpl that is not in this AttributeSource")); } - while (state); + _state->attribute->copyTo(attrImpl->second); + _state = _state->next; + } while (_state); +} + +int32_t AttributeSource::hashCode() { + int32_t code = 0; + for (MapStringAttribute::iterator attrImpl = attributes.begin(); attrImpl != attributes.end(); ++attrImpl) { + code = code * 31 + attrImpl->second->hashCode(); } - - int32_t AttributeSource::hashCode() - { - int32_t code = 0; - for (MapStringAttribute::iterator attrImpl = attributes.begin(); attrImpl != attributes.end(); ++attrImpl) - code = code * 31 + attrImpl->second->hashCode(); - return code; + return code; +} + +bool AttributeSource::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - bool AttributeSource::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - - AttributeSourcePtr otherAttributeSource = boost::dynamic_pointer_cast(other); - if (otherAttributeSource) - { - if (hasAttributes()) - { - if (!otherAttributeSource->hasAttributes()) - return false; - - if (attributes.size() != otherAttributeSource->attributes.size()) + + AttributeSourcePtr otherAttributeSource = boost::dynamic_pointer_cast(other); + if (otherAttributeSource) { + if (hasAttributes()) { + if (!otherAttributeSource->hasAttributes()) { + return false; + } + + if (attributes.size() != otherAttributeSource->attributes.size()) { + return false; + } + + // it is only equal if all attribute impls are the same in the same order + if (!currentState) { + computeCurrentState(); + } + + AttributeSourceStatePtr thisState(currentState); + if (!otherAttributeSource->currentState) { + otherAttributeSource->computeCurrentState(); + } + + AttributeSourceStatePtr 
otherState(otherAttributeSource->currentState); + while (thisState && otherState) { + if (otherState->attribute->getClassName() != thisState->attribute->getClassName() || !otherState->attribute->equals(thisState->attribute)) { return false; - - // it is only equal if all attribute impls are the same in the same order - if (!currentState) - computeCurrentState(); - - AttributeSourceStatePtr thisState(currentState); - if (!otherAttributeSource->currentState) - otherAttributeSource->computeCurrentState(); - - AttributeSourceStatePtr otherState(otherAttributeSource->currentState); - while (thisState && otherState) - { - if (otherState->attribute->getClassName() != thisState->attribute->getClassName() || !otherState->attribute->equals(thisState->attribute)) - return false; - thisState = thisState->next; - otherState = otherState->next; } - return true; + thisState = thisState->next; + otherState = otherState->next; } - else - return !otherAttributeSource->hasAttributes(); + return true; + } else { + return !otherAttributeSource->hasAttributes(); } - else - return false; + } else { + return false; } - - String AttributeSource::toString() - { - StringStream buf; - buf << L"("; - if (hasAttributes()) - { - if (!currentState) - computeCurrentState(); - for (AttributeSourceStatePtr state(currentState); state; state = state->next) - { - if (state != currentState) - buf << L","; - buf << state->attribute->toString(); +} + +String AttributeSource::toString() { + StringStream buf; + buf << L"("; + if (hasAttributes()) { + if (!currentState) { + computeCurrentState(); + } + for (AttributeSourceStatePtr state(currentState); state; state = state->next) { + if (state != currentState) { + buf << L","; } + buf << state->attribute->toString(); } - buf << ")"; - return buf.str(); } - - AttributeSourcePtr AttributeSource::cloneAttributes() - { - AttributeSourcePtr clone(newLucene(this->factory)); - - if (hasAttributes()) - { - if (!currentState) - computeCurrentState(); - for 
(AttributeSourceStatePtr state(currentState); state; state = state->next) - clone->attributes.put(state->attribute->getClassName(), boost::dynamic_pointer_cast(state->attribute->clone())); + buf << ")"; + return buf.str(); +} + +AttributeSourcePtr AttributeSource::cloneAttributes() { + AttributeSourcePtr clone(newLucene(this->factory)); + + if (hasAttributes()) { + if (!currentState) { + computeCurrentState(); } - - return clone; - } - - Collection AttributeSource::getAttributes() - { - Collection attrImpls(Collection::newInstance()); - if (hasAttributes()) - { - if (!currentState) - computeCurrentState(); - for (AttributeSourceStatePtr state(currentState); state; state = state->next) - attrImpls.add(state->attribute); + for (AttributeSourceStatePtr state(currentState); state; state = state->next) { + clone->attributes.put(state->attribute->getClassName(), boost::dynamic_pointer_cast(state->attribute->clone())); } - return attrImpls; } - - DefaultAttributeFactory::~DefaultAttributeFactory() - { - } - - AttributePtr DefaultAttributeFactory::createAttributeInstance(const String& className) - { - return AttributePtr(); - } - - AttributeSourceState::~AttributeSourceState() - { + + return clone; +} + +Collection AttributeSource::getAttributes() { + Collection attrImpls(Collection::newInstance()); + if (hasAttributes()) { + if (!currentState) { + computeCurrentState(); + } + for (AttributeSourceStatePtr state(currentState); state; state = state->next) { + attrImpls.add(state->attribute); + } } - - LuceneObjectPtr AttributeSourceState::clone(LuceneObjectPtr other) - { - AttributeSourceStatePtr clone(newLucene()); - clone->attribute = boost::dynamic_pointer_cast(attribute->clone()); - - if (next) - clone->next = boost::dynamic_pointer_cast(next->clone()); - - return clone; + return attrImpls; +} + +DefaultAttributeFactory::~DefaultAttributeFactory() { +} + +AttributePtr DefaultAttributeFactory::createAttributeInstance(const String& className) { + return AttributePtr(); +} 
+ +AttributeSourceState::~AttributeSourceState() { +} + +LuceneObjectPtr AttributeSourceState::clone(const LuceneObjectPtr& other) { + AttributeSourceStatePtr clone(newLucene()); + clone->attribute = boost::dynamic_pointer_cast(attribute->clone()); + + if (next) { + clone->next = boost::dynamic_pointer_cast(next->clone()); } + + return clone; +} + } diff --git a/src/core/util/Base64.cpp b/src/core/util/Base64.cpp index d18dc04b..be00413b 100644 --- a/src/core/util/Base64.cpp +++ b/src/core/util/Base64.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,120 +9,117 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - const String Base64::BASE64_CHARS = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - - Base64::~Base64() - { - } - - String Base64::encode(ByteArray bytes) - { - return encode(bytes.get(), bytes.size()); - } - - String Base64::encode(const uint8_t* bytes, int32_t length) - { - String result; - uint8_t byteArray3[3]; - uint8_t byteArray4[4]; - int32_t i = 0; - - while (length--) - { - byteArray3[i++] = *(bytes++); - if (i == 3) - { - byteArray4[0] = (byteArray3[0] & 0xfc) >> 2; - byteArray4[1] = ((byteArray3[0] & 0x03) << 4) + ((byteArray3[1] & 0xf0) >> 4); - byteArray4[2] = ((byteArray3[1] & 0x0f) << 2) + ((byteArray3[2] & 0xc0) >> 6); - byteArray4[3] = byteArray3[2] & 0x3f; - - for (i = 0; i < 4; ++i) - result += BASE64_CHARS[byteArray4[i]]; - i = 0; - } - } +namespace Lucene { + +const String Base64::BASE64_CHARS = L"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; - if (i != 0) - { - for (int32_t j = i; j < 3; ++j) - 
byteArray3[j] = 0; +Base64::~Base64() { +} + +String Base64::encode(ByteArray bytes) { + return encode(bytes.get(), bytes.size()); +} +String Base64::encode(const uint8_t* bytes, int32_t length) { + String result; + uint8_t byteArray3[3]; + uint8_t byteArray4[4]; + int32_t i = 0; + + while (length--) { + byteArray3[i++] = *(bytes++); + if (i == 3) { byteArray4[0] = (byteArray3[0] & 0xfc) >> 2; byteArray4[1] = ((byteArray3[0] & 0x03) << 4) + ((byteArray3[1] & 0xf0) >> 4); byteArray4[2] = ((byteArray3[1] & 0x0f) << 2) + ((byteArray3[2] & 0xc0) >> 6); byteArray4[3] = byteArray3[2] & 0x3f; - for (int32_t j = 0; j < i + 1; ++j) - result += BASE64_CHARS[byteArray4[j]]; - - while (i++ < 3) - result += L'='; + for (i = 0; i < 4; ++i) { + result += BASE64_CHARS[byteArray4[i]]; + } + i = 0; } - return result; } - - ByteArray Base64::decode(const String& str) - { - int32_t length = str.length(); - uint8_t byteArray4[4]; - uint8_t byteArray3[3]; - - int32_t i = 0; - int32_t charIndex = 0; - - ByteArray result(ByteArray::newInstance(length / 2)); - int32_t resultIndex = 0; - - while (length-- && str[charIndex] != L'=' && isBase64(str[charIndex])) - { - byteArray4[i++] = (uint8_t)str[charIndex++]; - if (i == 4) - { - for (i = 0; i < 4; ++i) - byteArray4[i] = BASE64_CHARS.find(byteArray4[i]); - byteArray3[0] = (byteArray4[0] << 2) + ((byteArray4[1] & 0x30) >> 4); - byteArray3[1] = ((byteArray4[1] & 0xf) << 4) + ((byteArray4[2] & 0x3c) >> 2); - byteArray3[2] = ((byteArray4[2] & 0x3) << 6) + byteArray4[3]; - - for (i = 0; i < 3; ++i) - { - if (resultIndex >= result.size()) - result.resize((int32_t)((double)result.size() * 1.5)); - result[resultIndex++] = byteArray3[i]; - } - - i = 0; - } + + if (i != 0) { + for (int32_t j = i; j < 3; ++j) { + byteArray3[j] = 0; + } + + byteArray4[0] = (byteArray3[0] & 0xfc) >> 2; + byteArray4[1] = ((byteArray3[0] & 0x03) << 4) + ((byteArray3[1] & 0xf0) >> 4); + byteArray4[2] = ((byteArray3[1] & 0x0f) << 2) + ((byteArray3[2] & 0xc0) >> 6); + 
byteArray4[3] = byteArray3[2] & 0x3f; + + for (int32_t j = 0; j < i + 1; ++j) { + result += BASE64_CHARS[byteArray4[j]]; } - if (i != 0) - { - for (int32_t j = i; j < 4; ++j) - byteArray4[j] = 0; - for (int32_t j = 0; j < 4; ++j) - byteArray4[j] = BASE64_CHARS.find(byteArray4[j]); + while (i++ < 3) { + result += L'='; + } + } + return result; +} + +ByteArray Base64::decode(const String& str) { + int32_t length = str.length(); + uint8_t byteArray4[4]; + uint8_t byteArray3[3]; + + int32_t i = 0; + int32_t charIndex = 0; + + ByteArray result(ByteArray::newInstance(length / 2)); + int32_t resultIndex = 0; + + while (length-- && str[charIndex] != L'=' && isBase64(str[charIndex])) { + byteArray4[i++] = (uint8_t)str[charIndex++]; + if (i == 4) { + for (i = 0; i < 4; ++i) { + byteArray4[i] = static_cast(BASE64_CHARS.find(byteArray4[i])); + } byteArray3[0] = (byteArray4[0] << 2) + ((byteArray4[1] & 0x30) >> 4); byteArray3[1] = ((byteArray4[1] & 0xf) << 4) + ((byteArray4[2] & 0x3c) >> 2); byteArray3[2] = ((byteArray4[2] & 0x3) << 6) + byteArray4[3]; - for (int32_t j = 0; j < i - 1; ++j) - { - if (resultIndex >= result.size()) + for (i = 0; i < 3; ++i) { + if (resultIndex >= result.size()) { result.resize((int32_t)((double)result.size() * 1.5)); - result[resultIndex++] = byteArray3[j]; + } + result[resultIndex++] = byteArray3[i]; } - } - - result.resize(resultIndex); - return result; + i = 0; + } } - - bool Base64::isBase64(wchar_t ch) - { - return (UnicodeUtil::isAlnum(ch) || ch == L'+' || ch == L'/'); + + if (i != 0) { + for (int32_t j = i; j < 4; ++j) { + byteArray4[j] = 0; + } + for (int32_t j = 0; j < 4; ++j) { + byteArray4[j] = static_cast(BASE64_CHARS.find(byteArray4[j])); + } + byteArray3[0] = (byteArray4[0] << 2) + ((byteArray4[1] & 0x30) >> 4); + byteArray3[1] = ((byteArray4[1] & 0xf) << 4) + ((byteArray4[2] & 0x3c) >> 2); + byteArray3[2] = ((byteArray4[2] & 0x3) << 6) + byteArray4[3]; + + for (int32_t j = 0; j < i - 1; ++j) { + if (resultIndex >= result.size()) { + 
result.resize((int32_t)((double)result.size() * 1.5)); + } + result[resultIndex++] = byteArray3[j]; + } } + + result.resize(resultIndex); + + return result; +} + +bool Base64::isBase64(wchar_t ch) { + return (UnicodeUtil::isAlnum(ch) || ch == L'+' || ch == L'/'); +} + } diff --git a/src/core/util/BitSet.cpp b/src/core/util/BitSet.cpp index e0af67f5..6eb9d943 100644 --- a/src/core/util/BitSet.cpp +++ b/src/core/util/BitSet.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,265 +8,257 @@ #include "BitSet.h" #include "BitUtil.h" -namespace Lucene -{ - BitSet::BitSet(uint32_t size) : bitSet(size) - { - } - - BitSet::~BitSet() - { - } - - const uint64_t* BitSet::getBits() - { - return bitSet.empty() ? NULL : static_cast(&bitSet.m_bits[0]); - } - - void BitSet::clear() - { - bitSet.clear(); - } - - void BitSet::clear(uint32_t bitIndex) - { - if (bitIndex <= bitSet.size()) - bitSet.set(bitIndex, false); - } - - void BitSet::fastClear(uint32_t bitIndex) - { +namespace Lucene { + +BitSet::BitSet(uint32_t size) : bitSet(size) { +} + +BitSet::~BitSet() { +} + +const uint64_t* BitSet::getBits() { + return bitSet.empty() ? 
NULL : static_cast(&bitSet.m_bits[0]); +} + +void BitSet::clear() { + bitSet.clear(); +} + +void BitSet::clear(uint32_t bitIndex) { + if (bitIndex <= bitSet.size()) { bitSet.set(bitIndex, false); } - - void BitSet::clear(uint32_t fromIndex, uint32_t toIndex) - { - toIndex = std::min(toIndex, (uint32_t)bitSet.size()); - for (bitset_type::size_type i = std::min(fromIndex, (uint32_t)bitSet.size()); i < toIndex; ++i) - bitSet.set(i, false); +} + +void BitSet::fastClear(uint32_t bitIndex) { + bitSet.set(bitIndex, false); +} + +void BitSet::clear(uint32_t fromIndex, uint32_t toIndex) { + toIndex = std::min(toIndex, (uint32_t)bitSet.size()); + for (bitset_type::size_type i = std::min(fromIndex, (uint32_t)bitSet.size()); i < toIndex; ++i) { + bitSet.set(i, false); } - - void BitSet::fastClear(uint32_t fromIndex, uint32_t toIndex) - { - for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) - bitSet.set(i, false); +} + +void BitSet::fastClear(uint32_t fromIndex, uint32_t toIndex) { + for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) { + bitSet.set(i, false); } - - void BitSet::set(uint32_t bitIndex) - { - if (bitIndex >= bitSet.size()) - resize(bitIndex + 1); - bitSet.set(bitIndex, true); +} + +void BitSet::set(uint32_t bitIndex) { + if (bitIndex >= bitSet.size()) { + resize(bitIndex + 1); } - - void BitSet::fastSet(uint32_t bitIndex) - { - bitSet.set(bitIndex, true); + bitSet.set(bitIndex, true); +} + +void BitSet::fastSet(uint32_t bitIndex) { + bitSet.set(bitIndex, true); +} + +void BitSet::set(uint32_t bitIndex, bool value) { + if (bitIndex >= bitSet.size()) { + resize(bitIndex + 1); } - - void BitSet::set(uint32_t bitIndex, bool value) - { - if (bitIndex >= bitSet.size()) - resize(bitIndex + 1); - bitSet.set(bitIndex, value); + bitSet.set(bitIndex, value); +} + +void BitSet::fastSet(uint32_t bitIndex, bool value) { + bitSet.set(bitIndex, value); +} + +void BitSet::set(uint32_t fromIndex, uint32_t toIndex) { + if (toIndex >= bitSet.size()) { + 
resize(toIndex + 1); } - - void BitSet::fastSet(uint32_t bitIndex, bool value) - { - bitSet.set(bitIndex, value); + for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) { + bitSet.set(i, true); } - - void BitSet::set(uint32_t fromIndex, uint32_t toIndex) - { - if (toIndex >= bitSet.size()) - resize(toIndex + 1); - for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) - bitSet.set(i, true); +} + +void BitSet::fastSet(uint32_t fromIndex, uint32_t toIndex) { + for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) { + bitSet.set(i, true); } - - void BitSet::fastSet(uint32_t fromIndex, uint32_t toIndex) - { - for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) - bitSet.set(i, true); +} + +void BitSet::set(uint32_t fromIndex, uint32_t toIndex, bool value) { + if (toIndex >= bitSet.size()) { + resize(toIndex + 1); } - - void BitSet::set(uint32_t fromIndex, uint32_t toIndex, bool value) - { - if (toIndex >= bitSet.size()) - resize(toIndex + 1); - for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) - bitSet.set(i, value); + for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) { + bitSet.set(i, value); } - - void BitSet::fastSet(uint32_t fromIndex, uint32_t toIndex, bool value) - { - for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) - bitSet.set(i, value); +} + +void BitSet::fastSet(uint32_t fromIndex, uint32_t toIndex, bool value) { + for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) { + bitSet.set(i, value); } - - void BitSet::flip(uint32_t bitIndex) - { - if (bitIndex >= bitSet.size()) - resize(bitIndex + 1); - bitSet.flip(bitIndex); +} + +void BitSet::flip(uint32_t bitIndex) { + if (bitIndex >= bitSet.size()) { + resize(bitIndex + 1); } - - void BitSet::fastFlip(uint32_t bitIndex) - { - bitSet.flip(bitIndex); + bitSet.flip(bitIndex); +} + +void BitSet::fastFlip(uint32_t bitIndex) { + bitSet.flip(bitIndex); +} + +void BitSet::flip(uint32_t fromIndex, uint32_t toIndex) { + if (toIndex >= bitSet.size()) { 
+ resize(toIndex + 1); } - - void BitSet::flip(uint32_t fromIndex, uint32_t toIndex) - { - if (toIndex >= bitSet.size()) - resize(toIndex + 1); - for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) - bitSet.flip(i); + for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) { + bitSet.flip(i); } - - void BitSet::fastFlip(uint32_t fromIndex, uint32_t toIndex) - { - for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) - bitSet.flip(i); +} + +void BitSet::fastFlip(uint32_t fromIndex, uint32_t toIndex) { + for (bitset_type::size_type i = fromIndex; i < toIndex; ++i) { + bitSet.flip(i); } - - uint32_t BitSet::size() const - { - return bitSet.num_blocks() * sizeof(bitset_type::block_type) * 8; +} + +uint32_t BitSet::size() const { + return bitSet.num_blocks() * sizeof(bitset_type::block_type) * 8; +} + +uint32_t BitSet::numBlocks() const { + return bitSet.num_blocks(); +} + +bool BitSet::isEmpty() const { + return bitSet.none(); +} + +bool BitSet::get(uint32_t bitIndex) const { + return bitIndex < bitSet.size() ? bitSet.test(bitIndex) : false; +} + +bool BitSet::fastGet(uint32_t bitIndex) const { + return bitSet.test(bitIndex); +} + +int32_t BitSet::nextSetBit(uint32_t fromIndex) const { + bitset_type::size_type next = fromIndex == 0 ? bitSet.find_first() : bitSet.find_next(fromIndex - 1); + return next == bitset_type::npos ? 
-1 : next; +} + +void BitSet::_and(const BitSetPtr& set) { + bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks()); + for (bitset_type::size_type i = 0; i < minBlocks; ++i) { + bitSet.m_bits[i] &= set->bitSet.m_bits[i]; } - - uint32_t BitSet::numBlocks() const - { - return bitSet.num_blocks(); + if (bitSet.num_blocks() > minBlocks) { + std::fill(bitSet.m_bits.begin() + minBlocks, bitSet.m_bits.end(), bitset_type::block_type(0)); } - - bool BitSet::isEmpty() const - { - return bitSet.none(); +} + +void BitSet::_or(const BitSetPtr& set) { + bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks()); + if (set->bitSet.size() > bitSet.size()) { + resize(set->bitSet.size()); } - - bool BitSet::get(uint32_t bitIndex) const - { - return bitIndex < bitSet.size() ? bitSet.test(bitIndex) : false; + for (bitset_type::size_type i = 0; i < minBlocks; ++i) { + bitSet.m_bits[i] |= set->bitSet.m_bits[i]; } - - bool BitSet::fastGet(uint32_t bitIndex) const - { - return bitSet.test(bitIndex); + if (bitSet.num_blocks() > minBlocks) { + std::copy(set->bitSet.m_bits.begin() + minBlocks, set->bitSet.m_bits.end(), bitSet.m_bits.begin() + minBlocks); } - - int32_t BitSet::nextSetBit(uint32_t fromIndex) const - { - bitset_type::size_type next = fromIndex == 0 ? bitSet.find_first() : bitSet.find_next(fromIndex - 1); - return next == bitset_type::npos ? 
-1 : next; +} + +void BitSet::_xor(const BitSetPtr& set) { + bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks()); + if (set->bitSet.size() > bitSet.size()) { + resize(set->bitSet.size()); } - - void BitSet::_and(BitSetPtr set) - { - bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks()); - for (bitset_type::size_type i = 0; i < minBlocks; ++i) - bitSet.m_bits[i] &= set->bitSet.m_bits[i]; - if (bitSet.num_blocks() > minBlocks) - std::fill(bitSet.m_bits.begin() + minBlocks, bitSet.m_bits.end(), bitset_type::block_type(0)); + for (bitset_type::size_type i = 0; i < minBlocks; ++i) { + bitSet.m_bits[i] ^= set->bitSet.m_bits[i]; } - - void BitSet::_or(BitSetPtr set) - { - bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks()); - if (set->bitSet.size() > bitSet.size()) - resize(set->bitSet.size()); - for (bitset_type::size_type i = 0; i < minBlocks; ++i) - bitSet.m_bits[i] |= set->bitSet.m_bits[i]; - if (bitSet.num_blocks() > minBlocks) - std::copy(set->bitSet.m_bits.begin() + minBlocks, set->bitSet.m_bits.end(), bitSet.m_bits.begin() + minBlocks); + if (bitSet.num_blocks() > minBlocks) { + std::copy(set->bitSet.m_bits.begin() + minBlocks, set->bitSet.m_bits.end(), bitSet.m_bits.begin() + minBlocks); } - - void BitSet::_xor(BitSetPtr set) - { - bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks()); - if (set->bitSet.size() > bitSet.size()) - resize(set->bitSet.size()); - for (bitset_type::size_type i = 0; i < minBlocks; ++i) - bitSet.m_bits[i] ^= set->bitSet.m_bits[i]; - if (bitSet.num_blocks() > minBlocks) - std::copy(set->bitSet.m_bits.begin() + minBlocks, set->bitSet.m_bits.end(), bitSet.m_bits.begin() + minBlocks); +} + +void BitSet::andNot(const BitSetPtr& set) { + bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks()); + for (bitset_type::size_type i = 0; i < minBlocks; ++i) { + 
bitSet.m_bits[i] &= ~set->bitSet.m_bits[i]; } - - void BitSet::andNot(BitSetPtr set) - { - bitset_type::size_type minBlocks = std::min(bitSet.num_blocks(), set->bitSet.num_blocks()); - for (bitset_type::size_type i = 0; i < minBlocks; ++i) - bitSet.m_bits[i] &= ~set->bitSet.m_bits[i]; +} + +bool BitSet::intersectsBitSet(const BitSetPtr& set) const { + return bitSet.intersects(set->bitSet); +} + +uint32_t BitSet::cardinality() { + return bitSet.num_blocks() == 0 ? 0 : (uint32_t)BitUtil::pop_array((int64_t*)getBits(), 0, bitSet.num_blocks()); +} + +void BitSet::resize(uint32_t size) { + bitset_type::size_type old_num_blocks = bitSet.num_blocks(); + bitset_type::size_type required_blocks = bitSet.calc_num_blocks(size); + if (required_blocks != old_num_blocks) { + bitSet.m_bits.resize(required_blocks, bitset_type::block_type(0)); } - - bool BitSet::intersectsBitSet(BitSetPtr set) const - { - return bitSet.intersects(set->bitSet); + bitSet.m_num_bits = size; + uint64_t extra_bits = static_cast(bitSet.size() % bitSet.bits_per_block); + if (extra_bits != 0) { + bitSet.m_bits.back() &= ~(~static_cast(0) << extra_bits); } - - uint32_t BitSet::cardinality() - { - return bitSet.num_blocks() == 0 ? 
0 : (uint32_t)BitUtil::pop_array((int64_t*)getBits(), 0, bitSet.num_blocks()); +} + +bool BitSet::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { + return true; } - - void BitSet::resize(uint32_t size) - { - bitset_type::size_type old_num_blocks = bitSet.num_blocks(); - bitset_type::size_type required_blocks = bitSet.calc_num_blocks(size); - if (required_blocks != old_num_blocks) - bitSet.m_bits.resize(required_blocks, bitset_type::block_type(0)); - bitSet.m_num_bits = size; - uint64_t extra_bits = static_cast(bitSet.size() % bitSet.bits_per_block); - if (extra_bits != 0) - bitSet.m_bits.back() &= ~(~static_cast(0) << extra_bits); + BitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); + if (!otherBitSet) { + return false; } - - bool BitSet::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) - return true; - BitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); - if (!otherBitSet) + BitSetPtr first = bitSet.num_blocks() < otherBitSet->bitSet.num_blocks() ? otherBitSet : shared_from_this(); + BitSetPtr second = bitSet.num_blocks() < otherBitSet->bitSet.num_blocks() ? shared_from_this() : otherBitSet; + bitset_type::size_type firstLength = first->bitSet.num_blocks(); + bitset_type::size_type secondLength = second->bitSet.num_blocks(); + for (bitset_type::size_type i = secondLength; i < firstLength; ++i) { + if (first->bitSet.m_bits[i] != 0) { return false; - BitSetPtr first = bitSet.num_blocks() < otherBitSet->bitSet.num_blocks() ? otherBitSet : shared_from_this(); - BitSetPtr second = bitSet.num_blocks() < otherBitSet->bitSet.num_blocks() ? 
shared_from_this() : otherBitSet; - bitset_type::size_type firstLength = first->bitSet.num_blocks(); - bitset_type::size_type secondLength = second->bitSet.num_blocks(); - for (bitset_type::size_type i = secondLength; i < firstLength; ++i) - { - if (first->bitSet.m_bits[i] != 0) - return false; } - for (bitset_type::size_type i = 0; i < secondLength; ++i) - { - if (first->bitSet.m_bits[i] != second->bitSet.m_bits[i]) - return false; - } - return true; } - - int32_t BitSet::hashCode() - { - // Start with a zero hash and use a mix that results in zero if the input is zero. - // This effectively truncates trailing zeros without an explicit check. - int64_t hash = 0; - uint32_t maxSize = bitSet.num_blocks(); - const uint64_t* bits = getBits(); - for (uint32_t bit = 0; bit < maxSize; ++bit) - { - hash ^= bits[bit]; - hash = (hash << 1) | (hash >> 63); // rotate left + for (bitset_type::size_type i = 0; i < secondLength; ++i) { + if (first->bitSet.m_bits[i] != second->bitSet.m_bits[i]) { + return false; } - // Fold leftmost bits into right and add a constant to prevent empty sets from - // returning 0, which is too common. - return (int32_t)((hash >> 32) ^ hash) + 0x98761234; - } - - LuceneObjectPtr BitSet::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? other : newLucene(); - BitSetPtr cloneBitSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); - cloneBitSet->bitSet = bitSet; - return cloneBitSet; } + return true; +} + +int32_t BitSet::hashCode() { + // Start with a zero hash and use a mix that results in zero if the input is zero. + // This effectively truncates trailing zeros without an explicit check. + int64_t hash = 0; + uint32_t maxSize = bitSet.num_blocks(); + const uint64_t* bits = getBits(); + for (uint32_t bit = 0; bit < maxSize; ++bit) { + hash ^= bits[bit]; + hash = (hash << 1) | (hash >> 63); // rotate left + } + // Fold leftmost bits into right and add a constant to prevent empty sets from + // returning 0, which is too common. 
+ return (int32_t)((hash >> 32) ^ hash) + 0x98761234; +} + +LuceneObjectPtr BitSet::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + BitSetPtr cloneBitSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); + cloneBitSet->bitSet = bitSet; + return cloneBitSet; +} + } diff --git a/src/core/util/BitUtil.cpp b/src/core/util/BitUtil.cpp index 8f79fac9..771e7b85 100644 --- a/src/core/util/BitUtil.cpp +++ b/src/core/util/BitUtil.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,571 +8,543 @@ #include "BitUtil.h" #include "MiscUtils.h" -namespace Lucene -{ - const uint8_t BitUtil::ntzTable[] = - { - 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, - 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 - }; - - BitUtil::~BitUtil() - { - } - - int32_t BitUtil::pop(int64_t x) - { - x = x - (MiscUtils::unsignedShift(x, (int64_t)1) & 0x5555555555555555LL); - x = (x & 0x3333333333333333LL) + 
(MiscUtils::unsignedShift(x, (int64_t)2) & 0x3333333333333333LL); - x = (x + MiscUtils::unsignedShift(x, (int64_t)4)) & 0x0f0f0f0f0f0f0f0fLL; - x = x + MiscUtils::unsignedShift(x, (int64_t)8); - x = x + MiscUtils::unsignedShift(x, (int64_t)16); - x = x + MiscUtils::unsignedShift(x, (int64_t)32); - return (int32_t)x & 0x7f; - } - - int64_t BitUtil::pop_array(const int64_t* A, int32_t wordOffset, int32_t numWords) - { - int32_t n = wordOffset + numWords; - int64_t tot = 0; - int64_t tot8 = 0; - int64_t ones = 0; - int64_t twos = 0; - int64_t fours = 0; - - int32_t i = wordOffset; - for (; i <= n - 8; i += 8) - { - int64_t twosA; - CSA(twosA, ones, ones, A[i], A[i + 1]); - - int64_t twosB; - CSA(twosB, ones, ones, A[i + 2], A[i + 3]); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - CSA(twosA, ones, ones, A[i + 4], A[i + 5]); - - CSA(twosB, ones, ones, A[i + 6], A[i + 7]); - - int64_t foursB; - CSA(foursB, twos, twos, twosA, twosB); - - int64_t eights; - CSA(eights, fours, fours, foursA, foursB); - - tot8 += pop(eights); - } - - // Handle trailing words in a binary-search manner. - // Derived from the loop above by setting specific elements to 0. 
- - if (i <= n - 4) - { - int64_t twosA; - CSA(twosA, ones, ones, A[i], A[i + 1]); - - int64_t twosB; - CSA(twosB, ones, ones, A[i + 2], A[i + 3]); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 4; - } - - if (i <= n - 2) - { - int64_t twosA; - CSA(twosA, ones, ones, A[i], A[i + 1]); - - int64_t foursA = twos & twosA; - twos = twos ^ twosA; - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 2; - } - - if (i < n) - tot += pop(A[i]); - - tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); - - return tot; - } - - int64_t BitUtil::pop_intersect(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) - { - int32_t n = wordOffset + numWords; - int64_t tot = 0; - int64_t tot8 = 0; - int64_t ones = 0; - int64_t twos = 0; - int64_t fours = 0; - - int32_t i = wordOffset; - for (; i <= n - 8; i += 8) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); - - int64_t twosB; - CSA(twosB, ones, ones, (A[i + 2] & B[i + 2]), (A[i + 3] & B[i + 3])); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - CSA(twosA, ones, ones, (A[i + 4] & B[i + 4]), (A[i + 5] & B[i + 5])); - - CSA(twosB, ones, ones, (A[i + 6] & B[i + 6]), (A[i + 7] & B[i + 7])); - - int64_t foursB; - CSA(foursB, twos, twos, twosA, twosB); - - int64_t eights; - CSA(eights, fours, fours, foursA, foursB); - - tot8 += pop(eights); - } - - if (i <= n - 4) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); - - int64_t twosB; - CSA(twosB, ones, ones, (A[i + 2] & B[i + 2]), (A[i + 3] & B[i + 3])); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 4; - } - - if (i <= n - 2) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); - - 
int64_t foursA = twos & twosA; - twos = twos ^ twosA; - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 2; - } - - if (i < n) - tot += pop((A[i] & B[i])); - - tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); - - return tot; - } - - int64_t BitUtil::pop_union(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) - { - int32_t n = wordOffset + numWords; - int64_t tot = 0; - int64_t tot8 = 0; - int64_t ones = 0; - int64_t twos = 0; - int64_t fours = 0; - - int32_t i = wordOffset; - for (; i <= n - 8; i += 8) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); - - int64_t twosB; - CSA(twosB, ones, ones, (A[i + 2] | B[i + 2]), (A[i + 3] | B[i + 3])); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - CSA(twosA, ones, ones, (A[i + 4] | B[i + 4]), (A[i + 5] | B[i + 5])); - - CSA(twosB, ones, ones, (A[i + 6] | B[i + 6]), (A[i + 7] | B[i + 7])); - - int64_t foursB; - CSA(foursB, twos, twos, twosA, twosB); - - int64_t eights; - CSA(eights, fours, fours, foursA, foursB); - - tot8 += pop(eights); - } - - if (i <= n - 4) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); - - int64_t twosB; - CSA(twosB, ones, ones, (A[i + 2] | B[i + 2]), (A[i + 3] | B[i + 3])); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 4; - } - - if (i <= n - 2) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); - - int64_t foursA = twos & twosA; - twos = twos ^ twosA; - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 2; - } - - if (i < n) - tot += pop((A[i] | B[i])); - - tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); - - return tot; - } - - int64_t BitUtil::pop_andnot(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t 
numWords) - { - int32_t n = wordOffset + numWords; - int64_t tot = 0; - int64_t tot8 = 0; - int64_t ones = 0; - int64_t twos = 0; - int64_t fours = 0; - - int32_t i = wordOffset; - for (; i <= n - 8; i += 8) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); - - int64_t twosB; - CSA(twosB, ones, ones, (A[i + 2] & ~B[i + 2]), (A[i + 3] & ~B[i + 3])); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - CSA(twosA, ones, ones, (A[i + 4] & ~B[i + 4]), (A[i + 5] & ~B[i + 5])); - - CSA(twosB, ones, ones, (A[i + 6] & ~B[i + 6]), (A[i + 7] & ~B[i + 7])); - - int64_t foursB; - CSA(foursB, twos, twos, twosA, twosB); - - int64_t eights; - CSA(eights, fours, fours, foursA, foursB); - - tot8 += pop(eights); - } - - if (i <= n - 4) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); - - int64_t twosB; - CSA(twosB, ones, ones, (A[i + 2] & ~B[i + 2]), (A[i + 3] & ~B[i + 3])); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 4; - } - - if (i <= n - 2) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); - - int64_t foursA = twos & twosA; - twos = twos ^ twosA; - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 2; - } - - if (i < n) - tot += pop((A[i] & ~B[i])); - - tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); - - return tot; - } - - int64_t BitUtil::pop_xor(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) - { - int32_t n = wordOffset + numWords; - int64_t tot = 0; - int64_t tot8 = 0; - int64_t ones = 0; - int64_t twos = 0; - int64_t fours = 0; - - int32_t i = wordOffset; - for (; i <= n - 8; i += 8) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); - - int64_t twosB; - CSA(twosB, ones, ones, (A[i + 2] ^ B[i + 2]), (A[i + 3] ^ B[i + 
3])); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - CSA(twosA, ones, ones, (A[i + 4] ^ B[i + 4]), (A[i + 5] ^ B[i + 5])); - - CSA(twosB, ones, ones, (A[i + 6] ^ B[i + 6]), (A[i + 7] ^ B[i + 7])); - - int64_t foursB; - CSA(foursB, twos, twos, twosA, twosB); - - int64_t eights; - CSA(eights, fours, fours, foursA, foursB); - - tot8 += pop(eights); - } - - if (i <= n - 4) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); - - int64_t twosB; - CSA(twosB, ones, ones, (A[i + 2] ^ B[i + 2]), (A[i + 3] ^ B[i + 3])); - - int64_t foursA; - CSA(foursA, twos, twos, twosA, twosB); - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 4; - } - - if (i <= n - 2) - { - int64_t twosA; - CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); - - int64_t foursA = twos & twosA; - twos = twos ^ twosA; - - int64_t eights = fours & foursA; - fours = fours ^ foursA; - - tot8 += pop(eights); - i += 2; - } - - if (i < n) - tot += pop((A[i] ^ B[i])); - - tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); - - return tot; - } - - void BitUtil::CSA(int64_t& h, int64_t& l, int64_t a, int64_t b, int64_t c) - { - int64_t u = a ^ b; - h = (a & b) | (u & c); - l = u ^ c; - } - - int32_t BitUtil::ntz(int64_t val) - { - // A full binary search to determine the low byte was slower than a linear search for nextSetBit(). - // This is most likely because the implementation of nextSetBit() shifts bits to the right, increasing - // the probability that the first non-zero byte is in the rhs. - - // This implementation does a single binary search at the top level only so that all other bit shifting - // can be done on ints instead of longs to remain friendly to 32 bit architectures. In addition, the - // case of a non-zero first byte is checked for first because it is the most common in dense bit arrays. 
- - int32_t lower = (int32_t)val; - int32_t lowByte = lower & 0xff; - if (lowByte != 0) - return ntzTable[lowByte]; - - if (lower != 0) - { - lowByte = MiscUtils::unsignedShift(lower, 8) & 0xff; - if (lowByte != 0) - return ntzTable[lowByte] + 8; - lowByte = MiscUtils::unsignedShift(lower, 16) & 0xff; - if (lowByte != 0) - return ntzTable[lowByte] + 16; - // no need to mask off low byte for the last byte in the 32 bit word - // no need to check for zero on the last byte either. - return ntzTable[MiscUtils::unsignedShift(lower, 24)] + 24; - } - else - { - // grab upper 32 bits - int32_t upper = (int32_t)(val >> 32); - lowByte = upper & 0xff; - if (lowByte != 0) - return ntzTable[lowByte] + 32; - lowByte = MiscUtils::unsignedShift(upper, 8) & 0xff; - if (lowByte != 0) - return ntzTable[lowByte] + 40; - lowByte = MiscUtils::unsignedShift(upper, 16) & 0xff; - if (lowByte != 0) - return ntzTable[lowByte] + 48; - // no need to mask off low byte for the last byte in the 32 bit word - // no need to check for zero on the last byte either. 
- return ntzTable[MiscUtils::unsignedShift(upper, 24)] + 56; - } +namespace Lucene { + +const uint8_t BitUtil::ntzTable[] = { + 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, + 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 +}; + +BitUtil::~BitUtil() { +} + +int32_t BitUtil::pop(int64_t x) { + x = x - (MiscUtils::unsignedShift(x, (int64_t)1) & 0x5555555555555555LL); + x = (x & 0x3333333333333333LL) + (MiscUtils::unsignedShift(x, (int64_t)2) & 0x3333333333333333LL); + x = (x + MiscUtils::unsignedShift(x, (int64_t)4)) & 0x0f0f0f0f0f0f0f0fLL; + x = x + MiscUtils::unsignedShift(x, (int64_t)8); + x = x + MiscUtils::unsignedShift(x, (int64_t)16); + x = x + MiscUtils::unsignedShift(x, (int64_t)32); + return (int32_t)x & 0x7f; +} + +int64_t BitUtil::pop_array(const int64_t* A, int32_t wordOffset, int32_t numWords) { + int32_t n = wordOffset + numWords; + int64_t tot = 0; + int64_t tot8 = 0; + int64_t ones = 0; + int64_t twos = 0; + int64_t fours = 0; + + int32_t i = wordOffset; + for (; i <= n - 8; i += 8) { + int64_t twosA; + CSA(twosA, ones, ones, A[i], A[i + 1]); + + int64_t twosB; + CSA(twosB, ones, ones, A[i + 2], A[i + 3]); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + CSA(twosA, ones, ones, A[i + 4], A[i + 5]); + + CSA(twosB, ones, ones, A[i + 6], A[i + 7]); + + int64_t 
foursB; + CSA(foursB, twos, twos, twosA, twosB); + + int64_t eights; + CSA(eights, fours, fours, foursA, foursB); + + tot8 += pop(eights); + } + + // Handle trailing words in a binary-search manner. + // Derived from the loop above by setting specific elements to 0. + + if (i <= n - 4) { + int64_t twosA; + CSA(twosA, ones, ones, A[i], A[i + 1]); + + int64_t twosB; + CSA(twosB, ones, ones, A[i + 2], A[i + 3]); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + int64_t twosA; + CSA(twosA, ones, ones, A[i], A[i + 1]); + + int64_t foursA = twos & twosA; + twos = twos ^ twosA; + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; + } + + if (i < n) { + tot += pop(A[i]); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; +} + +int64_t BitUtil::pop_intersect(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { + int32_t n = wordOffset + numWords; + int64_t tot = 0; + int64_t tot8 = 0; + int64_t ones = 0; + int64_t twos = 0; + int64_t fours = 0; + + int32_t i = wordOffset; + for (; i <= n - 8; i += 8) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); + + int64_t twosB; + CSA(twosB, ones, ones, (A[i + 2] & B[i + 2]), (A[i + 3] & B[i + 3])); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + CSA(twosA, ones, ones, (A[i + 4] & B[i + 4]), (A[i + 5] & B[i + 5])); + + CSA(twosB, ones, ones, (A[i + 6] & B[i + 6]), (A[i + 7] & B[i + 7])); + + int64_t foursB; + CSA(foursB, twos, twos, twosA, twosB); + + int64_t eights; + CSA(eights, fours, fours, foursA, foursB); + + tot8 += pop(eights); + } + + if (i <= n - 4) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); + + int64_t twosB; + CSA(twosB, ones, ones, (A[i + 2] & B[i + 2]), (A[i + 3] & B[i + 3])); + + int64_t 
foursA; + CSA(foursA, twos, twos, twosA, twosB); + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] & B[i]), (A[i + 1] & B[i + 1])); + + int64_t foursA = twos & twosA; + twos = twos ^ twosA; + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; + } + + if (i < n) { + tot += pop((A[i] & B[i])); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; +} + +int64_t BitUtil::pop_union(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { + int32_t n = wordOffset + numWords; + int64_t tot = 0; + int64_t tot8 = 0; + int64_t ones = 0; + int64_t twos = 0; + int64_t fours = 0; + + int32_t i = wordOffset; + for (; i <= n - 8; i += 8) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); + + int64_t twosB; + CSA(twosB, ones, ones, (A[i + 2] | B[i + 2]), (A[i + 3] | B[i + 3])); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + CSA(twosA, ones, ones, (A[i + 4] | B[i + 4]), (A[i + 5] | B[i + 5])); + + CSA(twosB, ones, ones, (A[i + 6] | B[i + 6]), (A[i + 7] | B[i + 7])); + + int64_t foursB; + CSA(foursB, twos, twos, twosA, twosB); + + int64_t eights; + CSA(eights, fours, fours, foursA, foursB); + + tot8 += pop(eights); + } + + if (i <= n - 4) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); + + int64_t twosB; + CSA(twosB, ones, ones, (A[i + 2] | B[i + 2]), (A[i + 3] | B[i + 3])); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] | B[i]), (A[i + 1] | B[i + 1])); + + int64_t foursA = twos & twosA; + twos = twos ^ twosA; + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += 
pop(eights); + i += 2; + } + + if (i < n) { + tot += pop((A[i] | B[i])); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; +} + +int64_t BitUtil::pop_andnot(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { + int32_t n = wordOffset + numWords; + int64_t tot = 0; + int64_t tot8 = 0; + int64_t ones = 0; + int64_t twos = 0; + int64_t fours = 0; + + int32_t i = wordOffset; + for (; i <= n - 8; i += 8) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); + + int64_t twosB; + CSA(twosB, ones, ones, (A[i + 2] & ~B[i + 2]), (A[i + 3] & ~B[i + 3])); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + CSA(twosA, ones, ones, (A[i + 4] & ~B[i + 4]), (A[i + 5] & ~B[i + 5])); + + CSA(twosB, ones, ones, (A[i + 6] & ~B[i + 6]), (A[i + 7] & ~B[i + 7])); + + int64_t foursB; + CSA(foursB, twos, twos, twosA, twosB); + + int64_t eights; + CSA(eights, fours, fours, foursA, foursB); + + tot8 += pop(eights); + } + + if (i <= n - 4) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); + + int64_t twosB; + CSA(twosB, ones, ones, (A[i + 2] & ~B[i + 2]), (A[i + 3] & ~B[i + 3])); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] & ~B[i]), (A[i + 1] & ~B[i + 1])); + + int64_t foursA = twos & twosA; + twos = twos ^ twosA; + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; + } + + if (i < n) { + tot += pop((A[i] & ~B[i])); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; +} + +int64_t BitUtil::pop_xor(const int64_t* A, const int64_t* B, int32_t wordOffset, int32_t numWords) { + int32_t n = wordOffset + numWords; + int64_t tot = 0; + int64_t tot8 = 0; + int64_t ones = 0; + int64_t 
twos = 0; + int64_t fours = 0; + + int32_t i = wordOffset; + for (; i <= n - 8; i += 8) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); + + int64_t twosB; + CSA(twosB, ones, ones, (A[i + 2] ^ B[i + 2]), (A[i + 3] ^ B[i + 3])); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + CSA(twosA, ones, ones, (A[i + 4] ^ B[i + 4]), (A[i + 5] ^ B[i + 5])); + + CSA(twosB, ones, ones, (A[i + 6] ^ B[i + 6]), (A[i + 7] ^ B[i + 7])); + + int64_t foursB; + CSA(foursB, twos, twos, twosA, twosB); + + int64_t eights; + CSA(eights, fours, fours, foursA, foursB); + + tot8 += pop(eights); + } + + if (i <= n - 4) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); + + int64_t twosB; + CSA(twosB, ones, ones, (A[i + 2] ^ B[i + 2]), (A[i + 3] ^ B[i + 3])); + + int64_t foursA; + CSA(foursA, twos, twos, twosA, twosB); + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 4; + } + + if (i <= n - 2) { + int64_t twosA; + CSA(twosA, ones, ones, (A[i] ^ B[i]), (A[i + 1] ^ B[i + 1])); + + int64_t foursA = twos & twosA; + twos = twos ^ twosA; + + int64_t eights = fours & foursA; + fours = fours ^ foursA; + + tot8 += pop(eights); + i += 2; } - - int32_t BitUtil::ntz(int32_t val) - { - // This implementation does a single binary search at the top level only. In addition, the case - // of a non-zero first byte is checked for first because it is the most common in dense bit arrays. 
- - int32_t lowByte = val & 0xff; - if (lowByte != 0) - return ntzTable[lowByte]; - lowByte = MiscUtils::unsignedShift(val, 8) & 0xff; - if (lowByte != 0) + + if (i < n) { + tot += pop((A[i] ^ B[i])); + } + + tot += (pop(fours) << 2) + (pop(twos) << 1) + pop(ones) + (tot8 << 3); + + return tot; +} + +void BitUtil::CSA(int64_t& h, int64_t& l, int64_t a, int64_t b, int64_t c) { + int64_t u = a ^ b; + h = (a & b) | (u & c); + l = u ^ c; +} + +int32_t BitUtil::ntz(int64_t val) { + // A full binary search to determine the low byte was slower than a linear search for nextSetBit(). + // This is most likely because the implementation of nextSetBit() shifts bits to the right, increasing + // the probability that the first non-zero byte is in the rhs. + + // This implementation does a single binary search at the top level only so that all other bit shifting + // can be done on ints instead of longs to remain friendly to 32 bit architectures. In addition, the + // case of a non-zero first byte is checked for first because it is the most common in dense bit arrays. + + int32_t lower = (int32_t)val; + int32_t lowByte = lower & 0xff; + if (lowByte != 0) { + return ntzTable[lowByte]; + } + + if (lower != 0) { + lowByte = MiscUtils::unsignedShift(lower, 8) & 0xff; + if (lowByte != 0) { return ntzTable[lowByte] + 8; - lowByte = MiscUtils::unsignedShift(val, 16) & 0xff; - if (lowByte != 0) - return ntzTable[lowByte] + 16; - // no need to mask off low byte for the last byte. - // no need to check for zero on the last byte either. 
- return ntzTable[MiscUtils::unsignedShift(val, 24)] + 24; - } - - int32_t BitUtil::ntz2(int64_t x) - { - int32_t n = 0; - int32_t y = (int32_t)x; - if (y == 0) // the only 64 bit shift necessary - { - n += 32; - y = (int32_t)MiscUtils::unsignedShift(x, (int64_t)32); - } - if ((y & 0x0000ffff) == 0) - { - n += 16; - y = MiscUtils::unsignedShift(y, 16); - } - if ((y & 0x000000ff) == 0) - { - n += 8; - y = MiscUtils::unsignedShift(y, 8); } - return (ntzTable[y & 0xff]) + n; - } - - int32_t BitUtil::ntz3(int64_t x) - { - int32_t n = 1; - - // do the first step as a long, all others as ints. - int32_t y = (int32_t)x; - if (y == 0) - { - n += 32; - y = (int32_t)MiscUtils::unsignedShift(x, (int64_t)32); - } - if ((y & 0x0000ffff) == 0) - { - n += 16; - y = MiscUtils::unsignedShift(y, 16); + lowByte = MiscUtils::unsignedShift(lower, 16) & 0xff; + if (lowByte != 0) { + return ntzTable[lowByte] + 16; } - if ((y & 0x000000ff) == 0) - { - n += 8; - y = MiscUtils::unsignedShift(y, 8); + // no need to mask off low byte for the last byte in the 32 bit word + // no need to check for zero on the last byte either. 
+ return ntzTable[MiscUtils::unsignedShift(lower, 24)] + 24; + } else { + // grab upper 32 bits + int32_t upper = (int32_t)(val >> 32); + lowByte = upper & 0xff; + if (lowByte != 0) { + return ntzTable[lowByte] + 32; } - if ((y & 0x0000000f) == 0) - { - n += 4; - y = MiscUtils::unsignedShift(y, 4); + lowByte = MiscUtils::unsignedShift(upper, 8) & 0xff; + if (lowByte != 0) { + return ntzTable[lowByte] + 40; } - if ((y & 0x00000003) == 0) - { - n += 2; - y = MiscUtils::unsignedShift(y, 2); + lowByte = MiscUtils::unsignedShift(upper, 16) & 0xff; + if (lowByte != 0) { + return ntzTable[lowByte] + 48; } - return n - (y & 1); - } - - bool BitUtil::isPowerOfTwo(int32_t v) - { - return ((v & (v - 1)) == 0); - } - - bool BitUtil::isPowerOfTwo(int64_t v) - { - return ((v & (v - 1)) == 0); - } - - int32_t BitUtil::nextHighestPowerOfTwo(int32_t v) - { - --v; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - return ++v; - } - - int64_t BitUtil::nextHighestPowerOfTwo(int64_t v) - { - --v; - v |= v >> 1; - v |= v >> 2; - v |= v >> 4; - v |= v >> 8; - v |= v >> 16; - v |= v >> 32; - return ++v; + // no need to mask off low byte for the last byte in the 32 bit word + // no need to check for zero on the last byte either. + return ntzTable[MiscUtils::unsignedShift(upper, 24)] + 56; + } +} + +int32_t BitUtil::ntz(int32_t val) { + // This implementation does a single binary search at the top level only. In addition, the case + // of a non-zero first byte is checked for first because it is the most common in dense bit arrays. + + int32_t lowByte = val & 0xff; + if (lowByte != 0) { + return ntzTable[lowByte]; + } + lowByte = MiscUtils::unsignedShift(val, 8) & 0xff; + if (lowByte != 0) { + return ntzTable[lowByte] + 8; } + lowByte = MiscUtils::unsignedShift(val, 16) & 0xff; + if (lowByte != 0) { + return ntzTable[lowByte] + 16; + } + // no need to mask off low byte for the last byte. + // no need to check for zero on the last byte either. 
+ return ntzTable[MiscUtils::unsignedShift(val, 24)] + 24; +} + +int32_t BitUtil::ntz2(int64_t x) { + int32_t n = 0; + int32_t y = (int32_t)x; + if (y == 0) { // the only 64 bit shift necessary + n += 32; + y = (int32_t)MiscUtils::unsignedShift(x, (int64_t)32); + } + if ((y & 0x0000ffff) == 0) { + n += 16; + y = MiscUtils::unsignedShift(y, 16); + } + if ((y & 0x000000ff) == 0) { + n += 8; + y = MiscUtils::unsignedShift(y, 8); + } + return (ntzTable[y & 0xff]) + n; +} + +int32_t BitUtil::ntz3(int64_t x) { + int32_t n = 1; + + // do the first step as a long, all others as ints. + int32_t y = (int32_t)x; + if (y == 0) { + n += 32; + y = (int32_t)MiscUtils::unsignedShift(x, (int64_t)32); + } + if ((y & 0x0000ffff) == 0) { + n += 16; + y = MiscUtils::unsignedShift(y, 16); + } + if ((y & 0x000000ff) == 0) { + n += 8; + y = MiscUtils::unsignedShift(y, 8); + } + if ((y & 0x0000000f) == 0) { + n += 4; + y = MiscUtils::unsignedShift(y, 4); + } + if ((y & 0x00000003) == 0) { + n += 2; + y = MiscUtils::unsignedShift(y, 2); + } + return n - (y & 1); +} + +bool BitUtil::isPowerOfTwo(int32_t v) { + return ((v & (v - 1)) == 0); +} + +bool BitUtil::isPowerOfTwo(int64_t v) { + return ((v & (v - 1)) == 0); +} + +int32_t BitUtil::nextHighestPowerOfTwo(int32_t v) { + --v; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + return ++v; +} + +int64_t BitUtil::nextHighestPowerOfTwo(int64_t v) { + --v; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + return ++v; +} + } diff --git a/src/core/util/BitVector.cpp b/src/core/util/BitVector.cpp index 8487ade3..dbd59974 100644 --- a/src/core/util/BitVector.cpp +++ b/src/core/util/BitVector.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,245 +12,228 @@ #include "TestPoint.h" #include "MiscUtils.h" -namespace Lucene -{ - const uint8_t BitVector::BYTE_COUNTS[] = - { - 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, - 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 - }; - - BitVector::BitVector(int32_t n) - { - _size = n; - bits = ByteArray::newInstance((_size >> 3) + 1); - MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); - _count = 0; - } - - BitVector::BitVector(ByteArray bits, int32_t size) - { - this->bits = bits; - this->_size = size; - this->_count = -1; - } - - BitVector::BitVector(DirectoryPtr d, const String& name) - { - IndexInputPtr input(d->openInput(name)); - LuceneException finally; - try - { - _size = input->readInt(); // read size - if (_size == -1) - readDgaps(input); - else - readBits(input); - } - catch (LuceneException& e) - { - finally = e; +namespace Lucene { + +const uint8_t BitVector::BYTE_COUNTS[] = { + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 
2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 +}; + +BitVector::BitVector(int32_t n) { + _size = n; + bits = ByteArray::newInstance((_size >> 3) + 1); + MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); + _count = 0; +} + +BitVector::BitVector(ByteArray bits, int32_t size) { + this->bits = bits; + this->_size = size; + this->_count = -1; +} + +BitVector::BitVector(const DirectoryPtr& d, const String& name) { + IndexInputPtr input(d->openInput(name)); + LuceneException finally; + try { + _size = input->readInt(); // read size + if (_size == -1) { + readDgaps(input); + } else { + readBits(input); } - input->close(); - finally.throwException(); - } - - BitVector::~BitVector() - { - } - - LuceneObjectPtr BitVector::clone(LuceneObjectPtr other) - { - ByteArray copyBits(ByteArray::newInstance(bits.size())); - MiscUtils::arrayCopy(bits.get(), 0, copyBits.get(), 0, bits.size()); - BitVectorPtr clone = newLucene(copyBits, _size); - clone->_count = _count; - return clone; + } catch (LuceneException& e) { + finally = e; } - - void BitVector::set(int32_t bit) - { - if (bit >= _size) - boost::throw_exception(IndexOutOfBoundsException()); - bits[bit >> 3] |= 1 << (bit & 7); - _count = -1; + input->close(); + finally.throwException(); +} + +BitVector::~BitVector() { +} + +LuceneObjectPtr BitVector::clone(const LuceneObjectPtr& other) { + ByteArray copyBits(ByteArray::newInstance(bits.size())); + MiscUtils::arrayCopy(bits.get(), 0, copyBits.get(), 0, 
bits.size()); + BitVectorPtr clone = newLucene(copyBits, _size); + clone->_count = _count; + return clone; +} + +void BitVector::set(int32_t bit) { + if (bit >= _size) { + boost::throw_exception(IndexOutOfBoundsException()); } - - bool BitVector::getAndSet(int32_t bit) - { - if (bit >= _size) - boost::throw_exception(IndexOutOfBoundsException()); - int32_t pos = (bit >> 3); - int32_t v = bits[pos]; - int32_t flag = 1 << (bit & 7); - if ((flag & v) != 0) - return true; - else - { - bits[pos] = (uint8_t)(v | flag); - if (_count != -1) - ++_count; - return false; + bits[bit >> 3] |= 1 << (bit & 7); + _count = -1; +} + +bool BitVector::getAndSet(int32_t bit) { + if (bit >= _size) { + boost::throw_exception(IndexOutOfBoundsException()); + } + int32_t pos = (bit >> 3); + int32_t v = bits[pos]; + int32_t flag = 1 << (bit & 7); + if ((flag & v) != 0) { + return true; + } else { + bits[pos] = (uint8_t)(v | flag); + if (_count != -1) { + ++_count; } + return false; } - - void BitVector::clear(int32_t bit) - { - if (bit >= _size) - boost::throw_exception(IndexOutOfBoundsException()); - bits[bit >> 3] &= ~(1 << (bit & 7)); - _count = -1; - } - - bool BitVector::get(int32_t bit) - { - BOOST_ASSERT(bit >= 0 && bit < _size); - return (bits[bit >> 3] & (1 << (bit & 7))) != 0; - } - - int32_t BitVector::size() - { - return _size; - } - - int32_t BitVector::count() - { - // if the vector has been modified - if (_count == -1) - { - int32_t c = 0; - int32_t end = bits.size(); - for (int32_t i = 0; i < end; ++i) - c += BYTE_COUNTS[bits[i] & 0xff]; // sum bits per byte - _count = c; - } - return _count; +} + +void BitVector::clear(int32_t bit) { + if (bit >= _size) { + boost::throw_exception(IndexOutOfBoundsException()); } - - int32_t BitVector::getRecomputedCount() - { + bits[bit >> 3] &= ~(1 << (bit & 7)); + _count = -1; +} + +bool BitVector::get(int32_t bit) { + BOOST_ASSERT(bit >= 0 && bit < _size); + return (bits[bit >> 3] & (1 << (bit & 7))) != 0; +} + +int32_t BitVector::size() { 
+ return _size; +} + +int32_t BitVector::count() { + // if the vector has been modified + if (_count == -1) { int32_t c = 0; int32_t end = bits.size(); - for (int32_t i = 0; i < end; ++i) - c += BYTE_COUNTS[bits[i] & 0xff]; // sum bits per byte - return c; - } - - void BitVector::write(DirectoryPtr d, const String& name) - { - TestScope testScope(L"BitVector", L"write"); - IndexOutputPtr output(d->createOutput(name)); - LuceneException finally; - try - { - if (isSparse()) - writeDgaps(output); // sparse bit-set more efficiently saved as d-gaps. - else - writeBits(output); - } - catch (LuceneException& e) - { - finally = e; + for (int32_t i = 0; i < end; ++i) { + c += BYTE_COUNTS[bits[i] & 0xff]; // sum bits per byte } - output->close(); - finally.throwException(); + _count = c; } - - void BitVector::writeBits(IndexOutputPtr output) - { - output->writeInt(size()); // write size - output->writeInt(count()); // write count - output->writeBytes(bits.get(), bits.size()); + return _count; +} + +int32_t BitVector::getRecomputedCount() { + int32_t c = 0; + int32_t end = bits.size(); + for (int32_t i = 0; i < end; ++i) { + c += BYTE_COUNTS[bits[i] & 0xff]; // sum bits per byte } - - void BitVector::writeDgaps(IndexOutputPtr output) - { - output->writeInt(-1); // mark using d-gaps - output->writeInt(size()); // write size - output->writeInt(count()); // write count - int32_t last = 0; - int32_t n = count(); - int32_t m = bits.size(); - for (int32_t i = 0; i < m && n > 0; ++i) - { - if (bits[i] != 0) - { - output->writeVInt(i-last); - output->writeByte(bits[i]); - last = i; - n -= BYTE_COUNTS[bits[i] & 0xff]; - } + return c; +} + +void BitVector::write(const DirectoryPtr& d, const String& name) { + TestScope testScope(L"BitVector", L"write"); + IndexOutputPtr output(d->createOutput(name)); + LuceneException finally; + try { + if (isSparse()) { + writeDgaps(output); // sparse bit-set more efficiently saved as d-gaps. 
+ } else { + writeBits(output); } + } catch (LuceneException& e) { + finally = e; } - - bool BitVector::isSparse() - { - // note: order of comparisons below set to favor smaller values (no binary range search.) - // note: adding 4 because we start with ((int) -1) to indicate d-gaps format. - // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore - // multiplying count by (8+8) or (8+16) or (8+24) etc.: - // - first 8 for writing bits[i] (1 byte vs. 1 bit), and - // - second part for writing the byte-number d-gap as vint. - // note: factor is for read/write of byte-arrays being faster than vints. - int32_t factor = 10; - if (bits.size() < (1 << 7)) - return factor * (4 + (8 + 8) * count()) < size(); - if (bits.size() < (1 << 14)) - return factor * (4 + (8 + 16) * count()) < size(); - if (bits.size() < (1 << 21)) - return factor * (4 + (8 + 24) * count()) < size(); - if (bits.size() < (1 << 28)) - return factor * (4 + (8 + 32) * count()) < size(); - return factor * (4 + (8 + 40) * count()) < size(); - } - - void BitVector::readBits(IndexInputPtr input) - { - _count = input->readInt(); // read count - bits = ByteArray::newInstance((_size >> 3) + 1); // allocate bits - MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); - input->readBytes(bits.get(), 0, bits.size()); - } - - void BitVector::readDgaps(IndexInputPtr input) - { - _size = input->readInt(); // (re)read size - _count = input->readInt(); // read count - bits = ByteArray::newInstance((_size >> 3) + 1); // allocate bits - MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); - int32_t last = 0; - int32_t n = count(); - while (n > 0) - { - last += input->readVInt(); - bits[last] = input->readByte(); - n -= BYTE_COUNTS[bits[last] & 0xff]; + output->close(); + finally.throwException(); +} + +void BitVector::writeBits(const IndexOutputPtr& output) { + output->writeInt(size()); // write size + output->writeInt(count()); // write count + output->writeBytes(bits.get(), bits.size()); 
+} + +void BitVector::writeDgaps(const IndexOutputPtr& output) { + output->writeInt(-1); // mark using d-gaps + output->writeInt(size()); // write size + output->writeInt(count()); // write count + int32_t last = 0; + int32_t n = count(); + int32_t m = bits.size(); + for (int32_t i = 0; i < m && n > 0; ++i) { + if (bits[i] != 0) { + output->writeVInt(i-last); + output->writeByte(bits[i]); + last = i; + n -= BYTE_COUNTS[bits[i] & 0xff]; } } - - BitVectorPtr BitVector::subset(int32_t start, int32_t end) - { - if (start < 0 || end > size() || end < start) - boost::throw_exception(IndexOutOfBoundsException()); - // Special case -- return empty vector is start == end - if (end == start) - return newLucene(0); - ByteArray bits(ByteArray::newInstance(MiscUtils::unsignedShift(end - start - 1, 3) + 1)); - int32_t s = MiscUtils::unsignedShift(start, 3); - for (int32_t i = 0; i < bits.size(); ++i) - { - int32_t cur = 0xff & this->bits[i + s]; - int32_t next = i + s + 1 >= this->bits.size() ? 0 : 0xff & this->bits[i + s + 1]; - bits[i] = (uint8_t)(MiscUtils::unsignedShift(cur, (start & 7)) | ((next << (8 - (start & 7))))); - } - int32_t bitsToClear = (bits.size() * 8 - (end - start)) % 8; - bits[bits.size() - 1] &= ~(0xff << (8 - bitsToClear)); - return newLucene(bits, end - start); +} + +bool BitVector::isSparse() { + // note: order of comparisons below set to favor smaller values (no binary range search.) + // note: adding 4 because we start with ((int) -1) to indicate d-gaps format. + // note: we write the d-gap for the byte number, and the byte (bits[i]) itself, therefore + // multiplying count by (8+8) or (8+16) or (8+24) etc.: + // - first 8 for writing bits[i] (1 byte vs. 1 bit), and + // - second part for writing the byte-number d-gap as vint. + // note: factor is for read/write of byte-arrays being faster than vints. 
+ int32_t factor = 10; + if (bits.size() < (1 << 7)) { + return factor * (4 + (8 + 8) * count()) < size(); + } + if (bits.size() < (1 << 14)) { + return factor * (4 + (8 + 16) * count()) < size(); + } + if (bits.size() < (1 << 21)) { + return factor * (4 + (8 + 24) * count()) < size(); + } + if (bits.size() < (1 << 28)) { + return factor * (4 + (8 + 32) * count()) < size(); + } + return factor * (4 + (8 + 40) * count()) < size(); +} + +void BitVector::readBits(const IndexInputPtr& input) { + _count = input->readInt(); // read count + bits = ByteArray::newInstance((_size >> 3) + 1); // allocate bits + MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); + input->readBytes(bits.get(), 0, bits.size()); +} + +void BitVector::readDgaps(const IndexInputPtr& input) { + _size = input->readInt(); // (re)read size + _count = input->readInt(); // read count + bits = ByteArray::newInstance((_size >> 3) + 1); // allocate bits + MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0); + int32_t last = 0; + int32_t n = count(); + while (n > 0) { + last += input->readVInt(); + bits[last] = input->readByte(); + n -= BYTE_COUNTS[bits[last] & 0xff]; } } + +BitVectorPtr BitVector::subset(int32_t start, int32_t end) { + if (start < 0 || end > size() || end < start) { + boost::throw_exception(IndexOutOfBoundsException()); + } + // Special case -- return empty vector is start == end + if (end == start) { + return newLucene(0); + } + ByteArray bits(ByteArray::newInstance(MiscUtils::unsignedShift(end - start - 1, 3) + 1)); + int32_t s = MiscUtils::unsignedShift(start, 3); + for (int32_t i = 0; i < bits.size(); ++i) { + int32_t cur = 0xff & this->bits[i + s]; + int32_t next = i + s + 1 >= this->bits.size() ? 
0 : 0xff & this->bits[i + s + 1]; + bits[i] = (uint8_t)(MiscUtils::unsignedShift(cur, (start & 7)) | ((next << (8 - (start & 7))))); + } + int32_t bitsToClear = (bits.size() * 8 - (end - start)) % 8; + bits[bits.size() - 1] &= ~(0xff << (8 - bitsToClear)); + return newLucene(bits, end - start); +} + +} diff --git a/src/core/util/BufferedReader.cpp b/src/core/util/BufferedReader.cpp index 41379c59..84aa130e 100644 --- a/src/core/util/BufferedReader.cpp +++ b/src/core/util/BufferedReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,118 +8,105 @@ #include "BufferedReader.h" #include "MiscUtils.h" -namespace Lucene -{ - const int32_t BufferedReader::READER_BUFFER = 8192; - - BufferedReader::BufferedReader(ReaderPtr reader, int32_t size) - { - this->reader = reader; - this->bufferSize = size; - this->bufferLength = 0; - this->bufferPosition = 0; - } - - BufferedReader::~BufferedReader() - { - } - - int32_t BufferedReader::read() - { - if (bufferPosition >= bufferLength) - { - if (refill() == READER_EOF) - return READER_EOF; +namespace Lucene { + +const int32_t BufferedReader::READER_BUFFER = 8192; + +BufferedReader::BufferedReader(const ReaderPtr& reader, int32_t size) { + this->reader = reader; + this->bufferSize = size; + this->bufferLength = 0; + this->bufferPosition = 0; +} + +BufferedReader::~BufferedReader() { +} + +int32_t BufferedReader::read() { + if (bufferPosition >= bufferLength) { + if (refill() == READER_EOF) { + return READER_EOF; } - return buffer[bufferPosition++]; } - - int32_t BufferedReader::peek() - { - if (bufferPosition >= bufferLength) - { - if (refill() == READER_EOF) - 
return READER_EOF; + return buffer[bufferPosition++]; +} + +int32_t BufferedReader::peek() { + if (bufferPosition >= bufferLength) { + if (refill() == READER_EOF) { + return READER_EOF; } - return buffer[bufferPosition]; } - - int32_t BufferedReader::read(wchar_t* b, int32_t offset, int32_t length) - { - if (length == 0) - return 0; - - int32_t remaining = length; - - while (remaining > 0) - { - int32_t available = bufferLength - bufferPosition; - - if (remaining <= available) - { - // the buffer contains enough data to satisfy this request - MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, remaining); - bufferPosition += remaining; - remaining = 0; - } - else if (available > 0) - { - // the buffer does not have enough data, first serve all we've got - MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, available); - bufferPosition += available; - offset += available; - remaining -= available; - } - else if (refill() == READER_EOF) - { - length -= remaining; - break; - } - } - - return length == 0 ? 
READER_EOF : length; + return buffer[bufferPosition]; +} + +int32_t BufferedReader::read(wchar_t* b, int32_t offset, int32_t length) { + if (length == 0) { + return 0; } - - bool BufferedReader::readLine(String& line) - { - line.clear(); - wchar_t ch = (wchar_t)read(); - while (ch != (wchar_t)READER_EOF && ch != L'\r' && ch != L'\n') - { - line += ch; - ch = (wchar_t)read(); + + int32_t remaining = length; + + while (remaining > 0) { + int32_t available = bufferLength - bufferPosition; + + if (remaining <= available) { + // the buffer contains enough data to satisfy this request + MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, remaining); + bufferPosition += remaining; + remaining = 0; + } else if (available > 0) { + // the buffer does not have enough data, first serve all we've got + MiscUtils::arrayCopy(buffer.get(), bufferPosition, b, offset, available); + bufferPosition += available; + offset += available; + remaining -= available; + } else if (refill() == READER_EOF) { + length -= remaining; + break; } - if (ch == '\r' && (wchar_t)peek() == L'\n') - read(); - return (!line.empty() || ch != (wchar_t)READER_EOF); - } - - int32_t BufferedReader::refill() - { - if (!buffer) - buffer = CharArray::newInstance(bufferSize); // allocate buffer lazily - int32_t readLength = reader->read(buffer.get(), 0, bufferSize); - bufferLength = readLength == READER_EOF ? 0 : readLength; - bufferPosition = 0; - return readLength; } - void BufferedReader::close() - { - reader->close(); - bufferLength = 0; - bufferPosition = 0; + return length == 0 ? 
READER_EOF : length; +} + +bool BufferedReader::readLine(String& line) { + line.clear(); + wchar_t ch = (wchar_t)read(); + while (ch != (wchar_t)READER_EOF && ch != L'\r' && ch != L'\n') { + line += ch; + ch = (wchar_t)read(); } - - bool BufferedReader::markSupported() - { - return false; + if (ch == '\r' && (wchar_t)peek() == L'\n') { + read(); } - - void BufferedReader::reset() - { - reader->reset(); - bufferLength = 0; - bufferPosition = 0; + return (!line.empty() || ch != (wchar_t)READER_EOF); +} + +int32_t BufferedReader::refill() { + if (!buffer) { + buffer = CharArray::newInstance(bufferSize); // allocate buffer lazily } + int32_t readLength = reader->read(buffer.get(), 0, bufferSize); + bufferLength = readLength == READER_EOF ? 0 : readLength; + bufferPosition = 0; + return readLength; +} + +void BufferedReader::close() { + reader->close(); + bufferLength = 0; + bufferPosition = 0; +} + +bool BufferedReader::markSupported() { + return false; +} + +void BufferedReader::reset() { + reader->reset(); + bufferLength = 0; + bufferPosition = 0; +} + } diff --git a/src/core/util/CharFolder.cpp b/src/core/util/CharFolder.cpp index 99db0f34..933178f8 100644 --- a/src/core/util/CharFolder.cpp +++ b/src/core/util/CharFolder.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,38 +9,36 @@ #include "MiscUtils.h" #include "UnicodeUtils.h" -namespace Lucene -{ - bool CharFolder::lowerCache = CharFolder::fillLower(); - bool CharFolder::upperCache = CharFolder::fillUpper(); - wchar_t CharFolder::lowerChars[CHAR_MAX - CHAR_MIN + 1]; - wchar_t CharFolder::upperChars[CHAR_MAX - CHAR_MIN + 1]; +namespace Lucene { - CharFolder::~CharFolder() - { - } - - wchar_t CharFolder::toLower(wchar_t ch) - { - return (ch > CHAR_MIN && ch < CHAR_MAX) ? lowerChars[ch - CHAR_MIN] : UnicodeUtil::toLower(ch); - } - - wchar_t CharFolder::toUpper(wchar_t ch) - { - return (ch > CHAR_MIN && ch < CHAR_MAX) ? upperChars[ch - CHAR_MIN] : UnicodeUtil::toUpper(ch); - } - - bool CharFolder::fillLower() - { - for (int32_t index = CHAR_MIN; index < CHAR_MAX; ++index) - lowerChars[index - CHAR_MIN] = UnicodeUtil::toLower((wchar_t)index); - return true; +bool CharFolder::lowerCache = CharFolder::fillLower(); +bool CharFolder::upperCache = CharFolder::fillUpper(); +wchar_t CharFolder::lowerChars[CHAR_MAX - CHAR_MIN + 1]; +wchar_t CharFolder::upperChars[CHAR_MAX - CHAR_MIN + 1]; + +CharFolder::~CharFolder() { +} + +wchar_t CharFolder::toLower(wchar_t ch) { + return (ch > CHAR_MIN && ch < CHAR_MAX) ? lowerChars[ch - CHAR_MIN] : UnicodeUtil::toLower(ch); +} + +wchar_t CharFolder::toUpper(wchar_t ch) { + return (ch > CHAR_MIN && ch < CHAR_MAX) ? 
upperChars[ch - CHAR_MIN] : UnicodeUtil::toUpper(ch); +} + +bool CharFolder::fillLower() { + for (int32_t index = CHAR_MIN; index < CHAR_MAX; ++index) { + lowerChars[index - CHAR_MIN] = UnicodeUtil::toLower((wchar_t)index); } - - bool CharFolder::fillUpper() - { - for (int32_t index = CHAR_MIN; index < CHAR_MAX; ++index) - upperChars[index - CHAR_MIN] = UnicodeUtil::toUpper((wchar_t)index); - return true; + return true; +} + +bool CharFolder::fillUpper() { + for (int32_t index = CHAR_MIN; index < CHAR_MAX; ++index) { + upperChars[index - CHAR_MIN] = UnicodeUtil::toUpper((wchar_t)index); } + return true; +} + } diff --git a/src/core/util/Collator.cpp b/src/core/util/Collator.cpp index 5f96d774..e841b1e5 100644 --- a/src/core/util/Collator.cpp +++ b/src/core/util/Collator.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,18 +8,16 @@ #include #include "Collator.h" -namespace Lucene -{ - Collator::Collator(std::locale locale) : collate(std::use_facet< std::collate >(locale)) - { - } - - Collator::~Collator() - { - } - - int32_t Collator::compare(const String& first, const String& second) - { - return collate.compare(first.c_str(), first.c_str() + first.length(), second.c_str(), second.c_str() + second.length()); - } +namespace Lucene { + +Collator::Collator(std::locale locale) : collate(std::use_facet< std::collate >(locale)) { +} + +Collator::~Collator() { +} + +int32_t Collator::compare(const String& first, const String& second) { + return collate.compare(first.c_str(), first.c_str() + first.length(), second.c_str(), second.c_str() + second.length()); +} + } diff --git a/src/core/util/Constants.cpp b/src/core/util/Constants.cpp index f96e86f9..7cea4ae8 100644 --- a/src/core/util/Constants.cpp +++ b/src/core/util/Constants.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -7,43 +7,43 @@ #include "LuceneInc.h" #include "Constants.h" -namespace Lucene -{ - #if defined(linux) || defined(__linux) || defined(__linux__) - String Constants::OS_NAME = L"Linux"; - #elif defined(sun) || defined(__sun) - String Constants::OS_NAME = L"Sun"; - #elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(_WIN64) || defined(__WIN64__) || defined(WIN64) - String Constants::OS_NAME = L"Windows"; - #elif defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__) - String Constants::OS_NAME = L"Mac"; - #elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) - String Constants::OS_NAME = L"BSD"; - #endif - - String Constants::LUCENE_MAIN_VERSION = L"3.0.3.4"; - String Constants::LUCENE_VERSION = L"3.0.3"; - - Constants::Constants() - { - // private - } - - Constants::~Constants() - { - } - - LuceneVersion::LuceneVersion() - { - // private - } - - LuceneVersion::~LuceneVersion() - { - } - - bool LuceneVersion::onOrAfter(LuceneVersion::Version first, LuceneVersion::Version second) - { - return (first >= second); - } +namespace Lucene { + +#if defined(linux) || defined(__linux) || defined(__linux__) +String Constants::OS_NAME = L"Linux"; +#elif defined(sun) || defined(__sun) +String Constants::OS_NAME = L"Sun"; +#elif defined(_WIN32) || defined(__WIN32__) || defined(WIN32) || defined(_WIN64) || defined(__WIN64__) || defined(WIN64) +String Constants::OS_NAME = L"Windows"; +#elif defined(macintosh) || defined(__APPLE__) || defined(__APPLE_CC__) +String Constants::OS_NAME = L"Mac"; +#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__DragonFly__) || defined(__FreeBSD_kernel__) +String Constants::OS_NAME = L"BSD"; +#elif defined(__GNU__) +String Constants::OS_NAME = L"HURD"; +#else +String Constants::OS_NAME = L"UNKNOWN"; +#endif + +String Constants::LUCENE_MAIN_VERSION = L"3.0.9"; 
+String Constants::LUCENE_VERSION = L"3.0.9"; + +Constants::Constants() { + // private +} + +Constants::~Constants() { +} + +LuceneVersion::LuceneVersion() { + // private +} + +LuceneVersion::~LuceneVersion() { +} + +bool LuceneVersion::onOrAfter(LuceneVersion::Version first, LuceneVersion::Version second) { + return (first >= second); +} + } diff --git a/src/core/util/CycleCheck.cpp b/src/core/util/CycleCheck.cpp index 2521f37b..48667326 100644 --- a/src/core/util/CycleCheck.cpp +++ b/src/core/util/CycleCheck.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,66 +8,60 @@ #include #include "CycleCheck.h" -namespace Lucene -{ - MapStringInt CycleCheck::cycleMap; - Set CycleCheck::staticRefs; - - CycleCheck::~CycleCheck() - { +namespace Lucene { + +MapStringInt CycleCheck::cycleMap; +Set CycleCheck::staticRefs; + +CycleCheck::~CycleCheck() { +} + +void CycleCheck::addRef(const String& className, int32_t ref) { + if (!cycleMap) { + cycleMap = MapStringInt::newInstance(); } - - void CycleCheck::addRef(const String& className, int32_t ref) - { - if (!cycleMap) - cycleMap = MapStringInt::newInstance(); - SyncLock lockRef(&cycleMap); - MapStringInt::iterator classRef = cycleMap.find(className); - if (classRef == cycleMap.end()) - cycleMap.put(className, 1); - else - { - classRef->second += ref; - if (classRef->second < 0) - boost::throw_exception(RuntimeException(L"invalid class reference")); + SyncLock lockRef(&cycleMap); + MapStringInt::iterator classRef = cycleMap.find(className); + if (classRef == cycleMap.end()) { + cycleMap.put(className, 1); + } else { + classRef->second += ref; + if 
(classRef->second < 0) { + boost::throw_exception(RuntimeException(L"invalid class reference")); } } - - void CycleCheck::addStatic(LuceneObjectPtr* staticRef) - { - #ifdef LPP_USE_CYCLIC_CHECK - if (!staticRefs) - staticRefs = Set::newInstance(); - staticRefs.add(staticRef); - #endif +} + +void CycleCheck::addStatic(LuceneObjectPtr* staticRef) { +#ifdef LPP_USE_CYCLIC_CHECK + LUCENE_RUN_ONCE( + staticRefs = Set::newInstance(); + ); + staticRefs.add(staticRef); +#endif +} + +void CycleCheck::dumpRefs() { + // destroy all registered statics + if (staticRefs) { + for (Set::iterator staticRef = staticRefs.begin(); staticRef != staticRefs.end(); ++staticRef) { + (*staticRef)->reset(); + } } - - void CycleCheck::dumpRefs() - { + + if (cycleMap) { SyncLock lockRef(&cycleMap); - - // destroy all registered statics - if (staticRefs) - { - for (Set::iterator staticRef = staticRefs.begin(); staticRef != staticRefs.end(); ++staticRef) - (*staticRef)->reset(); - } - - if (cycleMap) - { - bool reportCycles = true; - for (MapStringInt::iterator classRef = cycleMap.begin(); classRef != cycleMap.end(); ++classRef) - { - if (classRef->second > 0) - { - if (reportCycles) - { - std::wcout << L"Cyclic references detected!\n"; - reportCycles = false; - } - std::wcout << classRef->first << L": " << classRef->second << L"\n"; + bool reportCycles = true; + for (MapStringInt::iterator classRef = cycleMap.begin(); classRef != cycleMap.end(); ++classRef) { + if (classRef->second > 0) { + if (reportCycles) { + std::wcout << L"Cyclic references detected!\n"; + reportCycles = false; } + std::wcout << classRef->first << L": " << classRef->second << L"\n"; } } } } + +} diff --git a/src/core/util/DocIdBitSet.cpp b/src/core/util/DocIdBitSet.cpp index 673785ad..5e7576ed 100644 --- a/src/core/util/DocIdBitSet.cpp +++ b/src/core/util/DocIdBitSet.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,83 +9,71 @@ #include "_DocIdBitSet.h" #include "BitSet.h" -namespace Lucene -{ - DocIdBitSet::DocIdBitSet() - { - } - - DocIdBitSet::DocIdBitSet(BitSetPtr bitSet) - { - this->bitSet = bitSet; - } - - DocIdBitSet::~DocIdBitSet() - { - } - - DocIdSetIteratorPtr DocIdBitSet::iterator() - { - return newLucene(bitSet); - } - - bool DocIdBitSet::isCacheable() - { +namespace Lucene { + +DocIdBitSet::DocIdBitSet() { +} + +DocIdBitSet::DocIdBitSet(const BitSetPtr& bitSet) { + this->bitSet = bitSet; +} + +DocIdBitSet::~DocIdBitSet() { +} + +DocIdSetIteratorPtr DocIdBitSet::iterator() { + return newLucene(bitSet); +} + +bool DocIdBitSet::isCacheable() { + return true; +} + +BitSetPtr DocIdBitSet::getBitSet() { + return bitSet; +} + +bool DocIdBitSet::equals(const LuceneObjectPtr& other) { + if (DocIdSet::equals(other)) { return true; } - - BitSetPtr DocIdBitSet::getBitSet() - { - return bitSet; - } - - bool DocIdBitSet::equals(LuceneObjectPtr other) - { - if (DocIdSet::equals(other)) - return true; - DocIdBitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); - return bitSet->equals(otherBitSet->bitSet); - } - - int32_t DocIdBitSet::hashCode() - { - return bitSet->hashCode(); - } - - LuceneObjectPtr DocIdBitSet::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - DocIdBitSetPtr cloneBitSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); - cloneBitSet->bitSet = boost::dynamic_pointer_cast(bitSet->clone()); - return cloneBitSet; - } - - DocIdBitSetIterator::DocIdBitSetIterator(BitSetPtr bitSet) - { - this->bitSet = bitSet; - this->docId = -1; - } - - DocIdBitSetIterator::~DocIdBitSetIterator() - { - } - - int32_t DocIdBitSetIterator::docID() - { - return docId; - } - - int32_t DocIdBitSetIterator::nextDoc() - { - int32_t doc = bitSet->nextSetBit(docId + 1); - docId = doc == -1 ? NO_MORE_DOCS : doc; - return docId; - } - - int32_t DocIdBitSetIterator::advance(int32_t target) - { - int32_t doc = bitSet->nextSetBit(target); - docId = doc == -1 ? NO_MORE_DOCS : doc; - return docId; - } + DocIdBitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); + return bitSet->equals(otherBitSet->bitSet); +} + +int32_t DocIdBitSet::hashCode() { + return bitSet->hashCode(); +} + +LuceneObjectPtr DocIdBitSet::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? other : newLucene(); + DocIdBitSetPtr cloneBitSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); + cloneBitSet->bitSet = boost::dynamic_pointer_cast(bitSet->clone()); + return cloneBitSet; +} + +DocIdBitSetIterator::DocIdBitSetIterator(const BitSetPtr& bitSet) { + this->bitSet = bitSet; + this->docId = -1; +} + +DocIdBitSetIterator::~DocIdBitSetIterator() { +} + +int32_t DocIdBitSetIterator::docID() { + return docId; +} + +int32_t DocIdBitSetIterator::nextDoc() { + int32_t doc = bitSet->nextSetBit(docId + 1); + docId = doc == -1 ? NO_MORE_DOCS : doc; + return docId; +} + +int32_t DocIdBitSetIterator::advance(int32_t target) { + int32_t doc = bitSet->nextSetBit(target); + docId = doc == -1 ? 
NO_MORE_DOCS : doc; + return docId; +} + } diff --git a/src/core/util/FieldCacheSanityChecker.cpp b/src/core/util/FieldCacheSanityChecker.cpp index 8090856b..afc326c6 100644 --- a/src/core/util/FieldCacheSanityChecker.cpp +++ b/src/core/util/FieldCacheSanityChecker.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,268 +12,243 @@ #include "StringUtils.h" #include "VariantUtils.h" -namespace Lucene -{ - FieldCacheSanityChecker::FieldCacheSanityChecker() - { - } - - FieldCacheSanityChecker::~FieldCacheSanityChecker() - { - } - - Collection FieldCacheSanityChecker::checkSanity(FieldCachePtr cache) - { - return checkSanity(cache->getCacheEntries()); - } - - Collection FieldCacheSanityChecker::checkSanity(Collection cacheEntries) - { - FieldCacheSanityCheckerPtr sanityChecker(newLucene()); - return sanityChecker->check(cacheEntries); +namespace Lucene { + +FieldCacheSanityChecker::FieldCacheSanityChecker() { +} + +FieldCacheSanityChecker::~FieldCacheSanityChecker() { +} + +Collection FieldCacheSanityChecker::checkSanity(const FieldCachePtr& cache) { + return checkSanity(cache->getCacheEntries()); +} + +Collection FieldCacheSanityChecker::checkSanity(Collection cacheEntries) { + FieldCacheSanityCheckerPtr sanityChecker(newLucene()); + return sanityChecker->check(cacheEntries); +} + +Collection FieldCacheSanityChecker::check(Collection cacheEntries) { + if (!cacheEntries || cacheEntries.empty()) { + return Collection::newInstance(); } - - Collection FieldCacheSanityChecker::check(Collection cacheEntries) - { - if (!cacheEntries || cacheEntries.empty()) - return Collection::newInstance(); - - // Maps 
the (valId) identityhashCode of cache values to sets of CacheEntry instances - MapSetIntFieldCacheEntry valIdToItems(MapSetIntFieldCacheEntry::map_type::newInstance()); - - // Maps ReaderField keys to Sets of ValueIds - MapSetReaderFieldInt readerFieldToValIds(MapSetReaderFieldInt::map_type::newInstance()); - - // Any keys that we know result in more then one valId - SetReaderField valMismatchKeys(SetReaderField::newInstance()); - - // iterate over all the cacheEntries to get the mappings we'll need - for (int32_t i = 0; i < cacheEntries.size(); ++i) - { - FieldCacheEntryPtr item(cacheEntries[i]); - boost::any val(item->getValue()); - - if (VariantUtils::typeOf(val)) - continue; - - ReaderFieldPtr rf(newLucene(item->getReaderKey(), item->getFieldName())); - int32_t valId = VariantUtils::hashCode(val); - - // indirect mapping, so the MapOfSet will dedup identical valIds for us - valIdToItems.put(valId, item); - if (1 < readerFieldToValIds.put(rf, valId)) - valMismatchKeys.add(rf); + + // Maps the (valId) identityhashCode of cache values to sets of CacheEntry instances + MapSetIntFieldCacheEntry valIdToItems(MapSetIntFieldCacheEntry::map_type::newInstance()); + + // Maps ReaderField keys to Sets of ValueIds + MapSetReaderFieldInt readerFieldToValIds(MapSetReaderFieldInt::map_type::newInstance()); + + // Any keys that we know result in more then one valId + SetReaderField valMismatchKeys(SetReaderField::newInstance()); + + // iterate over all the cacheEntries to get the mappings we'll need + for (int32_t i = 0; i < cacheEntries.size(); ++i) { + FieldCacheEntryPtr item(cacheEntries[i]); + boost::any val(item->getValue()); + + if (VariantUtils::typeOf(val)) { + continue; + } + + ReaderFieldPtr rf(newLucene(item->getReaderKey(), item->getFieldName())); + int32_t valId = VariantUtils::hashCode(val); + + // indirect mapping, so the MapOfSet will dedup identical valIds for us + valIdToItems.put(valId, item); + if (1 < readerFieldToValIds.put(rf, valId)) { + 
valMismatchKeys.add(rf); } - - Collection insanity(Collection::newInstance()); - - Collection mismatch(checkValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys)); - insanity.addAll(mismatch.begin(), mismatch.end()); - - Collection subreaders(checkSubreaders(valIdToItems, readerFieldToValIds)); - insanity.addAll(subreaders.begin(), subreaders.end()); - - return insanity; } - - Collection FieldCacheSanityChecker::checkValueMismatch(MapSetIntFieldCacheEntry valIdToItems, - MapSetReaderFieldInt readerFieldToValIds, - SetReaderField valMismatchKeys) - { - Collection insanity(Collection::newInstance()); - - if (!valMismatchKeys.empty()) - { - // we have multiple values for some ReaderFields - - MapSetReaderFieldInt::map_type rfMap = readerFieldToValIds.getMap(); - MapSetIntFieldCacheEntry::map_type valMap = valIdToItems.getMap(); - - for (SetReaderField::iterator rf = valMismatchKeys.begin(); rf != valMismatchKeys.end(); ++rf) - { - Collection badEntries(Collection::newInstance()); - - MapSetReaderFieldInt::set_type values(rfMap.get(*rf)); - for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) - { - MapSetIntFieldCacheEntry::set_type cacheEntries(valMap.get(*value)); - for (MapSetIntFieldCacheEntry::set_type::iterator cacheEntry = cacheEntries.begin(); cacheEntry != cacheEntries.end(); ++cacheEntry) - badEntries.add(*cacheEntry); + + Collection insanity(Collection::newInstance()); + + Collection mismatch(checkValueMismatch(valIdToItems, readerFieldToValIds, valMismatchKeys)); + insanity.addAll(mismatch.begin(), mismatch.end()); + + Collection subreaders(checkSubreaders(valIdToItems, readerFieldToValIds)); + insanity.addAll(subreaders.begin(), subreaders.end()); + + return insanity; +} + +Collection FieldCacheSanityChecker::checkValueMismatch(MapSetIntFieldCacheEntry valIdToItems, + MapSetReaderFieldInt readerFieldToValIds, + SetReaderField valMismatchKeys) { + Collection insanity(Collection::newInstance()); + 
+ if (!valMismatchKeys.empty()) { + // we have multiple values for some ReaderFields + + MapSetReaderFieldInt::map_type rfMap = readerFieldToValIds.getMap(); + MapSetIntFieldCacheEntry::map_type valMap = valIdToItems.getMap(); + + for (SetReaderField::iterator rf = valMismatchKeys.begin(); rf != valMismatchKeys.end(); ++rf) { + Collection badEntries(Collection::newInstance()); + + MapSetReaderFieldInt::set_type values(rfMap.get(*rf)); + for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { + MapSetIntFieldCacheEntry::set_type cacheEntries(valMap.get(*value)); + for (MapSetIntFieldCacheEntry::set_type::iterator cacheEntry = cacheEntries.begin(); cacheEntry != cacheEntries.end(); ++cacheEntry) { + badEntries.add(*cacheEntry); } - - insanity.add(newLucene(VALUEMISMATCH, L"Multiple distinct value objects for " + (*rf)->toString(), badEntries)); } + + insanity.add(newLucene(VALUEMISMATCH, L"Multiple distinct value objects for " + (*rf)->toString(), badEntries)); } - return insanity; } - - Collection FieldCacheSanityChecker::checkSubreaders(MapSetIntFieldCacheEntry valIdToItems, - MapSetReaderFieldInt readerFieldToValIds) - { - Collection insanity(Collection::newInstance()); - - MapReaderFieldSetReaderField badChildren(MapReaderFieldSetReaderField::newInstance()); - MapSetReaderFieldReaderField badKids(badChildren); // wrapper - - MapSetIntFieldCacheEntry::map_type viToItemSets = valIdToItems.getMap(); - MapSetReaderFieldInt::map_type rfToValIdSets = readerFieldToValIds.getMap(); - - SetReaderField seen(SetReaderField::newInstance()); - - for (MapSetReaderFieldInt::map_type::iterator rf = rfToValIdSets.begin(); rf != rfToValIdSets.end(); ++rf) - { - if (seen.contains(rf->first)) - continue; - - Collection kids(getAllDecendentReaderKeys(rf->first->readerKey)); - for (Collection::iterator kidKey = kids.begin(); kidKey != kids.end(); ++kidKey) - { - ReaderFieldPtr kid(newLucene(*kidKey, rf->first->fieldName)); - - if 
(badChildren.contains(kid)) - { - // we've already process this kid as RF and found other problems track those problems as our own - badKids.put(rf->first, kid); - badKids.putAll(rf->first, badChildren.get(kid)); - badChildren.remove(kid); - } - else if (rfToValIdSets.contains(kid)) - { - // we have cache entries for the kid - badKids.put(rf->first, kid); - } - seen.add(kid); + return insanity; +} + +Collection FieldCacheSanityChecker::checkSubreaders(MapSetIntFieldCacheEntry valIdToItems, + MapSetReaderFieldInt readerFieldToValIds) { + Collection insanity(Collection::newInstance()); + + MapReaderFieldSetReaderField badChildren(MapReaderFieldSetReaderField::newInstance()); + MapSetReaderFieldReaderField badKids(badChildren); // wrapper + + MapSetIntFieldCacheEntry::map_type viToItemSets = valIdToItems.getMap(); + MapSetReaderFieldInt::map_type rfToValIdSets = readerFieldToValIds.getMap(); + + SetReaderField seen(SetReaderField::newInstance()); + + for (MapSetReaderFieldInt::map_type::iterator rf = rfToValIdSets.begin(); rf != rfToValIdSets.end(); ++rf) { + if (seen.contains(rf->first)) { + continue; + } + + Collection kids(getAllDecendentReaderKeys(rf->first->readerKey)); + for (Collection::iterator kidKey = kids.begin(); kidKey != kids.end(); ++kidKey) { + ReaderFieldPtr kid(newLucene(*kidKey, rf->first->fieldName)); + + if (badChildren.contains(kid)) { + // we've already process this kid as RF and found other problems track those problems as our own + badKids.put(rf->first, kid); + badKids.putAll(rf->first, badChildren.get(kid)); + badChildren.remove(kid); + } else if (rfToValIdSets.contains(kid)) { + // we have cache entries for the kid + badKids.put(rf->first, kid); } - seen.add(rf->first); + seen.add(kid); } - - // every mapping in badKids represents an Insanity - for (MapReaderFieldSetReaderField::iterator parent = badChildren.begin(); parent != badChildren.end(); ++parent) - { - SetReaderField kids = parent->second; - Collection 
badEntries(Collection::newInstance()); - - // put parent entries in first - MapSetReaderFieldInt::set_type values(rfToValIdSets.get(parent->first)); - for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) - { + seen.add(rf->first); + } + + // every mapping in badKids represents an Insanity + for (MapReaderFieldSetReaderField::iterator parent = badChildren.begin(); parent != badChildren.end(); ++parent) { + SetReaderField kids = parent->second; + Collection badEntries(Collection::newInstance()); + + // put parent entries in first + MapSetReaderFieldInt::set_type values(rfToValIdSets.get(parent->first)); + for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { + MapSetIntFieldCacheEntry::set_type cacheEntries(viToItemSets.get(*value)); + badEntries.addAll(cacheEntries.begin(), cacheEntries.end()); + } + + // now the entries for the descendants + for (SetReaderField::iterator kid = kids.begin(); kid != kids.end(); ++kid) { + MapSetReaderFieldInt::set_type values(rfToValIdSets.get(*kid)); + for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) { MapSetIntFieldCacheEntry::set_type cacheEntries(viToItemSets.get(*value)); badEntries.addAll(cacheEntries.begin(), cacheEntries.end()); } - - // now the entries for the descendants - for (SetReaderField::iterator kid = kids.begin(); kid != kids.end(); ++kid) - { - MapSetReaderFieldInt::set_type values(rfToValIdSets.get(*kid)); - for (MapSetReaderFieldInt::set_type::iterator value = values.begin(); value != values.end(); ++value) - { - MapSetIntFieldCacheEntry::set_type cacheEntries(viToItemSets.get(*value)); - badEntries.addAll(cacheEntries.begin(), cacheEntries.end()); - } - } - - insanity.add(newLucene(SUBREADER, L"Found caches for descendants of " + parent->first->toString(), badEntries)); } - - return insanity; + + insanity.add(newLucene(SUBREADER, L"Found caches for descendants of 
" + parent->first->toString(), badEntries)); } - - Collection FieldCacheSanityChecker::getAllDecendentReaderKeys(LuceneObjectPtr seed) - { - Collection all(Collection::newInstance()); // will grow as we iter - all.add(seed); - for (int32_t i = 0; i < all.size(); ++i) - { - IndexReaderPtr indexReader(boost::dynamic_pointer_cast(all[i])); - if (indexReader) - { - Collection subs(indexReader->getSequentialSubReaders()); - for (int32_t j = 0; subs && j < subs.size(); ++j) - all.add(subs[j]->getFieldCacheKey()); + + return insanity; +} + +Collection FieldCacheSanityChecker::getAllDecendentReaderKeys(const LuceneObjectPtr& seed) { + Collection all(Collection::newInstance()); // will grow as we iter + all.add(seed); + for (int32_t i = 0; i < all.size(); ++i) { + IndexReaderPtr indexReader(boost::dynamic_pointer_cast(all[i])); + if (indexReader) { + Collection subs(indexReader->getSequentialSubReaders()); + for (int32_t j = 0; subs && j < subs.size(); ++j) { + all.add(subs[j]->getFieldCacheKey()); } } - - // need to remove the first, because it was the seed - all.remove(all.begin()); - return all; - } - - ReaderField::ReaderField(LuceneObjectPtr readerKey, const String& fieldName) - { - this->readerKey = readerKey; - this->fieldName = fieldName; - } - - ReaderField::~ReaderField() - { - } - - int32_t ReaderField::hashCode() - { - return readerKey->hashCode() * StringUtils::hashCode(fieldName); } - - bool ReaderField::equals(LuceneObjectPtr other) - { - ReaderFieldPtr otherReaderField(boost::dynamic_pointer_cast(other)); - if (!otherReaderField) - return false; - return (readerKey->equals(otherReaderField->readerKey) && fieldName == otherReaderField->fieldName); - } - - String ReaderField::toString() - { - return readerKey->toString() + L"+" + fieldName; - } - - Insanity::Insanity(FieldCacheSanityChecker::InsanityType type, const String& msg, Collection entries) - { - if (!entries || entries.empty()) - boost::throw_exception(IllegalArgumentException(L"Insanity requires 
non-null/non-empty CacheEntry[]")); - this->type = type; - this->msg = msg; - this->entries = entries; - } - - Insanity::~Insanity() - { - } - - FieldCacheSanityChecker::InsanityType Insanity::getType() - { - return type; - } - - String Insanity::getMsg() - { - return msg; + + // need to remove the first, because it was the seed + all.remove(all.begin()); + return all; +} + +ReaderField::ReaderField(const LuceneObjectPtr& readerKey, const String& fieldName) { + this->readerKey = readerKey; + this->fieldName = fieldName; +} + +ReaderField::~ReaderField() { +} + +int32_t ReaderField::hashCode() { + return readerKey->hashCode() * StringUtils::hashCode(fieldName); +} + +bool ReaderField::equals(const LuceneObjectPtr& other) { + ReaderFieldPtr otherReaderField(boost::dynamic_pointer_cast(other)); + if (!otherReaderField) { + return false; } - - Collection Insanity::getCacheEntries() - { - return entries; + return (readerKey->equals(otherReaderField->readerKey) && fieldName == otherReaderField->fieldName); +} + +String ReaderField::toString() { + return readerKey->toString() + L"+" + fieldName; +} + +Insanity::Insanity(FieldCacheSanityChecker::InsanityType type, const String& msg, Collection entries) { + if (!entries || entries.empty()) { + boost::throw_exception(IllegalArgumentException(L"Insanity requires non-null/non-empty CacheEntry[]")); } - - String Insanity::toString() - { - StringStream buffer; - switch (type) - { - case FieldCacheSanityChecker::SUBREADER: - buffer << L"SUBREADER: "; - break; - case FieldCacheSanityChecker::VALUEMISMATCH: - buffer << L"VALUEMISMATCH: "; - break; - case FieldCacheSanityChecker::EXPECTED: - buffer << L"EXPECTED: "; - break; - } - buffer << msg << L"\n"; - - for (Collection::iterator ce = entries.begin(); ce != entries.end(); ++ce) - buffer << L"\t" << (*ce)->toString() << L"\n"; - - return buffer.str(); + this->type = type; + this->msg = msg; + this->entries = entries; +} + +Insanity::~Insanity() { +} + 
+FieldCacheSanityChecker::InsanityType Insanity::getType() { + return type; +} + +String Insanity::getMsg() { + return msg; +} + +Collection Insanity::getCacheEntries() { + return entries; +} + +String Insanity::toString() { + StringStream buffer; + switch (type) { + case FieldCacheSanityChecker::SUBREADER: + buffer << L"SUBREADER: "; + break; + case FieldCacheSanityChecker::VALUEMISMATCH: + buffer << L"VALUEMISMATCH: "; + break; + case FieldCacheSanityChecker::EXPECTED: + buffer << L"EXPECTED: "; + break; + } + buffer << msg << L"\n"; + + for (Collection::iterator ce = entries.begin(); ce != entries.end(); ++ce) { + buffer << L"\t" << (*ce)->toString() << L"\n"; } + + return buffer.str(); +} + } diff --git a/src/core/util/FileReader.cpp b/src/core/util/FileReader.cpp index a8b55408..18442188 100644 --- a/src/core/util/FileReader.cpp +++ b/src/core/util/FileReader.cpp @@ -1,78 +1,72 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" -#include +#include #include "FileReader.h" #include "MiscUtils.h" #include "FileUtils.h" #include "StringUtils.h" -namespace Lucene -{ - const int32_t FileReader::FILE_EOF = Reader::READER_EOF; - const int32_t FileReader::FILE_ERROR = -1; - - FileReader::FileReader(const String& fileName) - { - this->file = newInstance(StringUtils::toUTF8(fileName).c_str(), std::ios::binary | std::ios::in); - if (!file->is_open()) - boost::throw_exception(FileNotFoundException(fileName)); - _length = FileUtils::fileLength(fileName); - } - - FileReader::~FileReader() - { - } - - int32_t FileReader::read() - { - wchar_t buffer; - return read(&buffer, 0, 1) == FILE_EOF ? 
FILE_EOF : buffer; +namespace Lucene { + +const int32_t FileReader::FILE_EOF = Reader::READER_EOF; +const int32_t FileReader::FILE_ERROR = -1; + +FileReader::FileReader(const String& fileName) { + this->file = newInstance(fileName, std::ios::binary | std::ios::in); + if (!file->is_open()) { + boost::throw_exception(FileNotFoundException(fileName)); } - - int32_t FileReader::read(wchar_t* buffer, int32_t offset, int32_t length) - { - try - { - if (file->eof()) - return FILE_EOF; - if (!fileBuffer) - fileBuffer = ByteArray::newInstance(length); - if (length > fileBuffer.size()) - fileBuffer.resize(length); - file->read((char*)fileBuffer.get(), length); - int32_t readLength = file->gcount(); - MiscUtils::arrayCopy(fileBuffer.get(), 0, buffer, offset, readLength); - return readLength == 0 ? FILE_EOF : readLength; + _length = FileUtils::fileLength(fileName); +} + +FileReader::~FileReader() { +} + +int32_t FileReader::read() { + wchar_t buffer; + return read(&buffer, 0, 1) == FILE_EOF ? FILE_EOF : buffer; +} + +int32_t FileReader::read(wchar_t* buffer, int32_t offset, int32_t length) { + try { + if (file->eof()) { + return FILE_EOF; } - catch (...) - { - return FILE_ERROR; + if (!fileBuffer) { + fileBuffer = ByteArray::newInstance(length); } + if (length > fileBuffer.size()) { + fileBuffer.resize(length); + } + file->read((char*)fileBuffer.get(), length); + int32_t readLength = file->gcount(); + MiscUtils::arrayCopy(fileBuffer.get(), 0, buffer, offset, readLength); + return readLength == 0 ? FILE_EOF : readLength; + } catch (...) 
{ + return FILE_ERROR; } - - void FileReader::close() - { - file->close(); - } - - bool FileReader::markSupported() - { - return false; - } - - void FileReader::reset() - { - file->clear(); - file->seekg((std::streamoff)0); - } - - int64_t FileReader::length() - { - return _length; - } +} + +void FileReader::close() { + file->close(); +} + +bool FileReader::markSupported() { + return false; +} + +void FileReader::reset() { + file->clear(); + file->seekg((std::streamoff)0); +} + +int64_t FileReader::length() { + return _length; +} + } diff --git a/src/core/util/FileUtils.cpp b/src/core/util/FileUtils.cpp index 4d3d436e..d92efbb8 100644 --- a/src/core/util/FileUtils.cpp +++ b/src/core/util/FileUtils.cpp @@ -1,13 +1,13 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" -#include #include #include +#include #include "LuceneThread.h" #include "StringUtils.h" #include "FileUtils.h" @@ -20,222 +20,122 @@ #include #endif -namespace Lucene -{ - namespace FileUtils - { - bool fileExists(const String& path) - { - try - { - return boost::filesystem::exists(path.c_str()); - } - catch (...) - { - return false; - } - } - - uint64_t fileModified(const String& path) - { - try - { - return (uint64_t)boost::filesystem::last_write_time(path.c_str()); - } - catch (...) - { - return 0; - } - } - - bool touchFile(const String& path) - { - try - { - boost::filesystem::last_write_time(path.c_str(), time(NULL)); - return true; - } - catch (...) 
- { - return false; - } - } - - int64_t fileLength(const String& path) - { - try - { - int64_t fileSize = (int64_t)boost::filesystem::file_size(path.c_str()); - for (int32_t i = 0; fileSize == 0 && i < 100; ++i) - { - LuceneThread::threadYield(); - fileSize = (int64_t)boost::filesystem::file_size(path.c_str()); - } - return fileSize; - } - catch (...) - { - return 0; - } - } +namespace Lucene { - bool setFileLength(const String& path, int64_t length) - { - try - { - if (!fileExists(path)) - return false; - #if defined(_WIN32) || defined(_WIN64) - int32_t fd = _wopen(path.c_str(), _O_WRONLY | _O_CREAT | _O_BINARY, _S_IWRITE); - return _chsize(fd, (long)length) == 0; - #else - return truncate(StringUtils::toUTF8(path).c_str(), (off_t)length) == 0; - #endif - } - catch (...) - { - return false; - } - } - - bool removeFile(const String& path) - { - try - { - return boost::filesystem::remove(path.c_str()); - } - catch (...) - { - return false; - } - } - - bool copyFile(const String& source, const String& dest) - { - try - { - boost::filesystem::copy_file(source.c_str(), dest.c_str()); - return true; - } - catch (...) - { - return false; - } - } - - bool createDirectory(const String& path) - { - try - { - return boost::filesystem::create_directory(path.c_str()); - } - catch (...) - { - return false; - } - } - - bool removeDirectory(const String& path) - { - try - { - boost::filesystem::remove_all(path.c_str()); - return true; - } - catch (...) - { - return false; - } - } - - bool isDirectory(const String& path) - { - try - { - return boost::filesystem::is_directory(path.c_str()); - } - catch (...) 
- { - return false; - } - } - - bool listDirectory(const String& path, bool filesOnly, HashSet dirList) - { - try - { - for (boost::filesystem::wdirectory_iterator dir(path.c_str()); dir != boost::filesystem::wdirectory_iterator(); ++dir) - { - if (!filesOnly || !boost::filesystem::is_directory(dir->status())) - dirList.add(dir->path().filename().c_str()); - } - return true; - } - catch (...) - { - return false; - } - } - - bool copyDirectory(const String& source, const String& dest) - { - try - { - HashSet dirList(HashSet::newInstance()); - if (!listDirectory(source, true, dirList)) - return false; - - createDirectory(dest); - - for (HashSet::iterator file = dirList.begin(); file != dirList.end(); ++file) - copyFile(joinPath(source, *file), joinPath(dest, *file)); - - return true; - } - catch (...) - { - return false; - } - } - - String joinPath(const String& path, const String& file) - { - try - { - boost::filesystem::wpath join(path.c_str()); - join /= file.c_str(); - return join.directory_string().c_str(); - } - catch (...) - { - return path; - } - } - - String extractPath(const String& path) - { - try - { - boost::filesystem::wpath parentPath(path.c_str()); - return parentPath.parent_path().directory_string().c_str(); - } - catch (...) - { - return path; - } - } - - String extractFile(const String& path) - { - try - { - boost::filesystem::wpath fileName(path.c_str()); - return fileName.filename().c_str(); - } - catch (...) - { - return path; - } +namespace FileUtils { + +bool fileExists(const String& path) { + boost::system::error_code ec; + return boost::filesystem::exists(path.c_str(), ec); +} + +uint64_t fileModified(const String& path) { + boost::system::error_code ec; + uint64_t t = (uint64_t)boost::filesystem::last_write_time(path.c_str(), ec); + return ec ? 
0 : t; +} + +bool touchFile(const String& path) { + boost::system::error_code ec; + boost::filesystem::last_write_time(path.c_str(), time(NULL), ec); + return !ec; +} + +int64_t fileLength(const String& path) { + boost::system::error_code ec; + int64_t fileSize = (int64_t)boost::filesystem::file_size(path.c_str(), ec); + for (int32_t i = 0; !ec && fileSize == 0 && i < 100; ++i) { + LuceneThread::threadYield(); + fileSize = (int64_t)boost::filesystem::file_size(path.c_str(), ec); + } + + return ec ? 0 : fileSize; +} + +bool setFileLength(const String& path, int64_t length) { + if (!fileExists(path)) { + return false; + } +#if defined(_WIN32) || defined(_WIN64) + int32_t fd = _wopen(path.c_str(), _O_WRONLY | _O_CREAT | _O_BINARY, _S_IWRITE); + return _chsize(fd, (long)length) == 0; +#else + return truncate(boost::filesystem::path(path).c_str(), (off_t)length) == 0; +#endif +} + +bool removeFile(const String& path) { + boost::system::error_code ec; + return boost::filesystem::remove(path.c_str(), ec); +} + +bool copyFile(const String& source, const String& dest) { + boost::system::error_code ec; + boost::filesystem::copy_file(source.c_str(), dest.c_str(), ec); + return !ec; +} + +bool createDirectory(const String& path) { + boost::system::error_code ec; + return boost::filesystem::create_directory(path.c_str(), ec) && !ec; +} + +bool removeDirectory(const String& path) { + boost::system::error_code ec; + boost::filesystem::remove_all(path.c_str(), ec); + return !ec; +} + +bool isDirectory(const String& path) { + boost::system::error_code ec; + return boost::filesystem::is_directory(path.c_str(), ec); +} + +bool listDirectory(const String& path, bool filesOnly, HashSet dirList) { + boost::system::error_code ec; + boost::filesystem::directory_iterator dir(path.c_str(), ec); + if (ec) { + return false; + } + + for (; dir != boost::filesystem::directory_iterator(); ++dir) { + if (!filesOnly || !boost::filesystem::is_directory(dir->status())) { + 
dirList.add(dir->path().filename().wstring().c_str()); } } + return true; +} + +bool copyDirectory(const String& source, const String& dest) { + HashSet dirList(HashSet::newInstance()); + if (!listDirectory(source, true, dirList)) { + return false; + } + + createDirectory(dest); + + for (HashSet::iterator file = dirList.begin(); file != dirList.end(); ++file) { + copyFile(joinPath(source, *file), joinPath(dest, *file)); + } + + return true; +} + +String joinPath(const String& path, const String& file) { + boost::filesystem::path join(path.c_str()); + join /= file.c_str(); + return join.wstring().c_str(); +} + +String extractPath(const String& path) { + boost::filesystem::path parentPath(path.c_str()); + return parentPath.parent_path().wstring().c_str(); +} + +String extractFile(const String& path) { + boost::filesystem::path fileName(path.c_str()); + return fileName.filename().wstring().c_str(); +} + +} } diff --git a/src/core/util/InfoStream.cpp b/src/core/util/InfoStream.cpp index a9e1b4ff..f0229bcc 100644 --- a/src/core/util/InfoStream.cpp +++ b/src/core/util/InfoStream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,46 +9,38 @@ #include "InfoStream.h" #include "StringUtils.h" -namespace Lucene -{ - InfoStream::InfoStream() - { - } - - InfoStream::~InfoStream() - { - } - - InfoStreamFile::InfoStreamFile(const String& path) : file(StringUtils::toUTF8(path).c_str()) - { - } - - InfoStreamFile::~InfoStreamFile() - { - } - - InfoStreamFile& InfoStreamFile::operator<< (const String& t) - { - file << t; - return *this; - } - - InfoStreamOut::~InfoStreamOut() - { - } - - InfoStreamOut& InfoStreamOut::operator<< (const String& t) - { - std::wcout << t; - return *this; - } - - InfoStreamNull::~InfoStreamNull() - { - } - - InfoStreamNull& InfoStreamNull::operator<< (const String& t) - { - return *this; - } +namespace Lucene { + +InfoStream::InfoStream() { +} + +InfoStream::~InfoStream() { +} + +InfoStreamFile::InfoStreamFile(const String& path) : file(path) { +} + +InfoStreamFile::~InfoStreamFile() { +} + +InfoStreamFile& InfoStreamFile::operator<< (const String& t) { + file << t; + return *this; +} + +InfoStreamOut::~InfoStreamOut() { +} + +InfoStreamOut& InfoStreamOut::operator<< (const String& t) { + std::wcout << t; + return *this; +} + +InfoStreamNull::~InfoStreamNull() { +} + +InfoStreamNull& InfoStreamNull::operator<< (const String& t) { + return *this; +} + } diff --git a/src/core/util/InputStreamReader.cpp b/src/core/util/InputStreamReader.cpp index f1ed70f6..ceefafd2 100644 --- a/src/core/util/InputStreamReader.cpp +++ b/src/core/util/InputStreamReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,41 +9,35 @@ #include "BufferedReader.h" #include "UTF8Stream.h" -namespace Lucene -{ - InputStreamReader::InputStreamReader(ReaderPtr reader) - { - this->reader = reader; - this->decoder = newLucene(newLucene(reader, 1024)); - } - - InputStreamReader::~InputStreamReader() - { - } - - int32_t InputStreamReader::read() - { - int32_t buffer; - return read((wchar_t*)&buffer, 0, 1) == READER_EOF ? READER_EOF : buffer; - } - - int32_t InputStreamReader::read(wchar_t* b, int32_t offset, int32_t length) - { - return decoder->decode(b + offset, length); - } - - void InputStreamReader::close() - { - reader->close(); - } - - bool InputStreamReader::markSupported() - { - return false; - } - - void InputStreamReader::reset() - { - reader->reset(); - } +namespace Lucene { + +InputStreamReader::InputStreamReader(const ReaderPtr& reader) { + this->reader = reader; + this->decoder = newLucene(newLucene(reader, 1024)); +} + +InputStreamReader::~InputStreamReader() { +} + +int32_t InputStreamReader::read() { + int32_t buffer; + return read((wchar_t*)&buffer, 0, 1) == READER_EOF ? READER_EOF : buffer; +} + +int32_t InputStreamReader::read(wchar_t* b, int32_t offset, int32_t length) { + return decoder->decode(b + offset, length); +} + +void InputStreamReader::close() { + reader->close(); +} + +bool InputStreamReader::markSupported() { + return false; +} + +void InputStreamReader::reset() { + reader->reset(); +} + } diff --git a/src/core/util/LuceneAllocator.cpp b/src/core/util/LuceneAllocator.cpp new file mode 100644 index 00000000..92a7ccad --- /dev/null +++ b/src/core/util/LuceneAllocator.cpp @@ -0,0 +1,46 @@ +///////////////////////////////////////////////////////////////////////////// +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. +// Distributable under the terms of either the Apache License (Version 2.0) +// or the GNU Lesser General Public License. 
+///////////////////////////////////////////////////////////////////////////// + +#include "LuceneInc.h" +#include "LuceneAllocator.h" + +namespace Lucene { + +void* AllocMemory(size_t size) { +#if (defined(_WIN32) || defined(_WIN64)) && !defined(NDEBUG) + return _malloc_dbg(size, _NORMAL_BLOCK, __FILE__, __LINE__); +#else + return malloc(size); +#endif +} + +void* ReallocMemory(void* memory, size_t size) { + if (memory == NULL) { + return AllocMemory(size); + } + if (size == 0) { + FreeMemory(memory); + return NULL; + } +#if defined(_WIN32) && !defined(NDEBUG) + return _realloc_dbg(memory, size, _NORMAL_BLOCK, __FILE__, __LINE__); +#else + return realloc(memory, size); +#endif +} + +void FreeMemory(void* memory) { + if (memory == NULL) { + return; + } +#if defined(_WIN32) && !defined(NDEBUG) + _free_dbg(memory, _NORMAL_BLOCK); +#else + free(memory); +#endif +} + +} diff --git a/src/core/util/LuceneException.cpp b/src/core/util/LuceneException.cpp index 7ae4f0f9..2d0bb35c 100644 --- a/src/core/util/LuceneException.cpp +++ b/src/core/util/LuceneException.cpp @@ -1,100 +1,103 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// #include "LuceneInc.h" #include "LuceneException.h" +#include "StringUtils.h" -namespace Lucene -{ - LuceneException::LuceneException(const String& error, ExceptionType type) throw() - { - this->error = error; - this->type = type; - } - - LuceneException::~LuceneException() throw() - { - } - - LuceneException::ExceptionType LuceneException::getType() const - { - return type; - } - - String LuceneException::getError() const - { - return error; - } - - bool LuceneException::isNull() const - { - return (type == Null); - } - - void LuceneException::throwException() - { - switch (type) - { - case LuceneException::AlreadyClosed: - boost::throw_exception(AlreadyClosedException(error, type)); - case LuceneException::Compression: - boost::throw_exception(CompressionException(error, type)); - case LuceneException::CorruptIndex: - boost::throw_exception(CorruptIndexException(error, type)); - case LuceneException::FieldReader: - boost::throw_exception(FieldReaderException(error, type)); - case LuceneException::FileNotFound: - boost::throw_exception(FileNotFoundException(error, type)); - case LuceneException::IllegalArgument: - boost::throw_exception(IllegalArgumentException(error, type)); - case LuceneException::IllegalState: - boost::throw_exception(IllegalStateException(error, type)); - case LuceneException::IndexOutOfBounds: - boost::throw_exception(IndexOutOfBoundsException(error, type)); - case LuceneException::IO: - boost::throw_exception(IOException(error, type)); - case LuceneException::LockObtainFailed: - boost::throw_exception(LockObtainFailedException(error, type)); - case LuceneException::LockReleaseFailed: - boost::throw_exception(LockReleaseFailedException(error, type)); - case LuceneException::Lookahead: - boost::throw_exception(LookaheadSuccess(error, type)); - case LuceneException::MergeAborted: - boost::throw_exception(MergeAbortedException(error, type)); - case 
LuceneException::Merge: - boost::throw_exception(MergeException(error, type)); - case LuceneException::NoSuchDirectory: - boost::throw_exception(NoSuchDirectoryException(error, type)); - case LuceneException::NullPointer: - boost::throw_exception(NullPointerException(error, type)); - case LuceneException::NumberFormat: - boost::throw_exception(NumberFormatException(error, type)); - case LuceneException::OutOfMemory: - boost::throw_exception(OutOfMemoryError(error, type)); - case LuceneException::Parse: - boost::throw_exception(ParseException(error, type)); - case LuceneException::QueryParser: - boost::throw_exception(QueryParserError(error, type)); - case LuceneException::Runtime: - boost::throw_exception(RuntimeException(error, type)); - case LuceneException::StaleReader: - boost::throw_exception(StaleReaderException(error, type)); - case LuceneException::StopFillCache: - boost::throw_exception(StopFillCacheException(error, type)); - case LuceneException::Temporary: - boost::throw_exception(TemporaryException(error, type)); - case LuceneException::TimeExceeded: - boost::throw_exception(TimeExceededException(error, type)); - case LuceneException::TooManyClauses: - boost::throw_exception(TooManyClausesException(error, type)); - case LuceneException::UnsupportedOperation: - boost::throw_exception(UnsupportedOperationException(error, type)); - case LuceneException::Null: - // silence static analyzer - break; - } +namespace Lucene { + +LuceneException::LuceneException(const String& error, ExceptionType type) throw() { + this->error = error; + this->type = type; + SingleStringStream ss; + ss << "LuceneException[" << type << "]: " << StringUtils::toUTF8(error); + this->_what = ss.str(); +} + +LuceneException::~LuceneException() throw() { +} + +LuceneException::ExceptionType LuceneException::getType() const { + return type; +} + +String LuceneException::getError() const { + return error; +} + +bool LuceneException::isNull() const { + return (type == Null); +} + +void 
LuceneException::throwException() { + switch (type) { + case LuceneException::AlreadyClosed: + boost::throw_exception(AlreadyClosedException(error, type)); + case LuceneException::Compression: + boost::throw_exception(CompressionException(error, type)); + case LuceneException::CorruptIndex: + boost::throw_exception(CorruptIndexException(error, type)); + case LuceneException::FieldReader: + boost::throw_exception(FieldReaderException(error, type)); + case LuceneException::FileNotFound: + boost::throw_exception(FileNotFoundException(error, type)); + case LuceneException::IllegalArgument: + boost::throw_exception(IllegalArgumentException(error, type)); + case LuceneException::IllegalState: + boost::throw_exception(IllegalStateException(error, type)); + case LuceneException::IndexOutOfBounds: + boost::throw_exception(IndexOutOfBoundsException(error, type)); + case LuceneException::IO: + boost::throw_exception(IOException(error, type)); + case LuceneException::LockObtainFailed: + boost::throw_exception(LockObtainFailedException(error, type)); + case LuceneException::LockReleaseFailed: + boost::throw_exception(LockReleaseFailedException(error, type)); + case LuceneException::Lookahead: + boost::throw_exception(LookaheadSuccess(error, type)); + case LuceneException::MergeAborted: + boost::throw_exception(MergeAbortedException(error, type)); + case LuceneException::Merge: + boost::throw_exception(MergeException(error, type)); + case LuceneException::NoSuchDirectory: + boost::throw_exception(NoSuchDirectoryException(error, type)); + case LuceneException::NullPointer: + boost::throw_exception(NullPointerException(error, type)); + case LuceneException::NumberFormat: + boost::throw_exception(NumberFormatException(error, type)); + case LuceneException::OutOfMemory: + boost::throw_exception(OutOfMemoryError(error, type)); + case LuceneException::Parse: + boost::throw_exception(ParseException(error, type)); + case LuceneException::QueryParser: + 
boost::throw_exception(QueryParserError(error, type)); + case LuceneException::Runtime: + boost::throw_exception(RuntimeException(error, type)); + case LuceneException::StaleReader: + boost::throw_exception(StaleReaderException(error, type)); + case LuceneException::StopFillCache: + boost::throw_exception(StopFillCacheException(error, type)); + case LuceneException::Temporary: + boost::throw_exception(TemporaryException(error, type)); + case LuceneException::TimeExceeded: + boost::throw_exception(TimeExceededException(error, type)); + case LuceneException::TooManyClauses: + boost::throw_exception(TooManyClausesException(error, type)); + case LuceneException::UnsupportedOperation: + boost::throw_exception(UnsupportedOperationException(error, type)); + case LuceneException::Null: + // silence static analyzer + break; } } + +const char* LuceneException::what() const throw() +{ + return _what.c_str(); +} + +} diff --git a/src/core/util/LuceneObject.cpp b/src/core/util/LuceneObject.cpp index eb509baf..536aabdf 100644 --- a/src/core/util/LuceneObject.cpp +++ b/src/core/util/LuceneObject.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,45 +8,39 @@ #include "LuceneObject.h" #include "StringUtils.h" -namespace Lucene -{ - LuceneObject::LuceneObject() - { - } +namespace Lucene { - LuceneObject::~LuceneObject() - { - } - - void LuceneObject::initialize() - { - // override - } - - LuceneObjectPtr LuceneObject::clone(LuceneObjectPtr other) - { - if (!other) - boost::throw_exception(UnsupportedOperationException(L"clone must not be null")); - return other; - } - - int32_t LuceneObject::hashCode() - { - return (int32_t)(int64_t)this; - } - - bool LuceneObject::equals(LuceneObjectPtr other) - { - return (other && this == other.get()); - } - - int32_t LuceneObject::compareTo(LuceneObjectPtr other) - { - return (int32_t)(this - other.get()); - } - - String LuceneObject::toString() - { - return StringUtils::toString(hashCode()); +LuceneObject::LuceneObject() { +} + +LuceneObject::~LuceneObject() { +} + +void LuceneObject::initialize() { + // override +} + +LuceneObjectPtr LuceneObject::clone(const LuceneObjectPtr& other) { + if (!other) { + boost::throw_exception(UnsupportedOperationException(L"clone must not be null")); } + return other; +} + +int32_t LuceneObject::hashCode() { + return (int32_t)(int64_t)this; +} + +bool LuceneObject::equals(const LuceneObjectPtr& other) { + return (other && this == other.get()); +} + +int32_t LuceneObject::compareTo(const LuceneObjectPtr& other) { + return (int32_t)(this - other.get()); +} + +String LuceneObject::toString() { + return StringUtils::toString(hashCode()); +} + } diff --git a/src/core/util/LuceneSignal.cpp b/src/core/util/LuceneSignal.cpp index 6dd2c32d..e2aef404 100644 --- a/src/core/util/LuceneSignal.cpp +++ b/src/core/util/LuceneSignal.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,40 +8,38 @@ #include "LuceneSignal.h" #include "Synchronize.h" -namespace Lucene -{ - LuceneSignal::LuceneSignal(SynchronizePtr objectLock) - { - this->objectLock = objectLock; - } - - LuceneSignal::~LuceneSignal() - { - } - - void LuceneSignal::createSignal(LuceneSignalPtr& signal, SynchronizePtr objectLock) - { - static boost::mutex lockMutex; - boost::mutex::scoped_lock syncLock(lockMutex); - if (!signal) - signal = newInstance(objectLock); +namespace Lucene { + +LuceneSignal::LuceneSignal(const SynchronizePtr& objectLock) { + this->objectLock = objectLock; +} + +LuceneSignal::~LuceneSignal() { +} + +void LuceneSignal::createSignal(LuceneSignalPtr& signal, const SynchronizePtr& objectLock) { + static boost::mutex lockMutex; + boost::mutex::scoped_lock syncLock(lockMutex); + if (!signal) { + signal = newInstance(objectLock); } - - void LuceneSignal::wait(int32_t timeout) - { - int32_t relockCount = objectLock ? objectLock->unlockAll() : 0; - boost::mutex::scoped_lock waitLock(waitMutex); - while (!signalCondition.timed_wait(waitMutex, boost::posix_time::milliseconds(timeout))) - { - if (timeout != 0 || signalCondition.timed_wait(waitMutex, boost::posix_time::milliseconds(10))) - break; +} + +void LuceneSignal::wait(int32_t timeout) { + int32_t relockCount = objectLock ? 
objectLock->unlockAll() : 0; + boost::mutex::scoped_lock waitLock(waitMutex); + while (!signalCondition.timed_wait(waitMutex, boost::posix_time::milliseconds(timeout))) { + if (timeout != 0 || signalCondition.timed_wait(waitMutex, boost::posix_time::milliseconds(10))) { + break; } - for (int32_t relock = 0; relock < relockCount; ++relock) - objectLock->lock(); } - - void LuceneSignal::notifyAll() - { - signalCondition.notify_all(); + for (int32_t relock = 0; relock < relockCount; ++relock) { + objectLock->lock(); } } + +void LuceneSignal::notifyAll() { + signalCondition.notify_all(); +} + +} diff --git a/src/core/util/LuceneSync.cpp b/src/core/util/LuceneSync.cpp index 49e0b854..9c195a47 100644 --- a/src/core/util/LuceneSync.cpp +++ b/src/core/util/LuceneSync.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,46 +9,39 @@ #include "Synchronize.h" #include "LuceneSignal.h" -namespace Lucene -{ - LuceneSync::~LuceneSync() - { - } - - SynchronizePtr LuceneSync::getSync() - { - Synchronize::createSync(objectLock); - return objectLock; - } - - LuceneSignalPtr LuceneSync::getSignal() - { - LuceneSignal::createSignal(objectSignal, getSync()); - return objectSignal; - } - - void LuceneSync::lock(int32_t timeout) - { - getSync()->lock(); - } - - void LuceneSync::unlock() - { - getSync()->unlock(); - } - - bool LuceneSync::holdsLock() - { - return getSync()->holdsLock(); - } - - void LuceneSync::wait(int32_t timeout) - { - getSignal()->wait(timeout); - } - - void LuceneSync::notifyAll() - { - getSignal()->notifyAll(); - } +namespace Lucene { + +LuceneSync::~LuceneSync() { +} + +SynchronizePtr LuceneSync::getSync() { + Synchronize::createSync(objectLock); + return objectLock; +} + +LuceneSignalPtr LuceneSync::getSignal() { + LuceneSignal::createSignal(objectSignal, getSync()); + return objectSignal; +} + +void LuceneSync::lock(int32_t timeout) { + getSync()->lock(); +} + +void LuceneSync::unlock() { + getSync()->unlock(); +} + +bool LuceneSync::holdsLock() { + return getSync()->holdsLock(); +} + +void LuceneSync::wait(int32_t timeout) { + getSignal()->wait(timeout); +} + +void LuceneSync::notifyAll() { + getSignal()->notifyAll(); +} + } diff --git a/src/core/util/LuceneThread.cpp b/src/core/util/LuceneThread.cpp index 2b2b3233..591caa02 100644 --- a/src/core/util/LuceneThread.cpp +++ b/src/core/util/LuceneThread.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,117 +8,103 @@ #include #include "LuceneThread.h" -namespace Lucene -{ - #if defined(_WIN32) || defined(_WIN64) - const int32_t LuceneThread::MAX_PRIORITY = THREAD_PRIORITY_HIGHEST; - const int32_t LuceneThread::NORM_PRIORITY = THREAD_PRIORITY_NORMAL; - const int32_t LuceneThread::MIN_PRIORITY = THREAD_PRIORITY_LOWEST; - #else - const int32_t LuceneThread::MAX_PRIORITY = 2; - const int32_t LuceneThread::NORM_PRIORITY = 0; - const int32_t LuceneThread::MIN_PRIORITY = -2; - #endif - - LuceneThread::LuceneThread() - { - running = false; +namespace Lucene { + +#if defined(_WIN32) || defined(_WIN64) +const int32_t LuceneThread::MAX_THREAD_PRIORITY = THREAD_PRIORITY_HIGHEST; +const int32_t LuceneThread::NORM_THREAD_PRIORITY = THREAD_PRIORITY_NORMAL; +const int32_t LuceneThread::MIN_THREAD_PRIORITY = THREAD_PRIORITY_LOWEST; +#else +const int32_t LuceneThread::MAX_THREAD_PRIORITY = 2; +const int32_t LuceneThread::NORM_THREAD_PRIORITY = 0; +const int32_t LuceneThread::MIN_THREAD_PRIORITY = -2; +#endif + +LuceneThread::LuceneThread() { + running = false; +} + +LuceneThread::~LuceneThread() { +} + +void LuceneThread::start() { + setRunning(false); + thread = newInstance(LuceneThread::runThread, this); + setRunning(true); +} + +void LuceneThread::runThread(LuceneThread* thread) { + LuceneThreadPtr threadObject(thread->shared_from_this()); + try { + threadObject->run(); + } catch (...) 
{ } - - LuceneThread::~LuceneThread() - { + threadObject->setRunning(false); + threadObject.reset(); +} + +void LuceneThread::setRunning(bool running) { + SyncLock syncLock(this); + this->running = running; +} + +bool LuceneThread::isRunning() { + SyncLock syncLock(this); + return running; +} + +bool LuceneThread::isAlive() { + return (thread && isRunning()); +} + +void LuceneThread::setPriority(int32_t priority) { +#if defined(_WIN32) || defined(_WIN64) + if (thread) { + SetThreadPriority(thread->native_handle(), priority); } - - void LuceneThread::start() - { - setRunning(false); - thread = newInstance(LuceneThread::runThread, this); - setRunning(true); +#endif +} + +int32_t LuceneThread::getPriority() { +#if defined(_WIN32) || defined(_WIN64) + return thread ? GetThreadPriority(thread->native_handle()) : NORM_THREAD_PRIORITY; +#else + return NORM_THREAD_PRIORITY; +#endif +} + +void LuceneThread::yield() { + if (thread) { + thread->yield(); } - - void LuceneThread::runThread(LuceneThread* thread) - { - LuceneThreadPtr threadObject(thread->shared_from_this()); - try - { - threadObject->run(); +} + +bool LuceneThread::join(int32_t timeout) { + while (isAlive() && !thread->timed_join(boost::posix_time::milliseconds(timeout))) { + if (timeout != 0) { + return false; } - catch (...) 
- { + if (thread->timed_join(boost::posix_time::milliseconds(10))) { + return true; } - threadObject->setRunning(false); - threadObject.reset(); - ReleaseThreadCache(); } + return true; +} + +int64_t LuceneThread::currentId() { +#if defined(_WIN32) || defined(_WIN64) + return (int64_t)GetCurrentThreadId(); +#else + return (int64_t)pthread_self(); +#endif +} + +void LuceneThread::threadSleep(int32_t time) { + boost::this_thread::sleep(boost::posix_time::milliseconds(time)); +} + +void LuceneThread::threadYield() { + boost::this_thread::yield(); +} - void LuceneThread::setRunning(bool running) - { - SyncLock syncLock(this); - this->running = running; - } - - bool LuceneThread::isRunning() - { - SyncLock syncLock(this); - return running; - } - - bool LuceneThread::isAlive() - { - return (thread && isRunning()); - } - - void LuceneThread::setPriority(int32_t priority) - { - #if defined(_WIN32) || defined(_WIN64) - if (thread) - SetThreadPriority(thread->native_handle(), priority); - #endif - } - - int32_t LuceneThread::getPriority() - { - #if defined(_WIN32) || defined(_WIN64) - return thread ? 
GetThreadPriority(thread->native_handle()) : NORM_PRIORITY; - #else - return NORM_PRIORITY; - #endif - } - - void LuceneThread::yield() - { - if (thread) - thread->yield(); - } - - bool LuceneThread::join(int32_t timeout) - { - while (isAlive() && !thread->timed_join(boost::posix_time::milliseconds(timeout))) - { - if (timeout != 0) - return false; - if (thread->timed_join(boost::posix_time::milliseconds(10))) - return true; - } - return true; - } - - int64_t LuceneThread::currentId() - { - #if defined(_WIN32) || defined(_WIN64) - return GetCurrentThreadId(); - #else - return pthread_self(); - #endif - } - - void LuceneThread::threadSleep(int32_t time) - { - boost::this_thread::sleep(boost::posix_time::milliseconds(time)); - } - - void LuceneThread::threadYield() - { - boost::this_thread::yield(); - } } diff --git a/src/core/util/MiscUtils.cpp b/src/core/util/MiscUtils.cpp index c04cc71b..f5856ce3 100644 --- a/src/core/util/MiscUtils.cpp +++ b/src/core/util/MiscUtils.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,144 +8,120 @@ #include "MiscUtils.h" #include "LuceneObject.h" -namespace Lucene -{ - const uint32_t MiscUtils::SINGLE_EXPONENT_MASK = 0x7f800000; - const uint32_t MiscUtils::SINGLE_MANTISSA_MASK = 0x007fffff; - const uint32_t MiscUtils::SINGLE_NAN_BITS = (MiscUtils::SINGLE_EXPONENT_MASK | 0x00400000); +namespace Lucene { - const uint64_t MiscUtils::DOUBLE_SIGN_MASK = 0x8000000000000000LL; - const uint64_t MiscUtils::DOUBLE_EXPONENT_MASK = 0x7ff0000000000000LL; - const uint64_t MiscUtils::DOUBLE_MANTISSA_MASK = 0x000fffffffffffffLL; - const uint64_t MiscUtils::DOUBLE_NAN_BITS = DOUBLE_EXPONENT_MASK | 0x0008000000000000LL; +const uint32_t MiscUtils::SINGLE_EXPONENT_MASK = 0x7f800000; +const uint32_t MiscUtils::SINGLE_MANTISSA_MASK = 0x007fffff; +const uint32_t MiscUtils::SINGLE_NAN_BITS = (MiscUtils::SINGLE_EXPONENT_MASK | 0x00400000); - uint64_t MiscUtils::getTimeMillis(boost::posix_time::ptime time) - { - return boost::posix_time::time_duration(time - boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1))).total_milliseconds(); - } - - uint64_t MiscUtils::currentTimeMillis() - { - return getTimeMillis(boost::posix_time::microsec_clock::universal_time()); - } - - int32_t MiscUtils::getNextSize(int32_t targetSize) - { - return (targetSize >> 3) + (targetSize < 9 ? 3 : 6) + targetSize; - } - - int32_t MiscUtils::getShrinkSize(int32_t currentSize, int32_t targetSize) - { - int32_t newSize = getNextSize(targetSize); - return (newSize < currentSize / 2) ? 
newSize : currentSize; - } - - int32_t MiscUtils::bytesDifference(uint8_t* bytes1, int32_t len1, uint8_t* bytes2, int32_t len2) - { - int32_t len = std::min(len1, len2); - for (int32_t i = 0; i < len; ++i) - { - if (bytes1[i] != bytes2[i]) - return i; +const uint64_t MiscUtils::DOUBLE_SIGN_MASK = 0x8000000000000000LL; +const uint64_t MiscUtils::DOUBLE_EXPONENT_MASK = 0x7ff0000000000000LL; +const uint64_t MiscUtils::DOUBLE_MANTISSA_MASK = 0x000fffffffffffffLL; +const uint64_t MiscUtils::DOUBLE_NAN_BITS = DOUBLE_EXPONENT_MASK | 0x0008000000000000LL; + +uint64_t MiscUtils::getTimeMillis(boost::posix_time::ptime time) { + return boost::posix_time::time_duration(time - boost::posix_time::ptime(boost::gregorian::date(1970, 1, 1))).total_milliseconds(); +} + +uint64_t MiscUtils::currentTimeMillis() { + return getTimeMillis(boost::posix_time::microsec_clock::universal_time()); +} + +int32_t MiscUtils::getNextSize(int32_t targetSize) { + return (targetSize >> 3) + (targetSize < 9 ? 3 : 6) + targetSize; +} + +int32_t MiscUtils::getShrinkSize(int32_t currentSize, int32_t targetSize) { + int32_t newSize = getNextSize(targetSize); + return (newSize < currentSize / 2) ? newSize : currentSize; +} + +int32_t MiscUtils::bytesDifference(uint8_t* bytes1, int32_t len1, uint8_t* bytes2, int32_t len2) { + int32_t len = std::min(len1, len2); + for (int32_t i = 0; i < len; ++i) { + if (bytes1[i] != bytes2[i]) { + return i; } - return len; - } - - int32_t MiscUtils::hashCode(const wchar_t* array, int32_t start, int32_t end) - { - return hashCode(array + start, array + end, hashNumeric); } - - int32_t MiscUtils::hashCode(const uint8_t* array, int32_t start, int32_t end) - { - return hashCode(array + start, array + end, hashNumeric); - } - - int32_t MiscUtils::hashCode(bool value) - { - return value ? 
1231 : 1237; - } - - int32_t MiscUtils::doubleToIntBits(double value) - { - int32_t intValue = 0; - float floatValue = (float)value; - std::memcpy(&intValue, &floatValue, sizeof(float)); - - if ((intValue & SINGLE_EXPONENT_MASK) == SINGLE_EXPONENT_MASK) - { - if (intValue & SINGLE_MANTISSA_MASK) - return SINGLE_NAN_BITS; + return len; +} + +int32_t MiscUtils::hashCode(const wchar_t* array, int32_t start, int32_t end) { + return hashCode(array + start, array + end, hashNumeric); +} + +int32_t MiscUtils::hashCode(const uint8_t* array, int32_t start, int32_t end) { + return hashCode(array + start, array + end, hashNumeric); +} + +int32_t MiscUtils::hashCode(bool value) { + return value ? 1231 : 1237; +} + +int32_t MiscUtils::doubleToIntBits(double value) { + int32_t intValue = 0; + float floatValue = (float)value; + std::memcpy(&intValue, &floatValue, sizeof(float)); + + if ((intValue & SINGLE_EXPONENT_MASK) == SINGLE_EXPONENT_MASK) { + if (intValue & SINGLE_MANTISSA_MASK) { + return SINGLE_NAN_BITS; } - - return intValue; - } - - int32_t MiscUtils::doubleToRawIntBits(double value) - { - int32_t intValue = 0; - float floatValue = (float)value; - std::memcpy(&intValue, &floatValue, sizeof(float)); - return intValue; - } - - double MiscUtils::intBitsToDouble(int32_t bits) - { - float floatValue = 0; - std::memcpy(&floatValue, &bits, sizeof(int32_t)); - return (double)floatValue; } - - int64_t MiscUtils::doubleToLongBits(double value) - { - int64_t longValue = 0; - std::memcpy(&longValue, &value, sizeof(double)); - - if ((longValue & DOUBLE_EXPONENT_MASK) == DOUBLE_EXPONENT_MASK) - { - if (longValue & DOUBLE_MANTISSA_MASK) - return DOUBLE_NAN_BITS; + + return intValue; +} + +int32_t MiscUtils::doubleToRawIntBits(double value) { + int32_t intValue = 0; + float floatValue = (float)value; + std::memcpy(&intValue, &floatValue, sizeof(float)); + return intValue; +} + +double MiscUtils::intBitsToDouble(int32_t bits) { + float floatValue = 0; + std::memcpy(&floatValue, &bits, 
sizeof(int32_t)); + return (double)floatValue; +} + +int64_t MiscUtils::doubleToLongBits(double value) { + int64_t longValue = 0; + std::memcpy(&longValue, &value, sizeof(double)); + + if ((longValue & DOUBLE_EXPONENT_MASK) == DOUBLE_EXPONENT_MASK) { + if (longValue & DOUBLE_MANTISSA_MASK) { + return DOUBLE_NAN_BITS; } - - return longValue; - } - - int64_t MiscUtils::doubleToRawLongBits(double value) - { - int64_t longValue = 0; - std::memcpy(&longValue, &value, sizeof(double)); - return longValue; - } - - double MiscUtils::longBitsToDouble(int64_t bits) - { - double doubleValue = 0; - std::memcpy(&doubleValue, &bits, sizeof(int64_t)); - return doubleValue; } - bool MiscUtils::isInfinite(double value) - { - return (value == std::numeric_limits::infinity() || value == -std::numeric_limits::infinity()); - } - - bool MiscUtils::isNaN(double value) - { - return (value != value); - } - - bool MiscUtils::equalTypes(LuceneObjectPtr first, LuceneObjectPtr second) - { - return (typeid(*first) == typeid(*second)); - } - - int64_t MiscUtils::unsignedShift(int64_t num, int64_t shift) - { - return (shift & 0x3f) == 0 ? num : (((uint64_t)num >> 1) & 0x7fffffffffffffffLL) >> ((shift & 0x3f) - 1); - } - - int32_t MiscUtils::unsignedShift(int32_t num, int32_t shift) - { - return (shift & 0x1f) == 0 ? 
num : (((uint32_t)num >> 1) & 0x7fffffff) >> ((shift & 0x1f) - 1); - } + return longValue; +} + +int64_t MiscUtils::doubleToRawLongBits(double value) { + int64_t longValue = 0; + std::memcpy(&longValue, &value, sizeof(double)); + return longValue; +} + +double MiscUtils::longBitsToDouble(int64_t bits) { + double doubleValue = 0; + std::memcpy(&doubleValue, &bits, sizeof(int64_t)); + return doubleValue; +} + +bool MiscUtils::isInfinite(double value) { + return (value == std::numeric_limits::infinity() || value == -std::numeric_limits::infinity()); +} + +bool MiscUtils::isNaN(double value) { + return (value != value); +} + +bool MiscUtils::equalTypes(const LuceneObjectPtr& first, const LuceneObjectPtr& second) { + const LuceneObject& firstRef(*first); + const LuceneObject& secondRef(*second); + return (typeid(firstRef) == typeid(secondRef)); +} + } diff --git a/src/core/util/NumericUtils.cpp b/src/core/util/NumericUtils.cpp index 6b6ee5df..3f3f4f64 100644 --- a/src/core/util/NumericUtils.cpp +++ b/src/core/util/NumericUtils.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,250 +9,229 @@ #include "MiscUtils.h" #include "StringUtils.h" -namespace Lucene -{ - /// The default precision step used by {@link NumericField}, {@link NumericTokenStream}, {@link NumericRangeQuery}, - /// and {@link NumericRangeFilter} as default. - const int32_t NumericUtils::PRECISION_STEP_DEFAULT = 4; - - /// Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + - /// shift in the first character. 
- const wchar_t NumericUtils::SHIFT_START_LONG = (wchar_t)0x20; - - /// The maximum term length (used for char[] buffer size) for encoding long values. - /// @see #longToPrefixCoded(long,int,char[]) - const int32_t NumericUtils::BUF_SIZE_LONG = 63 / 7 + 2; - - /// Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + - /// shift in the first character. - const wchar_t NumericUtils::SHIFT_START_INT = (wchar_t)0x60; - - /// The maximum term length (used for char[] buffer size) for encoding int values. - /// @see #intToPrefixCoded(int,int,char[]) - const int32_t NumericUtils::BUF_SIZE_INT = 31 / 7 + 2; - - NumericUtils::~NumericUtils() - { - } - - int32_t NumericUtils::longToPrefixCoded(int64_t val, int32_t shift, CharArray buffer) - { - if (shift > 63 || shift < 0) - boost::throw_exception(IllegalArgumentException(L"Illegal shift value, must be 0..63")); - int32_t nChars = (63 - shift) / 7 + 1; - int32_t len = nChars + 1; - buffer[0] = (wchar_t)(SHIFT_START_LONG + shift); - int64_t sortableBits = val ^ 0x8000000000000000LL; - sortableBits = MiscUtils::unsignedShift(sortableBits, (int64_t)shift); - while (nChars >= 1) - { - // Store 7 bits per character for good efficiency when UTF-8 encoding. The whole number is - // right-justified so that lucene can prefix-encode the terms more efficiently. - buffer[nChars--] = (wchar_t)(sortableBits & 0x7f); - sortableBits = MiscUtils::unsignedShift(sortableBits, (int64_t)7); +namespace Lucene { + +/// The default precision step used by {@link NumericField}, {@link NumericTokenStream}, {@link NumericRangeQuery}, +/// and {@link NumericRangeFilter} as default. +const int32_t NumericUtils::PRECISION_STEP_DEFAULT = 4; + +/// Longs are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_LONG + +/// shift in the first character. 
+const wchar_t NumericUtils::SHIFT_START_LONG = (wchar_t)0x20; + +/// The maximum term length (used for char[] buffer size) for encoding long values. +/// @see #longToPrefixCoded(long,int,char[]) +const int32_t NumericUtils::BUF_SIZE_LONG = 63 / 7 + 2; + +/// Integers are stored at lower precision by shifting off lower bits. The shift count is stored as SHIFT_START_INT + +/// shift in the first character. +const wchar_t NumericUtils::SHIFT_START_INT = (wchar_t)0x60; + +/// The maximum term length (used for char[] buffer size) for encoding int values. +/// @see #intToPrefixCoded(int,int,char[]) +const int32_t NumericUtils::BUF_SIZE_INT = 31 / 7 + 2; + +NumericUtils::~NumericUtils() { +} + +int32_t NumericUtils::longToPrefixCoded(int64_t val, int32_t shift, CharArray buffer) { + if (shift > 63 || shift < 0) { + boost::throw_exception(IllegalArgumentException(L"Illegal shift value, must be 0..63")); + } + int32_t nChars = (63 - shift) / 7 + 1; + int32_t len = nChars + 1; + buffer[0] = (wchar_t)(SHIFT_START_LONG + shift); + int64_t sortableBits = val ^ 0x8000000000000000LL; + sortableBits = MiscUtils::unsignedShift(sortableBits, (int64_t)shift); + while (nChars >= 1) { + // Store 7 bits per character for good efficiency when UTF-8 encoding. The whole number is + // right-justified so that lucene can prefix-encode the terms more efficiently. 
+ buffer[nChars--] = (wchar_t)(sortableBits & 0x7f); + sortableBits = MiscUtils::unsignedShift(sortableBits, (int64_t)7); + } + return len; +} + +String NumericUtils::longToPrefixCoded(int64_t val, int32_t shift) { + CharArray buffer(CharArray::newInstance(BUF_SIZE_LONG)); + int32_t len = longToPrefixCoded(val, shift, buffer); + return String(buffer.get(), len); +} + +String NumericUtils::longToPrefixCoded(int64_t val) { + return longToPrefixCoded(val, 0); +} + +int32_t NumericUtils::intToPrefixCoded(int32_t val, int32_t shift, CharArray buffer) { + if (shift > 31 || shift < 0) { + boost::throw_exception(IllegalArgumentException(L"Illegal shift value, must be 0..31")); + } + int32_t nChars = (31 - shift) / 7 + 1; + int32_t len = nChars + 1; + buffer[0] = (wchar_t)(SHIFT_START_INT + shift); + int32_t sortableBits = val ^ 0x80000000; + sortableBits = MiscUtils::unsignedShift(sortableBits, shift); + while (nChars >= 1) { + // Store 7 bits per character for good efficiency when UTF-8 encoding. The whole number is + // right-justified so that lucene can prefix-encode the terms more efficiently. 
+ buffer[nChars--] = (wchar_t)(sortableBits & 0x7f); + sortableBits = MiscUtils::unsignedShift(sortableBits, 7); + } + return len; +} + +String NumericUtils::intToPrefixCoded(int32_t val, int32_t shift) { + CharArray buffer(CharArray::newInstance(BUF_SIZE_INT)); + int32_t len = intToPrefixCoded(val, shift, buffer); + return String(buffer.get(), len); +} + +String NumericUtils::intToPrefixCoded(int32_t val) { + return intToPrefixCoded(val, 0); +} + +int64_t NumericUtils::prefixCodedToLong(const String& prefixCoded) { + int32_t shift = prefixCoded[0] - SHIFT_START_LONG; + if (shift > 63 || shift < 0) { + boost::throw_exception(NumberFormatException(L"Invalid shift value in prefixCoded string (is encoded value really a LONG?)")); + } + int64_t sortableBits = 0; + for (int32_t i = 1, len = prefixCoded.length(); i < len; ++i) { + sortableBits <<= 7; + wchar_t ch = prefixCoded[i]; + if (ch > 0x7f) { + boost::throw_exception(NumberFormatException(L"Invalid prefixCoded numerical value representation (char " + StringUtils::toString(ch, 16) + + L" at position " + StringUtils::toString(i) + L" is invalid)")); } - return len; - } - - String NumericUtils::longToPrefixCoded(int64_t val, int32_t shift) - { - CharArray buffer(CharArray::newInstance(BUF_SIZE_LONG)); - int32_t len = longToPrefixCoded(val, shift, buffer); - return String(buffer.get(), len); + sortableBits |= (int64_t)ch; } - - String NumericUtils::longToPrefixCoded(int64_t val) - { - return longToPrefixCoded(val, 0); - } - - int32_t NumericUtils::intToPrefixCoded(int32_t val, int32_t shift, CharArray buffer) - { - if (shift > 31 || shift < 0) - boost::throw_exception(IllegalArgumentException(L"Illegal shift value, must be 0..31")); - int32_t nChars = (31 - shift) / 7 + 1; - int32_t len = nChars + 1; - buffer[0] = (wchar_t)(SHIFT_START_INT + shift); - int32_t sortableBits = val ^ 0x80000000; - sortableBits = MiscUtils::unsignedShift(sortableBits, shift); - while (nChars >= 1) - { - // Store 7 bits per character for 
good efficiency when UTF-8 encoding. The whole number is - // right-justified so that lucene can prefix-encode the terms more efficiently. - buffer[nChars--] = (wchar_t)(sortableBits & 0x7f); - sortableBits = MiscUtils::unsignedShift(sortableBits, 7); + return (sortableBits << shift) ^ 0x8000000000000000LL; +} + +int32_t NumericUtils::prefixCodedToInt(const String& prefixCoded) { + int32_t shift = prefixCoded[0] - SHIFT_START_INT; + if (shift > 31 || shift < 0) { + boost::throw_exception(NumberFormatException(L"Invalid shift value in prefixCoded string (is encoded value really a INT?)")); + } + int32_t sortableBits = 0; + for (int32_t i = 1, len = prefixCoded.length(); i < len; ++i) { + sortableBits <<= 7; + wchar_t ch = prefixCoded[i]; + if (ch > 0x7f) { + boost::throw_exception(NumberFormatException(L"Invalid prefixCoded numerical value representation (char " + StringUtils::toString(ch, 16) + + L" at position " + StringUtils::toString(i) + L" is invalid)")); } - return len; - } - - String NumericUtils::intToPrefixCoded(int32_t val, int32_t shift) - { - CharArray buffer(CharArray::newInstance(BUF_SIZE_INT)); - int32_t len = intToPrefixCoded(val, shift, buffer); - return String(buffer.get(), len); + sortableBits |= (int32_t)ch; } - - String NumericUtils::intToPrefixCoded(int32_t val) - { - return intToPrefixCoded(val, 0); + return (sortableBits << shift) ^ 0x80000000; +} + +int64_t NumericUtils::doubleToSortableLong(double val) { + int64_t f = MiscUtils::doubleToRawLongBits(val); + if (f < 0) { + f ^= 0x7fffffffffffffffLL; } - - int64_t NumericUtils::prefixCodedToLong(const String& prefixCoded) - { - int32_t shift = prefixCoded[0] - SHIFT_START_LONG; - if (shift > 63 || shift < 0) - boost::throw_exception(NumberFormatException(L"Invalid shift value in prefixCoded string (is encoded value really a LONG?)")); - int64_t sortableBits = 0; - for (int32_t i = 1, len = prefixCoded.length(); i < len; ++i) - { - sortableBits <<= 7; - wchar_t ch = prefixCoded[i]; - if (ch > 
0x7f) - { - boost::throw_exception(NumberFormatException(L"Invalid prefixCoded numerical value representation (char " + StringUtils::toString(ch, 16) + - L" at position " + StringUtils::toString(i) + L" is invalid)")); - } - sortableBits |= (int64_t)ch; - } - return (sortableBits << shift) ^ 0x8000000000000000LL; + return f; +} + +String NumericUtils::doubleToPrefixCoded(double val) { + return longToPrefixCoded(doubleToSortableLong(val)); +} + +double NumericUtils::sortableLongToDouble(int64_t val) { + if (val < 0) { + val ^= 0x7fffffffffffffffLL; } - - int32_t NumericUtils::prefixCodedToInt(const String& prefixCoded) - { - int32_t shift = prefixCoded[0] - SHIFT_START_INT; - if (shift > 31 || shift < 0) - boost::throw_exception(NumberFormatException(L"Invalid shift value in prefixCoded string (is encoded value really a INT?)")); - int32_t sortableBits = 0; - for (int32_t i = 1, len = prefixCoded.length(); i < len; ++i) - { - sortableBits <<= 7; - wchar_t ch = prefixCoded[i]; - if (ch > 0x7f) - { - boost::throw_exception(NumberFormatException(L"Invalid prefixCoded numerical value representation (char " + StringUtils::toString(ch, 16) + - L" at position " + StringUtils::toString(i) + L" is invalid)")); - } - sortableBits |= (int32_t)ch; + return MiscUtils::longBitsToDouble(val); +} + +double NumericUtils::prefixCodedToDouble(const String& val) { + return sortableLongToDouble(prefixCodedToLong(val)); +} + +void NumericUtils::splitLongRange(const LongRangeBuilderPtr& builder, int32_t precisionStep, int64_t minBound, int64_t maxBound) { + splitRange(builder, 64, precisionStep, minBound, maxBound); +} + +void NumericUtils::splitIntRange(const IntRangeBuilderPtr& builder, int32_t precisionStep, int32_t minBound, int32_t maxBound) { + splitRange(builder, 32, precisionStep, (int64_t)minBound, (int64_t)maxBound); +} + +void NumericUtils::splitRange(const LuceneObjectPtr& builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound) { + if 
(precisionStep < 1) { + boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); + } + if (minBound > maxBound) { + return; + } + for (int32_t shift = 0; ; shift += precisionStep) { + // calculate new bounds for inner precision + int64_t diff = (int64_t)1 << (shift + precisionStep); + int64_t mask = (((int64_t)1 << precisionStep) - (int64_t)1) << shift; + bool hasLower = ((minBound & mask) != 0); + bool hasUpper = ((maxBound & mask) != mask); + int64_t nextMinBound = ((hasLower ? (minBound + diff) : minBound) & ~mask); + int64_t nextMaxBound = ((hasUpper ? (maxBound - diff) : maxBound) & ~mask); + bool lowerWrapped = nextMinBound < minBound; + bool upperWrapped = nextMaxBound > maxBound; + + if (shift + precisionStep >= valSize || nextMinBound>nextMaxBound || lowerWrapped || upperWrapped) { + // We are in the lowest precision or the next precision is not available. + addRange(builder, valSize, minBound, maxBound, shift); + break; // exit the split recursion loop } - return (sortableBits << shift) ^ 0x80000000; - } - - int64_t NumericUtils::doubleToSortableLong(double val) - { - int64_t f = MiscUtils::doubleToRawLongBits(val); - if (f < 0) - f ^= 0x7fffffffffffffffLL; - return f; - } - - String NumericUtils::doubleToPrefixCoded(double val) - { - return longToPrefixCoded(doubleToSortableLong(val)); - } - - double NumericUtils::sortableLongToDouble(int64_t val) - { - if (val < 0) - val ^= 0x7fffffffffffffffLL; - return MiscUtils::longBitsToDouble(val); - } - - double NumericUtils::prefixCodedToDouble(const String& val) - { - return sortableLongToDouble(prefixCodedToLong(val)); - } - - void NumericUtils::splitLongRange(LongRangeBuilderPtr builder, int32_t precisionStep, int64_t minBound, int64_t maxBound) - { - splitRange(builder, 64, precisionStep, minBound, maxBound); - } - - void NumericUtils::splitIntRange(IntRangeBuilderPtr builder, int32_t precisionStep, int32_t minBound, int32_t maxBound) - { - splitRange(builder, 32, precisionStep, 
(int64_t)minBound, (int64_t)maxBound); - } - - void NumericUtils::splitRange(LuceneObjectPtr builder, int32_t valSize, int32_t precisionStep, int64_t minBound, int64_t maxBound) - { - if (precisionStep < 1) - boost::throw_exception(IllegalArgumentException(L"precisionStep must be >=1")); - if (minBound > maxBound) - return; - for (int32_t shift = 0; ; shift += precisionStep) - { - // calculate new bounds for inner precision - int64_t diff = (int64_t)1 << (shift + precisionStep); - int64_t mask = (((int64_t)1 << precisionStep) - (int64_t)1) << shift; - bool hasLower = ((minBound & mask) != 0); - bool hasUpper = ((maxBound & mask) != mask); - int64_t nextMinBound = ((hasLower ? (minBound + diff) : minBound) & ~mask); - int64_t nextMaxBound = ((hasUpper ? (maxBound - diff) : maxBound) & ~mask); - bool lowerWrapped = nextMinBound < minBound; - bool upperWrapped = nextMaxBound > maxBound; - - if (shift + precisionStep >= valSize || nextMinBound>nextMaxBound || lowerWrapped || upperWrapped) - { - // We are in the lowest precision or the next precision is not available. - addRange(builder, valSize, minBound, maxBound, shift); - break; // exit the split recursion loop - } - - if (hasLower) - addRange(builder, valSize, minBound, minBound | mask, shift); - if (hasUpper) - addRange(builder, valSize, maxBound & ~mask, maxBound, shift); - - // recurse to next precision - minBound = nextMinBound; - maxBound = nextMaxBound; + + if (hasLower) { + addRange(builder, valSize, minBound, minBound | mask, shift); } - } - - void NumericUtils::addRange(LuceneObjectPtr builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift) - { - // for the max bound set all lower bits (that were shifted away): this is important for testing or other - // usages of the splitted range (eg. to reconstruct the full range). The prefixEncoding will remove the - // bits anyway, so they do not hurt! 
- maxBound |= ((int64_t)1 << shift) - (int64_t)1; - // delegate to correct range builder - switch (valSize) - { - case 64: - boost::dynamic_pointer_cast(builder)->addRange(minBound, maxBound, shift); - break; - case 32: - boost::dynamic_pointer_cast(builder)->addRange((int32_t)minBound, (int32_t)maxBound, shift); - break; - default: - boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64.")); + if (hasUpper) { + addRange(builder, valSize, maxBound & ~mask, maxBound, shift); } + + // recurse to next precision + minBound = nextMinBound; + maxBound = nextMaxBound; } - - LongRangeBuilder::~LongRangeBuilder() - { - } - - void LongRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) - { - boost::throw_exception(UnsupportedOperationException()); - } - - void LongRangeBuilder::addRange(int64_t min, int64_t max, int32_t shift) - { - addRange(NumericUtils::longToPrefixCoded(min, shift), NumericUtils::longToPrefixCoded(max, shift)); - } - - IntRangeBuilder::~IntRangeBuilder() - { - } - - void IntRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) - { - boost::throw_exception(UnsupportedOperationException()); - } - - void IntRangeBuilder::addRange(int32_t min, int32_t max, int32_t shift) - { - addRange(NumericUtils::intToPrefixCoded(min, shift), NumericUtils::intToPrefixCoded(max, shift)); +} + +void NumericUtils::addRange(const LuceneObjectPtr& builder, int32_t valSize, int64_t minBound, int64_t maxBound, int32_t shift) { + // for the max bound set all lower bits (that were shifted away): this is important for testing or other + // usages of the splitted range (eg. to reconstruct the full range). The prefixEncoding will remove the + // bits anyway, so they do not hurt! 
+ maxBound |= ((int64_t)1 << shift) - (int64_t)1; + // delegate to correct range builder + switch (valSize) { + case 64: + boost::dynamic_pointer_cast(builder)->addRange(minBound, maxBound, shift); + break; + case 32: + boost::dynamic_pointer_cast(builder)->addRange((int32_t)minBound, (int32_t)maxBound, shift); + break; + default: + boost::throw_exception(IllegalArgumentException(L"valSize must be 32 or 64.")); } } + +LongRangeBuilder::~LongRangeBuilder() { +} + +void LongRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { + boost::throw_exception(UnsupportedOperationException()); +} + +void LongRangeBuilder::addRange(int64_t min, int64_t max, int32_t shift) { + addRange(NumericUtils::longToPrefixCoded(min, shift), NumericUtils::longToPrefixCoded(max, shift)); +} + +IntRangeBuilder::~IntRangeBuilder() { +} + +void IntRangeBuilder::addRange(const String& minPrefixCoded, const String& maxPrefixCoded) { + boost::throw_exception(UnsupportedOperationException()); +} + +void IntRangeBuilder::addRange(int32_t min, int32_t max, int32_t shift) { + addRange(NumericUtils::intToPrefixCoded(min, shift), NumericUtils::intToPrefixCoded(max, shift)); +} + +} diff --git a/src/core/util/OpenBitSet.cpp b/src/core/util/OpenBitSet.cpp index bce04a51..c5412a18 100644 --- a/src/core/util/OpenBitSet.cpp +++ b/src/core/util/OpenBitSet.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,603 +10,566 @@ #include "BitUtil.h" #include "MiscUtils.h" -namespace Lucene -{ - OpenBitSet::OpenBitSet(int64_t numBits) - { - bits = LongArray::newInstance(bits2words(numBits)); - MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0LL); - wlen = bits.size(); - } - - OpenBitSet::OpenBitSet(LongArray bits, int32_t numWords) - { - this->bits = bits; - this->wlen = numWords; - } - - OpenBitSet::~OpenBitSet() - { - } - - DocIdSetIteratorPtr OpenBitSet::iterator() - { - return newLucene(bits, wlen); - } - - bool OpenBitSet::isCacheable() - { - return true; +namespace Lucene { + +OpenBitSet::OpenBitSet(int64_t numBits) { + bits = LongArray::newInstance(bits2words(numBits)); + MiscUtils::arrayFill(bits.get(), 0, bits.size(), 0LL); + wlen = bits.size(); +} + +OpenBitSet::OpenBitSet(LongArray bits, int32_t numWords) { + this->bits = bits; + this->wlen = numWords; +} + +OpenBitSet::~OpenBitSet() { +} + +DocIdSetIteratorPtr OpenBitSet::iterator() { + return newLucene(bits, wlen); +} + +bool OpenBitSet::isCacheable() { + return true; +} + +int64_t OpenBitSet::capacity() { + return bits.size() << 6; +} + +int64_t OpenBitSet::size() { + return capacity(); +} + +bool OpenBitSet::isEmpty() { + return (cardinality() == 0); +} + +LongArray OpenBitSet::getBits() { + return bits; +} + +void OpenBitSet::setBits(LongArray bits) { + this->bits = bits; +} + +int32_t OpenBitSet::getNumWords() { + return wlen; +} + +void OpenBitSet::setNumWords(int32_t numWords) { + this->wlen = numWords; +} + +bool OpenBitSet::get(int32_t index) { + int32_t i = index >> 6; // div 64 + // signed shift will keep a negative index and force an array-index-out-of-bounds-exception, + // removing the need for an explicit check. 
+ if (i >= bits.size()) { + return false; } - - int64_t OpenBitSet::capacity() - { - return bits.size() << 6; - } - - int64_t OpenBitSet::size() - { - return capacity(); - } - - bool OpenBitSet::isEmpty() - { - return (cardinality() == 0); - } - - LongArray OpenBitSet::getBits() - { - return bits; - } - - void OpenBitSet::setBits(LongArray bits) - { - this->bits = bits; - } - - int32_t OpenBitSet::getNumWords() - { - return wlen; - } - - void OpenBitSet::setNumWords(int32_t numWords) - { - this->wlen = numWords; - } - - bool OpenBitSet::get(int32_t index) - { - int32_t i = index >> 6; // div 64 - // signed shift will keep a negative index and force an array-index-out-of-bounds-exception, - // removing the need for an explicit check. - if (i >= bits.size()) - return false; - int32_t bit = (index & 0x3f); // mod 64 - int64_t bitmask = 1LL << bit; - return ((bits[i] & bitmask) != 0); - } - - bool OpenBitSet::fastGet(int32_t index) - { - int32_t i = index >> 6; // div 64 - // signed shift will keep a negative index and force an array-index-out-of-bounds-exception, - // removing the need for an explicit check. 
- int32_t bit = (index & 0x3f); // mod 64 - int64_t bitmask = 1LL << bit; - return ((bits[i] & bitmask) != 0); - } - - bool OpenBitSet::get(int64_t index) - { - int32_t i = (int32_t)(index >> 6); // div 64 - if (i >= bits.size()) - return false; - int32_t bit = ((int32_t)index & 0x3f); // mod 64 - int64_t bitmask = 1LL << bit; - return ((bits[i] & bitmask) != 0); - } - - bool OpenBitSet::fastGet(int64_t index) - { - int32_t i = (int32_t)(index >> 6); // div 64 - int32_t bit = ((int32_t)index & 0x3f); // mod 64 - int64_t bitmask = 1LL << bit; - return ((bits[i] & bitmask) != 0); - } - - int32_t OpenBitSet::getBit(int32_t index) - { - int32_t i = index >> 6; // div 64 - int32_t bit = (index & 0x3f); // mod 64 - return (int32_t)MiscUtils::unsignedShift(bits[i], (int64_t)bit) & 0x01; - } - - void OpenBitSet::set(int64_t index) - { - int32_t wordNum = expandingWordNum(index); - int32_t bit = (int32_t)index & 0x3f; - int64_t bitmask = 1LL << bit; - bits[wordNum] |= bitmask; - } - - void OpenBitSet::fastSet(int32_t index) - { - int32_t wordNum = index >> 6; // div 64 - int32_t bit = index & 0x3f; - int64_t bitmask = 1LL << bit; - bits[wordNum] |= bitmask; - } - - void OpenBitSet::fastSet(int64_t index) - { - int32_t wordNum = (int32_t)(index >> 6); - int32_t bit = (int32_t)index & 0x3f; - int64_t bitmask = 1LL << bit; - bits[wordNum] |= bitmask; - } - - void OpenBitSet::set(int64_t startIndex, int64_t endIndex) - { - if (endIndex <= startIndex) - return; - - int32_t startWord = (int32_t)(startIndex >> 6); - - // since endIndex is one past the end, this is index of the last word to be changed - int32_t endWord = expandingWordNum(endIndex - 1); - - int64_t startmask = -1LL << (startIndex & 0x3f); - int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); - - if (startWord == endWord) - { - bits[startWord] |= (startmask & endmask); - return; - } + int32_t bit = (index & 0x3f); // mod 64 + int64_t bitmask = 1LL << bit; + return ((bits[i] & bitmask) != 0); +} - 
bits[startWord] |= startmask; - MiscUtils::arrayFill(bits.get(), startWord + 1, endWord, -1LL); - bits[endWord] |= endmask; - } - - int32_t OpenBitSet::expandingWordNum(int64_t index) - { - int32_t wordNum = (int32_t)(index >> 6); - if (wordNum >= wlen) - { - ensureCapacity(index + 1); - wlen = wordNum + 1; - } - return wordNum; - } - - void OpenBitSet::fastClear(int32_t index) - { - int32_t wordNum = index >> 6; - int32_t bit = (index & 0x03f); - int64_t bitmask = 1LL << bit; - bits[wordNum] &= ~bitmask; - } - - void OpenBitSet::fastClear(int64_t index) - { - int32_t wordNum = (int32_t)(index >> 6); - int32_t bit = (int32_t)index & 0x3f; - int64_t bitmask = 1LL << bit; - bits[wordNum] &= ~bitmask; - } - - void OpenBitSet::clear(int64_t index) - { - int32_t wordNum = (int32_t)(index >> 6); - if (wordNum >= wlen) - return; - int32_t bit = (int32_t)index & 0x3f; - int64_t bitmask = 1LL << bit; - bits[wordNum] &= ~bitmask; - } - - void OpenBitSet::clear(int32_t startIndex, int32_t endIndex) - { - if (endIndex <= startIndex) - return; - - int32_t startWord = (startIndex >> 6); - if (startWord >= wlen) - return; - - // since endIndex is one past the end, this is index of the last word to be changed. - int32_t endWord = ((endIndex - 1) >> 6); - - int64_t startmask = -1LL << (startIndex & 0x3f); - int64_t endmask = MiscUtils::unsignedShift(-1LL, (int64_t)-endIndex); - - // invert masks since we are clearing - startmask = ~startmask; - endmask = ~endmask; - - if (startWord == endWord) - { - bits[startWord] &= (startmask | endmask); - return; - } +bool OpenBitSet::fastGet(int32_t index) { + int32_t i = index >> 6; // div 64 + // signed shift will keep a negative index and force an array-index-out-of-bounds-exception, + // removing the need for an explicit check. 
+ int32_t bit = (index & 0x3f); // mod 64 + int64_t bitmask = 1LL << bit; + return ((bits[i] & bitmask) != 0); +} + +bool OpenBitSet::get(int64_t index) { + int32_t i = (int32_t)(index >> 6); // div 64 + if (i >= bits.size()) { + return false; + } + int32_t bit = ((int32_t)index & 0x3f); // mod 64 + int64_t bitmask = 1LL << bit; + return ((bits[i] & bitmask) != 0); +} + +bool OpenBitSet::fastGet(int64_t index) { + int32_t i = (int32_t)(index >> 6); // div 64 + int32_t bit = ((int32_t)index & 0x3f); // mod 64 + int64_t bitmask = 1LL << bit; + return ((bits[i] & bitmask) != 0); +} + +int32_t OpenBitSet::getBit(int32_t index) { + int32_t i = index >> 6; // div 64 + int32_t bit = (index & 0x3f); // mod 64 + return (int32_t)MiscUtils::unsignedShift(bits[i], (int64_t)bit) & 0x01; +} + +void OpenBitSet::set(int64_t index) { + int32_t wordNum = expandingWordNum(index); + int32_t bit = (int32_t)index & 0x3f; + int64_t bitmask = 1LL << bit; + bits[wordNum] |= bitmask; +} + +void OpenBitSet::fastSet(int32_t index) { + int32_t wordNum = index >> 6; // div 64 + int32_t bit = index & 0x3f; + int64_t bitmask = 1LL << bit; + bits[wordNum] |= bitmask; +} + +void OpenBitSet::fastSet(int64_t index) { + int32_t wordNum = (int32_t)(index >> 6); + int32_t bit = (int32_t)index & 0x3f; + int64_t bitmask = 1LL << bit; + bits[wordNum] |= bitmask; +} + +void OpenBitSet::set(int64_t startIndex, int64_t endIndex) { + if (endIndex <= startIndex) { + return; + } - bits[startWord] &= startmask; + int32_t startWord = (int32_t)(startIndex >> 6); - int32_t middle = std::min(wlen, endWord); - MiscUtils::arrayFill(bits.get(), startWord + 1, middle, 0LL); - if (endWord < wlen) - bits[endWord] &= endmask; + // since endIndex is one past the end, this is index of the last word to be changed + int32_t endWord = expandingWordNum(endIndex - 1); + + int64_t startmask = -1LL << (startIndex & 0x3f); + int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); + + if (startWord == endWord) { + bits[startWord] 
|= (startmask & endmask); + return; } - - void OpenBitSet::clear(int64_t startIndex, int64_t endIndex) - { - if (endIndex <= startIndex) - return; - int32_t startWord = (int32_t)(startIndex>>6); - if (startWord >= wlen) - return; + bits[startWord] |= startmask; + MiscUtils::arrayFill(bits.get(), startWord + 1, endWord, -1LL); + bits[endWord] |= endmask; +} - // since endIndex is one past the end, this is index of the last word to be changed. - int32_t endWord = (int32_t)((endIndex - 1) >> 6); +int32_t OpenBitSet::expandingWordNum(int64_t index) { + int32_t wordNum = (int32_t)(index >> 6); + if (wordNum >= wlen) { + ensureCapacity(index + 1); + wlen = wordNum + 1; + } + return wordNum; +} - int64_t startmask = -1LL << (startIndex & 0x3f); - int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); +void OpenBitSet::fastClear(int32_t index) { + int32_t wordNum = index >> 6; + int32_t bit = (index & 0x03f); + int64_t bitmask = 1LL << bit; + bits[wordNum] &= ~bitmask; +} - // invert masks since we are clearing - startmask = ~startmask; - endmask = ~endmask; +void OpenBitSet::fastClear(int64_t index) { + int32_t wordNum = (int32_t)(index >> 6); + int32_t bit = (int32_t)index & 0x3f; + int64_t bitmask = 1LL << bit; + bits[wordNum] &= ~bitmask; +} - if (startWord == endWord) - { - bits[startWord] &= (startmask | endmask); - return; - } +void OpenBitSet::clear(int64_t index) { + int32_t wordNum = (int32_t)(index >> 6); + if (wordNum >= wlen) { + return; + } + int32_t bit = (int32_t)index & 0x3f; + int64_t bitmask = 1LL << bit; + bits[wordNum] &= ~bitmask; +} - bits[startWord] &= startmask; - - int32_t middle = std::min(wlen, endWord); - MiscUtils::arrayFill(bits.get(), startWord + 1, middle, 0LL); - if (endWord < wlen) - bits[endWord] &= endmask; - } - - bool OpenBitSet::getAndSet(int32_t index) - { - int32_t wordNum = index >> 6; // div 64 - int32_t bit = (index & 0x3f); // mod 64 - int64_t bitmask = 1LL << bit; - bool val = ((bits[wordNum] & bitmask) != 0); - 
bits[wordNum] |= bitmask; - return val; - } - - bool OpenBitSet::getAndSet(int64_t index) - { - int32_t wordNum = (int32_t)(index >> 6); // div 64 - int32_t bit = (int32_t)index & 0x3f; // mod 64 - int64_t bitmask = 1LL << bit; - bool val = ((bits[wordNum] & bitmask) != 0); - bits[wordNum] |= bitmask; - return val; - } - - void OpenBitSet::fastFlip(int32_t index) - { - int32_t wordNum = index >> 6; // div 64 - int32_t bit = (index & 0x3f); // mod 64 - int64_t bitmask = 1LL << bit; - bits[wordNum] ^= bitmask; - } - - void OpenBitSet::fastFlip(int64_t index) - { - int32_t wordNum = (int32_t)(index >> 6); // div 64 - int32_t bit = (int32_t)index & 0x3f; // mod 64 - int64_t bitmask = 1LL << bit; - bits[wordNum] ^= bitmask; - } - - void OpenBitSet::flip(int64_t index) - { - int32_t wordNum = expandingWordNum(index); - int32_t bit = (int32_t)index & 0x3f; // mod 64 - int64_t bitmask = 1LL << bit; - bits[wordNum] ^= bitmask; - } - - bool OpenBitSet::flipAndGet(int32_t index) - { - int32_t wordNum = index >> 6; // div 64 - int32_t bit = (index & 0x3f); // mod 64 - int64_t bitmask = 1LL << bit; - bits[wordNum] ^= bitmask; - return ((bits[wordNum] & bitmask) != 0); - } - - bool OpenBitSet::flipAndGet(int64_t index) - { - int32_t wordNum = (int32_t)(index >> 6); // div 64 - int32_t bit = (int32_t)index & 0x3f; // mod 64 - int64_t bitmask = 1LL << bit; - bits[wordNum] ^= bitmask; - return ((bits[wordNum] & bitmask) != 0); - } - - void OpenBitSet::flip(int64_t startIndex, int64_t endIndex) - { - if (endIndex <= startIndex) - return; - int32_t startWord = (int32_t)(startIndex >> 6); - - // since endIndex is one past the end, this is index of the last word to be changed. 
- int32_t endWord = expandingWordNum(endIndex - 1); - - int64_t startmask = -1LL << (startIndex & 0x3f); - int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); - - if (startWord == endWord) - { - bits[startWord] ^= (startmask & endmask); - return; - } +void OpenBitSet::clear(int32_t startIndex, int32_t endIndex) { + if (endIndex <= startIndex) { + return; + } - bits[startWord] ^= startmask; - - for (int32_t i = startWord + 1; i < endWord; ++i) - bits[i] = ~bits[i]; - bits[endWord] ^= endmask; - } - - int64_t OpenBitSet::cardinality() - { - return BitUtil::pop_array(bits.get(), 0, wlen); - } - - int64_t OpenBitSet::intersectionCount(OpenBitSetPtr a, OpenBitSetPtr b) - { - return BitUtil::pop_intersect(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); - } - - int64_t OpenBitSet::unionCount(OpenBitSetPtr a, OpenBitSetPtr b) - { - int64_t tot = BitUtil::pop_union(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); - if (a->wlen < b->wlen) - tot += BitUtil::pop_array(b->bits.get(), a->wlen, b->wlen - a->wlen); - else if (a->wlen > b->wlen) - tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); - return tot; - } - - int64_t OpenBitSet::andNotCount(OpenBitSetPtr a, OpenBitSetPtr b) - { - int64_t tot = BitUtil::pop_andnot(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); - if (a->wlen > b->wlen) - tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); - return tot; - } - - int64_t OpenBitSet::xorCount(OpenBitSetPtr a, OpenBitSetPtr b) - { - int64_t tot = BitUtil::pop_xor(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); - if (a->wlen < b->wlen) - tot += BitUtil::pop_array(b->bits.get(), a->wlen, b->wlen - a->wlen); - else if (a->wlen > b->wlen) - tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); - return tot; - } - - int32_t OpenBitSet::nextSetBit(int32_t index) - { - int32_t i = MiscUtils::unsignedShift(index, 6); - if (i >= wlen) - return -1; - int32_t subIndex = 
(index & 0x3f); // index within the word - int64_t word = MiscUtils::unsignedShift(bits[i], (int64_t)subIndex); // skip all the bits to the right of index - - if (word != 0) - return (i << 6) + subIndex + BitUtil::ntz(word); - - while (++i < wlen) - { - word = bits[i]; - if (word != 0) - return (i << 6) + BitUtil::ntz(word); - } + int32_t startWord = (startIndex >> 6); + if (startWord >= wlen) { + return; + } + + // since endIndex is one past the end, this is index of the last word to be changed. + int32_t endWord = ((endIndex - 1) >> 6); + + int64_t startmask = -1LL << (startIndex & 0x3f); + int64_t endmask = MiscUtils::unsignedShift(-1LL, (int64_t)-endIndex); + + // invert masks since we are clearing + startmask = ~startmask; + endmask = ~endmask; + + if (startWord == endWord) { + bits[startWord] &= (startmask | endmask); + return; + } + + bits[startWord] &= startmask; + + int32_t middle = std::min(wlen, endWord); + MiscUtils::arrayFill(bits.get(), startWord + 1, middle, 0LL); + if (endWord < wlen) { + bits[endWord] &= endmask; + } +} + +void OpenBitSet::clear(int64_t startIndex, int64_t endIndex) { + if (endIndex <= startIndex) { + return; + } + + int32_t startWord = (int32_t)(startIndex>>6); + if (startWord >= wlen) { + return; + } + + // since endIndex is one past the end, this is index of the last word to be changed. 
+ int32_t endWord = (int32_t)((endIndex - 1) >> 6); + + int64_t startmask = -1LL << (startIndex & 0x3f); + int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); + + // invert masks since we are clearing + startmask = ~startmask; + endmask = ~endmask; + + if (startWord == endWord) { + bits[startWord] &= (startmask | endmask); + return; + } + + bits[startWord] &= startmask; + + int32_t middle = std::min(wlen, endWord); + MiscUtils::arrayFill(bits.get(), startWord + 1, middle, 0LL); + if (endWord < wlen) { + bits[endWord] &= endmask; + } +} + +bool OpenBitSet::getAndSet(int32_t index) { + int32_t wordNum = index >> 6; // div 64 + int32_t bit = (index & 0x3f); // mod 64 + int64_t bitmask = 1LL << bit; + bool val = ((bits[wordNum] & bitmask) != 0); + bits[wordNum] |= bitmask; + return val; +} + +bool OpenBitSet::getAndSet(int64_t index) { + int32_t wordNum = (int32_t)(index >> 6); // div 64 + int32_t bit = (int32_t)index & 0x3f; // mod 64 + int64_t bitmask = 1LL << bit; + bool val = ((bits[wordNum] & bitmask) != 0); + bits[wordNum] |= bitmask; + return val; +} + +void OpenBitSet::fastFlip(int32_t index) { + int32_t wordNum = index >> 6; // div 64 + int32_t bit = (index & 0x3f); // mod 64 + int64_t bitmask = 1LL << bit; + bits[wordNum] ^= bitmask; +} + +void OpenBitSet::fastFlip(int64_t index) { + int32_t wordNum = (int32_t)(index >> 6); // div 64 + int32_t bit = (int32_t)index & 0x3f; // mod 64 + int64_t bitmask = 1LL << bit; + bits[wordNum] ^= bitmask; +} +void OpenBitSet::flip(int64_t index) { + int32_t wordNum = expandingWordNum(index); + int32_t bit = (int32_t)index & 0x3f; // mod 64 + int64_t bitmask = 1LL << bit; + bits[wordNum] ^= bitmask; +} + +bool OpenBitSet::flipAndGet(int32_t index) { + int32_t wordNum = index >> 6; // div 64 + int32_t bit = (index & 0x3f); // mod 64 + int64_t bitmask = 1LL << bit; + bits[wordNum] ^= bitmask; + return ((bits[wordNum] & bitmask) != 0); +} + +bool OpenBitSet::flipAndGet(int64_t index) { + int32_t wordNum = 
(int32_t)(index >> 6); // div 64 + int32_t bit = (int32_t)index & 0x3f; // mod 64 + int64_t bitmask = 1LL << bit; + bits[wordNum] ^= bitmask; + return ((bits[wordNum] & bitmask) != 0); +} + +void OpenBitSet::flip(int64_t startIndex, int64_t endIndex) { + if (endIndex <= startIndex) { + return; + } + int32_t startWord = (int32_t)(startIndex >> 6); + + // since endIndex is one past the end, this is index of the last word to be changed. + int32_t endWord = expandingWordNum(endIndex - 1); + + int64_t startmask = -1LL << (startIndex & 0x3f); + int64_t endmask = MiscUtils::unsignedShift(-1LL, -endIndex); + + if (startWord == endWord) { + bits[startWord] ^= (startmask & endmask); + return; + } + + bits[startWord] ^= startmask; + + for (int32_t i = startWord + 1; i < endWord; ++i) { + bits[i] = ~bits[i]; + } + bits[endWord] ^= endmask; +} + +int64_t OpenBitSet::cardinality() { + return BitUtil::pop_array(bits.get(), 0, wlen); +} + +int64_t OpenBitSet::intersectionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b) { + return BitUtil::pop_intersect(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); +} + +int64_t OpenBitSet::unionCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b) { + int64_t tot = BitUtil::pop_union(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); + if (a->wlen < b->wlen) { + tot += BitUtil::pop_array(b->bits.get(), a->wlen, b->wlen - a->wlen); + } else if (a->wlen > b->wlen) { + tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); + } + return tot; +} + +int64_t OpenBitSet::andNotCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b) { + int64_t tot = BitUtil::pop_andnot(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, b->wlen)); + if (a->wlen > b->wlen) { + tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); + } + return tot; +} + +int64_t OpenBitSet::xorCount(const OpenBitSetPtr& a, const OpenBitSetPtr& b) { + int64_t tot = BitUtil::pop_xor(a->bits.get(), b->bits.get(), 0, std::min(a->wlen, 
b->wlen)); + if (a->wlen < b->wlen) { + tot += BitUtil::pop_array(b->bits.get(), a->wlen, b->wlen - a->wlen); + } else if (a->wlen > b->wlen) { + tot += BitUtil::pop_array(a->bits.get(), b->wlen, a->wlen - b->wlen); + } + return tot; +} + +int32_t OpenBitSet::nextSetBit(int32_t index) { + int32_t i = MiscUtils::unsignedShift(index, 6); + if (i >= wlen) { return -1; } - - int64_t OpenBitSet::nextSetBit(int64_t index) - { - int32_t i = (int32_t)(index >> 6); - if (i >= wlen) - return -1; - int32_t subIndex = (int32_t)index & 0x3f; // index within the word - int64_t word = bits[i] >> subIndex; // skip all the bits to the right of index - - if (word != 0) - return ((int64_t)i << 6) + (subIndex + BitUtil::ntz(word)); - - while (++i < wlen) - { - word = bits[i]; - if (word != 0) - return ((int64_t)i << 6) + BitUtil::ntz(word); + int32_t subIndex = (index & 0x3f); // index within the word + int64_t word = MiscUtils::unsignedShift(bits[i], (int64_t)subIndex); // skip all the bits to the right of index + + if (word != 0) { + return (i << 6) + subIndex + BitUtil::ntz(word); + } + + while (++i < wlen) { + word = bits[i]; + if (word != 0) { + return (i << 6) + BitUtil::ntz(word); } + } + return -1; +} + +int64_t OpenBitSet::nextSetBit(int64_t index) { + int32_t i = (int32_t)(index >> 6); + if (i >= wlen) { return -1; } - - LuceneObjectPtr OpenBitSet::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? 
other : newLucene(); - OpenBitSetPtr cloneSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); - cloneSet->wlen = wlen; - cloneSet->bits = LongArray::newInstance(bits.size()); - MiscUtils::arrayCopy(bits.get(), 0, cloneSet->bits.get(), 0, bits.size()); - return cloneSet; - } - - void OpenBitSet::intersect(OpenBitSetPtr other) - { - int32_t newLen= std::min(this->wlen, other->wlen); - LongArray thisArr = this->bits; - LongArray otherArr = other->bits; - // testing against zero can be more efficient - int32_t pos = newLen; - while (--pos >= 0) - thisArr[pos] &= otherArr[pos]; - if (this->wlen > newLen) - { - // fill zeros from the new shorter length to the old length - MiscUtils::arrayFill(bits.get(), newLen, this->wlen, 0LL); - } - this->wlen = newLen; - } - - void OpenBitSet::_union(OpenBitSetPtr other) - { - int32_t newLen = std::max(wlen, other->wlen); - ensureCapacityWords(newLen); - - LongArray thisArr = this->bits; - LongArray otherArr = other->bits; - int32_t pos = std::min(wlen, other->wlen); - while (--pos >= 0) - thisArr[pos] |= otherArr[pos]; - if (this->wlen < newLen) - MiscUtils::arrayCopy(otherArr.get(), this->wlen, thisArr.get(), this->wlen, newLen - this->wlen); - this->wlen = newLen; - } - - void OpenBitSet::remove(OpenBitSetPtr other) - { - int32_t idx = std::min(wlen, other->wlen); - LongArray thisArr = this->bits; - LongArray otherArr = other->bits; - while (--idx >= 0) - thisArr[idx] &= ~otherArr[idx]; - } - - void OpenBitSet::_xor(OpenBitSetPtr other) - { - int32_t newLen = std::max(wlen, other->wlen); - ensureCapacityWords(newLen); - - LongArray thisArr = this->bits; - LongArray otherArr = other->bits; - int32_t pos = std::min(wlen, other->wlen); - while (--pos >= 0) - thisArr[pos] ^= otherArr[pos]; - if (this->wlen < newLen) - MiscUtils::arrayCopy(otherArr.get(), this->wlen, thisArr.get(), this->wlen, newLen - this->wlen); - this->wlen = newLen; - } - - void OpenBitSet::_and(OpenBitSetPtr other) - { - intersect(other); - } - - void 
OpenBitSet::_or(OpenBitSetPtr other) - { - _union(other); - } - - void OpenBitSet::andNot(OpenBitSetPtr other) - { - remove(other); - } - - bool OpenBitSet::intersects(OpenBitSetPtr other) - { - int32_t pos = std::min(this->wlen, other->wlen); - LongArray thisArr = this->bits; - LongArray otherArr = other->bits; - while (--pos >= 0) - { - if ((thisArr[pos] & otherArr[pos]) !=0 ) - return true; - } - return false; + int32_t subIndex = (int32_t)index & 0x3f; // index within the word + int64_t word = bits[i] >> subIndex; // skip all the bits to the right of index + + if (word != 0) { + return ((int64_t)i << 6) + (subIndex + BitUtil::ntz(word)); } - - void OpenBitSet::ensureCapacityWords(int32_t numWords) - { - int32_t length = bits.size(); - if (length < numWords) - { - bits.resize(MiscUtils::getNextSize(numWords)); - MiscUtils::arrayFill(bits.get(), length, bits.size(), 0LL); + + while (++i < wlen) { + word = bits[i]; + if (word != 0) { + return ((int64_t)i << 6) + BitUtil::ntz(word); } } - void OpenBitSet::ensureCapacity(int64_t numBits) - { - ensureCapacityWords(bits2words(numBits)); + return -1; +} + +LuceneObjectPtr OpenBitSet::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? 
other : newLucene(); + OpenBitSetPtr cloneSet(boost::dynamic_pointer_cast(LuceneObject::clone(clone))); + cloneSet->wlen = wlen; + cloneSet->bits = LongArray::newInstance(bits.size()); + MiscUtils::arrayCopy(bits.get(), 0, cloneSet->bits.get(), 0, bits.size()); + return cloneSet; +} + +void OpenBitSet::intersect(const OpenBitSetPtr& other) { + int32_t newLen= std::min(this->wlen, other->wlen); + LongArray thisArr = this->bits; + LongArray otherArr = other->bits; + // testing against zero can be more efficient + int32_t pos = newLen; + while (--pos >= 0) { + thisArr[pos] &= otherArr[pos]; + } + if (this->wlen > newLen) { + // fill zeros from the new shorter length to the old length + MiscUtils::arrayFill(bits.get(), newLen, this->wlen, 0LL); + } + this->wlen = newLen; +} + +void OpenBitSet::_union(const OpenBitSetPtr& other) { + int32_t newLen = std::max(wlen, other->wlen); + ensureCapacityWords(newLen); + + LongArray thisArr = this->bits; + LongArray otherArr = other->bits; + int32_t pos = std::min(wlen, other->wlen); + while (--pos >= 0) { + thisArr[pos] |= otherArr[pos]; + } + if (this->wlen < newLen) { + MiscUtils::arrayCopy(otherArr.get(), this->wlen, thisArr.get(), this->wlen, newLen - this->wlen); + } + this->wlen = newLen; +} + +void OpenBitSet::remove(const OpenBitSetPtr& other) { + int32_t idx = std::min(wlen, other->wlen); + LongArray thisArr = this->bits; + LongArray otherArr = other->bits; + while (--idx >= 0) { + thisArr[idx] &= ~otherArr[idx]; } - - void OpenBitSet::trimTrailingZeros() - { - int32_t idx = wlen - 1; - while (idx >= 0 && bits[idx] == 0) - --idx; - wlen = idx + 1; +} + +void OpenBitSet::_xor(const OpenBitSetPtr& other) { + int32_t newLen = std::max(wlen, other->wlen); + ensureCapacityWords(newLen); + + LongArray thisArr = this->bits; + LongArray otherArr = other->bits; + int32_t pos = std::min(wlen, other->wlen); + while (--pos >= 0) { + thisArr[pos] ^= otherArr[pos]; } - - int32_t OpenBitSet::bits2words(int64_t numBits) - { - return 
(int32_t)(MiscUtils::unsignedShift(numBits - 1, (int64_t)6) + 1); + if (this->wlen < newLen) { + MiscUtils::arrayCopy(otherArr.get(), this->wlen, thisArr.get(), this->wlen, newLen - this->wlen); } - - bool OpenBitSet::equals(LuceneObjectPtr other) - { - if (LuceneObject::equals(other)) + this->wlen = newLen; +} + +void OpenBitSet::_and(const OpenBitSetPtr& other) { + intersect(other); +} + +void OpenBitSet::_or(const OpenBitSetPtr& other) { + _union(other); +} + +void OpenBitSet::andNot(const OpenBitSetPtr& other) { + remove(other); +} + +bool OpenBitSet::intersects(const OpenBitSetPtr& other) { + int32_t pos = std::min(this->wlen, other->wlen); + LongArray thisArr = this->bits; + LongArray otherArr = other->bits; + while (--pos >= 0) { + if ((thisArr[pos] & otherArr[pos]) !=0 ) { return true; - OpenBitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); - if (!otherBitSet) - return false; - OpenBitSetPtr a; - OpenBitSetPtr b = otherBitSet; - // make a the larger set - if (b->wlen > this->wlen) - { - a = b; - b = shared_from_this(); } - else - a = shared_from_this(); - - // check for any set bits out of the range of b - for (int32_t i = a->wlen - 1; i >= b->wlen; --i) - { - if (a->bits[i] !=0 ) - return false; - } - - for (int32_t i = b->wlen - 1; i >= 0; --i) - { - if (a->bits[i] != b->bits[i]) - return false; - } - + } + return false; +} + +void OpenBitSet::ensureCapacityWords(int32_t numWords) { + int32_t length = bits.size(); + if (length < numWords) { + bits.resize(MiscUtils::getNextSize(numWords)); + MiscUtils::arrayFill(bits.get(), length, bits.size(), 0LL); + } +} + +void OpenBitSet::ensureCapacity(int64_t numBits) { + ensureCapacityWords(bits2words(numBits)); +} + +void OpenBitSet::trimTrailingZeros() { + int32_t idx = wlen - 1; + while (idx >= 0 && bits[idx] == 0) { + --idx; + } + wlen = idx + 1; +} + +int32_t OpenBitSet::bits2words(int64_t numBits) { + return (int32_t)(MiscUtils::unsignedShift(numBits - 1, (int64_t)6) + 1); +} + +bool 
OpenBitSet::equals(const LuceneObjectPtr& other) { + if (LuceneObject::equals(other)) { return true; } - - int32_t OpenBitSet::hashCode() - { - // Start with a zero hash and use a mix that results in zero if the input is zero. - // This effectively truncates trailing zeros without an explicit check. - int64_t hash = 0; - for (int32_t i = bits.size(); --i >= 0;) - { - hash ^= bits[i]; - hash = (hash << 1) | MiscUtils::unsignedShift(hash, (int64_t)63); // rotate left + OpenBitSetPtr otherBitSet(boost::dynamic_pointer_cast(other)); + if (!otherBitSet) { + return false; + } + OpenBitSetPtr a; + OpenBitSetPtr b = otherBitSet; + // make a the larger set + if (b->wlen > this->wlen) { + a = b; + b = shared_from_this(); + } else { + a = shared_from_this(); + } + + // check for any set bits out of the range of b + for (int32_t i = a->wlen - 1; i >= b->wlen; --i) { + if (a->bits[i] !=0 ) { + return false; + } + } + + for (int32_t i = b->wlen - 1; i >= 0; --i) { + if (a->bits[i] != b->bits[i]) { + return false; } - // Fold leftmost bits into right and add a constant to prevent empty sets from - // returning 0, which is too common. - return (int32_t)((hash >> 32) ^ hash) + 0x98761234; } + + return true; +} + +int32_t OpenBitSet::hashCode() { + // Start with a zero hash and use a mix that results in zero if the input is zero. + // This effectively truncates trailing zeros without an explicit check. + int64_t hash = 0; + for (int32_t i = bits.size(); --i >= 0;) { + hash ^= bits[i]; + hash = (hash << 1) | MiscUtils::unsignedShift(hash, (int64_t)63); // rotate left + } + // Fold leftmost bits into right and add a constant to prevent empty sets from + // returning 0, which is too common. 
+ return (int32_t)((hash >> 32) ^ hash) + 0x98761234; +} + } diff --git a/src/core/util/OpenBitSetDISI.cpp b/src/core/util/OpenBitSetDISI.cpp index cbda95d9..43ec5867 100644 --- a/src/core/util/OpenBitSetDISI.cpp +++ b/src/core/util/OpenBitSetDISI.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,55 +7,52 @@ #include "LuceneInc.h" #include "OpenBitSetDISI.h" -namespace Lucene -{ - OpenBitSetDISI::OpenBitSetDISI(DocIdSetIteratorPtr disi, int32_t maxSize) : OpenBitSet(maxSize) - { - inPlaceOr(disi); - } - - OpenBitSetDISI::OpenBitSetDISI(int32_t maxSize) : OpenBitSet(maxSize) - { - } - - OpenBitSetDISI::~OpenBitSetDISI() - { +namespace Lucene { + +OpenBitSetDISI::OpenBitSetDISI(const DocIdSetIteratorPtr& disi, int32_t maxSize) : OpenBitSet(maxSize) { + inPlaceOr(disi); +} + +OpenBitSetDISI::OpenBitSetDISI(int32_t maxSize) : OpenBitSet(maxSize) { +} + +OpenBitSetDISI::~OpenBitSetDISI() { +} + +void OpenBitSetDISI::inPlaceOr(const DocIdSetIteratorPtr& disi) { + int32_t doc; + int32_t _size = size(); + while ((doc = disi->nextDoc()) < _size) { + set(doc); } - - void OpenBitSetDISI::inPlaceOr(DocIdSetIteratorPtr disi) - { - int32_t doc; - int32_t _size = size(); - while ((doc = disi->nextDoc()) < _size) - set(doc); +} + +void OpenBitSetDISI::inPlaceAnd(const DocIdSetIteratorPtr& disi) { + int32_t bitSetDoc = nextSetBit((int32_t)0); + int32_t disiDoc; + while (bitSetDoc != -1 && (disiDoc = disi->advance(bitSetDoc)) != DocIdSetIterator::NO_MORE_DOCS) { + clear(bitSetDoc, disiDoc); + bitSetDoc = nextSetBit(disiDoc + 1); } - - void OpenBitSetDISI::inPlaceAnd(DocIdSetIteratorPtr disi) - { - int32_t 
bitSetDoc = nextSetBit((int32_t)0); - int32_t disiDoc; - while (bitSetDoc != -1 && (disiDoc = disi->advance(bitSetDoc)) != DocIdSetIterator::NO_MORE_DOCS) - { - clear(bitSetDoc, disiDoc); - bitSetDoc = nextSetBit(disiDoc + 1); - } - if (bitSetDoc != -1) - clear((int64_t)bitSetDoc, size()); + if (bitSetDoc != -1) { + clear((int64_t)bitSetDoc, size()); } - - void OpenBitSetDISI::inPlaceNot(DocIdSetIteratorPtr disi) - { - int32_t doc; - int32_t _size = size(); - while ((doc = disi->nextDoc()) < _size) - clear(doc); +} + +void OpenBitSetDISI::inPlaceNot(const DocIdSetIteratorPtr& disi) { + int32_t doc; + int32_t _size = size(); + while ((doc = disi->nextDoc()) < _size) { + clear(doc); } - - void OpenBitSetDISI::inPlaceXor(DocIdSetIteratorPtr disi) - { - int32_t doc; - int32_t _size = size(); - while ((doc = disi->nextDoc()) < _size) - flip(doc); +} + +void OpenBitSetDISI::inPlaceXor(const DocIdSetIteratorPtr& disi) { + int32_t doc; + int32_t _size = size(); + while ((doc = disi->nextDoc()) < _size) { + flip(doc); } } + +} diff --git a/src/core/util/OpenBitSetIterator.cpp b/src/core/util/OpenBitSetIterator.cpp index 5d61c13e..b6671fcf 100644 --- a/src/core/util/OpenBitSetIterator.cpp +++ b/src/core/util/OpenBitSetIterator.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,158 +9,140 @@ #include "OpenBitSet.h" #include "MiscUtils.h" -namespace Lucene -{ - /// The General Idea: instead of having an array per byte that has the offsets of the - /// next set bit, that array could be packed inside a 32 bit integer (8 4 bit numbers). 
- /// That should be faster than accessing an array for each index, and the total array - /// size is kept smaller (256*sizeof(int32_t))=1K - const int32_t OpenBitSetIterator::bitlist[] = - { - 0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41, 0x42, 0x421, 0x43, - 0x431, 0x432, 0x4321, 0x5, 0x51, 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321, - 0x54, 0x541, 0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6, 0x61, 0x62, - 0x621, 0x63, 0x631, 0x632, 0x6321, 0x64, 0x641, 0x642, 0x6421, 0x643, 0x6431, - 0x6432, 0x64321, 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532, 0x65321, - 0x654, 0x6541, 0x6542, 0x65421, 0x6543, 0x65431, 0x65432, 0x654321, 0x7, - 0x71, 0x72, 0x721, 0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742, 0x7421, - 0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752, 0x7521, 0x753, 0x7531, - 0x7532, 0x75321, 0x754, 0x7541, 0x7542, 0x75421, 0x7543, 0x75431, 0x75432, - 0x754321, 0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632, 0x76321, 0x764, - 0x7641, 0x7642, 0x76421, 0x7643, 0x76431, 0x76432, 0x764321, 0x765, 0x7651, - 0x7652, 0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654, 0x76541, 0x76542, - 0x765421, 0x76543, 0x765431, 0x765432, 0x7654321, 0x8, 0x81, 0x82, 0x821, - 0x83, 0x831, 0x832, 0x8321, 0x84, 0x841, 0x842, 0x8421, 0x843, 0x8431, 0x8432, - 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853, 0x8531, 0x8532, 0x85321, 0x854, - 0x8541, 0x8542, 0x85421, 0x8543, 0x85431, 0x85432, 0x854321, 0x86, 0x861, - 0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864, 0x8641, 0x8642, 0x86421, - 0x8643, 0x86431, 0x86432, 0x864321, 0x865, 0x8651, 0x8652, 0x86521, 0x8653, - 0x86531, 0x86532, 0x865321, 0x8654, 0x86541, 0x86542, 0x865421, 0x86543, - 0x865431, 0x865432, 0x8654321, 0x87, 0x871, 0x872, 0x8721, 0x873, 0x8731, - 0x8732, 0x87321, 0x874, 0x8741, 0x8742, 0x87421, 0x8743, 0x87431, 0x87432, - 0x874321, 0x875, 0x8751, 0x8752, 0x87521, 0x8753, 0x87531, 0x87532, 0x875321, - 0x8754, 0x87541, 0x87542, 0x875421, 0x87543, 0x875431, 0x875432, 0x8754321, - 0x876, 0x8761, 
0x8762, 0x87621, 0x8763, 0x87631, 0x87632, 0x876321, 0x8764, - 0x87641, 0x87642, 0x876421, 0x87643, 0x876431, 0x876432, 0x8764321, 0x8765, - 0x87651, 0x87652, 0x876521, 0x87653, 0x876531, 0x876532, 0x8765321, 0x87654, - 0x876541, 0x876542, 0x8765421, 0x876543, 0x8765431, 0x8765432, 0x87654321 - }; - - OpenBitSetIterator::OpenBitSetIterator(OpenBitSetPtr bitSet) - { - arr = bitSet->getBits(); - words = bitSet->getNumWords(); - i = -1; - word = 0; - wordShift = 0; - indexArray = 0; - curDocId = -1; +namespace Lucene { + +/// The General Idea: instead of having an array per byte that has the offsets of the +/// next set bit, that array could be packed inside a 32 bit integer (8 4 bit numbers). +/// That should be faster than accessing an array for each index, and the total array +/// size is kept smaller (256*sizeof(int32_t))=1K +const int32_t OpenBitSetIterator::bitlist[] = { + 0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41, 0x42, 0x421, 0x43, + 0x431, 0x432, 0x4321, 0x5, 0x51, 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321, + 0x54, 0x541, 0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6, 0x61, 0x62, + 0x621, 0x63, 0x631, 0x632, 0x6321, 0x64, 0x641, 0x642, 0x6421, 0x643, 0x6431, + 0x6432, 0x64321, 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532, 0x65321, + 0x654, 0x6541, 0x6542, 0x65421, 0x6543, 0x65431, 0x65432, 0x654321, 0x7, + 0x71, 0x72, 0x721, 0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742, 0x7421, + 0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752, 0x7521, 0x753, 0x7531, + 0x7532, 0x75321, 0x754, 0x7541, 0x7542, 0x75421, 0x7543, 0x75431, 0x75432, + 0x754321, 0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632, 0x76321, 0x764, + 0x7641, 0x7642, 0x76421, 0x7643, 0x76431, 0x76432, 0x764321, 0x765, 0x7651, + 0x7652, 0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654, 0x76541, 0x76542, + 0x765421, 0x76543, 0x765431, 0x765432, 0x7654321, 0x8, 0x81, 0x82, 0x821, + 0x83, 0x831, 0x832, 0x8321, 0x84, 0x841, 0x842, 0x8421, 0x843, 0x8431, 0x8432, + 0x84321, 0x85, 
0x851, 0x852, 0x8521, 0x853, 0x8531, 0x8532, 0x85321, 0x854, + 0x8541, 0x8542, 0x85421, 0x8543, 0x85431, 0x85432, 0x854321, 0x86, 0x861, + 0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864, 0x8641, 0x8642, 0x86421, + 0x8643, 0x86431, 0x86432, 0x864321, 0x865, 0x8651, 0x8652, 0x86521, 0x8653, + 0x86531, 0x86532, 0x865321, 0x8654, 0x86541, 0x86542, 0x865421, 0x86543, + 0x865431, 0x865432, 0x8654321, 0x87, 0x871, 0x872, 0x8721, 0x873, 0x8731, + 0x8732, 0x87321, 0x874, 0x8741, 0x8742, 0x87421, 0x8743, 0x87431, 0x87432, + 0x874321, 0x875, 0x8751, 0x8752, 0x87521, 0x8753, 0x87531, 0x87532, 0x875321, + 0x8754, 0x87541, 0x87542, 0x875421, 0x87543, 0x875431, 0x875432, 0x8754321, + 0x876, 0x8761, 0x8762, 0x87621, 0x8763, 0x87631, 0x87632, 0x876321, 0x8764, + 0x87641, 0x87642, 0x876421, 0x87643, 0x876431, 0x876432, 0x8764321, 0x8765, + 0x87651, 0x87652, 0x876521, 0x87653, 0x876531, 0x876532, 0x8765321, 0x87654, + 0x876541, 0x876542, 0x8765421, 0x876543, 0x8765431, 0x8765432, static_cast(0x87654321) +}; + +OpenBitSetIterator::OpenBitSetIterator(const OpenBitSetPtr& bitSet) { + arr = bitSet->getBits(); + words = bitSet->getNumWords(); + i = -1; + word = 0; + wordShift = 0; + indexArray = 0; + curDocId = -1; +} + +OpenBitSetIterator::OpenBitSetIterator(LongArray bits, int32_t numWords) { + arr = bits; + words = numWords; + i = -1; + word = 0; + wordShift = 0; + indexArray = 0; + curDocId = -1; +} + +OpenBitSetIterator::~OpenBitSetIterator() { +} + +void OpenBitSetIterator::shift() { + if ((int32_t)word == 0) { + wordShift += 32; + word = MiscUtils::unsignedShift(word, (int64_t)32); } - - OpenBitSetIterator::OpenBitSetIterator(LongArray bits, int32_t numWords) - { - arr = bits; - words = numWords; - i = -1; - word = 0; - wordShift = 0; - indexArray = 0; - curDocId = -1; + if ((word & 0x0000ffff) == 0) { + wordShift += 16; + word = MiscUtils::unsignedShift(word, (int64_t)16); } - - OpenBitSetIterator::~OpenBitSetIterator() - { + if ((word & 0x000000ff) == 0) { + wordShift += 8; 
+ word = MiscUtils::unsignedShift(word, (int64_t)8); } - - void OpenBitSetIterator::shift() - { - if ((int32_t)word == 0) - { - wordShift += 32; - word = MiscUtils::unsignedShift(word, (int64_t)32); - } - if ((word & 0x0000ffff) == 0) - { - wordShift += 16; - word = MiscUtils::unsignedShift(word, (int64_t)16); - } - if ((word & 0x000000ff) == 0) - { - wordShift += 8; + indexArray = bitlist[(int32_t)word & 0xff]; +} + +int32_t OpenBitSetIterator::nextDoc() { + if (indexArray == 0) { + if (word != 0) { word = MiscUtils::unsignedShift(word, (int64_t)8); + wordShift += 8; } - indexArray = bitlist[(int32_t)word & 0xff]; - } - - int32_t OpenBitSetIterator::nextDoc() - { - if (indexArray == 0) - { - if (word != 0) - { - word = MiscUtils::unsignedShift(word, (int64_t)8); - wordShift += 8; - } - while (word == 0) - { - if (++i >= words) - { - curDocId = NO_MORE_DOCS; - return curDocId; - } - word = arr[i]; - wordShift = -1; // loop invariant code motion should move this + while (word == 0) { + if (++i >= words) { + curDocId = NO_MORE_DOCS; + return curDocId; } - - // after the first time, should I go with a linear search, or stick with the binary search in shift? - shift(); + word = arr[i]; + wordShift = -1; // loop invariant code motion should move this } - int32_t bitIndex = (indexArray & 0x0f) + wordShift; - indexArray = MiscUtils::unsignedShift(indexArray, 4); - curDocId = (i << 6) + bitIndex; - return curDocId; + // after the first time, should I go with a linear search, or stick with the binary search in shift? 
+ shift(); } - - int32_t OpenBitSetIterator::advance(int32_t target) - { - indexArray = 0; - i = target >> 6; - if (i >= words) - { - word = 0; // setup so next() will also return -1 - curDocId = NO_MORE_DOCS; - return curDocId; - } - wordShift = target & 0x3f; - word = MiscUtils::unsignedShift(arr[i], (int64_t)wordShift); - if (word != 0) - --wordShift; // compensate for 1 based arrIndex - else - { - while (word == 0) - { - if (++i >= words) - { - curDocId = NO_MORE_DOCS; - return curDocId; - } - word = arr[i]; - } - wordShift = -1; - } - shift(); + int32_t bitIndex = (indexArray & 0x0f) + wordShift; + indexArray = MiscUtils::unsignedShift(indexArray, 4); + curDocId = (i << 6) + bitIndex; + return curDocId; +} - int32_t bitIndex = (indexArray & 0x0f) + wordShift; - indexArray = MiscUtils::unsignedShift(indexArray, 4); - curDocId = (i << 6) + bitIndex; +int32_t OpenBitSetIterator::advance(int32_t target) { + indexArray = 0; + i = target >> 6; + if (i >= words) { + word = 0; // setup so next() will also return -1 + curDocId = NO_MORE_DOCS; return curDocId; } - - int32_t OpenBitSetIterator::docID() - { - return curDocId; + wordShift = target & 0x3f; + word = MiscUtils::unsignedShift(arr[i], (int64_t)wordShift); + if (word != 0) { + --wordShift; // compensate for 1 based arrIndex + } else { + while (word == 0) { + if (++i >= words) { + curDocId = NO_MORE_DOCS; + return curDocId; + } + word = arr[i]; + } + wordShift = -1; } + + shift(); + + int32_t bitIndex = (indexArray & 0x0f) + wordShift; + indexArray = MiscUtils::unsignedShift(indexArray, 4); + curDocId = (i << 6) + bitIndex; + return curDocId; +} + +int32_t OpenBitSetIterator::docID() { + return curDocId; +} + } diff --git a/src/core/util/Random.cpp b/src/core/util/Random.cpp index e6dbe57c..77919c24 100644 --- a/src/core/util/Random.cpp +++ b/src/core/util/Random.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,53 +8,46 @@ #include "Random.h" #include "MiscUtils.h" -namespace Lucene -{ - Random::Random() - { - this->seed = (int64_t)MiscUtils::currentTimeMillis(); - } - - Random::Random(int64_t seed) - { - this->seed = seed; - } - - Random::~Random() - { - } +namespace Lucene { - void Random::setSeed(int64_t seed) - { - this->seed = (seed ^ 0x5deece66dLL) & (((int64_t)1 << 48) - 1); - } - - int32_t Random::nextInt(int32_t limit) - { - if ((limit & -limit) == limit) - return (int32_t)((limit * (int64_t)next(31)) >> 31); - - int32_t bits = 0; - int32_t val = 0; - - do - { - bits = next(31); - val = bits % limit; - } - while (bits - val + (limit - 1) < 0); - - return val; - } - - double Random::nextDouble() - { - return ((double)(((int64_t)next(26) << 27) + next(27)) / (double)((int64_t)1 << 53)); - } +Random::Random() { + this->seed = (int64_t)MiscUtils::currentTimeMillis(); +} + +Random::Random(int64_t seed) { + this->seed = seed; +} + +Random::~Random() { +} - int32_t Random::next(int32_t bits) - { - seed = (seed * 0x5deece66dLL + 0xb) & (((int64_t)1 << 48) - 1); - return (int32_t)(seed >> (48 - bits)); +void Random::setSeed(int64_t seed) { + this->seed = (seed ^ 0x5deece66dLL) & (((int64_t)1 << 48) - 1); +} + +int32_t Random::nextInt(int32_t limit) { + if ((limit & -limit) == limit) { + return (int32_t)((limit * (int64_t)next(31)) >> 31); } + + int32_t bits = 0; + int32_t val = 0; + + do { + bits = next(31); + val = bits % limit; + } while (bits - val + (limit - 1) < 0); + + return val; +} + +double Random::nextDouble() { + return ((double)(((int64_t)next(26) << 27) + next(27)) / (double)((int64_t)1 << 53)); +} + +int32_t Random::next(int32_t bits) { + seed = (seed * 0x5deece66dLL + 0xb) & 
(((int64_t)1 << 48) - 1); + return (int32_t)(seed >> (48 - bits)); +} + } diff --git a/src/core/util/Reader.cpp b/src/core/util/Reader.cpp index 04f38a2d..5e660996 100644 --- a/src/core/util/Reader.cpp +++ b/src/core/util/Reader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,46 +7,39 @@ #include "LuceneInc.h" #include "Reader.h" -namespace Lucene -{ - const int32_t Reader::READER_EOF = -1; - - Reader::Reader() - { - } - - Reader::~Reader() - { - } - - int32_t Reader::read() - { - wchar_t buffer; - return read(&buffer, 0, 1) == READER_EOF ? READER_EOF : buffer; - } - - int64_t Reader::skip(int64_t n) - { - return 0; // override - } - - bool Reader::markSupported() - { - return false; // override - } - - void Reader::mark(int32_t readAheadLimit) - { - // override - } - - void Reader::reset() - { - // override - } - - int64_t Reader::length() - { - return 0; // override - } +namespace Lucene { + +const int32_t Reader::READER_EOF = -1; + +Reader::Reader() { +} + +Reader::~Reader() { +} + +int32_t Reader::read() { + wchar_t buffer; + return read(&buffer, 0, 1) == READER_EOF ? 
READER_EOF : buffer; +} + +int64_t Reader::skip(int64_t n) { + return 0; // override +} + +bool Reader::markSupported() { + return false; // override +} + +void Reader::mark(int32_t readAheadLimit) { + // override +} + +void Reader::reset() { + // override +} + +int64_t Reader::length() { + return 0; // override +} + } diff --git a/src/core/util/ReaderUtil.cpp b/src/core/util/ReaderUtil.cpp index 2e7ef96b..0b000c1c 100644 --- a/src/core/util/ReaderUtil.cpp +++ b/src/core/util/ReaderUtil.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,52 +8,45 @@ #include "ReaderUtil.h" #include "IndexReader.h" -namespace Lucene -{ - ReaderUtil::~ReaderUtil() - { - } - - void ReaderUtil::gatherSubReaders(Collection allSubReaders, IndexReaderPtr reader) - { - Collection subReaders(reader->getSequentialSubReaders()); - if (!subReaders) - { - // Add the reader itself, and do not recurse - allSubReaders.add(reader); - } - else - { - for (Collection::iterator subReader = subReaders.begin(); subReader != subReaders.end(); ++subReader) - gatherSubReaders(allSubReaders, *subReader); - } - } - - IndexReaderPtr ReaderUtil::subReader(int32_t doc, IndexReaderPtr reader) - { - Collection subReaders(Collection::newInstance()); - ReaderUtil::gatherSubReaders(subReaders, reader); - Collection docStarts(Collection::newInstance(subReaders.size())); - int32_t maxDoc = 0; - for (int32_t i = 0; i < subReaders.size(); ++i) - { - docStarts[i] = maxDoc; - maxDoc += subReaders[i]->maxDoc(); +namespace Lucene { + +ReaderUtil::~ReaderUtil() { +} + +void ReaderUtil::gatherSubReaders(Collection allSubReaders, const IndexReaderPtr& reader) 
{ + Collection subReaders(reader->getSequentialSubReaders()); + if (!subReaders) { + // Add the reader itself, and do not recurse + allSubReaders.add(reader); + } else { + for (Collection::iterator subReader = subReaders.begin(); subReader != subReaders.end(); ++subReader) { + gatherSubReaders(allSubReaders, *subReader); } - return subReaders[ReaderUtil::subIndex(doc, docStarts)]; - } - - IndexReaderPtr ReaderUtil::subReader(IndexReaderPtr reader, int32_t subIndex) - { - Collection subReaders(Collection::newInstance()); - ReaderUtil::gatherSubReaders(subReaders, reader); - return subReaders[subIndex]; } - - int32_t ReaderUtil::subIndex(int32_t n, Collection docStarts) - { - // Binary search to locate reader - Collection::iterator index = std::upper_bound(docStarts.begin(), docStarts.end(), n); - return (std::distance(docStarts.begin(), index) - 1); +} + +IndexReaderPtr ReaderUtil::subReader(int32_t doc, const IndexReaderPtr& reader) { + Collection subReaders(Collection::newInstance()); + ReaderUtil::gatherSubReaders(subReaders, reader); + Collection docStarts(Collection::newInstance(subReaders.size())); + int32_t maxDoc = 0; + for (int32_t i = 0; i < subReaders.size(); ++i) { + docStarts[i] = maxDoc; + maxDoc += subReaders[i]->maxDoc(); } + return subReaders[ReaderUtil::subIndex(doc, docStarts)]; +} + +IndexReaderPtr ReaderUtil::subReader(const IndexReaderPtr& reader, int32_t subIndex) { + Collection subReaders(Collection::newInstance()); + ReaderUtil::gatherSubReaders(subReaders, reader); + return subReaders[subIndex]; +} + +int32_t ReaderUtil::subIndex(int32_t n, Collection docStarts) { + // Binary search to locate reader + Collection::iterator index = std::upper_bound(docStarts.begin(), docStarts.end(), n); + return (std::distance(docStarts.begin(), index) - 1); +} + } diff --git a/src/core/util/ScorerDocQueue.cpp b/src/core/util/ScorerDocQueue.cpp index a5e347fd..cce4dd87 100644 --- a/src/core/util/ScorerDocQueue.cpp +++ b/src/core/util/ScorerDocQueue.cpp @@ 
-1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,172 +10,148 @@ #include "Scorer.h" #include "MiscUtils.h" -namespace Lucene -{ - ScorerDocQueue::ScorerDocQueue(int32_t maxSize) - { - this->_size = 0; - int32_t heapSize = maxSize + 1; - heap = Collection::newInstance(heapSize); - this->maxSize = maxSize; - topHSD = heap[1]; // initially null - } - - ScorerDocQueue::~ScorerDocQueue() - { - } - - void ScorerDocQueue::put(ScorerPtr scorer) - { - heap[++_size] = newLucene(scorer); - upHeap(); - } - - bool ScorerDocQueue::insert(ScorerPtr scorer) - { - if (_size < maxSize) - { - put(scorer); +namespace Lucene { + +ScorerDocQueue::ScorerDocQueue(int32_t maxSize) { + this->_size = 0; + int32_t heapSize = maxSize + 1; + heap = Collection::newInstance(heapSize); + this->maxSize = maxSize; + topHSD = heap[1]; // initially null +} + +ScorerDocQueue::~ScorerDocQueue() { +} + +void ScorerDocQueue::put(const ScorerPtr& scorer) { + heap[++_size] = newLucene(scorer); + upHeap(); +} + +bool ScorerDocQueue::insert(const ScorerPtr& scorer) { + if (_size < maxSize) { + put(scorer); + return true; + } else { + int32_t docNr = scorer->docID(); + if ((_size > 0) && (!(docNr < topHSD->doc))) { // heap[1] is top() + heap[1] = newLucene(scorer, docNr); + downHeap(); return true; + } else { + return false; } - else - { - int32_t docNr = scorer->docID(); - if ((_size > 0) && (!(docNr < topHSD->doc))) // heap[1] is top() - { - heap[1] = newLucene(scorer, docNr); - downHeap(); - return true; - } - else - return false; - } - } - - ScorerPtr ScorerDocQueue::top() - { - return topHSD->scorer; - } - - int32_t ScorerDocQueue::topDoc() 
- { - return topHSD->doc; - } - - double ScorerDocQueue::topScore() - { - return topHSD->scorer->score(); - } - - bool ScorerDocQueue::topNextAndAdjustElsePop() - { - return checkAdjustElsePop(topHSD->scorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS); - } - - bool ScorerDocQueue::topSkipToAndAdjustElsePop(int32_t target) - { - return checkAdjustElsePop(topHSD->scorer->advance(target) != DocIdSetIterator::NO_MORE_DOCS); - } - - bool ScorerDocQueue::checkAdjustElsePop(bool cond) - { - if (cond) // see also adjustTop - topHSD->doc = topHSD->scorer->docID(); - else // see also popNoResult - { - heap[1] = heap[_size]; // move last to first - heap[_size--].reset(); - } - downHeap(); - return cond; } - - ScorerPtr ScorerDocQueue::pop() - { - ScorerPtr result(topHSD->scorer); - popNoResult(); - return result; - } - - void ScorerDocQueue::popNoResult() - { +} + +ScorerPtr ScorerDocQueue::top() { + return topHSD->scorer; +} + +int32_t ScorerDocQueue::topDoc() { + return topHSD->doc; +} + +double ScorerDocQueue::topScore() { + return topHSD->scorer->score(); +} + +bool ScorerDocQueue::topNextAndAdjustElsePop() { + return checkAdjustElsePop(topHSD->scorer->nextDoc() != DocIdSetIterator::NO_MORE_DOCS); +} + +bool ScorerDocQueue::topSkipToAndAdjustElsePop(int32_t target) { + return checkAdjustElsePop(topHSD->scorer->advance(target) != DocIdSetIterator::NO_MORE_DOCS); +} + +bool ScorerDocQueue::checkAdjustElsePop(bool cond) { + if (cond) { // see also adjustTop + topHSD->doc = topHSD->scorer->docID(); + } else { // see also popNoResult heap[1] = heap[_size]; // move last to first heap[_size--].reset(); - downHeap(); // adjust heap - } - - void ScorerDocQueue::adjustTop() - { - topHSD->adjust(); - downHeap(); } - - int32_t ScorerDocQueue::size() - { - return _size; - } - - void ScorerDocQueue::clear() - { - for (int32_t i = 0; i <= _size; ++i) - heap[i].reset(); - _size = 0; - } - - void ScorerDocQueue::upHeap() - { - int32_t i = _size; - HeapedScorerDocPtr node(heap[i]); // 
save bottom node - int32_t j = MiscUtils::unsignedShift(i, 1); - while ((j > 0) && (node->doc < heap[j]->doc)) - { - heap[i] = heap[j]; // shift parents down - i = j; - j = MiscUtils::unsignedShift(j, 1); - } - heap[i] = node; // install saved node - topHSD = heap[1]; + downHeap(); + return cond; +} + +ScorerPtr ScorerDocQueue::pop() { + ScorerPtr result(topHSD->scorer); + popNoResult(); + return result; +} + +void ScorerDocQueue::popNoResult() { + heap[1] = heap[_size]; // move last to first + heap[_size--].reset(); + downHeap(); // adjust heap +} + +void ScorerDocQueue::adjustTop() { + topHSD->adjust(); + downHeap(); +} + +int32_t ScorerDocQueue::size() { + return _size; +} + +void ScorerDocQueue::clear() { + for (int32_t i = 0; i <= _size; ++i) { + heap[i].reset(); } - - void ScorerDocQueue::downHeap() - { - int32_t i = 1; - HeapedScorerDocPtr node(heap[i]); // save top node - int32_t j = i << 1; // find smaller child - int32_t k = j + 1; - if ((k <= _size) && (heap[k]->doc < heap[j]->doc)) + _size = 0; +} + +void ScorerDocQueue::upHeap() { + int32_t i = _size; + HeapedScorerDocPtr node(heap[i]); // save bottom node + int32_t j = MiscUtils::unsignedShift(i, 1); + while ((j > 0) && (node->doc < heap[j]->doc)) { + heap[i] = heap[j]; // shift parents down + i = j; + j = MiscUtils::unsignedShift(j, 1); + } + heap[i] = node; // install saved node + topHSD = heap[1]; +} + +void ScorerDocQueue::downHeap() { + int32_t i = 1; + HeapedScorerDocPtr node(heap[i]); // save top node + int32_t j = i << 1; // find smaller child + int32_t k = j + 1; + if ((k <= _size) && (heap[k]->doc < heap[j]->doc)) { + j = k; + } + while ((j <= _size) && (heap[j]->doc < node->doc)) { + heap[i] = heap[j]; // shift up child + i = j; + j = i << 1; + k = j + 1; + if (k <= _size && (heap[k]->doc < heap[j]->doc)) { j = k; - while ((j <= _size) && (heap[j]->doc < node->doc)) - { - heap[i] = heap[j]; // shift up child - i = j; - j = i << 1; - k = j + 1; - if (k <= _size && (heap[k]->doc < 
heap[j]->doc)) - j = k; } - heap[i] = node; // install saved node - topHSD = heap[1]; - } - - HeapedScorerDoc::HeapedScorerDoc(ScorerPtr scorer) - { - this->scorer = scorer; - this->doc = scorer->docID(); - } - - HeapedScorerDoc::HeapedScorerDoc(ScorerPtr scorer, int32_t doc) - { - this->scorer = scorer; - this->doc = doc; - } - - HeapedScorerDoc::~HeapedScorerDoc() - { - } - - void HeapedScorerDoc::adjust() - { - doc = scorer->docID(); } + heap[i] = node; // install saved node + topHSD = heap[1]; +} + +HeapedScorerDoc::HeapedScorerDoc(const ScorerPtr& scorer) { + this->scorer = scorer; + this->doc = scorer->docID(); +} + +HeapedScorerDoc::HeapedScorerDoc(const ScorerPtr& scorer, int32_t doc) { + this->scorer = scorer; + this->doc = doc; +} + +HeapedScorerDoc::~HeapedScorerDoc() { +} + +void HeapedScorerDoc::adjust() { + doc = scorer->docID(); +} + } diff --git a/src/core/util/SmallDouble.cpp b/src/core/util/SmallDouble.cpp index 36905718..032611b5 100644 --- a/src/core/util/SmallDouble.cpp +++ b/src/core/util/SmallDouble.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -8,44 +8,43 @@ #include "SmallDouble.h" #include "MiscUtils.h" -namespace Lucene -{ - SmallDouble::~SmallDouble() - { +namespace Lucene { + +SmallDouble::~SmallDouble() { +} + +uint8_t SmallDouble::doubleToByte(double f) { + if (f < 0.0) { // round negatives up to zero + f = 0.0; + } + if (f == 0.0) { // zero is a special case + return 0; + } + + int32_t bits = MiscUtils::doubleToIntBits(f); + int32_t mantissa = (bits & 0xffffff) >> 21; + int32_t exponent = (((bits >> 24) & 0x7f) - 63) + 15; + + if (exponent > 31) { // overflow: use max value + exponent = 31; + mantissa = 7; } - - uint8_t SmallDouble::doubleToByte(double f) - { - if (f < 0.0) // round negatives up to zero - f = 0.0; - if (f == 0.0) // zero is a special case - return 0; - - int32_t bits = MiscUtils::doubleToIntBits(f); - int32_t mantissa = (bits & 0xffffff) >> 21; - int32_t exponent = (((bits >> 24) & 0x7f) - 63) + 15; - - if (exponent > 31) // overflow: use max value - { - exponent = 31; - mantissa = 7; - } - if (exponent < 0) // underflow: use min value - { - exponent = 0; - mantissa = 1; - } - - return (uint8_t)((exponent << 3) | mantissa); // pack into a uint8_t + if (exponent < 0) { // underflow: use min value + exponent = 0; + mantissa = 1; } - - double SmallDouble::byteToDouble(uint8_t b) - { - if (b == 0) // zero is a special case - return 0.0; - int32_t mantissa = b & 7; - int32_t exponent = (b >> 3) & 31; - int32_t bits = ((exponent + (63 - 15)) << 24) | (mantissa << 21); - return MiscUtils::intBitsToDouble(bits); + + return (uint8_t)((exponent << 3) | mantissa); // pack into a uint8_t +} + +double SmallDouble::byteToDouble(uint8_t b) { + if (b == 0) { // zero is a special case + return 0.0; } + int32_t mantissa = b & 7; + int32_t exponent = (b >> 3) & 31; + int32_t bits = ((exponent + (63 - 15)) << 24) | (mantissa << 21); + return MiscUtils::intBitsToDouble(bits); +} + } diff --git 
a/src/core/util/SortedVIntList.cpp b/src/core/util/SortedVIntList.cpp index 1f65da5c..c5599ee6 100644 --- a/src/core/util/SortedVIntList.cpp +++ b/src/core/util/SortedVIntList.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,181 +12,160 @@ #include "DocIdSetIterator.h" #include "MiscUtils.h" -namespace Lucene -{ - /// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, a SortedVIntList representing the - /// index numbers of the set bits will be smaller than that BitSet. - const int32_t SortedVIntList::BITS2VINTLIST_SIZE = 8; - - const int32_t SortedVIntList::VB1 = 0x7f; - const int32_t SortedVIntList::BIT_SHIFT = 7; - const int32_t SortedVIntList::MAX_BYTES_PER_INT = (31 / SortedVIntList::BIT_SHIFT) + 1; - - SortedVIntList::SortedVIntList(Collection sortedInts) - { - lastInt = 0; - initBytes(); - for (int32_t i = 0; i < sortedInts.size(); ++i) - addInt(sortedInts[i]); - bytes.resize(lastBytePos); - } - - SortedVIntList::SortedVIntList(Collection sortedInts, int32_t inputSize) - { - lastInt = 0; - initBytes(); - for (int32_t i = 0; i < inputSize; ++i) - addInt(sortedInts[i]); - bytes.resize(lastBytePos); - } - - SortedVIntList::SortedVIntList(BitSetPtr bits) - { - lastInt = 0; - initBytes(); - int32_t nextInt = bits->nextSetBit(0); - while (nextInt != -1) - { - addInt(nextInt); - nextInt = bits->nextSetBit(nextInt + 1); - } - bytes.resize(lastBytePos); - } - - SortedVIntList::SortedVIntList(OpenBitSetPtr bits) - { - lastInt = 0; - initBytes(); - int32_t nextInt = bits->nextSetBit((int32_t)0); - while (nextInt != -1) - { - addInt(nextInt); - nextInt = bits->nextSetBit(nextInt + 1); - 
} - bytes.resize(lastBytePos); - } - - SortedVIntList::SortedVIntList(DocIdSetIteratorPtr docIdSetIterator) - { - lastInt = 0; - initBytes(); - int32_t doc; - while ((doc = docIdSetIterator->nextDoc()) != DocIdSetIterator::NO_MORE_DOCS) - addInt(doc); - bytes.resize(lastBytePos); - } - - SortedVIntList::~SortedVIntList() - { - } - - void SortedVIntList::initBytes() - { - _size = 0; - bytes = ByteArray::newInstance(128); // initial byte size - lastBytePos = 0; - } - - void SortedVIntList::addInt(int32_t nextInt) - { - int32_t diff = nextInt - lastInt; - if (diff < 0) - boost::throw_exception(IllegalArgumentException(L"Input not sorted or first element negative.")); - - if (!bytes || (lastBytePos + MAX_BYTES_PER_INT) > bytes.size()) - { - // biggest possible int does not fit - bytes.resize((bytes.size() * 2) + MAX_BYTES_PER_INT); - } - - // See IndexOutput.writeVInt() - while ((diff & ~VB1) != 0) // The high bit of the next byte needs to be set. - { - bytes[lastBytePos++] = (uint8_t)((diff & VB1) | ~VB1); - diff = MiscUtils::unsignedShift(diff, BIT_SHIFT); - } - bytes[lastBytePos++] = (uint8_t)diff; // Last byte, high bit not set. - ++_size; - lastInt = nextInt; - } - - int32_t SortedVIntList::size() - { - return _size; - } - - int32_t SortedVIntList::getByteSize() - { - return bytes ? bytes.size() : 0; - } - - bool SortedVIntList::isCacheable() - { - return true; +namespace Lucene { + +/// When a BitSet has fewer than 1 in BITS2VINTLIST_SIZE bits set, a SortedVIntList representing the +/// index numbers of the set bits will be smaller than that BitSet. 
+const int32_t SortedVIntList::BITS2VINTLIST_SIZE = 8; + +const int32_t SortedVIntList::VB1 = 0x7f; +const int32_t SortedVIntList::BIT_SHIFT = 7; +const int32_t SortedVIntList::MAX_BYTES_PER_INT = (31 / SortedVIntList::BIT_SHIFT) + 1; + +SortedVIntList::SortedVIntList(Collection sortedInts) { + lastInt = 0; + initBytes(); + for (int32_t i = 0; i < sortedInts.size(); ++i) { + addInt(sortedInts[i]); } - - DocIdSetIteratorPtr SortedVIntList::iterator() - { - return newLucene(shared_from_this()); + bytes.resize(lastBytePos); +} + +SortedVIntList::SortedVIntList(Collection sortedInts, int32_t inputSize) { + lastInt = 0; + initBytes(); + for (int32_t i = 0; i < inputSize; ++i) { + addInt(sortedInts[i]); } - - SortedDocIdSetIterator::SortedDocIdSetIterator(SortedVIntListPtr list) - { - _list = list; - bytePos = 0; - lastInt = 0; - doc = -1; + bytes.resize(lastBytePos); +} + +SortedVIntList::SortedVIntList(const BitSetPtr& bits) { + lastInt = 0; + initBytes(); + int32_t nextInt = bits->nextSetBit(0); + while (nextInt != -1) { + addInt(nextInt); + nextInt = bits->nextSetBit(nextInt + 1); + } + bytes.resize(lastBytePos); +} + +SortedVIntList::SortedVIntList(const OpenBitSetPtr& bits) { + lastInt = 0; + initBytes(); + int32_t nextInt = bits->nextSetBit((int32_t)0); + while (nextInt != -1) { + addInt(nextInt); + nextInt = bits->nextSetBit(nextInt + 1); + } + bytes.resize(lastBytePos); +} + +SortedVIntList::SortedVIntList(const DocIdSetIteratorPtr& docIdSetIterator) { + lastInt = 0; + initBytes(); + int32_t doc; + while ((doc = docIdSetIterator->nextDoc()) != DocIdSetIterator::NO_MORE_DOCS) { + addInt(doc); + } + bytes.resize(lastBytePos); +} + +SortedVIntList::~SortedVIntList() { +} + +void SortedVIntList::initBytes() { + _size = 0; + bytes = ByteArray::newInstance(128); // initial byte size + lastBytePos = 0; +} + +void SortedVIntList::addInt(int32_t nextInt) { + int32_t diff = nextInt - lastInt; + if (diff < 0) { + boost::throw_exception(IllegalArgumentException(L"Input not 
sorted or first element negative.")); } - - SortedDocIdSetIterator::~SortedDocIdSetIterator() - { + + if (!bytes || (lastBytePos + MAX_BYTES_PER_INT) > bytes.size()) { + // biggest possible int does not fit + bytes.resize((bytes.size() * 2) + MAX_BYTES_PER_INT); } - - void SortedDocIdSetIterator::advance() - { - SortedVIntListPtr list(_list); - - // See IndexInput.readVInt() - uint8_t b = list->bytes[bytePos++]; - lastInt += b & list->VB1; - for (int32_t s = list->BIT_SHIFT; (b & ~list->VB1) != 0; s += list->BIT_SHIFT) - { - b = list->bytes[bytePos++]; - lastInt += (b & list->VB1) << s; - } + + // See IndexOutput.writeVInt() + while ((diff & ~VB1) != 0) { // The high bit of the next byte needs to be set. + bytes[lastBytePos++] = (uint8_t)((diff & VB1) | ~VB1); + diff = MiscUtils::unsignedShift(diff, BIT_SHIFT); + } + bytes[lastBytePos++] = (uint8_t)diff; // Last byte, high bit not set. + ++_size; + lastInt = nextInt; +} + +int32_t SortedVIntList::size() { + return _size; +} + +int32_t SortedVIntList::getByteSize() { + return bytes ? 
bytes.size() : 0; +} + +bool SortedVIntList::isCacheable() { + return true; +} + +DocIdSetIteratorPtr SortedVIntList::iterator() { + return newLucene(shared_from_this()); +} + +SortedDocIdSetIterator::SortedDocIdSetIterator(const SortedVIntListPtr& list) { + _list = list; + bytePos = 0; + lastInt = 0; + doc = -1; +} + +SortedDocIdSetIterator::~SortedDocIdSetIterator() { +} + +void SortedDocIdSetIterator::advance() { + SortedVIntListPtr list(_list); + + // See IndexInput.readVInt() + uint8_t b = list->bytes[bytePos++]; + lastInt += b & list->VB1; + for (int32_t s = list->BIT_SHIFT; (b & ~list->VB1) != 0; s += list->BIT_SHIFT) { + b = list->bytes[bytePos++]; + lastInt += (b & list->VB1) << s; } - - int32_t SortedDocIdSetIterator::docID() - { - return doc; +} + +int32_t SortedDocIdSetIterator::docID() { + return doc; +} + +int32_t SortedDocIdSetIterator::nextDoc() { + SortedVIntListPtr list(_list); + if (bytePos >= list->lastBytePos) { + doc = NO_MORE_DOCS; + } else { + advance(); + doc = lastInt; } - - int32_t SortedDocIdSetIterator::nextDoc() - { - SortedVIntListPtr list(_list); - if (bytePos >= list->lastBytePos) - doc = NO_MORE_DOCS; - else - { - advance(); + return doc; +} + +int32_t SortedDocIdSetIterator::advance(int32_t target) { + SortedVIntListPtr list(_list); + while (bytePos < list->lastBytePos) { + advance(); + if (lastInt >= target) { doc = lastInt; + return doc; } - return doc; - } - - int32_t SortedDocIdSetIterator::advance(int32_t target) - { - SortedVIntListPtr list(_list); - while (bytePos < list->lastBytePos) - { - advance(); - if (lastInt >= target) - { - doc = lastInt; - return doc; - } - } - doc = NO_MORE_DOCS; - return doc; } + doc = NO_MORE_DOCS; + return doc; +} + } diff --git a/src/core/util/StringReader.cpp b/src/core/util/StringReader.cpp index c25be7be..81f83e1e 100644 --- a/src/core/util/StringReader.cpp +++ b/src/core/util/StringReader.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// 
Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,50 +7,44 @@ #include "LuceneInc.h" #include "StringReader.h" -namespace Lucene -{ - StringReader::StringReader(const String& str) - { - this->str = str; - this->position = 0; - } - - StringReader::~StringReader() - { - } - - int32_t StringReader::read() - { - return position == (int32_t)str.length() ? READER_EOF : (int32_t)str[position++]; - } - - int32_t StringReader::read(wchar_t* buffer, int32_t offset, int32_t length) - { - if (position >= (int32_t)str.length()) - return READER_EOF; - int32_t readChars = std::min(length, (int32_t)str.length() - position); - std::wcsncpy(buffer + offset, str.c_str() + position, readChars); - position += readChars; - return readChars; - } +namespace Lucene { + +StringReader::StringReader(const String& str) { + this->str = str; + this->position = 0; +} + +StringReader::~StringReader() { +} + +int32_t StringReader::read() { + return position == (int32_t)str.length() ? 
READER_EOF : (int32_t)str[position++]; +} + +int32_t StringReader::read(wchar_t* buffer, int32_t offset, int32_t length) { + if (position >= (int32_t)str.length()) { + return READER_EOF; + } + int32_t readChars = std::min(length, (int32_t)str.length() - position); + std::wcsncpy(buffer + offset, str.c_str() + position, readChars); + position += readChars; + return readChars; +} + +void StringReader::close() { + str.clear(); +} + +bool StringReader::markSupported() { + return false; +} + +void StringReader::reset() { + position = 0; +} + +int64_t StringReader::length() { + return str.length(); +} - void StringReader::close() - { - str.clear(); - } - - bool StringReader::markSupported() - { - return false; - } - - void StringReader::reset() - { - position = 0; - } - - int64_t StringReader::length() - { - return str.length(); - } } diff --git a/src/core/util/StringUtils.cpp b/src/core/util/StringUtils.cpp index dd931e34..76474100 100644 --- a/src/core/util/StringUtils.cpp +++ b/src/core/util/StringUtils.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -13,193 +13,187 @@ #include "Reader.h" #include "CharFolder.h" -namespace Lucene -{ - /// Maximum length of UTF encoding. - const int32_t StringUtils::MAX_ENCODING_UTF8_SIZE = 4; - - /// Default character radix. 
- const int32_t StringUtils::CHARACTER_MAX_RADIX = 36; - - int32_t StringUtils::toUnicode(const uint8_t* utf8, int32_t length, CharArray unicode) - { - if (length == 0) - return 0; - UTF8Decoder utf8Decoder(utf8, utf8 + length); - int32_t decodeLength = utf8Decoder.decode(unicode.get(), unicode.size()); - return decodeLength == Reader::READER_EOF ? 0 : decodeLength; - } - - int32_t StringUtils::toUnicode(const uint8_t* utf8, int32_t length, UnicodeResultPtr unicodeResult) - { - if (length == 0) - unicodeResult->length = 0; - else - { - if (length > unicodeResult->result.size()) - unicodeResult->result.resize(length); - unicodeResult->length = toUnicode(utf8, length, unicodeResult->result); - } - return unicodeResult->length; - } - - String StringUtils::toUnicode(const uint8_t* utf8, int32_t length) - { - if (length == 0) - return L""; - CharArray unicode(CharArray::newInstance(length)); - int32_t result = toUnicode(utf8, length, unicode); - return String(unicode.get(), result); - } - - String StringUtils::toUnicode(const SingleString& s) - { - return s.empty() ? L"" : toUnicode((uint8_t*)s.c_str(), s.length()); - } - - int32_t StringUtils::toUTF8(const wchar_t* unicode, int32_t length, ByteArray utf8) - { - if (length == 0) - return 0; - UTF8Encoder utf8Encoder(unicode, unicode + length); - int32_t encodeLength = utf8Encoder.encode(utf8.get(), utf8.size()); - return encodeLength == Reader::READER_EOF ? 0 : encodeLength; - } - - int32_t StringUtils::toUTF8(const wchar_t* unicode, int32_t length, UTF8ResultPtr utf8Result) - { - if (length == 0) - utf8Result->length = 0; - else - { - if (length * MAX_ENCODING_UTF8_SIZE > utf8Result->result.size()) - utf8Result->result.resize(length * MAX_ENCODING_UTF8_SIZE); - utf8Result->length = toUTF8(unicode, length, utf8Result->result); +namespace Lucene { + +/// Maximum length of UTF encoding. +const int32_t StringUtils::MAX_ENCODING_UTF8_SIZE = 4; + +/// Default character radix. 
+const int32_t StringUtils::CHARACTER_MAX_RADIX = 36; + +int32_t StringUtils::toUnicode(const uint8_t* utf8, int32_t length, CharArray unicode) { + if (length == 0) { + return 0; + } + UTF8Decoder utf8Decoder(utf8, utf8 + length); + int32_t decodeLength = utf8Decoder.decode(unicode.get(), unicode.size()); + return decodeLength == Reader::READER_EOF ? 0 : decodeLength; +} + +int32_t StringUtils::toUnicode(const uint8_t* utf8, int32_t length, const UnicodeResultPtr& unicodeResult) { + if (length == 0) { + unicodeResult->length = 0; + } else { + if (length > unicodeResult->result.size()) { + unicodeResult->result.resize(length); } - return utf8Result->length; - } - - SingleString StringUtils::toUTF8(const wchar_t* unicode, int32_t length) - { - if (length == 0) - return ""; - ByteArray utf8(ByteArray::newInstance(length * MAX_ENCODING_UTF8_SIZE)); - int32_t result = toUTF8(unicode, length, utf8); - return SingleString((char*)utf8.get(), result); - } - - SingleString StringUtils::toUTF8(const String& s) - { - return s.empty() ? 
"" : toUTF8(s.c_str(), s.size()); - } - - void StringUtils::toLower(String& str) - { - CharFolder::toLower(str.begin(), str.end()); - } - - String StringUtils::toLower(const String& str) - { - String lowerStr(str); - toLower(lowerStr); - return lowerStr; - } - - void StringUtils::toUpper(String& str) - { - CharFolder::toUpper(str.begin(), str.end()); - } - - String StringUtils::toUpper(const String& str) - { - String upperStr(str); - toUpper(upperStr); - return upperStr; - } - - int32_t StringUtils::compareCase(const String& first, const String& second) - { - return (toLower(first) == toLower(second)); - } - - Collection StringUtils::split(const String& str, const String& delim) - { - std::vector tokens; - boost::split(tokens, str, boost::is_any_of(delim.c_str())); - return Collection::newInstance(tokens.begin(), tokens.end()); - } - - int32_t StringUtils::toInt(const String& value) - { - if (value.empty()) - boost::throw_exception(NumberFormatException()); - if (value.size() > 1 && value[0] == L'-' && !UnicodeUtil::isDigit(value[1])) - boost::throw_exception(NumberFormatException()); - if (value[0] != L'-' && !UnicodeUtil::isDigit(value[0])) - boost::throw_exception(NumberFormatException()); - return (int32_t)std::wcstol(value.c_str(), NULL, 10); - } - - int64_t StringUtils::toLong(const String& value) - { - if (value.empty()) - boost::throw_exception(NumberFormatException()); - if (value.size() > 1 && value[0] == L'-' && !UnicodeUtil::isDigit(value[1])) - boost::throw_exception(NumberFormatException()); - if (value[0] != L'-' && !UnicodeUtil::isDigit(value[0])) - boost::throw_exception(NumberFormatException()); - #if defined(_WIN32) || defined(_WIN64) - return _wcstoi64(value.c_str(), 0, 10); - #else - return wcstoll(value.c_str(), 0, 10); - #endif - } - - int64_t StringUtils::toLong(const String& value, int32_t base) - { - int64_t longValue = 0; - for (String::const_iterator ptr = value.begin(); ptr != value.end(); ++ptr) - longValue = UnicodeUtil::isDigit(*ptr) 
? (base * longValue) + (*ptr - L'0') : (base * longValue) + (*ptr - L'a' + 10); - return longValue; - } - - double StringUtils::toDouble(const String& value) - { - if (value.empty()) - boost::throw_exception(NumberFormatException()); - if (value.length() > 1 && (value[0] == L'-' || value[0] == L'.') && !UnicodeUtil::isDigit(value[1])) - boost::throw_exception(NumberFormatException()); - if (value[0] != L'-' && value[0] != L'.' && !UnicodeUtil::isDigit(value[0])) - boost::throw_exception(NumberFormatException()); - return std::wcstod(value.c_str(), NULL); - } - - int32_t StringUtils::hashCode(const String& value) - { - int32_t hashCode = 0; - for (String::const_iterator ptr = value.begin(); ptr != value.end(); ++ptr) - hashCode = hashCode * 31 + *ptr; - return hashCode; - } - - String StringUtils::toString(int64_t value, int32_t base) - { - static const wchar_t* digits = L"0123456789abcdefghijklmnopqrstuvwxyz"; - - int32_t bufferSize = (sizeof(int32_t) << 3) + 1; - CharArray baseOutput(CharArray::newInstance(bufferSize)); - - wchar_t* ptr = baseOutput.get() + bufferSize - 1; - *ptr = L'\0'; - - do - { - *--ptr = digits[value % base]; - value /= base; + unicodeResult->length = toUnicode(utf8, length, unicodeResult->result); + } + return unicodeResult->length; +} + +String StringUtils::toUnicode(const uint8_t* utf8, int32_t length) { + if (length == 0) { + return L""; + } + CharArray unicode(CharArray::newInstance(length)); + int32_t result = toUnicode(utf8, length, unicode); + return String(unicode.get(), result); +} + +String StringUtils::toUnicode(const SingleString& s) { + return s.empty() ? L"" : toUnicode((uint8_t*)s.c_str(), s.length()); +} + +int32_t StringUtils::toUTF8(const wchar_t* unicode, int32_t length, ByteArray utf8) { + if (length == 0) { + return 0; + } + UTF8Encoder utf8Encoder(unicode, unicode + length); + int32_t encodeLength = utf8Encoder.encode(utf8.get(), utf8.size()); + return encodeLength == Reader::READER_EOF ? 
0 : encodeLength; +} + +int32_t StringUtils::toUTF8(const wchar_t* unicode, int32_t length, const UTF8ResultPtr& utf8Result) { + if (length == 0) { + utf8Result->length = 0; + } else { + if (length * MAX_ENCODING_UTF8_SIZE > utf8Result->result.size()) { + utf8Result->result.resize(length * MAX_ENCODING_UTF8_SIZE); } - while (ptr > baseOutput.get() && value > 0); - - return String(ptr, (baseOutput.get() + bufferSize - 1) - ptr); + utf8Result->length = toUTF8(unicode, length, utf8Result->result); + } + return utf8Result->length; +} + +SingleString StringUtils::toUTF8(const wchar_t* unicode, int32_t length) { + if (length == 0) { + return ""; + } + ByteArray utf8(ByteArray::newInstance(length * MAX_ENCODING_UTF8_SIZE)); + int32_t result = toUTF8(unicode, length, utf8); + return SingleString((char*)utf8.get(), result); +} + +SingleString StringUtils::toUTF8(const String& s) { + return s.empty() ? "" : toUTF8(s.c_str(), s.size()); +} + +void StringUtils::toLower(String& str) { + CharFolder::toLower(str.begin(), str.end()); +} + +String StringUtils::toLower(const String& str) { + String lowerStr(str); + toLower(lowerStr); + return lowerStr; +} + +void StringUtils::toUpper(String& str) { + CharFolder::toUpper(str.begin(), str.end()); +} + +String StringUtils::toUpper(const String& str) { + String upperStr(str); + toUpper(upperStr); + return upperStr; +} + +int32_t StringUtils::compareCase(const String& first, const String& second) { + return (toLower(first) == toLower(second)); +} + +Collection StringUtils::split(const String& str, const String& delim) { + std::vector tokens; + boost::split(tokens, str, boost::is_any_of(delim.c_str())); + return Collection::newInstance(tokens.begin(), tokens.end()); +} + +int32_t StringUtils::toInt(const String& value) { + if (value.empty()) { + boost::throw_exception(NumberFormatException()); + } + if (value.size() > 1 && value[0] == L'-' && !UnicodeUtil::isDigit(value[1])) { + boost::throw_exception(NumberFormatException()); + } + if 
(value[0] != L'-' && !UnicodeUtil::isDigit(value[0])) { + boost::throw_exception(NumberFormatException()); + } + return (int32_t)std::wcstol(value.c_str(), NULL, 10); +} + +int64_t StringUtils::toLong(const String& value) { + if (value.empty()) { + boost::throw_exception(NumberFormatException()); + } + if (value.size() > 1 && value[0] == L'-' && !UnicodeUtil::isDigit(value[1])) { + boost::throw_exception(NumberFormatException()); + } + if (value[0] != L'-' && !UnicodeUtil::isDigit(value[0])) { + boost::throw_exception(NumberFormatException()); + } +#if defined(_WIN32) || defined(_WIN64) + return _wcstoi64(value.c_str(), 0, 10); +#else + return wcstoll(value.c_str(), 0, 10); +#endif +} + +int64_t StringUtils::toLong(const String& value, int32_t base) { + int64_t longValue = 0; + for (String::const_iterator ptr = value.begin(); ptr != value.end(); ++ptr) { + longValue = UnicodeUtil::isDigit(*ptr) ? (base * longValue) + (*ptr - L'0') : (base * longValue) + (*ptr - L'a' + 10); } + return longValue; +} + +double StringUtils::toDouble(const String& value) { + if (value.empty()) { + boost::throw_exception(NumberFormatException()); + } + if (value.length() > 1 && (value[0] == L'-' || value[0] == L'.') && !UnicodeUtil::isDigit(value[1])) { + boost::throw_exception(NumberFormatException()); + } + if (value[0] != L'-' && value[0] != L'.' 
&& !UnicodeUtil::isDigit(value[0])) { + boost::throw_exception(NumberFormatException()); + } + return std::wcstod(value.c_str(), NULL); +} + +int32_t StringUtils::hashCode(const String& value) { + int32_t hashCode = 0; + for (String::const_iterator ptr = value.begin(); ptr != value.end(); ++ptr) { + hashCode = hashCode * 31 + *ptr; + } + return hashCode; +} + +String StringUtils::toString(int64_t value, int32_t base) { + static const wchar_t* digits = L"0123456789abcdefghijklmnopqrstuvwxyz"; + + int32_t bufferSize = (sizeof(int32_t) << 3) + 1; + CharArray baseOutput(CharArray::newInstance(bufferSize)); + + wchar_t* ptr = baseOutput.get() + bufferSize - 1; + *ptr = L'\0'; + + do { + *--ptr = digits[value % base]; + value /= base; + } while (ptr > baseOutput.get() && value > 0); + + return String(ptr, (baseOutput.get() + bufferSize - 1) - ptr); +} + } diff --git a/src/core/util/Synchronize.cpp b/src/core/util/Synchronize.cpp index 5b72e419..47901076 100644 --- a/src/core/util/Synchronize.cpp +++ b/src/core/util/Synchronize.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -9,71 +9,68 @@ #include "Synchronize.h" #include "LuceneThread.h" -namespace Lucene -{ - Synchronize::Synchronize() - { - lockThread = 0; - recursionCount = 0; - } - - Synchronize::~Synchronize() - { - } - - void Synchronize::createSync(SynchronizePtr& sync) - { - static boost::mutex lockMutex; - boost::mutex::scoped_lock syncLock(lockMutex); - if (!sync) - sync = newInstance(); - } - - void Synchronize::lock(int32_t timeout) - { - if (timeout > 0) - mutexSynchronize.timed_lock(boost::posix_time::milliseconds(timeout)); - else - mutexSynchronize.lock(); - lockThread = LuceneThread::currentId(); - ++recursionCount; - } - - void Synchronize::unlock() - { - if (--recursionCount == 0) - lockThread = 0; - mutexSynchronize.unlock(); +namespace Lucene { + +Synchronize::Synchronize() { + lockThread = 0; + recursionCount = 0; +} + +Synchronize::~Synchronize() { +} + +void Synchronize::createSync(SynchronizePtr& sync) { + static boost::mutex lockMutex; + boost::mutex::scoped_lock syncLock(lockMutex); + if (!sync) { + sync = newInstance(); } - - int32_t Synchronize::unlockAll() - { - int32_t count = recursionCount; - for (int32_t unlock = 0; unlock < count; ++unlock) - this->unlock(); - return count; +} + +void Synchronize::lock(int32_t timeout) { + if (timeout > 0) { + mutexSynchronize.timed_lock(boost::posix_time::milliseconds(timeout)); + } else { + mutexSynchronize.lock(); } - - bool Synchronize::holdsLock() - { - return (lockThread == LuceneThread::currentId() && recursionCount > 0); + lockThread = LuceneThread::currentId(); + ++recursionCount; +} + +void Synchronize::unlock() { + if (--recursionCount == 0) { + lockThread = 0; } - - SyncLock::SyncLock(SynchronizePtr sync, int32_t timeout) - { - this->sync = sync; - lock(timeout); + mutexSynchronize.unlock(); +} + +int32_t Synchronize::unlockAll() { + int32_t count = recursionCount; + for (int32_t unlock = 0; unlock < count; ++unlock) { + 
this->unlock(); } + return count; +} + +bool Synchronize::holdsLock() { + return (lockThread == LuceneThread::currentId() && recursionCount > 0); +} - SyncLock::~SyncLock() - { - if (sync) - sync->unlock(); +SyncLock::SyncLock(const SynchronizePtr& sync, int32_t timeout) { + this->sync = sync; + lock(timeout); +} + +SyncLock::~SyncLock() { + if (sync) { + sync->unlock(); } - - void SyncLock::lock(int32_t timeout) - { - if (sync) - sync->lock(timeout); +} + +void SyncLock::lock(int32_t timeout) { + if (sync) { + sync->lock(timeout); } } + +} diff --git a/src/core/util/TestPoint.cpp b/src/core/util/TestPoint.cpp index 4e57e74b..1319037d 100644 --- a/src/core/util/TestPoint.cpp +++ b/src/core/util/TestPoint.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,59 +7,51 @@ #include "LuceneInc.h" #include "TestPoint.h" -namespace Lucene -{ - MapStringInt TestPoint::testMethods = MapStringInt::newInstance(); - bool TestPoint::enable = false; - - TestPoint::~TestPoint() - { - } - - void TestPoint::enableTestPoints() - { - enable = true; - } - - void TestPoint::clear() - { - SyncLock syncLock(&testMethods); - testMethods.clear(); - } - - void TestPoint::setTestPoint(const String& object, const String& method, bool point) - { - if (enable) - { - SyncLock syncLock(&testMethods); - testMethods.put(object + L":" + method, point); - testMethods.put(method, point); - } - } - - bool TestPoint::getTestPoint(const String& object, const String& method) - { - SyncLock syncLock(&testMethods); - MapStringInt::iterator testMethod = testMethods.find(object + L":" + method); - return testMethod == testMethods.end() ? 
false : (testMethod->second != 0); - } - - bool TestPoint::getTestPoint(const String& method) - { +namespace Lucene { + +MapStringInt TestPoint::testMethods = MapStringInt::newInstance(); +bool TestPoint::enable = false; + +TestPoint::~TestPoint() { +} + +void TestPoint::enableTestPoints() { + enable = true; +} + +void TestPoint::clear() { + SyncLock syncLock(&testMethods); + testMethods.clear(); +} + +void TestPoint::setTestPoint(const String& object, const String& method, bool point) { + if (enable) { SyncLock syncLock(&testMethods); - MapStringInt::iterator testMethod = testMethods.find(method); - return testMethod == testMethods.end() ? false : (testMethod->second != 0); - } - - TestScope::TestScope(const String& object, const String& method) - { - this->object = object; - this->method = method; - TestPoint::setTestPoint(object, method, true); - } - - TestScope::~TestScope() - { - TestPoint::setTestPoint(object, method, false); + testMethods.put(object + L":" + method, point); + testMethods.put(method, point); } } + +bool TestPoint::getTestPoint(const String& object, const String& method) { + SyncLock syncLock(&testMethods); + MapStringInt::iterator testMethod = testMethods.find(object + L":" + method); + return testMethod == testMethods.end() ? false : (testMethod->second != 0); +} + +bool TestPoint::getTestPoint(const String& method) { + SyncLock syncLock(&testMethods); + MapStringInt::iterator testMethod = testMethods.find(method); + return testMethod == testMethods.end() ? 
false : (testMethod->second != 0); +} + +TestScope::TestScope(const String& object, const String& method) { + this->object = object; + this->method = method; + TestPoint::setTestPoint(object, method, true); +} + +TestScope::~TestScope() { + TestPoint::setTestPoint(object, method, false); +} + +} diff --git a/src/core/util/ThreadPool.cpp b/src/core/util/ThreadPool.cpp index fb57326b..8086d8b1 100644 --- a/src/core/util/ThreadPool.cpp +++ b/src/core/util/ThreadPool.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -7,35 +7,32 @@ #include "LuceneInc.h" #include "ThreadPool.h" -namespace Lucene -{ - Future::~Future() - { - } - - const int32_t ThreadPool::THREADPOOL_SIZE = 5; - - ThreadPool::ThreadPool() - { - work.reset(new boost::asio::io_service::work(io_service)); - for (int32_t i = 0; i < THREADPOOL_SIZE; ++i) - threadGroup.create_thread(boost::bind(&boost::asio::io_service::run, &io_service)); - } - - ThreadPool::~ThreadPool() - { - work.reset(); // stop all threads - threadGroup.join_all(); // wait for all competition - } - - ThreadPoolPtr ThreadPool::getInstance() - { - static ThreadPoolPtr threadPool; - if (!threadPool) - { - threadPool = newLucene(); - CycleCheck::addStatic(threadPool); - } - return threadPool; +namespace Lucene { + +Future::~Future() { +} + +const int32_t ThreadPool::THREADPOOL_SIZE = 5; + +ThreadPool::ThreadPool() { + work.reset(new boost::asio::io_service::work(io_service)); + for (int32_t i = 0; i < THREADPOOL_SIZE; ++i) { + threadGroup.create_thread(boost::bind(&boost::asio::io_service::run, &io_service)); } } + +ThreadPool::~ThreadPool() { + work.reset(); // stop all 
threads + threadGroup.join_all(); // wait for all threads to complete +} + +ThreadPoolPtr ThreadPool::getInstance() { + static ThreadPoolPtr threadPool; + LUCENE_RUN_ONCE( + threadPool = newLucene(); + CycleCheck::addStatic(threadPool); + ); + return threadPool; +} + +} diff --git a/src/core/util/UTF8Stream.cpp b/src/core/util/UTF8Stream.cpp index 8efb1404..3762469b 100644 --- a/src/core/util/UTF8Stream.cpp +++ b/src/core/util/UTF8Stream.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -8,415 +8,386 @@ #include "UTF8Stream.h" #include "Reader.h" -namespace Lucene -{ - const uint16_t UTF8Base::LEAD_SURROGATE_MIN = 0xd800u; - const uint16_t UTF8Base::LEAD_SURROGATE_MAX = 0xdbffu; - const uint16_t UTF8Base::TRAIL_SURROGATE_MIN = 0xdc00u; - const uint16_t UTF8Base::TRAIL_SURROGATE_MAX = 0xdfffu; - const uint16_t UTF8Base::LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); - const uint32_t UTF8Base::SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; - - // Maximum valid value for a Unicode code point - const uint32_t UTF8Base::CODE_POINT_MAX = 0x0010ffffu; - - #ifdef LPP_UNICODE_CHAR_SIZE_2 - const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0xfffd; - const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0xffff; - #else - const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0x0001fffd; - const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0x0001ffff; - #endif - - UTF8Base::~UTF8Base() - { - } - - inline uint8_t UTF8Base::mask8(uint32_t b) - { - return static_cast(0xff & b); - } +namespace Lucene { - inline uint16_t UTF8Base::mask16(uint32_t c) - { - return 
static_cast(0xffff & c); - } +const uint16_t UTF8Base::LEAD_SURROGATE_MIN = 0xd800u; +const uint16_t UTF8Base::LEAD_SURROGATE_MAX = 0xdbffu; +const uint16_t UTF8Base::TRAIL_SURROGATE_MIN = 0xdc00u; +const uint16_t UTF8Base::TRAIL_SURROGATE_MAX = 0xdfffu; +const uint16_t UTF8Base::LEAD_OFFSET = LEAD_SURROGATE_MIN - (0x10000 >> 10); +const uint32_t UTF8Base::SURROGATE_OFFSET = 0x10000u - (LEAD_SURROGATE_MIN << 10) - TRAIL_SURROGATE_MIN; - inline bool UTF8Base::isTrail(uint32_t b) - { - return ((mask8(b) >> 6) == 0x2); - } - - inline bool UTF8Base::isSurrogate(uint32_t cp) - { - return (cp >= LEAD_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); - } +// Maximum valid value for a Unicode code point +const uint32_t UTF8Base::CODE_POINT_MAX = 0x0010ffffu; - inline bool UTF8Base::isLeadSurrogate(uint32_t cp) - { - return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); - } +#ifdef LPP_UNICODE_CHAR_SIZE_2 +const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0xfffd; +const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0xffff; +#else +const wchar_t UTF8Base::UNICODE_REPLACEMENT_CHAR = (wchar_t)0x0001fffd; +const wchar_t UTF8Base::UNICODE_TERMINATOR = (wchar_t)0x0001ffff; +#endif - inline bool UTF8Base::isTrailSurrogate(uint32_t cp) - { - return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); - } - - inline bool UTF8Base::isValidCodePoint(uint32_t cp) - { - return (cp <= CODE_POINT_MAX && !isSurrogate(cp) && cp != 0xfffe && cp != 0xffff); - } - - inline bool UTF8Base::isOverlongSequence(uint32_t cp, int32_t length) - { - if (cp < 0x80) - { - if (length != 1) - return true; +UTF8Base::~UTF8Base() { +} + +inline uint8_t UTF8Base::mask8(uint32_t b) { + return static_cast(0xff & b); +} + +inline uint16_t UTF8Base::mask16(uint32_t c) { + return static_cast(0xffff & c); +} + +inline bool UTF8Base::isTrail(uint32_t b) { + return ((mask8(b) >> 6) == 0x2); +} + +inline bool UTF8Base::isSurrogate(uint32_t cp) { + return (cp >= LEAD_SURROGATE_MIN && cp <= 
TRAIL_SURROGATE_MAX); +} + +inline bool UTF8Base::isLeadSurrogate(uint32_t cp) { + return (cp >= LEAD_SURROGATE_MIN && cp <= LEAD_SURROGATE_MAX); +} + +inline bool UTF8Base::isTrailSurrogate(uint32_t cp) { + return (cp >= TRAIL_SURROGATE_MIN && cp <= TRAIL_SURROGATE_MAX); +} + +inline bool UTF8Base::isValidCodePoint(uint32_t cp) { + return (cp <= CODE_POINT_MAX && !isSurrogate(cp) && cp != 0xfffe && cp != 0xffff); +} + +inline bool UTF8Base::isOverlongSequence(uint32_t cp, int32_t length) { + if (cp < 0x80) { + if (length != 1) { + return true; } - else if (cp < 0x800) - { - if (length != 2) - return true; + } else if (cp < 0x800) { + if (length != 2) { + return true; } - else if (cp < 0x10000) - { - if (length != 3) - return true; + } else if (cp < 0x10000) { + if (length != 3) { + return true; } - return false; - } - - UTF8Encoder::UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd) - { - this->unicodeBegin = unicodeBegin; - this->unicodeEnd = unicodeEnd; - } - - UTF8Encoder::~UTF8Encoder() - { - } - - uint32_t UTF8Encoder::readNext() - { - return unicodeBegin == unicodeEnd ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*unicodeBegin++; } - - inline uint8_t* UTF8Encoder::appendChar(uint8_t* utf8, uint32_t cp) - { - if (cp < 0x80) // one octet - *(utf8++) = static_cast(cp); - else if (cp < 0x800) // two octets - { - *(utf8++) = static_cast((cp >> 6) | 0xc0); - *(utf8++) = static_cast((cp & 0x3f) | 0x80); + return false; +} + +UTF8Encoder::UTF8Encoder(const wchar_t* unicodeBegin, const wchar_t* unicodeEnd) { + this->unicodeBegin = unicodeBegin; + this->unicodeEnd = unicodeEnd; +} + +UTF8Encoder::~UTF8Encoder() { +} + +uint32_t UTF8Encoder::readNext() { + return unicodeBegin == unicodeEnd ? 
(uint32_t)UNICODE_TERMINATOR : (uint32_t)*unicodeBegin++; +} + +inline uint8_t* UTF8Encoder::appendChar(uint8_t* utf8, uint32_t cp) { + if (cp < 0x80) { // one octet + *(utf8++) = static_cast(cp); + } else if (cp < 0x800) { // two octets + *(utf8++) = static_cast((cp >> 6) | 0xc0); + *(utf8++) = static_cast((cp & 0x3f) | 0x80); + } else if (cp < 0x10000) { // three octets + *(utf8++) = static_cast((cp >> 12) | 0xe0); + *(utf8++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(utf8++) = static_cast((cp & 0x3f) | 0x80); + } else { // four octets + *(utf8++) = static_cast((cp >> 18) | 0xf0); + *(utf8++) = static_cast(((cp >> 12) & 0x3f) | 0x80); + *(utf8++) = static_cast(((cp >> 6) & 0x3f) | 0x80); + *(utf8++) = static_cast((cp & 0x3f) | 0x80); + } + return utf8; +} + +int32_t UTF8Encoder::utf16to8(uint8_t* utf8, int32_t length) { + uint8_t* start = utf8; + uint32_t next = readNext(); + + while (next != UNICODE_TERMINATOR) { + uint32_t cp = mask16(next); + if (isLeadSurrogate(cp)) { + next = readNext(); + if (next == UNICODE_TERMINATOR) { + return 0; + } + uint32_t trail_surrogate = mask16(next); + if (!isTrailSurrogate(trail_surrogate)) { + return 0; + } + cp = (cp << 10) + trail_surrogate + SURROGATE_OFFSET; + } else if (isTrailSurrogate(cp)) { + return 0; } - else if (cp < 0x10000) // three octets - { - *(utf8++) = static_cast((cp >> 12) | 0xe0); - *(utf8++) = static_cast(((cp >> 6) & 0x3f) | 0x80); - *(utf8++) = static_cast((cp & 0x3f) | 0x80); + if (!isValidCodePoint(cp)) { + return 0; } - else // four octets - { - *(utf8++) = static_cast((cp >> 18) | 0xf0); - *(utf8++) = static_cast(((cp >> 12) & 0x3f) | 0x80); - *(utf8++) = static_cast(((cp >> 6) & 0x3f) | 0x80); - *(utf8++) = static_cast((cp & 0x3f) | 0x80); + utf8 = appendChar(utf8, cp); + if ((utf8 - start) >= length) { + break; } - return utf8; + next = readNext(); } - - int32_t UTF8Encoder::utf16to8(uint8_t* utf8, int32_t length) - { - uint8_t* start = utf8; - uint32_t next = readNext(); - - while (next != 
UNICODE_TERMINATOR) - { - uint32_t cp = mask16(next); - if (isLeadSurrogate(cp)) - { - next = readNext(); - if (next == UNICODE_TERMINATOR) - return 0; - uint32_t trail_surrogate = mask16(next); - if (!isTrailSurrogate(trail_surrogate)) - return 0; - cp = (cp << 10) + trail_surrogate + SURROGATE_OFFSET; - } - else if (isTrailSurrogate(cp)) - return 0; - if (!isValidCodePoint(cp)) - return 0; - utf8 = appendChar(utf8, cp); - if ((utf8 - start) >= length) - break; - next = readNext(); + + return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start); +} + +int32_t UTF8Encoder::utf32to8(uint8_t* utf8, int32_t length) { + uint8_t* start = utf8; + uint32_t next = readNext(); + + while (next != UNICODE_TERMINATOR) { + if (!isValidCodePoint(next)) { + return 0; } - - return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start); - } - - int32_t UTF8Encoder::utf32to8(uint8_t* utf8, int32_t length) - { - uint8_t* start = utf8; - uint32_t next = readNext(); - - while (next != UNICODE_TERMINATOR) - { - if (!isValidCodePoint(next)) - return 0; - utf8 = appendChar(utf8, next); - if ((utf8 - start) >= length) - break; - next = readNext(); + utf8 = appendChar(utf8, next); + if ((utf8 - start) >= length) { + break; } - - return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : (utf8 - start); - } - - int32_t UTF8Encoder::encode(uint8_t* utf8, int32_t length) - { - #ifdef LPP_UNICODE_CHAR_SIZE_2 - return utf16to8(utf8, length); - #else - return utf32to8(utf8, length); - #endif + next = readNext(); } - - UTF8EncoderStream::UTF8EncoderStream(ReaderPtr reader) : UTF8Encoder(NULL, NULL) - { - this->reader = reader; + + return ((utf8 - start) == 0 && next == UNICODE_TERMINATOR) ? 
Reader::READER_EOF : (utf8 - start); +} + +int32_t UTF8Encoder::encode(uint8_t* utf8, int32_t length) { +#ifdef LPP_UNICODE_CHAR_SIZE_2 + return utf16to8(utf8, length); +#else + return utf32to8(utf8, length); +#endif +} + +UTF8EncoderStream::UTF8EncoderStream(const ReaderPtr& reader) : UTF8Encoder(NULL, NULL) { + this->reader = reader; +} + +UTF8EncoderStream::~UTF8EncoderStream() { +} + +uint32_t UTF8EncoderStream::readNext() { + int32_t next = reader->read(); + return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next; +} + +UTF8Decoder::UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End) { + this->utf8Begin = utf8Begin; + this->utf8End = utf8End; +} + +UTF8Decoder::~UTF8Decoder() { +} + +uint32_t UTF8Decoder::readNext() { + return utf8Begin == utf8End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf8Begin++; +} + +inline int32_t UTF8Decoder::sequenceLength(uint32_t cp) { + uint8_t lead = mask8(cp); + if (lead < 0x80) { + return 1; + } else if ((lead >> 5) == 0x6) { + return 2; + } else if ((lead >> 4) == 0xe) { + return 3; + } else if ((lead >> 3) == 0x1e) { + return 4; + } + return 0; +} + +inline bool UTF8Decoder::getSequence(uint32_t& cp, int32_t length) { + cp = mask8(cp); + if (length == 1) { + return true; } - - UTF8EncoderStream::~UTF8EncoderStream() - { + uint32_t next = readNext(); + if (next == UNICODE_TERMINATOR) { + return false; } - - uint32_t UTF8EncoderStream::readNext() - { - int32_t next = reader->read(); - return next == Reader::READER_EOF ? 
UNICODE_TERMINATOR : (uint32_t)next; + if (!isTrail(next)) { + return false; } - - UTF8Decoder::UTF8Decoder(const uint8_t* utf8Begin, const uint8_t* utf8End) - { - this->utf8Begin = utf8Begin; - this->utf8End = utf8End; + if (length == 2) { + cp = ((cp << 6) & 0x7ff) + (next & 0x3f); + return true; } - - UTF8Decoder::~UTF8Decoder() - { + if (length == 3) { + cp = ((cp << 12) & 0xffff) + ((mask8(next) << 6) & 0xfff); + } else { + cp = ((cp << 18) & 0x1fffff) + ((mask8(next) << 12) & 0x3ffff); } - - uint32_t UTF8Decoder::readNext() - { - return utf8Begin == utf8End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf8Begin++; + next = readNext(); + if (next == UNICODE_TERMINATOR) { + return false; } - - inline int32_t UTF8Decoder::sequenceLength(uint32_t cp) - { - uint8_t lead = mask8(cp); - if (lead < 0x80) - return 1; - else if ((lead >> 5) == 0x6) - return 2; - else if ((lead >> 4) == 0xe) - return 3; - else if ((lead >> 3) == 0x1e) - return 4; - return 0; + if (!isTrail(next)) { + return false; } - - inline bool UTF8Decoder::getSequence(uint32_t& cp, int32_t length) - { - cp = mask8(cp); - if (length == 1) - return true; - uint32_t next = readNext(); - if (next == UNICODE_TERMINATOR) - return false; - if (!isTrail(next)) - return false; - if (length == 2) - { - cp = ((cp << 6) & 0x7ff) + (next & 0x3f); - return true; - } - if (length == 3) - cp = ((cp << 12) & 0xffff) + ((mask8(next) << 6) & 0xfff); - else - cp = ((cp << 18) & 0x1fffff) + ((mask8(next) << 12) & 0x3ffff); - next = readNext(); - if (next == UNICODE_TERMINATOR) - return false; - if (!isTrail(next)) - return false; - if (length == 3) - { - cp += next & 0x3f; - return true; - } - cp += (mask8(next) << 6) & 0xfff; - next = readNext(); - if (next == UNICODE_TERMINATOR) - return false; - if (!isTrail(next)) - return false; + if (length == 3) { cp += next & 0x3f; return true; } - - inline bool UTF8Decoder::isValidNext(uint32_t& cp) - { - // Determine the sequence length based on the lead octet - int32_t length 
= sequenceLength(cp); - if (length < 1 && length > 4) - return false; - - // Now that we have a valid sequence length, get trail octets and calculate the code point - if (!getSequence(cp, length)) - return false; - - // Decoding succeeded, now security checks - return (isValidCodePoint(cp) && !isOverlongSequence(cp, length)); - } - - int32_t UTF8Decoder::utf8to16(wchar_t* unicode, int32_t length) - { - int32_t position = 0; - uint32_t next = readNext(); - - while (next != UNICODE_TERMINATOR) - { - if (!isValidNext(next)) - return 0; - if (next > 0xffff) // make a surrogate pair - { - unicode[position++] = static_cast((next >> 10) + LEAD_OFFSET); - unicode[position++] = static_cast((next & 0x3ff) + TRAIL_SURROGATE_MIN); - } - else - unicode[position++] = static_cast(next); - if (position >= length) - break; - next = readNext(); - } - - return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; - } - - int32_t UTF8Decoder::utf8to32(wchar_t* unicode, int32_t length) - { - int32_t position = 0; - uint32_t next = readNext(); - - while (next != UNICODE_TERMINATOR) - { - if (!isValidNext(next)) - return 0; - unicode[position++] = static_cast(next); - if (position >= length) - break; - next = readNext(); - } - - return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; - } - - int32_t UTF8Decoder::decode(wchar_t* unicode, int32_t length) - { - #ifdef LPP_UNICODE_CHAR_SIZE_2 - return utf8to16(unicode, length); - #else - return utf8to32(unicode, length); - #endif - } - - UTF8DecoderStream::UTF8DecoderStream(ReaderPtr reader) : UTF8Decoder(NULL, NULL) - { - this->reader = reader; + cp += (mask8(next) << 6) & 0xfff; + next = readNext(); + if (next == UNICODE_TERMINATOR) { + return false; } - - UTF8DecoderStream::~UTF8DecoderStream() - { + if (!isTrail(next)) { + return false; } - - uint32_t UTF8DecoderStream::readNext() - { - int32_t next = reader->read(); - return next == Reader::READER_EOF ? 
UNICODE_TERMINATOR : (uint32_t)next; + cp += next & 0x3f; + return true; +} + +inline bool UTF8Decoder::isValidNext(uint32_t& cp) { + // Determine the sequence length based on the lead octet + int32_t length = sequenceLength(cp); + if (length < 1 || length > 4) { + return false; } - - UTF16Decoder::UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End) - { - this->utf16Begin = utf16Begin; - this->utf16End = utf16End; + + // Now that we have a valid sequence length, get trail octets and calculate the code point + if (!getSequence(cp, length)) { + return false; } - - UTF16Decoder::~UTF16Decoder() - { + + // Decoding succeeded, now security checks + return (isValidCodePoint(cp) && !isOverlongSequence(cp, length)); +} + +int32_t UTF8Decoder::utf8to16(wchar_t* unicode, int32_t length) { + int32_t position = 0; + uint32_t next = readNext(); + + while (next != UNICODE_TERMINATOR) { + if (!isValidNext(next)) { + return 0; + } + if (next > 0xffff) { // make a surrogate pair + unicode[position++] = static_cast((next >> 10) + LEAD_OFFSET); + unicode[position++] = static_cast((next & 0x3ff) + TRAIL_SURROGATE_MIN); + } else { + unicode[position++] = static_cast(next); + } + if (position >= length) { + break; + } + next = readNext(); } - - uint32_t UTF16Decoder::readNext() - { - return utf16Begin == utf16End ? (uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf16Begin++; + + return (position == 0 && next == UNICODE_TERMINATOR) ? 
Reader::READER_EOF : position; +} + +int32_t UTF8Decoder::utf8to32(wchar_t* unicode, int32_t length) { + int32_t position = 0; + uint32_t next = readNext(); + + while (next != UNICODE_TERMINATOR) { + if (!isValidNext(next)) { + return 0; + } + unicode[position++] = static_cast(next); + if (position >= length) { + break; + } + next = readNext(); } - - int32_t UTF16Decoder::utf16to32(wchar_t* unicode, int32_t length) - { - int32_t position = 0; - uint32_t next = readNext(); - - while (next != UNICODE_TERMINATOR) - { - uint32_t cp = mask16(next); - if (isLeadSurrogate(cp)) - { - next = readNext(); - if (next == UNICODE_TERMINATOR) - return 0; - uint32_t trail_surrogate = mask16(next); - if (!isTrailSurrogate(trail_surrogate)) - return 0; - unicode[position++] = static_cast(((cp - LEAD_SURROGATE_MIN) << 10) + (trail_surrogate - TRAIL_SURROGATE_MIN) + 0x0010000); + + return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; +} + +int32_t UTF8Decoder::decode(wchar_t* unicode, int32_t length) { +#ifdef LPP_UNICODE_CHAR_SIZE_2 + return utf8to16(unicode, length); +#else + return utf8to32(unicode, length); +#endif +} + +UTF8DecoderStream::UTF8DecoderStream(const ReaderPtr& reader) : UTF8Decoder(NULL, NULL) { + this->reader = reader; +} + +UTF8DecoderStream::~UTF8DecoderStream() { +} + +uint32_t UTF8DecoderStream::readNext() { + int32_t next = reader->read(); + return next == Reader::READER_EOF ? UNICODE_TERMINATOR : (uint32_t)next; +} + +UTF16Decoder::UTF16Decoder(const uint16_t* utf16Begin, const uint16_t* utf16End) { + this->utf16Begin = utf16Begin; + this->utf16End = utf16End; +} + +UTF16Decoder::~UTF16Decoder() { +} + +uint32_t UTF16Decoder::readNext() { + return utf16Begin == utf16End ? 
(uint32_t)UNICODE_TERMINATOR : (uint32_t)*utf16Begin++; +} + +int32_t UTF16Decoder::utf16to32(wchar_t* unicode, int32_t length) { + int32_t position = 0; + uint32_t next = readNext(); + + while (next != UNICODE_TERMINATOR) { + uint32_t cp = mask16(next); + if (isLeadSurrogate(cp)) { + next = readNext(); + if (next == UNICODE_TERMINATOR) { + return 0; } - else if (isTrailSurrogate(cp)) + uint32_t trail_surrogate = mask16(next); + if (!isTrailSurrogate(trail_surrogate)) { return 0; - else - unicode[position++] = static_cast(cp); - if (position >= length) - break; - next = readNext(); + } + unicode[position++] = static_cast(((cp - LEAD_SURROGATE_MIN) << 10) + (trail_surrogate - TRAIL_SURROGATE_MIN) + 0x0010000); + } else if (isTrailSurrogate(cp)) { + return 0; + } else { + unicode[position++] = static_cast(cp); } - - return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; - } - - int32_t UTF16Decoder::utf16to16(wchar_t* unicode, int32_t length) - { - int32_t position = 0; - uint32_t next = readNext(); - - while (next != UNICODE_TERMINATOR) - { - unicode[position++] = static_cast(next); - if (position >= length) - break; - next = readNext(); + if (position >= length) { + break; } - - return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; + next = readNext(); } - - int32_t UTF16Decoder::decode(wchar_t* unicode, int32_t length) - { - #ifdef LPP_UNICODE_CHAR_SIZE_2 - return utf16to16(unicode, length); - #else - return utf16to32(unicode, length); - #endif + + return (position == 0 && next == UNICODE_TERMINATOR) ? Reader::READER_EOF : position; +} + +int32_t UTF16Decoder::utf16to16(wchar_t* unicode, int32_t length) { + int32_t position = 0; + uint32_t next = readNext(); + + while (next != UNICODE_TERMINATOR) { + unicode[position++] = static_cast(next); + if (position >= length) { + break; + } + next = readNext(); } + + return (position == 0 && next == UNICODE_TERMINATOR) ? 
Reader::READER_EOF : position; +} + +int32_t UTF16Decoder::decode(wchar_t* unicode, int32_t length) { +#ifdef LPP_UNICODE_CHAR_SIZE_2 + return utf16to16(unicode, length); +#else + return utf16to32(unicode, length); +#endif +} + } diff --git a/src/core/util/UnicodeUtils.cpp b/src/core/util/UnicodeUtils.cpp index c6572db2..f416ecf3 100644 --- a/src/core/util/UnicodeUtils.cpp +++ b/src/core/util/UnicodeUtils.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -9,67 +9,55 @@ #include "UnicodeUtils.h" #include "unicode/guniprop.h" -namespace Lucene -{ - UnicodeUtil::~UnicodeUtil() - { - } - - bool UnicodeUtil::isAlnum(wchar_t c) - { - return g_unichar_isalnum(c); - } - - bool UnicodeUtil::isAlpha(wchar_t c) - { - return g_unichar_isalpha(c); - } - - bool UnicodeUtil::isDigit(wchar_t c) - { - return g_unichar_isdigit(c); - } - - bool UnicodeUtil::isSpace(wchar_t c) - { - return g_unichar_isspace(c); - } - - bool UnicodeUtil::isUpper(wchar_t c) - { - return g_unichar_isupper(c); - } - - bool UnicodeUtil::isLower(wchar_t c) - { - return g_unichar_islower(c); - } - - bool UnicodeUtil::isOther(wchar_t c) - { - return (g_unichar_type(c) == G_UNICODE_OTHER_LETTER); - } - - bool UnicodeUtil::isNonSpacing(wchar_t c) - { - return (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK); - } - - wchar_t UnicodeUtil::toUpper(wchar_t c) - { - return (wchar_t)g_unichar_toupper(c); - } - - wchar_t UnicodeUtil::toLower(wchar_t c) - { - return (wchar_t)g_unichar_tolower(c); - } - - UTF8Result::~UTF8Result() - { - } - - UnicodeResult::~UnicodeResult() - { - } +namespace Lucene { + +UnicodeUtil::~UnicodeUtil() { +} + +bool 
UnicodeUtil::isAlnum(wchar_t c) { + return g_unichar_isalnum(c); +} + +bool UnicodeUtil::isAlpha(wchar_t c) { + return g_unichar_isalpha(c); +} + +bool UnicodeUtil::isDigit(wchar_t c) { + return g_unichar_isdigit(c); +} + +bool UnicodeUtil::isSpace(wchar_t c) { + return g_unichar_isspace(c); +} + +bool UnicodeUtil::isUpper(wchar_t c) { + return g_unichar_isupper(c); +} + +bool UnicodeUtil::isLower(wchar_t c) { + return g_unichar_islower(c); +} + +bool UnicodeUtil::isOther(wchar_t c) { + return (g_unichar_type(c) == G_UNICODE_OTHER_LETTER); +} + +bool UnicodeUtil::isNonSpacing(wchar_t c) { + return (g_unichar_type(c) == G_UNICODE_NON_SPACING_MARK); +} + +wchar_t UnicodeUtil::toUpper(wchar_t c) { + return (wchar_t)g_unichar_toupper(c); +} + +wchar_t UnicodeUtil::toLower(wchar_t c) { + return (wchar_t)g_unichar_tolower(c); +} + +UTF8Result::~UTF8Result() { +} + +UnicodeResult::~UnicodeResult() { +} + } diff --git a/src/core/util/md5/md5.h b/src/core/util/md5/md5.h index 698c995d..fb751737 100644 --- a/src/core/util/md5/md5.h +++ b/src/core/util/md5/md5.h @@ -27,7 +27,7 @@ This code implements the MD5 Algorithm defined in RFC 1321, whose text is available at - http://www.ietf.org/rfc/rfc1321.txt + http://www.ietf.org/rfc/rfc1321.txt The code is derived from the text of the RFC, including the test suite (section A.5) but excluding the rest of Appendix A. It does not include any code or documentation that is identified in the RFC as being @@ -38,12 +38,12 @@ that follows (in reverse chronological order): 2002-04-13 lpd Removed support for non-ANSI compilers; removed - references to Ghostscript; clarified derivation from RFC 1321; - now handles byte order either statically or dynamically. + references to Ghostscript; clarified derivation from RFC 1321; + now handles byte order either statically or dynamically. 1999-11-04 lpd Edited comments slightly for automatic TOC extraction. 
1999-10-18 lpd Fixed typo in header comment (ansi2knr rather than md5); - added conditionalization for C++ compilation from Martin - Purschke . + added conditionalization for C++ compilation from Martin + Purschke . 1999-05-03 lpd Original version. */ @@ -65,24 +65,24 @@ typedef unsigned int md5_word_t; /* 32-bit word */ /* Define the state of the MD5 Algorithm. */ typedef struct md5_state_s { - md5_word_t count[2]; /* message length in bits, lsw first */ - md5_word_t abcd[4]; /* digest buffer */ - md5_byte_t buf[64]; /* accumulate block */ + md5_word_t count[2]; /* message length in bits, lsw first */ + md5_word_t abcd[4]; /* digest buffer */ + md5_byte_t buf[64]; /* accumulate block */ } md5_state_t; #ifdef __cplusplus -extern "C" +extern "C" { #endif /* Initialize the algorithm. */ -void md5_init(md5_state_t *pms); +void md5_init(md5_state_t* pms); /* Append a string to the message. */ -void md5_append(md5_state_t *pms, const md5_byte_t *data, int nbytes); +void md5_append(md5_state_t* pms, const md5_byte_t* data, int nbytes); /* Finish the message and return the digest. 
*/ -void md5_finish(md5_state_t *pms, md5_byte_t digest[16]); +void md5_finish(md5_state_t* pms, md5_byte_t digest[16]); #ifdef __cplusplus } /* end extern "C" */ diff --git a/src/core/util/nedmalloc/License.txt b/src/core/util/nedmalloc/License.txt deleted file mode 100644 index 36b7cd93..00000000 --- a/src/core/util/nedmalloc/License.txt +++ /dev/null @@ -1,23 +0,0 @@ -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
diff --git a/src/core/util/nedmalloc/malloc.c.h b/src/core/util/nedmalloc/malloc.c.h deleted file mode 100644 index 7f0458f7..00000000 --- a/src/core/util/nedmalloc/malloc.c.h +++ /dev/null @@ -1,5694 +0,0 @@ -/* - This is a version (aka dlmalloc) of malloc/free/realloc written by - Doug Lea and released to the public domain, as explained at - http://creativecommons.org/licenses/publicdomain. Send questions, - comments, complaints, performance data, etc to dl@cs.oswego.edu - -* Version pre-2.8.4 Mon Nov 27 11:22:37 2006 (dl at gee) - - Note: There may be an updated version of this malloc obtainable at - ftp://gee.cs.oswego.edu/pub/misc/malloc.c - Check before installing! - -* Quickstart - - This library is all in one file to simplify the most common usage: - ftp it, compile it (-O3), and link it into another program. All of - the compile-time options default to reasonable values for use on - most platforms. You might later want to step through various - compile-time and dynamic tuning options. - - For convenience, an include file for code using this malloc is at: - ftp://gee.cs.oswego.edu/pub/misc/malloc-2.8.4.h - You don't really need this .h file unless you call functions not - defined in your system include files. The .h file contains only the - excerpts from this file needed for using this malloc on ANSI C/C++ - systems, so long as you haven't changed compile-time options about - naming and tuning parameters. If you do, then you can create your - own malloc.h that does include all settings by cutting at the point - indicated below. Note that you may already by default be using a C - library containing a malloc that is based on some version of this - malloc (for example in linux). You might still want to use the one - in this file to customize settings or to avoid overheads associated - with library versions. - -* Vital statistics: - - Supported pointer/size_t representation: 4 or 8 bytes - size_t MUST be an unsigned type of the same width as - pointers. 
(If you are using an ancient system that declares - size_t as a signed type, or need it to be a different width - than pointers, you can use a previous release of this malloc - (e.g. 2.7.2) supporting these.) - - Alignment: 8 bytes (default) - This suffices for nearly all current machines and C compilers. - However, you can define MALLOC_ALIGNMENT to be wider than this - if necessary (up to 128bytes), at the expense of using more space. - - Minimum overhead per allocated chunk: 4 or 8 bytes (if 4byte sizes) - 8 or 16 bytes (if 8byte sizes) - Each malloced chunk has a hidden word of overhead holding size - and status information, and additional cross-check word - if FOOTERS is defined. - - Minimum allocated size: 4-byte ptrs: 16 bytes (including overhead) - 8-byte ptrs: 32 bytes (including overhead) - - Even a request for zero bytes (i.e., malloc(0)) returns a - pointer to something of the minimum allocatable size. - The maximum overhead wastage (i.e., number of extra bytes - allocated than were requested in malloc) is less than or equal - to the minimum size, except for requests >= mmap_threshold that - are serviced via mmap(), where the worst case wastage is about - 32 bytes plus the remainder from a system page (the minimal - mmap unit); typically 4096 or 8192 bytes. - - Security: static-safe; optionally more or less - The "security" of malloc refers to the ability of malicious - code to accentuate the effects of errors (for example, freeing - space that is not currently malloc'ed or overwriting past the - ends of chunks) in code that calls malloc. This malloc - guarantees not to modify any memory locations below the base of - heap, i.e., static variables, even in the presence of usage - errors. The routines additionally detect most improper frees - and reallocs. All this holds as long as the static bookkeeping - for malloc itself is not corrupted by some other means. 
This - is only one aspect of security -- these checks do not, and - cannot, detect all possible programming errors. - - If FOOTERS is defined nonzero, then each allocated chunk - carries an additional check word to verify that it was malloced - from its space. These check words are the same within each - execution of a program using malloc, but differ across - executions, so externally crafted fake chunks cannot be - freed. This improves security by rejecting frees/reallocs that - could corrupt heap memory, in addition to the checks preventing - writes to statics that are always on. This may further improve - security at the expense of time and space overhead. (Note that - FOOTERS may also be worth using with MSPACES.) - - By default detected errors cause the program to abort (calling - "abort()"). You can override this to instead proceed past - errors by defining PROCEED_ON_ERROR. In this case, a bad free - has no effect, and a malloc that encounters a bad address - caused by user overwrites will ignore the bad address by - dropping pointers and indices to all known memory. This may - be appropriate for programs that should continue if at all - possible in the face of programming errors, although they may - run out of memory because dropped memory is never reclaimed. - - If you don't like either of these options, you can define - CORRUPTION_ERROR_ACTION and USAGE_ERROR_ACTION to do anything - else. And if if you are sure that your program using malloc has - no errors or vulnerabilities, you can define INSECURE to 1, - which might (or might not) provide a small performance improvement. - - Thread-safety: NOT thread-safe unless USE_LOCKS defined - When USE_LOCKS is defined, each public call to malloc, free, - etc is surrounded with either a pthread mutex or a win32 - spinlock (depending on WIN32). This is not especially fast, and - can be a major bottleneck. 
It is designed only to provide - minimal protection in concurrent environments, and to provide a - basis for extensions. If you are using malloc in a concurrent - program, consider instead using nedmalloc - (http://www.nedprod.com/programs/portable/nedmalloc/) or - ptmalloc (See http://www.malloc.de), which are derived - from versions of this malloc. - - System requirements: Any combination of MORECORE and/or MMAP/MUNMAP - This malloc can use unix sbrk or any emulation (invoked using - the CALL_MORECORE macro) and/or mmap/munmap or any emulation - (invoked using CALL_MMAP/CALL_MUNMAP) to get and release system - memory. On most unix systems, it tends to work best if both - MORECORE and MMAP are enabled. On Win32, it uses emulations - based on VirtualAlloc. It also uses common C library functions - like memset. - - Compliance: I believe it is compliant with the Single Unix Specification - (See http://www.unix.org). Also SVID/XPG, ANSI C, and probably - others as well. - -* Overview of algorithms - - This is not the fastest, most space-conserving, most portable, or - most tunable malloc ever written. However it is among the fastest - while also being among the most space-conserving, portable and - tunable. Consistent balance across these factors results in a good - general-purpose allocator for malloc-intensive programs. - - In most ways, this malloc is a best-fit allocator. Generally, it - chooses the best-fitting existing chunk for a request, with ties - broken in approximately least-recently-used order. (This strategy - normally maintains low fragmentation.) However, for requests less - than 256bytes, it deviates from best-fit when there is not an - exactly fitting available chunk by preferring to use space adjacent - to that used for the previous small request, as well as by breaking - ties in approximately most-recently-used order. (These enhance - locality of series of small allocations.) 
And for very large requests - (>= 256Kb by default), it relies on system memory mapping - facilities, if supported. (This helps avoid carrying around and - possibly fragmenting memory used only for large chunks.) - - All operations (except malloc_stats and mallinfo) have execution - times that are bounded by a constant factor of the number of bits in - a size_t, not counting any clearing in calloc or copying in realloc, - or actions surrounding MORECORE and MMAP that have times - proportional to the number of non-contiguous regions returned by - system allocation routines, which is often just 1. In real-time - applications, you can optionally suppress segment traversals using - NO_SEGMENT_TRAVERSAL, which assures bounded execution even when - system allocators return non-contiguous spaces, at the typical - expense of carrying around more memory and increased fragmentation. - - The implementation is not very modular and seriously overuses - macros. Perhaps someday all C compilers will do as good a job - inlining modular code as can now be done by brute-force expansion, - but now, enough of them seem not to. - - Some compilers issue a lot of warnings about code that is - dead/unreachable only on some platforms, and also about intentional - uses of negation on unsigned types. All known cases of each can be - ignored. - - For a longer but out of date high-level description, see - http://gee.cs.oswego.edu/dl/html/malloc.html - -* MSPACES - If MSPACES is defined, then in addition to malloc, free, etc., - this file also defines mspace_malloc, mspace_free, etc. These - are versions of malloc routines that take an "mspace" argument - obtained using create_mspace, to control all internal bookkeeping. - If ONLY_MSPACES is defined, only these versions are compiled. - So if you would like to use this allocator for only some allocations, - and your system malloc for others, you can compile with - ONLY_MSPACES and then do something like... 
- static mspace mymspace = create_mspace(0,0); // for example - #define mymalloc(bytes) mspace_malloc(mymspace, bytes) - - (Note: If you only need one instance of an mspace, you can instead - use "USE_DL_PREFIX" to relabel the global malloc.) - - You can similarly create thread-local allocators by storing - mspaces as thread-locals. For example: - static __thread mspace tlms = 0; - void* tlmalloc(size_t bytes) { - if (tlms == 0) tlms = create_mspace(0, 0); - return mspace_malloc(tlms, bytes); - } - void tlfree(void* mem) { mspace_free(tlms, mem); } - - Unless FOOTERS is defined, each mspace is completely independent. - You cannot allocate from one and free to another (although - conformance is only weakly checked, so usage errors are not always - caught). If FOOTERS is defined, then each chunk carries around a tag - indicating its originating mspace, and frees are directed to their - originating spaces. - - ------------------------- Compile-time options --------------------------- - -Be careful in setting #define values for numerical constants of type -size_t. On some systems, literal values are not automatically extended -to size_t precision unless they are explicitly casted. You can also -use the symbolic values MAX_SIZE_T, SIZE_T_ONE, etc below. - -WIN32 default: defined if _WIN32 defined - Defining WIN32 sets up defaults for MS environment and compilers. - Otherwise defaults are for unix. Beware that there seem to be some - cases where this malloc might not be a pure drop-in replacement for - Win32 malloc: Random-looking failures from Win32 GDI API's (eg; - SetDIBits()) may be due to bugs in some video driver implementations - when pixel buffers are malloc()ed, and the region spans more than - one VirtualAlloc()ed region. Because dlmalloc uses a small (64Kb) - default granularity, pixel buffers may straddle virtual allocation - regions more often than when using the Microsoft allocator. 
You can - avoid this by using VirtualAlloc() and VirtualFree() for all pixel - buffers rather than using malloc(). If this is not possible, - recompile this malloc with a larger DEFAULT_GRANULARITY. - -MALLOC_ALIGNMENT default: (size_t)8 - Controls the minimum alignment for malloc'ed chunks. It must be a - power of two and at least 8, even on machines for which smaller - alignments would suffice. It may be defined as larger than this - though. Note however that code and data structures are optimized for - the case of 8-byte alignment. - -MSPACES default: 0 (false) - If true, compile in support for independent allocation spaces. - This is only supported if HAVE_MMAP is true. - -ONLY_MSPACES default: 0 (false) - If true, only compile in mspace versions, not regular versions. - -USE_LOCKS default: 0 (false) - Causes each call to each public routine to be surrounded with - pthread or WIN32 mutex lock/unlock. (If set true, this can be - overridden on a per-mspace basis for mspace versions.) If set to a - non-zero value other than 1, locks are used, but their - implementation is left out, so lock functions must be supplied manually. - -USE_SPIN_LOCKS default: 1 iff USE_LOCKS and on x86 using gcc or MSC - If true, uses custom spin locks for locking. This is currently - supported only for x86 platforms using gcc or recent MS compilers. - Otherwise, posix locks or win32 critical sections are used. - -FOOTERS default: 0 - If true, provide extra checking and dispatching by placing - information in the footers of allocated chunks. This adds - space and time overhead. - -INSECURE default: 0 - If true, omit checks for usage errors and heap space overwrites. - -USE_DL_PREFIX default: NOT defined - Causes compiler to prefix all public routines with the string 'dl'. - This can be useful when you only want to use this malloc in one part - of a program, using your regular system malloc elsewhere. - -ABORT default: defined as abort() - Defines how to abort on failed checks. 
On most systems, a failed - check cannot die with an "assert" or even print an informative - message, because the underlying print routines in turn call malloc, - which will fail again. Generally, the best policy is to simply call - abort(). It's not very useful to do more than this because many - errors due to overwriting will show up as address faults (null, odd - addresses etc) rather than malloc-triggered checks, so will also - abort. Also, most compilers know that abort() does not return, so - can better optimize code conditionally calling it. - -PROCEED_ON_ERROR default: defined as 0 (false) - Controls whether detected bad addresses cause them to bypassed - rather than aborting. If set, detected bad arguments to free and - realloc are ignored. And all bookkeeping information is zeroed out - upon a detected overwrite of freed heap space, thus losing the - ability to ever return it from malloc again, but enabling the - application to proceed. If PROCEED_ON_ERROR is defined, the - static variable malloc_corruption_error_count is compiled in - and can be examined to see if errors have occurred. This option - generates slower code than the default abort policy. - -DEBUG default: NOT defined - The DEBUG setting is mainly intended for people trying to modify - this code or diagnose problems when porting to new platforms. - However, it may also be able to better isolate user errors than just - using runtime checks. The assertions in the check routines spell - out in more detail the assumptions and invariants underlying the - algorithms. The checking is fairly extensive, and will slow down - execution noticeably. Calling malloc_stats or mallinfo with DEBUG - set will attempt to check every non-mmapped allocated and free chunk - in the course of computing the summaries. 
- -ABORT_ON_ASSERT_FAILURE default: defined as 1 (true) - Debugging assertion failures can be nearly impossible if your - version of the assert macro causes malloc to be called, which will - lead to a cascade of further failures, blowing the runtime stack. - ABORT_ON_ASSERT_FAILURE cause assertions failures to call abort(), - which will usually make debugging easier. - -MALLOC_FAILURE_ACTION default: sets errno to ENOMEM, or no-op on win32 - The action to take before "return 0" when malloc fails to be able to - return memory because there is none available. - -HAVE_MORECORE default: 1 (true) unless win32 or ONLY_MSPACES - True if this system supports sbrk or an emulation of it. - -MORECORE default: sbrk - The name of the sbrk-style system routine to call to obtain more - memory. See below for guidance on writing custom MORECORE - functions. The type of the argument to sbrk/MORECORE varies across - systems. It cannot be size_t, because it supports negative - arguments, so it is normally the signed type of the same width as - size_t (sometimes declared as "intptr_t"). It doesn't much matter - though. Internally, we only call it with arguments less than half - the max value of a size_t, which should work across all reasonable - possibilities, although sometimes generating compiler warnings. - -MORECORE_CONTIGUOUS default: 1 (true) if HAVE_MORECORE - If true, take advantage of fact that consecutive calls to MORECORE - with positive arguments always return contiguous increasing - addresses. This is true of unix sbrk. It does not hurt too much to - set it true anyway, since malloc copes with non-contiguities. - Setting it false when definitely non-contiguous saves time - and possibly wasted space it would take to discover this though. - -MORECORE_CANNOT_TRIM default: NOT defined - True if MORECORE cannot release space back to the system when given - negative arguments. 
This is generally necessary only if you are - using a hand-crafted MORECORE function that cannot handle negative - arguments. - -NO_SEGMENT_TRAVERSAL default: 0 - If non-zero, suppresses traversals of memory segments - returned by either MORECORE or CALL_MMAP. This disables - merging of segments that are contiguous, and selectively - releasing them to the OS if unused, but bounds execution times. - -HAVE_MMAP default: 1 (true) - True if this system supports mmap or an emulation of it. If so, and - HAVE_MORECORE is not true, MMAP is used for all system - allocation. If set and HAVE_MORECORE is true as well, MMAP is - primarily used to directly allocate very large blocks. It is also - used as a backup strategy in cases where MORECORE fails to provide - space from system. Note: A single call to MUNMAP is assumed to be - able to unmap memory that may have be allocated using multiple calls - to MMAP, so long as they are adjacent. - -HAVE_MREMAP default: 1 on linux, else 0 - If true realloc() uses mremap() to re-allocate large blocks and - extend or shrink allocation spaces. - -MMAP_CLEARS default: 1 except on WINCE. - True if mmap clears memory so calloc doesn't need to. This is true - for standard unix mmap using /dev/zero and on WIN32 except for WINCE. - -USE_BUILTIN_FFS default: 0 (i.e., not used) - Causes malloc to use the builtin ffs() function to compute indices. - Some compilers may recognize and intrinsify ffs to be faster than the - supplied C version. Also, the case of x86 using gcc is special-cased - to an asm instruction, so is already as fast as it can be, and so - this setting has no effect. Similarly for Win32 under recent MS compilers. - (On most x86s, the asm version is only slightly faster than the C version.) - -malloc_getpagesize default: derive from system includes, or 4096. - The system page size. To the extent possible, this malloc manages - memory from the system in page-size units. 
This may be (and - usually is) a function rather than a constant. This is ignored - if WIN32, where page size is determined using getSystemInfo during - initialization. - -USE_DEV_RANDOM default: 0 (i.e., not used) - Causes malloc to use /dev/random to initialize secure magic seed for - stamping footers. Otherwise, the current time is used. - -NO_MALLINFO default: 0 - If defined, don't compile "mallinfo". This can be a simple way - of dealing with mismatches between system declarations and - those in this file. - -MALLINFO_FIELD_TYPE default: size_t - The type of the fields in the mallinfo struct. This was originally - defined as "int" in SVID etc, but is more usefully defined as - size_t. The value is used only if HAVE_USR_INCLUDE_MALLOC_H is not set - -REALLOC_ZERO_BYTES_FREES default: not defined - This should be set if a call to realloc with zero bytes should - be the same as a call to free. Some people think it should. Otherwise, - since this malloc returns a unique pointer for malloc(0), so does - realloc(p, 0). - -LACKS_UNISTD_H, LACKS_FCNTL_H, LACKS_SYS_PARAM_H, LACKS_SYS_MMAN_H -LACKS_STRINGS_H, LACKS_STRING_H, LACKS_SYS_TYPES_H, LACKS_ERRNO_H -LACKS_STDLIB_H default: NOT defined unless on WIN32 - Define these if your system does not have these header files. - You might need to manually insert some of the declarations they provide. - -DEFAULT_GRANULARITY default: page size if MORECORE_CONTIGUOUS, - system_info.dwAllocationGranularity in WIN32, - otherwise 64K. - Also settable using mallopt(M_GRANULARITY, x) - The unit for allocating and deallocating memory from the system. On - most systems with contiguous MORECORE, there is no reason to - make this more than a page. However, systems with MMAP tend to - either require or encourage larger granularities. You can increase - this value to prevent system allocation functions to be called so - often, especially if they are slow. The value must be at least one - page and must be a power of two. 
Setting to 0 causes initialization - to either page size or win32 region size. (Note: In previous - versions of malloc, the equivalent of this option was called - "TOP_PAD") - -DEFAULT_TRIM_THRESHOLD default: 2MB - Also settable using mallopt(M_TRIM_THRESHOLD, x) - The maximum amount of unused top-most memory to keep before - releasing via malloc_trim in free(). Automatic trimming is mainly - useful in long-lived programs using contiguous MORECORE. Because - trimming via sbrk can be slow on some systems, and can sometimes be - wasteful (in cases where programs immediately afterward allocate - more large chunks) the value should be high enough so that your - overall system performance would improve by releasing this much - memory. As a rough guide, you might set to a value close to the - average size of a process (program) running on your system. - Releasing this much memory would allow such a process to run in - memory. Generally, it is worth tuning trim thresholds when a - program undergoes phases where several large chunks are allocated - and released in ways that can reuse each other's storage, perhaps - mixed with phases where there are no such chunks at all. The trim - value must be greater than page size to have any useful effect. To - disable trimming completely, you can set to MAX_SIZE_T. Note that the trick - some people use of mallocing a huge space and then freeing it at - program startup, in an attempt to reserve system memory, doesn't - have the intended effect under automatic trimming, since that memory - will immediately be returned to the system. - -DEFAULT_MMAP_THRESHOLD default: 256K - Also settable using mallopt(M_MMAP_THRESHOLD, x) - The request size threshold for using MMAP to directly service a - request. Requests of at least this size that cannot be allocated - using already-existing space will be serviced via mmap. (If enough - normal freed space already exists it is used instead.) 
Using mmap - segregates relatively large chunks of memory so that they can be - individually obtained and released from the host system. A request - serviced through mmap is never reused by any other request (at least - not directly; the system may just so happen to remap successive - requests to the same locations). Segregating space in this way has - the benefits that: Mmapped space can always be individually released - back to the system, which helps keep the system level memory demands - of a long-lived program low. Also, mapped memory doesn't become - `locked' between other chunks, as can happen with normally allocated - chunks, which means that even trimming via malloc_trim would not - release them. However, it has the disadvantage that the space - cannot be reclaimed, consolidated, and then used to service later - requests, as happens with normal chunks. The advantages of mmap - nearly always outweigh disadvantages for "large" chunks, but the - value of "large" may vary across systems. The default is an - empirically derived value that works well in most systems. You can - disable mmap by setting to MAX_SIZE_T. - -MAX_RELEASE_CHECK_RATE default: 4095 unless not HAVE_MMAP - The number of consolidated frees between checks to release - unused segments when freeing. When using non-contiguous segments, - especially with multiple mspaces, checking only for topmost space - doesn't always suffice to trigger trimming. To compensate for this, - free() will, with a period of MAX_RELEASE_CHECK_RATE (or the - current number of segments, if greater) try to release unused - segments to the OS when freeing chunks that result in - consolidation. The best value for this parameter is a compromise - between slowing down frees with relatively costly checks that - rarely trigger versus holding on to unused memory. To effectively - disable, set to MAX_SIZE_T. This may lead to a very slight speed - improvement at the expense of carrying around more memory. 
-*/ - -/* Version identifier to allow people to support multiple versions */ -#ifndef DLMALLOC_VERSION -#define DLMALLOC_VERSION 20804 -#endif /* DLMALLOC_VERSION */ - -#ifndef WIN32 -#ifdef _WIN32 -#define WIN32 1 -#endif /* _WIN32 */ -#ifdef _WIN32_WCE -#define LACKS_FCNTL_H -#define WIN32 1 -#endif /* _WIN32_WCE */ -#endif /* WIN32 */ -#ifdef WIN32 -#define WIN32_LEAN_AND_MEAN -#define _WIN32_WINNT 0x403 -#include -#define HAVE_MMAP 1 -#define HAVE_MORECORE 0 -#define LACKS_UNISTD_H -#define LACKS_SYS_PARAM_H -#define LACKS_SYS_MMAN_H -#define LACKS_STRING_H -#define LACKS_STRINGS_H -#define LACKS_SYS_TYPES_H -#define LACKS_ERRNO_H -#ifndef MALLOC_FAILURE_ACTION -#define MALLOC_FAILURE_ACTION -#endif /* MALLOC_FAILURE_ACTION */ -#ifdef _WIN32_WCE /* WINCE reportedly does not clear */ -#define MMAP_CLEARS 0 -#else -#define MMAP_CLEARS 1 -#endif /* _WIN32_WCE */ -#endif /* WIN32 */ - -#if defined(DARWIN) || defined(_DARWIN) -/* Mac OSX docs advise not to use sbrk; it seems better to use mmap */ -#ifndef HAVE_MORECORE -#define HAVE_MORECORE 0 -#define HAVE_MMAP 1 -/* OSX allocators provide 16 byte alignment */ -#ifndef MALLOC_ALIGNMENT -#define MALLOC_ALIGNMENT ((size_t)16U) -#endif -#endif /* HAVE_MORECORE */ -#endif /* DARWIN */ - -#ifndef LACKS_SYS_TYPES_H -#include /* For size_t */ -#endif /* LACKS_SYS_TYPES_H */ - -/* The maximum possible size_t value has all bits set */ -#define MAX_SIZE_T (~(size_t)0) - -#ifndef ONLY_MSPACES -#define ONLY_MSPACES 0 /* define to a value */ -#else -#define ONLY_MSPACES 1 -#endif /* ONLY_MSPACES */ -#ifndef MSPACES -#if ONLY_MSPACES -#define MSPACES 1 -#else /* ONLY_MSPACES */ -#define MSPACES 0 -#endif /* ONLY_MSPACES */ -#endif /* MSPACES */ -#ifndef MALLOC_ALIGNMENT -#define MALLOC_ALIGNMENT ((size_t)8U) -#endif /* MALLOC_ALIGNMENT */ -#ifndef FOOTERS -#define FOOTERS 0 -#endif /* FOOTERS */ -#ifndef ABORT -#define ABORT abort() -#endif /* ABORT */ -#ifndef ABORT_ON_ASSERT_FAILURE -#define ABORT_ON_ASSERT_FAILURE 1 -#endif 
/* ABORT_ON_ASSERT_FAILURE */ -#ifndef PROCEED_ON_ERROR -#define PROCEED_ON_ERROR 0 -#endif /* PROCEED_ON_ERROR */ -#ifndef USE_LOCKS -#define USE_LOCKS 0 -#endif /* USE_LOCKS */ -#ifndef USE_SPIN_LOCKS -#if USE_LOCKS && (defined(__GNUC__) && ((defined(__i386__) || defined(__x86_64__)))) || (defined(_MSC_VER) && _MSC_VER>=1310) -#define USE_SPIN_LOCKS 1 -#else -#define USE_SPIN_LOCKS 0 -#endif /* USE_LOCKS && ... */ -#endif /* USE_SPIN_LOCKS */ -#ifndef INSECURE -#define INSECURE 0 -#endif /* INSECURE */ -#ifndef HAVE_MMAP -#define HAVE_MMAP 1 -#endif /* HAVE_MMAP */ -#ifndef MMAP_CLEARS -#define MMAP_CLEARS 1 -#endif /* MMAP_CLEARS */ -#ifndef HAVE_MREMAP -#ifdef linux -#define HAVE_MREMAP 1 -#else /* linux */ -#define HAVE_MREMAP 0 -#endif /* linux */ -#endif /* HAVE_MREMAP */ -#ifndef MALLOC_FAILURE_ACTION -#define MALLOC_FAILURE_ACTION errno = ENOMEM; -#endif /* MALLOC_FAILURE_ACTION */ -#ifndef HAVE_MORECORE -#if ONLY_MSPACES -#define HAVE_MORECORE 0 -#else /* ONLY_MSPACES */ -#define HAVE_MORECORE 1 -#endif /* ONLY_MSPACES */ -#endif /* HAVE_MORECORE */ -#if !HAVE_MORECORE -#define MORECORE_CONTIGUOUS 0 -#else /* !HAVE_MORECORE */ -#define MORECORE_DEFAULT sbrk -#ifndef MORECORE_CONTIGUOUS -#define MORECORE_CONTIGUOUS 1 -#endif /* MORECORE_CONTIGUOUS */ -#endif /* HAVE_MORECORE */ -#ifndef DEFAULT_GRANULARITY -#if (MORECORE_CONTIGUOUS || defined(WIN32)) -#define DEFAULT_GRANULARITY (0) /* 0 means to compute in init_mparams */ -#else /* MORECORE_CONTIGUOUS */ -#define DEFAULT_GRANULARITY ((size_t)64U * (size_t)1024U) -#endif /* MORECORE_CONTIGUOUS */ -#endif /* DEFAULT_GRANULARITY */ -#ifndef DEFAULT_TRIM_THRESHOLD -#ifndef MORECORE_CANNOT_TRIM -#define DEFAULT_TRIM_THRESHOLD ((size_t)2U * (size_t)1024U * (size_t)1024U) -#else /* MORECORE_CANNOT_TRIM */ -#define DEFAULT_TRIM_THRESHOLD MAX_SIZE_T -#endif /* MORECORE_CANNOT_TRIM */ -#endif /* DEFAULT_TRIM_THRESHOLD */ -#ifndef DEFAULT_MMAP_THRESHOLD -#if HAVE_MMAP -#define DEFAULT_MMAP_THRESHOLD ((size_t)256U * 
(size_t)1024U) -#else /* HAVE_MMAP */ -#define DEFAULT_MMAP_THRESHOLD MAX_SIZE_T -#endif /* HAVE_MMAP */ -#endif /* DEFAULT_MMAP_THRESHOLD */ -#ifndef MAX_RELEASE_CHECK_RATE -#if HAVE_MMAP -#define MAX_RELEASE_CHECK_RATE 4095 -#else -#define MAX_RELEASE_CHECK_RATE MAX_SIZE_T -#endif /* HAVE_MMAP */ -#endif /* MAX_RELEASE_CHECK_RATE */ -#ifndef USE_BUILTIN_FFS -#define USE_BUILTIN_FFS 0 -#endif /* USE_BUILTIN_FFS */ -#ifndef USE_DEV_RANDOM -#define USE_DEV_RANDOM 0 -#endif /* USE_DEV_RANDOM */ -#ifndef NO_MALLINFO -#define NO_MALLINFO 0 -#endif /* NO_MALLINFO */ -#ifndef MALLINFO_FIELD_TYPE -#define MALLINFO_FIELD_TYPE size_t -#endif /* MALLINFO_FIELD_TYPE */ -#ifndef NO_SEGMENT_TRAVERSAL -#define NO_SEGMENT_TRAVERSAL 0 -#endif /* NO_SEGMENT_TRAVERSAL */ - -/* - mallopt tuning options. SVID/XPG defines four standard parameter - numbers for mallopt, normally defined in malloc.h. None of these - are used in this malloc, so setting them has no effect. But this - malloc does support the following options. -*/ - -#define M_TRIM_THRESHOLD (-1) -#define M_GRANULARITY (-2) -#define M_MMAP_THRESHOLD (-3) - -/* ------------------------ Mallinfo declarations ------------------------ */ - -#if !NO_MALLINFO -/* - This version of malloc supports the standard SVID/XPG mallinfo - routine that returns a struct containing usage properties and - statistics. It should work on any system that has a - /usr/include/malloc.h defining struct mallinfo. The main - declaration needed is the mallinfo struct that is returned (by-copy) - by mallinfo(). The malloinfo struct contains a bunch of fields that - are not even meaningful in this version of malloc. These fields are - are instead filled by mallinfo() with other numbers that might be of - interest. - - HAVE_USR_INCLUDE_MALLOC_H should be set if you have a - /usr/include/malloc.h file that includes a declaration of struct - mallinfo. If so, it is included; else a compliant version is - declared below. 
These must be precisely the same for mallinfo() to - work. The original SVID version of this struct, defined on most - systems with mallinfo, declares all fields as ints. But some others - define as unsigned long. If your system defines the fields using a - type of different width than listed here, you MUST #include your - system version and #define HAVE_USR_INCLUDE_MALLOC_H. -*/ - -/* #define HAVE_USR_INCLUDE_MALLOC_H */ - -#ifdef HAVE_USR_INCLUDE_MALLOC_H -#include "/usr/include/malloc.h" -#else /* HAVE_USR_INCLUDE_MALLOC_H */ -#ifndef STRUCT_MALLINFO_DECLARED -#define STRUCT_MALLINFO_DECLARED 1 -struct mallinfo { - MALLINFO_FIELD_TYPE arena; /* non-mmapped space allocated from system */ - MALLINFO_FIELD_TYPE ordblks; /* number of free chunks */ - MALLINFO_FIELD_TYPE smblks; /* always 0 */ - MALLINFO_FIELD_TYPE hblks; /* always 0 */ - MALLINFO_FIELD_TYPE hblkhd; /* space in mmapped regions */ - MALLINFO_FIELD_TYPE usmblks; /* maximum total allocated space */ - MALLINFO_FIELD_TYPE fsmblks; /* always 0 */ - MALLINFO_FIELD_TYPE uordblks; /* total allocated space */ - MALLINFO_FIELD_TYPE fordblks; /* total free space */ - MALLINFO_FIELD_TYPE keepcost; /* releasable (via malloc_trim) space */ -}; -#endif /* STRUCT_MALLINFO_DECLARED */ -#endif /* HAVE_USR_INCLUDE_MALLOC_H */ -#endif /* NO_MALLINFO */ - -/* - Try to persuade compilers to inline. The most critical functions for - inlining are defined as macros, so these aren't used for them. 
-*/ - -#ifndef FORCEINLINE - #if defined(__GNUC__) -#define FORCEINLINE __inline __attribute__ ((always_inline)) - #elif defined(_MSC_VER) - #define FORCEINLINE __forceinline - #endif -#endif -#ifndef NOINLINE - #if defined(__GNUC__) - #define NOINLINE __attribute__ ((noinline)) - #elif defined(_MSC_VER) - #define NOINLINE __declspec(noinline) - #else - #define NOINLINE - #endif -#endif - -#ifdef __cplusplus -extern "C" { -#ifndef FORCEINLINE - #define FORCEINLINE inline -#endif -#endif /* __cplusplus */ -#ifndef FORCEINLINE - #define FORCEINLINE -#endif - -#if !ONLY_MSPACES - -/* ------------------- Declarations of public routines ------------------- */ - -#ifndef USE_DL_PREFIX -#define dlcalloc calloc -#define dlfree free -#define dlmalloc malloc -#define dlmemalign memalign -#define dlrealloc realloc -#define dlvalloc valloc -#define dlpvalloc pvalloc -#define dlmallinfo mallinfo -#define dlmallopt mallopt -#define dlmalloc_trim malloc_trim -#define dlmalloc_stats malloc_stats -#define dlmalloc_usable_size malloc_usable_size -#define dlmalloc_footprint malloc_footprint -#define dlmalloc_max_footprint malloc_max_footprint -#define dlindependent_calloc independent_calloc -#define dlindependent_comalloc independent_comalloc -#endif /* USE_DL_PREFIX */ - - -/* - malloc(size_t n) - Returns a pointer to a newly allocated chunk of at least n bytes, or - null if no space is available, in which case errno is set to ENOMEM - on ANSI C systems. - - If n is zero, malloc returns a minimum-sized chunk. (The minimum - size is 16 bytes on most 32bit systems, and 32 bytes on 64bit - systems.) Note that size_t is an unsigned type, so calls with - arguments that would be negative if signed are interpreted as - requests for huge amounts of space, which will often fail. The - maximum supported value of n differs across systems, but is in all - cases less than the maximum representable value of a size_t. 
-*/ -void* dlmalloc(size_t); - -/* - free(void* p) - Releases the chunk of memory pointed to by p, that had been previously - allocated using malloc or a related routine such as realloc. - It has no effect if p is null. If p was not malloced or already - freed, free(p) will by default cause the current program to abort. -*/ -void dlfree(void*); - -/* - calloc(size_t n_elements, size_t element_size); - Returns a pointer to n_elements * element_size bytes, with all locations - set to zero. -*/ -void* dlcalloc(size_t, size_t); - -/* - realloc(void* p, size_t n) - Returns a pointer to a chunk of size n that contains the same data - as does chunk p up to the minimum of (n, p's size) bytes, or null - if no space is available. - - The returned pointer may or may not be the same as p. The algorithm - prefers extending p in most cases when possible, otherwise it - employs the equivalent of a malloc-copy-free sequence. - - If p is null, realloc is equivalent to malloc. - - If space is not available, realloc returns null, errno is set (if on - ANSI) and p is NOT freed. - - if n is for fewer bytes than already held by p, the newly unused - space is lopped off and freed if possible. realloc with a size - argument of zero (re)allocates a minimum-sized chunk. - - The old unix realloc convention of allowing the last-free'd chunk - to be used as an argument to realloc is not supported. -*/ - -void* dlrealloc(void*, size_t); - -/* - memalign(size_t alignment, size_t n); - Returns a pointer to a newly allocated chunk of n bytes, aligned - in accord with the alignment argument. - - The alignment argument should be a power of two. If the argument is - not a power of two, the nearest greater power is used. - 8-byte alignment is guaranteed by normal malloc calls, so don't - bother calling memalign with an argument of 8 or less. - - Overreliance on memalign is a sure way to fragment space. 
-*/ -void* dlmemalign(size_t, size_t); - -/* - valloc(size_t n); - Equivalent to memalign(pagesize, n), where pagesize is the page - size of the system. If the pagesize is unknown, 4096 is used. -*/ -void* dlvalloc(size_t); - -/* - mallopt(int parameter_number, int parameter_value) - Sets tunable parameters The format is to provide a - (parameter-number, parameter-value) pair. mallopt then sets the - corresponding parameter to the argument value if it can (i.e., so - long as the value is meaningful), and returns 1 if successful else - 0. To workaround the fact that mallopt is specified to use int, - not size_t parameters, the value -1 is specially treated as the - maximum unsigned size_t value. - - SVID/XPG/ANSI defines four standard param numbers for mallopt, - normally defined in malloc.h. None of these are use in this malloc, - so setting them has no effect. But this malloc also supports other - options in mallopt. See below for details. Briefly, supported - parameters are as follows (listed defaults are for "typical" - configurations). - - Symbol param # default allowed param values - M_TRIM_THRESHOLD -1 2*1024*1024 any (-1 disables) - M_GRANULARITY -2 page size any power of 2 >= page size - M_MMAP_THRESHOLD -3 256*1024 any (or 0 if no MMAP support) -*/ -int dlmallopt(int, int); - -/* - malloc_footprint(); - Returns the number of bytes obtained from the system. The total - number of bytes allocated by malloc, realloc etc., is less than this - value. Unlike mallinfo, this function returns only a precomputed - result, so can be called frequently to monitor memory consumption. - Even if locks are otherwise defined, this function does not use them, - so results might not be up to date. -*/ -size_t dlmalloc_footprint(void); - -/* - malloc_max_footprint(); - Returns the maximum number of bytes obtained from the system. This - value will be greater than current footprint if deallocated space - has been reclaimed by the system. 
The peak number of bytes allocated - by malloc, realloc etc., is less than this value. Unlike mallinfo, - this function returns only a precomputed result, so can be called - frequently to monitor memory consumption. Even if locks are - otherwise defined, this function does not use them, so results might - not be up to date. -*/ -size_t dlmalloc_max_footprint(void); - -#if !NO_MALLINFO -/* - mallinfo() - Returns (by copy) a struct containing various summary statistics: - - arena: current total non-mmapped bytes allocated from system - ordblks: the number of free chunks - smblks: always zero. - hblks: current number of mmapped regions - hblkhd: total bytes held in mmapped regions - usmblks: the maximum total allocated space. This will be greater - than current total if trimming has occurred. - fsmblks: always zero - uordblks: current total allocated space (normal or mmapped) - fordblks: total free space - keepcost: the maximum number of bytes that could ideally be released - back to system via malloc_trim. ("ideally" means that - it ignores page restrictions etc.) - - Because these fields are ints, but internal bookkeeping may - be kept as longs, the reported values may wrap around zero and - thus be inaccurate. -*/ -struct mallinfo dlmallinfo(void); -#endif /* NO_MALLINFO */ - -/* - independent_calloc(size_t n_elements, size_t element_size, void* chunks[]); - - independent_calloc is similar to calloc, but instead of returning a - single cleared space, it returns an array of pointers to n_elements - independent elements that can hold contents of size elem_size, each - of which starts out cleared, and can be independently freed, - realloc'ed etc. The elements are guaranteed to be adjacently - allocated (this is not guaranteed to occur with multiple callocs or - mallocs), which may also improve cache locality in some - applications. - - The "chunks" argument is optional (i.e., may be null, which is - probably the most typical usage). 
If it is null, the returned array - is itself dynamically allocated and should also be freed when it is - no longer needed. Otherwise, the chunks array must be of at least - n_elements in length. It is filled in with the pointers to the - chunks. - - In either case, independent_calloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and "chunks" - is null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use regular calloc and assign pointers into this - space to represent elements. (In this case though, you cannot - independently free elements.) - - independent_calloc simplifies and speeds up implementations of many - kinds of pools. It may also be useful when constructing large data - structures that initially have a fixed number of fixed-sized nodes, - but the number is not known at compile time, and some of the nodes - may later need to be freed. For example: - - struct Node { int item; struct Node* next; }; - - struct Node* build_list() { - struct Node** pool; - int n = read_number_of_nodes_needed(); - if (n <= 0) return 0; - pool = (struct Node**)(independent_calloc(n, sizeof(struct Node), 0); - if (pool == 0) die(); - // organize into a linked list... - struct Node* first = pool[0]; - for (i = 0; i < n-1; ++i) - pool[i]->next = pool[i+1]; - free(pool); // Can now free the array (or not, if it is needed later) - return first; - } -*/ -void** dlindependent_calloc(size_t, size_t, void**); - -/* - independent_comalloc(size_t n_elements, size_t sizes[], void* chunks[]); - - independent_comalloc allocates, all at once, a set of n_elements - chunks with sizes indicated in the "sizes" array. It returns - an array of pointers to these elements, each of which can be - independently freed, realloc'ed etc. 
The elements are guaranteed to - be adjacently allocated (this is not guaranteed to occur with - multiple callocs or mallocs), which may also improve cache locality - in some applications. - - The "chunks" argument is optional (i.e., may be null). If it is null - the returned array is itself dynamically allocated and should also - be freed when it is no longer needed. Otherwise, the chunks array - must be of at least n_elements in length. It is filled in with the - pointers to the chunks. - - In either case, independent_comalloc returns this pointer array, or - null if the allocation failed. If n_elements is zero and chunks is - null, it returns a chunk representing an array with zero elements - (which should be freed if not wanted). - - Each element must be individually freed when it is no longer - needed. If you'd like to instead be able to free all at once, you - should instead use a single regular malloc, and assign pointers at - particular offsets in the aggregate space. (In this case though, you - cannot independently free elements.) - - independent_comallac differs from independent_calloc in that each - element may have a different size, and also that it does not - automatically clear elements. - - independent_comalloc can be used to speed up allocation in cases - where several structs or objects must always be allocated at the - same time. For example: - - struct Head { ... } - struct Foot { ... } - - void send_message(char* msg) { - int msglen = strlen(msg); - size_t sizes[3] = { sizeof(struct Head), msglen, sizeof(struct Foot) }; - void* chunks[3]; - if (independent_comalloc(3, sizes, chunks) == 0) - die(); - struct Head* head = (struct Head*)(chunks[0]); - char* body = (char*)(chunks[1]); - struct Foot* foot = (struct Foot*)(chunks[2]); - // ... - } - - In general though, independent_comalloc is worth using only for - larger values of n_elements. For small values, you probably won't - detect enough difference from series of malloc calls to bother. 
- - Overuse of independent_comalloc can increase overall memory usage, - since it cannot reuse existing noncontiguous small chunks that - might be available for some of the elements. -*/ -void** dlindependent_comalloc(size_t, size_t*, void**); - - -/* - pvalloc(size_t n); - Equivalent to valloc(minimum-page-that-holds(n)), that is, - round up n to nearest pagesize. - */ -void* dlpvalloc(size_t); - -/* - malloc_trim(size_t pad); - - If possible, gives memory back to the system (via negative arguments - to sbrk) if there is unused memory at the `high' end of the malloc - pool or in unused MMAP segments. You can call this after freeing - large blocks of memory to potentially reduce the system-level memory - requirements of a program. However, it cannot guarantee to reduce - memory. Under some allocation patterns, some large free blocks of - memory will be locked between two used chunks, so they cannot be - given back to the system. - - The `pad' argument to malloc_trim represents the amount of free - trailing space to leave untrimmed. If this argument is zero, only - the minimum amount of memory to maintain internal data structures - will be left. Non-zero arguments can be supplied to maintain enough - trailing space to service future expected allocations without having - to re-obtain memory from the system. - - Malloc_trim returns 1 if it actually released any memory, else 0. -*/ -int dlmalloc_trim(size_t); - -/* - malloc_stats(); - Prints on stderr the amount of space obtained from the system (both - via sbrk and mmap), the maximum amount (which may be more than - current if malloc_trim and/or munmap got called), and the current - number of bytes allocated via malloc (or realloc, etc) but not yet - freed. Note that this is the number of bytes allocated, not the - number requested. It will be larger than the number requested - because of alignment and bookkeeping overhead. 
Because it includes - alignment wastage as being in use, this figure may be greater than - zero even when no user-level chunks are allocated. - - The reported current and maximum system memory can be inaccurate if - a program makes other calls to system memory allocation functions - (normally sbrk) outside of malloc. - - malloc_stats prints only the most commonly interesting statistics. - More information can be obtained by calling mallinfo. -*/ -void dlmalloc_stats(void); - -#endif /* ONLY_MSPACES */ - -/* - malloc_usable_size(void* p); - - Returns the number of bytes you can actually use in - an allocated chunk, which may be more than you requested (although - often not) due to alignment and minimum size constraints. - You can use this many bytes without worrying about - overwriting other allocated objects. This is not a particularly great - programming practice. malloc_usable_size can be more useful in - debugging and assertions, for example: - - p = malloc(n); - assert(malloc_usable_size(p) >= 256); -*/ -size_t dlmalloc_usable_size(void*); - - -#if MSPACES - -/* - mspace is an opaque type representing an independent - region of space that supports mspace_malloc, etc. -*/ -typedef void* mspace; - -/* - create_mspace creates and returns a new independent space with the - given initial capacity, or, if 0, the default granularity size. It - returns null if there is no system memory available to create the - space. If argument locked is non-zero, the space uses a separate - lock to control access. The capacity of the space will grow - dynamically as needed to service mspace_malloc requests. You can - control the sizes of incremental increases of this space by - compiling with a different DEFAULT_GRANULARITY or dynamically - setting with mallopt(M_GRANULARITY, value). 
-*/ -mspace create_mspace(size_t capacity, int locked); - -/* - destroy_mspace destroys the given space, and attempts to return all - of its memory back to the system, returning the total number of - bytes freed. After destruction, the results of access to all memory - used by the space become undefined. -*/ -size_t destroy_mspace(mspace msp); - -/* - create_mspace_with_base uses the memory supplied as the initial base - of a new mspace. Part (less than 128*sizeof(size_t) bytes) of this - space is used for bookkeeping, so the capacity must be at least this - large. (Otherwise 0 is returned.) When this initial space is - exhausted, additional memory will be obtained from the system. - Destroying this space will deallocate all additionally allocated - space (if possible) but not the initial base. -*/ -mspace create_mspace_with_base(void* base, size_t capacity, int locked); - -/* - mspace_mmap_large_chunks controls whether requests for large chunks - are allocated in their own mmapped regions, separate from others in - this mspace. By default this is enabled, which reduces - fragmentation. However, such chunks are not necessarily released to - the system upon destroy_mspace. Disabling by setting to false may - increase fragmentation, but avoids leakage when relying on - destroy_mspace to release all memory allocated using this space. -*/ -int mspace_mmap_large_chunks(mspace msp, int enable); - - -/* - mspace_malloc behaves as malloc, but operates within - the given space. -*/ -void* mspace_malloc(mspace msp, size_t bytes); - -/* - mspace_free behaves as free, but operates within - the given space. - - If compiled with FOOTERS==1, mspace_free is not actually needed. - free may be called instead of mspace_free because freed chunks from - any space are handled by their originating spaces. -*/ -void mspace_free(mspace msp, void* mem); - -/* - mspace_realloc behaves as realloc, but operates within - the given space. 
- - If compiled with FOOTERS==1, mspace_realloc is not actually - needed. realloc may be called instead of mspace_realloc because - realloced chunks from any space are handled by their originating - spaces. -*/ -void* mspace_realloc(mspace msp, void* mem, size_t newsize); - -/* - mspace_calloc behaves as calloc, but operates within - the given space. -*/ -void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size); - -/* - mspace_memalign behaves as memalign, but operates within - the given space. -*/ -void* mspace_memalign(mspace msp, size_t alignment, size_t bytes); - -/* - mspace_independent_calloc behaves as independent_calloc, but - operates within the given space. -*/ -void** mspace_independent_calloc(mspace msp, size_t n_elements, - size_t elem_size, void* chunks[]); - -/* - mspace_independent_comalloc behaves as independent_comalloc, but - operates within the given space. -*/ -void** mspace_independent_comalloc(mspace msp, size_t n_elements, - size_t sizes[], void* chunks[]); - -/* - mspace_footprint() returns the number of bytes obtained from the - system for this space. -*/ -size_t mspace_footprint(mspace msp); - -/* - mspace_max_footprint() returns the peak number of bytes obtained from the - system for this space. -*/ -size_t mspace_max_footprint(mspace msp); - - -#if !NO_MALLINFO -/* - mspace_mallinfo behaves as mallinfo, but reports properties of - the given space. -*/ -struct mallinfo mspace_mallinfo(mspace msp); -#endif /* NO_MALLINFO */ - -/* - malloc_usable_size(void* p) behaves the same as malloc_usable_size; -*/ - size_t mspace_usable_size(void* mem); - -/* - mspace_malloc_stats behaves as malloc_stats, but reports - properties of the given space. -*/ -void mspace_malloc_stats(mspace msp); - -/* - mspace_trim behaves as malloc_trim, but - operates within the given space. -*/ -int mspace_trim(mspace msp, size_t pad); - -/* - An alias for mallopt. 
-*/ -int mspace_mallopt(int, int); - -#endif /* MSPACES */ - -#ifdef __cplusplus -}; /* end of extern "C" */ -#endif /* __cplusplus */ - -/* - ======================================================================== - To make a fully customizable malloc.h header file, cut everything - above this line, put into file malloc.h, edit to suit, and #include it - on the next line, as well as in programs that use this malloc. - ======================================================================== -*/ - -/* #include "malloc.h" */ - -/*------------------------------ internal #includes ---------------------- */ - -#ifdef WIN32 -#pragma warning( disable : 4146 ) /* no "unsigned" warnings */ -#endif /* WIN32 */ - -#include /* for printing in malloc_stats */ - -#ifndef LACKS_ERRNO_H -#include /* for MALLOC_FAILURE_ACTION */ -#endif /* LACKS_ERRNO_H */ -#if FOOTERS -#include /* for magic initialization */ -#endif /* FOOTERS */ -#ifndef LACKS_STDLIB_H -#include /* for abort() */ -#endif /* LACKS_STDLIB_H */ -#ifdef DEBUG -#if ABORT_ON_ASSERT_FAILURE -#define assert(x) if(!(x)) ABORT -#else /* ABORT_ON_ASSERT_FAILURE */ -#include -#endif /* ABORT_ON_ASSERT_FAILURE */ -#else /* DEBUG */ -#ifndef assert -#define assert(x) -#endif -#define DEBUG 0 -#endif /* DEBUG */ -#ifndef LACKS_STRING_H -#include /* for memset etc */ -#endif /* LACKS_STRING_H */ -#if USE_BUILTIN_FFS -#ifndef LACKS_STRINGS_H -#include /* for ffs */ -#endif /* LACKS_STRINGS_H */ -#endif /* USE_BUILTIN_FFS */ -#if HAVE_MMAP -#ifndef LACKS_SYS_MMAN_H -#include /* for mmap */ -#endif /* LACKS_SYS_MMAN_H */ -#ifndef LACKS_FCNTL_H -#include -#endif /* LACKS_FCNTL_H */ -#endif /* HAVE_MMAP */ -#ifndef LACKS_UNISTD_H -#include /* for sbrk, sysconf */ -#else /* LACKS_UNISTD_H */ -#if !defined(__FreeBSD__) && !defined(__OpenBSD__) && !defined(__NetBSD__) -extern void* sbrk(ptrdiff_t); -#endif /* FreeBSD etc */ -#endif /* LACKS_UNISTD_H */ - -/* Declarations for locking */ -#if USE_LOCKS -#ifndef WIN32 -#include -#if 
defined (__SVR4) && defined (__sun) /* solaris */ -#include -#endif /* solaris */ -#else -#ifndef _M_AMD64 -/* These are already defined on AMD64 builds */ -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ -LONG __cdecl _InterlockedCompareExchange(LONG volatile *Dest, LONG Exchange, LONG Comp); -LONG __cdecl _InterlockedExchange(LONG volatile *Target, LONG Value); -#ifdef __cplusplus -} -#endif /* __cplusplus */ -#endif /* _M_AMD64 */ -#pragma intrinsic (_InterlockedCompareExchange) -#pragma intrinsic (_InterlockedExchange) -#define interlockedcompareexchange _InterlockedCompareExchange -#define interlockedexchange _InterlockedExchange -#endif /* Win32 */ -#endif /* USE_LOCKS */ - -/* Declarations for bit scanning on win32 */ -#if defined(_MSC_VER) && _MSC_VER>=1300 -#ifndef BitScanForward /* Try to avoid pulling in WinNT.h */ -#ifdef __cplusplus -extern "C" { -#endif /* __cplusplus */ -unsigned char _BitScanForward(unsigned long *index, unsigned long mask); -unsigned char _BitScanReverse(unsigned long *index, unsigned long mask); -#ifdef __cplusplus -} -#endif /* __cplusplus */ - -#define BitScanForward _BitScanForward -#define BitScanReverse _BitScanReverse -#pragma intrinsic(_BitScanForward) -#pragma intrinsic(_BitScanReverse) -#endif /* BitScanForward */ -#endif /* defined(_MSC_VER) && _MSC_VER>=1300 */ - -#ifndef WIN32 -#ifndef malloc_getpagesize -# ifdef _SC_PAGESIZE /* some SVR4 systems omit an underscore */ -# ifndef _SC_PAGE_SIZE -# define _SC_PAGE_SIZE _SC_PAGESIZE -# endif -# endif -# ifdef _SC_PAGE_SIZE -# define malloc_getpagesize sysconf(_SC_PAGE_SIZE) -# else -# if defined(BSD) || defined(DGUX) || defined(HAVE_GETPAGESIZE) - extern size_t getpagesize(); -# define malloc_getpagesize getpagesize() -# else -# ifdef WIN32 /* use supplied emulation of getpagesize */ -# define malloc_getpagesize getpagesize() -# else -# ifndef LACKS_SYS_PARAM_H -# include -# endif -# ifdef EXEC_PAGESIZE -# define malloc_getpagesize EXEC_PAGESIZE -# else -# ifdef 
NBPG -# ifndef CLSIZE -# define malloc_getpagesize NBPG -# else -# define malloc_getpagesize (NBPG * CLSIZE) -# endif -# else -# ifdef NBPC -# define malloc_getpagesize NBPC -# else -# ifdef PAGESIZE -# define malloc_getpagesize PAGESIZE -# else /* just guess */ -# define malloc_getpagesize ((size_t)4096U) -# endif -# endif -# endif -# endif -# endif -# endif -# endif -#endif -#endif - - - -/* ------------------- size_t and alignment properties -------------------- */ - -/* The byte and bit size of a size_t */ -#define SIZE_T_SIZE (sizeof(size_t)) -#define SIZE_T_BITSIZE (sizeof(size_t) << 3) - -/* Some constants coerced to size_t */ -/* Annoying but necessary to avoid errors on some platforms */ -#define SIZE_T_ZERO ((size_t)0) -#define SIZE_T_ONE ((size_t)1) -#define SIZE_T_TWO ((size_t)2) -#define SIZE_T_FOUR ((size_t)4) -#define TWO_SIZE_T_SIZES (SIZE_T_SIZE<<1) -#define FOUR_SIZE_T_SIZES (SIZE_T_SIZE<<2) -#define SIX_SIZE_T_SIZES (FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES) -#define HALF_MAX_SIZE_T (MAX_SIZE_T / 2U) - -/* The bit mask value corresponding to MALLOC_ALIGNMENT */ -#define CHUNK_ALIGN_MASK (MALLOC_ALIGNMENT - SIZE_T_ONE) - -/* True if address a has acceptable alignment */ -#define is_aligned(A) (((size_t)((A)) & (CHUNK_ALIGN_MASK)) == 0) - -/* the number of bytes to offset an address to align it */ -#define align_offset(A)\ - ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\ - ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK)) - -/* -------------------------- MMAP preliminaries ------------------------- */ - -/* - If HAVE_MORECORE or HAVE_MMAP are false, we just define calls and - checks to fail so compiler optimizer can delete code rather than - using so many "#if"s. 
-*/ - - -/* MORECORE and MMAP must return MFAIL on failure */ -#define MFAIL ((void*)(MAX_SIZE_T)) -#define CMFAIL ((char*)(MFAIL)) /* defined for convenience */ - -#if HAVE_MMAP - -#ifndef WIN32 -#define MUNMAP_DEFAULT(a, s) munmap((a), (s)) -#define MMAP_PROT (PROT_READ|PROT_WRITE) -#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON) -#define MAP_ANONYMOUS MAP_ANON -#endif /* MAP_ANON */ -#ifdef MAP_ANONYMOUS -#define MMAP_FLAGS (MAP_PRIVATE|MAP_ANONYMOUS) -#define MMAP_DEFAULT(s) mmap(0, (s), MMAP_PROT, MMAP_FLAGS, -1, 0) -#else /* MAP_ANONYMOUS */ -/* - Nearly all versions of mmap support MAP_ANONYMOUS, so the following - is unlikely to be needed, but is supplied just in case. -*/ -#define MMAP_FLAGS (MAP_PRIVATE) -static int dev_zero_fd = -1; /* Cached file descriptor for /dev/zero. */ -#define MMAP_DEFAULT(s) ((dev_zero_fd < 0) ? \ - (dev_zero_fd = open("/dev/zero", O_RDWR), \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) : \ - mmap(0, (s), MMAP_PROT, MMAP_FLAGS, dev_zero_fd, 0)) -#endif /* MAP_ANONYMOUS */ - -#define DIRECT_MMAP_DEFAULT(s) MMAP_DEFAULT(s) - -#else /* WIN32 */ - -/* Win32 MMAP via VirtualAlloc */ -static FORCEINLINE void* win32mmap(size_t size) { - void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE); - return (ptr != 0)? ptr: MFAIL; -} - -/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */ -static FORCEINLINE void* win32direct_mmap(size_t size) { - void* ptr = VirtualAlloc(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, - PAGE_READWRITE); - return (ptr != 0)? 
ptr: MFAIL; -} - -/* This function supports releasing coalesed segments */ -static FORCEINLINE int win32munmap(void* ptr, size_t size) { - MEMORY_BASIC_INFORMATION minfo; - char* cptr = (char*)ptr; - while (size) { - if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0) - return -1; - if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr || - minfo.State != MEM_COMMIT || minfo.RegionSize > size) - return -1; - if (VirtualFree(cptr, 0, MEM_RELEASE) == 0) - return -1; - cptr += minfo.RegionSize; - size -= minfo.RegionSize; - } - return 0; -} - -#define MMAP_DEFAULT(s) win32mmap(s) -#define MUNMAP_DEFAULT(a, s) win32munmap((a), (s)) -#define DIRECT_MMAP_DEFAULT(s) win32direct_mmap(s) -#endif /* WIN32 */ -#endif /* HAVE_MMAP */ - -#if HAVE_MREMAP -#ifndef WIN32 -#define MREMAP_DEFAULT(addr, osz, nsz, mv) mremap((addr), (osz), (nsz), (mv)) -#endif /* WIN32 */ -#endif /* HAVE_MREMAP */ - - -/** - * Define CALL_MORECORE - */ -#if HAVE_MORECORE - #ifdef MORECORE - #define CALL_MORECORE(S) MORECORE(S) - #else /* MORECORE */ - #define CALL_MORECORE(S) MORECORE_DEFAULT(S) - #endif /* MORECORE */ -#else /* HAVE_MORECORE */ - #define CALL_MORECORE(S) MFAIL -#endif /* HAVE_MORECORE */ - -/** - * Define CALL_MMAP/CALL_MUNMAP/CALL_DIRECT_MMAP - */ -#if HAVE_MMAP - #define IS_MMAPPED_BIT (SIZE_T_ONE) - #define USE_MMAP_BIT (SIZE_T_ONE) - - #ifdef MMAP - #define CALL_MMAP(s) MMAP(s) - #else /* MMAP */ - #define CALL_MMAP(s) MMAP_DEFAULT(s) - #endif /* MMAP */ - #ifdef MUNMAP - #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) - #else /* MUNMAP */ - #define CALL_MUNMAP(a, s) MUNMAP_DEFAULT((a), (s)) - #endif /* MUNMAP */ - #ifdef DIRECT_MMAP - #define CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) - #else /* DIRECT_MMAP */ - #define CALL_DIRECT_MMAP(s) DIRECT_MMAP_DEFAULT(s) - #endif /* DIRECT_MMAP */ -#else /* HAVE_MMAP */ - #define IS_MMAPPED_BIT (SIZE_T_ZERO) - #define USE_MMAP_BIT (SIZE_T_ZERO) - - #define MMAP(s) MFAIL - #define MUNMAP(a, s) (-1) - #define DIRECT_MMAP(s) MFAIL - #define 
CALL_DIRECT_MMAP(s) DIRECT_MMAP(s) - #define CALL_MMAP(s) MMAP(s) - #define CALL_MUNMAP(a, s) MUNMAP((a), (s)) -#endif /* HAVE_MMAP */ - -/** - * Define CALL_MREMAP - */ -#if HAVE_MMAP && HAVE_MREMAP - #ifdef MREMAP - #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP((addr), (osz), (nsz), (mv)) - #else /* MREMAP */ - #define CALL_MREMAP(addr, osz, nsz, mv) MREMAP_DEFAULT((addr), (osz), (nsz), (mv)) - #endif /* MREMAP */ -#else /* HAVE_MMAP && HAVE_MREMAP */ - #define CALL_MREMAP(addr, osz, nsz, mv) MFAIL -#endif /* HAVE_MMAP && HAVE_MREMAP */ - -/* mstate bit set if continguous morecore disabled or failed */ -#define USE_NONCONTIGUOUS_BIT (4U) - -/* segment bit set in create_mspace_with_base */ -#define EXTERN_BIT (8U) - - -/* --------------------------- Lock preliminaries ------------------------ */ - -/* - When locks are defined, there is one global lock, plus - one per-mspace lock. - - The global lock_ensures that mparams.magic and other unique - mparams values are initialized only once. It also protects - sequences of calls to MORECORE. In many cases sys_alloc requires - two calls, that should not be interleaved with calls by other - threads. This does not protect against direct calls to MORECORE - by other threads not using this lock, so there is still code to - cope the best we can on interference. - - Per-mspace locks surround calls to malloc, free, etc. To enable use - in layered extensions, per-mspace locks are reentrant. - - Because lock-protected regions generally have bounded times, it is - OK to use the supplied simple spinlocks in the custom versions for - x86. - - If USE_LOCKS is > 1, the definitions of lock routines here are - bypassed, in which case you will need to define at least - INITIAL_LOCK, ACQUIRE_LOCK, RELEASE_LOCK and possibly TRY_LOCK - (which is not used in this malloc, but commonly needed in - extensions.) 
-*/ - -#if USE_LOCKS == 1 - -#if USE_SPIN_LOCKS -#ifndef WIN32 - -/* Custom pthread-style spin locks on x86 and x64 for gcc */ -struct pthread_mlock_t { - volatile unsigned int l; - volatile unsigned int c; - volatile pthread_t threadid; -}; -#define MLOCK_T struct pthread_mlock_t -#define CURRENT_THREAD pthread_self() -#define INITIAL_LOCK(sl) (memset(sl, 0, sizeof(MLOCK_T)), 0) -#define ACQUIRE_LOCK(sl) pthread_acquire_lock(sl) -#define RELEASE_LOCK(sl) pthread_release_lock(sl) -#define TRY_LOCK(sl) pthread_try_lock(sl) -#define SPINS_PER_YIELD 63 - -static MLOCK_T malloc_global_mutex = { 0, 0, 0}; - -static FORCEINLINE int pthread_acquire_lock (MLOCK_T *sl) { - int spins = 0; - volatile unsigned int* lp = &sl->l; - for (;;) { - if (*lp != 0) { - if (sl->threadid == CURRENT_THREAD) { - ++sl->c; - return 0; - } - } - else { - /* place args to cmpxchgl in locals to evade oddities in some gccs */ - int cmp = 0; - int val = 1; - int ret; - __asm__ __volatile__ ("lock; cmpxchgl %1, %2" - : "=a" (ret) - : "r" (val), "m" (*(lp)), "0"(cmp) - : "memory", "cc"); - if (!ret) { - assert(!sl->threadid); - sl->c = 1; - sl->threadid = CURRENT_THREAD; - return 0; - } - if ((++spins & SPINS_PER_YIELD) == 0) { -#if defined (__SVR4) && defined (__sun) /* solaris */ - thr_yield(); -#else -#if defined(__linux__) || defined(__FreeBSD__) || defined(__APPLE__) - sched_yield(); -#else /* no-op yield on unknown systems */ - ; -#endif /* __linux__ || __FreeBSD__ || __APPLE__ */ -#endif /* solaris */ - } - } - } -} - -static FORCEINLINE void pthread_release_lock (MLOCK_T *sl) { - assert(sl->l != 0); - assert(sl->threadid == CURRENT_THREAD); - if (--sl->c == 0) { - sl->threadid = 0; - volatile unsigned int* lp = &sl->l; - int prev = 0; - int ret; - __asm__ __volatile__ ("lock; xchgl %0, %1" - : "=r" (ret) - : "m" (*(lp)), "0"(prev) - : "memory"); - } -} - -static FORCEINLINE int pthread_try_lock (MLOCK_T *sl) { - volatile unsigned int* lp = &sl->l; - if (*lp != 0) { - if (sl->threadid == 
CURRENT_THREAD) { - ++sl->c; - return 1; - } - } - else { - int cmp = 0; - int val = 1; - int ret; - __asm__ __volatile__ ("lock; cmpxchgl %1, %2" - : "=a" (ret) - : "r" (val), "m" (*(lp)), "0"(cmp) - : "memory", "cc"); - if (!ret) { - assert(!sl->threadid); - sl->c = 1; - sl->threadid = CURRENT_THREAD; - return 1; - } - } - return 0; -} - - -#else /* WIN32 */ -/* Custom win32-style spin locks on x86 and x64 for MSC */ -struct win32_mlock_t -{ - volatile long l; - volatile unsigned int c; - volatile long threadid; -}; - -#define MLOCK_T struct win32_mlock_t -#define CURRENT_THREAD win32_getcurrentthreadid() -#define INITIAL_LOCK(sl) (memset(sl, 0, sizeof(MLOCK_T)), 0) -#define ACQUIRE_LOCK(sl) win32_acquire_lock(sl) -#define RELEASE_LOCK(sl) win32_release_lock(sl) -#define TRY_LOCK(sl) win32_try_lock(sl) -#define SPINS_PER_YIELD 63 - -static MLOCK_T malloc_global_mutex = { 0, 0, 0}; - -static FORCEINLINE long win32_getcurrentthreadid() { -#ifdef _MSC_VER -#if defined(_M_IX86) - long *threadstruct=(long *)__readfsdword(0x18); - long threadid=threadstruct[0x24/sizeof(long)]; - return threadid; -#elif defined(_M_X64) - /* todo */ - return GetCurrentThreadId(); -#else - return GetCurrentThreadId(); -#endif -#else - return GetCurrentThreadId(); -#endif -} - -static FORCEINLINE int win32_acquire_lock (MLOCK_T *sl) { - int spins = 0; - for (;;) { - if (sl->l != 0) { - if (sl->threadid == CURRENT_THREAD) { - ++sl->c; - return 0; - } - } - else { - if (!interlockedexchange(&sl->l, 1)) { - assert(!sl->threadid); - sl->c=CURRENT_THREAD; - sl->threadid = CURRENT_THREAD; - sl->c = 1; - return 0; - } - } - if ((++spins & SPINS_PER_YIELD) == 0) - SleepEx(0, FALSE); - } -} - -static FORCEINLINE void win32_release_lock (MLOCK_T *sl) { - assert(sl->threadid == CURRENT_THREAD); - assert(sl->l != 0); - if (--sl->c == 0) { - sl->threadid = 0; - interlockedexchange (&sl->l, 0); - } -} - -static FORCEINLINE int win32_try_lock (MLOCK_T *sl) { - if(sl->l != 0) { - if (sl->threadid == 
CURRENT_THREAD) { - ++sl->c; - return 1; - } - } - else { - if (!interlockedexchange(&sl->l, 1)){ - assert(!sl->threadid); - sl->threadid = CURRENT_THREAD; - sl->c = 1; - return 1; - } - } - return 0; -} - -#endif /* WIN32 */ -#else /* USE_SPIN_LOCKS */ - -#ifndef WIN32 -/* pthreads-based locks */ - -#define MLOCK_T pthread_mutex_t -#define CURRENT_THREAD pthread_self() -#define INITIAL_LOCK(sl) pthread_init_lock(sl) -#define ACQUIRE_LOCK(sl) pthread_mutex_lock(sl) -#define RELEASE_LOCK(sl) pthread_mutex_unlock(sl) -#define TRY_LOCK(sl) (!pthread_mutex_trylock(sl)) - -static MLOCK_T malloc_global_mutex = PTHREAD_MUTEX_INITIALIZER; - -/* Cope with old-style linux recursive lock initialization by adding */ -/* skipped internal declaration from pthread.h */ -#ifdef linux -#ifndef PTHREAD_MUTEX_RECURSIVE -extern int pthread_mutexattr_setkind_np __P ((pthread_mutexattr_t *__attr, - int __kind)); -#define PTHREAD_MUTEX_RECURSIVE PTHREAD_MUTEX_RECURSIVE_NP -#define pthread_mutexattr_settype(x,y) pthread_mutexattr_setkind_np(x,y) -#endif -#endif - -static int pthread_init_lock (MLOCK_T *sl) { - pthread_mutexattr_t attr; - if (pthread_mutexattr_init(&attr)) return 1; - if (pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE)) return 1; - if (pthread_mutex_init(sl, &attr)) return 1; - if (pthread_mutexattr_destroy(&attr)) return 1; - return 0; -} - -#else /* WIN32 */ -/* Win32 critical sections */ -#define MLOCK_T CRITICAL_SECTION -#define CURRENT_THREAD GetCurrentThreadId() -#define INITIAL_LOCK(s) (!InitializeCriticalSectionAndSpinCount((s), 0x80000000|4000)) -#define ACQUIRE_LOCK(s) (EnterCriticalSection(s), 0) -#define RELEASE_LOCK(s) LeaveCriticalSection(s) -#define TRY_LOCK(s) TryEnterCriticalSection(s) -#define NEED_GLOBAL_LOCK_INIT - -static MLOCK_T malloc_global_mutex; -static volatile long malloc_global_mutex_status; - -/* Use spin loop to initialize global lock */ -static void init_malloc_global_mutex() { - for (;;) { - long stat = malloc_global_mutex_status; 
- if (stat > 0) - return; - /* transition to < 0 while initializing, then to > 0) */ - if (stat == 0 && - interlockedcompareexchange(&malloc_global_mutex_status, -1, 0) == 0) { - InitializeCriticalSection(&malloc_global_mutex); - interlockedexchange(&malloc_global_mutex_status,1); - return; - } - SleepEx(0, FALSE); - } -} - -#endif /* WIN32 */ -#endif /* USE_SPIN_LOCKS */ -#endif /* USE_LOCKS == 1 */ - -/* ----------------------- User-defined locks ------------------------ */ - -#if USE_LOCKS > 1 -/* Define your own lock implementation here */ -/* #define INITIAL_LOCK(sl) ... */ -/* #define ACQUIRE_LOCK(sl) ... */ -/* #define RELEASE_LOCK(sl) ... */ -/* #define TRY_LOCK(sl) ... */ -/* static MLOCK_T malloc_global_mutex = ... */ -#endif /* USE_LOCKS > 1 */ - -/* ----------------------- Lock-based state ------------------------ */ - -#if USE_LOCKS -#define USE_LOCK_BIT (2U) -#else /* USE_LOCKS */ -#define USE_LOCK_BIT (0U) -#define INITIAL_LOCK(l) -#endif /* USE_LOCKS */ - -#if USE_LOCKS -#define ACQUIRE_MALLOC_GLOBAL_LOCK() ACQUIRE_LOCK(&malloc_global_mutex); -#define RELEASE_MALLOC_GLOBAL_LOCK() RELEASE_LOCK(&malloc_global_mutex); -#else /* USE_LOCKS */ -#define ACQUIRE_MALLOC_GLOBAL_LOCK() -#define RELEASE_MALLOC_GLOBAL_LOCK() -#endif /* USE_LOCKS */ - - -/* ----------------------- Chunk representations ------------------------ */ - -/* - (The following includes lightly edited explanations by Colin Plumb.) - - The malloc_chunk declaration below is misleading (but accurate and - necessary). It declares a "view" into memory allowing access to - necessary fields at known offsets from a given base. - - Chunks of memory are maintained using a `boundary tag' method as - originally described by Knuth. (See the paper by Paul Wilson - ftp://ftp.cs.utexas.edu/pub/garbage/allocsrv.ps for a survey of such - techniques.) Sizes of free chunks are stored both in the front of - each chunk and at the end. This makes consolidating fragmented - chunks into bigger chunks fast. 
The head fields also hold bits - representing whether chunks are free or in use. - - Here are some pictures to make it clearer. They are "exploded" to - show that the state of a chunk can be thought of as extending from - the high 31 bits of the head field of its header through the - prev_foot and PINUSE_BIT bit of the following chunk header. - - A chunk that's in use looks like: - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk (if P = 0) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| - | Size of this chunk 1| +-+ - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | | - +- -+ - | | - +- -+ - | : - +- size - sizeof(size_t) available payload bytes -+ - : | - chunk-> +- -+ - | | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |1| - | Size of next chunk (may or may not be in use) | +-+ - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - And if it's free, it looks like this: - - chunk-> +- -+ - | User payload (must be in use, or we would have merged!) 
| - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |P| - | Size of this chunk 0| +-+ - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Next pointer | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Prev pointer | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +- size - sizeof(struct chunk) unused bytes -+ - : | - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of this chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ |0| - | Size of next chunk (must be in use, or we would have merged)| +-+ - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | : - +- User payload -+ - : | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - |0| - +-+ - Note that since we always merge adjacent free chunks, the chunks - adjacent to a free chunk must be in use. - - Given a pointer to a chunk (which can be derived trivially from the - payload pointer) we can, in O(1) time, find out whether the adjacent - chunks are free, and if so, unlink them from the lists that they - are on and merge them with the current chunk. - - Chunks always begin on even word boundaries, so the mem portion - (which is returned to the user) is also on an even word boundary, and - thus at least double-word aligned. - - The P (PINUSE_BIT) bit, stored in the unused low-order bit of the - chunk size (which is always a multiple of two words), is an in-use - bit for the *previous* chunk. If that bit is *clear*, then the - word before the current chunk size contains the previous chunk - size, and can be used to find the front of the previous chunk. - The very first chunk allocated always has this bit set, preventing - access to non-existent (or non-owned) memory. 
If pinuse is set for - any given chunk, then you CANNOT determine the size of the - previous chunk, and might even get a memory addressing fault when - trying to do so. - - The C (CINUSE_BIT) bit, stored in the unused second-lowest bit of - the chunk size redundantly records whether the current chunk is - inuse. This redundancy enables usage checks within free and realloc, - and reduces indirection when freeing and consolidating chunks. - - Each freshly allocated chunk must have both cinuse and pinuse set. - That is, each allocated chunk borders either a previously allocated - and still in-use chunk, or the base of its memory arena. This is - ensured by making all allocations from the the `lowest' part of any - found chunk. Further, no free chunk physically borders another one, - so each free chunk is known to be preceded and followed by either - inuse chunks or the ends of memory. - - Note that the `foot' of the current chunk is actually represented - as the prev_foot of the NEXT chunk. This makes it easier to - deal with alignments etc but can be very confusing when trying - to extend or adapt this code. - - The exceptions to all this are - - 1. The special chunk `top' is the top-most available chunk (i.e., - the one bordering the end of available memory). It is treated - specially. Top is never included in any bin, is used only if - no other chunk is available, and is released back to the - system if it is very large (see M_TRIM_THRESHOLD). In effect, - the top chunk is treated as larger (and thus less well - fitting) than any other available chunk. The top chunk - doesn't update its trailing size field since there is no next - contiguous chunk that would have to index off it. However, - space is still allocated for it (TOP_FOOT_SIZE) to enable - separation or merging when space is extended. - - 3. Chunks allocated via mmap, which have the lowest-order bit - (IS_MMAPPED_BIT) set in their prev_foot fields, and do not set - PINUSE_BIT in their head fields. 
Because they are allocated - one-by-one, each must carry its own prev_foot field, which is - also used to hold the offset this chunk has within its mmapped - region, which is needed to preserve alignment. Each mmapped - chunk is trailed by the first two fields of a fake next-chunk - for sake of usage checks. - -*/ - -struct malloc_chunk { - size_t prev_foot; /* Size of previous chunk (if free). */ - size_t head; /* Size and inuse bits. */ - struct malloc_chunk* fd; /* double links -- used only if free. */ - struct malloc_chunk* bk; -}; - -typedef struct malloc_chunk mchunk; -typedef struct malloc_chunk* mchunkptr; -typedef struct malloc_chunk* sbinptr; /* The type of bins of chunks */ -typedef unsigned int bindex_t; /* Described below */ -typedef unsigned int binmap_t; /* Described below */ -typedef unsigned int flag_t; /* The type of various bit flag sets */ - -/* ------------------- Chunks sizes and alignments ----------------------- */ - -#define MCHUNK_SIZE (sizeof(mchunk)) - -#if FOOTERS -#define CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) -#else /* FOOTERS */ -#define CHUNK_OVERHEAD (SIZE_T_SIZE) -#endif /* FOOTERS */ - -/* MMapped chunks need a second word of overhead ... */ -#define MMAP_CHUNK_OVERHEAD (TWO_SIZE_T_SIZES) -/* ... and additional padding for fake next-chunk at foot */ -#define MMAP_FOOT_PAD (FOUR_SIZE_T_SIZES) - -/* The smallest size we can malloc is an aligned minimal chunk */ -#define MIN_CHUNK_SIZE\ - ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) - -/* conversion from malloc headers to user pointers, and back */ -#define chunk2mem(p) ((void*)((char*)(p) + TWO_SIZE_T_SIZES)) -#define mem2chunk(mem) ((mchunkptr)((char*)(mem) - TWO_SIZE_T_SIZES)) -/* chunk associated with aligned address A */ -#define align_as_chunk(A) (mchunkptr)((A) + align_offset(chunk2mem(A))) - -/* Bounds on request (not chunk) sizes. 
*/ -#define MAX_REQUEST ((-MIN_CHUNK_SIZE) << 2) -#define MIN_REQUEST (MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE) - -/* pad request bytes into a usable size */ -#define pad_request(req) \ - (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK) - -/* pad request, checking for minimum (but not maximum) */ -#define request2size(req) \ - (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req)) - - -/* ------------------ Operations on head and foot fields ----------------- */ - -/* - The head field of a chunk is or'ed with PINUSE_BIT when previous - adjacent chunk in use, and or'ed with CINUSE_BIT if this chunk is in - use. If the chunk was obtained with mmap, the prev_foot field has - IS_MMAPPED_BIT set, otherwise holding the offset of the base of the - mmapped region to the base of the chunk. - - FLAG4_BIT is not used by this malloc, but might be useful in extensions. -*/ - -#define PINUSE_BIT (SIZE_T_ONE) -#define CINUSE_BIT (SIZE_T_TWO) -#define FLAG4_BIT (SIZE_T_FOUR) -#define INUSE_BITS (PINUSE_BIT|CINUSE_BIT) -#define FLAG_BITS (PINUSE_BIT|CINUSE_BIT|FLAG4_BIT) - -/* Head value for fenceposts */ -#define FENCEPOST_HEAD (INUSE_BITS|SIZE_T_SIZE) - -/* extraction of fields from head words */ -#define cinuse(p) ((p)->head & CINUSE_BIT) -#define pinuse(p) ((p)->head & PINUSE_BIT) -#define chunksize(p) ((p)->head & ~(FLAG_BITS)) - -#define clear_pinuse(p) ((p)->head &= ~PINUSE_BIT) -#define clear_cinuse(p) ((p)->head &= ~CINUSE_BIT) - -/* Treat space at ptr +/- offset as a chunk */ -#define chunk_plus_offset(p, s) ((mchunkptr)(((char*)(p)) + (s))) -#define chunk_minus_offset(p, s) ((mchunkptr)(((char*)(p)) - (s))) - -/* Ptr to next or previous physical malloc_chunk. 
*/ -#define next_chunk(p) ((mchunkptr)( ((char*)(p)) + ((p)->head & ~FLAG_BITS))) -#define prev_chunk(p) ((mchunkptr)( ((char*)(p)) - ((p)->prev_foot) )) - -/* extract next chunk's pinuse bit */ -#define next_pinuse(p) ((next_chunk(p)->head) & PINUSE_BIT) - -/* Get/set size at footer */ -#define get_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot) -#define set_foot(p, s) (((mchunkptr)((char*)(p) + (s)))->prev_foot = (s)) - -/* Set size, pinuse bit, and foot */ -#define set_size_and_pinuse_of_free_chunk(p, s)\ - ((p)->head = (s|PINUSE_BIT), set_foot(p, s)) - -/* Set size, pinuse bit, foot, and clear next pinuse */ -#define set_free_with_pinuse(p, s, n)\ - (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s)) - -#define is_mmapped(p)\ - (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_MMAPPED_BIT)) - -/* Get the internal overhead associated with chunk p */ -#define overhead_for(p)\ - (is_mmapped(p)? MMAP_CHUNK_OVERHEAD : CHUNK_OVERHEAD) - -/* Return true if malloced space is not necessarily cleared */ -#if MMAP_CLEARS -#define calloc_must_clear(p) (!is_mmapped(p)) -#else /* MMAP_CLEARS */ -#define calloc_must_clear(p) (1) -#endif /* MMAP_CLEARS */ - -/* ---------------------- Overlaid data structures ----------------------- */ - -/* - When chunks are not in use, they are treated as nodes of either - lists or trees. - - "Small" chunks are stored in circular doubly-linked lists, and look - like this: - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `head:' | Size of chunk, in bytes |P| - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Forward pointer to next chunk in list | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Back pointer to previous chunk in list | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Unused space (may be 0 bytes long) . - . . - . 
| -nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `foot:' | Size of chunk, in bytes | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Larger chunks are kept in a form of bitwise digital trees (aka - tries) keyed on chunksizes. Because malloc_tree_chunks are only for - free chunks greater than 256 bytes, their size doesn't impose any - constraints on user chunk sizes. Each node looks like: - - chunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Size of previous chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `head:' | Size of chunk, in bytes |P| - mem-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Forward pointer to next chunk of same size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Back pointer to previous chunk of same size | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Pointer to left child (child[0]) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Pointer to right child (child[1]) | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Pointer to parent | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | bin index of this chunk | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - | Unused space . - . | -nextchunk-> +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - `foot:' | Size of chunk, in bytes | - +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ - - Each tree holding treenodes is a tree of unique chunk sizes. Chunks - of the same size are arranged in a circularly-linked list, with only - the oldest chunk (the next to be used, in our FIFO ordering) - actually in the tree. (Tree members are distinguished by a non-null - parent pointer.) 
If a chunk with the same size an an existing node - is inserted, it is linked off the existing node using pointers that - work in the same way as fd/bk pointers of small chunks. - - Each tree contains a power of 2 sized range of chunk sizes (the - smallest is 0x100 <= x < 0x180), which is is divided in half at each - tree level, with the chunks in the smaller half of the range (0x100 - <= x < 0x140 for the top nose) in the left subtree and the larger - half (0x140 <= x < 0x180) in the right subtree. This is, of course, - done by inspecting individual bits. - - Using these rules, each node's left subtree contains all smaller - sizes than its right subtree. However, the node at the root of each - subtree has no particular ordering relationship to either. (The - dividing line between the subtree sizes is based on trie relation.) - If we remove the last chunk of a given size from the interior of the - tree, we need to replace it with a leaf node. The tree ordering - rules permit a node to be replaced by any leaf below it. - - The smallest chunk in a tree (a common operation in a best-fit - allocator) can be found by walking a path to the leftmost leaf in - the tree. Unlike a usual binary tree, where we follow left child - pointers until we reach a null, here we follow the right child - pointer any time the left one is null, until we reach a leaf with - both child pointers null. The smallest chunk in the tree will be - somewhere along that path. - - The worst case number of steps to add, find, or remove a node is - bounded by the number of bits differentiating chunks within - bins. Under current bin calculations, this ranges from 6 up to 21 - (for 32 bit sizes) or up to 53 (for 64 bit sizes). The typical case - is of course much better. 
-*/ - -struct malloc_tree_chunk { - /* The first four fields must be compatible with malloc_chunk */ - size_t prev_foot; - size_t head; - struct malloc_tree_chunk* fd; - struct malloc_tree_chunk* bk; - - struct malloc_tree_chunk* child[2]; - struct malloc_tree_chunk* parent; - bindex_t index; -}; - -typedef struct malloc_tree_chunk tchunk; -typedef struct malloc_tree_chunk* tchunkptr; -typedef struct malloc_tree_chunk* tbinptr; /* The type of bins of trees */ - -/* A little helper macro for trees */ -#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1]) - -/* ----------------------------- Segments -------------------------------- */ - -/* - Each malloc space may include non-contiguous segments, held in a - list headed by an embedded malloc_segment record representing the - top-most space. Segments also include flags holding properties of - the space. Large chunks that are directly allocated by mmap are not - included in this list. They are instead independently created and - destroyed without otherwise keeping track of them. - - Segment management mainly comes into play for spaces allocated by - MMAP. Any call to MMAP might or might not return memory that is - adjacent to an existing segment. MORECORE normally contiguously - extends the current space, so this space is almost always adjacent, - which is simpler and faster to deal with. (This is why MORECORE is - used preferentially to MMAP when both are available -- see - sys_alloc.) When allocating using MMAP, we don't use any of the - hinting mechanisms (inconsistently) supported in various - implementations of unix mmap, or distinguish reserving from - committing memory. Instead, we just ask for space, and exploit - contiguity when we get it. It is probably possible to do - better than this on some systems, but no general scheme seems - to be significantly better. 
- - Management entails a simpler variant of the consolidation scheme - used for chunks to reduce fragmentation -- new adjacent memory is - normally prepended or appended to an existing segment. However, - there are limitations compared to chunk consolidation that mostly - reflect the fact that segment processing is relatively infrequent - (occurring only when getting memory from system) and that we - don't expect to have huge numbers of segments: - - * Segments are not indexed, so traversal requires linear scans. (It - would be possible to index these, but is not worth the extra - overhead and complexity for most programs on most platforms.) - * New segments are only appended to old ones when holding top-most - memory; if they cannot be prepended to others, they are held in - different segments. - - Except for the top-most segment of an mstate, each segment record - is kept at the tail of its segment. Segments are added by pushing - segment records onto the list headed by &mstate.seg for the - containing mstate. - - Segment flags control allocation/merge/deallocation policies: - * If EXTERN_BIT set, then we did not allocate this segment, - and so should not try to deallocate or merge with others. - (This currently holds only for the initial segment passed - into create_mspace_with_base.) - * If IS_MMAPPED_BIT set, the segment may be merged with - other surrounding mmapped segments and trimmed/de-allocated - using munmap. - * If neither bit is set, then the segment was obtained using - MORECORE so can be merged with surrounding MORECORE'd segments - and deallocated/trimmed using MORECORE with negative arguments. 
-*/ - -struct malloc_segment { - char* base; /* base address */ - size_t size; /* allocated size */ - struct malloc_segment* next; /* ptr to next segment */ - flag_t sflags; /* mmap and extern flag */ -}; - -#define is_mmapped_segment(S) ((S)->sflags & IS_MMAPPED_BIT) -#define is_extern_segment(S) ((S)->sflags & EXTERN_BIT) - -typedef struct malloc_segment msegment; -typedef struct malloc_segment* msegmentptr; - -/* ---------------------------- malloc_state ----------------------------- */ - -/* - A malloc_state holds all of the bookkeeping for a space. - The main fields are: - - Top - The topmost chunk of the currently active segment. Its size is - cached in topsize. The actual size of topmost space is - topsize+TOP_FOOT_SIZE, which includes space reserved for adding - fenceposts and segment records if necessary when getting more - space from the system. The size at which to autotrim top is - cached from mparams in trim_check, except that it is disabled if - an autotrim fails. - - Designated victim (dv) - This is the preferred chunk for servicing small requests that - don't have exact fits. It is normally the chunk split off most - recently to service another small request. Its size is cached in - dvsize. The link fields of this chunk are not maintained since it - is not kept in a bin. - - SmallBins - An array of bin headers for free chunks. These bins hold chunks - with sizes less than MIN_LARGE_SIZE bytes. Each bin contains - chunks of all the same size, spaced 8 bytes apart. To simplify - use in double-linked lists, each bin header acts as a malloc_chunk - pointing to the real first node, if it exists (else pointing to - itself). This avoids special-casing for headers. But to avoid - waste, we allocate only the fd/bk pointers of bins, and then use - repositioning tricks to treat these as the fields of a chunk. - - TreeBins - Treebins are pointers to the roots of trees holding a range of - sizes. 
There are 2 equally spaced treebins for each power of two - from TREE_SHIFT to TREE_SHIFT+16. The last bin holds anything - larger. - - Bin maps - There is one bit map for small bins ("smallmap") and one for - treebins ("treemap). Each bin sets its bit when non-empty, and - clears the bit when empty. Bit operations are then used to avoid - bin-by-bin searching -- nearly all "search" is done without ever - looking at bins that won't be selected. The bit maps - conservatively use 32 bits per map word, even if on 64bit system. - For a good description of some of the bit-based techniques used - here, see Henry S. Warren Jr's book "Hacker's Delight" (and - supplement at http://hackersdelight.org/). Many of these are - intended to reduce the branchiness of paths through malloc etc, as - well as to reduce the number of memory locations read or written. - - Segments - A list of segments headed by an embedded malloc_segment record - representing the initial space. - - Address check support - The least_addr field is the least address ever obtained from - MORECORE or MMAP. Attempted frees and reallocs of any address less - than this are trapped (unless INSECURE is defined). - - Magic tag - A cross-check field that should always hold same value as mparams.magic. - - Flags - Bits recording whether to use MMAP, locks, or contiguous MORECORE - - Statistics - Each space keeps track of current and maximum system memory - obtained via MORECORE or MMAP. - - Trim support - Fields holding the amount of unused topmost memory that should trigger - timming, and a counter to force periodic scanning to release unused - non-topmost segments. - - Locking - If USE_LOCKS is defined, the "mutex" lock is acquired and released - around every public call using this mspace. - - Extension support - A void* pointer and a size_t field that can be used to help implement - extensions to this malloc. 
-*/ - -/* Bin types, widths and sizes */ -#define NSMALLBINS (32U) -#define NTREEBINS (32U) -#define SMALLBIN_SHIFT (3U) -#define SMALLBIN_WIDTH (SIZE_T_ONE << SMALLBIN_SHIFT) -#define TREEBIN_SHIFT (8U) -#define MIN_LARGE_SIZE (SIZE_T_ONE << TREEBIN_SHIFT) -#define MAX_SMALL_SIZE (MIN_LARGE_SIZE - SIZE_T_ONE) -#define MAX_SMALL_REQUEST (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD) - -struct malloc_state { - binmap_t smallmap; - binmap_t treemap; - size_t dvsize; - size_t topsize; - char* least_addr; - mchunkptr dv; - mchunkptr top; - size_t trim_check; - size_t release_checks; - size_t magic; - mchunkptr smallbins[(NSMALLBINS+1)*2]; - tbinptr treebins[NTREEBINS]; - size_t footprint; - size_t max_footprint; - flag_t mflags; -#if USE_LOCKS - MLOCK_T mutex; /* locate lock among fields that rarely change */ -#endif /* USE_LOCKS */ - msegment seg; - void* extp; /* Unused but available for extensions */ - size_t exts; -}; - -typedef struct malloc_state* mstate; - -/* ------------- Global malloc_state and malloc_params ------------------- */ - -/* - malloc_params holds global properties, including those that can be - dynamically set using mallopt. There is a single instance, mparams, - initialized in init_mparams. Note that the non-zeroness of "magic" - also serves as an initialization flag. 
-*/ - -struct malloc_params { - volatile size_t magic; - size_t page_size; - size_t granularity; - size_t mmap_threshold; - size_t trim_threshold; - flag_t default_mflags; -}; - -static struct malloc_params mparams; - -/* Ensure mparams initialized */ -#define ensure_initialization() (mparams.magic != 0 || init_mparams()) - -#if !ONLY_MSPACES - -/* The global malloc_state used for all non-"mspace" calls */ -static struct malloc_state _gm_; -#define gm (&_gm_) -#define is_global(M) ((M) == &_gm_) - -#endif /* !ONLY_MSPACES */ - -#define is_initialized(M) ((M)->top != 0) - -/* -------------------------- system alloc setup ------------------------- */ - -/* Operations on mflags */ - -#define use_lock(M) ((M)->mflags & USE_LOCK_BIT) -#define enable_lock(M) ((M)->mflags |= USE_LOCK_BIT) -#define disable_lock(M) ((M)->mflags &= ~USE_LOCK_BIT) - -#define use_mmap(M) ((M)->mflags & USE_MMAP_BIT) -#define enable_mmap(M) ((M)->mflags |= USE_MMAP_BIT) -#define disable_mmap(M) ((M)->mflags &= ~USE_MMAP_BIT) - -#define use_noncontiguous(M) ((M)->mflags & USE_NONCONTIGUOUS_BIT) -#define disable_contiguous(M) ((M)->mflags |= USE_NONCONTIGUOUS_BIT) - -#define set_lock(M,L)\ - ((M)->mflags = (L)?\ - ((M)->mflags | USE_LOCK_BIT) :\ - ((M)->mflags & ~USE_LOCK_BIT)) - -/* page-align a size */ -#define page_align(S)\ - (((S) + (mparams.page_size - SIZE_T_ONE)) & ~(mparams.page_size - SIZE_T_ONE)) - -/* granularity-align a size */ -#define granularity_align(S)\ - (((S) + (mparams.granularity - SIZE_T_ONE))\ - & ~(mparams.granularity - SIZE_T_ONE)) - - -/* For mmap, use granularity alignment on windows, else page-align */ -#ifdef WIN32 -#define mmap_align(S) granularity_align(S) -#else -#define mmap_align(S) page_align(S) -#endif - -/* For sys_alloc, enough padding to ensure can malloc request on success */ -#define SYS_ALLOC_PADDING (TOP_FOOT_SIZE + MALLOC_ALIGNMENT) - -#define is_page_aligned(S)\ - (((size_t)(S) & (mparams.page_size - SIZE_T_ONE)) == 0) -#define 
is_granularity_aligned(S)\ - (((size_t)(S) & (mparams.granularity - SIZE_T_ONE)) == 0) - -/* True if segment S holds address A */ -#define segment_holds(S, A)\ - ((char*)(A) >= S->base && (char*)(A) < S->base + S->size) - -/* Return segment holding given address */ -static msegmentptr segment_holding(mstate m, char* addr) { - msegmentptr sp = &m->seg; - for (;;) { - if (addr >= sp->base && addr < sp->base + sp->size) - return sp; - if ((sp = sp->next) == 0) - return 0; - } -} - -/* Return true if segment contains a segment link */ -static int has_segment_link(mstate m, msegmentptr ss) { - msegmentptr sp = &m->seg; - for (;;) { - if ((char*)sp >= ss->base && (char*)sp < ss->base + ss->size) - return 1; - if ((sp = sp->next) == 0) - return 0; - } -} - -#ifndef MORECORE_CANNOT_TRIM -#define should_trim(M,s) ((s) > (M)->trim_check) -#else /* MORECORE_CANNOT_TRIM */ -#define should_trim(M,s) (0) -#endif /* MORECORE_CANNOT_TRIM */ - -/* - TOP_FOOT_SIZE is padding at the end of a segment, including space - that may be needed to place segment records and fenceposts when new - noncontiguous segments are added. -*/ -#define TOP_FOOT_SIZE\ - (align_offset(chunk2mem(0))+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE) - - -/* ------------------------------- Hooks -------------------------------- */ - -/* - PREACTION should be defined to return 0 on success, and nonzero on - failure. If you are not using locking, you can redefine these to do - anything you like. -*/ - -#if USE_LOCKS - -#define PREACTION(M) ((use_lock(M))? ACQUIRE_LOCK(&(M)->mutex) : 0) -#define POSTACTION(M) { if (use_lock(M)) RELEASE_LOCK(&(M)->mutex); } -#else /* USE_LOCKS */ - -#ifndef PREACTION -#define PREACTION(M) (0) -#endif /* PREACTION */ - -#ifndef POSTACTION -#define POSTACTION(M) -#endif /* POSTACTION */ - -#endif /* USE_LOCKS */ - -/* - CORRUPTION_ERROR_ACTION is triggered upon detected bad addresses. - USAGE_ERROR_ACTION is triggered on detected bad frees and - reallocs. 
The argument p is an address that might have triggered the - fault. It is ignored by the two predefined actions, but might be - useful in custom actions that try to help diagnose errors. -*/ - -#if PROCEED_ON_ERROR - -/* A count of the number of corruption errors causing resets */ -int malloc_corruption_error_count; - -/* default corruption action */ -static void reset_on_error(mstate m); - -#define CORRUPTION_ERROR_ACTION(m) reset_on_error(m) -#define USAGE_ERROR_ACTION(m, p) - -#else /* PROCEED_ON_ERROR */ - -#ifndef CORRUPTION_ERROR_ACTION -#define CORRUPTION_ERROR_ACTION(m) ABORT -#endif /* CORRUPTION_ERROR_ACTION */ - -#ifndef USAGE_ERROR_ACTION -#define USAGE_ERROR_ACTION(m,p) ABORT -#endif /* USAGE_ERROR_ACTION */ - -#endif /* PROCEED_ON_ERROR */ - -/* -------------------------- Debugging setup ---------------------------- */ - -#if ! DEBUG - -#define check_free_chunk(M,P) -#define check_inuse_chunk(M,P) -#define check_malloced_chunk(M,P,N) -#define check_mmapped_chunk(M,P) -#define check_malloc_state(M) -#define check_top_chunk(M,P) - -#else /* DEBUG */ -#define check_free_chunk(M,P) do_check_free_chunk(M,P) -#define check_inuse_chunk(M,P) do_check_inuse_chunk(M,P) -#define check_top_chunk(M,P) do_check_top_chunk(M,P) -#define check_malloced_chunk(M,P,N) do_check_malloced_chunk(M,P,N) -#define check_mmapped_chunk(M,P) do_check_mmapped_chunk(M,P) -#define check_malloc_state(M) do_check_malloc_state(M) - -static void do_check_any_chunk(mstate m, mchunkptr p); -static void do_check_top_chunk(mstate m, mchunkptr p); -static void do_check_mmapped_chunk(mstate m, mchunkptr p); -static void do_check_inuse_chunk(mstate m, mchunkptr p); -static void do_check_free_chunk(mstate m, mchunkptr p); -static void do_check_malloced_chunk(mstate m, void* mem, size_t s); -static void do_check_tree(mstate m, tchunkptr t); -static void do_check_treebin(mstate m, bindex_t i); -static void do_check_smallbin(mstate m, bindex_t i); -static void do_check_malloc_state(mstate m); 
-static int bin_find(mstate m, mchunkptr x); -static size_t traverse_and_check(mstate m); -#endif /* DEBUG */ - -/* ---------------------------- Indexing Bins ---------------------------- */ - -#define is_small(s) (((s) >> SMALLBIN_SHIFT) < NSMALLBINS) -#define small_index(s) ((s) >> SMALLBIN_SHIFT) -#define small_index2size(i) ((i) << SMALLBIN_SHIFT) -#define MIN_SMALL_INDEX (small_index(MIN_CHUNK_SIZE)) - -/* addressing by index. See above about smallbin repositioning */ -#define smallbin_at(M, i) ((sbinptr)((char*)&((M)->smallbins[(i)<<1]))) -#define treebin_at(M,i) (&((M)->treebins[i])) - -/* assign tree index for size S to variable I. Use x86 asm if possible */ -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) -#define compute_tree_index(S, I)\ -{\ - unsigned int X = S >> TREEBIN_SHIFT;\ - if (X == 0)\ - I = 0;\ - else if (X > 0xFFFF)\ - I = NTREEBINS-1;\ - else {\ - unsigned int K;\ - __asm__("bsrl\t%1, %0\n\t" : "=r" (K) : "rm" (X));\ - I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ - }\ -} - -#elif defined (__INTEL_COMPILER) -#define compute_tree_index(S, I)\ -{\ - size_t X = S >> TREEBIN_SHIFT;\ - if (X == 0)\ - I = 0;\ - else if (X > 0xFFFF)\ - I = NTREEBINS-1;\ - else {\ - unsigned int K = _bit_scan_reverse (X); \ - I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ - }\ -} - -#elif defined(_MSC_VER) && _MSC_VER>=1300 -#define compute_tree_index(S, I)\ -{\ - size_t X = S >> TREEBIN_SHIFT;\ - if (X == 0)\ - I = 0;\ - else if (X > 0xFFFF)\ - I = NTREEBINS-1;\ - else {\ - unsigned int K;\ - _BitScanReverse((DWORD *) &K, X);\ - I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\ - }\ -} - -#else /* GNUC */ -#define compute_tree_index(S, I)\ -{\ - size_t X = S >> TREEBIN_SHIFT;\ - if (X == 0)\ - I = 0;\ - else if (X > 0xFFFF)\ - I = NTREEBINS-1;\ - else {\ - unsigned int Y = (unsigned int)X;\ - unsigned int N = ((Y - 0x100) >> 16) & 8;\ - unsigned int K = (((Y <<= N) - 0x1000) >> 16) & 4;\ - N 
+= K;\ - N += K = (((Y <<= K) - 0x4000) >> 16) & 2;\ - K = 14 - N + ((Y <<= K) >> 15);\ - I = (K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1));\ - }\ -} -#endif /* GNUC */ - -/* Bit representing maximum resolved size in a treebin at i */ -#define bit_for_tree_index(i) \ - (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2) - -/* Shift placing maximum resolved bit in a treebin at i as sign bit */ -#define leftshift_for_tree_index(i) \ - ((i == NTREEBINS-1)? 0 : \ - ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2))) - -/* The size of the smallest chunk held in bin with index i */ -#define minsize_for_tree_index(i) \ - ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) | \ - (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1))) - - -/* ------------------------ Operations on bin maps ----------------------- */ - -/* bit corresponding to given index */ -#define idx2bit(i) ((binmap_t)(1) << (i)) - -/* Mark/Clear bits with given index */ -#define mark_smallmap(M,i) ((M)->smallmap |= idx2bit(i)) -#define clear_smallmap(M,i) ((M)->smallmap &= ~idx2bit(i)) -#define smallmap_is_marked(M,i) ((M)->smallmap & idx2bit(i)) - -#define mark_treemap(M,i) ((M)->treemap |= idx2bit(i)) -#define clear_treemap(M,i) ((M)->treemap &= ~idx2bit(i)) -#define treemap_is_marked(M,i) ((M)->treemap & idx2bit(i)) - -/* isolate the least set bit of a bitmap */ -#define least_bit(x) ((x) & -(x)) - -/* mask with all bits to left of least bit of x on */ -#define left_bits(x) ((x<<1) | -(x<<1)) - -/* mask with all bits to left of or equal to least bit of x on */ -#define same_or_left_bits(x) ((x) | -(x)) - -/* index corresponding to given bit. 
Use x86 asm if possible */ - -#if defined(__GNUC__) && (defined(__i386__) || defined(__x86_64__)) -#define compute_bit2idx(X, I)\ -{\ - unsigned int J;\ - __asm__("bsfl\t%1, %0\n\t" : "=r" (J) : "rm" (X));\ - I = (bindex_t)J;\ -} - -#elif defined (__INTEL_COMPILER) -#define compute_bit2idx(X, I)\ -{\ - unsigned int J;\ - J = _bit_scan_forward (X); \ - I = (bindex_t)J;\ -} - -#elif defined(_MSC_VER) && _MSC_VER>=1300 -#define compute_bit2idx(X, I)\ -{\ - unsigned int J;\ - _BitScanForward((DWORD *) &J, X);\ - I = (bindex_t)J;\ -} - -#elif USE_BUILTIN_FFS -#define compute_bit2idx(X, I) I = ffs(X)-1 - -#else -#define compute_bit2idx(X, I)\ -{\ - unsigned int Y = X - 1;\ - unsigned int K = Y >> (16-4) & 16;\ - unsigned int N = K; Y >>= K;\ - N += K = Y >> (8-3) & 8; Y >>= K;\ - N += K = Y >> (4-2) & 4; Y >>= K;\ - N += K = Y >> (2-1) & 2; Y >>= K;\ - N += K = Y >> (1-0) & 1; Y >>= K;\ - I = (bindex_t)(N + Y);\ -} -#endif /* GNUC */ - - -/* ----------------------- Runtime Check Support ------------------------- */ - -/* - For security, the main invariant is that malloc/free/etc never - writes to a static address other than malloc_state, unless static - malloc_state itself has been corrupted, which cannot occur via - malloc (because of these checks). In essence this means that we - believe all pointers, sizes, maps etc held in malloc_state, but - check all of those linked or offsetted from other embedded data - structures. These checks are interspersed with main code in a way - that tends to minimize their run-time cost. - - When FOOTERS is defined, in addition to range checking, we also - verify footer fields of inuse chunks, which can be used guarantee - that the mstate controlling malloc/free is intact. 
This is a - streamlined version of the approach described by William Robertson - et al in "Run-time Detection of Heap-based Overflows" LISA'03 - http://www.usenix.org/events/lisa03/tech/robertson.html The footer - of an inuse chunk holds the xor of its mstate and a random seed, - that is checked upon calls to free() and realloc(). This is - (probablistically) unguessable from outside the program, but can be - computed by any code successfully malloc'ing any chunk, so does not - itself provide protection against code that has already broken - security through some other means. Unlike Robertson et al, we - always dynamically check addresses of all offset chunks (previous, - next, etc). This turns out to be cheaper than relying on hashes. -*/ - -#if !INSECURE -/* Check if address a is at least as high as any from MORECORE or MMAP */ -#define ok_address(M, a) ((char*)(a) >= (M)->least_addr) -/* Check if address of next chunk n is higher than base chunk p */ -#define ok_next(p, n) ((char*)(p) < (char*)(n)) -/* Check if p has its cinuse bit on */ -#define ok_cinuse(p) cinuse(p) -/* Check if p has its pinuse bit on */ -#define ok_pinuse(p) pinuse(p) - -#else /* !INSECURE */ -#define ok_address(M, a) (1) -#define ok_next(b, n) (1) -#define ok_cinuse(p) (1) -#define ok_pinuse(p) (1) -#endif /* !INSECURE */ - -#if (FOOTERS && !INSECURE) -/* Check if (alleged) mstate m has expected magic field */ -#define ok_magic(M) ((M)->magic == mparams.magic) -#else /* (FOOTERS && !INSECURE) */ -#define ok_magic(M) (1) -#endif /* (FOOTERS && !INSECURE) */ - - -/* In gcc, use __builtin_expect to minimize impact of checks */ -#if !INSECURE -#if defined(__GNUC__) && __GNUC__ >= 3 -#define RTCHECK(e) __builtin_expect(e, 1) -#else /* GNUC */ -#define RTCHECK(e) (e) -#endif /* GNUC */ -#else /* !INSECURE */ -#define RTCHECK(e) (1) -#endif /* !INSECURE */ - -/* macros to set up inuse chunks with or without footers */ - -#if !FOOTERS - -#define mark_inuse_foot(M,p,s) - -/* Set cinuse bit and 
pinuse bit of next chunk */ -#define set_inuse(M,p,s)\ - ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ - ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) - -/* Set cinuse and pinuse of this chunk and pinuse of next chunk */ -#define set_inuse_and_pinuse(M,p,s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ - ((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT) - -/* Set size, cinuse and pinuse bit of this chunk */ -#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT)) - -#else /* FOOTERS */ - -/* Set foot of inuse chunk to be xor of mstate and seed */ -#define mark_inuse_foot(M,p,s)\ - (((mchunkptr)((char*)(p) + (s)))->prev_foot = ((size_t)(M) ^ mparams.magic)) - -#define get_mstate_for(p)\ - ((mstate)(((mchunkptr)((char*)(p) +\ - (chunksize(p))))->prev_foot ^ mparams.magic)) - -#define set_inuse(M,p,s)\ - ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\ - (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT), \ - mark_inuse_foot(M,p,s)) - -#define set_inuse_and_pinuse(M,p,s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ - (((mchunkptr)(((char*)(p)) + (s)))->head |= PINUSE_BIT),\ - mark_inuse_foot(M,p,s)) - -#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\ - ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\ - mark_inuse_foot(M, p, s)) - -#endif /* !FOOTERS */ - -/* ---------------------------- setting mparams -------------------------- */ - -/* Initialize mparams */ -static int init_mparams(void) { -#ifdef NEED_GLOBAL_LOCK_INIT - if (malloc_global_mutex_status <= 0) - init_malloc_global_mutex(); -#endif - - ACQUIRE_MALLOC_GLOBAL_LOCK(); - if (mparams.magic == 0) { - size_t magic; - size_t psize; - size_t gsize; - -#ifndef WIN32 - psize = malloc_getpagesize; - gsize = ((DEFAULT_GRANULARITY != 0)? DEFAULT_GRANULARITY : psize); -#else /* WIN32 */ - { - SYSTEM_INFO system_info; - GetSystemInfo(&system_info); - psize = system_info.dwPageSize; - gsize = ((DEFAULT_GRANULARITY != 0)? 
- DEFAULT_GRANULARITY : system_info.dwAllocationGranularity); - } -#endif /* WIN32 */ - - /* Sanity-check configuration: - size_t must be unsigned and as wide as pointer type. - ints must be at least 4 bytes. - alignment must be at least 8. - Alignment, min chunk size, and page size must all be powers of 2. - */ - if ((sizeof(size_t) != sizeof(char*)) || - (MAX_SIZE_T < MIN_CHUNK_SIZE) || - (sizeof(int) < 4) || - (MALLOC_ALIGNMENT < (size_t)8U) || - ((MALLOC_ALIGNMENT & (MALLOC_ALIGNMENT-SIZE_T_ONE)) != 0) || - ((MCHUNK_SIZE & (MCHUNK_SIZE-SIZE_T_ONE)) != 0) || - ((gsize & (gsize-SIZE_T_ONE)) != 0) || - ((psize & (psize-SIZE_T_ONE)) != 0)) - ABORT; - - mparams.granularity = gsize; - mparams.page_size = psize; - mparams.mmap_threshold = DEFAULT_MMAP_THRESHOLD; - mparams.trim_threshold = DEFAULT_TRIM_THRESHOLD; -#if MORECORE_CONTIGUOUS - mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT; -#else /* MORECORE_CONTIGUOUS */ - mparams.default_mflags = USE_LOCK_BIT|USE_MMAP_BIT|USE_NONCONTIGUOUS_BIT; -#endif /* MORECORE_CONTIGUOUS */ - -#if !ONLY_MSPACES - /* Set up lock for main malloc area */ - gm->mflags = mparams.default_mflags; - INITIAL_LOCK(&gm->mutex); -#endif - -#if (FOOTERS && !INSECURE) - { -#if USE_DEV_RANDOM - int fd; - unsigned char buf[sizeof(size_t)]; - /* Try to use /dev/urandom, else fall back on using time */ - if ((fd = open("/dev/urandom", O_RDONLY)) >= 0 && - read(fd, buf, sizeof(buf)) == sizeof(buf)) { - magic = *((size_t *) buf); - close(fd); - } - else -#endif /* USE_DEV_RANDOM */ -#ifdef WIN32 - magic = (size_t)(GetTickCount() ^ (size_t)0x55555555U); -#else - magic = (size_t)(time(0) ^ (size_t)0x55555555U); -#endif - magic |= (size_t)8U; /* ensure nonzero */ - magic &= ~(size_t)7U; /* improve chances of fault for bad values */ - } -#else /* (FOOTERS && !INSECURE) */ - magic = (size_t)0x58585858U; -#endif /* (FOOTERS && !INSECURE) */ - - mparams.magic = magic; - } - - RELEASE_MALLOC_GLOBAL_LOCK(); - return 1; -} - -/* support for mallopt */ 
-static int change_mparam(int param_number, int value) { - size_t val = (value == -1)? MAX_SIZE_T : (size_t)value; - ensure_initialization(); - switch(param_number) { - case M_TRIM_THRESHOLD: - mparams.trim_threshold = val; - return 1; - case M_GRANULARITY: - if (val >= mparams.page_size && ((val & (val-1)) == 0)) { - mparams.granularity = val; - return 1; - } - else - return 0; - case M_MMAP_THRESHOLD: - mparams.mmap_threshold = val; - return 1; - default: - return 0; - } -} - -#if DEBUG -/* ------------------------- Debugging Support --------------------------- */ - -/* Check properties of any chunk, whether free, inuse, mmapped etc */ -static void do_check_any_chunk(mstate m, mchunkptr p) { - assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); - assert(ok_address(m, p)); -} - -/* Check properties of top chunk */ -static void do_check_top_chunk(mstate m, mchunkptr p) { - msegmentptr sp = segment_holding(m, (char*)p); - size_t sz = p->head & ~INUSE_BITS; /* third-lowest bit can be set! 
*/ - assert(sp != 0); - assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); - assert(ok_address(m, p)); - assert(sz == m->topsize); - assert(sz > 0); - assert(sz == ((sp->base + sp->size) - (char*)p) - TOP_FOOT_SIZE); - assert(pinuse(p)); - assert(!pinuse(chunk_plus_offset(p, sz))); -} - -/* Check properties of (inuse) mmapped chunks */ -static void do_check_mmapped_chunk(mstate m, mchunkptr p) { - size_t sz = chunksize(p); - size_t len = (sz + (p->prev_foot & ~IS_MMAPPED_BIT) + MMAP_FOOT_PAD); - assert(is_mmapped(p)); - assert(use_mmap(m)); - assert((is_aligned(chunk2mem(p))) || (p->head == FENCEPOST_HEAD)); - assert(ok_address(m, p)); - assert(!is_small(sz)); - assert((len & (mparams.page_size-SIZE_T_ONE)) == 0); - assert(chunk_plus_offset(p, sz)->head == FENCEPOST_HEAD); - assert(chunk_plus_offset(p, sz+SIZE_T_SIZE)->head == 0); -} - -/* Check properties of inuse chunks */ -static void do_check_inuse_chunk(mstate m, mchunkptr p) { - do_check_any_chunk(m, p); - assert(cinuse(p)); - assert(next_pinuse(p)); - /* If not pinuse and not mmapped, previous chunk has OK offset */ - assert(is_mmapped(p) || pinuse(p) || next_chunk(prev_chunk(p)) == p); - if (is_mmapped(p)) - do_check_mmapped_chunk(m, p); -} - -/* Check properties of free chunks */ -static void do_check_free_chunk(mstate m, mchunkptr p) { - size_t sz = chunksize(p); - mchunkptr next = chunk_plus_offset(p, sz); - do_check_any_chunk(m, p); - assert(!cinuse(p)); - assert(!next_pinuse(p)); - assert (!is_mmapped(p)); - if (p != m->dv && p != m->top) { - if (sz >= MIN_CHUNK_SIZE) { - assert((sz & CHUNK_ALIGN_MASK) == 0); - assert(is_aligned(chunk2mem(p))); - assert(next->prev_foot == sz); - assert(pinuse(p)); - assert (next == m->top || cinuse(next)); - assert(p->fd->bk == p); - assert(p->bk->fd == p); - } - else /* markers are always of size SIZE_T_SIZE */ - assert(sz == SIZE_T_SIZE); - } -} - -/* Check properties of malloced chunks at the point they are malloced */ -static void 
do_check_malloced_chunk(mstate m, void* mem, size_t s) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - size_t sz = p->head & ~(PINUSE_BIT|CINUSE_BIT); - do_check_inuse_chunk(m, p); - assert((sz & CHUNK_ALIGN_MASK) == 0); - assert(sz >= MIN_CHUNK_SIZE); - assert(sz >= s); - /* unless mmapped, size is less than MIN_CHUNK_SIZE more than request */ - assert(is_mmapped(p) || sz < (s + MIN_CHUNK_SIZE)); - } -} - -/* Check a tree and its subtrees. */ -static void do_check_tree(mstate m, tchunkptr t) { - tchunkptr head = 0; - tchunkptr u = t; - bindex_t tindex = t->index; - size_t tsize = chunksize(t); - bindex_t idx; - compute_tree_index(tsize, idx); - assert(tindex == idx); - assert(tsize >= MIN_LARGE_SIZE); - assert(tsize >= minsize_for_tree_index(idx)); - assert((idx == NTREEBINS-1) || (tsize < minsize_for_tree_index((idx+1)))); - - do { /* traverse through chain of same-sized nodes */ - do_check_any_chunk(m, ((mchunkptr)u)); - assert(u->index == tindex); - assert(chunksize(u) == tsize); - assert(!cinuse(u)); - assert(!next_pinuse(u)); - assert(u->fd->bk == u); - assert(u->bk->fd == u); - if (u->parent == 0) { - assert(u->child[0] == 0); - assert(u->child[1] == 0); - } - else { - assert(head == 0); /* only one node on chain has parent */ - head = u; - assert(u->parent != u); - assert (u->parent->child[0] == u || - u->parent->child[1] == u || - *((tbinptr*)(u->parent)) == u); - if (u->child[0] != 0) { - assert(u->child[0]->parent == u); - assert(u->child[0] != u); - do_check_tree(m, u->child[0]); - } - if (u->child[1] != 0) { - assert(u->child[1]->parent == u); - assert(u->child[1] != u); - do_check_tree(m, u->child[1]); - } - if (u->child[0] != 0 && u->child[1] != 0) { - assert(chunksize(u->child[0]) < chunksize(u->child[1])); - } - } - u = u->fd; - } while (u != t); - assert(head != 0); -} - -/* Check all the chunks in a treebin. 
*/ -static void do_check_treebin(mstate m, bindex_t i) { - tbinptr* tb = treebin_at(m, i); - tchunkptr t = *tb; - int empty = (m->treemap & (1U << i)) == 0; - if (t == 0) - assert(empty); - if (!empty) - do_check_tree(m, t); -} - -/* Check all the chunks in a smallbin. */ -static void do_check_smallbin(mstate m, bindex_t i) { - sbinptr b = smallbin_at(m, i); - mchunkptr p = b->bk; - unsigned int empty = (m->smallmap & (1U << i)) == 0; - if (p == b) - assert(empty); - if (!empty) { - for (; p != b; p = p->bk) { - size_t size = chunksize(p); - mchunkptr q; - /* each chunk claims to be free */ - do_check_free_chunk(m, p); - /* chunk belongs in bin */ - assert(small_index(size) == i); - assert(p->bk == b || chunksize(p->bk) == chunksize(p)); - /* chunk is followed by an inuse chunk */ - q = next_chunk(p); - if (q->head != FENCEPOST_HEAD) - do_check_inuse_chunk(m, q); - } - } -} - -/* Find x in a bin. Used in other check functions. */ -static int bin_find(mstate m, mchunkptr x) { - size_t size = chunksize(x); - if (is_small(size)) { - bindex_t sidx = small_index(size); - sbinptr b = smallbin_at(m, sidx); - if (smallmap_is_marked(m, sidx)) { - mchunkptr p = b; - do { - if (p == x) - return 1; - } while ((p = p->fd) != b); - } - } - else { - bindex_t tidx; - compute_tree_index(size, tidx); - if (treemap_is_marked(m, tidx)) { - tchunkptr t = *treebin_at(m, tidx); - size_t sizebits = size << leftshift_for_tree_index(tidx); - while (t != 0 && chunksize(t) != size) { - t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; - sizebits <<= 1; - } - if (t != 0) { - tchunkptr u = t; - do { - if (u == (tchunkptr)x) - return 1; - } while ((u = u->fd) != t); - } - } - } - return 0; -} - -/* Traverse each chunk and check it; return total */ -static size_t traverse_and_check(mstate m) { - size_t sum = 0; - if (is_initialized(m)) { - msegmentptr s = &m->seg; - sum += m->topsize + TOP_FOOT_SIZE; - while (s != 0) { - mchunkptr q = align_as_chunk(s->base); - mchunkptr lastq = 0; - 
assert(pinuse(q)); - while (segment_holds(s, q) && - q != m->top && q->head != FENCEPOST_HEAD) { - sum += chunksize(q); - if (cinuse(q)) { - assert(!bin_find(m, q)); - do_check_inuse_chunk(m, q); - } - else { - assert(q == m->dv || bin_find(m, q)); - assert(lastq == 0 || cinuse(lastq)); /* Not 2 consecutive free */ - do_check_free_chunk(m, q); - } - lastq = q; - q = next_chunk(q); - } - s = s->next; - } - } - return sum; -} - -/* Check all properties of malloc_state. */ -static void do_check_malloc_state(mstate m) { - bindex_t i; - size_t total; - /* check bins */ - for (i = 0; i < NSMALLBINS; ++i) - do_check_smallbin(m, i); - for (i = 0; i < NTREEBINS; ++i) - do_check_treebin(m, i); - - if (m->dvsize != 0) { /* check dv chunk */ - do_check_any_chunk(m, m->dv); - assert(m->dvsize == chunksize(m->dv)); - assert(m->dvsize >= MIN_CHUNK_SIZE); - assert(bin_find(m, m->dv) == 0); - } - - if (m->top != 0) { /* check top chunk */ - do_check_top_chunk(m, m->top); - /*assert(m->topsize == chunksize(m->top)); redundant */ - assert(m->topsize > 0); - assert(bin_find(m, m->top) == 0); - } - - total = traverse_and_check(m); - assert(total <= m->footprint); - assert(m->footprint <= m->max_footprint); -} -#endif /* DEBUG */ - -/* ----------------------------- statistics ------------------------------ */ - -#if !NO_MALLINFO -static struct mallinfo internal_mallinfo(mstate m) { - struct mallinfo nm = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - ensure_initialization(); - if (!PREACTION(m)) { - check_malloc_state(m); - if (is_initialized(m)) { - size_t nfree = SIZE_T_ONE; /* top always free */ - size_t mfree = m->topsize + TOP_FOOT_SIZE; - size_t sum = mfree; - msegmentptr s = &m->seg; - while (s != 0) { - mchunkptr q = align_as_chunk(s->base); - while (segment_holds(s, q) && - q != m->top && q->head != FENCEPOST_HEAD) { - size_t sz = chunksize(q); - sum += sz; - if (!cinuse(q)) { - mfree += sz; - ++nfree; - } - q = next_chunk(q); - } - s = s->next; - } - - nm.arena = sum; - nm.ordblks = 
nfree; - nm.hblkhd = m->footprint - sum; - nm.usmblks = m->max_footprint; - nm.uordblks = m->footprint - mfree; - nm.fordblks = mfree; - nm.keepcost = m->topsize; - } - - POSTACTION(m); - } - return nm; -} -#endif /* !NO_MALLINFO */ - -static void internal_malloc_stats(mstate m) { - ensure_initialization(); - if (!PREACTION(m)) { - size_t maxfp = 0; - size_t fp = 0; - size_t used = 0; - check_malloc_state(m); - if (is_initialized(m)) { - msegmentptr s = &m->seg; - maxfp = m->max_footprint; - fp = m->footprint; - used = fp - (m->topsize + TOP_FOOT_SIZE); - - while (s != 0) { - mchunkptr q = align_as_chunk(s->base); - while (segment_holds(s, q) && - q != m->top && q->head != FENCEPOST_HEAD) { - if (!cinuse(q)) - used -= chunksize(q); - q = next_chunk(q); - } - s = s->next; - } - } - - fprintf(stderr, "max system bytes = %10lu\n", (unsigned long)(maxfp)); - fprintf(stderr, "system bytes = %10lu\n", (unsigned long)(fp)); - fprintf(stderr, "in use bytes = %10lu\n", (unsigned long)(used)); - - POSTACTION(m); - } -} - -/* ----------------------- Operations on smallbins ----------------------- */ - -/* - Various forms of linking and unlinking are defined as macros. Even - the ones for trees, which are very long but have very short typical - paths. This is ugly but reduces reliance on inlining support of - compilers. 
-*/ - -/* Link a free chunk into a smallbin */ -#define insert_small_chunk(M, P, S) {\ - bindex_t I = small_index(S);\ - mchunkptr B = smallbin_at(M, I);\ - mchunkptr F = B;\ - assert(S >= MIN_CHUNK_SIZE);\ - if (!smallmap_is_marked(M, I))\ - mark_smallmap(M, I);\ - else if (RTCHECK(ok_address(M, B->fd)))\ - F = B->fd;\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - B->fd = P;\ - F->bk = P;\ - P->fd = F;\ - P->bk = B;\ -} - -/* Unlink a chunk from a smallbin */ -#define unlink_small_chunk(M, P, S) {\ - mchunkptr F = P->fd;\ - mchunkptr B = P->bk;\ - bindex_t I = small_index(S);\ - assert(P != B);\ - assert(P != F);\ - assert(chunksize(P) == small_index2size(I));\ - if (F == B)\ - clear_smallmap(M, I);\ - else if (RTCHECK((F == smallbin_at(M,I) || ok_address(M, F)) &&\ - (B == smallbin_at(M,I) || ok_address(M, B)))) {\ - F->bk = B;\ - B->fd = F;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ -} - -/* Unlink the first chunk from a smallbin */ -#define unlink_first_small_chunk(M, B, P, I) {\ - mchunkptr F = P->fd;\ - assert(P != B);\ - assert(P != F);\ - assert(chunksize(P) == small_index2size(I));\ - if (B == F)\ - clear_smallmap(M, I);\ - else if (RTCHECK(ok_address(M, F))) {\ - B->fd = F;\ - F->bk = B;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ -} - - - -/* Replace dv node, binning the old one */ -/* Used only when dvsize known to be small */ -#define replace_dv(M, P, S) {\ - size_t DVS = M->dvsize;\ - if (DVS != 0) {\ - mchunkptr DV = M->dv;\ - assert(is_small(DVS));\ - insert_small_chunk(M, DV, DVS);\ - }\ - M->dvsize = S;\ - M->dv = P;\ -} - -/* ------------------------- Operations on trees ------------------------- */ - -/* Insert chunk into tree */ -#define insert_large_chunk(M, X, S) {\ - tbinptr* H;\ - bindex_t I;\ - compute_tree_index(S, I);\ - H = treebin_at(M, I);\ - X->index = I;\ - X->child[0] = X->child[1] = 0;\ - if (!treemap_is_marked(M, I)) {\ - mark_treemap(M, I);\ - *H = X;\ - X->parent = (tchunkptr)H;\ - X->fd = X->bk = X;\ - }\ - 
else {\ - tchunkptr T = *H;\ - size_t K = S << leftshift_for_tree_index(I);\ - for (;;) {\ - if (chunksize(T) != S) {\ - tchunkptr* C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\ - K <<= 1;\ - if (*C != 0)\ - T = *C;\ - else if (RTCHECK(ok_address(M, C))) {\ - *C = X;\ - X->parent = T;\ - X->fd = X->bk = X;\ - break;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - break;\ - }\ - }\ - else {\ - tchunkptr F = T->fd;\ - if (RTCHECK(ok_address(M, T) && ok_address(M, F))) {\ - T->fd = F->bk = X;\ - X->fd = F;\ - X->bk = T;\ - X->parent = 0;\ - break;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - break;\ - }\ - }\ - }\ - }\ -} - -/* - Unlink steps: - - 1. If x is a chained node, unlink it from its same-sized fd/bk links - and choose its bk node as its replacement. - 2. If x was the last node of its size, but not a leaf node, it must - be replaced with a leaf node (not merely one with an open left or - right), to make sure that lefts and rights of descendents - correspond properly to bit masks. We use the rightmost descendent - of x. We could use any other leaf, but this is easy to locate and - tends to counteract removal of leftmosts elsewhere, and so keeps - paths shorter than minimally guaranteed. This doesn't loop much - because on average a node in a tree is near the bottom. - 3. If x is the base of a chain (i.e., has parent links) relink - x's parent and children to x's replacement (or null if none). 
-*/ - -#define unlink_large_chunk(M, X) {\ - tchunkptr XP = X->parent;\ - tchunkptr R;\ - if (X->bk != X) {\ - tchunkptr F = X->fd;\ - R = X->bk;\ - if (RTCHECK(ok_address(M, F))) {\ - F->bk = R;\ - R->fd = F;\ - }\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ - else {\ - tchunkptr* RP;\ - if (((R = *(RP = &(X->child[1]))) != 0) ||\ - ((R = *(RP = &(X->child[0]))) != 0)) {\ - tchunkptr* CP;\ - while ((*(CP = &(R->child[1])) != 0) ||\ - (*(CP = &(R->child[0])) != 0)) {\ - R = *(RP = CP);\ - }\ - if (RTCHECK(ok_address(M, RP)))\ - *RP = 0;\ - else {\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ - }\ - if (XP != 0) {\ - tbinptr* H = treebin_at(M, X->index);\ - if (X == *H) {\ - if ((*H = R) == 0) \ - clear_treemap(M, X->index);\ - }\ - else if (RTCHECK(ok_address(M, XP))) {\ - if (XP->child[0] == X) \ - XP->child[0] = R;\ - else \ - XP->child[1] = R;\ - }\ - else\ - CORRUPTION_ERROR_ACTION(M);\ - if (R != 0) {\ - if (RTCHECK(ok_address(M, R))) {\ - tchunkptr C0, C1;\ - R->parent = XP;\ - if ((C0 = X->child[0]) != 0) {\ - if (RTCHECK(ok_address(M, C0))) {\ - R->child[0] = C0;\ - C0->parent = R;\ - }\ - else\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - if ((C1 = X->child[1]) != 0) {\ - if (RTCHECK(ok_address(M, C1))) {\ - R->child[1] = C1;\ - C1->parent = R;\ - }\ - else\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ - else\ - CORRUPTION_ERROR_ACTION(M);\ - }\ - }\ -} - -/* Relays to large vs small bin operations */ - -#define insert_chunk(M, P, S)\ - if (is_small(S)) insert_small_chunk(M, P, S)\ - else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); } - -#define unlink_chunk(M, P, S)\ - if (is_small(S)) unlink_small_chunk(M, P, S)\ - else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); } - - -/* Relays to internal calls to malloc/free from realloc, memalign etc */ - -#if ONLY_MSPACES -#define internal_malloc(m, b) mspace_malloc(m, b) -#define internal_free(m, mem) mspace_free(m,mem); -#else /* ONLY_MSPACES */ -#if MSPACES -#define internal_malloc(m, 
b)\ - (m == gm)? dlmalloc(b) : mspace_malloc(m, b) -#define internal_free(m, mem)\ - if (m == gm) dlfree(mem); else mspace_free(m,mem); -#else /* MSPACES */ -#define internal_malloc(m, b) dlmalloc(b) -#define internal_free(m, mem) dlfree(mem) -#endif /* MSPACES */ -#endif /* ONLY_MSPACES */ - -/* ----------------------- Direct-mmapping chunks ----------------------- */ - -/* - Directly mmapped chunks are set up with an offset to the start of - the mmapped region stored in the prev_foot field of the chunk. This - allows reconstruction of the required argument to MUNMAP when freed, - and also allows adjustment of the returned chunk to meet alignment - requirements (especially in memalign). There is also enough space - allocated to hold a fake next chunk of size SIZE_T_SIZE to maintain - the PINUSE bit so frees can be checked. -*/ - -/* Malloc using mmap */ -static void* mmap_alloc(mstate m, size_t nb) { - size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - if (mmsize > nb) { /* Check for wrap around 0 */ - char* mm = (char*)(CALL_DIRECT_MMAP(mmsize)); - if (mm != CMFAIL) { - size_t offset = align_offset(chunk2mem(mm)); - size_t psize = mmsize - offset - MMAP_FOOT_PAD; - mchunkptr p = (mchunkptr)(mm + offset); - p->prev_foot = offset | IS_MMAPPED_BIT; - (p)->head = (psize|CINUSE_BIT); - mark_inuse_foot(m, p, psize); - chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD; - chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0; - - if (mm < m->least_addr) - m->least_addr = mm; - if ((m->footprint += mmsize) > m->max_footprint) - m->max_footprint = m->footprint; - assert(is_aligned(chunk2mem(p))); - check_mmapped_chunk(m, p); - return chunk2mem(p); - } - } - return 0; -} - -/* Realloc using mmap */ -static mchunkptr mmap_resize(mstate m, mchunkptr oldp, size_t nb) { - size_t oldsize = chunksize(oldp); - if (is_small(nb)) /* Can't shrink mmap regions below small size */ - return 0; - /* Keep old chunk if big enough but not too big */ - if (oldsize >= nb + 
SIZE_T_SIZE && - (oldsize - nb) <= (mparams.granularity << 1)) - return oldp; - else { - size_t offset = oldp->prev_foot & ~IS_MMAPPED_BIT; - size_t oldmmsize = oldsize + offset + MMAP_FOOT_PAD; - size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - char* cp = (char*)(intptr_t)CALL_MREMAP((char*)oldp - offset, - oldmmsize, newmmsize, 1); - if (cp != CMFAIL) { - mchunkptr newp = (mchunkptr)(cp + offset); - size_t psize = newmmsize - offset - MMAP_FOOT_PAD; - newp->head = (psize|CINUSE_BIT); - mark_inuse_foot(m, newp, psize); - chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD; - chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0; - - if (cp < m->least_addr) - m->least_addr = cp; - if ((m->footprint += newmmsize - oldmmsize) > m->max_footprint) - m->max_footprint = m->footprint; - check_mmapped_chunk(m, newp); - return newp; - } - } - return 0; -} - -/* -------------------------- mspace management -------------------------- */ - -/* Initialize top chunk and its size */ -static void init_top(mstate m, mchunkptr p, size_t psize) { - /* Ensure alignment */ - size_t offset = align_offset(chunk2mem(p)); - p = (mchunkptr)((char*)p + offset); - psize -= offset; - - m->top = p; - m->topsize = psize; - p->head = psize | PINUSE_BIT; - /* set size of fake trailing chunk holding overhead space only once */ - chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE; - m->trim_check = mparams.trim_threshold; /* reset on each update */ -} - -/* Initialize bins for a new mstate that is otherwise zeroed out */ -static void init_bins(mstate m) { - /* Establish circular links for smallbins */ - bindex_t i; - for (i = 0; i < NSMALLBINS; ++i) { - sbinptr bin = smallbin_at(m,i); - bin->fd = bin->bk = bin; - } -} - -#if PROCEED_ON_ERROR - -/* default corruption action */ -static void reset_on_error(mstate m) { - int i; - ++malloc_corruption_error_count; - /* Reinitialize fields to forget about all memory */ - m->smallbins = m->treebins = 0; - m->dvsize = m->topsize = 
0; - m->seg.base = 0; - m->seg.size = 0; - m->seg.next = 0; - m->top = m->dv = 0; - for (i = 0; i < NTREEBINS; ++i) - *treebin_at(m, i) = 0; - init_bins(m); -} -#endif /* PROCEED_ON_ERROR */ - -/* Allocate chunk and prepend remainder with chunk in successor base. */ -static void* prepend_alloc(mstate m, char* newbase, char* oldbase, - size_t nb) { - mchunkptr p = align_as_chunk(newbase); - mchunkptr oldfirst = align_as_chunk(oldbase); - size_t psize = (char*)oldfirst - (char*)p; - mchunkptr q = chunk_plus_offset(p, nb); - size_t qsize = psize - nb; - set_size_and_pinuse_of_inuse_chunk(m, p, nb); - - assert((char*)oldfirst > (char*)q); - assert(pinuse(oldfirst)); - assert(qsize >= MIN_CHUNK_SIZE); - - /* consolidate remainder with first chunk of old base */ - if (oldfirst == m->top) { - size_t tsize = m->topsize += qsize; - m->top = q; - q->head = tsize | PINUSE_BIT; - check_top_chunk(m, q); - } - else if (oldfirst == m->dv) { - size_t dsize = m->dvsize += qsize; - m->dv = q; - set_size_and_pinuse_of_free_chunk(q, dsize); - } - else { - if (!cinuse(oldfirst)) { - size_t nsize = chunksize(oldfirst); - unlink_chunk(m, oldfirst, nsize); - oldfirst = chunk_plus_offset(oldfirst, nsize); - qsize += nsize; - } - set_free_with_pinuse(q, qsize, oldfirst); - insert_chunk(m, q, qsize); - check_free_chunk(m, q); - } - - check_malloced_chunk(m, chunk2mem(p), nb); - return chunk2mem(p); -} - -/* Add a segment to hold a new noncontiguous region */ -static void add_segment(mstate m, char* tbase, size_t tsize, flag_t mmapped) { - /* Determine locations and sizes of segment, fenceposts, old top */ - char* old_top = (char*)m->top; - msegmentptr oldsp = segment_holding(m, old_top); - char* old_end = oldsp->base + oldsp->size; - size_t ssize = pad_request(sizeof(struct malloc_segment)); - char* rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK); - size_t offset = align_offset(chunk2mem(rawsp)); - char* asp = rawsp + offset; - char* csp = (asp < (old_top + MIN_CHUNK_SIZE))? 
old_top : asp; - mchunkptr sp = (mchunkptr)csp; - msegmentptr ss = (msegmentptr)(chunk2mem(sp)); - mchunkptr tnext = chunk_plus_offset(sp, ssize); - mchunkptr p = tnext; - int nfences = 0; - - /* reset top to new space */ - init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); - - /* Set up segment record */ - assert(is_aligned(ss)); - set_size_and_pinuse_of_inuse_chunk(m, sp, ssize); - *ss = m->seg; /* Push current record */ - m->seg.base = tbase; - m->seg.size = tsize; - m->seg.sflags = mmapped; - m->seg.next = ss; - - /* Insert trailing fenceposts */ - for (;;) { - mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE); - p->head = FENCEPOST_HEAD; - ++nfences; - if ((char*)(&(nextp->head)) < old_end) - p = nextp; - else - break; - } - assert(nfences >= 2); - - /* Insert the rest of old top into a bin as an ordinary free chunk */ - if (csp != old_top) { - mchunkptr q = (mchunkptr)old_top; - size_t psize = csp - old_top; - mchunkptr tn = chunk_plus_offset(q, psize); - set_free_with_pinuse(q, psize, tn); - insert_chunk(m, q, psize); - } - - check_top_chunk(m, m->top); -} - -/* -------------------------- System allocation -------------------------- */ - -/* Get memory from system using MORECORE or MMAP */ -static void* sys_alloc(mstate m, size_t nb) { - char* tbase = CMFAIL; - size_t tsize = 0; - flag_t mmap_flag = 0; - - ensure_initialization(); - - /* Directly map large chunks */ - if (use_mmap(m) && nb >= mparams.mmap_threshold) { - void* mem = mmap_alloc(m, nb); - if (mem != 0) - return mem; - } - - /* - Try getting memory in any of three ways (in most-preferred to - least-preferred order): - 1. A call to MORECORE that can normally contiguously extend memory. - (disabled if not MORECORE_CONTIGUOUS or not HAVE_MORECORE or - or main space is mmapped or a previous contiguous call failed) - 2. A call to MMAP new space (disabled if not HAVE_MMAP). 
- Note that under the default settings, if MORECORE is unable to - fulfill a request, and HAVE_MMAP is true, then mmap is - used as a noncontiguous system allocator. This is a useful backup - strategy for systems with holes in address spaces -- in this case - sbrk cannot contiguously expand the heap, but mmap may be able to - find space. - 3. A call to MORECORE that cannot usually contiguously extend memory. - (disabled if not HAVE_MORECORE) - - In all cases, we need to request enough bytes from system to ensure - we can malloc nb bytes upon success, so pad with enough space for - top_foot, plus alignment-pad to make sure we don't lose bytes if - not on boundary, and round this up to a granularity unit. - */ - - if (MORECORE_CONTIGUOUS && !use_noncontiguous(m)) { - char* br = CMFAIL; - msegmentptr ss = (m->top == 0)? 0 : segment_holding(m, (char*)m->top); - size_t asize = 0; - ACQUIRE_MALLOC_GLOBAL_LOCK(); - - if (ss == 0) { /* First time through or recovery */ - char* base = (char*)CALL_MORECORE(0); - if (base != CMFAIL) { - asize = granularity_align(nb + SYS_ALLOC_PADDING); - /* Adjust to end on a page boundary */ - if (!is_page_aligned(base)) - asize += (page_align((size_t)base) - (size_t)base); - /* Can't call MORECORE if size is negative when treated as signed */ - if (asize < HALF_MAX_SIZE_T && - (br = (char*)(CALL_MORECORE(asize))) == base) { - tbase = base; - tsize = asize; - } - } - } - else { - /* Subtract out existing available top space from MORECORE request. 
*/ - asize = granularity_align(nb - m->topsize + SYS_ALLOC_PADDING); - /* Use mem here only if it did continuously extend old space */ - if (asize < HALF_MAX_SIZE_T && - (br = (char*)(CALL_MORECORE(asize))) == ss->base+ss->size) { - tbase = br; - tsize = asize; - } - } - - if (tbase == CMFAIL) { /* Cope with partial failure */ - if (br != CMFAIL) { /* Try to use/extend the space we did get */ - if (asize < HALF_MAX_SIZE_T && - asize < nb + SYS_ALLOC_PADDING) { - size_t esize = granularity_align(nb + SYS_ALLOC_PADDING - asize); - if (esize < HALF_MAX_SIZE_T) { - char* end = (char*)CALL_MORECORE(esize); - if (end != CMFAIL) - asize += esize; - else { /* Can't use; try to release */ - (void) CALL_MORECORE(-asize); - br = CMFAIL; - } - } - } - } - if (br != CMFAIL) { /* Use the space we did get */ - tbase = br; - tsize = asize; - } - else - disable_contiguous(m); /* Don't try contiguous path in the future */ - } - - RELEASE_MALLOC_GLOBAL_LOCK(); - } - - if (HAVE_MMAP && tbase == CMFAIL) { /* Try MMAP */ - size_t rsize = granularity_align(nb + SYS_ALLOC_PADDING); - if (rsize > nb) { /* Fail if wraps around zero */ - char* mp = (char*)(CALL_MMAP(rsize)); - if (mp != CMFAIL) { - tbase = mp; - tsize = rsize; - mmap_flag = IS_MMAPPED_BIT; - } - } - } - - if (HAVE_MORECORE && tbase == CMFAIL) { /* Try noncontiguous MORECORE */ - size_t asize = granularity_align(nb + SYS_ALLOC_PADDING); - if (asize < HALF_MAX_SIZE_T) { - char* br = CMFAIL; - char* end = CMFAIL; - ACQUIRE_MALLOC_GLOBAL_LOCK(); - br = (char*)(CALL_MORECORE(asize)); - end = (char*)(CALL_MORECORE(0)); - RELEASE_MALLOC_GLOBAL_LOCK(); - if (br != CMFAIL && end != CMFAIL && br < end) { - size_t ssize = end - br; - if (ssize > nb + TOP_FOOT_SIZE) { - tbase = br; - tsize = ssize; - } - } - } - } - - if (tbase != CMFAIL) { - - if ((m->footprint += tsize) > m->max_footprint) - m->max_footprint = m->footprint; - - if (!is_initialized(m)) { /* first-time initialization */ - m->seg.base = m->least_addr = tbase; - 
m->seg.size = tsize; - m->seg.sflags = mmap_flag; - m->magic = mparams.magic; - m->release_checks = MAX_RELEASE_CHECK_RATE; - init_bins(m); -#if !ONLY_MSPACES - if (is_global(m)) - init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE); - else -#endif - { - /* Offset top by embedded malloc_state */ - mchunkptr mn = next_chunk(mem2chunk(m)); - init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) -TOP_FOOT_SIZE); - } - } - - else { - /* Try to merge with an existing segment */ - msegmentptr sp = &m->seg; - /* Only consider most recent segment if traversal suppressed */ - while (sp != 0 && tbase != sp->base + sp->size) - sp = (NO_SEGMENT_TRAVERSAL) ? 0 : sp->next; - if (sp != 0 && - !is_extern_segment(sp) && - (sp->sflags & IS_MMAPPED_BIT) == mmap_flag && - segment_holds(sp, m->top)) { /* append */ - sp->size += tsize; - init_top(m, m->top, m->topsize + tsize); - } - else { - if (tbase < m->least_addr) - m->least_addr = tbase; - sp = &m->seg; - while (sp != 0 && sp->base != tbase + tsize) - sp = (NO_SEGMENT_TRAVERSAL) ? 
0 : sp->next; - if (sp != 0 && - !is_extern_segment(sp) && - (sp->sflags & IS_MMAPPED_BIT) == mmap_flag) { - char* oldbase = sp->base; - sp->base = tbase; - sp->size += tsize; - return prepend_alloc(m, tbase, oldbase, nb); - } - else - add_segment(m, tbase, tsize, mmap_flag); - } - } - - if (nb < m->topsize) { /* Allocate from new or extended top space */ - size_t rsize = m->topsize -= nb; - mchunkptr p = m->top; - mchunkptr r = m->top = chunk_plus_offset(p, nb); - r->head = rsize | PINUSE_BIT; - set_size_and_pinuse_of_inuse_chunk(m, p, nb); - check_top_chunk(m, m->top); - check_malloced_chunk(m, chunk2mem(p), nb); - return chunk2mem(p); - } - } - - MALLOC_FAILURE_ACTION; - return 0; -} - -/* ----------------------- system deallocation -------------------------- */ - -/* Unmap and unlink any mmapped segments that don't contain used chunks */ -static size_t release_unused_segments(mstate m) { - size_t released = 0; - int nsegs = 0; - msegmentptr pred = &m->seg; - msegmentptr sp = pred->next; - while (sp != 0) { - char* base = sp->base; - size_t size = sp->size; - msegmentptr next = sp->next; - ++nsegs; - if (is_mmapped_segment(sp) && !is_extern_segment(sp)) { - mchunkptr p = align_as_chunk(base); - size_t psize = chunksize(p); - /* Can unmap if first chunk holds entire segment and not pinned */ - if (!cinuse(p) && (char*)p + psize >= base + size - TOP_FOOT_SIZE) { - tchunkptr tp = (tchunkptr)p; - assert(segment_holds(sp, (char*)sp)); - if (p == m->dv) { - m->dv = 0; - m->dvsize = 0; - } - else { - unlink_large_chunk(m, tp); - } - if (CALL_MUNMAP(base, size) == 0) { - released += size; - m->footprint -= size; - /* unlink obsoleted record */ - sp = pred; - sp->next = next; - } - else { /* back out if cannot unmap */ - insert_large_chunk(m, tp, psize); - } - } - } - if (NO_SEGMENT_TRAVERSAL) /* scan only first segment */ - break; - pred = sp; - sp = next; - } - /* Reset check counter */ - m->release_checks = ((nsegs > MAX_RELEASE_CHECK_RATE)? 
- nsegs : MAX_RELEASE_CHECK_RATE); - return released; -} - -static int sys_trim(mstate m, size_t pad) { - size_t released = 0; - ensure_initialization(); - if (pad < MAX_REQUEST && is_initialized(m)) { - pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */ - - if (m->topsize > pad) { - /* Shrink top space in granularity-size units, keeping at least one */ - size_t unit = mparams.granularity; - size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit - - SIZE_T_ONE) * unit; - msegmentptr sp = segment_holding(m, (char*)m->top); - - if (!is_extern_segment(sp)) { - if (is_mmapped_segment(sp)) { - if (HAVE_MMAP && - sp->size >= extra && - !has_segment_link(m, sp)) { /* can't shrink if pinned */ - size_t newsize = sp->size - extra; - /* Prefer mremap, fall back to munmap */ - if (((void*)(intptr_t)CALL_MREMAP(sp->base, sp->size, newsize, 0) != (void*)MFAIL) || - (CALL_MUNMAP(sp->base + newsize, extra) == 0)) { - released = extra; - } - } - } - else if (HAVE_MORECORE) { - if (extra >= HALF_MAX_SIZE_T) /* Avoid wrapping negative */ - extra = (HALF_MAX_SIZE_T) + SIZE_T_ONE - unit; - ACQUIRE_MALLOC_GLOBAL_LOCK(); - { - /* Make sure end of memory is where we last set it. */ - char* old_br = (char*)(CALL_MORECORE(0)); - if (old_br == sp->base + sp->size) { - char* rel_br = (char*)(CALL_MORECORE(-extra)); - char* new_br = (char*)(CALL_MORECORE(0)); - if (rel_br != CMFAIL && new_br < old_br) - released = old_br - new_br; - } - } - RELEASE_MALLOC_GLOBAL_LOCK(); - } - } - - if (released != 0) { - sp->size -= released; - m->footprint -= released; - init_top(m, m->top, m->topsize - released); - check_top_chunk(m, m->top); - } - } - - /* Unmap any unused mmapped segments */ - if (HAVE_MMAP) - released += release_unused_segments(m); - - /* On failure, disable autotrim to avoid repeated failed future calls */ - if (released == 0 && m->topsize > m->trim_check) - m->trim_check = MAX_SIZE_T; - } - - return (released != 0)? 
1 : 0; -} - - -/* ---------------------------- malloc support --------------------------- */ - -/* allocate a large request from the best fitting chunk in a treebin */ -static void* tmalloc_large(mstate m, size_t nb) { - tchunkptr v = 0; - size_t rsize = -nb; /* Unsigned negation */ - tchunkptr t; - bindex_t idx; - compute_tree_index(nb, idx); - if ((t = *treebin_at(m, idx)) != 0) { - /* Traverse tree for this bin looking for node with size == nb */ - size_t sizebits = nb << leftshift_for_tree_index(idx); - tchunkptr rst = 0; /* The deepest untaken right subtree */ - for (;;) { - tchunkptr rt; - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - v = t; - if ((rsize = trem) == 0) - break; - } - rt = t->child[1]; - t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]; - if (rt != 0 && rt != t) - rst = rt; - if (t == 0) { - t = rst; /* set t to least subtree holding sizes > nb */ - break; - } - sizebits <<= 1; - } - } - if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */ - binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap; - if (leftbits != 0) { - bindex_t i; - binmap_t leastbit = least_bit(leftbits); - compute_bit2idx(leastbit, i); - t = *treebin_at(m, i); - } - } - - while (t != 0) { /* find smallest of tree or subtree */ - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - rsize = trem; - v = t; - } - t = leftmost_child(t); - } - - /* If dv is a better fit, return 0 so malloc will use it */ - if (v != 0 && rsize < (size_t)(m->dvsize - nb)) { - if (RTCHECK(ok_address(m, v))) { /* split */ - mchunkptr r = chunk_plus_offset(v, nb); - assert(chunksize(v) == rsize + nb); - if (RTCHECK(ok_next(v, r))) { - unlink_large_chunk(m, v); - if (rsize < MIN_CHUNK_SIZE) - set_inuse_and_pinuse(m, v, (rsize + nb)); - else { - set_size_and_pinuse_of_inuse_chunk(m, v, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - insert_chunk(m, r, rsize); - } - return chunk2mem(v); - } - } - CORRUPTION_ERROR_ACTION(m); - } - return 0; -} - -/* 
allocate a small request from the best fitting chunk in a treebin */ -static void* tmalloc_small(mstate m, size_t nb) { - tchunkptr t, v; - size_t rsize; - bindex_t i; - binmap_t leastbit = least_bit(m->treemap); - compute_bit2idx(leastbit, i); - v = t = *treebin_at(m, i); - rsize = chunksize(t) - nb; - - while ((t = leftmost_child(t)) != 0) { - size_t trem = chunksize(t) - nb; - if (trem < rsize) { - rsize = trem; - v = t; - } - } - - if (RTCHECK(ok_address(m, v))) { - mchunkptr r = chunk_plus_offset(v, nb); - assert(chunksize(v) == rsize + nb); - if (RTCHECK(ok_next(v, r))) { - unlink_large_chunk(m, v); - if (rsize < MIN_CHUNK_SIZE) - set_inuse_and_pinuse(m, v, (rsize + nb)); - else { - set_size_and_pinuse_of_inuse_chunk(m, v, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - replace_dv(m, r, rsize); - } - return chunk2mem(v); - } - } - - CORRUPTION_ERROR_ACTION(m); - return 0; -} - -/* --------------------------- realloc support --------------------------- */ - -static void* internal_realloc(mstate m, void* oldmem, size_t bytes) { - if (bytes >= MAX_REQUEST) { - MALLOC_FAILURE_ACTION; - return 0; - } - if (!PREACTION(m)) { - mchunkptr oldp = mem2chunk(oldmem); - size_t oldsize = chunksize(oldp); - mchunkptr next = chunk_plus_offset(oldp, oldsize); - mchunkptr newp = 0; - void* extra = 0; - - /* Try to either shrink or extend into top. 
Else malloc-copy-free */ - - if (RTCHECK(ok_address(m, oldp) && ok_cinuse(oldp) && - ok_next(oldp, next) && ok_pinuse(next))) { - size_t nb = request2size(bytes); - if (is_mmapped(oldp)) - newp = mmap_resize(m, oldp, nb); - else if (oldsize >= nb) { /* already big enough */ - size_t rsize = oldsize - nb; - newp = oldp; - if (rsize >= MIN_CHUNK_SIZE) { - mchunkptr remainder = chunk_plus_offset(newp, nb); - set_inuse(m, newp, nb); - set_inuse(m, remainder, rsize); - extra = chunk2mem(remainder); - } - } - else if (next == m->top && oldsize + m->topsize > nb) { - /* Expand into top */ - size_t newsize = oldsize + m->topsize; - size_t newtopsize = newsize - nb; - mchunkptr newtop = chunk_plus_offset(oldp, nb); - set_inuse(m, oldp, nb); - newtop->head = newtopsize |PINUSE_BIT; - m->top = newtop; - m->topsize = newtopsize; - newp = oldp; - } - } - else { - USAGE_ERROR_ACTION(m, oldmem); - POSTACTION(m); - return 0; - } - - POSTACTION(m); - - if (newp != 0) { - if (extra != 0) { - internal_free(m, extra); - } - check_inuse_chunk(m, newp); - return chunk2mem(newp); - } - else { - void* newmem = internal_malloc(m, bytes); - if (newmem != 0) { - size_t oc = oldsize - overhead_for(oldp); - memcpy(newmem, oldmem, (oc < bytes)? 
oc : bytes); - internal_free(m, oldmem); - } - return newmem; - } - } - return 0; -} - -/* --------------------------- memalign support -------------------------- */ - -static void* internal_memalign(mstate m, size_t alignment, size_t bytes) { - if (alignment <= MALLOC_ALIGNMENT) /* Can just use malloc */ - return internal_malloc(m, bytes); - if (alignment < MIN_CHUNK_SIZE) /* must be at least a minimum chunk size */ - alignment = MIN_CHUNK_SIZE; - if ((alignment & (alignment-SIZE_T_ONE)) != 0) {/* Ensure a power of 2 */ - size_t a = MALLOC_ALIGNMENT << 1; - while (a < alignment) a <<= 1; - alignment = a; - } - - if (bytes >= MAX_REQUEST - alignment) { - if (m != 0) { /* Test isn't needed but avoids compiler warning */ - MALLOC_FAILURE_ACTION; - } - } - else { - size_t nb = request2size(bytes); - size_t req = nb + alignment + MIN_CHUNK_SIZE - CHUNK_OVERHEAD; - char* mem = (char*)internal_malloc(m, req); - if (mem != 0) { - void* leader = 0; - void* trailer = 0; - mchunkptr p = mem2chunk(mem); - - if (PREACTION(m)) return 0; - if ((((size_t)(mem)) % alignment) != 0) { /* misaligned */ - /* - Find an aligned spot inside chunk. Since we need to give - back leading space in a chunk of at least MIN_CHUNK_SIZE, if - the first calculation places us at a spot with less than - MIN_CHUNK_SIZE leader, we can move to the next aligned spot. - We've allocated enough total room so that this is always - possible. - */ - char* br = (char*)mem2chunk((size_t)(((size_t)(mem + - alignment - - SIZE_T_ONE)) & - -alignment)); - char* pos = ((size_t)(br - (char*)(p)) >= MIN_CHUNK_SIZE)? 
- br : br+alignment; - mchunkptr newp = (mchunkptr)pos; - size_t leadsize = pos - (char*)(p); - size_t newsize = chunksize(p) - leadsize; - - if (is_mmapped(p)) { /* For mmapped chunks, just adjust offset */ - newp->prev_foot = p->prev_foot + leadsize; - newp->head = (newsize|CINUSE_BIT); - } - else { /* Otherwise, give back leader, use the rest */ - set_inuse(m, newp, newsize); - set_inuse(m, p, leadsize); - leader = chunk2mem(p); - } - p = newp; - } - - /* Give back spare room at the end */ - if (!is_mmapped(p)) { - size_t size = chunksize(p); - if (size > nb + MIN_CHUNK_SIZE) { - size_t remainder_size = size - nb; - mchunkptr remainder = chunk_plus_offset(p, nb); - set_inuse(m, p, nb); - set_inuse(m, remainder, remainder_size); - trailer = chunk2mem(remainder); - } - } - - assert (chunksize(p) >= nb); - assert((((size_t)(chunk2mem(p))) % alignment) == 0); - check_inuse_chunk(m, p); - POSTACTION(m); - if (leader != 0) { - internal_free(m, leader); - } - if (trailer != 0) { - internal_free(m, trailer); - } - return chunk2mem(p); - } - } - return 0; -} - -/* ------------------------ comalloc/coalloc support --------------------- */ - -static void** ialloc(mstate m, - size_t n_elements, - size_t* sizes, - int opts, - void* chunks[]) { - /* - This provides common support for independent_X routines, handling - all of the combinations that can result. 
- - The opts arg has: - bit 0 set if all elements are same size (using sizes[0]) - bit 1 set if elements should be zeroed - */ - - size_t element_size; /* chunksize of each element, if all same */ - size_t contents_size; /* total size of elements */ - size_t array_size; /* request size of pointer array */ - void* mem; /* malloced aggregate space */ - mchunkptr p; /* corresponding chunk */ - size_t remainder_size; /* remaining bytes while splitting */ - void** marray; /* either "chunks" or malloced ptr array */ - mchunkptr array_chunk; /* chunk for malloced ptr array */ - flag_t was_enabled; /* to disable mmap */ - size_t size; - size_t i; - - ensure_initialization(); - /* compute array length, if needed */ - if (chunks != 0) { - if (n_elements == 0) - return chunks; /* nothing to do */ - marray = chunks; - array_size = 0; - } - else { - /* if empty req, must still return chunk representing empty array */ - if (n_elements == 0) - return (void**)internal_malloc(m, 0); - marray = 0; - array_size = request2size(n_elements * (sizeof(void*))); - } - - /* compute total element size */ - if (opts & 0x1) { /* all-same-size */ - element_size = request2size(*sizes); - contents_size = n_elements * element_size; - } - else { /* add up all the sizes */ - element_size = 0; - contents_size = 0; - for (i = 0; i != n_elements; ++i) - contents_size += request2size(sizes[i]); - } - - size = contents_size + array_size; - - /* - Allocate the aggregate chunk. First disable direct-mmapping so - malloc won't use it, since we would not be able to later - free/realloc space internal to a segregated mmap region. 
- */ - was_enabled = use_mmap(m); - disable_mmap(m); - mem = internal_malloc(m, size - CHUNK_OVERHEAD); - if (was_enabled) - enable_mmap(m); - if (mem == 0) - return 0; - - if (PREACTION(m)) return 0; - p = mem2chunk(mem); - remainder_size = chunksize(p); - - assert(!is_mmapped(p)); - - if (opts & 0x2) { /* optionally clear the elements */ - memset((size_t*)mem, 0, remainder_size - SIZE_T_SIZE - array_size); - } - - /* If not provided, allocate the pointer array as final part of chunk */ - if (marray == 0) { - size_t array_chunk_size; - array_chunk = chunk_plus_offset(p, contents_size); - array_chunk_size = remainder_size - contents_size; - marray = (void**) (chunk2mem(array_chunk)); - set_size_and_pinuse_of_inuse_chunk(m, array_chunk, array_chunk_size); - remainder_size = contents_size; - } - - /* split out elements */ - for (i = 0; ; ++i) { - marray[i] = chunk2mem(p); - if (i != n_elements-1) { - if (element_size != 0) - size = element_size; - else - size = request2size(sizes[i]); - remainder_size -= size; - set_size_and_pinuse_of_inuse_chunk(m, p, size); - p = chunk_plus_offset(p, size); - } - else { /* the final element absorbs any overallocation slop */ - set_size_and_pinuse_of_inuse_chunk(m, p, remainder_size); - break; - } - } - -#if DEBUG - if (marray != chunks) { - /* final element must have exactly exhausted chunk */ - if (element_size != 0) { - assert(remainder_size == element_size); - } - else { - assert(remainder_size == request2size(sizes[i])); - } - check_inuse_chunk(m, mem2chunk(marray)); - } - for (i = 0; i != n_elements; ++i) - check_inuse_chunk(m, mem2chunk(marray[i])); - -#endif /* DEBUG */ - - POSTACTION(m); - return marray; -} - - -/* -------------------------- public routines ---------------------------- */ - -#if !ONLY_MSPACES - -void* dlmalloc(size_t bytes) { - /* - Basic algorithm: - If a small request (< 256 bytes minus per-chunk overhead): - 1. If one exists, use a remainderless chunk in associated smallbin. 
- (Remainderless means that there are too few excess bytes to - represent as a chunk.) - 2. If it is big enough, use the dv chunk, which is normally the - chunk adjacent to the one used for the most recent small request. - 3. If one exists, split the smallest available chunk in a bin, - saving remainder in dv. - 4. If it is big enough, use the top chunk. - 5. If available, get memory from system and use it - Otherwise, for a large request: - 1. Find the smallest available binned chunk that fits, and use it - if it is better fitting than dv chunk, splitting if necessary. - 2. If better fitting than any binned chunk, use the dv chunk. - 3. If it is big enough, use the top chunk. - 4. If request size >= mmap threshold, try to directly mmap this chunk. - 5. If available, get memory from system and use it - - The ugly goto's here ensure that postaction occurs along all paths. - */ - -#if USE_LOCKS - ensure_initialization(); /* initialize in sys_alloc if not using locks */ -#endif - - if (!PREACTION(gm)) { - void* mem; - size_t nb; - if (bytes <= MAX_SMALL_REQUEST) { - bindex_t idx; - binmap_t smallbits; - nb = (bytes < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(bytes); - idx = small_index(nb); - smallbits = gm->smallmap >> idx; - - if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. 
*/ - mchunkptr b, p; - idx += ~smallbits & 1; /* Uses next bin if idx empty */ - b = smallbin_at(gm, idx); - p = b->fd; - assert(chunksize(p) == small_index2size(idx)); - unlink_first_small_chunk(gm, b, p, idx); - set_inuse_and_pinuse(gm, p, small_index2size(idx)); - mem = chunk2mem(p); - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - - else if (nb > gm->dvsize) { - if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ - mchunkptr b, p, r; - size_t rsize; - bindex_t i; - binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); - binmap_t leastbit = least_bit(leftbits); - compute_bit2idx(leastbit, i); - b = smallbin_at(gm, i); - p = b->fd; - assert(chunksize(p) == small_index2size(i)); - unlink_first_small_chunk(gm, b, p, i); - rsize = small_index2size(i) - nb; - /* Fit here cannot be remainderless if 4byte sizes */ - if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) - set_inuse_and_pinuse(gm, p, small_index2size(i)); - else { - set_size_and_pinuse_of_inuse_chunk(gm, p, nb); - r = chunk_plus_offset(p, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - replace_dv(gm, r, rsize); - } - mem = chunk2mem(p); - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - - else if (gm->treemap != 0 && (mem = tmalloc_small(gm, nb)) != 0) { - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - } - } - else if (bytes >= MAX_REQUEST) - nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ - else { - nb = pad_request(bytes); - if (gm->treemap != 0 && (mem = tmalloc_large(gm, nb)) != 0) { - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - } - - if (nb <= gm->dvsize) { - size_t rsize = gm->dvsize - nb; - mchunkptr p = gm->dv; - if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ - mchunkptr r = gm->dv = chunk_plus_offset(p, nb); - gm->dvsize = rsize; - set_size_and_pinuse_of_free_chunk(r, rsize); - set_size_and_pinuse_of_inuse_chunk(gm, p, nb); - } - else { /* exhaust dv */ - size_t dvs = gm->dvsize; - gm->dvsize = 0; - gm->dv = 0; - set_inuse_and_pinuse(gm, p, dvs); - } - mem = chunk2mem(p); - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - - else if (nb < gm->topsize) { /* Split top */ - size_t rsize = gm->topsize -= nb; - mchunkptr p = gm->top; - mchunkptr r = gm->top = chunk_plus_offset(p, nb); - r->head = rsize | PINUSE_BIT; - set_size_and_pinuse_of_inuse_chunk(gm, p, nb); - mem = chunk2mem(p); - check_top_chunk(gm, gm->top); - check_malloced_chunk(gm, mem, nb); - goto postaction; - } - - mem = sys_alloc(gm, nb); - - postaction: - POSTACTION(gm); - return mem; - } - - return 0; -} - -void dlfree(void* mem) { - /* - Consolidate freed chunks with preceeding or succeeding bordering - free chunks, if they exist, and then place in a bin. Intermixed - with special cases for top, dv, mmapped chunks, and usage errors. 
- */ - - if (mem != 0) { - mchunkptr p = mem2chunk(mem); -#if FOOTERS - mstate fm = get_mstate_for(p); - if (!ok_magic(fm)) { - USAGE_ERROR_ACTION(fm, p); - return; - } -#else /* FOOTERS */ -#define fm gm -#endif /* FOOTERS */ - if (!PREACTION(fm)) { - check_inuse_chunk(fm, p); - if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) { - size_t psize = chunksize(p); - mchunkptr next = chunk_plus_offset(p, psize); - if (!pinuse(p)) { - size_t prevsize = p->prev_foot; - if ((prevsize & IS_MMAPPED_BIT) != 0) { - prevsize &= ~IS_MMAPPED_BIT; - psize += prevsize + MMAP_FOOT_PAD; - if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) - fm->footprint -= psize; - goto postaction; - } - else { - mchunkptr prev = chunk_minus_offset(p, prevsize); - psize += prevsize; - p = prev; - if (RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ - if (p != fm->dv) { - unlink_chunk(fm, p, prevsize); - } - else if ((next->head & INUSE_BITS) == INUSE_BITS) { - fm->dvsize = psize; - set_free_with_pinuse(p, psize, next); - goto postaction; - } - } - else - goto erroraction; - } - } - - if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { - if (!cinuse(next)) { /* consolidate forward */ - if (next == fm->top) { - size_t tsize = fm->topsize += psize; - fm->top = p; - p->head = tsize | PINUSE_BIT; - if (p == fm->dv) { - fm->dv = 0; - fm->dvsize = 0; - } - if (should_trim(fm, tsize)) - sys_trim(fm, 0); - goto postaction; - } - else if (next == fm->dv) { - size_t dsize = fm->dvsize += psize; - fm->dv = p; - set_size_and_pinuse_of_free_chunk(p, dsize); - goto postaction; - } - else { - size_t nsize = chunksize(next); - psize += nsize; - unlink_chunk(fm, next, nsize); - set_size_and_pinuse_of_free_chunk(p, psize); - if (p == fm->dv) { - fm->dvsize = psize; - goto postaction; - } - } - } - else - set_free_with_pinuse(p, psize, next); - - if (is_small(psize)) { - insert_small_chunk(fm, p, psize); - check_free_chunk(fm, p); - } - else { - tchunkptr tp = (tchunkptr)p; - insert_large_chunk(fm, tp, psize); 
- check_free_chunk(fm, p); - if (--fm->release_checks == 0) - release_unused_segments(fm); - } - goto postaction; - } - } - erroraction: - USAGE_ERROR_ACTION(fm, p); - postaction: - POSTACTION(fm); - } - } -#if !FOOTERS -#undef fm -#endif /* FOOTERS */ -} - -void* dlcalloc(size_t n_elements, size_t elem_size) { - void* mem; - size_t req = 0; - if (n_elements != 0) { - req = n_elements * elem_size; - if (((n_elements | elem_size) & ~(size_t)0xffff) && - (req / n_elements != elem_size)) - req = MAX_SIZE_T; /* force downstream failure on overflow */ - } - mem = dlmalloc(req); - if (mem != 0 && calloc_must_clear(mem2chunk(mem))) - memset(mem, 0, req); - return mem; -} - -void* dlrealloc(void* oldmem, size_t bytes) { - if (oldmem == 0) - return dlmalloc(bytes); -#ifdef REALLOC_ZERO_BYTES_FREES - if (bytes == 0) { - dlfree(oldmem); - return 0; - } -#endif /* REALLOC_ZERO_BYTES_FREES */ - else { -#if ! FOOTERS - mstate m = gm; -#else /* FOOTERS */ - mstate m = get_mstate_for(mem2chunk(oldmem)); - if (!ok_magic(m)) { - USAGE_ERROR_ACTION(m, oldmem); - return 0; - } -#endif /* FOOTERS */ - return internal_realloc(m, oldmem, bytes); - } -} - -void* dlmemalign(size_t alignment, size_t bytes) { - return internal_memalign(gm, alignment, bytes); -} - -void** dlindependent_calloc(size_t n_elements, size_t elem_size, - void* chunks[]) { - size_t sz = elem_size; /* serves as 1-element array */ - return ialloc(gm, n_elements, &sz, 3, chunks); -} - -void** dlindependent_comalloc(size_t n_elements, size_t sizes[], - void* chunks[]) { - return ialloc(gm, n_elements, sizes, 0, chunks); -} - -void* dlvalloc(size_t bytes) { - size_t pagesz; - ensure_initialization(); - pagesz = mparams.page_size; - return dlmemalign(pagesz, bytes); -} - -void* dlpvalloc(size_t bytes) { - size_t pagesz; - ensure_initialization(); - pagesz = mparams.page_size; - return dlmemalign(pagesz, (bytes + pagesz - SIZE_T_ONE) & ~(pagesz - SIZE_T_ONE)); -} - -int dlmalloc_trim(size_t pad) { - ensure_initialization(); 
- int result = 0; - if (!PREACTION(gm)) { - result = sys_trim(gm, pad); - POSTACTION(gm); - } - return result; -} - -size_t dlmalloc_footprint(void) { - return gm->footprint; -} - -size_t dlmalloc_max_footprint(void) { - return gm->max_footprint; -} - -#if !NO_MALLINFO -struct mallinfo dlmallinfo(void) { - return internal_mallinfo(gm); -} -#endif /* NO_MALLINFO */ - -void dlmalloc_stats() { - internal_malloc_stats(gm); -} - -int dlmallopt(int param_number, int value) { - return change_mparam(param_number, value); -} - -#endif /* !ONLY_MSPACES */ - -size_t dlmalloc_usable_size(void* mem) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - if (cinuse(p)) - return chunksize(p) - overhead_for(p); - } - return 0; -} - -/* ----------------------------- user mspaces ---------------------------- */ - -#if MSPACES - -static mstate init_user_mstate(char* tbase, size_t tsize) { - size_t msize = pad_request(sizeof(struct malloc_state)); - mchunkptr mn; - mchunkptr msp = align_as_chunk(tbase); - mstate m = (mstate)(chunk2mem(msp)); - memset(m, 0, msize); - INITIAL_LOCK(&m->mutex); - msp->head = (msize|PINUSE_BIT|CINUSE_BIT); - m->seg.base = m->least_addr = tbase; - m->seg.size = m->footprint = m->max_footprint = tsize; - m->magic = mparams.magic; - m->release_checks = MAX_RELEASE_CHECK_RATE; - m->mflags = mparams.default_mflags; - m->extp = 0; - m->exts = 0; - disable_contiguous(m); - init_bins(m); - mn = next_chunk(mem2chunk(m)); - init_top(m, mn, (size_t)((tbase + tsize) - (char*)mn) - TOP_FOOT_SIZE); - check_top_chunk(m, m->top); - return m; -} - -mspace create_mspace(size_t capacity, int locked) { - mstate m = 0; - size_t msize; - ensure_initialization(); - msize = pad_request(sizeof(struct malloc_state)); - if (capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { - size_t rs = ((capacity == 0)? 
mparams.granularity : - (capacity + TOP_FOOT_SIZE + msize)); - size_t tsize = granularity_align(rs); - char* tbase = (char*)(CALL_MMAP(tsize)); - if (tbase != CMFAIL) { - m = init_user_mstate(tbase, tsize); - m->seg.sflags = IS_MMAPPED_BIT; - set_lock(m, locked); - } - } - return (mspace)m; -} - -mspace create_mspace_with_base(void* base, size_t capacity, int locked) { - mstate m = 0; - size_t msize; - ensure_initialization(); - msize = pad_request(sizeof(struct malloc_state)); - if (capacity > msize + TOP_FOOT_SIZE && - capacity < (size_t) -(msize + TOP_FOOT_SIZE + mparams.page_size)) { - m = init_user_mstate((char*)base, capacity); - m->seg.sflags = EXTERN_BIT; - set_lock(m, locked); - } - return (mspace)m; -} - -int mspace_mmap_large_chunks(mspace msp, int enable) { - int ret = 0; - mstate ms = (mstate)msp; - if (!PREACTION(ms)) { - if (use_mmap(ms)) - ret = 1; - if (enable) - enable_mmap(ms); - else - disable_mmap(ms); - POSTACTION(ms); - } - return ret; -} - -size_t destroy_mspace(mspace msp) { - size_t freed = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - msegmentptr sp = &ms->seg; - while (sp != 0) { - char* base = sp->base; - size_t size = sp->size; - flag_t flag = sp->sflags; - sp = sp->next; - if ((flag & IS_MMAPPED_BIT) && !(flag & EXTERN_BIT) && - CALL_MUNMAP(base, size) == 0) - freed += size; - } - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return freed; -} - -/* - mspace versions of routines are near-clones of the global - versions. This is not so nice but better than the alternatives. -*/ - - -void* mspace_malloc(mspace msp, size_t bytes) { - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - if (!PREACTION(ms)) { - void* mem; - size_t nb; - if (bytes <= MAX_SMALL_REQUEST) { - bindex_t idx; - binmap_t smallbits; - nb = (bytes < MIN_REQUEST)? 
MIN_CHUNK_SIZE : pad_request(bytes); - idx = small_index(nb); - smallbits = ms->smallmap >> idx; - - if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */ - mchunkptr b, p; - idx += ~smallbits & 1; /* Uses next bin if idx empty */ - b = smallbin_at(ms, idx); - p = b->fd; - assert(chunksize(p) == small_index2size(idx)); - unlink_first_small_chunk(ms, b, p, idx); - set_inuse_and_pinuse(ms, p, small_index2size(idx)); - mem = chunk2mem(p); - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - - else if (nb > ms->dvsize) { - if (smallbits != 0) { /* Use chunk in next nonempty smallbin */ - mchunkptr b, p, r; - size_t rsize; - bindex_t i; - binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx)); - binmap_t leastbit = least_bit(leftbits); - compute_bit2idx(leastbit, i); - b = smallbin_at(ms, i); - p = b->fd; - assert(chunksize(p) == small_index2size(i)); - unlink_first_small_chunk(ms, b, p, i); - rsize = small_index2size(i) - nb; - /* Fit here cannot be remainderless if 4byte sizes */ - if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) - set_inuse_and_pinuse(ms, p, small_index2size(i)); - else { - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - r = chunk_plus_offset(p, nb); - set_size_and_pinuse_of_free_chunk(r, rsize); - replace_dv(ms, r, rsize); - } - mem = chunk2mem(p); - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - - else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) { - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - } - } - else if (bytes >= MAX_REQUEST) - nb = MAX_SIZE_T; /* Too big to allocate. 
Force failure (in sys alloc) */ - else { - nb = pad_request(bytes); - if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) { - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - } - - if (nb <= ms->dvsize) { - size_t rsize = ms->dvsize - nb; - mchunkptr p = ms->dv; - if (rsize >= MIN_CHUNK_SIZE) { /* split dv */ - mchunkptr r = ms->dv = chunk_plus_offset(p, nb); - ms->dvsize = rsize; - set_size_and_pinuse_of_free_chunk(r, rsize); - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - } - else { /* exhaust dv */ - size_t dvs = ms->dvsize; - ms->dvsize = 0; - ms->dv = 0; - set_inuse_and_pinuse(ms, p, dvs); - } - mem = chunk2mem(p); - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - - else if (nb < ms->topsize) { /* Split top */ - size_t rsize = ms->topsize -= nb; - mchunkptr p = ms->top; - mchunkptr r = ms->top = chunk_plus_offset(p, nb); - r->head = rsize | PINUSE_BIT; - set_size_and_pinuse_of_inuse_chunk(ms, p, nb); - mem = chunk2mem(p); - check_top_chunk(ms, ms->top); - check_malloced_chunk(ms, mem, nb); - goto postaction; - } - - mem = sys_alloc(ms, nb); - - postaction: - POSTACTION(ms); - return mem; - } - - return 0; -} - -void mspace_free(mspace msp, void* mem) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); -#if FOOTERS - mstate fm = get_mstate_for(p); -#else /* FOOTERS */ - mstate fm = (mstate)msp; -#endif /* FOOTERS */ - if (!ok_magic(fm)) { - USAGE_ERROR_ACTION(fm, p); - return; - } - if (!PREACTION(fm)) { - check_inuse_chunk(fm, p); - if (RTCHECK(ok_address(fm, p) && ok_cinuse(p))) { - size_t psize = chunksize(p); - mchunkptr next = chunk_plus_offset(p, psize); - if (!pinuse(p)) { - size_t prevsize = p->prev_foot; - if ((prevsize & IS_MMAPPED_BIT) != 0) { - prevsize &= ~IS_MMAPPED_BIT; - psize += prevsize + MMAP_FOOT_PAD; - if (CALL_MUNMAP((char*)p - prevsize, psize) == 0) - fm->footprint -= psize; - goto postaction; - } - else { - mchunkptr prev = chunk_minus_offset(p, prevsize); - psize += prevsize; - p = prev; - if 
(RTCHECK(ok_address(fm, prev))) { /* consolidate backward */ - if (p != fm->dv) { - unlink_chunk(fm, p, prevsize); - } - else if ((next->head & INUSE_BITS) == INUSE_BITS) { - fm->dvsize = psize; - set_free_with_pinuse(p, psize, next); - goto postaction; - } - } - else - goto erroraction; - } - } - - if (RTCHECK(ok_next(p, next) && ok_pinuse(next))) { - if (!cinuse(next)) { /* consolidate forward */ - if (next == fm->top) { - size_t tsize = fm->topsize += psize; - fm->top = p; - p->head = tsize | PINUSE_BIT; - if (p == fm->dv) { - fm->dv = 0; - fm->dvsize = 0; - } - if (should_trim(fm, tsize)) - sys_trim(fm, 0); - goto postaction; - } - else if (next == fm->dv) { - size_t dsize = fm->dvsize += psize; - fm->dv = p; - set_size_and_pinuse_of_free_chunk(p, dsize); - goto postaction; - } - else { - size_t nsize = chunksize(next); - psize += nsize; - unlink_chunk(fm, next, nsize); - set_size_and_pinuse_of_free_chunk(p, psize); - if (p == fm->dv) { - fm->dvsize = psize; - goto postaction; - } - } - } - else - set_free_with_pinuse(p, psize, next); - - if (is_small(psize)) { - insert_small_chunk(fm, p, psize); - check_free_chunk(fm, p); - } - else { - tchunkptr tp = (tchunkptr)p; - insert_large_chunk(fm, tp, psize); - check_free_chunk(fm, p); - if (--fm->release_checks == 0) - release_unused_segments(fm); - } - goto postaction; - } - } - erroraction: - USAGE_ERROR_ACTION(fm, p); - postaction: - POSTACTION(fm); - } - } -} - -void* mspace_calloc(mspace msp, size_t n_elements, size_t elem_size) { - void* mem; - size_t req = 0; - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - if (n_elements != 0) { - req = n_elements * elem_size; - if (((n_elements | elem_size) & ~(size_t)0xffff) && - (req / n_elements != elem_size)) - req = MAX_SIZE_T; /* force downstream failure on overflow */ - } - mem = internal_malloc(ms, req); - if (mem != 0 && calloc_must_clear(mem2chunk(mem))) - memset(mem, 0, req); - return mem; -} - -void* 
mspace_realloc(mspace msp, void* oldmem, size_t bytes) { - if (oldmem == 0) - return mspace_malloc(msp, bytes); -#ifdef REALLOC_ZERO_BYTES_FREES - if (bytes == 0) { - mspace_free(msp, oldmem); - return 0; - } -#endif /* REALLOC_ZERO_BYTES_FREES */ - else { -#if FOOTERS - mchunkptr p = mem2chunk(oldmem); - mstate ms = get_mstate_for(p); -#else /* FOOTERS */ - mstate ms = (mstate)msp; -#endif /* FOOTERS */ - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - return internal_realloc(ms, oldmem, bytes); - } -} - -void* mspace_memalign(mspace msp, size_t alignment, size_t bytes) { - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - return internal_memalign(ms, alignment, bytes); -} - -void** mspace_independent_calloc(mspace msp, size_t n_elements, - size_t elem_size, void* chunks[]) { - size_t sz = elem_size; /* serves as 1-element array */ - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - return ialloc(ms, n_elements, &sz, 3, chunks); -} - -void** mspace_independent_comalloc(mspace msp, size_t n_elements, - size_t sizes[], void* chunks[]) { - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - return 0; - } - return ialloc(ms, n_elements, sizes, 0, chunks); -} - -int mspace_trim(mspace msp, size_t pad) { - int result = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - if (!PREACTION(ms)) { - result = sys_trim(ms, pad); - POSTACTION(ms); - } - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return result; -} - -void mspace_malloc_stats(mspace msp) { - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - internal_malloc_stats(ms); - } - else { - USAGE_ERROR_ACTION(ms,ms); - } -} - -size_t mspace_footprint(mspace msp) { - size_t result = 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - result = ms->footprint; - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return result; -} - - -size_t mspace_max_footprint(mspace msp) { - size_t result 
= 0; - mstate ms = (mstate)msp; - if (ok_magic(ms)) { - result = ms->max_footprint; - } - else { - USAGE_ERROR_ACTION(ms,ms); - } - return result; -} - - -#if !NO_MALLINFO -struct mallinfo mspace_mallinfo(mspace msp) { - mstate ms = (mstate)msp; - if (!ok_magic(ms)) { - USAGE_ERROR_ACTION(ms,ms); - } - return internal_mallinfo(ms); -} -#endif /* NO_MALLINFO */ - -size_t mspace_usable_size(void* mem) { - if (mem != 0) { - mchunkptr p = mem2chunk(mem); - if (cinuse(p)) - return chunksize(p) - overhead_for(p); - } - return 0; -} - -int mspace_mallopt(int param_number, int value) { - return change_mparam(param_number, value); -} - -#endif /* MSPACES */ - -/* -------------------- Alternative MORECORE functions ------------------- */ - -/* - Guidelines for creating a custom version of MORECORE: - - * For best performance, MORECORE should allocate in multiples of pagesize. - * MORECORE may allocate more memory than requested. (Or even less, - but this will usually result in a malloc failure.) - * MORECORE must not allocate memory when given argument zero, but - instead return one past the end address of memory from previous - nonzero call. - * For best performance, consecutive calls to MORECORE with positive - arguments should return increasing addresses, indicating that - space has been contiguously extended. - * Even though consecutive calls to MORECORE need not return contiguous - addresses, it must be OK for malloc'ed chunks to span multiple - regions in those cases where they do happen to be contiguous. - * MORECORE need not handle negative arguments -- it may instead - just return MFAIL when given negative arguments. - Negative arguments are always multiples of pagesize. MORECORE - must not misinterpret negative args as large positive unsigned - args. You can suppress all such calls from even occurring by defining - MORECORE_CANNOT_TRIM, - - As an example alternative MORECORE, here is a custom allocator - kindly contributed for pre-OSX macOS. 
It uses virtually but not - necessarily physically contiguous non-paged memory (locked in, - present and won't get swapped out). You can use it by uncommenting - this section, adding some #includes, and setting up the appropriate - defines above: - - #define MORECORE osMoreCore - - There is also a shutdown routine that should somehow be called for - cleanup upon program exit. - - #define MAX_POOL_ENTRIES 100 - #define MINIMUM_MORECORE_SIZE (64 * 1024U) - static int next_os_pool; - void *our_os_pools[MAX_POOL_ENTRIES]; - - void *osMoreCore(int size) - { - void *ptr = 0; - static void *sbrk_top = 0; - - if (size > 0) - { - if (size < MINIMUM_MORECORE_SIZE) - size = MINIMUM_MORECORE_SIZE; - if (CurrentExecutionLevel() == kTaskLevel) - ptr = PoolAllocateResident(size + RM_PAGE_SIZE, 0); - if (ptr == 0) - { - return (void *) MFAIL; - } - // save ptrs so they can be freed during cleanup - our_os_pools[next_os_pool] = ptr; - next_os_pool++; - ptr = (void *) ((((size_t) ptr) + RM_PAGE_MASK) & ~RM_PAGE_MASK); - sbrk_top = (char *) ptr + size; - return ptr; - } - else if (size < 0) - { - // we don't currently support shrink behavior - return (void *) MFAIL; - } - else - { - return sbrk_top; - } - } - - // cleanup any allocated memory pools - // called as last thing before shutting down driver - - void osCleanupMem(void) - { - void **ptr; - - for (ptr = our_os_pools; ptr < &our_os_pools[MAX_POOL_ENTRIES]; ptr++) - if (*ptr) - { - PoolDeallocate(*ptr); - *ptr = 0; - } - } - -*/ - - -/* ----------------------------------------------------------------------- -History: - V2.8.4 (not yet released) - * Add mspace_mmap_large_chunks; thanks to Jean Brouwers - * Fix insufficient sys_alloc padding when using 16byte alignment - * Fix bad error check in mspace_footprint - * Adaptations for ptmalloc, courtesy of Wolfram Gloger. 
- * Reentrant spin locks, courtesy of Earl Chew and others - * Win32 improvements, courtesy of Niall Douglas and Earl Chew - * Add NO_SEGMENT_TRAVERSAL and MAX_RELEASE_CHECK_RATE options - * Extension hook in malloc_state - * Various small adjustments to reduce warnings on some compilers - * Various configuration extensions/changes for more platforms. Thanks - to all who contributed these. - - V2.8.3 Thu Sep 22 11:16:32 2005 Doug Lea (dl at gee) - * Add max_footprint functions - * Ensure all appropriate literals are size_t - * Fix conditional compilation problem for some #define settings - * Avoid concatenating segments with the one provided - in create_mspace_with_base - * Rename some variables to avoid compiler shadowing warnings - * Use explicit lock initialization. - * Better handling of sbrk interference. - * Simplify and fix segment insertion, trimming and mspace_destroy - * Reinstate REALLOC_ZERO_BYTES_FREES option from 2.7.x - * Thanks especially to Dennis Flanagan for help on these. - - V2.8.2 Sun Jun 12 16:01:10 2005 Doug Lea (dl at gee) - * Fix memalign brace error. - - V2.8.1 Wed Jun 8 16:11:46 2005 Doug Lea (dl at gee) - * Fix improper #endif nesting in C++ - * Add explicit casts needed for C++ - - V2.8.0 Mon May 30 14:09:02 2005 Doug Lea (dl at gee) - * Use trees for large bins - * Support mspaces - * Use segments to unify sbrk-based and mmap-based system allocation, - removing need for emulation on most platforms without sbrk. - * Default safety checks - * Optional footer checks. Thanks to William Robertson for the idea. - * Internal code refactoring - * Incorporate suggestions and platform-specific changes. - Thanks to Dennis Flanagan, Colin Plumb, Niall Douglas, - Aaron Bachmann, Emery Berger, and others. - * Speed up non-fastbin processing enough to remove fastbins. - * Remove useless cfree() to avoid conflicts with other apps. - * Remove internal memcpy, memset. Compilers handle builtins better. 
- * Remove some options that no one ever used and rename others. - - V2.7.2 Sat Aug 17 09:07:30 2002 Doug Lea (dl at gee) - * Fix malloc_state bitmap array misdeclaration - - V2.7.1 Thu Jul 25 10:58:03 2002 Doug Lea (dl at gee) - * Allow tuning of FIRST_SORTED_BIN_SIZE - * Use PTR_UINT as type for all ptr->int casts. Thanks to John Belmonte. - * Better detection and support for non-contiguousness of MORECORE. - Thanks to Andreas Mueller, Conal Walsh, and Wolfram Gloger - * Bypass most of malloc if no frees. Thanks To Emery Berger. - * Fix freeing of old top non-contiguous chunk im sysmalloc. - * Raised default trim and map thresholds to 256K. - * Fix mmap-related #defines. Thanks to Lubos Lunak. - * Fix copy macros; added LACKS_FCNTL_H. Thanks to Neal Walfield. - * Branch-free bin calculation - * Default trim and mmap thresholds now 256K. - - V2.7.0 Sun Mar 11 14:14:06 2001 Doug Lea (dl at gee) - * Introduce independent_comalloc and independent_calloc. - Thanks to Michael Pachos for motivation and help. - * Make optional .h file available - * Allow > 2GB requests on 32bit systems. - * new WIN32 sbrk, mmap, munmap, lock code from . - Thanks also to Andreas Mueller , - and Anonymous. - * Allow override of MALLOC_ALIGNMENT (Thanks to Ruud Waij for - helping test this.) - * memalign: check alignment arg - * realloc: don't try to shift chunks backwards, since this - leads to more fragmentation in some programs and doesn't - seem to help in any others. - * Collect all cases in malloc requiring system memory into sysmalloc - * Use mmap as backup to sbrk - * Place all internal state in malloc_state - * Introduce fastbins (although similar to 2.5.1) - * Many minor tunings and cosmetic improvements - * Introduce USE_PUBLIC_MALLOC_WRAPPERS, USE_MALLOC_LOCK - * Introduce MALLOC_FAILURE_ACTION, MORECORE_CONTIGUOUS - Thanks to Tony E. Bennett and others. - * Include errno.h to support default failure action. 
- - V2.6.6 Sun Dec 5 07:42:19 1999 Doug Lea (dl at gee) - * return null for negative arguments - * Added Several WIN32 cleanups from Martin C. Fong - * Add 'LACKS_SYS_PARAM_H' for those systems without 'sys/param.h' - (e.g. WIN32 platforms) - * Cleanup header file inclusion for WIN32 platforms - * Cleanup code to avoid Microsoft Visual C++ compiler complaints - * Add 'USE_DL_PREFIX' to quickly allow co-existence with existing - memory allocation routines - * Set 'malloc_getpagesize' for WIN32 platforms (needs more work) - * Use 'assert' rather than 'ASSERT' in WIN32 code to conform to - usage of 'assert' in non-WIN32 code - * Improve WIN32 'sbrk()' emulation's 'findRegion()' routine to - avoid infinite loop - * Always call 'fREe()' rather than 'free()' - - V2.6.5 Wed Jun 17 15:57:31 1998 Doug Lea (dl at gee) - * Fixed ordering problem with boundary-stamping - - V2.6.3 Sun May 19 08:17:58 1996 Doug Lea (dl at gee) - * Added pvalloc, as recommended by H.J. Liu - * Added 64bit pointer support mainly from Wolfram Gloger - * Added anonymously donated WIN32 sbrk emulation - * Malloc, calloc, getpagesize: add optimizations from Raymond Nijssen - * malloc_extend_top: fix mask error that caused wastage after - foreign sbrks - * Add linux mremap support code from HJ Liu - - V2.6.2 Tue Dec 5 06:52:55 1995 Doug Lea (dl at gee) - * Integrated most documentation with the code. - * Add support for mmap, with help from - Wolfram Gloger (Gloger@lrz.uni-muenchen.de). - * Use last_remainder in more cases. - * Pack bins using idea from colin@nyx10.cs.du.edu - * Use ordered bins instead of best-fit threshhold - * Eliminate block-local decls to simplify tracing and debugging. - * Support another case of realloc via move into top - * Fix error occuring when initial sbrk_base not word-aligned. - * Rely on page size for units instead of SBRK_UNIT to - avoid surprises about sbrk alignment conventions. - * Add mallinfo, mallopt. 
Thanks to Raymond Nijssen - (raymond@es.ele.tue.nl) for the suggestion. - * Add `pad' argument to malloc_trim and top_pad mallopt parameter. - * More precautions for cases where other routines call sbrk, - courtesy of Wolfram Gloger (Gloger@lrz.uni-muenchen.de). - * Added macros etc., allowing use in linux libc from - H.J. Lu (hjl@gnu.ai.mit.edu) - * Inverted this history list - - V2.6.1 Sat Dec 2 14:10:57 1995 Doug Lea (dl at gee) - * Re-tuned and fixed to behave more nicely with V2.6.0 changes. - * Removed all preallocation code since under current scheme - the work required to undo bad preallocations exceeds - the work saved in good cases for most test programs. - * No longer use return list or unconsolidated bins since - no scheme using them consistently outperforms those that don't - given above changes. - * Use best fit for very large chunks to prevent some worst-cases. - * Added some support for debugging - - V2.6.0 Sat Nov 4 07:05:23 1995 Doug Lea (dl at gee) - * Removed footers when chunks are in use. Thanks to - Paul Wilson (wilson@cs.texas.edu) for the suggestion. - - V2.5.4 Wed Nov 1 07:54:51 1995 Doug Lea (dl at gee) - * Added malloc_trim, with help from Wolfram Gloger - (wmglo@Dent.MED.Uni-Muenchen.DE). 
- - V2.5.3 Tue Apr 26 10:16:01 1994 Doug Lea (dl at g) - - V2.5.2 Tue Apr 5 16:20:40 1994 Doug Lea (dl at g) - * realloc: try to expand in both directions - * malloc: swap order of clean-bin strategy; - * realloc: only conditionally expand backwards - * Try not to scavenge used bins - * Use bin counts as a guide to preallocation - * Occasionally bin return list chunks in first scan - * Add a few optimizations from colin@nyx10.cs.du.edu - - V2.5.1 Sat Aug 14 15:40:43 1993 Doug Lea (dl at g) - * faster bin computation & slightly different binning - * merged all consolidations to one part of malloc proper - (eliminating old malloc_find_space & malloc_clean_bin) - * Scan 2 returns chunks (not just 1) - * Propagate failure in realloc if malloc returns 0 - * Add stuff to allow compilation on non-ANSI compilers - from kpv@research.att.com - - V2.5 Sat Aug 7 07:41:59 1993 Doug Lea (dl at g.oswego.edu) - * removed potential for odd address access in prev_chunk - * removed dependency on getpagesize.h - * misc cosmetics and a bit more internal documentation - * anticosmetics: mangled names in macros to evade debugger strangeness - * tested on sparc, hp-700, dec-mips, rs6000 - with gcc & native cc (hp, dec only) allowing - Detlefs & Zorn comparison study (in SIGPLAN Notices.) - - Trial version Fri Aug 28 13:14:29 1992 Doug Lea (dl at g.oswego.edu) - * Based loosely on libg++-1.2X malloc. (It retains some of the overall - structure of old version, but most details differ.) - -*/ - - diff --git a/src/core/util/nedmalloc/nedmalloc.c b/src/core/util/nedmalloc/nedmalloc.c deleted file mode 100644 index 912e65f4..00000000 --- a/src/core/util/nedmalloc/nedmalloc.c +++ /dev/null @@ -1,950 +0,0 @@ -/* Alternative malloc implementation for multiple threads without -lock contention based on dlmalloc. 
(C) 2005-2006 Niall Douglas - -Boost Software License - Version 1.0 - August 17th, 2003 - -Permission is hereby granted, free of charge, to any person or organization -obtaining a copy of the software and accompanying documentation covered by -this license (the "Software") to use, reproduce, display, distribute, -execute, and transmit the Software, and to prepare derivative works of the -Software, and to permit third-parties to whom the Software is furnished to -do so, all subject to the following: - -The copyright notices in the Software and this entire statement, including -the above license grant, this restriction and the following disclaimer, -must be included in all copies of the Software, in whole or in part, and -all derivative works of the Software, unless such copies or derivative -works are solely in the form of machine-executable object code generated by -a source language processor. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT -SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE -FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, -ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER -DEALINGS IN THE SOFTWARE. 
-*/ - -#ifdef _MSC_VER -/* Enable full aliasing on MSVC */ -/*#pragma optimize("a", on)*/ -#endif - -/*#define FULLSANITYCHECKS*/ - -#include "nedmalloc.h" -#ifdef WIN32 - #include -#endif -#define MSPACES 1 -#define ONLY_MSPACES 1 -#ifndef USE_LOCKS - #define USE_LOCKS 1 -#endif -#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */ -#undef DEBUG /* dlmalloc wants DEBUG either 0 or 1 */ -#ifdef _DEBUG - #define DEBUG 1 -#else - #define DEBUG 0 -#endif -#ifdef NDEBUG /* Disable assert checking on release builds */ - #undef DEBUG -#endif -/* The default of 64Kb means we spend too much time kernel-side */ -#ifndef DEFAULT_GRANULARITY -#define DEFAULT_GRANULARITY (1*1024*1024) -#endif -/*#define USE_SPIN_LOCKS 0*/ - - -/*#define FORCEINLINE*/ -#include "malloc.c.h" -#ifdef NDEBUG /* Disable assert checking on release builds */ - #undef DEBUG -#endif - -/* The maximum concurrent threads in a pool possible */ -#ifndef MAXTHREADSINPOOL -#define MAXTHREADSINPOOL 16 -#endif -/* The maximum number of threadcaches which can be allocated */ -#ifndef THREADCACHEMAXCACHES -#define THREADCACHEMAXCACHES 256 -#endif -/* The maximum size to be allocated from the thread cache */ -#ifndef THREADCACHEMAX -#define THREADCACHEMAX 8192 -#endif -#if 0 -/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */ -#define THREADCACHEMAXBINS ((13-4)*2) -#else -/* The number of cache entries. 
This is (topbitpos(THREADCACHEMAX)-4) */ -#define THREADCACHEMAXBINS (13-4) -#endif -/* Point at which the free space in a thread cache is garbage collected */ -#ifndef THREADCACHEMAXFREESPACE -#define THREADCACHEMAXFREESPACE (512*1024) -#endif - - -#ifdef WIN32 - #define TLSVAR DWORD - #define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k)) - #define TLSFREE(k) (!TlsFree(k)) - #define TLSGET(k) TlsGetValue(k) - #define TLSSET(k, a) (!TlsSetValue(k, a)) - #ifdef DEBUG -static LPVOID ChkedTlsGetValue(DWORD idx) -{ - LPVOID ret=TlsGetValue(idx); - assert(S_OK==GetLastError()); - return ret; -} - #undef TLSGET - #define TLSGET(k) ChkedTlsGetValue(k) - #endif -#else - #define TLSVAR pthread_key_t - #define TLSALLOC(k) pthread_key_create(k, 0) - #define TLSFREE(k) pthread_key_delete(k) - #define TLSGET(k) pthread_getspecific(k) - #define TLSSET(k, a) pthread_setspecific(k, a) -#endif - -#if 0 -/* Only enable if testing with valgrind. Causes misoperation */ -#define mspace_malloc(p, s) malloc(s) -#define mspace_realloc(p, m, s) realloc(m, s) -#define mspace_calloc(p, n, s) calloc(n, s) -#define mspace_free(p, m) free(m) -#endif - - -#if defined(__cplusplus) -#if !defined(NO_NED_NAMESPACE) -namespace nedalloc { -#else -extern "C" { -#endif -#endif - -size_t nedblksize(void *mem) THROWSPEC -{ -#if 0 - /* Only enable if testing with valgrind. 
Causes misoperation */ - return THREADCACHEMAX; -#else - if(mem) - { - mchunkptr p=mem2chunk(mem); - assert(cinuse(p)); /* If this fails, someone tried to free a block twice */ - if(cinuse(p)) - return chunksize(p)-overhead_for(p); - } - return 0; -#endif -} - -void nedsetvalue(void *v) THROWSPEC { nedpsetvalue(0, v); } -void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc(0, size); } -void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc(0, no, size); } -void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc(0, mem, size); } -void nedfree(void *mem) THROWSPEC { nedpfree(0, mem); } -void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); } -#if !NO_MALLINFO -struct mallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo(0); } -#endif -int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt(0, parno, value); } -int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim(0, pad); } -void nedmalloc_stats() THROWSPEC { nedpmalloc_stats(0); } -size_t nedmalloc_footprint() THROWSPEC { return nedpmalloc_footprint(0); } -void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc(0, elemsno, elemsize, chunks); } -void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc(0, elems, sizes, chunks); } - -struct threadcacheblk_t; -typedef struct threadcacheblk_t threadcacheblk; -struct threadcacheblk_t -{ /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */ -#ifdef FULLSANITYCHECKS - unsigned int magic; -#endif - unsigned int lastUsed, size; - threadcacheblk *next, *prev; -}; -typedef struct threadcache_t -{ -#ifdef FULLSANITYCHECKS - unsigned int magic1; -#endif - int mymspace; /* Last mspace entry this thread used */ - long threadid; - unsigned int mallocs, frees, successes; - size_t freeInCache; /* How much free space is 
stored in this cache */ - threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2]; -#ifdef FULLSANITYCHECKS - unsigned int magic2; -#endif -} threadcache; -struct nedpool_t -{ - MLOCK_T mutex; - void *uservalue; - int threads; /* Max entries in m to use */ - threadcache *caches[THREADCACHEMAXCACHES]; - TLSVAR mycache; /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */ - mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */ -}; -static nedpool syspool; - -static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC -{ /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */ - unsigned int topbit, size=(unsigned int)(_size>>4); - /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */ - -#if defined(__GNUC__) - topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size); -#elif defined(_MSC_VER) && _MSC_VER>=1300 - { - unsigned long bsrTopBit; - - _BitScanReverse(&bsrTopBit, size); - - topbit = bsrTopBit; - } -#else -#if 0 - union { - unsigned asInt[2]; - double asDouble; - }; - int n; - - asDouble = (double)size + 0.5; - topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023; -#else - { - unsigned int x=size; - x = x | (x >> 1); - x = x | (x >> 2); - x = x | (x >> 4); - x = x | (x >> 8); - x = x | (x >>16); - x = ~x; - x = x - ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x + (x >> 4)) & 0x0F0F0F0F; - x = x + (x << 8); - x = x + (x << 16); - topbit=31 - (x >> 24); - } -#endif -#endif - return topbit; -} - - -#ifdef FULLSANITYCHECKS -static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC -{ - assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1])); - if(ptr[0] && ptr[1]) - { - assert(nedblksize(ptr[0])>=sizeof(threadcacheblk)); - assert(nedblksize(ptr[1])>=sizeof(threadcacheblk)); - assert(*(unsigned int *) "NEDN"==ptr[0]->magic); - assert(*(unsigned int *) "NEDN"==ptr[1]->magic); - assert(!ptr[0]->prev); - assert(!ptr[1]->next); - 
if(ptr[0]==ptr[1]) - { - assert(!ptr[0]->next); - assert(!ptr[1]->prev); - } - } -} -static void tcfullsanitycheck(threadcache *tc) THROWSPEC -{ - threadcacheblk **tcbptr=tc->bins; - int n; - for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) - { - threadcacheblk *b, *ob=0; - tcsanitycheck(tcbptr); - for(b=tcbptr[0]; b; ob=b, b=b->next) - { - assert(*(unsigned int *) "NEDN"==b->magic); - assert(!ob || ob->next==b); - assert(!ob || b->prev==ob); - } - } -} -#endif - -static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC -{ -#ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); -#endif - if(tc->freeInCache) - { - threadcacheblk **tcbptr=tc->bins; - int n; - for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2) - { - threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */ - /*tcsanitycheck(tcbptr);*/ - for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; ) - { - threadcacheblk *f=*tcb; - size_t blksize=f->size; /*nedblksize(f);*/ - assert(blksize<=nedblksize(f)); - assert(blksize); -#ifdef FULLSANITYCHECKS - assert(*(unsigned int *) "NEDN"==(*tcb)->magic); -#endif - *tcb=(*tcb)->prev; - if(*tcb) - (*tcb)->next=0; - else - *tcbptr=0; - tc->freeInCache-=blksize; - assert((long) tc->freeInCache>=0); - mspace_free(0, f); - /*tcsanitycheck(tcbptr);*/ - } - } - } -#ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); -#endif -} -static void DestroyCaches(nedpool *p) THROWSPEC -{ - if(p->caches) - { - threadcache *tc; - int n; - for(n=0; ncaches[n])) - { - tc->frees++; - RemoveCacheEntries(p, tc, 0); - assert(!tc->freeInCache); - tc->mymspace=-1; - tc->threadid=0; - mspace_free(0, tc); - p->caches[n]=0; - } - } - } -} - -static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC -{ - threadcache *tc=0; - int n, end; - ACQUIRE_LOCK(&p->mutex); - for(n=0; ncaches[n]; n++); - if(THREADCACHEMAXCACHES==n) - { /* List exhausted, so disable for this thread */ - RELEASE_LOCK(&p->mutex); - return 0; - } - tc=p->caches[n]=(threadcache *) 
mspace_calloc(p->m[0], 1, sizeof(threadcache)); - if(!tc) - { - RELEASE_LOCK(&p->mutex); - return 0; - } -#ifdef FULLSANITYCHECKS - tc->magic1=*(unsigned int *)"NEDMALC1"; - tc->magic2=*(unsigned int *)"NEDMALC2"; -#endif - tc->threadid=(long)(size_t)CURRENT_THREAD; - for(end=0; p->m[end]; end++); - tc->mymspace=tc->threadid % end; - RELEASE_LOCK(&p->mutex); - if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort(); - return tc; -} - -static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC -{ - void *ret=0; - unsigned int bestsize; - unsigned int idx=size2binidx(*size); - size_t blksize=0; - threadcacheblk *blk, **binsptr; -#ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); -#endif - /* Calculate best fit bin size */ - bestsize=1<<(idx+4); -#if 0 - /* Finer grained bin fit */ - idx<<=1; - if(*size>bestsize) - { - idx++; - bestsize+=bestsize>>1; - } - if(*size>bestsize) - { - idx++; - bestsize=1<<(4+(idx>>1)); - } -#else - if(*size>bestsize) - { - idx++; - bestsize<<=1; - } -#endif - assert(bestsize>=*size); - if(*sizebins[idx*2]; - /* Try to match close, but move up a bin if necessary */ - blk=*binsptr; - if(!blk || blk->size<*size) - { /* Bump it up a bin */ - if(idxsize; /*nedblksize(blk);*/ - assert(nedblksize(blk)>=blksize); - assert(blksize>=*size); - if(blk->next) - blk->next->prev=0; - *binsptr=blk->next; - if(!*binsptr) - binsptr[1]=0; -#ifdef FULLSANITYCHECKS - blk->magic=0; -#endif - assert(binsptr[0]!=blk && binsptr[1]!=blk); - assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD); - /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/ - ret=(void *) blk; - } - ++tc->mallocs; - if(ret) - { - assert(blksize>=*size); - ++tc->successes; - tc->freeInCache-=blksize; - assert((long) tc->freeInCache>=0); - } -#if defined(DEBUG) && 0 - if(!(tc->mallocs & 0xfff)) - { - printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, 
tc->mallocs, - (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache); - } -#endif -#ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); -#endif - return ret; -} -static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC -{ - unsigned int age=THREADCACHEMAXFREESPACE/8192; - /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/ - while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE) - { - RemoveCacheEntries(p, tc, age); - /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/ - age>>=1; - } - /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/ -} -static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC -{ - unsigned int bestsize; - unsigned int idx=size2binidx(size); - threadcacheblk **binsptr, *tck=(threadcacheblk *) mem; - assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD); -#ifdef DEBUG - { /* Make sure this is a valid memory block */ - mchunkptr p = mem2chunk(mem); - mstate fm = get_mstate_for(p); - if (!ok_magic(fm)) { - USAGE_ERROR_ACTION(fm, p); - return; - } - } -#endif -#ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); -#endif - /* Calculate best fit bin size */ - bestsize=1<<(idx+4); -#if 0 - /* Finer grained bin fit */ - idx<<=1; - if(size>bestsize) - { - unsigned int biggerbestsize=bestsize+bestsize<<1; - if(size>=biggerbestsize) - { - idx++; - bestsize=biggerbestsize; - } - } -#endif - if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */ - size=bestsize; - binsptr=&tc->bins[idx*2]; - assert(idx<=THREADCACHEMAXBINS); - if(tck==*binsptr) - { - fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck); - abort(); - } -#ifdef FULLSANITYCHECKS - tck->magic=*(unsigned int *) "NEDN"; -#endif - tck->lastUsed=++tc->frees; - tck->size=(unsigned int) size; - tck->next=*binsptr; - tck->prev=0; - if(tck->next) - 
tck->next->prev=tck; - else - binsptr[1]=tck; - assert(!*binsptr || (*binsptr)->size==tck->size); - *binsptr=tck; - assert(tck==tc->bins[idx*2]); - assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck); - /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/ - tc->freeInCache+=size; -#ifdef FULLSANITYCHECKS - tcfullsanitycheck(tc); -#endif -#if 1 - if(tc->freeInCache>=THREADCACHEMAXFREESPACE) - ReleaseFreeInCache(p, tc, mymspace); -#endif -} - - - - -static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC -{ /* threads is -1 for system pool */ - ensure_initialization(); - ACQUIRE_MALLOC_GLOBAL_LOCK(); - if(p->threads) goto done; - if(INITIAL_LOCK(&p->mutex)) goto err; - if(TLSALLOC(&p->mycache)) goto err; - if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err; - p->m[0]->extp=p; - p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads; -done: - RELEASE_MALLOC_GLOBAL_LOCK(); - return 1; -err: - if(threads<0) - abort(); /* If you can't allocate for system pool, we're screwed */ - DestroyCaches(p); - if(p->m[0]) - { - destroy_mspace(p->m[0]); - p->m[0]=0; - } - if(p->mycache) - { - if(TLSFREE(p->mycache)) abort(); - p->mycache=0; - } - RELEASE_MALLOC_GLOBAL_LOCK(); - return 0; -} -static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC -{ /* Gets called when thread's last used mspace is in use. 
The strategy - is to run through the list of all available mspaces looking for an - unlocked one and if we fail, we create a new one so long as we don't - exceed p->threads */ - int n, end; - for(n=end=*lastUsed+1; p->m[n]; end=++n) - { - if(TRY_LOCK(&p->m[n]->mutex)) goto found; - } - for(n=0; n<*lastUsed && p->m[n]; n++) - { - if(TRY_LOCK(&p->m[n]->mutex)) goto found; - } - if(endthreads) - { - mstate temp; - if(!(temp=(mstate) create_mspace(size, 1))) - goto badexit; - /* Now we're ready to modify the lists, we lock */ - ACQUIRE_LOCK(&p->mutex); - while(p->m[end] && endthreads) - end++; - if(end>=p->threads) - { /* Drat, must destroy it now */ - RELEASE_LOCK(&p->mutex); - destroy_mspace((mspace) temp); - goto badexit; - } - /* We really want to make sure this goes into memory now but we - have to be careful of breaking aliasing rules, so write it twice */ - *((volatile struct malloc_state **) &p->m[end])=p->m[end]=temp; - ACQUIRE_LOCK(&p->m[end]->mutex); - /*printf("Created mspace idx %d\n", end);*/ - RELEASE_LOCK(&p->mutex); - n=end; - goto found; - } - /* Let it lock on the last one it used */ -badexit: - ACQUIRE_LOCK(&p->m[*lastUsed]->mutex); - return p->m[*lastUsed]; -found: - *lastUsed=n; - if(tc) - tc->mymspace=n; - else - { - if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort(); - } - return p->m[n]; -} - -nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC -{ - nedpool *ret; - if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0; - if(!InitPool(ret, capacity, threads)) - { - nedpfree(0, ret); - return 0; - } - return ret; -} -void neddestroypool(nedpool *p) THROWSPEC -{ - int n; - ACQUIRE_LOCK(&p->mutex); - DestroyCaches(p); - for(n=0; p->m[n]; n++) - { - destroy_mspace(p->m[n]); - p->m[n]=0; - } - RELEASE_LOCK(&p->mutex); - if(TLSFREE(p->mycache)) abort(); - nedpfree(0, p); -} - -void nedpsetvalue(nedpool *p, void *v) THROWSPEC -{ - if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } - p->uservalue=v; -} 
-void *nedgetvalue(nedpool **p, void *mem) THROWSPEC -{ - nedpool *np=0; - mchunkptr mcp=mem2chunk(mem); - mstate fm; - if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0; - if(!cinuse(mcp)) return 0; - if(!next_pinuse(mcp)) return 0; - if(!is_mmapped(mcp) && !pinuse(mcp)) - { - if(next_chunk(prev_chunk(mcp))!=mcp) return 0; - } - fm=get_mstate_for(mcp); - if(!ok_magic(fm)) return 0; - if(!ok_address(fm, mcp)) return 0; - if(!fm->extp) return 0; - np=(nedpool *) fm->extp; - if(p) *p=np; - return np->uservalue; -} - -void neddisablethreadcache(nedpool *p) THROWSPEC -{ - int mycache; - if(!p) - { - p=&syspool; - if(!syspool.threads) InitPool(&syspool, 0, -1); - } - mycache=(int)(size_t) TLSGET(p->mycache); - if(!mycache) - { /* Set to mspace 0 */ - if(TLSSET(p->mycache, (void *)-1)) abort(); - } - else if(mycache>0) - { /* Set to last used mspace */ - threadcache *tc=p->caches[mycache-1]; -#if defined(DEBUG) - printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n", - 100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs); -#endif - if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort(); - tc->frees++; - RemoveCacheEntries(p, tc, 0); - assert(!tc->freeInCache); - tc->mymspace=-1; - tc->threadid=0; - mspace_free(0, p->caches[mycache-1]); - p->caches[mycache-1]=0; - } -} - -#define GETMSPACE(m,p,tc,ms,s,action) \ - do \ - { \ - mstate m = GetMSpace((p),(tc),(ms),(s)); \ - action; \ - RELEASE_LOCK(&m->mutex); \ - } while (0) - -static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC -{ /* Returns a locked and ready for use mspace */ - mstate m=p->m[mymspace]; - assert(m); - if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);\ - /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/ - return m; -} -static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC -{ - int 
mycache; - if(size && *sizemycache); - if(mycache>0) - { - *tc=(*p)->caches[mycache-1]; - *mymspace=(*tc)->mymspace; - } - else if(!mycache) - { - *tc=AllocCache(*p); - if(!*tc) - { /* Disable */ - if(TLSSET((*p)->mycache, (void *)-1)) abort(); - *mymspace=0; - } - else - *mymspace=(*tc)->mymspace; - } - else - { - *tc=0; - *mymspace=-mycache-1; - } - assert(*mymspace>=0); - assert((long)(size_t)CURRENT_THREAD==(*tc)->threadid); -#ifdef FULLSANITYCHECKS - if(*tc) - { - if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2) - { - abort(); - } - } -#endif -} - -void * nedpmalloc(nedpool *p, size_t size) THROWSPEC -{ - void *ret=0; - threadcache *tc; - int mymspace; - GetThreadCache(&p, &tc, &mymspace, &size); -#if THREADCACHEMAX - if(tc && size<=THREADCACHEMAX) - { /* Use the thread cache */ - ret=threadcache_malloc(p, tc, &size); - } -#endif - if(!ret) - { /* Use this thread's mspace */ - GETMSPACE(m, p, tc, mymspace, size, - ret=mspace_malloc(m, size)); - } - return ret; -} -void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC -{ - size_t rsize=size*no; - void *ret=0; - threadcache *tc; - int mymspace; - GetThreadCache(&p, &tc, &mymspace, &rsize); -#if THREADCACHEMAX - if(tc && rsize<=THREADCACHEMAX) - { /* Use the thread cache */ - if((ret=threadcache_malloc(p, tc, &rsize))) - memset(ret, 0, rsize); - } -#endif - if(!ret) - { /* Use this thread's mspace */ - GETMSPACE(m, p, tc, mymspace, rsize, - ret=mspace_calloc(m, 1, rsize)); - } - return ret; -} -void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC -{ - void *ret=0; - threadcache *tc; - int mymspace; - if(!mem) return nedpmalloc(p, size); - GetThreadCache(&p, &tc, &mymspace, &size); -#if THREADCACHEMAX - if(tc && size && size<=THREADCACHEMAX) - { /* Use the thread cache */ - size_t memsize=nedblksize(mem); - assert(memsize); - if((ret=threadcache_malloc(p, tc, &size))) - { - memcpy(ret, mem, memsizem[n]; n++) - { - struct mallinfo 
t=mspace_mallinfo(p->m[n]); - ret.arena+=t.arena; - ret.ordblks+=t.ordblks; - ret.hblkhd+=t.hblkhd; - ret.usmblks+=t.usmblks; - ret.uordblks+=t.uordblks; - ret.fordblks+=t.fordblks; - ret.keepcost+=t.keepcost; - } - return ret; -} -#endif -int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC -{ - return mspace_mallopt(parno, value); -} -int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC -{ - int n, ret=0; - if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } - for(n=0; p->m[n]; n++) - { - ret+=mspace_trim(p->m[n], pad); - } - return ret; -} -void nedpmalloc_stats(nedpool *p) THROWSPEC -{ - int n; - if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } - for(n=0; p->m[n]; n++) - { - mspace_malloc_stats(p->m[n]); - } -} -size_t nedpmalloc_footprint(nedpool *p) THROWSPEC -{ - size_t ret=0; - int n; - if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); } - for(n=0; p->m[n]; n++) - { - ret+=mspace_footprint(p->m[n]); - } - return ret; -} -void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC -{ - void **ret; - threadcache *tc; - int mymspace; - GetThreadCache(&p, &tc, &mymspace, &elemsize); - GETMSPACE(m, p, tc, mymspace, elemsno*elemsize, - ret=mspace_independent_calloc(m, elemsno, elemsize, chunks)); - return ret; -} -void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC -{ - void **ret; - threadcache *tc; - int mymspace; - size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t)); - if(!adjustedsizes) return 0; - for(i=0; i /* for size_t */ - -#ifndef EXTSPEC - #define EXTSPEC extern -#endif - -#if defined(_MSC_VER) && _MSC_VER>=1400 - #define MALLOCATTR __declspec(restrict) -#endif -#ifdef __GNUC__ - #define MALLOCATTR __attribute__ ((malloc)) -#endif -#ifndef MALLOCATTR - #define MALLOCATTR -#endif - -#ifdef REPLACE_SYSTEM_ALLOCATOR - #define nedmalloc malloc - #define nedcalloc calloc - #define nedrealloc 
realloc - #define nedfree free - #define nedmemalign memalign - #define nedmallinfo mallinfo - #define nedmallopt mallopt - #define nedmalloc_trim malloc_trim - #define nedmalloc_stats malloc_stats - #define nedmalloc_footprint malloc_footprint - #define nedindependent_calloc independent_calloc - #define nedindependent_comalloc independent_comalloc - #ifdef _MSC_VER - #define nedblksize _msize - #endif -#endif - -#ifndef NO_MALLINFO -#define NO_MALLINFO 0 -#endif - -#if !NO_MALLINFO -struct mallinfo; -#endif - -#if defined(__cplusplus) - #if !defined(NO_NED_NAMESPACE) -namespace nedalloc { - #else -extern "C" { - #endif - #define THROWSPEC throw() -#else - #define THROWSPEC -#endif - -/* These are the global functions */ - -/* Gets the usable size of an allocated block. Note this will always be bigger than what was -asked for due to rounding etc. -*/ -EXTSPEC size_t nedblksize(void *mem) THROWSPEC; - -EXTSPEC void nedsetvalue(void *v) THROWSPEC; - -EXTSPEC MALLOCATTR void * nedmalloc(size_t size) THROWSPEC; -EXTSPEC MALLOCATTR void * nedcalloc(size_t no, size_t size) THROWSPEC; -EXTSPEC MALLOCATTR void * nedrealloc(void *mem, size_t size) THROWSPEC; -EXTSPEC void nedfree(void *mem) THROWSPEC; -EXTSPEC MALLOCATTR void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC; -#if !NO_MALLINFO -EXTSPEC struct mallinfo nedmallinfo(void) THROWSPEC; -#endif -EXTSPEC int nedmallopt(int parno, int value) THROWSPEC; -EXTSPEC int nedmalloc_trim(size_t pad) THROWSPEC; -EXTSPEC void nedmalloc_stats(void) THROWSPEC; -EXTSPEC size_t nedmalloc_footprint(void) THROWSPEC; -EXTSPEC MALLOCATTR void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; -EXTSPEC MALLOCATTR void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC; - -/* These are the pool functions */ -struct nedpool_t; -typedef struct nedpool_t nedpool; - -/* Creates a memory pool for use with the nedp* functions below. 
-Capacity is how much to allocate immediately (if you know you'll be allocating a lot -of memory very soon) which you can leave at zero. Threads specifies how many threads -will *normally* be accessing the pool concurrently. Setting this to zero means it -extends on demand, but be careful of this as it can rapidly consume system resources -where bursts of concurrent threads use a pool at once. -*/ -EXTSPEC MALLOCATTR nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC; - -/* Destroys a memory pool previously created by nedcreatepool(). -*/ -EXTSPEC void neddestroypool(nedpool *p) THROWSPEC; - -/* Sets a value to be associated with a pool. You can retrieve this value by passing -any memory block allocated from that pool. -*/ -EXTSPEC void nedpsetvalue(nedpool *p, void *v) THROWSPEC; -/* Gets a previously set value using nedpsetvalue() or zero if memory is unknown. -Optionally can also retrieve pool. -*/ -EXTSPEC void *nedgetvalue(nedpool **p, void *mem) THROWSPEC; - -/* Disables the thread cache for the calling thread, returning any existing cache -data to the central pool. 
-*/ -EXTSPEC void neddisablethreadcache(nedpool *p) THROWSPEC; - -EXTSPEC MALLOCATTR void * nedpmalloc(nedpool *p, size_t size) THROWSPEC; -EXTSPEC MALLOCATTR void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC; -EXTSPEC MALLOCATTR void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC; -EXTSPEC void nedpfree(nedpool *p, void *mem) THROWSPEC; -EXTSPEC MALLOCATTR void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC; -#if !NO_MALLINFO -EXTSPEC struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC; -#endif -EXTSPEC int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC; -EXTSPEC int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC; -EXTSPEC void nedpmalloc_stats(nedpool *p) THROWSPEC; -EXTSPEC size_t nedpmalloc_footprint(nedpool *p) THROWSPEC; -EXTSPEC MALLOCATTR void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC; -EXTSPEC MALLOCATTR void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC; - -#if defined(__cplusplus) -} -#endif - -#undef MALLOCATTR -#undef EXTSPEC - -#endif diff --git a/src/core/util/unicode/gunichartables.h b/src/core/util/unicode/gunichartables.h index adc5a55d..4b376d6f 100644 --- a/src/core/util/unicode/gunichartables.h +++ b/src/core/util/unicode/gunichartables.h @@ -1,6 +1,8 @@ /* This file is automatically generated. DO NOT EDIT! Instead, edit gen-unicode-tables.pl and re-run. */ +// See COPYING file for licensing information. 
+ #ifndef CHARTABLES_H #define CHARTABLES_H @@ -15,13046 +17,13168 @@ #define G_UNICODE_LAST_PAGE_PART1 762 static const char type_data[][256] = { - { /* page 0, index 0 */ - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_SPACE_SEPARATOR, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, 
G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, - G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_SPACE_SEPARATOR, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_FORMAT, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_OTHER_NUMBER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER - }, - { /* page 1, index 1 */ - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
- G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, - 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER - }, - { /* page 2, index 2 */ - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - 
G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL - }, - { /* page 3, index 3 */ - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER - }, - { /* page 4, index 4 */ - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
- G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
- G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER - }, - { /* page 5, index 5 */ - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 6, index 6 */ - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_FORMAT, G_UNICODE_ENCLOSING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_LETTER - }, - { /* page 7, index 7 */ - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 9, index 8 */ - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - 
G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 10, index 9 */ - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 11, 
index 10 */ - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
- G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 12, index 11 */ - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, 
G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 13, index 12 */ - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 14, index 13 */ - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 15, index 14 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 16, index 15 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_COMBINING_MARK, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 17, index 16 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 18, index 17 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER - }, - { /* page 19, index 18 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 20, index 19 */ - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER - }, - { /* page 22, index 20 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 23, index 21 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 24, index 22 */ - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_UNASSIGNED, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 25, index 23 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL - }, - { /* page 26, index 24 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED - }, - { /* page 27, index 25 */ - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 28, index 26 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 29, index 27 */ - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - 
G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK - }, - { /* page 30, index 28 */ - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
- G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER - }, - { /* page 31, index 29 */ - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_TITLECASE_LETTER, 
G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - 
G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UNASSIGNED - }, - { /* page 32, index 30 */ - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_LINE_SEPARATOR, G_UNICODE_PARAGRAPH_SEPARATOR, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, 
- G_UNICODE_FORMAT, G_UNICODE_OTHER_NUMBER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, - G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, - G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 33, index 31 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL - }, - { /* page 35, index 32 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 36, index 33 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER - }, - { /* page 37, index 34 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL - }, - { /* page 38, index 35 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 39, index 36 */ - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
- G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL - }, - { /* page 41, index 37 */ - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL - }, - { /* page 43, index 38 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 44, index 39 */ - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
- G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION - }, - { /* page 45, index 40 */ - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK - }, - { /* page 46, index 41 */ - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, - G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 47, index 42 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 48, index 43 */ - G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_LETTER - }, - { /* page 49, index 44 */ - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER - }, - { /* page 50, index 45 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED - }, - { /* page 77, index 46 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL - }, - { /* page 159, index 47 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 160, index 48 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER - }, - { /* page 164, index 49 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 166, index 50 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, - G_UNICODE_ENCLOSING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 167, index 51 */ - G_UNICODE_MODIFIER_SYMBOL, 
G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, - 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER - }, - { /* page 168, index 52 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 169, index 53 */ - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 170, index 54 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 215, index 55 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 250, index 56 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 251, index 57 */ - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER - }, - { /* page 253, index 58 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 254, index 59 */ - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, 
G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, - G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_DASH_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT - }, - { /* page 255, index 60 */ - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 256, index 61 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 257, index 62 */ - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 258, index 63 */ - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 259, index 64 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 260, index 65 */ - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 264, index 66 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 265, index 67 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 266, index 68 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 291, index 69 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 292, index 70 */ - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, - G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 464, index 71 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 465, index 72 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, - G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 466, index 73 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - { /* page 467, index 74 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 468, index 75 */ - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER - }, - { /* page 469, index 76 */ - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
- G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER - }, - { /* page 470, index 77 */ - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
- G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER - }, - { /* page 471, index 78 */ - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_DECIMAL_NUMBER - }, - { /* page 496, index 79 */ - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, - G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 678, index 80 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 762, index 81 */ - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 3584, index 82 */ - G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - 
G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED - }, - 
{ /* page 3585, index 83 */ - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
- G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 4095, index 84 */ - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - }, - { /* page 4351, index 85 */ - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, - G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_UNASSIGNED, - G_UNICODE_UNASSIGNED - } + { + /* page 0, index 0 */ + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, 
G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_SPACE_SEPARATOR, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_CONTROL, + G_UNICODE_CONTROL, G_UNICODE_CONTROL, G_UNICODE_SPACE_SEPARATOR, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + 
G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_FORMAT, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_OTHER_NUMBER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER + }, + { + /* page 1, index 1 */ + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
+ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER + }, + { + /* page 2, index 2 */ + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
+ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + 
G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL + }, + { + /* page 3, index 3 */ + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
+ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
+ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MATH_SYMBOL, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER + }, + { + /* page 4, index 4 */ + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
+ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
+ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER + }, + { + /* page 5, index 5 */ + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 6, index 6 */ + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_FORMAT, G_UNICODE_ENCLOSING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_LETTER + }, + { + /* page 7, index 7 */ + 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 9, index 8 */ + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 10, index 9 */ + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 11, index 10 */ + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 12, index 11 */ + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + 
G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 13, index 12 */ + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 14, index 13 */ + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 15, index 14 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 16, index 15 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_COMBINING_MARK, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 17, index 16 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 18, index 17 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER + }, + { + /* page 19, index 18 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
+ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_NUMBER, + 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 20, index 19 */ + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER + }, + { + /* page 22, index 20 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 23, index 21 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 24, index 22 */ + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_UNASSIGNED, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 25, index 23 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + 
G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL + }, + { + /* page 26, index 24 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 27, index 25 */ + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, 
G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 28, index 26 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 29, index 27 */ + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, 
G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK + }, + { + /* page 30, index 28 */ + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, 
+ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER + }, + { + /* page 31, index 29 */ + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_TITLECASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_TITLECASE_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UNASSIGNED + }, + { + /* page 32, index 30 */ + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_SPACE_SEPARATOR, + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_LINE_SEPARATOR, G_UNICODE_PARAGRAPH_SEPARATOR, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, 
G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_OTHER_NUMBER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, + 
G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_UNASSIGNED, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, + G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_ENCLOSING_MARK, + G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 33, index 31 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL + }, + { + /* page 35, index 32 */ + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 36, index 33 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + 
G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER + }, + { + /* page 37, index 34 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL + }, + { + /* page 38, index 35 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 39, index 36 */ + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, 
G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, + 
G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL + }, + { + /* page 41, index 37 */ + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, 
G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL + }, + { + /* page 43, index 38 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 44, index 39 */ + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
+ G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION + }, + { + /* page 45, index 40 */ + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_MODIFIER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK + }, + { + /* page 46, index 41 */ + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, + 
G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_INITIAL_PUNCTUATION, + G_UNICODE_FINAL_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 47, index 42 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 48, index 43 */ + G_UNICODE_SPACE_SEPARATOR, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_MODIFIER_LETTER, 
G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_LETTER + }, + { + /* page 49, index 44 */ + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER + }, + { + /* page 50, index 45 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, 
G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED + }, + { + /* page 77, index 46 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL + }, + { + /* page 159, index 47 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 160, index 48 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER + }, + { + /* page 164, index 49 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 166, index 50 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + 
G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_ENCLOSING_MARK, G_UNICODE_ENCLOSING_MARK, + G_UNICODE_ENCLOSING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 167, index 51 */ + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + 
G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER + }, + { + /* page 168, index 52 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 169, index 53 */ + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 170, index 54 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 215, index 55 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 250, index 56 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 251, index 57 */ + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_MATH_SYMBOL, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER + }, + { + /* page 253, index 58 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
+ G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_CURRENCY_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 254, index 59 */ + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + 
G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_CONNECT_PUNCTUATION, + G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_DASH_PUNCTUATION, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT + }, + { + /* page 255, index 60 */ + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_CONNECT_PUNCTUATION, G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OPEN_PUNCTUATION, G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_MODIFIER_LETTER, G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_CURRENCY_SYMBOL, G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 256, index 61 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 257, index 62 */ + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, 
G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_NUMBER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 258, index 63 */ + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 259, index 64 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 260, index 65 */ + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 264, index 66 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 265, index 67 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 266, index 68 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 291, index 69 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 292, index 70 */ + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + 
G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_LETTER_NUMBER, + G_UNICODE_LETTER_NUMBER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 464, index 71 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 465, index 72 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + 
G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, + G_UNICODE_COMBINING_MARK, G_UNICODE_COMBINING_MARK, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 466, index 73 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 467, index 74 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, G_UNICODE_OTHER_NUMBER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 468, index 75 */ + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER + }, + { + /* page 469, index 76 */ + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_LOWERCASE_LETTER, + 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, 
+ G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER + }, + { + /* page 470, index 77 */ + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + 
G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_MATH_SYMBOL, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER + }, + { + /* page 471, index 78 */ + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, 
G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + 
G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_MATH_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, 
G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_MATH_SYMBOL, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_LOWERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER, G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_DECIMAL_NUMBER + }, + { + /* page 496, index 79 */ + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, 
G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, G_UNICODE_OTHER_SYMBOL, + G_UNICODE_OTHER_SYMBOL, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + 
{ + /* page 678, index 80 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, 
G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 762, index 81 */ + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, G_UNICODE_OTHER_LETTER, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 3584, index 82 */ + G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, 
G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_FORMAT, G_UNICODE_FORMAT, G_UNICODE_FORMAT, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, 
G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED + }, + { + /* page 3585, index 83 */ + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, 
G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, 
+ G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + 
G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_NON_SPACING_MARK, G_UNICODE_NON_SPACING_MARK, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 4095, index 84 */ + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + }, + { + /* page 4351, index 85 */ + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, 
+ G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + 
G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, + G_UNICODE_PRIVATE_USE, G_UNICODE_PRIVATE_USE, G_UNICODE_UNASSIGNED, + G_UNICODE_UNASSIGNED + } }; /* U+0000 through U+2FAFF */ static const int16_t type_table_part1[763] = { - 0 /* page 0 */, - 1 /* page 1 */, - 2 /* page 2 */, - 3 /* page 3 */, - 4 /* page 4 */, - 5 /* page 5 */, - 6 /* page 6 */, - 7 /* page 7 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - 8 /* page 9 */, - 9 /* page 10 */, - 10 /* page 11 */, - 11 /* page 12 */, - 12 /* page 13 */, - 13 /* page 14 */, - 14 /* page 15 */, - 15 /* page 16 */, - 16 /* page 17 */, - 17 /* page 18 */, - 18 /* page 19 */, - 19 /* page 20 */, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 20 /* page 22 */, - 21 /* page 23 */, - 22 /* page 24 */, - 23 /* page 25 */, - 24 /* page 26 */, - 25 /* page 27 */, - 26 /* 
page 28 */, - 27 /* page 29 */, - 28 /* page 30 */, - 29 /* page 31 */, - 30 /* page 32 */, - 31 /* page 33 */, - G_UNICODE_MATH_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, - 32 /* page 35 */, - 33 /* page 36 */, - 34 /* page 37 */, - 35 /* page 38 */, - 36 /* page 39 */, - G_UNICODE_OTHER_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, - 37 /* page 41 */, - G_UNICODE_MATH_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, - 38 /* page 43 */, - 39 /* page 44 */, - 40 /* page 45 */, - 41 /* page 46 */, - 42 /* page 47 */, - 43 /* page 48 */, - 44 /* page 49 */, - 45 /* page 50 */, - G_UNICODE_OTHER_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 46 /* page 77 */, - 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 47 /* page 159 */, - 48 /* page 160 */, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 49 /* page 164 */, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 50 /* page 166 */, - 51 /* page 167 */, - 52 /* page 168 */, - 53 /* page 169 */, - 54 /* page 170 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 
- G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 55 /* page 215 */, - G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 56 /* page 250 */, - 57 /* page 251 */, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 58 /* page 253 */, - 59 /* page 254 */, - 60 /* page 255 */, - 61 /* page 256 */, - 62 /* page 257 */, - 63 /* page 258 */, - 64 /* page 259 */, - 65 /* page 260 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - 66 /* page 264 */, - 67 /* page 265 */, - 68 /* page 266 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 69 /* page 291 */, - 70 /* page 292 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
- G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED 
+ G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - 71 /* page 464 */, - 72 /* page 465 */, - 73 /* page 466 */, - 74 /* page 467 */, - 75 /* page 468 */, - 76 /* page 469 */, - 77 /* page 470 */, - 78 /* page 471 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - 79 /* page 496 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER 
+ G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 80 /* page 678 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, - 81 /* page 762 */ + 0 /* page 0 */, + 1 /* page 1 */, + 2 /* page 2 */, + 3 /* page 3 */, + 4 /* page 4 */, + 5 /* page 5 */, + 6 /* page 6 */, + 7 /* page 7 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + 8 /* page 9 */, + 9 /* page 10 */, + 10 /* page 11 */, + 11 /* page 12 */, + 12 /* page 13 */, + 13 /* page 14 */, + 14 /* page 15 */, + 15 /* page 16 */, + 16 /* page 17 */, + 17 /* page 18 */, + 18 /* page 19 */, + 19 /* page 20 */, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 20 /* page 22 */, + 21 /* page 23 */, + 22 /* page 24 */, + 23 /* page 25 */, + 24 /* page 26 */, + 25 /* page 27 */, + 26 /* page 28 */, + 27 /* page 29 */, + 28 /* page 30 */, + 29 /* page 31 */, + 30 /* page 
32 */, + 31 /* page 33 */, + G_UNICODE_MATH_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, + 32 /* page 35 */, + 33 /* page 36 */, + 34 /* page 37 */, + 35 /* page 38 */, + 36 /* page 39 */, + G_UNICODE_OTHER_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, + 37 /* page 41 */, + G_UNICODE_MATH_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, + 38 /* page 43 */, + 39 /* page 44 */, + 40 /* page 45 */, + 41 /* page 46 */, + 42 /* page 47 */, + 43 /* page 48 */, + 44 /* page 49 */, + 45 /* page 50 */, + G_UNICODE_OTHER_SYMBOL + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 46 /* page 77 */, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 
+ G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 47 /* page 159 */, + 48 /* page 160 */, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 49 /* page 164 */, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 50 /* page 166 */, + 51 /* page 167 */, + 52 /* page 168 */, + 53 /* page 169 */, + 54 /* page 170 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, 
+ G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 55 /* page 215 */, + G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_SURROGATE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 56 /* page 250 */, + 57 /* page 251 */, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 58 /* page 253 */, + 59 /* page 254 */, + 60 /* page 255 */, + 61 /* page 256 */, + 62 /* page 257 */, + 63 /* page 258 */, + 64 /* page 259 */, + 65 /* page 260 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + 66 /* page 264 */, + 67 /* page 265 */, + 68 /* page 266 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 69 /* page 291 */, + 70 /* page 292 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, 
+ G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED 
+ G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + 71 /* page 464 */, + 72 /* page 465 */, + 73 /* page 466 */, + 74 /* page 467 */, + 75 /* page 468 */, + 76 /* page 469 */, + 77 /* page 470 */, + 78 /* page 471 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + 79 /* page 496 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 80 /* page 678 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_OTHER_LETTER + G_UNICODE_MAX_TABLE_INDEX, + 81 /* page 762 */ }; /* U+E0000 through U+10FFFF */ static const int16_t type_table_part2[768] = { - 82 /* page 3584 */, - 83 /* page 3585 */, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE 
+ G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 84 /* page 4095 */, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, - 85 /* page 4351 */ + 82 /* page 3584 */, + 83 /* page 3585 */, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_UNASSIGNED + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 84 /* page 4095 */, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 
G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + 
G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + G_UNICODE_PRIVATE_USE + G_UNICODE_MAX_TABLE_INDEX, + 85 /* page 4351 */ }; static const gunichar attr_data[][256] = { - { /* page 0, index 0 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, - 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, - 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, - 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, - 0x007a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0041, 0x0042, - 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, - 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, - 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x039c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, - 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, - 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0000, - 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x1000000, - 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, - 0x00c9, 0x00ca, 0x00cb, 0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, - 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000, 0x00d8, 0x00d9, 0x00da, - 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178 - }, - { /* page 1, index 1 */ - 0x0101, 0x0100, 0x0103, 0x0102, 0x0105, 0x0104, 0x0107, 0x0106, 0x0109, - 0x0108, 0x010b, 0x010a, 0x010d, 0x010c, 0x010f, 0x010e, 0x0111, 0x0110, - 0x0113, 0x0112, 0x0115, 0x0114, 0x0117, 0x0116, 0x0119, 0x0118, 0x011b, - 0x011a, 0x011d, 0x011c, 0x011f, 0x011e, 0x0121, 0x0120, 0x0123, 0x0122, - 0x0125, 0x0124, 0x0127, 0x0126, 0x0129, 0x0128, 0x012b, 0x012a, 0x012d, - 0x012c, 0x012f, 0x012e, 0x1000007, 0x0049, 0x0133, 0x0132, 0x0135, - 0x0134, 0x0137, 0x0136, 0x0000, 0x013a, 0x0139, 0x013c, 0x013b, 0x013e, - 0x013d, 0x0140, 0x013f, 0x0142, 0x0141, 0x0144, 0x0143, 0x0146, 0x0145, - 0x0148, 0x0147, 0x1000086, 0x014b, 0x014a, 0x014d, 0x014c, 0x014f, - 0x014e, 0x0151, 0x0150, 0x0153, 0x0152, 0x0155, 0x0154, 0x0157, 0x0156, - 0x0159, 0x0158, 0x015b, 0x015a, 0x015d, 0x015c, 0x015f, 0x015e, 0x0161, - 0x0160, 0x0163, 0x0162, 0x0165, 0x0164, 0x0167, 0x0166, 0x0169, 0x0168, - 0x016b, 0x016a, 0x016d, 0x016c, 0x016f, 0x016e, 0x0171, 0x0170, 0x0173, - 0x0172, 0x0175, 0x0174, 0x0177, 0x0176, 0x00ff, 0x017a, 0x0179, 0x017c, - 0x017b, 0x017e, 0x017d, 0x0053, 0x0243, 0x0253, 0x0183, 0x0182, 0x0185, - 0x0184, 0x0254, 0x0188, 0x0187, 0x0256, 0x0257, 0x018c, 0x018b, 0x0000, - 0x01dd, 0x0259, 0x025b, 0x0192, 0x0191, 0x0260, 0x0263, 0x01f6, 0x0269, - 0x0268, 
0x0199, 0x0198, 0x023d, 0x0000, 0x026f, 0x0272, 0x0220, 0x0275, - 0x01a1, 0x01a0, 0x01a3, 0x01a2, 0x01a5, 0x01a4, 0x0280, 0x01a8, 0x01a7, - 0x0283, 0x0000, 0x0000, 0x01ad, 0x01ac, 0x0288, 0x01b0, 0x01af, 0x028a, - 0x028b, 0x01b4, 0x01b3, 0x01b6, 0x01b5, 0x0292, 0x01b9, 0x01b8, 0x0000, - 0x0000, 0x01bd, 0x01bc, 0x0000, 0x01f7, 0x0000, 0x0000, 0x0000, 0x0000, - 0x01c6, 0x0000, 0x01c4, 0x01c9, 0x0000, 0x01c7, 0x01cc, 0x0000, 0x01ca, - 0x01ce, 0x01cd, 0x01d0, 0x01cf, 0x01d2, 0x01d1, 0x01d4, 0x01d3, 0x01d6, - 0x01d5, 0x01d8, 0x01d7, 0x01da, 0x01d9, 0x01dc, 0x01db, 0x018e, 0x01df, - 0x01de, 0x01e1, 0x01e0, 0x01e3, 0x01e2, 0x01e5, 0x01e4, 0x01e7, 0x01e6, - 0x01e9, 0x01e8, 0x01eb, 0x01ea, 0x01ed, 0x01ec, 0x01ef, 0x01ee, - 0x10000ad, 0x01f3, 0x0000, 0x01f1, 0x01f5, 0x01f4, 0x0195, 0x01bf, - 0x01f9, 0x01f8, 0x01fb, 0x01fa, 0x01fd, 0x01fc, 0x01ff, 0x01fe - }, - { /* page 2, index 2 */ - 0x0201, 0x0200, 0x0203, 0x0202, 0x0205, 0x0204, 0x0207, 0x0206, 0x0209, - 0x0208, 0x020b, 0x020a, 0x020d, 0x020c, 0x020f, 0x020e, 0x0211, 0x0210, - 0x0213, 0x0212, 0x0215, 0x0214, 0x0217, 0x0216, 0x0219, 0x0218, 0x021b, - 0x021a, 0x021d, 0x021c, 0x021f, 0x021e, 0x019e, 0x0000, 0x0223, 0x0222, - 0x0225, 0x0224, 0x0227, 0x0226, 0x0229, 0x0228, 0x022b, 0x022a, 0x022d, - 0x022c, 0x022f, 0x022e, 0x0231, 0x0230, 0x0233, 0x0232, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x2c65, 0x023c, 0x023b, 0x019a, 0x2c66, - 0x0000, 0x0000, 0x0242, 0x0241, 0x0180, 0x0289, 0x028c, 0x0247, 0x0246, - 0x0249, 0x0248, 0x024b, 0x024a, 0x024d, 0x024c, 0x024f, 0x024e, 0x2c6f, - 0x2c6d, 0x0000, 0x0181, 0x0186, 0x0000, 0x0189, 0x018a, 0x0000, 0x018f, - 0x0000, 0x0190, 0x0000, 0x0000, 0x0000, 0x0000, 0x0193, 0x0000, 0x0000, - 0x0194, 0x0000, 0x0000, 0x0000, 0x0000, 0x0197, 0x0196, 0x0000, 0x2c62, - 0x0000, 0x0000, 0x0000, 0x019c, 0x0000, 0x2c6e, 0x019d, 0x0000, 0x0000, - 0x019f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c64, - 0x0000, 0x0000, 0x01a6, 0x0000, 0x0000, 0x01a9, 0x0000, 0x0000, 0x0000, - 
0x0000, 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x01b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 3, index 3 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0371, 0x0370, 0x0373, 0x0372, 0x0000, - 0x0000, 
0x0377, 0x0376, 0x0000, 0x0000, 0x0000, 0x03fd, 0x03fe, 0x03ff, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03ac, - 0x0000, 0x03ad, 0x03ae, 0x03af, 0x0000, 0x03cc, 0x0000, 0x03cd, 0x03ce, - 0x100008f, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, - 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, - 0x03c1, 0x0000, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, 0x03c9, - 0x03ca, 0x03cb, 0x0386, 0x0388, 0x0389, 0x038a, 0x100009e, 0x0391, - 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039a, - 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, 0x03a1, 0x03a3, 0x03a3, - 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, - 0x038e, 0x038f, 0x03d7, 0x0392, 0x0398, 0x0000, 0x0000, 0x0000, 0x03a6, - 0x03a0, 0x03cf, 0x03d9, 0x03d8, 0x03db, 0x03da, 0x03dd, 0x03dc, 0x03df, - 0x03de, 0x03e1, 0x03e0, 0x03e3, 0x03e2, 0x03e5, 0x03e4, 0x03e7, 0x03e6, - 0x03e9, 0x03e8, 0x03eb, 0x03ea, 0x03ed, 0x03ec, 0x03ef, 0x03ee, 0x039a, - 0x03a1, 0x03f9, 0x0000, 0x03b8, 0x0395, 0x0000, 0x03f8, 0x03f7, 0x03f2, - 0x03fb, 0x03fa, 0x0000, 0x037b, 0x037c, 0x037d - }, - { /* page 4, index 4 */ - 0x0450, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, - 0x0459, 0x045a, 0x045b, 0x045c, 0x045d, 0x045e, 0x045f, 0x0430, 0x0431, - 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, - 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 0x0440, 0x0441, 0x0442, 0x0443, - 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, - 0x044d, 0x044e, 0x044f, 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, - 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, - 0x041f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, - 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 0x0400, - 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, - 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, 0x0461, 0x0460, 0x0463, - 0x0462, 
0x0465, 0x0464, 0x0467, 0x0466, 0x0469, 0x0468, 0x046b, 0x046a, - 0x046d, 0x046c, 0x046f, 0x046e, 0x0471, 0x0470, 0x0473, 0x0472, 0x0475, - 0x0474, 0x0477, 0x0476, 0x0479, 0x0478, 0x047b, 0x047a, 0x047d, 0x047c, - 0x047f, 0x047e, 0x0481, 0x0480, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x048b, 0x048a, 0x048d, 0x048c, 0x048f, 0x048e, - 0x0491, 0x0490, 0x0493, 0x0492, 0x0495, 0x0494, 0x0497, 0x0496, 0x0499, - 0x0498, 0x049b, 0x049a, 0x049d, 0x049c, 0x049f, 0x049e, 0x04a1, 0x04a0, - 0x04a3, 0x04a2, 0x04a5, 0x04a4, 0x04a7, 0x04a6, 0x04a9, 0x04a8, 0x04ab, - 0x04aa, 0x04ad, 0x04ac, 0x04af, 0x04ae, 0x04b1, 0x04b0, 0x04b3, 0x04b2, - 0x04b5, 0x04b4, 0x04b7, 0x04b6, 0x04b9, 0x04b8, 0x04bb, 0x04ba, 0x04bd, - 0x04bc, 0x04bf, 0x04be, 0x04cf, 0x04c2, 0x04c1, 0x04c4, 0x04c3, 0x04c6, - 0x04c5, 0x04c8, 0x04c7, 0x04ca, 0x04c9, 0x04cc, 0x04cb, 0x04ce, 0x04cd, - 0x04c0, 0x04d1, 0x04d0, 0x04d3, 0x04d2, 0x04d5, 0x04d4, 0x04d7, 0x04d6, - 0x04d9, 0x04d8, 0x04db, 0x04da, 0x04dd, 0x04dc, 0x04df, 0x04de, 0x04e1, - 0x04e0, 0x04e3, 0x04e2, 0x04e5, 0x04e4, 0x04e7, 0x04e6, 0x04e9, 0x04e8, - 0x04eb, 0x04ea, 0x04ed, 0x04ec, 0x04ef, 0x04ee, 0x04f1, 0x04f0, 0x04f3, - 0x04f2, 0x04f5, 0x04f4, 0x04f7, 0x04f6, 0x04f9, 0x04f8, 0x04fb, 0x04fa, - 0x04fd, 0x04fc, 0x04ff, 0x04fe - }, - { /* page 5, index 5 */ - 0x0501, 0x0500, 0x0503, 0x0502, 0x0505, 0x0504, 0x0507, 0x0506, 0x0509, - 0x0508, 0x050b, 0x050a, 0x050d, 0x050c, 0x050f, 0x050e, 0x0511, 0x0510, - 0x0513, 0x0512, 0x0515, 0x0514, 0x0517, 0x0516, 0x0519, 0x0518, 0x051b, - 0x051a, 0x051d, 0x051c, 0x051f, 0x051e, 0x0521, 0x0520, 0x0523, 0x0522, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, - 0x0566, 0x0567, 0x0568, 0x0569, 0x056a, 0x056b, 0x056c, 0x056d, 0x056e, - 0x056f, 0x0570, 0x0571, 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577, - 0x0578, 0x0579, 0x057a, 0x057b, 0x057c, 0x057d, 0x057e, 0x057f, 0x0580, - 0x0581, 0x0582, 
0x0583, 0x0584, 0x0585, 0x0586, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0531, 0x0532, - 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053a, 0x053b, - 0x053c, 0x053d, 0x053e, 0x053f, 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, - 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, - 0x054e, 0x054f, 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, - 0x1000044, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 6, index 6 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, - 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, - 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 7, index 7 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, - 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 9, index 8 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, - 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, - 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 10, index 9 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, - 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, - 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 
11, index 10 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, - 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, - 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 
0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 12, index 11 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, - 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, - 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 13, index 12 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, - 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 14, index 13 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 15, index 14 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, - 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 16, index 15 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, - 0x0009, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x2d00, 0x2d01, - 0x2d02, 0x2d03, 0x2d04, 0x2d05, 0x2d06, 0x2d07, 0x2d08, 0x2d09, 0x2d0a, - 0x2d0b, 0x2d0c, 0x2d0d, 0x2d0e, 0x2d0f, 0x2d10, 0x2d11, 0x2d12, 0x2d13, - 0x2d14, 0x2d15, 0x2d16, 0x2d17, 0x2d18, 0x2d19, 0x2d1a, 0x2d1b, 0x2d1c, - 0x2d1d, 0x2d1e, 0x2d1f, 0x2d20, 0x2d21, 0x2d22, 0x2d23, 0x2d24, 0x2d25, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 23, index 16 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 24, index 17 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, - 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 25, index 18 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, - 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 27, index 19 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 
0x0008, 0x0009, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, - 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 28, index 20 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 
0x0007, - 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 29, index 21 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0xa77d, 0x0000, 0x0000, 0x0000, 0x2c63, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 30, index 22 */ - 0x1e01, 0x1e00, 0x1e03, 0x1e02, 0x1e05, 0x1e04, 0x1e07, 0x1e06, 0x1e09, - 0x1e08, 0x1e0b, 0x1e0a, 0x1e0d, 0x1e0c, 0x1e0f, 0x1e0e, 0x1e11, 0x1e10, - 0x1e13, 0x1e12, 0x1e15, 0x1e14, 0x1e17, 0x1e16, 0x1e19, 0x1e18, 0x1e1b, - 0x1e1a, 0x1e1d, 0x1e1c, 0x1e1f, 0x1e1e, 0x1e21, 0x1e20, 0x1e23, 0x1e22, - 0x1e25, 
0x1e24, 0x1e27, 0x1e26, 0x1e29, 0x1e28, 0x1e2b, 0x1e2a, 0x1e2d, - 0x1e2c, 0x1e2f, 0x1e2e, 0x1e31, 0x1e30, 0x1e33, 0x1e32, 0x1e35, 0x1e34, - 0x1e37, 0x1e36, 0x1e39, 0x1e38, 0x1e3b, 0x1e3a, 0x1e3d, 0x1e3c, 0x1e3f, - 0x1e3e, 0x1e41, 0x1e40, 0x1e43, 0x1e42, 0x1e45, 0x1e44, 0x1e47, 0x1e46, - 0x1e49, 0x1e48, 0x1e4b, 0x1e4a, 0x1e4d, 0x1e4c, 0x1e4f, 0x1e4e, 0x1e51, - 0x1e50, 0x1e53, 0x1e52, 0x1e55, 0x1e54, 0x1e57, 0x1e56, 0x1e59, 0x1e58, - 0x1e5b, 0x1e5a, 0x1e5d, 0x1e5c, 0x1e5f, 0x1e5e, 0x1e61, 0x1e60, 0x1e63, - 0x1e62, 0x1e65, 0x1e64, 0x1e67, 0x1e66, 0x1e69, 0x1e68, 0x1e6b, 0x1e6a, - 0x1e6d, 0x1e6c, 0x1e6f, 0x1e6e, 0x1e71, 0x1e70, 0x1e73, 0x1e72, 0x1e75, - 0x1e74, 0x1e77, 0x1e76, 0x1e79, 0x1e78, 0x1e7b, 0x1e7a, 0x1e7d, 0x1e7c, - 0x1e7f, 0x1e7e, 0x1e81, 0x1e80, 0x1e83, 0x1e82, 0x1e85, 0x1e84, 0x1e87, - 0x1e86, 0x1e89, 0x1e88, 0x1e8b, 0x1e8a, 0x1e8d, 0x1e8c, 0x1e8f, 0x1e8e, - 0x1e91, 0x1e90, 0x1e93, 0x1e92, 0x1e95, 0x1e94, 0x10000b6, 0x10000bf, - 0x10000c8, 0x10000d1, 0x10000da, 0x1e60, 0x0000, 0x0000, 0x00df, 0x0000, - 0x1ea1, 0x1ea0, 0x1ea3, 0x1ea2, 0x1ea5, 0x1ea4, 0x1ea7, 0x1ea6, 0x1ea9, - 0x1ea8, 0x1eab, 0x1eaa, 0x1ead, 0x1eac, 0x1eaf, 0x1eae, 0x1eb1, 0x1eb0, - 0x1eb3, 0x1eb2, 0x1eb5, 0x1eb4, 0x1eb7, 0x1eb6, 0x1eb9, 0x1eb8, 0x1ebb, - 0x1eba, 0x1ebd, 0x1ebc, 0x1ebf, 0x1ebe, 0x1ec1, 0x1ec0, 0x1ec3, 0x1ec2, - 0x1ec5, 0x1ec4, 0x1ec7, 0x1ec6, 0x1ec9, 0x1ec8, 0x1ecb, 0x1eca, 0x1ecd, - 0x1ecc, 0x1ecf, 0x1ece, 0x1ed1, 0x1ed0, 0x1ed3, 0x1ed2, 0x1ed5, 0x1ed4, - 0x1ed7, 0x1ed6, 0x1ed9, 0x1ed8, 0x1edb, 0x1eda, 0x1edd, 0x1edc, 0x1edf, - 0x1ede, 0x1ee1, 0x1ee0, 0x1ee3, 0x1ee2, 0x1ee5, 0x1ee4, 0x1ee7, 0x1ee6, - 0x1ee9, 0x1ee8, 0x1eeb, 0x1eea, 0x1eed, 0x1eec, 0x1eef, 0x1eee, 0x1ef1, - 0x1ef0, 0x1ef3, 0x1ef2, 0x1ef5, 0x1ef4, 0x1ef7, 0x1ef6, 0x1ef9, 0x1ef8, - 0x1efb, 0x1efa, 0x1efd, 0x1efc, 0x1eff, 0x1efe - }, - { /* page 31, index 23 */ - 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, 0x1f00, - 0x1f01, 0x1f02, 0x1f03, 0x1f04, 0x1f05, 0x1f06, 0x1f07, 0x1f18, 0x1f19, - 
0x1f1a, 0x1f1b, 0x1f1c, 0x1f1d, 0x0000, 0x0000, 0x1f10, 0x1f11, 0x1f12, - 0x1f13, 0x1f14, 0x1f15, 0x0000, 0x0000, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, - 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, 0x1f20, 0x1f21, 0x1f22, 0x1f23, 0x1f24, - 0x1f25, 0x1f26, 0x1f27, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d, - 0x1f3e, 0x1f3f, 0x1f30, 0x1f31, 0x1f32, 0x1f33, 0x1f34, 0x1f35, 0x1f36, - 0x1f37, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000, - 0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x0000, 0x0000, - 0x10000e3, 0x1f59, 0x10000ee, 0x1f5b, 0x10000fd, 0x1f5d, 0x100010c, - 0x1f5f, 0x0000, 0x1f51, 0x0000, 0x1f53, 0x0000, 0x1f55, 0x0000, 0x1f57, - 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f60, - 0x1f61, 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1fba, 0x1fbb, - 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, - 0x1feb, 0x1ffa, 0x1ffb, 0x0000, 0x0000, 0x10001b7, 0x10001c4, 0x10001d1, - 0x10001de, 0x10001eb, 0x10001f8, 0x1000205, 0x1000212, 0x100021f, - 0x1000229, 0x1000233, 0x100023d, 0x1000247, 0x1000251, 0x100025b, - 0x1000265, 0x100026f, 0x100027c, 0x1000289, 0x1000296, 0x10002a3, - 0x10002b0, 0x10002bd, 0x10002ca, 0x10002d7, 0x10002e1, 0x10002eb, - 0x10002f5, 0x10002ff, 0x1000309, 0x1000313, 0x100031d, 0x1000327, - 0x1000334, 0x1000341, 0x100034e, 0x100035b, 0x1000368, 0x1000375, - 0x1000382, 0x100038f, 0x1000399, 0x10003a3, 0x10003ad, 0x10003b7, - 0x10003c1, 0x10003cb, 0x10003d5, 0x1fb8, 0x1fb9, 0x100041e, 0x10003df, - 0x100042b, 0x0000, 0x100011b, 0x1000466, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, - 0x10003eb, 0x0000, 0x0399, 0x0000, 0x0000, 0x0000, 0x1000436, 0x10003f4, - 0x1000443, 0x0000, 0x1000126, 0x1000475, 0x1f72, 0x1f73, 0x1f74, 0x1f75, - 0x1000400, 0x0000, 0x0000, 0x0000, 0x1fd8, 0x1fd9, 0x1000131, 0x1000140, - 0x0000, 0x0000, 0x100014f, 0x100015a, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, - 0x0000, 0x0000, 0x0000, 0x0000, 0x1fe8, 0x1fe9, 0x1000169, 0x1000178, - 0x1000187, 0x1fec, 0x1000192, 0x100019d, 0x1fe0, 
0x1fe1, 0x1f7a, 0x1f7b, - 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x100044e, 0x1000409, - 0x100045b, 0x0000, 0x10001ac, 0x1000484, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, - 0x1000415, 0x0000, 0x0000, 0x0000 - }, - { /* page 33, index 24 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x03c9, 0x0000, 0x0000, 0x0000, 0x006b, 0x00e5, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x214e, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2132, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2184, 0x2183, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 44, index 25 */ - 0x2c30, 0x2c31, 0x2c32, 0x2c33, 0x2c34, 0x2c35, 0x2c36, 0x2c37, 0x2c38, - 0x2c39, 0x2c3a, 0x2c3b, 0x2c3c, 0x2c3d, 0x2c3e, 0x2c3f, 0x2c40, 0x2c41, - 0x2c42, 0x2c43, 0x2c44, 0x2c45, 0x2c46, 0x2c47, 0x2c48, 0x2c49, 0x2c4a, - 0x2c4b, 0x2c4c, 0x2c4d, 0x2c4e, 0x2c4f, 0x2c50, 0x2c51, 0x2c52, 0x2c53, - 0x2c54, 0x2c55, 0x2c56, 0x2c57, 0x2c58, 0x2c59, 0x2c5a, 0x2c5b, 0x2c5c, - 0x2c5d, 0x2c5e, 0x0000, 0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, - 0x2c06, 0x2c07, 0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, - 0x2c0f, 0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17, - 0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f, 0x2c20, - 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27, 0x2c28, 0x2c29, - 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x0000, 0x2c61, 0x2c60, 0x026b, - 0x1d7d, 0x027d, 0x023a, 0x023e, 0x2c68, 0x2c67, 0x2c6a, 0x2c69, 0x2c6c, - 0x2c6b, 0x0251, 0x0271, 0x0250, 0x0000, 0x0000, 0x2c73, 0x2c72, 0x0000, - 0x2c76, 0x2c75, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x2c81, 0x2c80, 0x2c83, 0x2c82, 0x2c85, 0x2c84, 0x2c87, - 0x2c86, 0x2c89, 0x2c88, 0x2c8b, 0x2c8a, 0x2c8d, 0x2c8c, 0x2c8f, 0x2c8e, - 0x2c91, 0x2c90, 0x2c93, 0x2c92, 0x2c95, 0x2c94, 0x2c97, 0x2c96, 0x2c99, - 0x2c98, 0x2c9b, 0x2c9a, 0x2c9d, 0x2c9c, 0x2c9f, 0x2c9e, 0x2ca1, 0x2ca0, - 0x2ca3, 0x2ca2, 0x2ca5, 0x2ca4, 0x2ca7, 0x2ca6, 0x2ca9, 0x2ca8, 0x2cab, - 0x2caa, 0x2cad, 0x2cac, 0x2caf, 0x2cae, 0x2cb1, 0x2cb0, 0x2cb3, 0x2cb2, - 0x2cb5, 0x2cb4, 0x2cb7, 0x2cb6, 0x2cb9, 0x2cb8, 0x2cbb, 0x2cba, 0x2cbd, - 0x2cbc, 0x2cbf, 0x2cbe, 0x2cc1, 0x2cc0, 0x2cc3, 0x2cc2, 0x2cc5, 
0x2cc4, - 0x2cc7, 0x2cc6, 0x2cc9, 0x2cc8, 0x2ccb, 0x2cca, 0x2ccd, 0x2ccc, 0x2ccf, - 0x2cce, 0x2cd1, 0x2cd0, 0x2cd3, 0x2cd2, 0x2cd5, 0x2cd4, 0x2cd7, 0x2cd6, - 0x2cd9, 0x2cd8, 0x2cdb, 0x2cda, 0x2cdd, 0x2cdc, 0x2cdf, 0x2cde, 0x2ce1, - 0x2ce0, 0x2ce3, 0x2ce2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 45, index 26 */ - 0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, 0x10a8, - 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, 0x10b0, 0x10b1, - 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, 0x10b8, 0x10b9, 0x10ba, - 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, 0x10c0, 0x10c1, 0x10c2, 0x10c3, - 0x10c4, 0x10c5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 166, index 27 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, - 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0xa641, 0xa640, 0xa643, 0xa642, 0xa645, 0xa644, 0xa647, 0xa646, - 0xa649, 0xa648, 0xa64b, 0xa64a, 0xa64d, 0xa64c, 0xa64f, 0xa64e, 0xa651, - 0xa650, 0xa653, 0xa652, 0xa655, 0xa654, 0xa657, 0xa656, 0xa659, 0xa658, - 0xa65b, 0xa65a, 0xa65d, 0xa65c, 0xa65f, 0xa65e, 0x0000, 0x0000, 0xa663, - 0xa662, 0xa665, 0xa664, 0xa667, 0xa666, 0xa669, 0xa668, 0xa66b, 0xa66a, - 0xa66d, 0xa66c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0xa681, 0xa680, 0xa683, 0xa682, 0xa685, 0xa684, 0xa687, - 0xa686, 0xa689, 0xa688, 0xa68b, 0xa68a, 0xa68d, 0xa68c, 0xa68f, 0xa68e, - 0xa691, 0xa690, 0xa693, 0xa692, 0xa695, 0xa694, 0xa697, 0xa696, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 167, index 28 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa723, 0xa722, - 0xa725, 0xa724, 0xa727, 0xa726, 0xa729, 0xa728, 0xa72b, 0xa72a, 0xa72d, - 0xa72c, 0xa72f, 0xa72e, 0x0000, 0x0000, 0xa733, 0xa732, 0xa735, 0xa734, - 0xa737, 0xa736, 0xa739, 0xa738, 0xa73b, 0xa73a, 0xa73d, 0xa73c, 0xa73f, - 0xa73e, 0xa741, 0xa740, 0xa743, 0xa742, 0xa745, 0xa744, 0xa747, 0xa746, - 0xa749, 0xa748, 0xa74b, 0xa74a, 0xa74d, 0xa74c, 0xa74f, 0xa74e, 0xa751, - 0xa750, 0xa753, 0xa752, 0xa755, 0xa754, 0xa757, 0xa756, 0xa759, 0xa758, - 0xa75b, 0xa75a, 0xa75d, 0xa75c, 0xa75f, 0xa75e, 0xa761, 0xa760, 0xa763, - 0xa762, 0xa765, 0xa764, 0xa767, 0xa766, 0xa769, 0xa768, 0xa76b, 0xa76a, - 0xa76d, 0xa76c, 0xa76f, 0xa76e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0xa77a, 0xa779, 0xa77c, 0xa77b, 0x1d79, - 0xa77f, 0xa77e, 0xa781, 0xa780, 0xa783, 0xa782, 0xa785, 0xa784, 0xa787, - 0xa786, 0x0000, 0x0000, 0x0000, 0xa78c, 0xa78b, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 168, index 29 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 169, index 30 */ - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, - 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 170, index 31 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 251, index 32 */ - 0x100000f, 0x1000016, 0x100001d, 0x1000024, 0x100002d, 0x1000036, - 0x100003d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x100004f, 0x100005a, 0x1000065, - 0x1000070, 0x100007b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000 - }, - { /* page 255, index 33 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, - 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff41, 0xff42, 0xff43, - 0xff44, 0xff45, 0xff46, 0xff47, 0xff48, 0xff49, 0xff4a, 0xff4b, 0xff4c, - 0xff4d, 0xff4e, 0xff4f, 0xff50, 0xff51, 0xff52, 0xff53, 0xff54, 0xff55, - 0xff56, 
0xff57, 0xff58, 0xff59, 0xff5a, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, - 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, 0xff30, - 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, - 0xff3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 260, index 34 */ - 0x10428, 0x10429, 0x1042a, 0x1042b, 0x1042c, 0x1042d, 0x1042e, 0x1042f, - 0x10430, 0x10431, 0x10432, 0x10433, 0x10434, 0x10435, 0x10436, 0x10437, - 0x10438, 0x10439, 0x1043a, 0x1043b, 0x1043c, 0x1043d, 0x1043e, 0x1043f, - 0x10440, 0x10441, 0x10442, 0x10443, 0x10444, 0x10445, 0x10446, 0x10447, - 0x10448, 
0x10449, 0x1044a, 0x1044b, 0x1044c, 0x1044d, 0x1044e, 0x1044f, - 0x10400, 0x10401, 0x10402, 0x10403, 0x10404, 0x10405, 0x10406, 0x10407, - 0x10408, 0x10409, 0x1040a, 0x1040b, 0x1040c, 0x1040d, 0x1040e, 0x1040f, - 0x10410, 0x10411, 0x10412, 0x10413, 0x10414, 0x10415, 0x10416, 0x10417, - 0x10418, 0x10419, 0x1041a, 0x1041b, 0x1041c, 0x1041d, 0x1041e, 0x1041f, - 0x10420, 0x10421, 0x10422, 0x10423, 0x10424, 0x10425, 0x10426, 0x10427, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 - }, - { /* page 471, index 35 */ - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, - 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, - 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, - 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, - 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, - 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, - 0x0006, 
0x0007, 0x0008, 0x0009 - } + { + /* page 0, index 0 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, + 0x0068, 0x0069, 0x006a, 0x006b, 0x006c, 0x006d, 0x006e, 0x006f, 0x0070, + 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, + 0x007a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0041, 0x0042, + 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004a, 0x004b, + 0x004c, 0x004d, 0x004e, 0x004f, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054, + 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005a, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x039c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x00e0, 0x00e1, 0x00e2, 0x00e3, 0x00e4, 0x00e5, + 0x00e6, 0x00e7, 0x00e8, 0x00e9, 0x00ea, 0x00eb, 0x00ec, 0x00ed, 0x00ee, + 0x00ef, 0x00f0, 0x00f1, 0x00f2, 0x00f3, 0x00f4, 0x00f5, 0x00f6, 0x0000, + 0x00f8, 0x00f9, 0x00fa, 0x00fb, 0x00fc, 0x00fd, 0x00fe, 0x1000000, + 0x00c0, 0x00c1, 0x00c2, 0x00c3, 0x00c4, 0x00c5, 0x00c6, 0x00c7, 0x00c8, + 0x00c9, 0x00ca, 0x00cb, 
0x00cc, 0x00cd, 0x00ce, 0x00cf, 0x00d0, 0x00d1, + 0x00d2, 0x00d3, 0x00d4, 0x00d5, 0x00d6, 0x0000, 0x00d8, 0x00d9, 0x00da, + 0x00db, 0x00dc, 0x00dd, 0x00de, 0x0178 + }, + { + /* page 1, index 1 */ + 0x0101, 0x0100, 0x0103, 0x0102, 0x0105, 0x0104, 0x0107, 0x0106, 0x0109, + 0x0108, 0x010b, 0x010a, 0x010d, 0x010c, 0x010f, 0x010e, 0x0111, 0x0110, + 0x0113, 0x0112, 0x0115, 0x0114, 0x0117, 0x0116, 0x0119, 0x0118, 0x011b, + 0x011a, 0x011d, 0x011c, 0x011f, 0x011e, 0x0121, 0x0120, 0x0123, 0x0122, + 0x0125, 0x0124, 0x0127, 0x0126, 0x0129, 0x0128, 0x012b, 0x012a, 0x012d, + 0x012c, 0x012f, 0x012e, 0x1000007, 0x0049, 0x0133, 0x0132, 0x0135, + 0x0134, 0x0137, 0x0136, 0x0000, 0x013a, 0x0139, 0x013c, 0x013b, 0x013e, + 0x013d, 0x0140, 0x013f, 0x0142, 0x0141, 0x0144, 0x0143, 0x0146, 0x0145, + 0x0148, 0x0147, 0x1000086, 0x014b, 0x014a, 0x014d, 0x014c, 0x014f, + 0x014e, 0x0151, 0x0150, 0x0153, 0x0152, 0x0155, 0x0154, 0x0157, 0x0156, + 0x0159, 0x0158, 0x015b, 0x015a, 0x015d, 0x015c, 0x015f, 0x015e, 0x0161, + 0x0160, 0x0163, 0x0162, 0x0165, 0x0164, 0x0167, 0x0166, 0x0169, 0x0168, + 0x016b, 0x016a, 0x016d, 0x016c, 0x016f, 0x016e, 0x0171, 0x0170, 0x0173, + 0x0172, 0x0175, 0x0174, 0x0177, 0x0176, 0x00ff, 0x017a, 0x0179, 0x017c, + 0x017b, 0x017e, 0x017d, 0x0053, 0x0243, 0x0253, 0x0183, 0x0182, 0x0185, + 0x0184, 0x0254, 0x0188, 0x0187, 0x0256, 0x0257, 0x018c, 0x018b, 0x0000, + 0x01dd, 0x0259, 0x025b, 0x0192, 0x0191, 0x0260, 0x0263, 0x01f6, 0x0269, + 0x0268, 0x0199, 0x0198, 0x023d, 0x0000, 0x026f, 0x0272, 0x0220, 0x0275, + 0x01a1, 0x01a0, 0x01a3, 0x01a2, 0x01a5, 0x01a4, 0x0280, 0x01a8, 0x01a7, + 0x0283, 0x0000, 0x0000, 0x01ad, 0x01ac, 0x0288, 0x01b0, 0x01af, 0x028a, + 0x028b, 0x01b4, 0x01b3, 0x01b6, 0x01b5, 0x0292, 0x01b9, 0x01b8, 0x0000, + 0x0000, 0x01bd, 0x01bc, 0x0000, 0x01f7, 0x0000, 0x0000, 0x0000, 0x0000, + 0x01c6, 0x0000, 0x01c4, 0x01c9, 0x0000, 0x01c7, 0x01cc, 0x0000, 0x01ca, + 0x01ce, 0x01cd, 0x01d0, 0x01cf, 0x01d2, 0x01d1, 0x01d4, 0x01d3, 0x01d6, + 0x01d5, 0x01d8, 0x01d7, 0x01da, 
0x01d9, 0x01dc, 0x01db, 0x018e, 0x01df, + 0x01de, 0x01e1, 0x01e0, 0x01e3, 0x01e2, 0x01e5, 0x01e4, 0x01e7, 0x01e6, + 0x01e9, 0x01e8, 0x01eb, 0x01ea, 0x01ed, 0x01ec, 0x01ef, 0x01ee, + 0x10000ad, 0x01f3, 0x0000, 0x01f1, 0x01f5, 0x01f4, 0x0195, 0x01bf, + 0x01f9, 0x01f8, 0x01fb, 0x01fa, 0x01fd, 0x01fc, 0x01ff, 0x01fe + }, + { + /* page 2, index 2 */ + 0x0201, 0x0200, 0x0203, 0x0202, 0x0205, 0x0204, 0x0207, 0x0206, 0x0209, + 0x0208, 0x020b, 0x020a, 0x020d, 0x020c, 0x020f, 0x020e, 0x0211, 0x0210, + 0x0213, 0x0212, 0x0215, 0x0214, 0x0217, 0x0216, 0x0219, 0x0218, 0x021b, + 0x021a, 0x021d, 0x021c, 0x021f, 0x021e, 0x019e, 0x0000, 0x0223, 0x0222, + 0x0225, 0x0224, 0x0227, 0x0226, 0x0229, 0x0228, 0x022b, 0x022a, 0x022d, + 0x022c, 0x022f, 0x022e, 0x0231, 0x0230, 0x0233, 0x0232, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x2c65, 0x023c, 0x023b, 0x019a, 0x2c66, + 0x0000, 0x0000, 0x0242, 0x0241, 0x0180, 0x0289, 0x028c, 0x0247, 0x0246, + 0x0249, 0x0248, 0x024b, 0x024a, 0x024d, 0x024c, 0x024f, 0x024e, 0x2c6f, + 0x2c6d, 0x0000, 0x0181, 0x0186, 0x0000, 0x0189, 0x018a, 0x0000, 0x018f, + 0x0000, 0x0190, 0x0000, 0x0000, 0x0000, 0x0000, 0x0193, 0x0000, 0x0000, + 0x0194, 0x0000, 0x0000, 0x0000, 0x0000, 0x0197, 0x0196, 0x0000, 0x2c62, + 0x0000, 0x0000, 0x0000, 0x019c, 0x0000, 0x2c6e, 0x019d, 0x0000, 0x0000, + 0x019f, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2c64, + 0x0000, 0x0000, 0x01a6, 0x0000, 0x0000, 0x01a9, 0x0000, 0x0000, 0x0000, + 0x0000, 0x01ae, 0x0244, 0x01b1, 0x01b2, 0x0245, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x01b7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 3, index 3 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0371, 0x0370, 0x0373, 0x0372, 0x0000, + 0x0000, 0x0377, 0x0376, 0x0000, 0x0000, 0x0000, 0x03fd, 0x03fe, 0x03ff, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x03ac, + 0x0000, 0x03ad, 0x03ae, 0x03af, 0x0000, 0x03cc, 0x0000, 0x03cd, 0x03ce, + 0x100008f, 0x03b1, 0x03b2, 0x03b3, 0x03b4, 0x03b5, 0x03b6, 0x03b7, + 0x03b8, 0x03b9, 0x03ba, 0x03bb, 0x03bc, 0x03bd, 0x03be, 0x03bf, 0x03c0, + 0x03c1, 0x0000, 0x03c3, 0x03c4, 0x03c5, 0x03c6, 0x03c7, 0x03c8, 0x03c9, + 0x03ca, 0x03cb, 0x0386, 0x0388, 0x0389, 0x038a, 0x100009e, 0x0391, + 0x0392, 0x0393, 0x0394, 0x0395, 
0x0396, 0x0397, 0x0398, 0x0399, 0x039a, + 0x039b, 0x039c, 0x039d, 0x039e, 0x039f, 0x03a0, 0x03a1, 0x03a3, 0x03a3, + 0x03a4, 0x03a5, 0x03a6, 0x03a7, 0x03a8, 0x03a9, 0x03aa, 0x03ab, 0x038c, + 0x038e, 0x038f, 0x03d7, 0x0392, 0x0398, 0x0000, 0x0000, 0x0000, 0x03a6, + 0x03a0, 0x03cf, 0x03d9, 0x03d8, 0x03db, 0x03da, 0x03dd, 0x03dc, 0x03df, + 0x03de, 0x03e1, 0x03e0, 0x03e3, 0x03e2, 0x03e5, 0x03e4, 0x03e7, 0x03e6, + 0x03e9, 0x03e8, 0x03eb, 0x03ea, 0x03ed, 0x03ec, 0x03ef, 0x03ee, 0x039a, + 0x03a1, 0x03f9, 0x0000, 0x03b8, 0x0395, 0x0000, 0x03f8, 0x03f7, 0x03f2, + 0x03fb, 0x03fa, 0x0000, 0x037b, 0x037c, 0x037d + }, + { + /* page 4, index 4 */ + 0x0450, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, + 0x0459, 0x045a, 0x045b, 0x045c, 0x045d, 0x045e, 0x045f, 0x0430, 0x0431, + 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043a, + 0x043b, 0x043c, 0x043d, 0x043e, 0x043f, 0x0440, 0x0441, 0x0442, 0x0443, + 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044a, 0x044b, 0x044c, + 0x044d, 0x044e, 0x044f, 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, + 0x0416, 0x0417, 0x0418, 0x0419, 0x041a, 0x041b, 0x041c, 0x041d, 0x041e, + 0x041f, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, + 0x0428, 0x0429, 0x042a, 0x042b, 0x042c, 0x042d, 0x042e, 0x042f, 0x0400, + 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, + 0x040a, 0x040b, 0x040c, 0x040d, 0x040e, 0x040f, 0x0461, 0x0460, 0x0463, + 0x0462, 0x0465, 0x0464, 0x0467, 0x0466, 0x0469, 0x0468, 0x046b, 0x046a, + 0x046d, 0x046c, 0x046f, 0x046e, 0x0471, 0x0470, 0x0473, 0x0472, 0x0475, + 0x0474, 0x0477, 0x0476, 0x0479, 0x0478, 0x047b, 0x047a, 0x047d, 0x047c, + 0x047f, 0x047e, 0x0481, 0x0480, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x048b, 0x048a, 0x048d, 0x048c, 0x048f, 0x048e, + 0x0491, 0x0490, 0x0493, 0x0492, 0x0495, 0x0494, 0x0497, 0x0496, 0x0499, + 0x0498, 0x049b, 0x049a, 0x049d, 0x049c, 0x049f, 0x049e, 0x04a1, 0x04a0, + 0x04a3, 0x04a2, 0x04a5, 
0x04a4, 0x04a7, 0x04a6, 0x04a9, 0x04a8, 0x04ab, + 0x04aa, 0x04ad, 0x04ac, 0x04af, 0x04ae, 0x04b1, 0x04b0, 0x04b3, 0x04b2, + 0x04b5, 0x04b4, 0x04b7, 0x04b6, 0x04b9, 0x04b8, 0x04bb, 0x04ba, 0x04bd, + 0x04bc, 0x04bf, 0x04be, 0x04cf, 0x04c2, 0x04c1, 0x04c4, 0x04c3, 0x04c6, + 0x04c5, 0x04c8, 0x04c7, 0x04ca, 0x04c9, 0x04cc, 0x04cb, 0x04ce, 0x04cd, + 0x04c0, 0x04d1, 0x04d0, 0x04d3, 0x04d2, 0x04d5, 0x04d4, 0x04d7, 0x04d6, + 0x04d9, 0x04d8, 0x04db, 0x04da, 0x04dd, 0x04dc, 0x04df, 0x04de, 0x04e1, + 0x04e0, 0x04e3, 0x04e2, 0x04e5, 0x04e4, 0x04e7, 0x04e6, 0x04e9, 0x04e8, + 0x04eb, 0x04ea, 0x04ed, 0x04ec, 0x04ef, 0x04ee, 0x04f1, 0x04f0, 0x04f3, + 0x04f2, 0x04f5, 0x04f4, 0x04f7, 0x04f6, 0x04f9, 0x04f8, 0x04fb, 0x04fa, + 0x04fd, 0x04fc, 0x04ff, 0x04fe + }, + { + /* page 5, index 5 */ + 0x0501, 0x0500, 0x0503, 0x0502, 0x0505, 0x0504, 0x0507, 0x0506, 0x0509, + 0x0508, 0x050b, 0x050a, 0x050d, 0x050c, 0x050f, 0x050e, 0x0511, 0x0510, + 0x0513, 0x0512, 0x0515, 0x0514, 0x0517, 0x0516, 0x0519, 0x0518, 0x051b, + 0x051a, 0x051d, 0x051c, 0x051f, 0x051e, 0x0521, 0x0520, 0x0523, 0x0522, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, + 0x0566, 0x0567, 0x0568, 0x0569, 0x056a, 0x056b, 0x056c, 0x056d, 0x056e, + 0x056f, 0x0570, 0x0571, 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577, + 0x0578, 0x0579, 0x057a, 0x057b, 0x057c, 0x057d, 0x057e, 0x057f, 0x0580, + 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0531, 0x0532, + 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538, 0x0539, 0x053a, 0x053b, + 0x053c, 0x053d, 0x053e, 0x053f, 0x0540, 0x0541, 0x0542, 0x0543, 0x0544, + 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054a, 0x054b, 0x054c, 0x054d, + 0x054e, 0x054f, 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556, + 0x1000044, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 6, index 6 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, + 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, + 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 7, index 7 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 9, index 8 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 10, index 9 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 11, index 10 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 12, index 11 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 13, index 12 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 14, index 13 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 
0x0000, 0x0000, 0x0000 + }, + { + /* page 15, index 14 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 16, index 15 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2d00, 0x2d01, + 0x2d02, 0x2d03, 0x2d04, 0x2d05, 0x2d06, 0x2d07, 0x2d08, 0x2d09, 0x2d0a, + 0x2d0b, 0x2d0c, 0x2d0d, 0x2d0e, 0x2d0f, 0x2d10, 0x2d11, 0x2d12, 0x2d13, + 0x2d14, 0x2d15, 0x2d16, 0x2d17, 0x2d18, 0x2d19, 0x2d1a, 0x2d1b, 0x2d1c, + 0x2d1d, 0x2d1e, 0x2d1f, 0x2d20, 0x2d21, 0x2d22, 0x2d23, 0x2d24, 0x2d25, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 23, index 16 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 24, index 17 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, + 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 25, index 18 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, + 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 27, index 19 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 28, index 20 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 29, index 21 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
+ 0x0000, 0x0000, 0x0000, 0x0000, 0xa77d, 0x0000, 0x0000, 0x0000, 0x2c63, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 30, index 22 */ + 0x1e01, 0x1e00, 0x1e03, 0x1e02, 0x1e05, 0x1e04, 0x1e07, 0x1e06, 0x1e09, + 0x1e08, 0x1e0b, 0x1e0a, 0x1e0d, 0x1e0c, 0x1e0f, 0x1e0e, 0x1e11, 0x1e10, + 0x1e13, 0x1e12, 0x1e15, 0x1e14, 0x1e17, 0x1e16, 0x1e19, 0x1e18, 0x1e1b, + 0x1e1a, 0x1e1d, 0x1e1c, 0x1e1f, 0x1e1e, 0x1e21, 0x1e20, 0x1e23, 0x1e22, + 0x1e25, 0x1e24, 0x1e27, 0x1e26, 0x1e29, 0x1e28, 0x1e2b, 0x1e2a, 0x1e2d, + 0x1e2c, 0x1e2f, 0x1e2e, 0x1e31, 0x1e30, 0x1e33, 0x1e32, 0x1e35, 0x1e34, + 0x1e37, 0x1e36, 0x1e39, 0x1e38, 0x1e3b, 0x1e3a, 0x1e3d, 0x1e3c, 0x1e3f, + 0x1e3e, 0x1e41, 0x1e40, 0x1e43, 0x1e42, 0x1e45, 0x1e44, 0x1e47, 0x1e46, + 0x1e49, 0x1e48, 0x1e4b, 0x1e4a, 0x1e4d, 0x1e4c, 0x1e4f, 0x1e4e, 0x1e51, + 0x1e50, 0x1e53, 0x1e52, 0x1e55, 0x1e54, 0x1e57, 0x1e56, 0x1e59, 0x1e58, + 0x1e5b, 0x1e5a, 0x1e5d, 0x1e5c, 0x1e5f, 0x1e5e, 0x1e61, 0x1e60, 0x1e63, + 
0x1e62, 0x1e65, 0x1e64, 0x1e67, 0x1e66, 0x1e69, 0x1e68, 0x1e6b, 0x1e6a, + 0x1e6d, 0x1e6c, 0x1e6f, 0x1e6e, 0x1e71, 0x1e70, 0x1e73, 0x1e72, 0x1e75, + 0x1e74, 0x1e77, 0x1e76, 0x1e79, 0x1e78, 0x1e7b, 0x1e7a, 0x1e7d, 0x1e7c, + 0x1e7f, 0x1e7e, 0x1e81, 0x1e80, 0x1e83, 0x1e82, 0x1e85, 0x1e84, 0x1e87, + 0x1e86, 0x1e89, 0x1e88, 0x1e8b, 0x1e8a, 0x1e8d, 0x1e8c, 0x1e8f, 0x1e8e, + 0x1e91, 0x1e90, 0x1e93, 0x1e92, 0x1e95, 0x1e94, 0x10000b6, 0x10000bf, + 0x10000c8, 0x10000d1, 0x10000da, 0x1e60, 0x0000, 0x0000, 0x00df, 0x0000, + 0x1ea1, 0x1ea0, 0x1ea3, 0x1ea2, 0x1ea5, 0x1ea4, 0x1ea7, 0x1ea6, 0x1ea9, + 0x1ea8, 0x1eab, 0x1eaa, 0x1ead, 0x1eac, 0x1eaf, 0x1eae, 0x1eb1, 0x1eb0, + 0x1eb3, 0x1eb2, 0x1eb5, 0x1eb4, 0x1eb7, 0x1eb6, 0x1eb9, 0x1eb8, 0x1ebb, + 0x1eba, 0x1ebd, 0x1ebc, 0x1ebf, 0x1ebe, 0x1ec1, 0x1ec0, 0x1ec3, 0x1ec2, + 0x1ec5, 0x1ec4, 0x1ec7, 0x1ec6, 0x1ec9, 0x1ec8, 0x1ecb, 0x1eca, 0x1ecd, + 0x1ecc, 0x1ecf, 0x1ece, 0x1ed1, 0x1ed0, 0x1ed3, 0x1ed2, 0x1ed5, 0x1ed4, + 0x1ed7, 0x1ed6, 0x1ed9, 0x1ed8, 0x1edb, 0x1eda, 0x1edd, 0x1edc, 0x1edf, + 0x1ede, 0x1ee1, 0x1ee0, 0x1ee3, 0x1ee2, 0x1ee5, 0x1ee4, 0x1ee7, 0x1ee6, + 0x1ee9, 0x1ee8, 0x1eeb, 0x1eea, 0x1eed, 0x1eec, 0x1eef, 0x1eee, 0x1ef1, + 0x1ef0, 0x1ef3, 0x1ef2, 0x1ef5, 0x1ef4, 0x1ef7, 0x1ef6, 0x1ef9, 0x1ef8, + 0x1efb, 0x1efa, 0x1efd, 0x1efc, 0x1eff, 0x1efe + }, + { + /* page 31, index 23 */ + 0x1f08, 0x1f09, 0x1f0a, 0x1f0b, 0x1f0c, 0x1f0d, 0x1f0e, 0x1f0f, 0x1f00, + 0x1f01, 0x1f02, 0x1f03, 0x1f04, 0x1f05, 0x1f06, 0x1f07, 0x1f18, 0x1f19, + 0x1f1a, 0x1f1b, 0x1f1c, 0x1f1d, 0x0000, 0x0000, 0x1f10, 0x1f11, 0x1f12, + 0x1f13, 0x1f14, 0x1f15, 0x0000, 0x0000, 0x1f28, 0x1f29, 0x1f2a, 0x1f2b, + 0x1f2c, 0x1f2d, 0x1f2e, 0x1f2f, 0x1f20, 0x1f21, 0x1f22, 0x1f23, 0x1f24, + 0x1f25, 0x1f26, 0x1f27, 0x1f38, 0x1f39, 0x1f3a, 0x1f3b, 0x1f3c, 0x1f3d, + 0x1f3e, 0x1f3f, 0x1f30, 0x1f31, 0x1f32, 0x1f33, 0x1f34, 0x1f35, 0x1f36, + 0x1f37, 0x1f48, 0x1f49, 0x1f4a, 0x1f4b, 0x1f4c, 0x1f4d, 0x0000, 0x0000, + 0x1f40, 0x1f41, 0x1f42, 0x1f43, 0x1f44, 0x1f45, 0x0000, 0x0000, + 
0x10000e3, 0x1f59, 0x10000ee, 0x1f5b, 0x10000fd, 0x1f5d, 0x100010c, + 0x1f5f, 0x0000, 0x1f51, 0x0000, 0x1f53, 0x0000, 0x1f55, 0x0000, 0x1f57, + 0x1f68, 0x1f69, 0x1f6a, 0x1f6b, 0x1f6c, 0x1f6d, 0x1f6e, 0x1f6f, 0x1f60, + 0x1f61, 0x1f62, 0x1f63, 0x1f64, 0x1f65, 0x1f66, 0x1f67, 0x1fba, 0x1fbb, + 0x1fc8, 0x1fc9, 0x1fca, 0x1fcb, 0x1fda, 0x1fdb, 0x1ff8, 0x1ff9, 0x1fea, + 0x1feb, 0x1ffa, 0x1ffb, 0x0000, 0x0000, 0x10001b7, 0x10001c4, 0x10001d1, + 0x10001de, 0x10001eb, 0x10001f8, 0x1000205, 0x1000212, 0x100021f, + 0x1000229, 0x1000233, 0x100023d, 0x1000247, 0x1000251, 0x100025b, + 0x1000265, 0x100026f, 0x100027c, 0x1000289, 0x1000296, 0x10002a3, + 0x10002b0, 0x10002bd, 0x10002ca, 0x10002d7, 0x10002e1, 0x10002eb, + 0x10002f5, 0x10002ff, 0x1000309, 0x1000313, 0x100031d, 0x1000327, + 0x1000334, 0x1000341, 0x100034e, 0x100035b, 0x1000368, 0x1000375, + 0x1000382, 0x100038f, 0x1000399, 0x10003a3, 0x10003ad, 0x10003b7, + 0x10003c1, 0x10003cb, 0x10003d5, 0x1fb8, 0x1fb9, 0x100041e, 0x10003df, + 0x100042b, 0x0000, 0x100011b, 0x1000466, 0x1fb0, 0x1fb1, 0x1f70, 0x1f71, + 0x10003eb, 0x0000, 0x0399, 0x0000, 0x0000, 0x0000, 0x1000436, 0x10003f4, + 0x1000443, 0x0000, 0x1000126, 0x1000475, 0x1f72, 0x1f73, 0x1f74, 0x1f75, + 0x1000400, 0x0000, 0x0000, 0x0000, 0x1fd8, 0x1fd9, 0x1000131, 0x1000140, + 0x0000, 0x0000, 0x100014f, 0x100015a, 0x1fd0, 0x1fd1, 0x1f76, 0x1f77, + 0x0000, 0x0000, 0x0000, 0x0000, 0x1fe8, 0x1fe9, 0x1000169, 0x1000178, + 0x1000187, 0x1fec, 0x1000192, 0x100019d, 0x1fe0, 0x1fe1, 0x1f7a, 0x1f7b, + 0x1fe5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x100044e, 0x1000409, + 0x100045b, 0x0000, 0x10001ac, 0x1000484, 0x1f78, 0x1f79, 0x1f7c, 0x1f7d, + 0x1000415, 0x0000, 0x0000, 0x0000 + }, + { + /* page 33, index 24 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x03c9, 0x0000, 0x0000, 0x0000, 0x006b, 0x00e5, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x214e, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2132, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x2184, 0x2183, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 44, index 25 */ + 0x2c30, 0x2c31, 0x2c32, 0x2c33, 0x2c34, 0x2c35, 0x2c36, 0x2c37, 0x2c38, + 0x2c39, 0x2c3a, 0x2c3b, 0x2c3c, 0x2c3d, 0x2c3e, 0x2c3f, 
0x2c40, 0x2c41, + 0x2c42, 0x2c43, 0x2c44, 0x2c45, 0x2c46, 0x2c47, 0x2c48, 0x2c49, 0x2c4a, + 0x2c4b, 0x2c4c, 0x2c4d, 0x2c4e, 0x2c4f, 0x2c50, 0x2c51, 0x2c52, 0x2c53, + 0x2c54, 0x2c55, 0x2c56, 0x2c57, 0x2c58, 0x2c59, 0x2c5a, 0x2c5b, 0x2c5c, + 0x2c5d, 0x2c5e, 0x0000, 0x2c00, 0x2c01, 0x2c02, 0x2c03, 0x2c04, 0x2c05, + 0x2c06, 0x2c07, 0x2c08, 0x2c09, 0x2c0a, 0x2c0b, 0x2c0c, 0x2c0d, 0x2c0e, + 0x2c0f, 0x2c10, 0x2c11, 0x2c12, 0x2c13, 0x2c14, 0x2c15, 0x2c16, 0x2c17, + 0x2c18, 0x2c19, 0x2c1a, 0x2c1b, 0x2c1c, 0x2c1d, 0x2c1e, 0x2c1f, 0x2c20, + 0x2c21, 0x2c22, 0x2c23, 0x2c24, 0x2c25, 0x2c26, 0x2c27, 0x2c28, 0x2c29, + 0x2c2a, 0x2c2b, 0x2c2c, 0x2c2d, 0x2c2e, 0x0000, 0x2c61, 0x2c60, 0x026b, + 0x1d7d, 0x027d, 0x023a, 0x023e, 0x2c68, 0x2c67, 0x2c6a, 0x2c69, 0x2c6c, + 0x2c6b, 0x0251, 0x0271, 0x0250, 0x0000, 0x0000, 0x2c73, 0x2c72, 0x0000, + 0x2c76, 0x2c75, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x2c81, 0x2c80, 0x2c83, 0x2c82, 0x2c85, 0x2c84, 0x2c87, + 0x2c86, 0x2c89, 0x2c88, 0x2c8b, 0x2c8a, 0x2c8d, 0x2c8c, 0x2c8f, 0x2c8e, + 0x2c91, 0x2c90, 0x2c93, 0x2c92, 0x2c95, 0x2c94, 0x2c97, 0x2c96, 0x2c99, + 0x2c98, 0x2c9b, 0x2c9a, 0x2c9d, 0x2c9c, 0x2c9f, 0x2c9e, 0x2ca1, 0x2ca0, + 0x2ca3, 0x2ca2, 0x2ca5, 0x2ca4, 0x2ca7, 0x2ca6, 0x2ca9, 0x2ca8, 0x2cab, + 0x2caa, 0x2cad, 0x2cac, 0x2caf, 0x2cae, 0x2cb1, 0x2cb0, 0x2cb3, 0x2cb2, + 0x2cb5, 0x2cb4, 0x2cb7, 0x2cb6, 0x2cb9, 0x2cb8, 0x2cbb, 0x2cba, 0x2cbd, + 0x2cbc, 0x2cbf, 0x2cbe, 0x2cc1, 0x2cc0, 0x2cc3, 0x2cc2, 0x2cc5, 0x2cc4, + 0x2cc7, 0x2cc6, 0x2cc9, 0x2cc8, 0x2ccb, 0x2cca, 0x2ccd, 0x2ccc, 0x2ccf, + 0x2cce, 0x2cd1, 0x2cd0, 0x2cd3, 0x2cd2, 0x2cd5, 0x2cd4, 0x2cd7, 0x2cd6, + 0x2cd9, 0x2cd8, 0x2cdb, 0x2cda, 0x2cdd, 0x2cdc, 0x2cdf, 0x2cde, 0x2ce1, + 0x2ce0, 0x2ce3, 0x2ce2, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 45, 
index 26 */ + 0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, 0x10a8, + 0x10a9, 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, 0x10b0, 0x10b1, + 0x10b2, 0x10b3, 0x10b4, 0x10b5, 0x10b6, 0x10b7, 0x10b8, 0x10b9, 0x10ba, + 0x10bb, 0x10bc, 0x10bd, 0x10be, 0x10bf, 0x10c0, 0x10c1, 0x10c2, 0x10c3, + 0x10c4, 0x10c5, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 166, index 27 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, + 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0xa641, 0xa640, 0xa643, 0xa642, 0xa645, 0xa644, 0xa647, 0xa646, + 0xa649, 0xa648, 0xa64b, 0xa64a, 0xa64d, 0xa64c, 0xa64f, 0xa64e, 0xa651, + 0xa650, 0xa653, 0xa652, 0xa655, 0xa654, 0xa657, 0xa656, 0xa659, 0xa658, + 0xa65b, 0xa65a, 0xa65d, 0xa65c, 0xa65f, 0xa65e, 0x0000, 0x0000, 0xa663, + 0xa662, 0xa665, 0xa664, 0xa667, 0xa666, 0xa669, 0xa668, 0xa66b, 0xa66a, + 0xa66d, 0xa66c, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0xa681, 0xa680, 0xa683, 0xa682, 0xa685, 0xa684, 0xa687, + 0xa686, 0xa689, 0xa688, 0xa68b, 0xa68a, 0xa68d, 0xa68c, 0xa68f, 0xa68e, + 0xa691, 0xa690, 0xa693, 0xa692, 0xa695, 0xa694, 0xa697, 0xa696, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 167, index 28 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xa723, 0xa722, + 0xa725, 0xa724, 0xa727, 0xa726, 0xa729, 0xa728, 0xa72b, 0xa72a, 0xa72d, + 0xa72c, 0xa72f, 0xa72e, 0x0000, 0x0000, 0xa733, 0xa732, 0xa735, 0xa734, + 0xa737, 0xa736, 0xa739, 0xa738, 0xa73b, 0xa73a, 0xa73d, 0xa73c, 0xa73f, + 0xa73e, 0xa741, 0xa740, 0xa743, 0xa742, 0xa745, 0xa744, 0xa747, 0xa746, + 0xa749, 0xa748, 0xa74b, 0xa74a, 0xa74d, 0xa74c, 0xa74f, 0xa74e, 0xa751, + 0xa750, 0xa753, 0xa752, 0xa755, 0xa754, 0xa757, 0xa756, 0xa759, 0xa758, + 0xa75b, 0xa75a, 0xa75d, 0xa75c, 0xa75f, 0xa75e, 0xa761, 0xa760, 0xa763, + 0xa762, 0xa765, 0xa764, 0xa767, 0xa766, 0xa769, 0xa768, 0xa76b, 0xa76a, + 0xa76d, 0xa76c, 0xa76f, 0xa76e, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0xa77a, 0xa779, 0xa77c, 0xa77b, 0x1d79, + 0xa77f, 0xa77e, 0xa781, 0xa780, 0xa783, 0xa782, 0xa785, 0xa784, 0xa787, + 0xa786, 0x0000, 0x0000, 0x0000, 0xa78c, 0xa78b, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 168, index 29 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 169, index 30 */ + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + 0x0009, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 170, index 31 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 251, index 32 */ + 0x100000f, 0x1000016, 0x100001d, 0x1000024, 0x100002d, 0x1000036, + 0x100003d, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x100004f, 0x100005a, 0x1000065, + 0x1000070, 0x100007b, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000 + }, + { + /* page 255, index 33 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, + 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xff41, 0xff42, 0xff43, + 0xff44, 0xff45, 0xff46, 0xff47, 0xff48, 0xff49, 0xff4a, 0xff4b, 0xff4c, + 0xff4d, 0xff4e, 0xff4f, 0xff50, 0xff51, 0xff52, 0xff53, 0xff54, 0xff55, + 0xff56, 0xff57, 0xff58, 0xff59, 0xff5a, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0xff21, 0xff22, 0xff23, 0xff24, 0xff25, 0xff26, 0xff27, + 0xff28, 0xff29, 0xff2a, 0xff2b, 0xff2c, 0xff2d, 0xff2e, 0xff2f, 0xff30, + 0xff31, 0xff32, 0xff33, 0xff34, 0xff35, 0xff36, 0xff37, 0xff38, 0xff39, + 0xff3a, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 260, index 34 */ + 0x10428, 0x10429, 0x1042a, 0x1042b, 0x1042c, 0x1042d, 0x1042e, 0x1042f, + 0x10430, 0x10431, 0x10432, 0x10433, 0x10434, 0x10435, 0x10436, 0x10437, + 0x10438, 0x10439, 0x1043a, 0x1043b, 0x1043c, 0x1043d, 0x1043e, 0x1043f, + 0x10440, 0x10441, 0x10442, 0x10443, 0x10444, 0x10445, 0x10446, 0x10447, + 0x10448, 0x10449, 0x1044a, 0x1044b, 0x1044c, 0x1044d, 0x1044e, 0x1044f, + 0x10400, 0x10401, 0x10402, 0x10403, 0x10404, 0x10405, 0x10406, 0x10407, + 0x10408, 0x10409, 0x1040a, 0x1040b, 0x1040c, 0x1040d, 0x1040e, 0x1040f, + 0x10410, 0x10411, 0x10412, 0x10413, 0x10414, 0x10415, 0x10416, 0x10417, + 0x10418, 0x10419, 0x1041a, 0x1041b, 0x1041c, 0x1041d, 0x1041e, 0x1041f, + 0x10420, 0x10421, 0x10422, 0x10423, 0x10424, 0x10425, 0x10426, 0x10427, + 0x0000, 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 + }, + { + /* page 471, index 35 */ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, 0x0009, + 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, 0x0008, + 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007, + 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, + 0x0007, 0x0008, 0x0009, 0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, + 0x0006, 0x0007, 0x0008, 0x0009 + } }; /* U+0000 through U+2FAFF */ static const int16_t attr_table_part1[763] = { - 0 /* page 0 */, - 1 /* page 1 */, - 2 /* page 2 */, - 3 /* page 3 */, - 4 /* page 4 */, - 5 /* page 5 */, - 6 /* page 6 */, - 7 /* page 7 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 8 /* page 9 */, - 9 /* page 10 */, - 10 /* page 11 */, - 11 /* page 12 */, - 12 /* page 13 */, - 13 /* page 14 */, - 14 /* page 15 */, - 15 /* page 16 */, - 0x0000 
+ G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 16 /* page 23 */, - 17 /* page 24 */, - 18 /* page 25 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 19 /* page 27 */, - 20 /* page 28 */, - 21 /* page 29 */, - 22 /* page 30 */, - 23 /* page 31 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 24 /* page 33 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 25 /* page 44 */, - 26 /* page 45 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 
0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 27 /* page 166 */, - 28 /* page 167 */, - 29 /* page 168 */, - 30 /* page 169 */, - 31 /* page 170 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 
0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 32 /* page 251 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 33 /* page 255 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 34 /* page 260 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 
+ G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 35 /* page 471 */, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX + 0 /* page 0 */, + 1 /* page 1 */, + 2 /* page 2 */, + 3 /* page 3 */, + 4 /* page 4 */, + 5 /* page 5 */, + 6 /* page 6 */, + 7 /* page 7 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 8 /* page 9 */, + 9 /* page 10 */, + 10 /* page 11 */, + 11 /* page 12 */, + 12 /* page 13 */, + 13 /* page 14 */, + 14 /* page 15 */, + 15 /* page 16 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 16 /* page 23 */, + 17 /* page 24 */, + 18 /* page 25 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 19 /* page 27 */, + 20 /* page 28 */, + 21 /* page 29 */, + 22 /* page 30 */, + 23 /* page 31 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 24 /* page 33 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 25 /* page 44 */, + 26 /* page 45 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 
+ G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 27 /* page 166 */, + 28 /* page 167 */, + 29 /* page 168 */, + 30 /* page 169 */, + 31 /* page 170 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 
0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 32 /* page 251 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 33 /* page 255 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 34 /* page 260 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, 
+ 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 35 /* page 471 */, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX }; /* U+E0000 through U+10FFFF */ static const int16_t attr_table_part2[768] = { - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX, - 0x0000 + G_UNICODE_MAX_TABLE_INDEX + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + 
G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX, + 0x0000 + G_UNICODE_MAX_TABLE_INDEX }; static const gunichar title_table[][3] = { - { 0x01c5, 0x01c4, 0x01c6 
}, - { 0x01c8, 0x01c7, 0x01c9 }, - { 0x01cb, 0x01ca, 0x01cc }, - { 0x01f2, 0x01f1, 0x01f3 }, - { 0x1f88, 0x0000, 0x1f80 }, - { 0x1f89, 0x0000, 0x1f81 }, - { 0x1f8a, 0x0000, 0x1f82 }, - { 0x1f8b, 0x0000, 0x1f83 }, - { 0x1f8c, 0x0000, 0x1f84 }, - { 0x1f8d, 0x0000, 0x1f85 }, - { 0x1f8e, 0x0000, 0x1f86 }, - { 0x1f8f, 0x0000, 0x1f87 }, - { 0x1f98, 0x0000, 0x1f90 }, - { 0x1f99, 0x0000, 0x1f91 }, - { 0x1f9a, 0x0000, 0x1f92 }, - { 0x1f9b, 0x0000, 0x1f93 }, - { 0x1f9c, 0x0000, 0x1f94 }, - { 0x1f9d, 0x0000, 0x1f95 }, - { 0x1f9e, 0x0000, 0x1f96 }, - { 0x1f9f, 0x0000, 0x1f97 }, - { 0x1fa8, 0x0000, 0x1fa0 }, - { 0x1fa9, 0x0000, 0x1fa1 }, - { 0x1faa, 0x0000, 0x1fa2 }, - { 0x1fab, 0x0000, 0x1fa3 }, - { 0x1fac, 0x0000, 0x1fa4 }, - { 0x1fad, 0x0000, 0x1fa5 }, - { 0x1fae, 0x0000, 0x1fa6 }, - { 0x1faf, 0x0000, 0x1fa7 }, - { 0x1fbc, 0x0000, 0x1fb3 }, - { 0x1fcc, 0x0000, 0x1fc3 }, - { 0x1ffc, 0x0000, 0x1ff3 } + { 0x01c5, 0x01c4, 0x01c6 }, + { 0x01c8, 0x01c7, 0x01c9 }, + { 0x01cb, 0x01ca, 0x01cc }, + { 0x01f2, 0x01f1, 0x01f3 }, + { 0x1f88, 0x0000, 0x1f80 }, + { 0x1f89, 0x0000, 0x1f81 }, + { 0x1f8a, 0x0000, 0x1f82 }, + { 0x1f8b, 0x0000, 0x1f83 }, + { 0x1f8c, 0x0000, 0x1f84 }, + { 0x1f8d, 0x0000, 0x1f85 }, + { 0x1f8e, 0x0000, 0x1f86 }, + { 0x1f8f, 0x0000, 0x1f87 }, + { 0x1f98, 0x0000, 0x1f90 }, + { 0x1f99, 0x0000, 0x1f91 }, + { 0x1f9a, 0x0000, 0x1f92 }, + { 0x1f9b, 0x0000, 0x1f93 }, + { 0x1f9c, 0x0000, 0x1f94 }, + { 0x1f9d, 0x0000, 0x1f95 }, + { 0x1f9e, 0x0000, 0x1f96 }, + { 0x1f9f, 0x0000, 0x1f97 }, + { 0x1fa8, 0x0000, 0x1fa0 }, + { 0x1fa9, 0x0000, 0x1fa1 }, + { 0x1faa, 0x0000, 0x1fa2 }, + { 0x1fab, 0x0000, 0x1fa3 }, + { 0x1fac, 0x0000, 0x1fa4 }, + { 0x1fad, 0x0000, 0x1fa5 }, + { 0x1fae, 0x0000, 0x1fa6 }, + { 0x1faf, 0x0000, 0x1fa7 }, + { 0x1fbc, 0x0000, 0x1fb3 }, + { 0x1fcc, 0x0000, 0x1fc3 }, + { 0x1ffc, 0x0000, 0x1ff3 } }; /* Table of special cases for case conversion; each record contains - * First, the best single character mapping to lowercase if Lu, - * and to uppercase if Ll, 
followed by the output mapping for the two cases + * First, the best single character mapping to lowercase if Lu, + * and to uppercase if Ll, followed by the output mapping for the two cases * other than the case of the codepoint, in the order [Ll],[Lu],[Lt], * encoded in UTF-8, separated and terminated by a null character. */ static const char special_case_table[] = { - "\x00\x53\x53\x00\x53\x73\0" /* offset 0 */ - "\x69\x69\xcc\x87\x00\xc4\xb0\0" /* offset 7 */ - "\x00\x46\x46\x00\x46\x66\0" /* offset 15 */ - "\x00\x46\x49\x00\x46\x69\0" /* offset 22 */ - "\x00\x46\x4c\x00\x46\x6c\0" /* offset 29 */ - "\x00\x46\x46\x49\x00\x46\x66\x69\0" /* offset 36 */ - "\x00\x46\x46\x4c\x00\x46\x66\x6c\0" /* offset 45 */ - "\x00\x53\x54\x00\x53\x74\0" /* offset 54 */ - "\x00\x53\x54\x00\x53\x74\0" /* offset 61 */ - "\x00\xd4\xb5\xd5\x92\x00\xd4\xb5\xd6\x82\0" /* offset 68 */ - "\x00\xd5\x84\xd5\x86\x00\xd5\x84\xd5\xb6\0" /* offset 79 */ - "\x00\xd5\x84\xd4\xb5\x00\xd5\x84\xd5\xa5\0" /* offset 90 */ - "\x00\xd5\x84\xd4\xbb\x00\xd5\x84\xd5\xab\0" /* offset 101 */ - "\x00\xd5\x8e\xd5\x86\x00\xd5\x8e\xd5\xb6\0" /* offset 112 */ - "\x00\xd5\x84\xd4\xbd\x00\xd5\x84\xd5\xad\0" /* offset 123 */ - "\x00\xca\xbc\x4e\x00\xca\xbc\x4e\0" /* offset 134 */ - "\x00\xce\x99\xcc\x88\xcc\x81\x00\xce\x99\xcc\x88\xcc\x81\0" /* offset 143 */ - "\x00\xce\xa5\xcc\x88\xcc\x81\x00\xce\xa5\xcc\x88\xcc\x81\0" /* offset 158 */ - "\x00\x4a\xcc\x8c\x00\x4a\xcc\x8c\0" /* offset 173 */ - "\x00\x48\xcc\xb1\x00\x48\xcc\xb1\0" /* offset 182 */ - "\x00\x54\xcc\x88\x00\x54\xcc\x88\0" /* offset 191 */ - "\x00\x57\xcc\x8a\x00\x57\xcc\x8a\0" /* offset 200 */ - "\x00\x59\xcc\x8a\x00\x59\xcc\x8a\0" /* offset 209 */ - "\x00\x41\xca\xbe\x00\x41\xca\xbe\0" /* offset 218 */ - "\x00\xce\xa5\xcc\x93\x00\xce\xa5\xcc\x93\0" /* offset 227 */ - "\x00\xce\xa5\xcc\x93\xcc\x80\x00\xce\xa5\xcc\x93\xcc\x80\0" /* offset 238 */ - "\x00\xce\xa5\xcc\x93\xcc\x81\x00\xce\xa5\xcc\x93\xcc\x81\0" /* offset 253 */ - 
"\x00\xce\xa5\xcc\x93\xcd\x82\x00\xce\xa5\xcc\x93\xcd\x82\0" /* offset 268 */ - "\x00\xce\x91\xcd\x82\x00\xce\x91\xcd\x82\0" /* offset 283 */ - "\x00\xce\x97\xcd\x82\x00\xce\x97\xcd\x82\0" /* offset 294 */ - "\x00\xce\x99\xcc\x88\xcc\x80\x00\xce\x99\xcc\x88\xcc\x80\0" /* offset 305 */ - "\x00\xce\x99\xcc\x88\xcc\x81\x00\xce\x99\xcc\x88\xcc\x81\0" /* offset 320 */ - "\x00\xce\x99\xcd\x82\x00\xce\x99\xcd\x82\0" /* offset 335 */ - "\x00\xce\x99\xcc\x88\xcd\x82\x00\xce\x99\xcc\x88\xcd\x82\0" /* offset 346 */ - "\x00\xce\xa5\xcc\x88\xcc\x80\x00\xce\xa5\xcc\x88\xcc\x80\0" /* offset 361 */ - "\x00\xce\xa5\xcc\x88\xcc\x81\x00\xce\xa5\xcc\x88\xcc\x81\0" /* offset 376 */ - "\x00\xce\xa1\xcc\x93\x00\xce\xa1\xcc\x93\0" /* offset 391 */ - "\x00\xce\xa5\xcd\x82\x00\xce\xa5\xcd\x82\0" /* offset 402 */ - "\x00\xce\xa5\xcc\x88\xcd\x82\x00\xce\xa5\xcc\x88\xcd\x82\0" /* offset 413 */ - "\x00\xce\xa9\xcd\x82\x00\xce\xa9\xcd\x82\0" /* offset 428 */ - "\xe1\xbe\x88\xe1\xbc\x88\xce\x99\x00\xe1\xbe\x88\0" /* offset 439 */ - "\xe1\xbe\x89\xe1\xbc\x89\xce\x99\x00\xe1\xbe\x89\0" /* offset 452 */ - "\xe1\xbe\x8a\xe1\xbc\x8a\xce\x99\x00\xe1\xbe\x8a\0" /* offset 465 */ - "\xe1\xbe\x8b\xe1\xbc\x8b\xce\x99\x00\xe1\xbe\x8b\0" /* offset 478 */ - "\xe1\xbe\x8c\xe1\xbc\x8c\xce\x99\x00\xe1\xbe\x8c\0" /* offset 491 */ - "\xe1\xbe\x8d\xe1\xbc\x8d\xce\x99\x00\xe1\xbe\x8d\0" /* offset 504 */ - "\xe1\xbe\x8e\xe1\xbc\x8e\xce\x99\x00\xe1\xbe\x8e\0" /* offset 517 */ - "\xe1\xbe\x8f\xe1\xbc\x8f\xce\x99\x00\xe1\xbe\x8f\0" /* offset 530 */ - "\xe1\xbe\x80\x00\xe1\xbc\x88\xce\x99\0" /* offset 543 */ - "\xe1\xbe\x81\x00\xe1\xbc\x89\xce\x99\0" /* offset 553 */ - "\xe1\xbe\x82\x00\xe1\xbc\x8a\xce\x99\0" /* offset 563 */ - "\xe1\xbe\x83\x00\xe1\xbc\x8b\xce\x99\0" /* offset 573 */ - "\xe1\xbe\x84\x00\xe1\xbc\x8c\xce\x99\0" /* offset 583 */ - "\xe1\xbe\x85\x00\xe1\xbc\x8d\xce\x99\0" /* offset 593 */ - "\xe1\xbe\x86\x00\xe1\xbc\x8e\xce\x99\0" /* offset 603 */ - "\xe1\xbe\x87\x00\xe1\xbc\x8f\xce\x99\0" /* offset 613 */ - 
"\xe1\xbe\x98\xe1\xbc\xa8\xce\x99\x00\xe1\xbe\x98\0" /* offset 623 */ - "\xe1\xbe\x99\xe1\xbc\xa9\xce\x99\x00\xe1\xbe\x99\0" /* offset 636 */ - "\xe1\xbe\x9a\xe1\xbc\xaa\xce\x99\x00\xe1\xbe\x9a\0" /* offset 649 */ - "\xe1\xbe\x9b\xe1\xbc\xab\xce\x99\x00\xe1\xbe\x9b\0" /* offset 662 */ - "\xe1\xbe\x9c\xe1\xbc\xac\xce\x99\x00\xe1\xbe\x9c\0" /* offset 675 */ - "\xe1\xbe\x9d\xe1\xbc\xad\xce\x99\x00\xe1\xbe\x9d\0" /* offset 688 */ - "\xe1\xbe\x9e\xe1\xbc\xae\xce\x99\x00\xe1\xbe\x9e\0" /* offset 701 */ - "\xe1\xbe\x9f\xe1\xbc\xaf\xce\x99\x00\xe1\xbe\x9f\0" /* offset 714 */ - "\xe1\xbe\x90\x00\xe1\xbc\xa8\xce\x99\0" /* offset 727 */ - "\xe1\xbe\x91\x00\xe1\xbc\xa9\xce\x99\0" /* offset 737 */ - "\xe1\xbe\x92\x00\xe1\xbc\xaa\xce\x99\0" /* offset 747 */ - "\xe1\xbe\x93\x00\xe1\xbc\xab\xce\x99\0" /* offset 757 */ - "\xe1\xbe\x94\x00\xe1\xbc\xac\xce\x99\0" /* offset 767 */ - "\xe1\xbe\x95\x00\xe1\xbc\xad\xce\x99\0" /* offset 777 */ - "\xe1\xbe\x96\x00\xe1\xbc\xae\xce\x99\0" /* offset 787 */ - "\xe1\xbe\x97\x00\xe1\xbc\xaf\xce\x99\0" /* offset 797 */ - "\xe1\xbe\xa8\xe1\xbd\xa8\xce\x99\x00\xe1\xbe\xa8\0" /* offset 807 */ - "\xe1\xbe\xa9\xe1\xbd\xa9\xce\x99\x00\xe1\xbe\xa9\0" /* offset 820 */ - "\xe1\xbe\xaa\xe1\xbd\xaa\xce\x99\x00\xe1\xbe\xaa\0" /* offset 833 */ - "\xe1\xbe\xab\xe1\xbd\xab\xce\x99\x00\xe1\xbe\xab\0" /* offset 846 */ - "\xe1\xbe\xac\xe1\xbd\xac\xce\x99\x00\xe1\xbe\xac\0" /* offset 859 */ - "\xe1\xbe\xad\xe1\xbd\xad\xce\x99\x00\xe1\xbe\xad\0" /* offset 872 */ - "\xe1\xbe\xae\xe1\xbd\xae\xce\x99\x00\xe1\xbe\xae\0" /* offset 885 */ - "\xe1\xbe\xaf\xe1\xbd\xaf\xce\x99\x00\xe1\xbe\xaf\0" /* offset 898 */ - "\xe1\xbe\xa0\x00\xe1\xbd\xa8\xce\x99\0" /* offset 911 */ - "\xe1\xbe\xa1\x00\xe1\xbd\xa9\xce\x99\0" /* offset 921 */ - "\xe1\xbe\xa2\x00\xe1\xbd\xaa\xce\x99\0" /* offset 931 */ - "\xe1\xbe\xa3\x00\xe1\xbd\xab\xce\x99\0" /* offset 941 */ - "\xe1\xbe\xa4\x00\xe1\xbd\xac\xce\x99\0" /* offset 951 */ - "\xe1\xbe\xa5\x00\xe1\xbd\xad\xce\x99\0" /* offset 961 */ - 
"\xe1\xbe\xa6\x00\xe1\xbd\xae\xce\x99\0" /* offset 971 */ - "\xe1\xbe\xa7\x00\xe1\xbd\xaf\xce\x99\0" /* offset 981 */ - "\xe1\xbe\xbc\xce\x91\xce\x99\x00\xe1\xbe\xbc\0" /* offset 991 */ - "\xe1\xbe\xb3\x00\xce\x91\xce\x99\0" /* offset 1003 */ - "\xe1\xbf\x8c\xce\x97\xce\x99\x00\xe1\xbf\x8c\0" /* offset 1012 */ - "\xe1\xbf\x83\x00\xce\x97\xce\x99\0" /* offset 1024 */ - "\xe1\xbf\xbc\xce\xa9\xce\x99\x00\xe1\xbf\xbc\0" /* offset 1033 */ - "\xe1\xbf\xb3\x00\xce\xa9\xce\x99\0" /* offset 1045 */ - "\x00\xe1\xbe\xba\xce\x99\x00\xe1\xbe\xba\xcd\x85\0" /* offset 1054 */ - "\x00\xce\x86\xce\x99\x00\xce\x86\xcd\x85\0" /* offset 1067 */ - "\x00\xe1\xbf\x8a\xce\x99\x00\xe1\xbf\x8a\xcd\x85\0" /* offset 1078 */ - "\x00\xce\x89\xce\x99\x00\xce\x89\xcd\x85\0" /* offset 1091 */ - "\x00\xe1\xbf\xba\xce\x99\x00\xe1\xbf\xba\xcd\x85\0" /* offset 1102 */ - "\x00\xce\x8f\xce\x99\x00\xce\x8f\xcd\x85\0" /* offset 1115 */ - "\x00\xce\x91\xcd\x82\xce\x99\x00\xce\x91\xcd\x82\xcd\x85\0" /* offset 1126 */ - "\x00\xce\x97\xcd\x82\xce\x99\x00\xce\x97\xcd\x82\xcd\x85\0" /* offset 1141 */ - "\x00\xce\xa9\xcd\x82\xce\x99\x00\xce\xa9\xcd\x82\xcd\x85\0" /* offset 1156 */ + "\x00\x53\x53\x00\x53\x73\0" /* offset 0 */ + "\x69\x69\xcc\x87\x00\xc4\xb0\0" /* offset 7 */ + "\x00\x46\x46\x00\x46\x66\0" /* offset 15 */ + "\x00\x46\x49\x00\x46\x69\0" /* offset 22 */ + "\x00\x46\x4c\x00\x46\x6c\0" /* offset 29 */ + "\x00\x46\x46\x49\x00\x46\x66\x69\0" /* offset 36 */ + "\x00\x46\x46\x4c\x00\x46\x66\x6c\0" /* offset 45 */ + "\x00\x53\x54\x00\x53\x74\0" /* offset 54 */ + "\x00\x53\x54\x00\x53\x74\0" /* offset 61 */ + "\x00\xd4\xb5\xd5\x92\x00\xd4\xb5\xd6\x82\0" /* offset 68 */ + "\x00\xd5\x84\xd5\x86\x00\xd5\x84\xd5\xb6\0" /* offset 79 */ + "\x00\xd5\x84\xd4\xb5\x00\xd5\x84\xd5\xa5\0" /* offset 90 */ + "\x00\xd5\x84\xd4\xbb\x00\xd5\x84\xd5\xab\0" /* offset 101 */ + "\x00\xd5\x8e\xd5\x86\x00\xd5\x8e\xd5\xb6\0" /* offset 112 */ + "\x00\xd5\x84\xd4\xbd\x00\xd5\x84\xd5\xad\0" /* offset 123 */ + 
"\x00\xca\xbc\x4e\x00\xca\xbc\x4e\0" /* offset 134 */ + "\x00\xce\x99\xcc\x88\xcc\x81\x00\xce\x99\xcc\x88\xcc\x81\0" /* offset 143 */ + "\x00\xce\xa5\xcc\x88\xcc\x81\x00\xce\xa5\xcc\x88\xcc\x81\0" /* offset 158 */ + "\x00\x4a\xcc\x8c\x00\x4a\xcc\x8c\0" /* offset 173 */ + "\x00\x48\xcc\xb1\x00\x48\xcc\xb1\0" /* offset 182 */ + "\x00\x54\xcc\x88\x00\x54\xcc\x88\0" /* offset 191 */ + "\x00\x57\xcc\x8a\x00\x57\xcc\x8a\0" /* offset 200 */ + "\x00\x59\xcc\x8a\x00\x59\xcc\x8a\0" /* offset 209 */ + "\x00\x41\xca\xbe\x00\x41\xca\xbe\0" /* offset 218 */ + "\x00\xce\xa5\xcc\x93\x00\xce\xa5\xcc\x93\0" /* offset 227 */ + "\x00\xce\xa5\xcc\x93\xcc\x80\x00\xce\xa5\xcc\x93\xcc\x80\0" /* offset 238 */ + "\x00\xce\xa5\xcc\x93\xcc\x81\x00\xce\xa5\xcc\x93\xcc\x81\0" /* offset 253 */ + "\x00\xce\xa5\xcc\x93\xcd\x82\x00\xce\xa5\xcc\x93\xcd\x82\0" /* offset 268 */ + "\x00\xce\x91\xcd\x82\x00\xce\x91\xcd\x82\0" /* offset 283 */ + "\x00\xce\x97\xcd\x82\x00\xce\x97\xcd\x82\0" /* offset 294 */ + "\x00\xce\x99\xcc\x88\xcc\x80\x00\xce\x99\xcc\x88\xcc\x80\0" /* offset 305 */ + "\x00\xce\x99\xcc\x88\xcc\x81\x00\xce\x99\xcc\x88\xcc\x81\0" /* offset 320 */ + "\x00\xce\x99\xcd\x82\x00\xce\x99\xcd\x82\0" /* offset 335 */ + "\x00\xce\x99\xcc\x88\xcd\x82\x00\xce\x99\xcc\x88\xcd\x82\0" /* offset 346 */ + "\x00\xce\xa5\xcc\x88\xcc\x80\x00\xce\xa5\xcc\x88\xcc\x80\0" /* offset 361 */ + "\x00\xce\xa5\xcc\x88\xcc\x81\x00\xce\xa5\xcc\x88\xcc\x81\0" /* offset 376 */ + "\x00\xce\xa1\xcc\x93\x00\xce\xa1\xcc\x93\0" /* offset 391 */ + "\x00\xce\xa5\xcd\x82\x00\xce\xa5\xcd\x82\0" /* offset 402 */ + "\x00\xce\xa5\xcc\x88\xcd\x82\x00\xce\xa5\xcc\x88\xcd\x82\0" /* offset 413 */ + "\x00\xce\xa9\xcd\x82\x00\xce\xa9\xcd\x82\0" /* offset 428 */ + "\xe1\xbe\x88\xe1\xbc\x88\xce\x99\x00\xe1\xbe\x88\0" /* offset 439 */ + "\xe1\xbe\x89\xe1\xbc\x89\xce\x99\x00\xe1\xbe\x89\0" /* offset 452 */ + "\xe1\xbe\x8a\xe1\xbc\x8a\xce\x99\x00\xe1\xbe\x8a\0" /* offset 465 */ + "\xe1\xbe\x8b\xe1\xbc\x8b\xce\x99\x00\xe1\xbe\x8b\0" /* offset 
478 */ + "\xe1\xbe\x8c\xe1\xbc\x8c\xce\x99\x00\xe1\xbe\x8c\0" /* offset 491 */ + "\xe1\xbe\x8d\xe1\xbc\x8d\xce\x99\x00\xe1\xbe\x8d\0" /* offset 504 */ + "\xe1\xbe\x8e\xe1\xbc\x8e\xce\x99\x00\xe1\xbe\x8e\0" /* offset 517 */ + "\xe1\xbe\x8f\xe1\xbc\x8f\xce\x99\x00\xe1\xbe\x8f\0" /* offset 530 */ + "\xe1\xbe\x80\x00\xe1\xbc\x88\xce\x99\0" /* offset 543 */ + "\xe1\xbe\x81\x00\xe1\xbc\x89\xce\x99\0" /* offset 553 */ + "\xe1\xbe\x82\x00\xe1\xbc\x8a\xce\x99\0" /* offset 563 */ + "\xe1\xbe\x83\x00\xe1\xbc\x8b\xce\x99\0" /* offset 573 */ + "\xe1\xbe\x84\x00\xe1\xbc\x8c\xce\x99\0" /* offset 583 */ + "\xe1\xbe\x85\x00\xe1\xbc\x8d\xce\x99\0" /* offset 593 */ + "\xe1\xbe\x86\x00\xe1\xbc\x8e\xce\x99\0" /* offset 603 */ + "\xe1\xbe\x87\x00\xe1\xbc\x8f\xce\x99\0" /* offset 613 */ + "\xe1\xbe\x98\xe1\xbc\xa8\xce\x99\x00\xe1\xbe\x98\0" /* offset 623 */ + "\xe1\xbe\x99\xe1\xbc\xa9\xce\x99\x00\xe1\xbe\x99\0" /* offset 636 */ + "\xe1\xbe\x9a\xe1\xbc\xaa\xce\x99\x00\xe1\xbe\x9a\0" /* offset 649 */ + "\xe1\xbe\x9b\xe1\xbc\xab\xce\x99\x00\xe1\xbe\x9b\0" /* offset 662 */ + "\xe1\xbe\x9c\xe1\xbc\xac\xce\x99\x00\xe1\xbe\x9c\0" /* offset 675 */ + "\xe1\xbe\x9d\xe1\xbc\xad\xce\x99\x00\xe1\xbe\x9d\0" /* offset 688 */ + "\xe1\xbe\x9e\xe1\xbc\xae\xce\x99\x00\xe1\xbe\x9e\0" /* offset 701 */ + "\xe1\xbe\x9f\xe1\xbc\xaf\xce\x99\x00\xe1\xbe\x9f\0" /* offset 714 */ + "\xe1\xbe\x90\x00\xe1\xbc\xa8\xce\x99\0" /* offset 727 */ + "\xe1\xbe\x91\x00\xe1\xbc\xa9\xce\x99\0" /* offset 737 */ + "\xe1\xbe\x92\x00\xe1\xbc\xaa\xce\x99\0" /* offset 747 */ + "\xe1\xbe\x93\x00\xe1\xbc\xab\xce\x99\0" /* offset 757 */ + "\xe1\xbe\x94\x00\xe1\xbc\xac\xce\x99\0" /* offset 767 */ + "\xe1\xbe\x95\x00\xe1\xbc\xad\xce\x99\0" /* offset 777 */ + "\xe1\xbe\x96\x00\xe1\xbc\xae\xce\x99\0" /* offset 787 */ + "\xe1\xbe\x97\x00\xe1\xbc\xaf\xce\x99\0" /* offset 797 */ + "\xe1\xbe\xa8\xe1\xbd\xa8\xce\x99\x00\xe1\xbe\xa8\0" /* offset 807 */ + "\xe1\xbe\xa9\xe1\xbd\xa9\xce\x99\x00\xe1\xbe\xa9\0" /* offset 820 */ + 
"\xe1\xbe\xaa\xe1\xbd\xaa\xce\x99\x00\xe1\xbe\xaa\0" /* offset 833 */ + "\xe1\xbe\xab\xe1\xbd\xab\xce\x99\x00\xe1\xbe\xab\0" /* offset 846 */ + "\xe1\xbe\xac\xe1\xbd\xac\xce\x99\x00\xe1\xbe\xac\0" /* offset 859 */ + "\xe1\xbe\xad\xe1\xbd\xad\xce\x99\x00\xe1\xbe\xad\0" /* offset 872 */ + "\xe1\xbe\xae\xe1\xbd\xae\xce\x99\x00\xe1\xbe\xae\0" /* offset 885 */ + "\xe1\xbe\xaf\xe1\xbd\xaf\xce\x99\x00\xe1\xbe\xaf\0" /* offset 898 */ + "\xe1\xbe\xa0\x00\xe1\xbd\xa8\xce\x99\0" /* offset 911 */ + "\xe1\xbe\xa1\x00\xe1\xbd\xa9\xce\x99\0" /* offset 921 */ + "\xe1\xbe\xa2\x00\xe1\xbd\xaa\xce\x99\0" /* offset 931 */ + "\xe1\xbe\xa3\x00\xe1\xbd\xab\xce\x99\0" /* offset 941 */ + "\xe1\xbe\xa4\x00\xe1\xbd\xac\xce\x99\0" /* offset 951 */ + "\xe1\xbe\xa5\x00\xe1\xbd\xad\xce\x99\0" /* offset 961 */ + "\xe1\xbe\xa6\x00\xe1\xbd\xae\xce\x99\0" /* offset 971 */ + "\xe1\xbe\xa7\x00\xe1\xbd\xaf\xce\x99\0" /* offset 981 */ + "\xe1\xbe\xbc\xce\x91\xce\x99\x00\xe1\xbe\xbc\0" /* offset 991 */ + "\xe1\xbe\xb3\x00\xce\x91\xce\x99\0" /* offset 1003 */ + "\xe1\xbf\x8c\xce\x97\xce\x99\x00\xe1\xbf\x8c\0" /* offset 1012 */ + "\xe1\xbf\x83\x00\xce\x97\xce\x99\0" /* offset 1024 */ + "\xe1\xbf\xbc\xce\xa9\xce\x99\x00\xe1\xbf\xbc\0" /* offset 1033 */ + "\xe1\xbf\xb3\x00\xce\xa9\xce\x99\0" /* offset 1045 */ + "\x00\xe1\xbe\xba\xce\x99\x00\xe1\xbe\xba\xcd\x85\0" /* offset 1054 */ + "\x00\xce\x86\xce\x99\x00\xce\x86\xcd\x85\0" /* offset 1067 */ + "\x00\xe1\xbf\x8a\xce\x99\x00\xe1\xbf\x8a\xcd\x85\0" /* offset 1078 */ + "\x00\xce\x89\xce\x99\x00\xce\x89\xcd\x85\0" /* offset 1091 */ + "\x00\xe1\xbf\xba\xce\x99\x00\xe1\xbf\xba\xcd\x85\0" /* offset 1102 */ + "\x00\xce\x8f\xce\x99\x00\xce\x8f\xcd\x85\0" /* offset 1115 */ + "\x00\xce\x91\xcd\x82\xce\x99\x00\xce\x91\xcd\x82\xcd\x85\0" /* offset 1126 */ + "\x00\xce\x97\xcd\x82\xce\x99\x00\xce\x97\xcd\x82\xcd\x85\0" /* offset 1141 */ + "\x00\xce\xa9\xcd\x82\xce\x99\x00\xce\xa9\xcd\x82\xcd\x85\0" /* offset 1156 */ }; /* Table of casefolding cases that can't be derived 
by lowercasing */ static const struct { - uint16_t ch; - char data[7]; + uint16_t ch; + char data[7]; } casefold_table[] = { - { 0x00b5, "\xce\xbc" }, - { 0x00df, "\x73\x73" }, - { 0x0130, "\x69\xcc\x87" }, - { 0x0149, "\xca\xbc\x6e" }, - { 0x017f, "\x73" }, - { 0x01f0, "\x6a\xcc\x8c" }, - { 0x0345, "\xce\xb9" }, - { 0x0390, "\xce\xb9\xcc\x88\xcc\x81" }, - { 0x03b0, "\xcf\x85\xcc\x88\xcc\x81" }, - { 0x03c2, "\xcf\x83" }, - { 0x03d0, "\xce\xb2" }, - { 0x03d1, "\xce\xb8" }, - { 0x03d5, "\xcf\x86" }, - { 0x03d6, "\xcf\x80" }, - { 0x03f0, "\xce\xba" }, - { 0x03f1, "\xcf\x81" }, - { 0x03f5, "\xce\xb5" }, - { 0x0587, "\xd5\xa5\xd6\x82" }, - { 0x1e96, "\x68\xcc\xb1" }, - { 0x1e97, "\x74\xcc\x88" }, - { 0x1e98, "\x77\xcc\x8a" }, - { 0x1e99, "\x79\xcc\x8a" }, - { 0x1e9a, "\x61\xca\xbe" }, - { 0x1e9b, "\xe1\xb9\xa1" }, - { 0x1e9e, "\x73\x73" }, - { 0x1f50, "\xcf\x85\xcc\x93" }, - { 0x1f52, "\xcf\x85\xcc\x93\xcc\x80" }, - { 0x1f54, "\xcf\x85\xcc\x93\xcc\x81" }, - { 0x1f56, "\xcf\x85\xcc\x93\xcd\x82" }, - { 0x1f80, "\xe1\xbc\x80\xce\xb9" }, - { 0x1f81, "\xe1\xbc\x81\xce\xb9" }, - { 0x1f82, "\xe1\xbc\x82\xce\xb9" }, - { 0x1f83, "\xe1\xbc\x83\xce\xb9" }, - { 0x1f84, "\xe1\xbc\x84\xce\xb9" }, - { 0x1f85, "\xe1\xbc\x85\xce\xb9" }, - { 0x1f86, "\xe1\xbc\x86\xce\xb9" }, - { 0x1f87, "\xe1\xbc\x87\xce\xb9" }, - { 0x1f88, "\xe1\xbc\x80\xce\xb9" }, - { 0x1f89, "\xe1\xbc\x81\xce\xb9" }, - { 0x1f8a, "\xe1\xbc\x82\xce\xb9" }, - { 0x1f8b, "\xe1\xbc\x83\xce\xb9" }, - { 0x1f8c, "\xe1\xbc\x84\xce\xb9" }, - { 0x1f8d, "\xe1\xbc\x85\xce\xb9" }, - { 0x1f8e, "\xe1\xbc\x86\xce\xb9" }, - { 0x1f8f, "\xe1\xbc\x87\xce\xb9" }, - { 0x1f90, "\xe1\xbc\xa0\xce\xb9" }, - { 0x1f91, "\xe1\xbc\xa1\xce\xb9" }, - { 0x1f92, "\xe1\xbc\xa2\xce\xb9" }, - { 0x1f93, "\xe1\xbc\xa3\xce\xb9" }, - { 0x1f94, "\xe1\xbc\xa4\xce\xb9" }, - { 0x1f95, "\xe1\xbc\xa5\xce\xb9" }, - { 0x1f96, "\xe1\xbc\xa6\xce\xb9" }, - { 0x1f97, "\xe1\xbc\xa7\xce\xb9" }, - { 0x1f98, "\xe1\xbc\xa0\xce\xb9" }, - { 0x1f99, "\xe1\xbc\xa1\xce\xb9" }, - { 
0x1f9a, "\xe1\xbc\xa2\xce\xb9" }, - { 0x1f9b, "\xe1\xbc\xa3\xce\xb9" }, - { 0x1f9c, "\xe1\xbc\xa4\xce\xb9" }, - { 0x1f9d, "\xe1\xbc\xa5\xce\xb9" }, - { 0x1f9e, "\xe1\xbc\xa6\xce\xb9" }, - { 0x1f9f, "\xe1\xbc\xa7\xce\xb9" }, - { 0x1fa0, "\xe1\xbd\xa0\xce\xb9" }, - { 0x1fa1, "\xe1\xbd\xa1\xce\xb9" }, - { 0x1fa2, "\xe1\xbd\xa2\xce\xb9" }, - { 0x1fa3, "\xe1\xbd\xa3\xce\xb9" }, - { 0x1fa4, "\xe1\xbd\xa4\xce\xb9" }, - { 0x1fa5, "\xe1\xbd\xa5\xce\xb9" }, - { 0x1fa6, "\xe1\xbd\xa6\xce\xb9" }, - { 0x1fa7, "\xe1\xbd\xa7\xce\xb9" }, - { 0x1fa8, "\xe1\xbd\xa0\xce\xb9" }, - { 0x1fa9, "\xe1\xbd\xa1\xce\xb9" }, - { 0x1faa, "\xe1\xbd\xa2\xce\xb9" }, - { 0x1fab, "\xe1\xbd\xa3\xce\xb9" }, - { 0x1fac, "\xe1\xbd\xa4\xce\xb9" }, - { 0x1fad, "\xe1\xbd\xa5\xce\xb9" }, - { 0x1fae, "\xe1\xbd\xa6\xce\xb9" }, - { 0x1faf, "\xe1\xbd\xa7\xce\xb9" }, - { 0x1fb2, "\xe1\xbd\xb0\xce\xb9" }, - { 0x1fb3, "\xce\xb1\xce\xb9" }, - { 0x1fb4, "\xce\xac\xce\xb9" }, - { 0x1fb6, "\xce\xb1\xcd\x82" }, - { 0x1fb7, "\xce\xb1\xcd\x82\xce\xb9" }, - { 0x1fbc, "\xce\xb1\xce\xb9" }, - { 0x1fbe, "\xce\xb9" }, - { 0x1fc2, "\xe1\xbd\xb4\xce\xb9" }, - { 0x1fc3, "\xce\xb7\xce\xb9" }, - { 0x1fc4, "\xce\xae\xce\xb9" }, - { 0x1fc6, "\xce\xb7\xcd\x82" }, - { 0x1fc7, "\xce\xb7\xcd\x82\xce\xb9" }, - { 0x1fcc, "\xce\xb7\xce\xb9" }, - { 0x1fd2, "\xce\xb9\xcc\x88\xcc\x80" }, - { 0x1fd3, "\xce\xb9\xcc\x88\xcc\x81" }, - { 0x1fd6, "\xce\xb9\xcd\x82" }, - { 0x1fd7, "\xce\xb9\xcc\x88\xcd\x82" }, - { 0x1fe2, "\xcf\x85\xcc\x88\xcc\x80" }, - { 0x1fe3, "\xcf\x85\xcc\x88\xcc\x81" }, - { 0x1fe4, "\xcf\x81\xcc\x93" }, - { 0x1fe6, "\xcf\x85\xcd\x82" }, - { 0x1fe7, "\xcf\x85\xcc\x88\xcd\x82" }, - { 0x1ff2, "\xe1\xbd\xbc\xce\xb9" }, - { 0x1ff3, "\xcf\x89\xce\xb9" }, - { 0x1ff4, "\xcf\x8e\xce\xb9" }, - { 0x1ff6, "\xcf\x89\xcd\x82" }, - { 0x1ff7, "\xcf\x89\xcd\x82\xce\xb9" }, - { 0x1ffc, "\xcf\x89\xce\xb9" }, - { 0x2160, "\xe2\x85\xb0" }, - { 0x2161, "\xe2\x85\xb1" }, - { 0x2162, "\xe2\x85\xb2" }, - { 0x2163, "\xe2\x85\xb3" }, - { 0x2164, 
"\xe2\x85\xb4" }, - { 0x2165, "\xe2\x85\xb5" }, - { 0x2166, "\xe2\x85\xb6" }, - { 0x2167, "\xe2\x85\xb7" }, - { 0x2168, "\xe2\x85\xb8" }, - { 0x2169, "\xe2\x85\xb9" }, - { 0x216a, "\xe2\x85\xba" }, - { 0x216b, "\xe2\x85\xbb" }, - { 0x216c, "\xe2\x85\xbc" }, - { 0x216d, "\xe2\x85\xbd" }, - { 0x216e, "\xe2\x85\xbe" }, - { 0x216f, "\xe2\x85\xbf" }, - { 0x24b6, "\xe2\x93\x90" }, - { 0x24b7, "\xe2\x93\x91" }, - { 0x24b8, "\xe2\x93\x92" }, - { 0x24b9, "\xe2\x93\x93" }, - { 0x24ba, "\xe2\x93\x94" }, - { 0x24bb, "\xe2\x93\x95" }, - { 0x24bc, "\xe2\x93\x96" }, - { 0x24bd, "\xe2\x93\x97" }, - { 0x24be, "\xe2\x93\x98" }, - { 0x24bf, "\xe2\x93\x99" }, - { 0x24c0, "\xe2\x93\x9a" }, - { 0x24c1, "\xe2\x93\x9b" }, - { 0x24c2, "\xe2\x93\x9c" }, - { 0x24c3, "\xe2\x93\x9d" }, - { 0x24c4, "\xe2\x93\x9e" }, - { 0x24c5, "\xe2\x93\x9f" }, - { 0x24c6, "\xe2\x93\xa0" }, - { 0x24c7, "\xe2\x93\xa1" }, - { 0x24c8, "\xe2\x93\xa2" }, - { 0x24c9, "\xe2\x93\xa3" }, - { 0x24ca, "\xe2\x93\xa4" }, - { 0x24cb, "\xe2\x93\xa5" }, - { 0x24cc, "\xe2\x93\xa6" }, - { 0x24cd, "\xe2\x93\xa7" }, - { 0x24ce, "\xe2\x93\xa8" }, - { 0x24cf, "\xe2\x93\xa9" }, - { 0xfb00, "\x66\x66" }, - { 0xfb01, "\x66\x69" }, - { 0xfb02, "\x66\x6c" }, - { 0xfb03, "\x66\x66\x69" }, - { 0xfb04, "\x66\x66\x6c" }, - { 0xfb05, "\x73\x74" }, - { 0xfb06, "\x73\x74" }, - { 0xfb13, "\xd5\xb4\xd5\xb6" }, - { 0xfb14, "\xd5\xb4\xd5\xa5" }, - { 0xfb15, "\xd5\xb4\xd5\xab" }, - { 0xfb16, "\xd5\xbe\xd5\xb6" }, - { 0xfb17, "\xd5\xb4\xd5\xad" }, + { 0x00b5, "\xce\xbc" }, + { 0x00df, "\x73\x73" }, + { 0x0130, "\x69\xcc\x87" }, + { 0x0149, "\xca\xbc\x6e" }, + { 0x017f, "\x73" }, + { 0x01f0, "\x6a\xcc\x8c" }, + { 0x0345, "\xce\xb9" }, + { 0x0390, "\xce\xb9\xcc\x88\xcc\x81" }, + { 0x03b0, "\xcf\x85\xcc\x88\xcc\x81" }, + { 0x03c2, "\xcf\x83" }, + { 0x03d0, "\xce\xb2" }, + { 0x03d1, "\xce\xb8" }, + { 0x03d5, "\xcf\x86" }, + { 0x03d6, "\xcf\x80" }, + { 0x03f0, "\xce\xba" }, + { 0x03f1, "\xcf\x81" }, + { 0x03f5, "\xce\xb5" }, + { 0x0587, 
"\xd5\xa5\xd6\x82" }, + { 0x1e96, "\x68\xcc\xb1" }, + { 0x1e97, "\x74\xcc\x88" }, + { 0x1e98, "\x77\xcc\x8a" }, + { 0x1e99, "\x79\xcc\x8a" }, + { 0x1e9a, "\x61\xca\xbe" }, + { 0x1e9b, "\xe1\xb9\xa1" }, + { 0x1e9e, "\x73\x73" }, + { 0x1f50, "\xcf\x85\xcc\x93" }, + { 0x1f52, "\xcf\x85\xcc\x93\xcc\x80" }, + { 0x1f54, "\xcf\x85\xcc\x93\xcc\x81" }, + { 0x1f56, "\xcf\x85\xcc\x93\xcd\x82" }, + { 0x1f80, "\xe1\xbc\x80\xce\xb9" }, + { 0x1f81, "\xe1\xbc\x81\xce\xb9" }, + { 0x1f82, "\xe1\xbc\x82\xce\xb9" }, + { 0x1f83, "\xe1\xbc\x83\xce\xb9" }, + { 0x1f84, "\xe1\xbc\x84\xce\xb9" }, + { 0x1f85, "\xe1\xbc\x85\xce\xb9" }, + { 0x1f86, "\xe1\xbc\x86\xce\xb9" }, + { 0x1f87, "\xe1\xbc\x87\xce\xb9" }, + { 0x1f88, "\xe1\xbc\x80\xce\xb9" }, + { 0x1f89, "\xe1\xbc\x81\xce\xb9" }, + { 0x1f8a, "\xe1\xbc\x82\xce\xb9" }, + { 0x1f8b, "\xe1\xbc\x83\xce\xb9" }, + { 0x1f8c, "\xe1\xbc\x84\xce\xb9" }, + { 0x1f8d, "\xe1\xbc\x85\xce\xb9" }, + { 0x1f8e, "\xe1\xbc\x86\xce\xb9" }, + { 0x1f8f, "\xe1\xbc\x87\xce\xb9" }, + { 0x1f90, "\xe1\xbc\xa0\xce\xb9" }, + { 0x1f91, "\xe1\xbc\xa1\xce\xb9" }, + { 0x1f92, "\xe1\xbc\xa2\xce\xb9" }, + { 0x1f93, "\xe1\xbc\xa3\xce\xb9" }, + { 0x1f94, "\xe1\xbc\xa4\xce\xb9" }, + { 0x1f95, "\xe1\xbc\xa5\xce\xb9" }, + { 0x1f96, "\xe1\xbc\xa6\xce\xb9" }, + { 0x1f97, "\xe1\xbc\xa7\xce\xb9" }, + { 0x1f98, "\xe1\xbc\xa0\xce\xb9" }, + { 0x1f99, "\xe1\xbc\xa1\xce\xb9" }, + { 0x1f9a, "\xe1\xbc\xa2\xce\xb9" }, + { 0x1f9b, "\xe1\xbc\xa3\xce\xb9" }, + { 0x1f9c, "\xe1\xbc\xa4\xce\xb9" }, + { 0x1f9d, "\xe1\xbc\xa5\xce\xb9" }, + { 0x1f9e, "\xe1\xbc\xa6\xce\xb9" }, + { 0x1f9f, "\xe1\xbc\xa7\xce\xb9" }, + { 0x1fa0, "\xe1\xbd\xa0\xce\xb9" }, + { 0x1fa1, "\xe1\xbd\xa1\xce\xb9" }, + { 0x1fa2, "\xe1\xbd\xa2\xce\xb9" }, + { 0x1fa3, "\xe1\xbd\xa3\xce\xb9" }, + { 0x1fa4, "\xe1\xbd\xa4\xce\xb9" }, + { 0x1fa5, "\xe1\xbd\xa5\xce\xb9" }, + { 0x1fa6, "\xe1\xbd\xa6\xce\xb9" }, + { 0x1fa7, "\xe1\xbd\xa7\xce\xb9" }, + { 0x1fa8, "\xe1\xbd\xa0\xce\xb9" }, + { 0x1fa9, "\xe1\xbd\xa1\xce\xb9" }, + { 0x1faa, 
"\xe1\xbd\xa2\xce\xb9" }, + { 0x1fab, "\xe1\xbd\xa3\xce\xb9" }, + { 0x1fac, "\xe1\xbd\xa4\xce\xb9" }, + { 0x1fad, "\xe1\xbd\xa5\xce\xb9" }, + { 0x1fae, "\xe1\xbd\xa6\xce\xb9" }, + { 0x1faf, "\xe1\xbd\xa7\xce\xb9" }, + { 0x1fb2, "\xe1\xbd\xb0\xce\xb9" }, + { 0x1fb3, "\xce\xb1\xce\xb9" }, + { 0x1fb4, "\xce\xac\xce\xb9" }, + { 0x1fb6, "\xce\xb1\xcd\x82" }, + { 0x1fb7, "\xce\xb1\xcd\x82\xce\xb9" }, + { 0x1fbc, "\xce\xb1\xce\xb9" }, + { 0x1fbe, "\xce\xb9" }, + { 0x1fc2, "\xe1\xbd\xb4\xce\xb9" }, + { 0x1fc3, "\xce\xb7\xce\xb9" }, + { 0x1fc4, "\xce\xae\xce\xb9" }, + { 0x1fc6, "\xce\xb7\xcd\x82" }, + { 0x1fc7, "\xce\xb7\xcd\x82\xce\xb9" }, + { 0x1fcc, "\xce\xb7\xce\xb9" }, + { 0x1fd2, "\xce\xb9\xcc\x88\xcc\x80" }, + { 0x1fd3, "\xce\xb9\xcc\x88\xcc\x81" }, + { 0x1fd6, "\xce\xb9\xcd\x82" }, + { 0x1fd7, "\xce\xb9\xcc\x88\xcd\x82" }, + { 0x1fe2, "\xcf\x85\xcc\x88\xcc\x80" }, + { 0x1fe3, "\xcf\x85\xcc\x88\xcc\x81" }, + { 0x1fe4, "\xcf\x81\xcc\x93" }, + { 0x1fe6, "\xcf\x85\xcd\x82" }, + { 0x1fe7, "\xcf\x85\xcc\x88\xcd\x82" }, + { 0x1ff2, "\xe1\xbd\xbc\xce\xb9" }, + { 0x1ff3, "\xcf\x89\xce\xb9" }, + { 0x1ff4, "\xcf\x8e\xce\xb9" }, + { 0x1ff6, "\xcf\x89\xcd\x82" }, + { 0x1ff7, "\xcf\x89\xcd\x82\xce\xb9" }, + { 0x1ffc, "\xcf\x89\xce\xb9" }, + { 0x2160, "\xe2\x85\xb0" }, + { 0x2161, "\xe2\x85\xb1" }, + { 0x2162, "\xe2\x85\xb2" }, + { 0x2163, "\xe2\x85\xb3" }, + { 0x2164, "\xe2\x85\xb4" }, + { 0x2165, "\xe2\x85\xb5" }, + { 0x2166, "\xe2\x85\xb6" }, + { 0x2167, "\xe2\x85\xb7" }, + { 0x2168, "\xe2\x85\xb8" }, + { 0x2169, "\xe2\x85\xb9" }, + { 0x216a, "\xe2\x85\xba" }, + { 0x216b, "\xe2\x85\xbb" }, + { 0x216c, "\xe2\x85\xbc" }, + { 0x216d, "\xe2\x85\xbd" }, + { 0x216e, "\xe2\x85\xbe" }, + { 0x216f, "\xe2\x85\xbf" }, + { 0x24b6, "\xe2\x93\x90" }, + { 0x24b7, "\xe2\x93\x91" }, + { 0x24b8, "\xe2\x93\x92" }, + { 0x24b9, "\xe2\x93\x93" }, + { 0x24ba, "\xe2\x93\x94" }, + { 0x24bb, "\xe2\x93\x95" }, + { 0x24bc, "\xe2\x93\x96" }, + { 0x24bd, "\xe2\x93\x97" }, + { 0x24be, "\xe2\x93\x98" }, + { 
0x24bf, "\xe2\x93\x99" }, + { 0x24c0, "\xe2\x93\x9a" }, + { 0x24c1, "\xe2\x93\x9b" }, + { 0x24c2, "\xe2\x93\x9c" }, + { 0x24c3, "\xe2\x93\x9d" }, + { 0x24c4, "\xe2\x93\x9e" }, + { 0x24c5, "\xe2\x93\x9f" }, + { 0x24c6, "\xe2\x93\xa0" }, + { 0x24c7, "\xe2\x93\xa1" }, + { 0x24c8, "\xe2\x93\xa2" }, + { 0x24c9, "\xe2\x93\xa3" }, + { 0x24ca, "\xe2\x93\xa4" }, + { 0x24cb, "\xe2\x93\xa5" }, + { 0x24cc, "\xe2\x93\xa6" }, + { 0x24cd, "\xe2\x93\xa7" }, + { 0x24ce, "\xe2\x93\xa8" }, + { 0x24cf, "\xe2\x93\xa9" }, + { 0xfb00, "\x66\x66" }, + { 0xfb01, "\x66\x69" }, + { 0xfb02, "\x66\x6c" }, + { 0xfb03, "\x66\x66\x69" }, + { 0xfb04, "\x66\x66\x6c" }, + { 0xfb05, "\x73\x74" }, + { 0xfb06, "\x73\x74" }, + { 0xfb13, "\xd5\xb4\xd5\xb6" }, + { 0xfb14, "\xd5\xb4\xd5\xa5" }, + { 0xfb15, "\xd5\xb4\xd5\xab" }, + { 0xfb16, "\xd5\xbe\xd5\xb6" }, + { 0xfb17, "\xd5\xb4\xd5\xad" }, }; #endif /* CHARTABLES_H */ diff --git a/src/core/util/unicode/guniprop.cpp b/src/core/util/unicode/guniprop.cpp index bde7ea4f..97c05520 100644 --- a/src/core/util/unicode/guniprop.cpp +++ b/src/core/util/unicode/guniprop.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -16,7 +16,7 @@ * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. 
* * You should have received a copy of the GNU Lesser General Public @@ -24,7 +24,9 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ - + +// See COPYING file for licensing information. + #include "LuceneInc.h" #include "guniprop.h" @@ -53,8 +55,8 @@ : G_UNICODE_UNASSIGNED)) -#define IS(Type, Class) (((guint)1 << (Type)) & (Class)) -#define OR(Type, Rest) (((guint)1 << (Type)) | (Rest)) +#define IS(Type, Class) (((guint)1 << (Type)) & (Class)) +#define OR(Type, Rest) (((guint)1 << (Type)) | (Rest)) /* Count the number of elements in an array. The array must be defined * as such; using this with a dynamically allocated array will give @@ -62,268 +64,260 @@ */ #define G_N_ELEMENTS(arr) (sizeof (arr) / sizeof ((arr)[0])) -#define ISALPHA(Type) IS ((Type), \ - OR (G_UNICODE_LOWERCASE_LETTER, \ - OR (G_UNICODE_UPPERCASE_LETTER, \ - OR (G_UNICODE_TITLECASE_LETTER, \ - OR (G_UNICODE_MODIFIER_LETTER, \ - OR (G_UNICODE_OTHER_LETTER, 0)))))) - -#define ISALDIGIT(Type) IS ((Type), \ - OR (G_UNICODE_DECIMAL_NUMBER, \ - OR (G_UNICODE_LETTER_NUMBER, \ - OR (G_UNICODE_OTHER_NUMBER, \ - OR (G_UNICODE_LOWERCASE_LETTER, \ - OR (G_UNICODE_UPPERCASE_LETTER, \ - OR (G_UNICODE_TITLECASE_LETTER, \ - OR (G_UNICODE_MODIFIER_LETTER, \ - OR (G_UNICODE_OTHER_LETTER, 0))))))))) - -#define ISMARK(Type) IS ((Type), \ - OR (G_UNICODE_NON_SPACING_MARK, \ - OR (G_UNICODE_COMBINING_MARK, \ - OR (G_UNICODE_ENCLOSING_MARK, 0)))) - -#define ISZEROWIDTHTYPE(Type) IS ((Type), \ - OR (G_UNICODE_NON_SPACING_MARK, \ - OR (G_UNICODE_ENCLOSING_MARK, \ - OR (G_UNICODE_FORMAT, 0)))) - -#define UTF8_COMPUTE(Char, Mask, Len) \ - if (Char < 128) \ - { \ - Len = 1; \ - Mask = 0x7f; \ - } \ - else if ((Char & 0xe0) == 0xc0) \ - { \ - Len = 2; \ - Mask = 0x1f; \ - } \ - else if ((Char & 0xf0) == 0xe0) \ - { \ - Len = 3; \ - Mask = 0x0f; \ - } \ - else if ((Char & 0xf8) == 0xf0) \ - { \ - Len = 4; \ - Mask = 0x07; \ - } \ - else if ((Char & 0xfc) == 0xf8) \ - { \ - Len 
= 5; \ - Mask = 0x03; \ - } \ - else if ((Char & 0xfe) == 0xfc) \ - { \ - Len = 6; \ - Mask = 0x01; \ - } \ - else \ +#define ISALPHA(Type) IS ((Type), \ + OR (G_UNICODE_LOWERCASE_LETTER, \ + OR (G_UNICODE_UPPERCASE_LETTER, \ + OR (G_UNICODE_TITLECASE_LETTER, \ + OR (G_UNICODE_MODIFIER_LETTER, \ + OR (G_UNICODE_OTHER_LETTER, 0)))))) + +#define ISALDIGIT(Type) IS ((Type), \ + OR (G_UNICODE_DECIMAL_NUMBER, \ + OR (G_UNICODE_LETTER_NUMBER, \ + OR (G_UNICODE_OTHER_NUMBER, \ + OR (G_UNICODE_LOWERCASE_LETTER, \ + OR (G_UNICODE_UPPERCASE_LETTER, \ + OR (G_UNICODE_TITLECASE_LETTER, \ + OR (G_UNICODE_MODIFIER_LETTER, \ + OR (G_UNICODE_OTHER_LETTER, 0))))))))) + +#define ISMARK(Type) IS ((Type), \ + OR (G_UNICODE_NON_SPACING_MARK, \ + OR (G_UNICODE_COMBINING_MARK, \ + OR (G_UNICODE_ENCLOSING_MARK, 0)))) + +#define ISZEROWIDTHTYPE(Type) IS ((Type), \ + OR (G_UNICODE_NON_SPACING_MARK, \ + OR (G_UNICODE_ENCLOSING_MARK, \ + OR (G_UNICODE_FORMAT, 0)))) + +#define UTF8_COMPUTE(Char, Mask, Len) \ + if (Char < 128) \ + { \ + Len = 1; \ + Mask = 0x7f; \ + } \ + else if ((Char & 0xe0) == 0xc0) \ + { \ + Len = 2; \ + Mask = 0x1f; \ + } \ + else if ((Char & 0xf0) == 0xe0) \ + { \ + Len = 3; \ + Mask = 0x0f; \ + } \ + else if ((Char & 0xf8) == 0xf0) \ + { \ + Len = 4; \ + Mask = 0x07; \ + } \ + else if ((Char & 0xfc) == 0xf8) \ + { \ + Len = 5; \ + Mask = 0x03; \ + } \ + else if ((Char & 0xfe) == 0xfc) \ + { \ + Len = 6; \ + Mask = 0x01; \ + } \ + else \ Len = -1; #define UTF8_GET(Result, Chars, Count, Mask, Len) \ - (Result) = (Chars)[0] & (Mask); \ - for ((Count) = 1; (Count) < (Len); ++(Count)) \ - { \ - if (((Chars)[(Count)] & 0xc0) != 0x80) \ - { \ - (Result) = -1; \ - break; \ - } \ - (Result) <<= 6; \ - (Result) |= ((Chars)[(Count)] & 0x3f); \ + (Result) = (Chars)[0] & (Mask); \ + for ((Count) = 1; (Count) < (Len); ++(Count)) \ + { \ + if (((Chars)[(Count)] & 0xc0) != 0x80) \ + { \ + (Result) = -1; \ + break; \ + } \ + (Result) <<= 6; \ + (Result) |= ((Chars)[(Count)] & 0x3f); \ } 
- + /** * g_utf8_get_char: * @p: a pointer to Unicode character encoded as UTF-8 - * + * * Converts a sequence of bytes encoded as UTF-8 to a Unicode character. * If @p does not point to a valid UTF-8 encoded character, results are * undefined. If you are not sure that the bytes are complete * valid Unicode characters, you should use g_utf8_get_char_validated() * instead. - * + * * Return value: the resulting character **/ gunichar -g_utf8_get_char (const gchar *p) -{ - int i, mask = 0, len; - gunichar result; - unsigned char c = (unsigned char) *p; - - UTF8_COMPUTE (c, mask, len); - if (len == -1) - return (gunichar)-1; - UTF8_GET (result, p, i, mask, len); - - return result; +g_utf8_get_char (const gchar* p) { + int i, mask = 0, len; + gunichar result; + unsigned char c = (unsigned char) *p; + + UTF8_COMPUTE (c, mask, len); + if (len == -1) { + return (gunichar)-1; + } + UTF8_GET (result, p, i, mask, len); + + return result; } /** * g_unichar_isalnum: * @c: a Unicode character - * + * * Determines whether a character is alphanumeric. * Given some UTF-8 text, obtain a character value * with g_utf8_get_char(). - * + * * Return value: %TRUE if @c is an alphanumeric character **/ gboolean -g_unichar_isalnum (gunichar c) -{ - return ISALDIGIT (TYPE (c)) ? true : false; +g_unichar_isalnum (gunichar c) { + return ISALDIGIT (TYPE (c)) ? true : false; } /** * g_unichar_isalpha: * @c: a Unicode character - * + * * Determines whether a character is alphabetic (i.e. a letter). * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). - * + * * Return value: %TRUE if @c is an alphabetic character **/ gboolean -g_unichar_isalpha (gunichar c) -{ - return ISALPHA (TYPE (c)) ? true : false; +g_unichar_isalpha (gunichar c) { + return ISALPHA (TYPE (c)) ? true : false; } /** * g_unichar_iscntrl: * @c: a Unicode character - * + * * Determines whether a character is a control character. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). 
- * + * * Return value: %TRUE if @c is a control character **/ gboolean -g_unichar_iscntrl (gunichar c) -{ - return TYPE (c) == G_UNICODE_CONTROL; +g_unichar_iscntrl (gunichar c) { + return TYPE (c) == G_UNICODE_CONTROL; } /** * g_unichar_isdigit: * @c: a Unicode character - * + * * Determines whether a character is numeric (i.e. a digit). This * covers ASCII 0-9 and also digits in other languages/scripts. Given * some UTF-8 text, obtain a character value with g_utf8_get_char(). - * + * * Return value: %TRUE if @c is a digit **/ gboolean -g_unichar_isdigit (gunichar c) -{ - return TYPE (c) == G_UNICODE_DECIMAL_NUMBER; +g_unichar_isdigit (gunichar c) { + return TYPE (c) == G_UNICODE_DECIMAL_NUMBER; } /** * g_unichar_isgraph: * @c: a Unicode character - * + * * Determines whether a character is printable and not a space * (returns %FALSE for control characters, format characters, and * spaces). g_unichar_isprint() is similar, but returns %TRUE for * spaces. Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). - * + * * Return value: %TRUE if @c is printable unless it's a space **/ gboolean -g_unichar_isgraph (gunichar c) -{ - return !IS (TYPE(c), - OR (G_UNICODE_CONTROL, - OR (G_UNICODE_FORMAT, - OR (G_UNICODE_UNASSIGNED, - OR (G_UNICODE_SURROGATE, - OR (G_UNICODE_SPACE_SEPARATOR, - 0)))))); +g_unichar_isgraph (gunichar c) { + return !IS (TYPE(c), + OR (G_UNICODE_CONTROL, + OR (G_UNICODE_FORMAT, + OR (G_UNICODE_UNASSIGNED, + OR (G_UNICODE_SURROGATE, + OR (G_UNICODE_SPACE_SEPARATOR, + 0)))))); } /** * g_unichar_islower: * @c: a Unicode character - * + * * Determines whether a character is a lowercase letter. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). 
- * + * * Return value: %TRUE if @c is a lowercase letter **/ gboolean -g_unichar_islower (gunichar c) -{ - return TYPE (c) == G_UNICODE_LOWERCASE_LETTER; +g_unichar_islower (gunichar c) { + return TYPE (c) == G_UNICODE_LOWERCASE_LETTER; } /** * g_unichar_isprint: * @c: a Unicode character - * + * * Determines whether a character is printable. * Unlike g_unichar_isgraph(), returns %TRUE for spaces. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). - * + * * Return value: %TRUE if @c is printable **/ gboolean -g_unichar_isprint (gunichar c) -{ - return !IS (TYPE(c), - OR (G_UNICODE_CONTROL, - OR (G_UNICODE_FORMAT, - OR (G_UNICODE_UNASSIGNED, - OR (G_UNICODE_SURROGATE, - 0))))); +g_unichar_isprint (gunichar c) { + return !IS (TYPE(c), + OR (G_UNICODE_CONTROL, + OR (G_UNICODE_FORMAT, + OR (G_UNICODE_UNASSIGNED, + OR (G_UNICODE_SURROGATE, + 0))))); } /** * g_unichar_ispunct: * @c: a Unicode character - * + * * Determines whether a character is punctuation or a symbol. * Given some UTF-8 text, obtain a character value with * g_utf8_get_char(). - * + * * Return value: %TRUE if @c is a punctuation or symbol character **/ gboolean -g_unichar_ispunct (gunichar c) -{ - return IS (TYPE(c), - OR (G_UNICODE_CONNECT_PUNCTUATION, - OR (G_UNICODE_DASH_PUNCTUATION, - OR (G_UNICODE_CLOSE_PUNCTUATION, - OR (G_UNICODE_FINAL_PUNCTUATION, - OR (G_UNICODE_INITIAL_PUNCTUATION, - OR (G_UNICODE_OTHER_PUNCTUATION, - OR (G_UNICODE_OPEN_PUNCTUATION, - OR (G_UNICODE_CURRENCY_SYMBOL, - OR (G_UNICODE_MODIFIER_SYMBOL, - OR (G_UNICODE_MATH_SYMBOL, - OR (G_UNICODE_OTHER_SYMBOL, - 0)))))))))))) ? 
true : false; +g_unichar_ispunct (gunichar c) { + return IS (TYPE(c), + OR (G_UNICODE_CONNECT_PUNCTUATION, + OR (G_UNICODE_DASH_PUNCTUATION, + OR (G_UNICODE_CLOSE_PUNCTUATION, + OR (G_UNICODE_FINAL_PUNCTUATION, + OR (G_UNICODE_INITIAL_PUNCTUATION, + OR (G_UNICODE_OTHER_PUNCTUATION, + OR (G_UNICODE_OPEN_PUNCTUATION, + OR (G_UNICODE_CURRENCY_SYMBOL, + OR (G_UNICODE_MODIFIER_SYMBOL, + OR (G_UNICODE_MATH_SYMBOL, + OR (G_UNICODE_OTHER_SYMBOL, + 0)))))))))))) ? true : false; } /** * g_unichar_isspace: * @c: a Unicode character - * + * * Determines whether a character is a space, tab, or line separator * (newline, carriage return, etc.). Given some UTF-8 text, obtain a * character value with g_utf8_get_char(). @@ -331,31 +325,28 @@ g_unichar_ispunct (gunichar c) * (Note: don't use this to do word breaking; you have to use * Pango or equivalent to get word breaking right, the algorithm * is fairly complex.) - * + * * Return value: %TRUE if @c is a space character **/ gboolean -g_unichar_isspace (gunichar c) -{ - switch (c) - { - /* special-case these since Unicode thinks they are not spaces */ +g_unichar_isspace (gunichar c) { + switch (c) { + /* special-case these since Unicode thinks they are not spaces */ case '\t': case '\n': case '\r': case '\f': - return true; - break; - - default: - { - return IS (TYPE(c), - OR (G_UNICODE_SPACE_SEPARATOR, - OR (G_UNICODE_LINE_SEPARATOR, - OR (G_UNICODE_PARAGRAPH_SEPARATOR, - 0)))) ? true : false; - } - break; + return true; + break; + + default: { + return IS (TYPE(c), + OR (G_UNICODE_SPACE_SEPARATOR, + OR (G_UNICODE_LINE_SEPARATOR, + OR (G_UNICODE_PARAGRAPH_SEPARATOR, + 0)))) ? true : false; + } + break; } } @@ -378,188 +369,172 @@ g_unichar_isspace (gunichar c) * Since: 2.14 **/ gboolean -g_unichar_ismark (gunichar c) -{ - return ISMARK (TYPE (c)) ? true : false; +g_unichar_ismark (gunichar c) { + return ISMARK (TYPE (c)) ? 
true : false; } /** * g_unichar_isupper: * @c: a Unicode character - * + * * Determines if a character is uppercase. - * + * * Return value: %TRUE if @c is an uppercase character **/ gboolean -g_unichar_isupper (gunichar c) -{ - return TYPE (c) == G_UNICODE_UPPERCASE_LETTER; +g_unichar_isupper (gunichar c) { + return TYPE (c) == G_UNICODE_UPPERCASE_LETTER; } /** * g_unichar_istitle: * @c: a Unicode character - * + * * Determines if a character is titlecase. Some characters in * Unicode which are composites, such as the DZ digraph * have three case variants instead of just two. The titlecase * form is used at the beginning of a word where only the * first letter is capitalized. The titlecase form of the DZ * digraph is U+01F2 LATIN CAPITAL LETTTER D WITH SMALL LETTER Z. - * + * * Return value: %TRUE if the character is titlecase **/ gboolean -g_unichar_istitle (gunichar c) -{ - unsigned int i; - for (i = 0; i < G_N_ELEMENTS (title_table); ++i) - if (title_table[i][0] == c) - return true; - return false; +g_unichar_istitle (gunichar c) { + unsigned int i; + for (i = 0; i < G_N_ELEMENTS (title_table); ++i) + if (title_table[i][0] == c) { + return true; + } + return false; } /** * g_unichar_isxdigit: * @c: a Unicode character. - * + * * Determines if a character is a hexidecimal digit. - * + * * Return value: %TRUE if the character is a hexadecimal digit **/ gboolean -g_unichar_isxdigit (gunichar c) -{ - return ((c >= 'a' && c <= 'f') - || (c >= 'A' && c <= 'F') - || (TYPE (c) == G_UNICODE_DECIMAL_NUMBER)); +g_unichar_isxdigit (gunichar c) { + return ((c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F') + || (TYPE (c) == G_UNICODE_DECIMAL_NUMBER)); } /** * g_unichar_isdefined: * @c: a Unicode character - * + * * Determines if a given character is assigned in the Unicode * standard. 
* * Return value: %TRUE if the character has an assigned value **/ gboolean -g_unichar_isdefined (gunichar c) -{ - return !IS (TYPE(c), - OR (G_UNICODE_UNASSIGNED, - OR (G_UNICODE_SURROGATE, - 0))); +g_unichar_isdefined (gunichar c) { + return !IS (TYPE(c), + OR (G_UNICODE_UNASSIGNED, + OR (G_UNICODE_SURROGATE, + 0))); } /** * g_unichar_toupper: * @c: a Unicode character - * + * * Converts a character to uppercase. - * + * * Return value: the result of converting @c to uppercase. * If @c is not an lowercase or titlecase character, * or has no upper case equivalent @c is returned unchanged. **/ gunichar -g_unichar_toupper (gunichar c) -{ - int t = TYPE (c); - if (t == G_UNICODE_LOWERCASE_LETTER) - { - gunichar val = ATTTABLE (c >> 8, c & 0xff); - if (val >= 0x1000000) - { - const gchar *p = special_case_table + val - 0x1000000; - val = g_utf8_get_char (p); - } - /* Some lowercase letters, e.g., U+000AA, FEMININE ORDINAL INDICATOR, - * do not have an uppercase equivalent, in which case val will be - * zero. - */ - return val ? val : c; - } - else if (t == G_UNICODE_TITLECASE_LETTER) - { - unsigned int i; - for (i = 0; i < G_N_ELEMENTS (title_table); ++i) - { - if (title_table[i][0] == c) - return title_table[i][1]; - } +g_unichar_toupper (gunichar c) { + int t = TYPE (c); + if (t == G_UNICODE_LOWERCASE_LETTER) { + gunichar val = ATTTABLE (c >> 8, c & 0xff); + if (val >= 0x1000000) { + const gchar* p = special_case_table + val - 0x1000000; + val = g_utf8_get_char (p); + } + /* Some lowercase letters, e.g., U+000AA, FEMININE ORDINAL INDICATOR, + * do not have an uppercase equivalent, in which case val will be + * zero. + */ + return val ? val : c; + } else if (t == G_UNICODE_TITLECASE_LETTER) { + unsigned int i; + for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { + if (title_table[i][0] == c) { + return title_table[i][1]; + } + } } - return c; + return c; } /** * g_unichar_tolower: * @c: a Unicode character. - * + * * Converts a character to lower case. 
- * + * * Return value: the result of converting @c to lower case. * If @c is not an upperlower or titlecase character, * or has no lowercase equivalent @c is returned unchanged. **/ gunichar -g_unichar_tolower (gunichar c) -{ - int t = TYPE (c); - if (t == G_UNICODE_UPPERCASE_LETTER) - { - gunichar val = ATTTABLE (c >> 8, c & 0xff); - if (val >= 0x1000000) - { - const gchar *p = special_case_table + val - 0x1000000; - return g_utf8_get_char (p); - } - else - { - /* Not all uppercase letters are guaranteed to have a lowercase - * equivalent. If this is the case, val will be zero. */ - return val ? val : c; - } - } - else if (t == G_UNICODE_TITLECASE_LETTER) - { - unsigned int i; - for (i = 0; i < G_N_ELEMENTS (title_table); ++i) - { - if (title_table[i][0] == c) - return title_table[i][2]; - } +g_unichar_tolower (gunichar c) { + int t = TYPE (c); + if (t == G_UNICODE_UPPERCASE_LETTER) { + gunichar val = ATTTABLE (c >> 8, c & 0xff); + if (val >= 0x1000000) { + const gchar* p = special_case_table + val - 0x1000000; + return g_utf8_get_char (p); + } else { + /* Not all uppercase letters are guaranteed to have a lowercase + * equivalent. If this is the case, val will be zero. */ + return val ? val : c; + } + } else if (t == G_UNICODE_TITLECASE_LETTER) { + unsigned int i; + for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { + if (title_table[i][0] == c) { + return title_table[i][2]; + } + } } - return c; + return c; } /** * g_unichar_totitle: * @c: a Unicode character - * + * * Converts a character to the titlecase. - * + * * Return value: the result of converting @c to titlecase. * If @c is not an uppercase or lowercase character, * @c is returned unchanged. 
**/ gunichar -g_unichar_totitle (gunichar c) -{ - unsigned int i; - for (i = 0; i < G_N_ELEMENTS (title_table); ++i) - { - if (title_table[i][0] == c || title_table[i][1] == c - || title_table[i][2] == c) - return title_table[i][0]; +g_unichar_totitle (gunichar c) { + unsigned int i; + for (i = 0; i < G_N_ELEMENTS (title_table); ++i) { + if (title_table[i][0] == c || title_table[i][1] == c + || title_table[i][2] == c) { + return title_table[i][0]; + } + } + + if (TYPE (c) == G_UNICODE_LOWERCASE_LETTER) { + return g_unichar_toupper (c); } - - if (TYPE (c) == G_UNICODE_LOWERCASE_LETTER) - return g_unichar_toupper (c); - return c; + return c; } /** @@ -573,11 +548,11 @@ g_unichar_totitle (gunichar c) * g_unichar_isdigit()), its numeric value. Otherwise, -1. **/ int -g_unichar_digit_value (gunichar c) -{ - if (TYPE (c) == G_UNICODE_DECIMAL_NUMBER) - return ATTTABLE (c >> 8, c & 0xff); - return -1; +g_unichar_digit_value (gunichar c) { + if (TYPE (c) == G_UNICODE_DECIMAL_NUMBER) { + return ATTTABLE (c >> 8, c & 0xff); + } + return -1; } /** @@ -591,27 +566,28 @@ g_unichar_digit_value (gunichar c) * g_unichar_isxdigit()), its numeric value. Otherwise, -1. **/ int -g_unichar_xdigit_value (gunichar c) -{ - if (c >= 'A' && c <= 'F') - return c - 'A' + 10; - if (c >= 'a' && c <= 'f') - return c - 'a' + 10; - if (TYPE (c) == G_UNICODE_DECIMAL_NUMBER) - return ATTTABLE (c >> 8, c & 0xff); - return -1; +g_unichar_xdigit_value (gunichar c) { + if (c >= 'A' && c <= 'F') { + return c - 'A' + 10; + } + if (c >= 'a' && c <= 'f') { + return c - 'a' + 10; + } + if (TYPE (c) == G_UNICODE_DECIMAL_NUMBER) { + return ATTTABLE (c >> 8, c & 0xff); + } + return -1; } /** * g_unichar_type: * @c: a Unicode character - * + * * Classifies a Unicode character by type. - * + * * Return value: the type of the character. 
**/ GUnicodeType -g_unichar_type (gunichar c) -{ - return (GUnicodeType)TYPE (c); +g_unichar_type (gunichar c) { + return (GUnicodeType)TYPE (c); } diff --git a/src/core/util/unicode/guniprop.h b/src/core/util/unicode/guniprop.h index a68d8863..74a00bfd 100644 --- a/src/core/util/unicode/guniprop.h +++ b/src/core/util/unicode/guniprop.h @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -16,7 +16,7 @@ * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public @@ -24,7 +24,9 @@ * Free Software Foundation, Inc., 59 Temple Place - Suite 330, * Boston, MA 02111-1307, USA. */ - + +// See COPYING file for licensing information. + #include "Lucene.h" typedef uint32_t gunichar; @@ -40,38 +42,37 @@ typedef size_t gssize; /* These are the possible character classifications. 
* See http://www.unicode.org/Public/UNIDATA/UCD.html#General_Category_Values */ -typedef enum -{ - G_UNICODE_CONTROL, - G_UNICODE_FORMAT, - G_UNICODE_UNASSIGNED, - G_UNICODE_PRIVATE_USE, - G_UNICODE_SURROGATE, - G_UNICODE_LOWERCASE_LETTER, - G_UNICODE_MODIFIER_LETTER, - G_UNICODE_OTHER_LETTER, - G_UNICODE_TITLECASE_LETTER, - G_UNICODE_UPPERCASE_LETTER, - G_UNICODE_COMBINING_MARK, - G_UNICODE_ENCLOSING_MARK, - G_UNICODE_NON_SPACING_MARK, - G_UNICODE_DECIMAL_NUMBER, - G_UNICODE_LETTER_NUMBER, - G_UNICODE_OTHER_NUMBER, - G_UNICODE_CONNECT_PUNCTUATION, - G_UNICODE_DASH_PUNCTUATION, - G_UNICODE_CLOSE_PUNCTUATION, - G_UNICODE_FINAL_PUNCTUATION, - G_UNICODE_INITIAL_PUNCTUATION, - G_UNICODE_OTHER_PUNCTUATION, - G_UNICODE_OPEN_PUNCTUATION, - G_UNICODE_CURRENCY_SYMBOL, - G_UNICODE_MODIFIER_SYMBOL, - G_UNICODE_MATH_SYMBOL, - G_UNICODE_OTHER_SYMBOL, - G_UNICODE_LINE_SEPARATOR, - G_UNICODE_PARAGRAPH_SEPARATOR, - G_UNICODE_SPACE_SEPARATOR +typedef enum { + G_UNICODE_CONTROL, + G_UNICODE_FORMAT, + G_UNICODE_UNASSIGNED, + G_UNICODE_PRIVATE_USE, + G_UNICODE_SURROGATE, + G_UNICODE_LOWERCASE_LETTER, + G_UNICODE_MODIFIER_LETTER, + G_UNICODE_OTHER_LETTER, + G_UNICODE_TITLECASE_LETTER, + G_UNICODE_UPPERCASE_LETTER, + G_UNICODE_COMBINING_MARK, + G_UNICODE_ENCLOSING_MARK, + G_UNICODE_NON_SPACING_MARK, + G_UNICODE_DECIMAL_NUMBER, + G_UNICODE_LETTER_NUMBER, + G_UNICODE_OTHER_NUMBER, + G_UNICODE_CONNECT_PUNCTUATION, + G_UNICODE_DASH_PUNCTUATION, + G_UNICODE_CLOSE_PUNCTUATION, + G_UNICODE_FINAL_PUNCTUATION, + G_UNICODE_INITIAL_PUNCTUATION, + G_UNICODE_OTHER_PUNCTUATION, + G_UNICODE_OPEN_PUNCTUATION, + G_UNICODE_CURRENCY_SYMBOL, + G_UNICODE_MODIFIER_SYMBOL, + G_UNICODE_MATH_SYMBOL, + G_UNICODE_OTHER_SYMBOL, + G_UNICODE_LINE_SEPARATOR, + G_UNICODE_PARAGRAPH_SEPARATOR, + G_UNICODE_SPACE_SEPARATOR } GUnicodeType; /* These are the possible line break classifications. 
@@ -79,135 +80,134 @@ typedef enum * Implementations may regard unknown values like G_UNICODE_BREAK_UNKNOWN * See http://www.unicode.org/unicode/reports/tr14/ */ -typedef enum -{ - G_UNICODE_BREAK_MANDATORY, - G_UNICODE_BREAK_CARRIAGE_RETURN, - G_UNICODE_BREAK_LINE_FEED, - G_UNICODE_BREAK_COMBINING_MARK, - G_UNICODE_BREAK_SURROGATE, - G_UNICODE_BREAK_ZERO_WIDTH_SPACE, - G_UNICODE_BREAK_INSEPARABLE, - G_UNICODE_BREAK_NON_BREAKING_GLUE, - G_UNICODE_BREAK_CONTINGENT, - G_UNICODE_BREAK_SPACE, - G_UNICODE_BREAK_AFTER, - G_UNICODE_BREAK_BEFORE, - G_UNICODE_BREAK_BEFORE_AND_AFTER, - G_UNICODE_BREAK_HYPHEN, - G_UNICODE_BREAK_NON_STARTER, - G_UNICODE_BREAK_OPEN_PUNCTUATION, - G_UNICODE_BREAK_CLOSE_PUNCTUATION, - G_UNICODE_BREAK_QUOTATION, - G_UNICODE_BREAK_EXCLAMATION, - G_UNICODE_BREAK_IDEOGRAPHIC, - G_UNICODE_BREAK_NUMERIC, - G_UNICODE_BREAK_INFIX_SEPARATOR, - G_UNICODE_BREAK_SYMBOL, - G_UNICODE_BREAK_ALPHABETIC, - G_UNICODE_BREAK_PREFIX, - G_UNICODE_BREAK_POSTFIX, - G_UNICODE_BREAK_COMPLEX_CONTEXT, - G_UNICODE_BREAK_AMBIGUOUS, - G_UNICODE_BREAK_UNKNOWN, - G_UNICODE_BREAK_NEXT_LINE, - G_UNICODE_BREAK_WORD_JOINER, - G_UNICODE_BREAK_HANGUL_L_JAMO, - G_UNICODE_BREAK_HANGUL_V_JAMO, - G_UNICODE_BREAK_HANGUL_T_JAMO, - G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, - G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE +typedef enum { + G_UNICODE_BREAK_MANDATORY, + G_UNICODE_BREAK_CARRIAGE_RETURN, + G_UNICODE_BREAK_LINE_FEED, + G_UNICODE_BREAK_COMBINING_MARK, + G_UNICODE_BREAK_SURROGATE, + G_UNICODE_BREAK_ZERO_WIDTH_SPACE, + G_UNICODE_BREAK_INSEPARABLE, + G_UNICODE_BREAK_NON_BREAKING_GLUE, + G_UNICODE_BREAK_CONTINGENT, + G_UNICODE_BREAK_SPACE, + G_UNICODE_BREAK_AFTER, + G_UNICODE_BREAK_BEFORE, + G_UNICODE_BREAK_BEFORE_AND_AFTER, + G_UNICODE_BREAK_HYPHEN, + G_UNICODE_BREAK_NON_STARTER, + G_UNICODE_BREAK_OPEN_PUNCTUATION, + G_UNICODE_BREAK_CLOSE_PUNCTUATION, + G_UNICODE_BREAK_QUOTATION, + G_UNICODE_BREAK_EXCLAMATION, + G_UNICODE_BREAK_IDEOGRAPHIC, + G_UNICODE_BREAK_NUMERIC, + G_UNICODE_BREAK_INFIX_SEPARATOR, 
+ G_UNICODE_BREAK_SYMBOL, + G_UNICODE_BREAK_ALPHABETIC, + G_UNICODE_BREAK_PREFIX, + G_UNICODE_BREAK_POSTFIX, + G_UNICODE_BREAK_COMPLEX_CONTEXT, + G_UNICODE_BREAK_AMBIGUOUS, + G_UNICODE_BREAK_UNKNOWN, + G_UNICODE_BREAK_NEXT_LINE, + G_UNICODE_BREAK_WORD_JOINER, + G_UNICODE_BREAK_HANGUL_L_JAMO, + G_UNICODE_BREAK_HANGUL_V_JAMO, + G_UNICODE_BREAK_HANGUL_T_JAMO, + G_UNICODE_BREAK_HANGUL_LV_SYLLABLE, + G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE } GUnicodeBreakType; -typedef enum -{ /* ISO 15924 code */ - G_UNICODE_SCRIPT_INVALID_CODE = -1, - G_UNICODE_SCRIPT_COMMON = 0, /* Zyyy */ - G_UNICODE_SCRIPT_INHERITED, /* Qaai */ - G_UNICODE_SCRIPT_ARABIC, /* Arab */ - G_UNICODE_SCRIPT_ARMENIAN, /* Armn */ - G_UNICODE_SCRIPT_BENGALI, /* Beng */ - G_UNICODE_SCRIPT_BOPOMOFO, /* Bopo */ - G_UNICODE_SCRIPT_CHEROKEE, /* Cher */ - G_UNICODE_SCRIPT_COPTIC, /* Qaac */ - G_UNICODE_SCRIPT_CYRILLIC, /* Cyrl (Cyrs) */ - G_UNICODE_SCRIPT_DESERET, /* Dsrt */ - G_UNICODE_SCRIPT_DEVANAGARI, /* Deva */ - G_UNICODE_SCRIPT_ETHIOPIC, /* Ethi */ - G_UNICODE_SCRIPT_GEORGIAN, /* Geor (Geon, Geoa) */ - G_UNICODE_SCRIPT_GOTHIC, /* Goth */ - G_UNICODE_SCRIPT_GREEK, /* Grek */ - G_UNICODE_SCRIPT_GUJARATI, /* Gujr */ - G_UNICODE_SCRIPT_GURMUKHI, /* Guru */ - G_UNICODE_SCRIPT_HAN, /* Hani */ - G_UNICODE_SCRIPT_HANGUL, /* Hang */ - G_UNICODE_SCRIPT_HEBREW, /* Hebr */ - G_UNICODE_SCRIPT_HIRAGANA, /* Hira */ - G_UNICODE_SCRIPT_KANNADA, /* Knda */ - G_UNICODE_SCRIPT_KATAKANA, /* Kana */ - G_UNICODE_SCRIPT_KHMER, /* Khmr */ - G_UNICODE_SCRIPT_LAO, /* Laoo */ - G_UNICODE_SCRIPT_LATIN, /* Latn (Latf, Latg) */ - G_UNICODE_SCRIPT_MALAYALAM, /* Mlym */ - G_UNICODE_SCRIPT_MONGOLIAN, /* Mong */ - G_UNICODE_SCRIPT_MYANMAR, /* Mymr */ - G_UNICODE_SCRIPT_OGHAM, /* Ogam */ - G_UNICODE_SCRIPT_OLD_ITALIC, /* Ital */ - G_UNICODE_SCRIPT_ORIYA, /* Orya */ - G_UNICODE_SCRIPT_RUNIC, /* Runr */ - G_UNICODE_SCRIPT_SINHALA, /* Sinh */ - G_UNICODE_SCRIPT_SYRIAC, /* Syrc (Syrj, Syrn, Syre) */ - G_UNICODE_SCRIPT_TAMIL, /* Taml */ - 
G_UNICODE_SCRIPT_TELUGU, /* Telu */ - G_UNICODE_SCRIPT_THAANA, /* Thaa */ - G_UNICODE_SCRIPT_THAI, /* Thai */ - G_UNICODE_SCRIPT_TIBETAN, /* Tibt */ - G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */ - G_UNICODE_SCRIPT_YI, /* Yiii */ - G_UNICODE_SCRIPT_TAGALOG, /* Tglg */ - G_UNICODE_SCRIPT_HANUNOO, /* Hano */ - G_UNICODE_SCRIPT_BUHID, /* Buhd */ - G_UNICODE_SCRIPT_TAGBANWA, /* Tagb */ +typedef enum { + /* ISO 15924 code */ + G_UNICODE_SCRIPT_INVALID_CODE = -1, + G_UNICODE_SCRIPT_COMMON = 0, /* Zyyy */ + G_UNICODE_SCRIPT_INHERITED, /* Qaai */ + G_UNICODE_SCRIPT_ARABIC, /* Arab */ + G_UNICODE_SCRIPT_ARMENIAN, /* Armn */ + G_UNICODE_SCRIPT_BENGALI, /* Beng */ + G_UNICODE_SCRIPT_BOPOMOFO, /* Bopo */ + G_UNICODE_SCRIPT_CHEROKEE, /* Cher */ + G_UNICODE_SCRIPT_COPTIC, /* Qaac */ + G_UNICODE_SCRIPT_CYRILLIC, /* Cyrl (Cyrs) */ + G_UNICODE_SCRIPT_DESERET, /* Dsrt */ + G_UNICODE_SCRIPT_DEVANAGARI, /* Deva */ + G_UNICODE_SCRIPT_ETHIOPIC, /* Ethi */ + G_UNICODE_SCRIPT_GEORGIAN, /* Geor (Geon, Geoa) */ + G_UNICODE_SCRIPT_GOTHIC, /* Goth */ + G_UNICODE_SCRIPT_GREEK, /* Grek */ + G_UNICODE_SCRIPT_GUJARATI, /* Gujr */ + G_UNICODE_SCRIPT_GURMUKHI, /* Guru */ + G_UNICODE_SCRIPT_HAN, /* Hani */ + G_UNICODE_SCRIPT_HANGUL, /* Hang */ + G_UNICODE_SCRIPT_HEBREW, /* Hebr */ + G_UNICODE_SCRIPT_HIRAGANA, /* Hira */ + G_UNICODE_SCRIPT_KANNADA, /* Knda */ + G_UNICODE_SCRIPT_KATAKANA, /* Kana */ + G_UNICODE_SCRIPT_KHMER, /* Khmr */ + G_UNICODE_SCRIPT_LAO, /* Laoo */ + G_UNICODE_SCRIPT_LATIN, /* Latn (Latf, Latg) */ + G_UNICODE_SCRIPT_MALAYALAM, /* Mlym */ + G_UNICODE_SCRIPT_MONGOLIAN, /* Mong */ + G_UNICODE_SCRIPT_MYANMAR, /* Mymr */ + G_UNICODE_SCRIPT_OGHAM, /* Ogam */ + G_UNICODE_SCRIPT_OLD_ITALIC, /* Ital */ + G_UNICODE_SCRIPT_ORIYA, /* Orya */ + G_UNICODE_SCRIPT_RUNIC, /* Runr */ + G_UNICODE_SCRIPT_SINHALA, /* Sinh */ + G_UNICODE_SCRIPT_SYRIAC, /* Syrc (Syrj, Syrn, Syre) */ + G_UNICODE_SCRIPT_TAMIL, /* Taml */ + G_UNICODE_SCRIPT_TELUGU, /* Telu */ + G_UNICODE_SCRIPT_THAANA, /* Thaa */ + 
G_UNICODE_SCRIPT_THAI, /* Thai */ + G_UNICODE_SCRIPT_TIBETAN, /* Tibt */ + G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL, /* Cans */ + G_UNICODE_SCRIPT_YI, /* Yiii */ + G_UNICODE_SCRIPT_TAGALOG, /* Tglg */ + G_UNICODE_SCRIPT_HANUNOO, /* Hano */ + G_UNICODE_SCRIPT_BUHID, /* Buhd */ + G_UNICODE_SCRIPT_TAGBANWA, /* Tagb */ + + /* Unicode-4.0 additions */ + G_UNICODE_SCRIPT_BRAILLE, /* Brai */ + G_UNICODE_SCRIPT_CYPRIOT, /* Cprt */ + G_UNICODE_SCRIPT_LIMBU, /* Limb */ + G_UNICODE_SCRIPT_OSMANYA, /* Osma */ + G_UNICODE_SCRIPT_SHAVIAN, /* Shaw */ + G_UNICODE_SCRIPT_LINEAR_B, /* Linb */ + G_UNICODE_SCRIPT_TAI_LE, /* Tale */ + G_UNICODE_SCRIPT_UGARITIC, /* Ugar */ - /* Unicode-4.0 additions */ - G_UNICODE_SCRIPT_BRAILLE, /* Brai */ - G_UNICODE_SCRIPT_CYPRIOT, /* Cprt */ - G_UNICODE_SCRIPT_LIMBU, /* Limb */ - G_UNICODE_SCRIPT_OSMANYA, /* Osma */ - G_UNICODE_SCRIPT_SHAVIAN, /* Shaw */ - G_UNICODE_SCRIPT_LINEAR_B, /* Linb */ - G_UNICODE_SCRIPT_TAI_LE, /* Tale */ - G_UNICODE_SCRIPT_UGARITIC, /* Ugar */ - - /* Unicode-4.1 additions */ - G_UNICODE_SCRIPT_NEW_TAI_LUE, /* Talu */ - G_UNICODE_SCRIPT_BUGINESE, /* Bugi */ - G_UNICODE_SCRIPT_GLAGOLITIC, /* Glag */ - G_UNICODE_SCRIPT_TIFINAGH, /* Tfng */ - G_UNICODE_SCRIPT_SYLOTI_NAGRI, /* Sylo */ - G_UNICODE_SCRIPT_OLD_PERSIAN, /* Xpeo */ - G_UNICODE_SCRIPT_KHAROSHTHI, /* Khar */ + /* Unicode-4.1 additions */ + G_UNICODE_SCRIPT_NEW_TAI_LUE, /* Talu */ + G_UNICODE_SCRIPT_BUGINESE, /* Bugi */ + G_UNICODE_SCRIPT_GLAGOLITIC, /* Glag */ + G_UNICODE_SCRIPT_TIFINAGH, /* Tfng */ + G_UNICODE_SCRIPT_SYLOTI_NAGRI, /* Sylo */ + G_UNICODE_SCRIPT_OLD_PERSIAN, /* Xpeo */ + G_UNICODE_SCRIPT_KHAROSHTHI, /* Khar */ - /* Unicode-5.0 additions */ - G_UNICODE_SCRIPT_UNKNOWN, /* Zzzz */ - G_UNICODE_SCRIPT_BALINESE, /* Bali */ - G_UNICODE_SCRIPT_CUNEIFORM, /* Xsux */ - G_UNICODE_SCRIPT_PHOENICIAN, /* Phnx */ - G_UNICODE_SCRIPT_PHAGS_PA, /* Phag */ - G_UNICODE_SCRIPT_NKO, /* Nkoo */ + /* Unicode-5.0 additions */ + G_UNICODE_SCRIPT_UNKNOWN, /* Zzzz */ + 
G_UNICODE_SCRIPT_BALINESE, /* Bali */ + G_UNICODE_SCRIPT_CUNEIFORM, /* Xsux */ + G_UNICODE_SCRIPT_PHOENICIAN, /* Phnx */ + G_UNICODE_SCRIPT_PHAGS_PA, /* Phag */ + G_UNICODE_SCRIPT_NKO, /* Nkoo */ - /* Unicode-5.1 additions */ - G_UNICODE_SCRIPT_KAYAH_LI, /* Kali */ - G_UNICODE_SCRIPT_LEPCHA, /* Lepc */ - G_UNICODE_SCRIPT_REJANG, /* Rjng */ - G_UNICODE_SCRIPT_SUNDANESE, /* Sund */ - G_UNICODE_SCRIPT_SAURASHTRA, /* Saur */ - G_UNICODE_SCRIPT_CHAM, /* Cham */ - G_UNICODE_SCRIPT_OL_CHIKI, /* Olck */ - G_UNICODE_SCRIPT_VAI, /* Vaii */ - G_UNICODE_SCRIPT_CARIAN, /* Cari */ - G_UNICODE_SCRIPT_LYCIAN, /* Lyci */ - G_UNICODE_SCRIPT_LYDIAN /* Lydi */ + /* Unicode-5.1 additions */ + G_UNICODE_SCRIPT_KAYAH_LI, /* Kali */ + G_UNICODE_SCRIPT_LEPCHA, /* Lepc */ + G_UNICODE_SCRIPT_REJANG, /* Rjng */ + G_UNICODE_SCRIPT_SUNDANESE, /* Sund */ + G_UNICODE_SCRIPT_SAURASHTRA, /* Saur */ + G_UNICODE_SCRIPT_CHAM, /* Cham */ + G_UNICODE_SCRIPT_OL_CHIKI, /* Olck */ + G_UNICODE_SCRIPT_VAI, /* Vaii */ + G_UNICODE_SCRIPT_CARIAN, /* Cari */ + G_UNICODE_SCRIPT_LYCIAN, /* Lyci */ + G_UNICODE_SCRIPT_LYDIAN /* Lydi */ } GUnicodeScript; #include "gunichartables.h" diff --git a/src/demo/CMakeLists.txt b/src/demo/CMakeLists.txt index 8c848e35..ad9a45f9 100644 --- a/src/demo/CMakeLists.txt +++ b/src/demo/CMakeLists.txt @@ -1,30 +1,3 @@ -project(lucene++-demo) - -#################################### -# THE lucene++demo applications -#################################### -file(GLOB_RECURSE HEADERS ${lucene++-demo_SOURCE_DIR}/../include/*.h) -ADD_DEFINITIONS(-DLPP_HAVE_DLL) -INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) -INCLUDE_DIRECTORIES(${lucene++-base_SOURCE_DIR}/include) - -ADD_EXECUTABLE(indexfiles EXCLUDE_FROM_ALL - ${lucene++-demo_SOURCE_DIR}/indexfiles/main.cpp ${HEADERS} -) -TARGET_LINK_LIBRARIES(indexfiles lucene++) - -ADD_EXECUTABLE(searchfiles EXCLUDE_FROM_ALL - ${lucene++-demo_SOURCE_DIR}/searchfiles/main.cpp ${HEADERS} -) -TARGET_LINK_LIBRARIES(searchfiles lucene++) - 
-ADD_EXECUTABLE(deletefiles EXCLUDE_FROM_ALL - ${lucene++-demo_SOURCE_DIR}/deletefiles/main.cpp ${HEADERS} -) -TARGET_LINK_LIBRARIES(deletefiles lucene++) - -ADD_CUSTOM_TARGET( - demo - DEPENDS indexfiles searchfiles deletefiles -) - +add_subdirectory(deletefiles) +add_subdirectory(indexfiles) +add_subdirectory(searchfiles) \ No newline at end of file diff --git a/src/demo/deletefiles/CMakeLists.txt b/src/demo/deletefiles/CMakeLists.txt new file mode 100644 index 00000000..b50d5bee --- /dev/null +++ b/src/demo/deletefiles/CMakeLists.txt @@ -0,0 +1,45 @@ +project(deletefiles) + + +#################################### +# src +#################################### +file(GLOB_RECURSE deletefiles_sources + "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") + +file(GLOB_RECURSE demo_headers + "${deletefiles_SOURCE_DIR}/../include/*.h") + + +#################################### +# create executable target +#################################### +add_executable(deletefiles + ${deletefiles_sources}) + + +#################################### +# include directories +#################################### +target_include_directories(deletefiles + PRIVATE + $ + $ + ${Boost_INCLUDE_DIRS}) + + +#################################### +# dependencies +#################################### +target_link_libraries(deletefiles + PRIVATE + Boost::boost + Boost::date_time + Boost::filesystem + Boost::iostreams + Boost::regex + Boost::system + Boost::thread + ZLIB::ZLIB + lucene++::lucene++ + lucene++::lucene++-contrib) diff --git a/src/demo/deletefiles/main.cpp b/src/demo/deletefiles/main.cpp index ddb8083a..dc546618 100644 --- a/src/demo/deletefiles/main.cpp +++ b/src/demo/deletefiles/main.cpp @@ -1,10 +1,16 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#ifndef NOMINMAX #define NOMINMAX +#endif #include "targetver.h" #include @@ -13,16 +19,13 @@ using namespace Lucene; /// Deletes documents from an index that do not contain a term. -int main(int argc, char* argv[]) -{ - if (argc == 1) - { +int main(int argc, char* argv[]) { + if (argc == 1) { std::wcout << L"Usage: deletefiles.exe \n"; return 1; } - try - { + try { DirectoryPtr directory = FSDirectory::open(StringUtils::toUnicode(argv[1])); // we don't want read-only because we are about to delete @@ -35,9 +38,7 @@ int main(int argc, char* argv[]) reader->close(); directory->close(); - } - catch (LuceneException& e) - { + } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } diff --git a/src/demo/deletefiles/msvc/deletefiles.vcxproj b/src/demo/deletefiles/msvc/deletefiles.vcxproj new file mode 100644 index 00000000..e39c2261 --- /dev/null +++ b/src/demo/deletefiles/msvc/deletefiles.vcxproj @@ -0,0 +1,210 @@ + + + + + Debug DLL + Win32 + + + Debug Static + Win32 + + + Release DLL + Win32 + + + Release Static + Win32 + + + + {688A6720-739F-4EA3-AC5B-AA67A0965105} + deletefiles + Win32Proj + + + + Application + Unicode + true + + + Application + Unicode + + + Application + Unicode + true + + + Application + Unicode + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.40219.1 + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + false + + + + /Zm180 %(AdditionalOptions) + Disabled + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + 
WIN32;_DEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) + true + Async + EnableFastChecks + MultiThreadedDebugDLL + false + + + Level3 + EditAndContinue + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\lib32-msvc-10.0;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + /Zm180 %(AdditionalOptions) + MaxSpeed + AnySuitable + true + Speed + true + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + Async + MultiThreadedDLL + true + false + + + Level3 + ProgramDatabase + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + true + true + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + /Zm180 %(AdditionalOptions) + Disabled + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + Async + EnableFastChecks + MultiThreadedDebugDLL + false + + + Level3 + EditAndContinue + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + /Zm180 %(AdditionalOptions) + MaxSpeed + AnySuitable + true + Speed + true + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) + Async + MultiThreadedDLL + true + false + + + Level3 + ProgramDatabase + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + true + true + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." 
+ + + + + + + + {46a95afd-95fd-4280-b22e-1b56f273144b} + false + + + {46a95afd-95fd-4280-b22e-1b56f273144a} + false + + + + + + \ No newline at end of file diff --git a/src/demo/deletefiles/msvc/deletefiles.vcxproj.filters b/src/demo/deletefiles/msvc/deletefiles.vcxproj.filters new file mode 100644 index 00000000..e37e4061 --- /dev/null +++ b/src/demo/deletefiles/msvc/deletefiles.vcxproj.filters @@ -0,0 +1,14 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + source files + + + \ No newline at end of file diff --git a/src/demo/indexfiles/CMakeLists.txt b/src/demo/indexfiles/CMakeLists.txt new file mode 100644 index 00000000..a1dd582b --- /dev/null +++ b/src/demo/indexfiles/CMakeLists.txt @@ -0,0 +1,46 @@ +project(indexfiles) + + +#################################### +# src +#################################### +file(GLOB_RECURSE indexfiles_sources + "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") + +file(GLOB_RECURSE demo_headers + "${indexfiles_SOURCE_DIR}/../include/*.h") + + + +#################################### +# create executable target +#################################### +add_executable(indexfiles + ${indexfiles_sources}) + + +#################################### +# include directories +#################################### +target_include_directories(indexfiles + PRIVATE + $ + $ + ${Boost_INCLUDE_DIRS}) + + +#################################### +# dependencies +#################################### +target_link_libraries(indexfiles + PRIVATE + Boost::boost + Boost::date_time + Boost::filesystem + Boost::iostreams + Boost::regex + Boost::system + Boost::thread + ZLIB::ZLIB + lucene++::lucene++ + lucene++::lucene++-contrib) diff --git a/src/demo/indexfiles/main.cpp b/src/demo/indexfiles/main.cpp index d60da216..e6911f48 100644 --- a/src/demo/indexfiles/main.cpp +++ b/src/demo/indexfiles/main.cpp @@ -1,10 +1,16 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 
2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#ifndef NOMINMAX #define NOMINMAX +#endif #include "targetver.h" #include @@ -16,58 +22,50 @@ using namespace Lucene; int32_t docNumber = 0; -DocumentPtr fileDocument(const String& docFile) -{ +DocumentPtr fileDocument(const String& docFile) { DocumentPtr doc = newLucene(); - // Add the path of the file as a field named "path". Use a field that is indexed (ie. searchable), but + // Add the path of the file as a field named "path". Use a field that is indexed (ie. searchable), but // don't tokenize the field into words. doc->add(newLucene(L"path", docFile, Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); - - // Add the last modified date of the file a field named "modified". Use a field that is indexed (ie. searchable), + + // Add the last modified date of the file a field named "modified". Use a field that is indexed (ie. searchable), // but don't tokenize the field into words. doc->add(newLucene(L"modified", DateTools::timeToString(FileUtils::fileModified(docFile), DateTools::RESOLUTION_MINUTE), Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); - // Add the contents of the file to a field named "contents". Specify a Reader, so that the text of the file is - // tokenized and indexed, but not stored. Note that FileReader expects the file to be in the system's default + // Add the contents of the file to a field named "contents". Specify a Reader, so that the text of the file is + // tokenized and indexed, but not stored. Note that FileReader expects the file to be in the system's default // encoding. If that's not the case searching for special characters will fail. 
doc->add(newLucene(L"contents", newLucene(docFile))); - + return doc; } -void indexDocs(IndexWriterPtr writer, const String& sourceDir) -{ +void indexDocs(const IndexWriterPtr& writer, const String& sourceDir) { HashSet dirList(HashSet::newInstance()); - if (!FileUtils::listDirectory(sourceDir, false, dirList)) + if (!FileUtils::listDirectory(sourceDir, false, dirList)) { return; + } - for (HashSet::iterator dirFile = dirList.begin(); dirFile != dirList.end(); ++dirFile) - { + for (HashSet::iterator dirFile = dirList.begin(); dirFile != dirList.end(); ++dirFile) { String docFile(FileUtils::joinPath(sourceDir, *dirFile)); - if (FileUtils::isDirectory(docFile)) + if (FileUtils::isDirectory(docFile)) { indexDocs(writer, docFile); - else - { + } else { std::wcout << L"Adding [" << ++docNumber << L"]: " << *dirFile << L"\n"; - try - { + try { writer->addDocument(fileDocument(docFile)); - } - catch (FileNotFoundException&) - { + } catch (FileNotFoundException&) { } } } } /// Index all text files under a directory. 
-int main(int argc, char* argv[]) -{ - if (argc != 3) - { +int main(int argc, char* argv[]) { + if (argc != 3) { std::wcout << L"Usage: indexfiles.exe \n"; return 1; } @@ -75,16 +73,13 @@ int main(int argc, char* argv[]) String sourceDir(StringUtils::toUnicode(argv[1])); String indexDir(StringUtils::toUnicode(argv[2])); - if (!FileUtils::isDirectory(sourceDir)) - { + if (!FileUtils::isDirectory(sourceDir)) { std::wcout << L"Source directory doesn't exist: " << sourceDir << L"\n"; return 1; } - if (!FileUtils::isDirectory(indexDir)) - { - if (!FileUtils::createDirectory(indexDir)) - { + if (!FileUtils::isDirectory(indexDir)) { + if (!FileUtils::createDirectory(indexDir)) { std::wcout << L"Unable to create directory: " << indexDir << L"\n"; return 1; } @@ -92,8 +87,7 @@ int main(int argc, char* argv[]) uint64_t beginIndex = MiscUtils::currentTimeMillis(); - try - { + try { IndexWriterPtr writer = newLucene(FSDirectory::open(indexDir), newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); std::wcout << L"Indexing to directory: " << indexDir << L"...\n"; @@ -112,9 +106,7 @@ int main(int argc, char* argv[]) writer->close(); std::wcout << L"Total time: " << indexDuration + optimizeDuration << L" milliseconds\n"; - } - catch (LuceneException& e) - { + } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } diff --git a/src/demo/indexfiles/msvc/indexfiles.vcxproj b/src/demo/indexfiles/msvc/indexfiles.vcxproj new file mode 100644 index 00000000..94c8f34b --- /dev/null +++ b/src/demo/indexfiles/msvc/indexfiles.vcxproj @@ -0,0 +1,210 @@ + + + + + Debug DLL + Win32 + + + Debug Static + Win32 + + + Release DLL + Win32 + + + Release Static + Win32 + + + + {688A6720-739F-4EA3-AC5B-AA67A0965103} + indexfiles + Win32Proj + + + + Application + Unicode + true + + + Application + Unicode + + + Application + Unicode + true + + + Application + Unicode + + + + + + + + + + + + + + + + + + + 
<_ProjectFileVersion>10.0.40219.1 + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + false + + + + /Zm180 %(AdditionalOptions) + Disabled + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) + true + Async + EnableFastChecks + MultiThreadedDebugDLL + false + + + Level3 + EditAndContinue + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\lib32-msvc-10.0;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + /Zm180 %(AdditionalOptions) + MaxSpeed + AnySuitable + true + Speed + true + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + Async + MultiThreadedDLL + true + false + + + Level3 + ProgramDatabase + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + true + true + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + /Zm180 %(AdditionalOptions) + Disabled + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + Async + EnableFastChecks + MultiThreadedDebugDLL + false + + + Level3 + EditAndContinue + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." 
+ + + + + /Zm180 %(AdditionalOptions) + MaxSpeed + AnySuitable + true + Speed + true + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) + Async + MultiThreadedDLL + true + false + + + Level3 + ProgramDatabase + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + true + true + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + + + + {46a95afd-95fd-4280-b22e-1b56f273144b} + false + + + {46a95afd-95fd-4280-b22e-1b56f273144a} + false + + + + + + \ No newline at end of file diff --git a/src/demo/indexfiles/msvc/indexfiles.vcxproj.filters b/src/demo/indexfiles/msvc/indexfiles.vcxproj.filters new file mode 100644 index 00000000..e37e4061 --- /dev/null +++ b/src/demo/indexfiles/msvc/indexfiles.vcxproj.filters @@ -0,0 +1,14 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + source files + + + \ No newline at end of file diff --git a/src/demo/searchfiles/CMakeLists.txt b/src/demo/searchfiles/CMakeLists.txt new file mode 100644 index 00000000..aaf559e4 --- /dev/null +++ b/src/demo/searchfiles/CMakeLists.txt @@ -0,0 +1,45 @@ +project(searchfiles) + + +#################################### +# src +#################################### +file(GLOB_RECURSE searchfiles_sources + "${CMAKE_CURRENT_SOURCE_DIR}/main.cpp") + +file(GLOB_RECURSE demo_headers + "${searchfiles_SOURCE_DIR}/../include/*.h") + + +#################################### +# create executable target +#################################### +add_executable(searchfiles + ${searchfiles_sources}) + + +#################################### +# include directories +#################################### +target_include_directories(searchfiles + PRIVATE + $ + $ + ${Boost_INCLUDE_DIRS}) + + +#################################### +# dependencies +#################################### 
+target_link_libraries(searchfiles + PRIVATE + Boost::boost + Boost::date_time + Boost::filesystem + Boost::iostreams + Boost::regex + Boost::system + Boost::thread + ZLIB::ZLIB + lucene++::lucene++ + lucene++::lucene++-contrib) diff --git a/src/demo/searchfiles/main.cpp b/src/demo/searchfiles/main.cpp index 4b7a1ed3..63686e2a 100644 --- a/src/demo/searchfiles/main.cpp +++ b/src/demo/searchfiles/main.cpp @@ -1,10 +1,16 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN +#endif + +#ifndef NOMINMAX #define NOMINMAX +#endif #include "targetver.h" #include @@ -15,40 +21,35 @@ using namespace Lucene; -/// Use the norms from one field for all fields. Norms are read into memory, using a byte of memory -/// per document per searched field. This can cause search of large collections with a large number +/// Use the norms from one field for all fields. Norms are read into memory, using a byte of memory +/// per document per searched field. This can cause search of large collections with a large number /// of fields to run out of memory. If all of the fields contain only a single token, then the norms /// are all identical, then single norm vector may be shared. 
-class OneNormsReader : public FilterIndexReader -{ +class OneNormsReader : public FilterIndexReader { public: - OneNormsReader(IndexReaderPtr in, const String& field) : FilterIndexReader(in) - { + OneNormsReader(const IndexReaderPtr& in, const String& field) : FilterIndexReader(in) { this->field = field; } - virtual ~OneNormsReader() - { + virtual ~OneNormsReader() { } protected: String field; public: - virtual ByteArray norms(const String& field) - { + virtual ByteArray norms(const String& field) { return in->norms(this->field); } }; -/// This demonstrates a typical paging search scenario, where the search engine presents pages of size n +/// This demonstrates a typical paging search scenario, where the search engine presents pages of size n /// to the user. The user can then go to the next page if interested in the next hits. /// -/// When the query is executed for the first time, then only enough results are collected to fill 5 result -/// pages. If the user wants to page beyond this limit, then the query is executed another time and all +/// When the query is executed for the first time, then only enough results are collected to fill 5 result +/// pages. If the user wants to page beyond this limit, then the query is executed another time and all /// hits are collected. 
-static void doPagingSearch(SearcherPtr searcher, QueryPtr query, int32_t hitsPerPage, bool raw, bool interactive) -{ +static void doPagingSearch(const SearcherPtr& searcher, const QueryPtr& query, int32_t hitsPerPage, bool raw, bool interactive) { // Collect enough docs to show 5 pages TopScoreDocCollectorPtr collector = TopScoreDocCollector::create(5 * hitsPerPage, false); searcher->search(query, collector); @@ -60,18 +61,17 @@ static void doPagingSearch(SearcherPtr searcher, QueryPtr query, int32_t hitsPer int32_t start = 0; int32_t end = std::min(numTotalHits, hitsPerPage); - while (true) - { - if (end > hits.size()) - { + while (true) { + if (end > hits.size()) { std::wcout << L"Only results 1 - " << hits.size() << L" of " << numTotalHits << L" total matching documents collected.\n"; std::wcout << L"Collect more (y/n) ?"; String line; std::wcin >> line; boost::trim(line); - if (line.empty() || boost::starts_with(line, L"n")) + if (line.empty() || boost::starts_with(line, L"n")) { break; + } collector = TopScoreDocCollector::create(numTotalHits, false); searcher->search(query, collector); @@ -80,98 +80,86 @@ static void doPagingSearch(SearcherPtr searcher, QueryPtr query, int32_t hitsPer end = std::min(hits.size(), start + hitsPerPage); - for (int32_t i = start; i < end; ++i) - { - if (raw) // output raw format - { + for (int32_t i = start; i < end; ++i) { + if (raw) { // output raw format std::wcout << L"doc=" << hits[i]->doc << L" score=" << hits[i]->score << L"\n"; continue; } DocumentPtr doc = searcher->doc(hits[i]->doc); String path = doc->get(L"path"); - if (!path.empty()) - { + if (!path.empty()) { std::wcout << StringUtils::toString(i + 1) + L". " << path << L"\n"; String title = doc->get(L"title"); - if (!title.empty()) + if (!title.empty()) { std::wcout << L" Title: " << doc->get(L"title") << L"\n"; - } - else + } + } else { std::wcout << StringUtils::toString(i + 1) + L". 
No path for this document\n"; + } } - if (!interactive) + if (!interactive) { break; + } - if (numTotalHits >= end) - { + if (numTotalHits >= end) { bool quit = false; - while (true) - { + while (true) { std::wcout << L"Press "; - if (start - hitsPerPage >= 0) + if (start - hitsPerPage >= 0) { std::wcout << L"(p)revious page, "; - if (start + hitsPerPage < numTotalHits) + } + if (start + hitsPerPage < numTotalHits) { std::wcout << L"(n)ext page, "; + } std::wcout << L"(q)uit or enter number to jump to a page: "; String line; std::wcin >> line; boost::trim(line); - if (line.empty() || boost::starts_with(line, L"q")) - { + if (line.empty() || boost::starts_with(line, L"q")) { quit = true; break; } - if (boost::starts_with(line, L"p")) - { + if (boost::starts_with(line, L"p")) { start = std::max((int32_t)0, start - hitsPerPage); break; - } - else if (boost::starts_with(line, L"n")) - { - if (start + hitsPerPage < numTotalHits) + } else if (boost::starts_with(line, L"n")) { + if (start + hitsPerPage < numTotalHits) { start += hitsPerPage; + } break; - } - else - { + } else { int32_t page = 0; - try - { + try { page = StringUtils::toInt(line); + } catch (NumberFormatException&) { } - catch (NumberFormatException&) - { - } - if ((page - 1) * hitsPerPage < numTotalHits) - { + if ((page - 1) * hitsPerPage < numTotalHits) { start = std::max((int32_t)0, (page - 1) * hitsPerPage); break; - } - else + } else { std::wcout << L"No such page\n"; + } } } - if (quit) + if (quit) { break; + } end = std::min(numTotalHits, start + hitsPerPage); } } } -class StreamingHitCollector : public Collector -{ +class StreamingHitCollector : public Collector { public: - StreamingHitCollector() - { + StreamingHitCollector() { docBase = 0; } - virtual ~StreamingHitCollector() - { + virtual ~StreamingHitCollector() { } protected: @@ -180,50 +168,42 @@ class StreamingHitCollector : public Collector public: /// simply print docId and score of every matching document - virtual void collect(int32_t 
doc) - { + virtual void collect(int32_t doc) { std::wcout << L"doc=" << (doc + docBase) << L" score=" << scorer->score(); } - virtual bool acceptsDocsOutOfOrder() - { + virtual bool acceptsDocsOutOfOrder() { return true; } - virtual void setNextReader(IndexReaderPtr reader, int32_t docBase) - { + virtual void setNextReader(const IndexReaderPtr& reader, int32_t docBase) { this->docBase = docBase; } - virtual void setScorer(ScorerPtr scorer) - { + virtual void setScorer(const ScorerPtr& scorer) { this->scorer = scorer; } }; -/// This method uses a custom HitCollector implementation which simply prints out the docId and score of -/// every matching document. +/// This method uses a custom HitCollector implementation which simply prints out the docId and score of +/// every matching document. /// -/// This simulates the streaming search use case, where all hits are supposed to be processed, regardless +/// This simulates the streaming search use case, where all hits are supposed to be processed, regardless /// of their relevance. -static void doStreamingSearch(SearcherPtr searcher, QueryPtr query) -{ +static void doStreamingSearch(const SearcherPtr& searcher, const QueryPtr& query) { searcher->search(query, newLucene()); } /// Simple command-line based search demo. 
-int main(int argc, char* argv[]) -{ - if (argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0) - { +int main(int argc, char* argv[]) { + if (argc == 1 || strcmp(argv[1], "-h") == 0 || strcmp(argv[1], "-help") == 0) { std::wcout << L"Usage: searchfiles.exe [-index dir] [-field f] [-repeat n] [-queries file] [-raw] "; std::wcout << L"[-norms field] [-paging hitsPerPage]\n\n"; std::wcout << L"Specify 'false' for hitsPerPage to use streaming instead of paging search.\n"; return 1; } - try - { + try { String index = L"index"; String field = L"contents"; String queries; @@ -233,44 +213,32 @@ int main(int argc, char* argv[]) bool paging = true; int32_t hitsPerPage = 10; - for (int32_t i = 0; i < argc; ++i) - { - if (strcmp(argv[i], "-index") == 0) - { + for (int32_t i = 0; i < argc; ++i) { + if (strcmp(argv[i], "-index") == 0) { index = StringUtils::toUnicode(argv[i + 1]); ++i; - } - else if (strcmp(argv[i], "-field") == 0) - { + } else if (strcmp(argv[i], "-field") == 0) { field = StringUtils::toUnicode(argv[i + 1]); ++i; - } - else if (strcmp(argv[i], "-queries") == 0) - { + } else if (strcmp(argv[i], "-queries") == 0) { queries = StringUtils::toUnicode(argv[i + 1]); ++i; - } - else if (strcmp(argv[i], "-repeat") == 0) - { + } else if (strcmp(argv[i], "-repeat") == 0) { repeat = StringUtils::toInt(StringUtils::toUnicode(argv[i + 1])); ++i; - } - else if (strcmp(argv[i], "-raw") == 0) + } else if (strcmp(argv[i], "-raw") == 0) { raw = true; - else if (strcmp(argv[i], "-norms") == 0) - { + } else if (strcmp(argv[i], "-norms") == 0) { normsField = StringUtils::toUnicode(argv[i + 1]); ++i; - } - else if (strcmp(argv[i], "-paging") == 0) - { - if (strcmp(argv[i + 1], "false") == 0) + } else if (strcmp(argv[i], "-paging") == 0) { + if (strcmp(argv[i + 1], "false") == 0) { paging = false; - else - { + } else { hitsPerPage = StringUtils::toInt(StringUtils::toUnicode(argv[i + 1])); - if (hitsPerPage == 0) + if (hitsPerPage == 0) { paging = false; + } } ++i; 
} @@ -279,60 +247,57 @@ int main(int argc, char* argv[]) // only searching, so read-only=true IndexReaderPtr reader = IndexReader::open(FSDirectory::open(index), true); - if (!normsField.empty()) + if (!normsField.empty()) { reader = newLucene(reader, normsField); + } SearcherPtr searcher = newLucene(reader); AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); QueryParserPtr parser = newLucene(LuceneVersion::LUCENE_CURRENT, field, analyzer); ReaderPtr in; - if (!queries.empty()) + if (!queries.empty()) { in = newLucene(queries); + } - while (true) - { + while (true) { String line; - if (!queries.empty()) - { + if (!queries.empty()) { wchar_t c = in->read(); - while (c != L'\n' && c != L'\r' && c != Reader::READER_EOF) - { + while (c != L'\n' && c != L'\r' && c != Reader::READER_EOF) { line += c; c = in->read(); } - } - else - { + } else { std::wcout << L"Enter query: "; - std::wcin >> line; + getline(std::wcin, line); } boost::trim(line); - if (line.empty()) + if (line.empty()) { break; + } QueryPtr query = parser->parse(line); std::wcout << L"Searching for: " << query->toString(field) << L"\n"; - if (repeat > 0) // repeat and time as benchmark - { + if (repeat > 0) { // repeat and time as benchmark int64_t start = MiscUtils::currentTimeMillis(); - for (int32_t i = 0; i < repeat; ++i) + for (int32_t i = 0; i < repeat; ++i) { searcher->search(query, FilterPtr(), 100); + } std::wcout << L"Time: " << (MiscUtils::currentTimeMillis() - start) << L"ms\n"; } - if (paging) + if (paging) { doPagingSearch(searcher, query, hitsPerPage, raw, queries.empty()); - else + } else { doStreamingSearch(searcher, query); + } } reader->close(); - } - catch (LuceneException& e) - { + } catch (LuceneException& e) { std::wcout << L"Exception: " << e.getError() << L"\n"; return 1; } diff --git a/src/demo/searchfiles/msvc/searchfiles.vcxproj b/src/demo/searchfiles/msvc/searchfiles.vcxproj new file mode 100644 index 00000000..59187932 --- /dev/null +++ 
b/src/demo/searchfiles/msvc/searchfiles.vcxproj @@ -0,0 +1,210 @@ + + + + + Debug DLL + Win32 + + + Debug Static + Win32 + + + Release DLL + Win32 + + + Release Static + Win32 + + + + {688A6720-739F-4EA3-AC5B-AA67A0965104} + searchfiles + Win32Proj + + + + Application + Unicode + true + + + Application + Unicode + + + Application + Unicode + true + + + Application + Unicode + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>10.0.40219.1 + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + false + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + true + $(SolutionDir)$(Configuration)\ + $(Configuration)\ + false + + + + /Zm180 %(AdditionalOptions) + Disabled + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) + true + Async + EnableFastChecks + MultiThreadedDebugDLL + false + + + Level3 + EditAndContinue + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\lib32-msvc-10.0;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + /Zm180 %(AdditionalOptions) + MaxSpeed + AnySuitable + true + Speed + true + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + Async + MultiThreadedDLL + true + false + + + Level3 + ProgramDatabase + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + true + true + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." 
+ + + + + /Zm180 %(AdditionalOptions) + Disabled + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + true + Async + EnableFastChecks + MultiThreadedDebugDLL + false + + + Level3 + EditAndContinue + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + /Zm180 %(AdditionalOptions) + MaxSpeed + AnySuitable + true + Speed + true + ..\..\..\..\include;$(BOOST_ROOT);%(AdditionalIncludeDirectories) + WIN32;NDEBUG;_CONSOLE;LPP_HAVE_DLL;%(PreprocessorDefinitions) + Async + MultiThreadedDLL + true + false + + + Level3 + ProgramDatabase + + + lucene++.lib;%(AdditionalDependencies) + $(BOOST_ROOT)\stage\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) + true + Console + true + true + MachineX86 + + + copy "$(OutDir)$(ProjectName).exe" "..\..\..\..\bin\." + + + + + + + + {46a95afd-95fd-4280-b22e-1b56f273144b} + false + + + {46a95afd-95fd-4280-b22e-1b56f273144a} + false + + + + + + \ No newline at end of file diff --git a/src/demo/searchfiles/msvc/searchfiles.vcxproj.filters b/src/demo/searchfiles/msvc/searchfiles.vcxproj.filters new file mode 100644 index 00000000..e37e4061 --- /dev/null +++ b/src/demo/searchfiles/msvc/searchfiles.vcxproj.filters @@ -0,0 +1,14 @@ + + + + + {4FC737F1-C7A5-4376-A066-2A32D752A2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + + + source files + + + \ No newline at end of file diff --git a/src/msvc/lucene++.sln b/src/msvc/lucene++.sln index e183f153..a016ec99 100644 --- a/src/msvc/lucene++.sln +++ b/src/msvc/lucene++.sln @@ -1,100 +1,81 @@ - -Microsoft Visual Studio Solution File, Format Version 10.00 -# Visual Studio 2008 -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene_tester", "..\test\msvc\lucene_tester.vcproj", "{6D684870-1124-49E1-8F96-7DE7B6114BEA}" - ProjectSection(ProjectDependencies) = 
postProject - {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} - {46A95AFD-95FD-4280-B22E-1B56F273144B} = {46A95AFD-95FD-4280-B22E-1B56F273144B} - EndProjectSection -EndProject -Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "demos", "demos", "{E9344A66-4CC8-4E5B-83BC-8061E8962B46}" -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "indexfiles", "..\demo\indexfiles\msvc\indexfiles.vcproj", "{688A6720-739F-4EA3-AC5B-AA67A0965103}" - ProjectSection(ProjectDependencies) = postProject - {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} - {46A95AFD-95FD-4280-B22E-1B56F273144B} = {46A95AFD-95FD-4280-B22E-1B56F273144B} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "deletefiles", "..\demo\deletefiles\msvc\deletefiles.vcproj", "{688A6720-739F-4EA3-AC5B-AA67A0965105}" - ProjectSection(ProjectDependencies) = postProject - {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} - {46A95AFD-95FD-4280-B22E-1B56F273144B} = {46A95AFD-95FD-4280-B22E-1B56F273144B} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "searchfiles", "..\demo\searchfiles\msvc\searchfiles.vcproj", "{688A6720-739F-4EA3-AC5B-AA67A0965104}" - ProjectSection(ProjectDependencies) = postProject - {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} - {46A95AFD-95FD-4280-B22E-1B56F273144B} = {46A95AFD-95FD-4280-B22E-1B56F273144B} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene_contrib", "..\contrib\msvc\lucene_contrib.vcproj", "{46A95AFD-95FD-4280-B22E-1B56F273144B}" - ProjectSection(ProjectDependencies) = postProject - {46A95AFD-95FD-4280-B22E-1B56F273144A} = {46A95AFD-95FD-4280-B22E-1B56F273144A} - EndProjectSection -EndProject -Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene++", "..\core\msvc\lucene++.vcproj", "{46A95AFD-95FD-4280-B22E-1B56F273144A}" 
-EndProject -Global - GlobalSection(SolutionConfigurationPlatforms) = preSolution - Debug DLL|Win32 = Debug DLL|Win32 - Debug Static|Win32 = Debug Static|Win32 - Release DLL|Win32 = Release DLL|Win32 - Release Static|Win32 = Release Static|Win32 - EndGlobalSection - GlobalSection(ProjectConfigurationPlatforms) = postSolution - {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 - {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 - {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug Static|Win32.Build.0 = Debug Static|Win32 - {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 - {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release Static|Win32.ActiveCfg = Release Static|Win32 - {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release Static|Win32.Build.0 = Release Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug Static|Win32.Build.0 = Debug Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release DLL|Win32.Build.0 = Release DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release Static|Win32.ActiveCfg = Release Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release Static|Win32.Build.0 = Release Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug Static|Win32.Build.0 = Debug Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release DLL|Win32.ActiveCfg = 
Release DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release DLL|Win32.Build.0 = Release DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release Static|Win32.ActiveCfg = Release Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release Static|Win32.Build.0 = Release Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug Static|Win32.Build.0 = Debug Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release DLL|Win32.Build.0 = Release DLL|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release Static|Win32.ActiveCfg = Release Static|Win32 - {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release Static|Win32.Build.0 = Release Static|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug Static|Win32.Build.0 = Debug Static|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release DLL|Win32.Build.0 = Release DLL|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release Static|Win32.ActiveCfg = Release Static|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release Static|Win32.Build.0 = Release Static|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 - 
{46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug Static|Win32.Build.0 = Debug Static|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release DLL|Win32.Build.0 = Release DLL|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release Static|Win32.ActiveCfg = Release Static|Win32 - {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release Static|Win32.Build.0 = Release Static|Win32 - EndGlobalSection - GlobalSection(SolutionProperties) = preSolution - HideSolutionNode = FALSE - EndGlobalSection - GlobalSection(NestedProjects) = preSolution - {688A6720-739F-4EA3-AC5B-AA67A0965103} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} - {688A6720-739F-4EA3-AC5B-AA67A0965105} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} - {688A6720-739F-4EA3-AC5B-AA67A0965104} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} - EndGlobalSection -EndGlobal + +Microsoft Visual Studio Solution File, Format Version 11.00 +# Visual Studio 2010 +Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "demos", "demos", "{E9344A66-4CC8-4E5B-83BC-8061E8962B46}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene_tester", "..\test\msvc\lucene_tester.vcxproj", "{6D684870-1124-49E1-8F96-7DE7B6114BEA}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "indexfiles", "..\demo\indexfiles\msvc\indexfiles.vcxproj", "{688A6720-739F-4EA3-AC5B-AA67A0965103}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "deletefiles", "..\demo\deletefiles\msvc\deletefiles.vcxproj", "{688A6720-739F-4EA3-AC5B-AA67A0965105}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "searchfiles", "..\demo\searchfiles\msvc\searchfiles.vcxproj", "{688A6720-739F-4EA3-AC5B-AA67A0965104}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene_contrib", "..\contrib\msvc\lucene_contrib.vcxproj", "{46A95AFD-95FD-4280-B22E-1B56F273144B}" +EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "lucene++", 
"..\core\msvc\lucene++.vcxproj", "{46A95AFD-95FD-4280-B22E-1B56F273144A}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug DLL|Win32 = Debug DLL|Win32 + Debug Static|Win32 = Debug Static|Win32 + Release DLL|Win32 = Release DLL|Win32 + Release Static|Win32 = Release Static|Win32 + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 + {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 + {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Debug Static|Win32.Build.0 = Debug Static|Win32 + {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 + {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release Static|Win32.ActiveCfg = Release Static|Win32 + {6D684870-1124-49E1-8F96-7DE7B6114BEA}.Release Static|Win32.Build.0 = Release Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965103}.Debug Static|Win32.Build.0 = Debug Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release DLL|Win32.Build.0 = Release DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release Static|Win32.ActiveCfg = Release Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965103}.Release Static|Win32.Build.0 = Release Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965105}.Debug Static|Win32.Build.0 = Debug 
Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release DLL|Win32.Build.0 = Release DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release Static|Win32.ActiveCfg = Release Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965105}.Release Static|Win32.Build.0 = Release Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965104}.Debug Static|Win32.Build.0 = Debug Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release DLL|Win32.Build.0 = Release DLL|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release Static|Win32.ActiveCfg = Release Static|Win32 + {688A6720-739F-4EA3-AC5B-AA67A0965104}.Release Static|Win32.Build.0 = Release Static|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144B}.Debug Static|Win32.Build.0 = Debug Static|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release DLL|Win32.Build.0 = Release DLL|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release Static|Win32.ActiveCfg = Release Static|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144B}.Release Static|Win32.Build.0 = Release Static|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug DLL|Win32.ActiveCfg = Debug DLL|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug DLL|Win32.Build.0 = Debug DLL|Win32 + 
{46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug Static|Win32.ActiveCfg = Debug Static|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144A}.Debug Static|Win32.Build.0 = Debug Static|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release DLL|Win32.ActiveCfg = Release DLL|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release DLL|Win32.Build.0 = Release DLL|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release Static|Win32.ActiveCfg = Release Static|Win32 + {46A95AFD-95FD-4280-B22E-1B56F273144A}.Release Static|Win32.Build.0 = Release Static|Win32 + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(NestedProjects) = preSolution + {688A6720-739F-4EA3-AC5B-AA67A0965103} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} + {688A6720-739F-4EA3-AC5B-AA67A0965105} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} + {688A6720-739F-4EA3-AC5B-AA67A0965104} = {E9344A66-4CC8-4E5B-83BC-8061E8962B46} + EndGlobalSection +EndGlobal diff --git a/src/test/CMakeLists.txt b/src/test/CMakeLists.txt index 36ab3368..f0b9b7e6 100644 --- a/src/test/CMakeLists.txt +++ b/src/test/CMakeLists.txt @@ -1,37 +1,77 @@ -project(lucene++-tester) - -#################################### -# THE lucene++tester library -#################################### -file(GLOB_RECURSE lucene_sources - ${lucene++-tester_SOURCE_DIR}/*.cpp) -file(GLOB_RECURSE HEADERS ${lucene++-tester_SOURCE_DIR}/../include/*.h) -file(GLOB_RECURSE HEADERS ${lucene++-tester_SOURCE_DIR}/include/*.h) - -LINK_DIRECTORIES(${Boost_LIBRARY_DIRS}) -INCLUDE_DIRECTORIES(${Boost_INCLUDE_DIRS}) -INCLUDE_DIRECTORIES(${lucene++-base_SOURCE_DIR}/include) -INCLUDE_DIRECTORIES(${lucene++-lib_SOURCE_DIR}/include) -INCLUDE_DIRECTORIES(${lucene++-contrib_SOURCE_DIR}/include) -INCLUDE_DIRECTORIES(${lucene++-tester_SOURCE_DIR}/include) -ADD_DEFINITIONS(-DLPP_EXPOSE_INTERNAL) - -ADD_EXECUTABLE(lucene++-tester EXCLUDE_FROM_ALL - ${lucene_sources} ${HEADERS} -) - -#set properties on the libraries 
-SET_TARGET_PROPERTIES(lucene++-tester PROPERTIES - VERSION ${LUCENE++_VERSION} - SOVERSION ${LUCENE++_SOVERSION} -) -TARGET_LINK_LIBRARIES(lucene++-tester - lucene++-static - lucene++-c - lucene++-contrib-static - ${CMAKE_THREAD_LIBS_INIT} - ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY_RELEASE} - ${LUCENE_BOOST_LIBS} ) - -ADD_TEST(${EXECUTABLE_OUTPUT_PATH}/lucene++-tester ${EXECUTABLE_OUTPUT_PATH}/lucene++-tester -p) +project(tester) + +#################################### +# configure GTest +#################################### +if(MSVC) + set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) +endif() + +add_subdirectory(gtest) + + +#################################### +# src +#################################### +file(GLOB_RECURSE tester_sources + "analysis/*.cpp" + "contrib/*.cpp" + "document/*.cpp" + "index/*.cpp" + "main/*.cpp" + "queryparser/*.cpp" + "search/*.cpp" + "store/*.cpp" + "util/*.cpp") + +file(GLOB_RECURSE test_headers + "${lucene++-tester_SOURCE_DIR}/include/*.h") + +#################################### +# create test bin target +#################################### +add_executable(lucene++-tester + ${tester_sources}) + + +#################################### +# include directories +#################################### +target_include_directories(lucene++-tester + PUBLIC + $ + $ + $ + $ + $ + $ + $) + + +#################################### +# dependencies +#################################### +target_link_libraries(lucene++-tester + PRIVATE + Boost::boost + Boost::date_time + Boost::filesystem + Boost::iostreams + Boost::regex + Boost::system + Boost::thread + ZLIB::ZLIB + gtest_main + gtest + + lucene++::lucene++ + lucene++::lucene++-contrib) + + +#################################### +# link args +#################################### +target_compile_options(lucene++-tester PRIVATE -DLPP_EXPOSE_INTERNAL) + +cotire(lucene++-tester) diff --git a/src/test/analysis/AnalyzersTest.cpp b/src/test/analysis/AnalyzersTest.cpp index a71b7fee..f5609a15 100644 --- 
a/src/test/analysis/AnalyzersTest.cpp +++ b/src/test/analysis/AnalyzersTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -17,22 +17,20 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(AnalyzersTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture AnalyzersTest; -static void verifyPayload(TokenStreamPtr ts) -{ +static void verifyPayload(const TokenStreamPtr& ts) { PayloadAttributePtr payloadAtt = ts->getAttribute(); - for (uint8_t b = 1; ; ++b) - { + for (uint8_t b = 1; ; ++b) { bool hasNext = ts->incrementToken(); - if (!hasNext) + if (!hasNext) { break; - BOOST_CHECK_EQUAL(b, payloadAtt->getPayload()->toByteArray()[0]); + } + EXPECT_EQ(b, payloadAtt->getPayload()->toByteArray()[0]); } } -BOOST_AUTO_TEST_CASE(testSimple) -{ +TEST_F(AnalyzersTest, testSimple) { AnalyzerPtr a = newLucene(); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"foo bar . FOO <> BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); @@ -44,8 +42,7 @@ BOOST_AUTO_TEST_CASE(testSimple) checkAnalyzesTo(a, L"\"QUOTED\" word", newCollection(L"quoted", L"word")); } -BOOST_AUTO_TEST_CASE(testNull) -{ +TEST_F(AnalyzersTest, testNull) { AnalyzerPtr a = newLucene(); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"FOO", L"BAR")); checkAnalyzesTo(a, L"foo bar . 
FOO <> BAR", newCollection(L"foo", L"bar", L".", L"FOO", L"<>", L"BAR")); @@ -57,53 +54,49 @@ BOOST_AUTO_TEST_CASE(testNull) checkAnalyzesTo(a, L"\"QUOTED\" word", newCollection(L"\"QUOTED\"", L"word")); } -BOOST_AUTO_TEST_CASE(testStop) -{ +TEST_F(AnalyzersTest, testStop) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(a, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); checkAnalyzesTo(a, L"foo a bar such FOO THESE BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); } -namespace TestPayloadCopy -{ - DECLARE_SHARED_PTR(PayloadSetter) - - class PayloadSetter : public TokenFilter - { - public: - PayloadSetter(TokenStreamPtr input) : TokenFilter(input) - { - payloadAtt = addAttribute(); - data = ByteArray::newInstance(1); - data[0] = 0; - p = newLucene(data, 0, 1); - } - - virtual ~PayloadSetter() - { - } +namespace TestPayloadCopy { + +DECLARE_SHARED_PTR(PayloadSetter) + +class PayloadSetter : public TokenFilter { +public: + PayloadSetter(const TokenStreamPtr& input) : TokenFilter(input) { + payloadAtt = addAttribute(); + data = ByteArray::newInstance(1); + data[0] = 0; + p = newLucene(data, 0, 1); + } - public: - PayloadAttributePtr payloadAtt; - ByteArray data; - PayloadPtr p; - - public: - virtual bool incrementToken() - { - bool hasNext = input->incrementToken(); - if (!hasNext) - return false; - payloadAtt->setPayload(p); // reuse the payload / byte[] - data[0]++; - return true; + virtual ~PayloadSetter() { + } + +public: + PayloadAttributePtr payloadAtt; + ByteArray data; + PayloadPtr p; + +public: + virtual bool incrementToken() { + bool hasNext = input->incrementToken(); + if (!hasNext) { + return false; } - }; + payloadAtt->setPayload(p); // reuse the payload / byte[] + data[0]++; + return true; + } +}; + } /// Make sure old style next() calls result in a new copy of payloads -BOOST_AUTO_TEST_CASE(testPayloadCopy) -{ +TEST_F(AnalyzersTest, testPayloadCopy) { String s = L"how now brown cow"; TokenStreamPtr ts = 
newLucene(newLucene(s)); ts = newLucene(ts); @@ -113,5 +106,3 @@ BOOST_AUTO_TEST_CASE(testPayloadCopy) ts = newLucene(ts); verifyPayload(ts); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/BaseTokenStreamFixture.cpp b/src/test/analysis/BaseTokenStreamFixture.cpp index 94254cfa..1672c332 100644 --- a/src/test/analysis/BaseTokenStreamFixture.cpp +++ b/src/test/analysis/BaseTokenStreamFixture.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,232 +14,208 @@ #include "Analyzer.h" #include "StringReader.h" -namespace Lucene -{ - CheckClearAttributesAttribute::CheckClearAttributesAttribute() - { - clearCalled = false; - } - - CheckClearAttributesAttribute::~CheckClearAttributesAttribute() - { - } - - bool CheckClearAttributesAttribute::getAndResetClearCalled() - { - bool _clearCalled = clearCalled; - clearCalled = false; - return _clearCalled; - } - - void CheckClearAttributesAttribute::clear() - { - clearCalled = true; - } - - bool CheckClearAttributesAttribute::equals(LuceneObjectPtr other) - { - if (Attribute::equals(other)) - return true; - - CheckClearAttributesAttributePtr otherAttribute(boost::dynamic_pointer_cast(other)); - if (otherAttribute) - return (otherAttribute->clearCalled == clearCalled); - - return false; +namespace Lucene { + +CheckClearAttributesAttribute::CheckClearAttributesAttribute() { + clearCalled = false; +} + +CheckClearAttributesAttribute::~CheckClearAttributesAttribute() { +} + +bool CheckClearAttributesAttribute::getAndResetClearCalled() { + bool _clearCalled = clearCalled; + clearCalled = false; + return _clearCalled; +} + +void 
CheckClearAttributesAttribute::clear() { + clearCalled = true; +} + +bool CheckClearAttributesAttribute::equals(const LuceneObjectPtr& other) { + if (Attribute::equals(other)) { + return true; } - - int32_t CheckClearAttributesAttribute::hashCode() - { - return 76137213 ^ (clearCalled ? 1231 : 1237); + + CheckClearAttributesAttributePtr otherAttribute(boost::dynamic_pointer_cast(other)); + if (otherAttribute) { + return (otherAttribute->clearCalled == clearCalled); } - - void CheckClearAttributesAttribute::copyTo(AttributePtr target) - { - CheckClearAttributesAttributePtr clearAttribute(boost::dynamic_pointer_cast(target)); - clearAttribute->clear(); + + return false; +} + +int32_t CheckClearAttributesAttribute::hashCode() { + return 76137213 ^ (clearCalled ? 1231 : 1237); +} + +void CheckClearAttributesAttribute::copyTo(const AttributePtr& target) { + CheckClearAttributesAttributePtr clearAttribute(boost::dynamic_pointer_cast(target)); + clearAttribute->clear(); +} + +LuceneObjectPtr CheckClearAttributesAttribute::clone(const LuceneObjectPtr& other) { + LuceneObjectPtr clone = other ? 
other : newLucene(); + CheckClearAttributesAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); + cloneAttribute->clearCalled = clearCalled; + return cloneAttribute; +} + +BaseTokenStreamFixture::~BaseTokenStreamFixture() { +} + +void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets, + Collection types, Collection posIncrements, int32_t finalOffset) { + EXPECT_TRUE(output); + CheckClearAttributesAttributePtr checkClearAtt = ts->addAttribute(); + + EXPECT_TRUE(ts->hasAttribute()); + TermAttributePtr termAtt = ts->getAttribute(); + + OffsetAttributePtr offsetAtt; + if (startOffsets || endOffsets || finalOffset != -1) { + EXPECT_TRUE(ts->hasAttribute()); + offsetAtt = ts->getAttribute(); } - - LuceneObjectPtr CheckClearAttributesAttribute::clone(LuceneObjectPtr other) - { - LuceneObjectPtr clone = other ? other : newLucene(); - CheckClearAttributesAttributePtr cloneAttribute(boost::dynamic_pointer_cast(Attribute::clone(clone))); - cloneAttribute->clearCalled = clearCalled; - return cloneAttribute; + + TypeAttributePtr typeAtt; + if (types) { + EXPECT_TRUE(ts->hasAttribute()); + typeAtt = ts->getAttribute(); } - - BaseTokenStreamFixture::~BaseTokenStreamFixture() - { + + PositionIncrementAttributePtr posIncrAtt; + if (posIncrements) { + EXPECT_TRUE(ts->hasAttribute()); + posIncrAtt = ts->getAttribute(); } - - void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, - Collection types, Collection posIncrements, int32_t finalOffset) - { - BOOST_CHECK(output); - CheckClearAttributesAttributePtr checkClearAtt = ts->addAttribute(); - - BOOST_CHECK(ts->hasAttribute()); - TermAttributePtr termAtt = ts->getAttribute(); - - OffsetAttributePtr offsetAtt; - if (startOffsets || endOffsets || finalOffset != -1) - { - BOOST_CHECK(ts->hasAttribute()); - offsetAtt = 
ts->getAttribute(); + + ts->reset(); + for (int32_t i = 0; i < output.size(); ++i) { + // extra safety to enforce, that the state is not preserved and also assign bogus values + ts->clearAttributes(); + termAtt->setTermBuffer(L"bogusTerm"); + if (offsetAtt) { + offsetAtt->setOffset(14584724, 24683243); } - - TypeAttributePtr typeAtt; - if (types) - { - BOOST_CHECK(ts->hasAttribute()); - typeAtt = ts->getAttribute(); + if (typeAtt) { + typeAtt->setType(L"bogusType"); } - - PositionIncrementAttributePtr posIncrAtt; - if (posIncrements) - { - BOOST_CHECK(ts->hasAttribute()); - posIncrAtt = ts->getAttribute(); + if (posIncrAtt) { + posIncrAtt->setPositionIncrement(45987657); } - - ts->reset(); - for (int32_t i = 0; i < output.size(); ++i) - { - // extra safety to enforce, that the state is not preserved and also assign bogus values - ts->clearAttributes(); - termAtt->setTermBuffer(L"bogusTerm"); - if (offsetAtt) - offsetAtt->setOffset(14584724, 24683243); - if (typeAtt) - typeAtt->setType(L"bogusType"); - if (posIncrAtt) - posIncrAtt->setPositionIncrement(45987657); - - checkClearAtt->getAndResetClearCalled(); // reset it, because we called clearAttribute() before - BOOST_CHECK(ts->incrementToken()); - BOOST_CHECK(checkClearAtt->getAndResetClearCalled()); - - BOOST_CHECK_EQUAL(output[i], termAtt->term()); - if (startOffsets) - BOOST_CHECK_EQUAL(startOffsets[i], offsetAtt->startOffset()); - if (endOffsets) - BOOST_CHECK_EQUAL(endOffsets[i], offsetAtt->endOffset()); - if (types) - BOOST_CHECK_EQUAL(types[i], typeAtt->type()); - if (posIncrements) - BOOST_CHECK_EQUAL(posIncrements[i], posIncrAtt->getPositionIncrement()); + + checkClearAtt->getAndResetClearCalled(); // reset it, because we called clearAttribute() before + EXPECT_TRUE(ts->incrementToken()); + EXPECT_TRUE(checkClearAtt->getAndResetClearCalled()); + + EXPECT_EQ(output[i], termAtt->term()); + if (startOffsets) { + EXPECT_EQ(startOffsets[i], offsetAtt->startOffset()); + } + if (endOffsets) { + 
EXPECT_EQ(endOffsets[i], offsetAtt->endOffset()); + } + if (types) { + EXPECT_EQ(types[i], typeAtt->type()); + } + if (posIncrements) { + EXPECT_EQ(posIncrements[i], posIncrAtt->getPositionIncrement()); } - BOOST_CHECK(!ts->incrementToken()); - ts->end(); - if (finalOffset != -1) - BOOST_CHECK_EQUAL(finalOffset, offsetAtt->endOffset()); - ts->close(); - } - - void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output) - { - checkTokenStreamContents(ts, output, Collection(), Collection(), Collection(), Collection(), -1); - } - - void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection types) - { - checkTokenStreamContents(ts, output, Collection(), Collection(), types, Collection(), -1); - } - - void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection posIncrements) - { - checkTokenStreamContents(ts, output, Collection(), Collection(), Collection(), posIncrements, -1); - } - - void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets) - { - checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), Collection(), -1); - } - - void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, int32_t finalOffset) - { - checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), Collection(), finalOffset); - } - - void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) - { - checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), posIncrements, -1); - } - - void BaseTokenStreamFixture::checkTokenStreamContents(TokenStreamPtr ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements, int32_t 
finalOffset) - { - checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), posIncrements, finalOffset); - } - - void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, - Collection endOffsets, Collection types, Collection posIncrements) - { - checkTokenStreamContents(analyzer->tokenStream(L"dummy", newLucene(input)), output, startOffsets, endOffsets, types, posIncrements, (int32_t)input.length()); - } - - void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output) - { - checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), Collection(), Collection()); - } - - void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection types) - { - checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), types, Collection()); - } - - void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection posIncrements) - { - checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), Collection(), posIncrements); - } - - void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets) - { - checkAnalyzesTo(analyzer, input, output, startOffsets, endOffsets, Collection(), Collection()); - } - - void BaseTokenStreamFixture::checkAnalyzesTo(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) - { - checkAnalyzesTo(analyzer, input, output, startOffsets, endOffsets, Collection(), posIncrements); - } - - void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, - Collection endOffsets, Collection types, Collection posIncrements) - { - 
checkTokenStreamContents(analyzer->reusableTokenStream(L"dummy", newLucene(input)), output, startOffsets, endOffsets, types, posIncrements, (int32_t)input.length()); - } - - void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output) - { - checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), Collection(), Collection()); - } - - void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection types) - { - checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), types, Collection()); - } - - void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection posIncrements) - { - checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), Collection(), posIncrements); - } - - void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets) - { - checkAnalyzesToReuse(analyzer, input, output, startOffsets, endOffsets, Collection(), Collection()); - } - - void BaseTokenStreamFixture::checkAnalyzesToReuse(AnalyzerPtr analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) - { - checkAnalyzesToReuse(analyzer, input, output, startOffsets, endOffsets, Collection(), posIncrements); - } - - void BaseTokenStreamFixture::checkOneTerm(AnalyzerPtr analyzer, const String& input, const String& expected) - { - checkAnalyzesTo(analyzer, input, newCollection(expected)); } - - void BaseTokenStreamFixture::checkOneTermReuse(AnalyzerPtr analyzer, const String& input, const String& expected) - { - checkAnalyzesToReuse(analyzer, input, newCollection(expected)); + EXPECT_TRUE(!ts->incrementToken()); + ts->end(); + if (finalOffset != -1) { + EXPECT_EQ(finalOffset, offsetAtt->endOffset()); } + ts->close(); +} + +void 
BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output) { + checkTokenStreamContents(ts, output, Collection(), Collection(), Collection(), Collection(), -1); +} + +void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection types) { + checkTokenStreamContents(ts, output, Collection(), Collection(), types, Collection(), -1); +} + +void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection posIncrements) { + checkTokenStreamContents(ts, output, Collection(), Collection(), Collection(), posIncrements, -1); +} + +void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets) { + checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), Collection(), -1); +} + +void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets, int32_t finalOffset) { + checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), Collection(), finalOffset); +} + +void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { + checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), posIncrements, -1); +} + +void BaseTokenStreamFixture::checkTokenStreamContents(const TokenStreamPtr& ts, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements, int32_t finalOffset) { + checkTokenStreamContents(ts, output, startOffsets, endOffsets, Collection(), posIncrements, finalOffset); +} + +void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, + Collection endOffsets, Collection types, Collection posIncrements) { + 
checkTokenStreamContents(analyzer->tokenStream(L"dummy", newLucene(input)), output, startOffsets, endOffsets, types, posIncrements, (int32_t)input.length()); +} + +void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output) { + checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), Collection(), Collection()); +} + +void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection types) { + checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), types, Collection()); +} + +void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection posIncrements) { + checkAnalyzesTo(analyzer, input, output, Collection(), Collection(), Collection(), posIncrements); +} + +void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets) { + checkAnalyzesTo(analyzer, input, output, startOffsets, endOffsets, Collection(), Collection()); +} + +void BaseTokenStreamFixture::checkAnalyzesTo(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { + checkAnalyzesTo(analyzer, input, output, startOffsets, endOffsets, Collection(), posIncrements); +} + +void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, + Collection endOffsets, Collection types, Collection posIncrements) { + checkTokenStreamContents(analyzer->reusableTokenStream(L"dummy", newLucene(input)), output, startOffsets, endOffsets, types, posIncrements, (int32_t)input.length()); +} + +void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output) { + checkAnalyzesToReuse(analyzer, input, output, 
Collection(), Collection(), Collection(), Collection()); +} + +void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection types) { + checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), types, Collection()); +} + +void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection posIncrements) { + checkAnalyzesToReuse(analyzer, input, output, Collection(), Collection(), Collection(), posIncrements); +} + +void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets) { + checkAnalyzesToReuse(analyzer, input, output, startOffsets, endOffsets, Collection(), Collection()); +} + +void BaseTokenStreamFixture::checkAnalyzesToReuse(const AnalyzerPtr& analyzer, const String& input, Collection output, Collection startOffsets, Collection endOffsets, Collection posIncrements) { + checkAnalyzesToReuse(analyzer, input, output, startOffsets, endOffsets, Collection(), posIncrements); +} + +void BaseTokenStreamFixture::checkOneTerm(const AnalyzerPtr& analyzer, const String& input, const String& expected) { + checkAnalyzesTo(analyzer, input, newCollection(expected)); +} + +void BaseTokenStreamFixture::checkOneTermReuse(const AnalyzerPtr& analyzer, const String& input, const String& expected) { + checkAnalyzesToReuse(analyzer, input, newCollection(expected)); +} + } diff --git a/src/test/analysis/CachingTokenFilterTest.cpp b/src/test/analysis/CachingTokenFilterTest.cpp index 34794d69..2db324b8 100644 --- a/src/test/analysis/CachingTokenFilterTest.cpp +++ b/src/test/analysis/CachingTokenFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -21,64 +21,57 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(CachingTokenFilterTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture CachingTokenFilterTest; static Collection tokens = newCollection(L"term1", L"term2", L"term3", L"term2"); -static void checkTokens(TokenStreamPtr stream) -{ +static void checkTokens(const TokenStreamPtr& stream) { int32_t count = 0; TermAttributePtr termAtt = stream->getAttribute(); - BOOST_CHECK(termAtt); - while (stream->incrementToken()) - { - BOOST_CHECK(count < tokens.size()); - BOOST_CHECK_EQUAL(tokens[count], termAtt->term()); + EXPECT_TRUE(termAtt); + while (stream->incrementToken()) { + EXPECT_TRUE(count < tokens.size()); + EXPECT_EQ(tokens[count], termAtt->term()); ++count; } - BOOST_CHECK_EQUAL(tokens.size(), count); + EXPECT_EQ(tokens.size(), count); } -namespace TestCaching -{ - class TestableTokenStream : public TokenStream - { - public: - TestableTokenStream() - { - index = 0; - termAtt = addAttribute(); - offsetAtt = addAttribute(); - } - - virtual ~TestableTokenStream() - { - } - - protected: - int32_t index; - TermAttributePtr termAtt; - OffsetAttributePtr offsetAtt; - - public: - virtual bool incrementToken() - { - if (index == tokens.size()) - return false; - else - { - clearAttributes(); - termAtt->setTermBuffer(tokens[index++]); - offsetAtt->setOffset(0, 0); - return true; - } +namespace TestCaching { + +class TestableTokenStream : public TokenStream { +public: + TestableTokenStream() { + index = 0; + termAtt = addAttribute(); + offsetAtt = addAttribute(); + } + + virtual ~TestableTokenStream() { + } + +protected: + int32_t index; + TermAttributePtr termAtt; + OffsetAttributePtr offsetAtt; + +public: + virtual bool incrementToken() { + if (index == tokens.size()) { + return false; + } else { + 
clearAttributes(); + termAtt->setTermBuffer(tokens[index++]); + offsetAtt->setOffset(0, 0); + return true; } - }; + } +}; + } -BOOST_AUTO_TEST_CASE(testCaching) -{ +TEST_F(CachingTokenFilterTest, testCaching) { DirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); @@ -88,35 +81,33 @@ BOOST_AUTO_TEST_CASE(testCaching) // 1) we consume all tokens twice before we add the doc to the index checkTokens(stream); - stream->reset(); + stream->reset(); checkTokens(stream); - // 2) now add the document to the index and verify if all tokens are indexed don't reset the stream here, the + // 2) now add the document to the index and verify if all tokens are indexed don't reset the stream here, the // DocumentWriter should do that implicitly writer->addDocument(doc); writer->close(); IndexReaderPtr reader = IndexReader::open(dir, true); TermPositionsPtr termPositions = reader->termPositions(newLucene(L"preanalyzed", L"term1")); - BOOST_CHECK(termPositions->next()); - BOOST_CHECK_EQUAL(1, termPositions->freq()); - BOOST_CHECK_EQUAL(0, termPositions->nextPosition()); + EXPECT_TRUE(termPositions->next()); + EXPECT_EQ(1, termPositions->freq()); + EXPECT_EQ(0, termPositions->nextPosition()); termPositions->seek(newLucene(L"preanalyzed", L"term2")); - BOOST_CHECK(termPositions->next()); - BOOST_CHECK_EQUAL(2, termPositions->freq()); - BOOST_CHECK_EQUAL(1, termPositions->nextPosition()); - BOOST_CHECK_EQUAL(3, termPositions->nextPosition()); + EXPECT_TRUE(termPositions->next()); + EXPECT_EQ(2, termPositions->freq()); + EXPECT_EQ(1, termPositions->nextPosition()); + EXPECT_EQ(3, termPositions->nextPosition()); termPositions->seek(newLucene(L"preanalyzed", L"term3")); - BOOST_CHECK(termPositions->next()); - BOOST_CHECK_EQUAL(1, termPositions->freq()); - BOOST_CHECK_EQUAL(2, termPositions->nextPosition()); + EXPECT_TRUE(termPositions->next()); + EXPECT_EQ(1, termPositions->freq()); + 
EXPECT_EQ(2, termPositions->nextPosition()); reader->close(); // 3) reset stream and consume tokens again stream->reset(); checkTokens(stream); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/CharFilterTest.cpp b/src/test/analysis/CharFilterTest.cpp index 2d87108c..24f0d8e8 100644 --- a/src/test/analysis/CharFilterTest.cpp +++ b/src/test/analysis/CharFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -12,66 +12,52 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(CharFilterTest, LuceneTestFixture) +typedef LuceneTestFixture CharFilterTest; -class CharFilter1 : public CharFilter -{ +class CharFilter1 : public CharFilter { public: - CharFilter1(CharStreamPtr in) : CharFilter(in) - { + CharFilter1(const CharStreamPtr& in) : CharFilter(in) { } - - virtual ~CharFilter1() - { + + virtual ~CharFilter1() { } protected: - virtual int32_t correct(int32_t currentOff) - { + virtual int32_t correct(int32_t currentOff) { return currentOff + 1; } }; -class CharFilter2 : public CharFilter -{ +class CharFilter2 : public CharFilter { public: - CharFilter2(CharStreamPtr in) : CharFilter(in) - { + CharFilter2(const CharStreamPtr& in) : CharFilter(in) { } - - virtual ~CharFilter2() - { + + virtual ~CharFilter2() { } protected: - virtual int32_t correct(int32_t currentOff) - { + virtual int32_t correct(int32_t currentOff) { return currentOff + 2; } }; -BOOST_AUTO_TEST_CASE(testCharFilter1) -{ +TEST_F(CharFilterTest, testCharFilter1) { CharStreamPtr cs = newLucene(CharReader::get(newLucene(L""))); - BOOST_CHECK_EQUAL(1, cs->correctOffset(0)); + EXPECT_EQ(1, cs->correctOffset(0)); } 
-BOOST_AUTO_TEST_CASE(testCharFilter2) -{ +TEST_F(CharFilterTest, testCharFilter2) { CharStreamPtr cs = newLucene(CharReader::get(newLucene(L""))); - BOOST_CHECK_EQUAL(2, cs->correctOffset(0)); + EXPECT_EQ(2, cs->correctOffset(0)); } -BOOST_AUTO_TEST_CASE(testCharFilter12) -{ +TEST_F(CharFilterTest, testCharFilter12) { CharStreamPtr cs = newLucene(newLucene(CharReader::get(newLucene(L"")))); - BOOST_CHECK_EQUAL(3, cs->correctOffset(0)); + EXPECT_EQ(3, cs->correctOffset(0)); } -BOOST_AUTO_TEST_CASE(testCharFilter11) -{ +TEST_F(CharFilterTest, testCharFilter11) { CharStreamPtr cs = newLucene(newLucene(CharReader::get(newLucene(L"")))); - BOOST_CHECK_EQUAL(2, cs->correctOffset(0)); + EXPECT_EQ(2, cs->correctOffset(0)); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/KeywordAnalyzerTest.cpp b/src/test/analysis/KeywordAnalyzerTest.cpp index 8f340eca..d9ffc3cb 100644 --- a/src/test/analysis/KeywordAnalyzerTest.cpp +++ b/src/test/analysis/KeywordAnalyzerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -27,11 +27,9 @@ using namespace Lucene; -class KeywordAnalyzerTestFixture : public BaseTokenStreamFixture -{ +class KeywordAnalyzerTest : public BaseTokenStreamFixture { public: - KeywordAnalyzerTestFixture() - { + KeywordAnalyzerTest() { directory = newLucene(); IndexWriterPtr writer = newLucene(directory, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); @@ -43,9 +41,8 @@ class KeywordAnalyzerTestFixture : public BaseTokenStreamFixture searcher = newLucene(directory, true); } - - virtual ~KeywordAnalyzerTestFixture() - { + + virtual ~KeywordAnalyzerTest() { } protected: @@ -53,10 +50,7 @@ class KeywordAnalyzerTestFixture : public BaseTokenStreamFixture IndexSearcherPtr searcher; }; -BOOST_FIXTURE_TEST_SUITE(KeywordAnalyzerTest, KeywordAnalyzerTestFixture) - -BOOST_AUTO_TEST_CASE(testPerFieldAnalyzer) -{ +TEST_F(KeywordAnalyzerTest, testPerFieldAnalyzer) { PerFieldAnalyzerWrapperPtr analyzer = newLucene(newLucene()); analyzer->addAnalyzer(L"partnum", newLucene()); @@ -64,12 +58,11 @@ BOOST_AUTO_TEST_CASE(testPerFieldAnalyzer) QueryPtr query = queryParser->parse(L"partnum:Q36 AND SPACE"); Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; - BOOST_CHECK_EQUAL(L"+partnum:Q36 +space", query->toString(L"description")); - BOOST_CHECK_EQUAL(1, hits.size()); + EXPECT_EQ(L"+partnum:Q36 +space", query->toString(L"description")); + EXPECT_EQ(1, hits.size()); } -BOOST_AUTO_TEST_CASE(testMutipleDocument) -{ +TEST_F(KeywordAnalyzerTest, testMutipleDocument) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(), true, IndexWriter::MaxFieldLengthLIMITED); DocumentPtr doc = newLucene(); @@ -82,18 +75,15 @@ BOOST_AUTO_TEST_CASE(testMutipleDocument) IndexReaderPtr reader = IndexReader::open(dir, true); TermDocsPtr td = reader->termDocs(newLucene(L"partnum", L"Q36")); - BOOST_CHECK(td->next()); + 
EXPECT_TRUE(td->next()); td = reader->termDocs(newLucene(L"partnum", L"Q37")); - BOOST_CHECK(td->next()); + EXPECT_TRUE(td->next()); } -BOOST_AUTO_TEST_CASE(testOffsets) -{ +TEST_F(KeywordAnalyzerTest, testOffsets) { TokenStreamPtr stream = newLucene()->tokenStream(L"field", newLucene(L"abcd")); OffsetAttributePtr offsetAtt = stream->addAttribute(); - BOOST_CHECK(stream->incrementToken()); - BOOST_CHECK_EQUAL(0, offsetAtt->startOffset()); - BOOST_CHECK_EQUAL(4, offsetAtt->endOffset()); + EXPECT_TRUE(stream->incrementToken()); + EXPECT_EQ(0, offsetAtt->startOffset()); + EXPECT_EQ(4, offsetAtt->endOffset()); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/LengthFilterTest.cpp b/src/test/analysis/LengthFilterTest.cpp index add84bba..7234c8e0 100644 --- a/src/test/analysis/LengthFilterTest.cpp +++ b/src/test/analysis/LengthFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -14,21 +14,18 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(LengthFilterTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture LengthFilterTest; -BOOST_AUTO_TEST_CASE(testFilter) -{ +TEST_F(LengthFilterTest, testFilter) { TokenStreamPtr stream = newLucene(newLucene(L"short toolong evenmuchlongertext a ab toolong foo")); LengthFilterPtr filter = newLucene(stream, 2, 6); TermAttributePtr termAtt = filter->getAttribute(); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(L"short", termAtt->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(L"ab", termAtt->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(L"foo", termAtt->term()); - BOOST_CHECK(!filter->incrementToken()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(L"short", termAtt->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(L"ab", termAtt->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(L"foo", termAtt->term()); + EXPECT_TRUE(!filter->incrementToken()); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/MappingCharFilterTest.cpp b/src/test/analysis/MappingCharFilterTest.cpp index d3c3edd0..47b4f97e 100644 --- a/src/test/analysis/MappingCharFilterTest.cpp +++ b/src/test/analysis/MappingCharFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,11 +15,9 @@ using namespace Lucene; -class MappingCharFilterTestFixture : public BaseTokenStreamFixture -{ +class MappingCharFilterTest : public BaseTokenStreamFixture { public: - MappingCharFilterTestFixture() - { + MappingCharFilterTest() { normMap = newLucene(); normMap->add(L"aa", L"a"); @@ -33,92 +31,79 @@ class MappingCharFilterTestFixture : public BaseTokenStreamFixture normMap->add(L"empty", L""); } - - virtual ~MappingCharFilterTestFixture() - { + + virtual ~MappingCharFilterTest() { } public: NormalizeCharMapPtr normMap; }; -BOOST_FIXTURE_TEST_SUITE(MappingCharFilterTest, MappingCharFilterTestFixture) - -BOOST_AUTO_TEST_CASE(testReaderReset) -{ +TEST_F(MappingCharFilterTest, testReaderReset) { CharStreamPtr cs = newLucene(normMap, newLucene(L"x")); CharArray buf = CharArray::newInstance(10); int32_t len = cs->read(buf.get(), 0, 10); - BOOST_CHECK_EQUAL(1, len); - BOOST_CHECK_EQUAL(L'x', buf[0]) ; + EXPECT_EQ(1, len); + EXPECT_EQ(L'x', buf[0]) ; len = cs->read(buf.get(), 0, 10); - BOOST_CHECK_EQUAL(-1, len); + EXPECT_EQ(-1, len); // rewind cs->reset(); len = cs->read(buf.get(), 0, 10); - BOOST_CHECK_EQUAL(1, len); - BOOST_CHECK_EQUAL(L'x', buf[0]) ; + EXPECT_EQ(1, len); + EXPECT_EQ(L'x', buf[0]) ; } -BOOST_AUTO_TEST_CASE(testNothingChange) -{ +TEST_F(MappingCharFilterTest, testNothingChange) { CharStreamPtr cs = newLucene(normMap, newLucene(L"x")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"x"), newCollection(0), newCollection(1)); } -BOOST_AUTO_TEST_CASE(test1to1) -{ +TEST_F(MappingCharFilterTest, test1to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"h")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"i"), newCollection(0), newCollection(1)); } -BOOST_AUTO_TEST_CASE(test1to2) -{ +TEST_F(MappingCharFilterTest, test1to2) { CharStreamPtr cs = newLucene(normMap, newLucene(L"j")); TokenStreamPtr ts 
= newLucene(cs); checkTokenStreamContents(ts, newCollection(L"jj"), newCollection(0), newCollection(1)); } -BOOST_AUTO_TEST_CASE(test1to3) -{ +TEST_F(MappingCharFilterTest, test1to3) { CharStreamPtr cs = newLucene(normMap, newLucene(L"k")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"kkk"), newCollection(0), newCollection(1)); } -BOOST_AUTO_TEST_CASE(test2to4) -{ +TEST_F(MappingCharFilterTest, test2to4) { CharStreamPtr cs = newLucene(normMap, newLucene(L"ll")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"llll"), newCollection(0), newCollection(2)); } -BOOST_AUTO_TEST_CASE(test2to1) -{ +TEST_F(MappingCharFilterTest, test2to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"aa")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"a"), newCollection(0), newCollection(2)); } -BOOST_AUTO_TEST_CASE(test3to1) -{ +TEST_F(MappingCharFilterTest, test3to1) { CharStreamPtr cs = newLucene(normMap, newLucene(L"bbb")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"b"), newCollection(0), newCollection(3)); } -BOOST_AUTO_TEST_CASE(test4to2) -{ +TEST_F(MappingCharFilterTest, test4to2) { CharStreamPtr cs = newLucene(normMap, newLucene(L"cccc")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"cc"), newCollection(0), newCollection(4)); } -BOOST_AUTO_TEST_CASE(test5to0) -{ +TEST_F(MappingCharFilterTest, test5to0) { CharStreamPtr cs = newLucene(normMap, newLucene(L"empty")); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, Collection::newInstance()); @@ -141,12 +126,11 @@ BOOST_AUTO_TEST_CASE(test5to0) // cccc,11,15 => cc,11,15 // bbb,16,19 => b,16,19 // aa,20,22 => a,20,22 -BOOST_AUTO_TEST_CASE(testTokenStream) -{ +TEST_F(MappingCharFilterTest, testTokenStream) { CharStreamPtr cs = newLucene(normMap, CharReader::get(newLucene(L"h i j k ll cccc bbb aa"))); TokenStreamPtr ts = newLucene(cs); - 
checkTokenStreamContents(ts, newCollection(L"i", L"i", L"jj", L"kkk", L"llll", L"cc", L"b", L"a"), - newCollection(0, 2, 4, 6, 8, 11, 16, 20), + checkTokenStreamContents(ts, newCollection(L"i", L"i", L"jj", L"kkk", L"llll", L"cc", L"b", L"a"), + newCollection(0, 2, 4, 6, 8, 11, 16, 20), newCollection(1, 3, 5, 7, 10, 15, 19, 22)); } @@ -160,11 +144,8 @@ BOOST_AUTO_TEST_CASE(testTokenStream) // aaaa,0,4 => a,0,4 // ll,5,7 => llllllll,5,7 // h,8,9 => i,8,9 -BOOST_AUTO_TEST_CASE(testChained) -{ +TEST_F(MappingCharFilterTest, testChained) { CharStreamPtr cs = newLucene(normMap, (CharStreamPtr)newLucene(normMap, CharReader::get(newLucene(L"aaaa ll h")))); TokenStreamPtr ts = newLucene(cs); checkTokenStreamContents(ts, newCollection(L"a", L"llllllll", L"i"), newCollection(0, 5, 8), newCollection(4, 7, 9)); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/NumericTokenStreamTest.cpp b/src/test/analysis/NumericTokenStreamTest.cpp index bc57abdf..aba9cfb0 100644 --- a/src/test/analysis/NumericTokenStreamTest.cpp +++ b/src/test/analysis/NumericTokenStreamTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,46 +13,47 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(NumericTokenStreamTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture NumericTokenStreamTest; static int64_t lvalue = 4573245871874382LL; static int32_t ivalue = 123456; -BOOST_AUTO_TEST_CASE(testLongStream) -{ +TEST_F(NumericTokenStreamTest, testLongStream) { NumericTokenStreamPtr stream = newLucene()->setLongValue(lvalue); // use getAttribute to test if attributes really exist, if not an IAE will be thrown TermAttributePtr termAtt = stream->getAttribute(); TypeAttributePtr typeAtt = stream->getAttribute(); - for (int32_t shift = 0; shift < 64; shift += NumericUtils::PRECISION_STEP_DEFAULT) - { - BOOST_CHECK(stream->incrementToken()); - BOOST_CHECK_EQUAL(NumericUtils::longToPrefixCoded(lvalue, shift), termAtt->term()); - BOOST_CHECK_EQUAL(shift == 0 ? NumericTokenStream::TOKEN_TYPE_FULL_PREC() : NumericTokenStream::TOKEN_TYPE_LOWER_PREC(), typeAtt->type()); + for (int32_t shift = 0; shift < 64; shift += NumericUtils::PRECISION_STEP_DEFAULT) { + EXPECT_TRUE(stream->incrementToken()); + EXPECT_EQ(NumericUtils::longToPrefixCoded(lvalue, shift), termAtt->term()); + EXPECT_EQ(shift == 0 ? 
NumericTokenStream::TOKEN_TYPE_FULL_PREC() : NumericTokenStream::TOKEN_TYPE_LOWER_PREC(), typeAtt->type()); } - BOOST_CHECK(!stream->incrementToken()); + EXPECT_TRUE(!stream->incrementToken()); } -BOOST_AUTO_TEST_CASE(testIntStream) -{ +TEST_F(NumericTokenStreamTest, testIntStream) { NumericTokenStreamPtr stream = newLucene()->setIntValue(ivalue); // use getAttribute to test if attributes really exist, if not an IAE will be thrown TermAttributePtr termAtt = stream->getAttribute(); TypeAttributePtr typeAtt = stream->getAttribute(); - for (int32_t shift = 0; shift < 32; shift += NumericUtils::PRECISION_STEP_DEFAULT) - { - BOOST_CHECK(stream->incrementToken()); - BOOST_CHECK_EQUAL(NumericUtils::intToPrefixCoded(ivalue, shift), termAtt->term()); - BOOST_CHECK_EQUAL(shift == 0 ? NumericTokenStream::TOKEN_TYPE_FULL_PREC() : NumericTokenStream::TOKEN_TYPE_LOWER_PREC(), typeAtt->type()); + for (int32_t shift = 0; shift < 32; shift += NumericUtils::PRECISION_STEP_DEFAULT) { + EXPECT_TRUE(stream->incrementToken()); + EXPECT_EQ(NumericUtils::intToPrefixCoded(ivalue, shift), termAtt->term()); + EXPECT_EQ(shift == 0 ? 
NumericTokenStream::TOKEN_TYPE_FULL_PREC() : NumericTokenStream::TOKEN_TYPE_LOWER_PREC(), typeAtt->type()); } - BOOST_CHECK(!stream->incrementToken()); + EXPECT_TRUE(!stream->incrementToken()); } -BOOST_AUTO_TEST_CASE(testNotInitialized) -{ +TEST_F(NumericTokenStreamTest, testNotInitialized) { NumericTokenStreamPtr stream = newLucene(); - BOOST_CHECK_EXCEPTION(stream->reset(), IllegalStateException, check_exception(LuceneException::IllegalState)); - BOOST_CHECK_EXCEPTION(stream->incrementToken(), IllegalStateException, check_exception(LuceneException::IllegalState)); + try { + stream->reset(); + } catch (IllegalStateException& e) { + EXPECT_TRUE(check_exception(LuceneException::IllegalState)(e)); + } + try { + stream->incrementToken(); + } catch (IllegalStateException& e) { + EXPECT_TRUE(check_exception(LuceneException::IllegalState)(e)); + } } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/PerFieldAnalzyerWrapperTest.cpp b/src/test/analysis/PerFieldAnalzyerWrapperTest.cpp index d7a2f5e8..af9b9617 100644 --- a/src/test/analysis/PerFieldAnalzyerWrapperTest.cpp +++ b/src/test/analysis/PerFieldAnalzyerWrapperTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,10 +15,9 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(PerFieldAnalzyerWrapperTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture PerFieldAnalzyerWrapperTest; -BOOST_AUTO_TEST_CASE(testPerField) -{ +TEST_F(PerFieldAnalzyerWrapperTest, testPerField) { String text = L"Qwerty"; PerFieldAnalyzerWrapperPtr analyzer = newLucene(newLucene()); analyzer->addAnalyzer(L"special", newLucene()); @@ -26,13 +25,11 @@ BOOST_AUTO_TEST_CASE(testPerField) TokenStreamPtr tokenStream = analyzer->tokenStream(L"field", newLucene(text)); TermAttributePtr termAtt = tokenStream->getAttribute(); - BOOST_CHECK(tokenStream->incrementToken()); - BOOST_CHECK_EQUAL(L"Qwerty", termAtt->term()); + EXPECT_TRUE(tokenStream->incrementToken()); + EXPECT_EQ(L"Qwerty", termAtt->term()); tokenStream = analyzer->tokenStream(L"special", newLucene(text)); termAtt = tokenStream->getAttribute(); - BOOST_CHECK(tokenStream->incrementToken()); - BOOST_CHECK_EQUAL(L"qwerty", termAtt->term()); + EXPECT_TRUE(tokenStream->incrementToken()); + EXPECT_EQ(L"qwerty", termAtt->term()); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/StopAnalyzerTest.cpp b/src/test/analysis/StopAnalyzerTest.cpp index 43bd430b..e4e49c4f 100644 --- a/src/test/analysis/StopAnalyzerTest.cpp +++ b/src/test/analysis/StopAnalyzerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -14,19 +14,17 @@ using namespace Lucene; -class StopAnalyzerTestFixture : public BaseTokenStreamFixture -{ +class StopAnalyzerTest : public BaseTokenStreamFixture { public: - StopAnalyzerTestFixture() - { + StopAnalyzerTest() { stop = newLucene(LuceneVersion::LUCENE_CURRENT); inValidTokens = HashSet::newInstance(); - for (HashSet::iterator word = StopAnalyzer::ENGLISH_STOP_WORDS_SET().begin(); word != StopAnalyzer::ENGLISH_STOP_WORDS_SET().end(); ++word) + for (HashSet::iterator word = StopAnalyzer::ENGLISH_STOP_WORDS_SET().begin(); word != StopAnalyzer::ENGLISH_STOP_WORDS_SET().end(); ++word) { inValidTokens.add(*word); + } } - - virtual ~StopAnalyzerTestFixture() - { + + virtual ~StopAnalyzerTest() { } protected: @@ -34,22 +32,19 @@ class StopAnalyzerTestFixture : public BaseTokenStreamFixture HashSet inValidTokens; }; -BOOST_FIXTURE_TEST_SUITE(StopAnalyzerTest, StopAnalyzerTestFixture) - -BOOST_AUTO_TEST_CASE(testDefaults) -{ - BOOST_CHECK(stop); +TEST_F(StopAnalyzerTest, testDefaults) { + EXPECT_TRUE(stop); StringReaderPtr reader = newLucene(L"This is a test of the english stop analyzer"); TokenStreamPtr stream = stop->tokenStream(L"test", reader); - BOOST_CHECK(stream); + EXPECT_TRUE(stream); TermAttributePtr termAtt = stream->getAttribute(); - while (stream->incrementToken()) - BOOST_CHECK(!inValidTokens.contains(termAtt->term())); + while (stream->incrementToken()) { + EXPECT_TRUE(!inValidTokens.contains(termAtt->term())); + } } -BOOST_AUTO_TEST_CASE(testStopList) -{ +TEST_F(StopAnalyzerTest, testStopList) { HashSet stopWordsSet = HashSet::newInstance(); stopWordsSet.add(L"good"); stopWordsSet.add(L"test"); @@ -57,20 +52,18 @@ BOOST_AUTO_TEST_CASE(testStopList) StopAnalyzerPtr newStop = newLucene(LuceneVersion::LUCENE_24, stopWordsSet); StringReaderPtr reader = newLucene(L"This is a good test of the english stop analyzer"); TokenStreamPtr stream = newStop->tokenStream(L"test", 
reader); - BOOST_CHECK(stream); + EXPECT_TRUE(stream); TermAttributePtr termAtt = stream->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stream->addAttribute(); - while (stream->incrementToken()) - { + while (stream->incrementToken()) { String text = termAtt->term(); - BOOST_CHECK(!stopWordsSet.contains(text)); - BOOST_CHECK_EQUAL(1, posIncrAtt->getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments. + EXPECT_TRUE(!stopWordsSet.contains(text)); + EXPECT_EQ(1, posIncrAtt->getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments. } } -BOOST_AUTO_TEST_CASE(testStopListPositions) -{ +TEST_F(StopAnalyzerTest, testStopListPositions) { HashSet stopWordsSet = HashSet::newInstance(); stopWordsSet.add(L"good"); stopWordsSet.add(L"test"); @@ -79,17 +72,14 @@ BOOST_AUTO_TEST_CASE(testStopListPositions) StringReaderPtr reader = newLucene(L"This is a good test of the english stop analyzer with positions"); Collection expectedIncr = newCollection(1, 1, 1, 3, 1, 1, 1, 2, 1); TokenStreamPtr stream = newStop->tokenStream(L"test", reader); - BOOST_CHECK(stream); + EXPECT_TRUE(stream); int32_t i = 0; TermAttributePtr termAtt = stream->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stream->addAttribute(); - while (stream->incrementToken()) - { + while (stream->incrementToken()) { String text = termAtt->term(); - BOOST_CHECK(!stopWordsSet.contains(text)); - BOOST_CHECK_EQUAL(expectedIncr[i++], posIncrAtt->getPositionIncrement()); + EXPECT_TRUE(!stopWordsSet.contains(text)); + EXPECT_EQ(expectedIncr[i++], posIncrAtt->getPositionIncrement()); } } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/StopFilterTest.cpp b/src/test/analysis/StopFilterTest.cpp index f78a5d44..e725520f 100644 --- a/src/test/analysis/StopFilterTest.cpp +++ b/src/test/analysis/StopFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. 
All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -16,25 +16,22 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(StopFilterTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture StopFilterTest; -static void doTestStopPositons(StopFilterPtr stpf, bool enableIcrements) -{ +static void doTestStopPositons(const StopFilterPtr& stpf, bool enableIcrements) { stpf->setEnablePositionIncrements(enableIcrements); TermAttributePtr termAtt = stpf->getAttribute(); PositionIncrementAttributePtr posIncrAtt = stpf->getAttribute(); - for (int32_t i = 0; i < 20; i += 3) - { - BOOST_CHECK(stpf->incrementToken()); + for (int32_t i = 0; i < 20; i += 3) { + EXPECT_TRUE(stpf->incrementToken()); String w = intToEnglish(i); - BOOST_CHECK_EQUAL(w, termAtt->term()); - BOOST_CHECK_EQUAL(enableIcrements ? (i == 0 ? 1 : 3) : 1, posIncrAtt->getPositionIncrement()); + EXPECT_EQ(w, termAtt->term()); + EXPECT_EQ(enableIcrements ? (i == 0 ? 
1 : 3) : 1, posIncrAtt->getPositionIncrement()); } - BOOST_CHECK(!stpf->incrementToken()); + EXPECT_TRUE(!stpf->incrementToken()); } -BOOST_AUTO_TEST_CASE(testExactCase) -{ +TEST_F(StopFilterTest, testExactCase) { StringReaderPtr reader = newLucene(L"Now is The Time"); HashSet stopWords = HashSet::newInstance(); stopWords.add(L"is"); @@ -42,15 +39,14 @@ BOOST_AUTO_TEST_CASE(testExactCase) stopWords.add(L"Time"); TokenStreamPtr stream = newLucene(false, newLucene(reader), stopWords, false); TermAttributePtr termAtt = stream->getAttribute(); - BOOST_CHECK(stream->incrementToken()); - BOOST_CHECK_EQUAL(L"Now", termAtt->term()); - BOOST_CHECK(stream->incrementToken()); - BOOST_CHECK_EQUAL(L"The", termAtt->term()); - BOOST_CHECK(!stream->incrementToken()); + EXPECT_TRUE(stream->incrementToken()); + EXPECT_EQ(L"Now", termAtt->term()); + EXPECT_TRUE(stream->incrementToken()); + EXPECT_EQ(L"The", termAtt->term()); + EXPECT_TRUE(!stream->incrementToken()); } -BOOST_AUTO_TEST_CASE(testIgnoreCase) -{ +TEST_F(StopFilterTest, testIgnoreCase) { StringReaderPtr reader = newLucene(L"Now is The Time"); HashSet stopWords = HashSet::newInstance(); stopWords.add(L"is"); @@ -58,21 +54,20 @@ BOOST_AUTO_TEST_CASE(testIgnoreCase) stopWords.add(L"Time"); TokenStreamPtr stream = newLucene(false, newLucene(reader), stopWords, true); TermAttributePtr termAtt = stream->getAttribute(); - BOOST_CHECK(stream->incrementToken()); - BOOST_CHECK_EQUAL(L"Now", termAtt->term()); - BOOST_CHECK(!stream->incrementToken()); + EXPECT_TRUE(stream->incrementToken()); + EXPECT_EQ(L"Now", termAtt->term()); + EXPECT_TRUE(!stream->incrementToken()); } -BOOST_AUTO_TEST_CASE(testStopPositons) -{ +TEST_F(StopFilterTest, testStopPositons) { StringStream buf; Collection stopWords = Collection::newInstance(); - for (int32_t i = 0; i < 20; ++i) - { + for (int32_t i = 0; i < 20; ++i) { String w = intToEnglish(i); buf << w << L" "; - if (i % 3 != 0) + if (i % 3 != 0) { stopWords.add(w); + } } HashSet stopSet = 
HashSet::newInstance(stopWords.begin(), stopWords.end()); // with increments @@ -86,12 +81,12 @@ BOOST_AUTO_TEST_CASE(testStopPositons) // with increments, concatenating two stop filters Collection stopWords0 = Collection::newInstance(); Collection stopWords1 = Collection::newInstance(); - for (int32_t i = 0; i < stopWords.size(); ++i) - { - if (i % 2 == 0) + for (int32_t i = 0; i < stopWords.size(); ++i) { + if (i % 2 == 0) { stopWords0.add(stopWords[i]); - else + } else { stopWords1.add(stopWords[i]); + } } HashSet stopSet0 = HashSet::newInstance(stopWords0.begin(), stopWords0.end()); HashSet stopSet1 = HashSet::newInstance(stopWords1.begin(), stopWords1.end()); @@ -101,5 +96,3 @@ BOOST_AUTO_TEST_CASE(testStopPositons) StopFilterPtr stpf01 = newLucene(false, stpf0, stopSet1); // two stop filters concatenated! doTestStopPositons(stpf01, true); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/TeeSinkTokenFilterTest.cpp b/src/test/analysis/TeeSinkTokenFilterTest.cpp index 43f9d807..df90b5f2 100644 --- a/src/test/analysis/TeeSinkTokenFilterTest.cpp +++ b/src/test/analysis/TeeSinkTokenFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -22,55 +22,48 @@ using namespace Lucene; -class TheSinkFilter : public SinkFilter -{ +class TheSinkFilter : public SinkFilter { public: - virtual ~TheSinkFilter() - { + virtual ~TheSinkFilter() { } public: - virtual bool accept(AttributeSourcePtr source) - { + virtual bool accept(const AttributeSourcePtr& source) { TermAttributePtr termAtt = source->getAttribute(); return boost::iequals(termAtt->term(), L"The"); } }; -class DogSinkFilter : public SinkFilter -{ +class DogSinkFilter : public SinkFilter { public: - virtual ~DogSinkFilter() - { + virtual ~DogSinkFilter() { } public: - virtual bool accept(AttributeSourcePtr source) - { + virtual bool accept(const AttributeSourcePtr& source) { TermAttributePtr termAtt = source->getAttribute(); return boost::iequals(termAtt->term(), L"Dogs"); } }; -class TeeSinkTokenFilterTestFixture : public BaseTokenStreamFixture -{ +class TeeSinkTokenFilterTest : public BaseTokenStreamFixture { public: - TeeSinkTokenFilterTestFixture() - { + TeeSinkTokenFilterTest() { tokens1 = newCollection(L"The", L"quick", L"Burgundy", L"Fox", L"jumped", L"over", L"the", L"lazy", L"Red", L"Dogs"); tokens2 = newCollection(L"The", L"Lazy", L"Dogs", L"should", L"stay", L"on", L"the", L"porch"); - - for (int32_t i = 0; i < tokens1.size(); ++i) + + for (int32_t i = 0; i < tokens1.size(); ++i) { buffer1 << tokens1[i] << L" "; - for (int32_t i = 0; i < tokens2.size(); ++i) + } + for (int32_t i = 0; i < tokens2.size(); ++i) { buffer2 << tokens2[i] << L" "; - + } + theFilter = newLucene(); dogFilter = newLucene(); } - - virtual ~TeeSinkTokenFilterTestFixture() - { + + virtual ~TeeSinkTokenFilterTest() { } protected: @@ -78,19 +71,16 @@ class TeeSinkTokenFilterTestFixture : public BaseTokenStreamFixture StringStream buffer2; Collection tokens1; Collection tokens2; - + SinkFilterPtr theFilter; SinkFilterPtr dogFilter; }; -BOOST_FIXTURE_TEST_SUITE(TeeSinkTokenFilterTest, 
TeeSinkTokenFilterTestFixture) - -BOOST_AUTO_TEST_CASE(testGeneral) -{ +TEST_F(TeeSinkTokenFilterTest, testGeneral) { TeeSinkTokenFilterPtr source = newLucene(newLucene(newLucene(buffer1.str()))); TokenStreamPtr sink1 = source->newSinkTokenStream(); TokenStreamPtr sink2 = source->newSinkTokenStream(theFilter); - + source->addAttribute(); sink1->addAttribute(); sink2->addAttribute(); @@ -100,13 +90,12 @@ BOOST_AUTO_TEST_CASE(testGeneral) checkTokenStreamContents(sink2, newCollection(L"The", L"the")); } -BOOST_AUTO_TEST_CASE(testMultipleSources) -{ +TEST_F(TeeSinkTokenFilterTest, testMultipleSources) { TeeSinkTokenFilterPtr tee1 = newLucene(newLucene(newLucene(buffer1.str()))); SinkTokenStreamPtr dogDetector = tee1->newSinkTokenStream(dogFilter); SinkTokenStreamPtr theDetector = tee1->newSinkTokenStream(theFilter); TokenStreamPtr source1 = newLucene(tee1); - + tee1->addAttribute(); dogDetector->addAttribute(); theDetector->addAttribute(); @@ -115,7 +104,7 @@ BOOST_AUTO_TEST_CASE(testMultipleSources) tee2->addSinkTokenStream(dogDetector); tee2->addSinkTokenStream(theDetector); TokenStreamPtr source2 = tee2; - + checkTokenStreamContents(source1, tokens1); checkTokenStreamContents(source2, tokens2); @@ -125,80 +114,74 @@ BOOST_AUTO_TEST_CASE(testMultipleSources) source1->reset(); TokenStreamPtr lowerCasing = newLucene(source1); Collection lowerCaseTokens = Collection::newInstance(tokens1.size()); - for (int32_t i = 0; i < tokens1.size(); ++i) + for (int32_t i = 0; i < tokens1.size(); ++i) { lowerCaseTokens[i] = StringUtils::toLower((const String&)tokens1[i]); + } checkTokenStreamContents(lowerCasing, lowerCaseTokens); } -namespace TestPerformance -{ - class ModuloTokenFilter : public TokenFilter - { - public: - ModuloTokenFilter(TokenStreamPtr input, int32_t mc) : TokenFilter(input) - { - modCount = mc; - count = 0; - } - - virtual ~ModuloTokenFilter() - { - } - - public: - int32_t modCount; - int32_t count; - - public: - // return every 100 tokens - virtual bool 
incrementToken() - { - bool hasNext = false; - for (hasNext = input->incrementToken(); hasNext && count % modCount != 0; hasNext = input->incrementToken()) - ++count; - ++count; - return hasNext; - } - }; - - class ModuloSinkFilter : public SinkFilter - { - public: - ModuloSinkFilter(int32_t mc) - { - modCount = mc; - count = 0; - } - - virtual ~ModuloSinkFilter() - { - } - - public: - int32_t modCount; - int32_t count; - - public: - virtual bool accept(AttributeSourcePtr source) - { - bool b = (source && count % modCount == 0); +namespace TestPerformance { + +class ModuloTokenFilter : public TokenFilter { +public: + ModuloTokenFilter(const TokenStreamPtr& input, int32_t mc) : TokenFilter(input) { + modCount = mc; + count = 0; + } + + virtual ~ModuloTokenFilter() { + } + +public: + int32_t modCount; + int32_t count; + +public: + // return every 100 tokens + virtual bool incrementToken() { + bool hasNext = false; + for (hasNext = input->incrementToken(); hasNext && count % modCount != 0; hasNext = input->incrementToken()) { ++count; - return b; } - }; + ++count; + return hasNext; + } +}; + +class ModuloSinkFilter : public SinkFilter { +public: + ModuloSinkFilter(int32_t mc) { + modCount = mc; + count = 0; + } + + virtual ~ModuloSinkFilter() { + } + +public: + int32_t modCount; + int32_t count; + +public: + virtual bool accept(const AttributeSourcePtr& source) { + bool b = (source && count % modCount == 0); + ++count; + return b; + } +}; + } /// Not an explicit test, just useful to print out some info on performance -BOOST_AUTO_TEST_CASE(testPerformance) -{ +TEST_F(TeeSinkTokenFilterTest, testPerformance) { Collection tokCount = newCollection(100, 500, 1000, 2000, 5000, 10000); Collection modCounts = newCollection(1, 2, 5, 10, 20, 50, 100, 200, 500); - for (int32_t k = 0; k < tokCount.size(); ++k) - { + for (int32_t k = 0; k < tokCount.size(); ++k) { StringStream buffer; - BOOST_TEST_MESSAGE("-----Tokens: " << tokCount[k] << "-----"); - for (int32_t i = 0; i < 
tokCount[k]; ++i) + // std::cout << "-----Tokens: " << tokCount[k] << "-----"; + for (int32_t i = 0; i < tokCount[k]; ++i) { buffer << StringUtils::toUpper(intToEnglish(i)) << L" "; + } // make sure we produce the same tokens TeeSinkTokenFilterPtr teeStream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str())))); TokenStreamPtr sink = teeStream->newSinkTokenStream(newLucene(100)); @@ -206,50 +189,48 @@ BOOST_AUTO_TEST_CASE(testPerformance) TokenStreamPtr stream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))), 100); TermAttributePtr tfTok = stream->addAttribute(); TermAttributePtr sinkTok = sink->addAttribute(); - for (int32_t i = 0; stream->incrementToken(); ++i) - { - BOOST_CHECK(sink->incrementToken()); - BOOST_CHECK(tfTok->equals(sinkTok)); + for (int32_t i = 0; stream->incrementToken(); ++i) { + EXPECT_TRUE(sink->incrementToken()); + EXPECT_TRUE(tfTok->equals(sinkTok)); } - + // simulate two fields, each being analyzed once, for 20 documents - for (int32_t j = 0; j < modCounts.size(); ++j) - { + for (int32_t j = 0; j < modCounts.size(); ++j) { int32_t tfPos = 0; int64_t start = MiscUtils::currentTimeMillis(); - for (int32_t i = 0; i < 20; ++i) - { + for (int32_t i = 0; i < 20; ++i) { stream = newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))); PositionIncrementAttributePtr posIncrAtt = stream->getAttribute(); - while (stream->incrementToken()) + while (stream->incrementToken()) { tfPos += posIncrAtt->getPositionIncrement(); + } stream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str()))), modCounts[j]); posIncrAtt = stream->getAttribute(); - while (stream->incrementToken()) + while (stream->incrementToken()) { tfPos += posIncrAtt->getPositionIncrement(); + } } int64_t finish = MiscUtils::currentTimeMillis(); - BOOST_TEST_MESSAGE("ModCount: " << modCounts[j] << " Two fields took " << (finish - start) << " ms"); + // std::cout << 
"ModCount: " << modCounts[j] << " Two fields took " << (finish - start) << " ms"; int32_t sinkPos = 0; // simulate one field with one sink start = MiscUtils::currentTimeMillis(); - for (int32_t i = 0; i < 20; ++i) - { + for (int32_t i = 0; i < 20; ++i) { teeStream = newLucene(newLucene(newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(buffer.str())))); sink = teeStream->newSinkTokenStream(newLucene(modCounts[j])); PositionIncrementAttributePtr posIncrAtt = teeStream->getAttribute(); - while (teeStream->incrementToken()) + while (teeStream->incrementToken()) { sinkPos += posIncrAtt->getPositionIncrement(); + } posIncrAtt = sink->getAttribute(); - while (sink->incrementToken()) + while (sink->incrementToken()) { sinkPos += posIncrAtt->getPositionIncrement(); + } } finish = MiscUtils::currentTimeMillis(); - BOOST_TEST_MESSAGE("ModCount: " << modCounts[j] << " Tee fields took " << (finish - start) << " ms"); - BOOST_CHECK_EQUAL(sinkPos, tfPos); + // std::cout << "ModCount: " << modCounts[j] << " Tee fields took " << (finish - start) << " ms"; + EXPECT_EQ(sinkPos, tfPos); } - BOOST_TEST_MESSAGE("- End Tokens: " << tokCount[k] << "-----"); + // std::cout << "- End Tokens: " << tokCount[k] << "-----"; } } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/TokenTest.cpp b/src/test/analysis/TokenTest.cpp index 1950c61f..96569659 100644 --- a/src/test/analysis/TokenTest.cpp +++ b/src/test/analysis/TokenTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,131 +11,120 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(TokenTest, LuceneTestFixture) +typedef LuceneTestFixture TokenTest; -static AttributePtr checkCloneIsEqual(AttributePtr att) -{ +static AttributePtr checkCloneIsEqual(const AttributePtr& att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); - BOOST_CHECK(att->equals(clone)); - BOOST_CHECK_EQUAL(att->hashCode(), clone->hashCode()); + EXPECT_TRUE(att->equals(clone)); + EXPECT_EQ(att->hashCode(), clone->hashCode()); return clone; } template -static AttributePtr checkCopyIsEqual(AttributePtr att) -{ +static AttributePtr checkCopyIsEqual(const AttributePtr& att) { AttributePtr copy = newLucene(); att->copyTo(copy); - BOOST_CHECK(att->equals(copy)); - BOOST_CHECK_EQUAL(att->hashCode(), copy->hashCode()); + EXPECT_TRUE(att->equals(copy)); + EXPECT_EQ(att->hashCode(), copy->hashCode()); return copy; } -BOOST_AUTO_TEST_CASE(testCtor) -{ +TEST_F(TokenTest, testCtor) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); - BOOST_CHECK_EQUAL(L"hello", t->term()); - BOOST_CHECK_EQUAL(L"word", t->type()); - BOOST_CHECK_EQUAL(0, t->getFlags()); + EXPECT_EQ(L"hello", t->term()); + EXPECT_EQ(L"word", t->type()); + EXPECT_EQ(0, t->getFlags()); t = newLucene(6, 22); t->setTermBuffer(L"hello"); - BOOST_CHECK_EQUAL(L"hello", t->term()); - BOOST_CHECK_EQUAL(L"(hello,6,22)", t->toString()); - BOOST_CHECK_EQUAL(L"word", t->type()); - BOOST_CHECK_EQUAL(0, t->getFlags()); + EXPECT_EQ(L"hello", t->term()); + EXPECT_EQ(L"(hello,6,22)", t->toString()); + EXPECT_EQ(L"word", t->type()); + EXPECT_EQ(0, t->getFlags()); t = newLucene(6, 22, 7); t->setTermBuffer(L"hello"); - BOOST_CHECK_EQUAL(L"hello", t->term()); - BOOST_CHECK_EQUAL(L"(hello,6,22)", t->toString()); - BOOST_CHECK_EQUAL(7, t->getFlags()); + EXPECT_EQ(L"hello", t->term()); + EXPECT_EQ(L"(hello,6,22)", t->toString()); + EXPECT_EQ(7, t->getFlags()); t = newLucene(6, 22, 
L"junk"); t->setTermBuffer(L"hello"); - BOOST_CHECK_EQUAL(L"hello", t->term()); - BOOST_CHECK_EQUAL(L"(hello,6,22,type=junk)", t->toString()); - BOOST_CHECK_EQUAL(0, t->getFlags()); + EXPECT_EQ(L"hello", t->term()); + EXPECT_EQ(L"(hello,6,22,type=junk)", t->toString()); + EXPECT_EQ(0, t->getFlags()); } -BOOST_AUTO_TEST_CASE(testResize) -{ +TEST_F(TokenTest, testResize) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); - for (int32_t i = 0; i < 2000; ++i) - { + for (int32_t i = 0; i < 2000; ++i) { t->resizeTermBuffer(i); - BOOST_CHECK(i <= t->termBuffer().size()); - BOOST_CHECK_EQUAL(L"hello", t->term()); + EXPECT_TRUE(i <= t->termBuffer().size()); + EXPECT_EQ(L"hello", t->term()); } } -BOOST_AUTO_TEST_CASE(testGrow) -{ +TEST_F(TokenTest, testGrow) { TokenPtr t = newLucene(); StringStream buf; buf << L"ab"; - for (int32_t i = 0; i < 20; ++i) - { + for (int32_t i = 0; i < 20; ++i) { String content = buf.str(); t->setTermBuffer(content); - BOOST_CHECK_EQUAL(content.length(), t->termLength()); - BOOST_CHECK_EQUAL(content, t->term()); + EXPECT_EQ(content.length(), t->termLength()); + EXPECT_EQ(content, t->term()); buf << content; } - BOOST_CHECK_EQUAL(1048576, t->termLength()); - BOOST_CHECK_EQUAL(1179654, t->termBuffer().size()); - + EXPECT_EQ(1048576, t->termLength()); + EXPECT_EQ(1179654, t->termBuffer().size()); + // Test for slow growth to a long term t = newLucene(); buf.str(L""); buf << L"a"; - for (int32_t i = 0; i < 20000; ++i) - { + for (int32_t i = 0; i < 20000; ++i) { String content = buf.str(); t->setTermBuffer(content); - BOOST_CHECK_EQUAL(content.length(), t->termLength()); - BOOST_CHECK_EQUAL(content, t->term()); + EXPECT_EQ(content.length(), t->termLength()); + EXPECT_EQ(content, t->term()); buf << L"a"; } - BOOST_CHECK_EQUAL(20000, t->termLength()); - BOOST_CHECK_EQUAL(20167, t->termBuffer().size()); + EXPECT_EQ(20000, t->termLength()); + EXPECT_EQ(20167, t->termBuffer().size()); } -BOOST_AUTO_TEST_CASE(testToString) -{ +TEST_F(TokenTest, 
testToString) { TokenPtr t = newLucene(L"", 0, 5); t->setTermBuffer(L"aloha"); - BOOST_CHECK_EQUAL(L"(aloha,0,5)", t->toString()); + EXPECT_EQ(L"(aloha,0,5)", t->toString()); t->setTermBuffer(L"hi there"); - BOOST_CHECK_EQUAL(L"(hi there,0,5)", t->toString()); + EXPECT_EQ(L"(hi there,0,5)", t->toString()); } -BOOST_AUTO_TEST_CASE(testTermBufferEquals) -{ +TEST_F(TokenTest, testTermBufferEquals) { TokenPtr t1a = newLucene(); t1a->setTermBuffer(L"hello"); TokenPtr t1b = newLucene(); t1b->setTermBuffer(L"hello"); TokenPtr t2 = newLucene(); t2->setTermBuffer(L"hello2"); - BOOST_CHECK(t1a->equals(t1b)); - BOOST_CHECK(!t1a->equals(t2)); - BOOST_CHECK(!t2->equals(t1b)); + EXPECT_TRUE(t1a->equals(t1b)); + EXPECT_TRUE(!t1a->equals(t2)); + EXPECT_TRUE(!t2->equals(t1b)); } -BOOST_AUTO_TEST_CASE(testMixedStringArray) -{ +TEST_F(TokenTest, testMixedStringArray) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); - BOOST_CHECK_EQUAL(t->termLength(), 5); - BOOST_CHECK_EQUAL(t->term(), L"hello"); + EXPECT_EQ(t->termLength(), 5); + EXPECT_EQ(t->term(), L"hello"); t->setTermBuffer(L"hello2"); - BOOST_CHECK_EQUAL(t->termLength(), 6); - BOOST_CHECK_EQUAL(t->term(), L"hello2"); - + EXPECT_EQ(t->termLength(), 6); + EXPECT_EQ(t->term(), L"hello2"); + CharArray test = CharArray::newInstance(6); test[0] = L'h'; test[1] = L'e'; @@ -143,23 +132,22 @@ BOOST_AUTO_TEST_CASE(testMixedStringArray) test[3] = L'l'; test[4] = L'o'; test[5] = L'3'; - + t->setTermBuffer(test.get(), 0, 6); - BOOST_CHECK_EQUAL(t->term(), L"hello3"); - + EXPECT_EQ(t->term(), L"hello3"); + CharArray buffer = t->termBuffer(); buffer[1] = L'o'; - BOOST_CHECK_EQUAL(t->term(), L"hollo3"); + EXPECT_EQ(t->term(), L"hollo3"); } -BOOST_AUTO_TEST_CASE(testClone) -{ +TEST_F(TokenTest, testClone) { TokenPtr t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); TokenPtr clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); - BOOST_CHECK_EQUAL(t->term(), clone->term()); - BOOST_CHECK(buf != 
clone->termBuffer()); + EXPECT_EQ(t->term(), clone->term()); + EXPECT_TRUE(buf != clone->termBuffer()); ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; @@ -170,24 +158,23 @@ BOOST_AUTO_TEST_CASE(testClone) PayloadPtr pl = newLucene(payload); t->setPayload(pl); clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); - BOOST_CHECK(pl->equals(clone->getPayload())); - BOOST_CHECK_NE(pl, clone->getPayload()); + EXPECT_TRUE(pl->equals(clone->getPayload())); + EXPECT_NE(pl, clone->getPayload()); } -BOOST_AUTO_TEST_CASE(testCopyTo) -{ +TEST_F(TokenTest, testCopyTo) { TokenPtr t = newLucene(); TokenPtr copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); - BOOST_CHECK_EQUAL(L"", t->term()); - BOOST_CHECK_EQUAL(L"", copy->term()); - + EXPECT_EQ(L"", t->term()); + EXPECT_EQ(L"", copy->term()); + t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); - BOOST_CHECK_EQUAL(t->term(), copy->term()); - BOOST_CHECK(buf != copy->termBuffer()); - + EXPECT_EQ(t->term(), copy->term()); + EXPECT_TRUE(buf != copy->termBuffer()); + ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; payload[1] = 2; @@ -197,8 +184,6 @@ BOOST_AUTO_TEST_CASE(testCopyTo) PayloadPtr pl = newLucene(payload); t->setPayload(pl); copy = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); - BOOST_CHECK(pl->equals(copy->getPayload())); - BOOST_CHECK_NE(pl, copy->getPayload()); + EXPECT_TRUE(pl->equals(copy->getPayload())); + EXPECT_NE(pl, copy->getPayload()); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/standard/StandardAnalyzerTest.cpp b/src/test/analysis/standard/StandardAnalyzerTest.cpp index b4cca784..389474a9 100644 --- a/src/test/analysis/standard/StandardAnalyzerTest.cpp +++ b/src/test/analysis/standard/StandardAnalyzerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. 
+// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,50 +10,44 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(StandardAnalyzerTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture StandardAnalyzerTest; -BOOST_AUTO_TEST_CASE(testMaxTermLength) -{ +TEST_F(StandardAnalyzerTest, testMaxTermLength) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); sa->setMaxTokenLength(5); checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"xy", L"z")); } -BOOST_AUTO_TEST_CASE(testMaxTermLength2) -{ +TEST_F(StandardAnalyzerTest, testMaxTermLength2) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); - + checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"toolong", L"xy", L"z")); sa->setMaxTokenLength(5); checkAnalyzesTo(sa, L"ab cd toolong xy z", newCollection(L"ab", L"cd", L"xy", L"z"), newCollection(1, 1, 2, 1)); } -BOOST_AUTO_TEST_CASE(testMaxTermLength3) -{ +TEST_F(StandardAnalyzerTest, testMaxTermLength3) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); String longTerm(255, L'a'); checkAnalyzesTo(sa, L"ab cd " + longTerm + L" xy z", newCollection(L"ab", L"cd", longTerm, L"xy", L"z")); checkAnalyzesTo(sa, L"ab cd " + longTerm + L"a xy z", newCollection(L"ab", L"cd", L"xy", L"z")); } -BOOST_AUTO_TEST_CASE(testAlphanumeric) -{ +TEST_F(StandardAnalyzerTest, testAlphanumeric) { // alphanumeric tokens StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"B2B", newCollection(L"b2b")); checkAnalyzesTo(sa, L"2B", newCollection(L"2b")); } -BOOST_AUTO_TEST_CASE(testUnderscores) -{ +TEST_F(StandardAnalyzerTest, testUnderscores) { // underscores are delimiters, but not in email addresses (below) StandardAnalyzerPtr sa = 
newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"word_having_underscore", newCollection(L"word", L"having", L"underscore")); checkAnalyzesTo(sa, L"word_with_underscore_and_stopwords", newCollection(L"word", L"underscore", L"stopwords")); } -BOOST_AUTO_TEST_CASE(testDelimiters) -{ +TEST_F(StandardAnalyzerTest, testDelimiters) { // other delimiters: "-", "/", "," StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"some-dashed-phrase", newCollection(L"some", L"dashed", L"phrase")); @@ -61,11 +55,10 @@ BOOST_AUTO_TEST_CASE(testDelimiters) checkAnalyzesTo(sa, L"ac/dc", newCollection(L"ac", L"dc")); } -BOOST_AUTO_TEST_CASE(testApostrophes) -{ +TEST_F(StandardAnalyzerTest, testApostrophes) { // internal apostrophes: O'Reilly, you're, O'Reilly's possessives are actually removed by StardardFilter, not the tokenizer StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); - + checkAnalyzesTo(sa, L"O'Reilly", newCollection(L"o'reilly")); checkAnalyzesTo(sa, L"you're", newCollection(L"you're")); checkAnalyzesTo(sa, L"she's", newCollection(L"she")); @@ -74,46 +67,42 @@ BOOST_AUTO_TEST_CASE(testApostrophes) checkAnalyzesTo(sa, L"O'Reilly's", newCollection(L"o'reilly")); } -BOOST_AUTO_TEST_CASE(testTSADash) -{ +TEST_F(StandardAnalyzerTest, testTSADash) { // t and s had been stopwords in Lucene <= 2.0, which made it impossible to correctly search for these terms StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"s-class", newCollection(L"s", L"class")); checkAnalyzesTo(sa, L"t-com", newCollection(L"t", L"com")); - + // 'a' is still a stopword checkAnalyzesTo(sa, L"a-class", newCollection(L"class")); } -BOOST_AUTO_TEST_CASE(testCompanyNames) -{ +TEST_F(StandardAnalyzerTest, testCompanyNames) { // internal apostrophes: O'Reilly, you're, O'Reilly's possessives are actually removed by StardardFilter, not the tokenizer StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); 
checkAnalyzesTo(sa, L"AT&T", newCollection(L"at&t")); checkAnalyzesTo(sa, L"Excite@Home", newCollection(L"excite@home")); } -BOOST_AUTO_TEST_CASE(testDomainNames) -{ +TEST_F(StandardAnalyzerTest, testDomainNames) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); - + // domain names checkAnalyzesTo(sa, L"www.nutch.org", newCollection(L"www.nutch.org")); - + // the following should be recognized as HOST - BOOST_CHECK_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"www.nutch.org"), newCollection(L""))); - + EXPECT_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"www.nutch.org"), newCollection(L""))); + // 2.3 should show the bug sa = newLucene(LuceneVersion::LUCENE_23); - BOOST_CHECK_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"wwwnutchorg"), newCollection(L""))); - + EXPECT_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"wwwnutchorg"), newCollection(L""))); + // 2.4 should not show the bug sa = newLucene(LuceneVersion::LUCENE_24); - BOOST_CHECK_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"www.nutch.org"), newCollection(L""))); + EXPECT_NO_THROW(checkAnalyzesTo(sa, L"www.nutch.org.", newCollection(L"www.nutch.org"), newCollection(L""))); } -BOOST_AUTO_TEST_CASE(testEMailAddresses) -{ +TEST_F(StandardAnalyzerTest, testEMailAddresses) { // email addresses, possibly with underscores, periods, etc StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"test@example.com", newCollection(L"test@example.com")); @@ -121,8 +110,7 @@ BOOST_AUTO_TEST_CASE(testEMailAddresses) checkAnalyzesTo(sa, L"first_lastname@example.com", newCollection(L"first_lastname@example.com")); } -BOOST_AUTO_TEST_CASE(testNumeric) -{ +TEST_F(StandardAnalyzerTest, testNumeric) { // floating point, serial, model numbers, ip addresses, etc. 
// every other segment must have at least one digit StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); @@ -134,14 +122,12 @@ BOOST_AUTO_TEST_CASE(testNumeric) checkAnalyzesTo(sa, L"R2D2 C3PO", newCollection(L"r2d2", L"c3po")); } -BOOST_AUTO_TEST_CASE(testTextWithNumbers) -{ +TEST_F(StandardAnalyzerTest, testTextWithNumbers) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"David has 5000 bones", newCollection(L"david", L"has", L"5000", L"bones")); } -BOOST_AUTO_TEST_CASE(testVariousText) -{ +TEST_F(StandardAnalyzerTest, testVariousText) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"C embedded developers wanted", newCollection(L"c", L"embedded", L"developers", L"wanted")); checkAnalyzesTo(sa, L"foo bar FOO BAR", newCollection(L"foo", L"bar", L"foo", L"bar")); @@ -149,64 +135,53 @@ BOOST_AUTO_TEST_CASE(testVariousText) checkAnalyzesTo(sa, L"\"QUOTED\" word", newCollection(L"quoted", L"word")); } -BOOST_AUTO_TEST_CASE(testAcronyms) -{ +TEST_F(StandardAnalyzerTest, testAcronyms) { // acronyms have their dots stripped StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"U.S.A.", newCollection(L"usa")); } -BOOST_AUTO_TEST_CASE(testCPlusPlusHash) -{ +TEST_F(StandardAnalyzerTest, testCPlusPlusHash) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"C++", newCollection(L"c")); checkAnalyzesTo(sa, L"C#", newCollection(L"c")); } -BOOST_AUTO_TEST_CASE(testComplianceFileName) -{ +TEST_F(StandardAnalyzerTest, testComplianceFileName) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"2004.jpg", newCollection(L"2004.jpg"), newCollection(L"")); } -BOOST_AUTO_TEST_CASE(testComplianceNumericIncorrect) -{ +TEST_F(StandardAnalyzerTest, testComplianceNumericIncorrect) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"62.46", 
newCollection(L"62.46"), newCollection(L"")); } -BOOST_AUTO_TEST_CASE(testComplianceNumericLong) -{ +TEST_F(StandardAnalyzerTest, testComplianceNumericLong) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"978-0-94045043-1", newCollection(L"978-0-94045043-1"), newCollection(L"")); } -BOOST_AUTO_TEST_CASE(testComplianceNumericFile) -{ +TEST_F(StandardAnalyzerTest, testComplianceNumericFile) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"78academyawards/rules/rule02.html", newCollection(L"78academyawards/rules/rule02.html"), newCollection(L"")); } -BOOST_AUTO_TEST_CASE(testComplianceNumericWithUnderscores) -{ +TEST_F(StandardAnalyzerTest, testComplianceNumericWithUnderscores) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs", newCollection(L"2006-03-11t082958z_01_ban130523_rtridst_0_ozabs"), newCollection(L"")); } -BOOST_AUTO_TEST_CASE(testComplianceNumericWithDash) -{ +TEST_F(StandardAnalyzerTest, testComplianceNumericWithDash) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(sa, L"mid-20th", newCollection(L"mid-20th"), newCollection(L"")); } -BOOST_AUTO_TEST_CASE(testComplianceManyTokens) -{ +TEST_F(StandardAnalyzerTest, testComplianceManyTokens) { StandardAnalyzerPtr sa = newLucene(LuceneVersion::LUCENE_CURRENT); - checkAnalyzesTo(sa, L"/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm safari-0-sheikh-zayed-grand-mosque.jpg", - newCollection(L"money.cnn.com", L"magazines", L"fortune", L"fortune", L"archive/2007/03/19/8402357", + checkAnalyzesTo(sa, L"/money.cnn.com/magazines/fortune/fortune_archive/2007/03/19/8402357/index.htm safari-0-sheikh-zayed-grand-mosque.jpg", + newCollection(L"money.cnn.com", L"magazines", L"fortune", L"fortune", L"archive/2007/03/19/8402357", L"index.htm", L"safari-0-sheikh", L"zayed", L"grand", 
L"mosque.jpg"), newCollection(L"", L"", L"", L"", L"", L"", L"", - L"", L"", L"")); + L"", L"", L"")); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/tokenattributes/SimpleAttributeTest.cpp b/src/test/analysis/tokenattributes/SimpleAttributeTest.cpp index 5aa36ee8..d1ab837d 100644 --- a/src/test/analysis/tokenattributes/SimpleAttributeTest.cpp +++ b/src/test/analysis/tokenattributes/SimpleAttributeTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -16,142 +16,132 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(SimpleAttributeTest, LuceneTestFixture) +typedef LuceneTestFixture SimpleAttributeTest; -static AttributePtr checkCloneIsEqual(AttributePtr att) -{ +static AttributePtr checkCloneIsEqual(const AttributePtr& att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); - BOOST_CHECK(att->equals(clone)); - BOOST_CHECK_EQUAL(att->hashCode(), clone->hashCode()); + EXPECT_TRUE(att->equals(clone)); + EXPECT_EQ(att->hashCode(), clone->hashCode()); return clone; } template -static AttributePtr checkCopyIsEqual(AttributePtr att) -{ +static AttributePtr checkCopyIsEqual(const AttributePtr& att) { AttributePtr copy = newLucene(); att->copyTo(copy); - BOOST_CHECK(att->equals(copy)); - BOOST_CHECK_EQUAL(att->hashCode(), copy->hashCode()); + EXPECT_TRUE(att->equals(copy)); + EXPECT_EQ(att->hashCode(), copy->hashCode()); return copy; } -BOOST_AUTO_TEST_CASE(testFlagsAttribute) -{ +TEST_F(SimpleAttributeTest, testFlagsAttribute) { FlagsAttributePtr att = newLucene(); - BOOST_CHECK_EQUAL(0, att->getFlags()); + EXPECT_EQ(0, att->getFlags()); att->setFlags(1234); - 
BOOST_CHECK_EQUAL(L"flags=1234", att->toString()); + EXPECT_EQ(L"flags=1234", att->toString()); FlagsAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); - BOOST_CHECK_EQUAL(1234, att2->getFlags()); + EXPECT_EQ(1234, att2->getFlags()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); - BOOST_CHECK_EQUAL(1234, att2->getFlags()); + EXPECT_EQ(1234, att2->getFlags()); att->clear(); - BOOST_CHECK_EQUAL(0, att->getFlags()); + EXPECT_EQ(0, att->getFlags()); } -BOOST_AUTO_TEST_CASE(testPositionIncrementAttribute) -{ +TEST_F(SimpleAttributeTest, testPositionIncrementAttribute) { PositionIncrementAttributePtr att = newLucene(); - BOOST_CHECK_EQUAL(1, att->getPositionIncrement()); + EXPECT_EQ(1, att->getPositionIncrement()); att->setPositionIncrement(1234); - BOOST_CHECK_EQUAL(L"positionIncrement=1234", att->toString()); + EXPECT_EQ(L"positionIncrement=1234", att->toString()); PositionIncrementAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); - BOOST_CHECK_EQUAL(1234, att2->getPositionIncrement()); + EXPECT_EQ(1234, att2->getPositionIncrement()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); - BOOST_CHECK_EQUAL(1234, att2->getPositionIncrement()); + EXPECT_EQ(1234, att2->getPositionIncrement()); att->clear(); - BOOST_CHECK_EQUAL(1, att->getPositionIncrement()); + EXPECT_EQ(1, att->getPositionIncrement()); } -namespace TestTypeAttribute -{ - class TestableTypeAttribute : public TypeAttribute - { - public: - virtual ~TestableTypeAttribute() - { - } - - LUCENE_CLASS(TestableTypeAttribute); - - public: - using TypeAttribute::DEFAULT_TYPE; - }; +namespace TestTypeAttribute { + +class TestableTypeAttribute : public TypeAttribute { +public: + virtual ~TestableTypeAttribute() { + } + + LUCENE_CLASS(TestableTypeAttribute); + +public: + using TypeAttribute::DEFAULT_TYPE; +}; + } -BOOST_AUTO_TEST_CASE(testTypeAttribute) -{ +TEST_F(SimpleAttributeTest, testTypeAttribute) { TypeAttributePtr att = newLucene(); - 
BOOST_CHECK_EQUAL(TestTypeAttribute::TestableTypeAttribute::DEFAULT_TYPE(), att->type()); + EXPECT_EQ(TestTypeAttribute::TestableTypeAttribute::DEFAULT_TYPE(), att->type()); att->setType(L"hello"); - BOOST_CHECK_EQUAL(L"type=hello", att->toString()); + EXPECT_EQ(L"type=hello", att->toString()); TypeAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); - BOOST_CHECK_EQUAL(L"hello", att2->type()); + EXPECT_EQ(L"hello", att2->type()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); - BOOST_CHECK_EQUAL(L"hello", att2->type()); + EXPECT_EQ(L"hello", att2->type()); att->clear(); - BOOST_CHECK_EQUAL(TestTypeAttribute::TestableTypeAttribute::DEFAULT_TYPE(), att->type()); + EXPECT_EQ(TestTypeAttribute::TestableTypeAttribute::DEFAULT_TYPE(), att->type()); } -BOOST_AUTO_TEST_CASE(testPayloadAttribute) -{ +TEST_F(SimpleAttributeTest, testPayloadAttribute) { PayloadAttributePtr att = newLucene(); - BOOST_CHECK(!att->getPayload()); + EXPECT_TRUE(!att->getPayload()); ByteArray payload = ByteArray::newInstance(4); payload[0] = 1; payload[1] = 2; payload[2] = 3; payload[3] = 4; - + PayloadPtr pl = newLucene(payload); att->setPayload(pl); PayloadAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); - BOOST_CHECK(pl->equals(att2->getPayload())); - BOOST_CHECK_NE(pl, att2->getPayload()); + EXPECT_TRUE(pl->equals(att2->getPayload())); + EXPECT_NE(pl, att2->getPayload()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); - BOOST_CHECK(pl->equals(att2->getPayload())); - BOOST_CHECK_NE(pl, att2->getPayload()); + EXPECT_TRUE(pl->equals(att2->getPayload())); + EXPECT_NE(pl, att2->getPayload()); att->clear(); - BOOST_CHECK(!att->getPayload()); + EXPECT_TRUE(!att->getPayload()); } -BOOST_AUTO_TEST_CASE(testOffsetAttribute) -{ +TEST_F(SimpleAttributeTest, testOffsetAttribute) { OffsetAttributePtr att = newLucene(); - BOOST_CHECK_EQUAL(0, att->startOffset()); - BOOST_CHECK_EQUAL(0, att->endOffset()); + EXPECT_EQ(0, att->startOffset()); + 
EXPECT_EQ(0, att->endOffset()); att->setOffset(12, 34); // no string test here, because order unknown - + OffsetAttributePtr att2 = boost::dynamic_pointer_cast(checkCloneIsEqual(att)); - BOOST_CHECK_EQUAL(12, att2->startOffset()); - BOOST_CHECK_EQUAL(34, att2->endOffset()); + EXPECT_EQ(12, att2->startOffset()); + EXPECT_EQ(34, att2->endOffset()); att2 = boost::dynamic_pointer_cast(checkCopyIsEqual(att)); - BOOST_CHECK_EQUAL(12, att2->startOffset()); - BOOST_CHECK_EQUAL(34, att2->endOffset()); + EXPECT_EQ(12, att2->startOffset()); + EXPECT_EQ(34, att2->endOffset()); att->clear(); - BOOST_CHECK_EQUAL(0, att->startOffset()); - BOOST_CHECK_EQUAL(0, att->endOffset()); + EXPECT_EQ(0, att->startOffset()); + EXPECT_EQ(0, att->endOffset()); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/analysis/tokenattributes/TermAttributeTest.cpp b/src/test/analysis/tokenattributes/TermAttributeTest.cpp index 9e70bcec..d0a350de 100644 --- a/src/test/analysis/tokenattributes/TermAttributeTest.cpp +++ b/src/test/analysis/tokenattributes/TermAttributeTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,90 +11,81 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(TermAttributeTest, LuceneTestFixture) +typedef LuceneTestFixture TermAttributeTest; -static AttributePtr checkCloneIsEqual(AttributePtr att) -{ +static AttributePtr checkCloneIsEqual(const AttributePtr& att) { AttributePtr clone = boost::dynamic_pointer_cast(att->clone()); - BOOST_CHECK(att->equals(clone)); - BOOST_CHECK_EQUAL(att->hashCode(), clone->hashCode()); + EXPECT_TRUE(att->equals(clone)); + EXPECT_EQ(att->hashCode(), clone->hashCode()); return clone; } template -static AttributePtr checkCopyIsEqual(AttributePtr att) -{ +static AttributePtr checkCopyIsEqual(const AttributePtr& att) { AttributePtr copy = newLucene(); att->copyTo(copy); - BOOST_CHECK(att->equals(copy)); - BOOST_CHECK_EQUAL(att->hashCode(), copy->hashCode()); + EXPECT_TRUE(att->equals(copy)); + EXPECT_EQ(att->hashCode(), copy->hashCode()); return copy; } -BOOST_AUTO_TEST_CASE(testResize) -{ +TEST_F(TermAttributeTest, testResize) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); - for (int32_t i = 0; i < 2000; ++i) - { + for (int32_t i = 0; i < 2000; ++i) { t->resizeTermBuffer(i); - BOOST_CHECK(i <= t->termBuffer().size()); - BOOST_CHECK_EQUAL(L"hello", t->term()); + EXPECT_TRUE(i <= t->termBuffer().size()); + EXPECT_EQ(L"hello", t->term()); } } -BOOST_AUTO_TEST_CASE(testGrow) -{ +TEST_F(TermAttributeTest, testGrow) { TermAttributePtr t = newLucene(); StringStream buf; buf << L"ab"; - for (int32_t i = 0; i < 20; ++i) - { + for (int32_t i = 0; i < 20; ++i) { String content = buf.str(); t->setTermBuffer(content); - BOOST_CHECK_EQUAL(content.length(), t->termLength()); - BOOST_CHECK_EQUAL(content, t->term()); + EXPECT_EQ(content.length(), t->termLength()); + EXPECT_EQ(content, t->term()); buf << content; } - BOOST_CHECK_EQUAL(1048576, t->termLength()); - BOOST_CHECK_EQUAL(1179654, t->termBuffer().size()); - + EXPECT_EQ(1048576, 
t->termLength()); + EXPECT_EQ(1179654, t->termBuffer().size()); + // Test for slow growth to a long term t = newLucene(); buf.str(L""); buf << L"a"; - for (int32_t i = 0; i < 20000; ++i) - { + for (int32_t i = 0; i < 20000; ++i) { String content = buf.str(); t->setTermBuffer(content); - BOOST_CHECK_EQUAL(content.length(), t->termLength()); - BOOST_CHECK_EQUAL(content, t->term()); + EXPECT_EQ(content.length(), t->termLength()); + EXPECT_EQ(content, t->term()); buf << L"a"; } - BOOST_CHECK_EQUAL(20000, t->termLength()); - BOOST_CHECK_EQUAL(20167, t->termBuffer().size()); + EXPECT_EQ(20000, t->termLength()); + EXPECT_EQ(20167, t->termBuffer().size()); } -BOOST_AUTO_TEST_CASE(testToString) -{ +TEST_F(TermAttributeTest, testToString) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"aloha"); - BOOST_CHECK_EQUAL(L"term=aloha", t->toString()); + EXPECT_EQ(L"term=aloha", t->toString()); t->setTermBuffer(L"hi there"); - BOOST_CHECK_EQUAL(L"term=hi there", t->toString()); + EXPECT_EQ(L"term=hi there", t->toString()); } -BOOST_AUTO_TEST_CASE(testMixedStringArray) -{ +TEST_F(TermAttributeTest, testMixedStringArray) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); - BOOST_CHECK_EQUAL(t->termLength(), 5); - BOOST_CHECK_EQUAL(t->term(), L"hello"); + EXPECT_EQ(t->termLength(), 5); + EXPECT_EQ(t->term(), L"hello"); t->setTermBuffer(L"hello2"); - BOOST_CHECK_EQUAL(t->termLength(), 6); - BOOST_CHECK_EQUAL(t->term(), L"hello2"); - + EXPECT_EQ(t->termLength(), 6); + EXPECT_EQ(t->term(), L"hello2"); + CharArray test = CharArray::newInstance(6); test[0] = L'h'; test[1] = L'e'; @@ -102,52 +93,47 @@ BOOST_AUTO_TEST_CASE(testMixedStringArray) test[3] = L'l'; test[4] = L'o'; test[5] = L'3'; - + t->setTermBuffer(test.get(), 0, 6); - BOOST_CHECK_EQUAL(t->term(), L"hello3"); + EXPECT_EQ(t->term(), L"hello3"); // Make sure if we get the buffer and change a character that term() reflects the change CharArray buffer = t->termBuffer(); buffer[1] = L'o'; - 
BOOST_CHECK_EQUAL(t->term(), L"hollo3"); + EXPECT_EQ(t->term(), L"hollo3"); } -BOOST_AUTO_TEST_CASE(testClone) -{ +TEST_F(TermAttributeTest, testClone) { TermAttributePtr t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); TermAttributePtr clone = boost::dynamic_pointer_cast(checkCloneIsEqual(t)); - BOOST_CHECK_EQUAL(t->term(), clone->term()); - BOOST_CHECK(buf != clone->termBuffer()); + EXPECT_EQ(t->term(), clone->term()); + EXPECT_TRUE(buf != clone->termBuffer()); } -BOOST_AUTO_TEST_CASE(testEquals) -{ +TEST_F(TermAttributeTest, testEquals) { TermAttributePtr t1a = newLucene(); t1a->setTermBuffer(L"hello"); TermAttributePtr t1b = newLucene(); t1b->setTermBuffer(L"hello"); TermAttributePtr t2 = newLucene(); t2->setTermBuffer(L"hello2"); - BOOST_CHECK(t1a->equals(t1b)); - BOOST_CHECK(!t1a->equals(t2)); - BOOST_CHECK(!t2->equals(t1b)); + EXPECT_TRUE(t1a->equals(t1b)); + EXPECT_TRUE(!t1a->equals(t2)); + EXPECT_TRUE(!t2->equals(t1b)); } -BOOST_AUTO_TEST_CASE(testCopyTo) -{ +TEST_F(TermAttributeTest, testCopyTo) { TermAttributePtr t = newLucene(); TermAttributePtr copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); - BOOST_CHECK_EQUAL(L"", t->term()); - BOOST_CHECK_EQUAL(L"", copy->term()); - + EXPECT_EQ(L"", t->term()); + EXPECT_EQ(L"", copy->term()); + t = newLucene(); t->setTermBuffer(L"hello"); CharArray buf = t->termBuffer(); copy = boost::dynamic_pointer_cast(checkCopyIsEqual(t)); - BOOST_CHECK_EQUAL(t->term(), copy->term()); - BOOST_CHECK(buf != copy->termBuffer()); + EXPECT_EQ(t->term(), copy->term()); + EXPECT_TRUE(buf != copy->termBuffer()); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/ar/ArabicAnalyzerTest.cpp b/src/test/contrib/analyzers/common/analysis/ar/ArabicAnalyzerTest.cpp index f5613b04..f1739c5b 100644 --- a/src/test/contrib/analyzers/common/analysis/ar/ArabicAnalyzerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/ar/ArabicAnalyzerTest.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,76 +10,67 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(ArabicAnalyzerTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture ArabicAnalyzerTest; /// Some simple tests showing some features of the analyzer, how some regular forms will conflate -BOOST_AUTO_TEST_CASE(testBasicFeatures1) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures1) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures2) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures2) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures3) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures3) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures4) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures4) { ArabicAnalyzerPtr a = 
newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xb4, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures5) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures5) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa3, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x8a, 0xd9, 0x8a, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures6) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures6) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x85, 0xd8, 0xb1, 0xd9, 0x8a, 0xd9, 0x83}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures7) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures7) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures8) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures8) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8}; checkAnalyzesTo(a, UTF8_TO_STRING(first), 
newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures9) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures9) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd8, 0xa7, 0x20, 0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0x20, 0xd8, 0xa3, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; const uint8_t second[] = {0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa}; @@ -87,8 +78,7 @@ BOOST_AUTO_TEST_CASE(testBasicFeatures9) checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second), UTF8_TO_STRING(third))); } -BOOST_AUTO_TEST_CASE(testBasicFeatures10) -{ +TEST_F(ArabicAnalyzerTest, testBasicFeatures10) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xb0, 0xd9, 0x8a, 0xd9, 0x86, 0x20, 0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa, 0x20, 0xd8, 0xa3, 0xd9, 0x8a, 0xd9, 0x85, 0xd8, 0xa7, 0xd9, 0x86, 0xd9, 0x83, 0xd9, 0x85}; const uint8_t second[] = {0xd9, 0x85, 0xd9, 0x84, 0xd9, 0x83, 0xd8, 0xaa}; @@ -97,16 +87,14 @@ BOOST_AUTO_TEST_CASE(testBasicFeatures10) } /// Simple tests to show things are getting reset correctly, etc. 
-BOOST_AUTO_TEST_CASE(testReusableTokenStream1) -{ +TEST_F(ArabicAnalyzerTest, testReusableTokenStream1) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; checkAnalyzesToReuse(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream2) -{ +TEST_F(ArabicAnalyzerTest, testReusableTokenStream2) { ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xa8, 0xd9, 0x8a, 0xd8, 0xb1}; @@ -114,17 +102,13 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream2) } /// Non-arabic text gets treated in a similar way as SimpleAnalyzer. -BOOST_AUTO_TEST_CASE(testEnglishInput) -{ +TEST_F(ArabicAnalyzerTest, testEnglishInput) { checkAnalyzesTo(newLucene(LuceneVersion::LUCENE_CURRENT), L"English text.", newCollection(L"english", L"text")); } /// Test that custom stopwords work, and are not case-sensitive. 
-BOOST_AUTO_TEST_CASE(testCustomStopwords) -{ +TEST_F(ArabicAnalyzerTest, testCustomStopwords) { Collection stopWords = newCollection(L"the", L"and", L"a"); ArabicAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, HashSet::newInstance(stopWords.begin(), stopWords.end())); checkAnalyzesTo(a, L"The quick brown fox.", newCollection(L"quick", L"brown", L"fox")); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilterTest.cpp b/src/test/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilterTest.cpp index 7fa9f5a5..86712d9d 100644 --- a/src/test/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilterTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/ar/ArabicNormalizationFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,120 +12,99 @@ using namespace Lucene; -class ArabicNormalizationFilterFixture : public BaseTokenStreamFixture -{ +class ArabicNormalizationFilterTest : public BaseTokenStreamFixture { public: - virtual ~ArabicNormalizationFilterFixture() - { + virtual ~ArabicNormalizationFilterTest() { } public: - void check(const String& input, const String& expected) - { + void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); ArabicNormalizationFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; -BOOST_FIXTURE_TEST_SUITE(ArabicNormalizationFilterTest, ArabicNormalizationFilterFixture) - -BOOST_AUTO_TEST_CASE(testAlifMadda) -{ +TEST_F(ArabicNormalizationFilterTest, testAlifMadda) { const uint8_t first[] = {0xd8, 0xa2, 0xd8, 0xac, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xac, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testAlifHamzaAbove) -{ +TEST_F(ArabicNormalizationFilterTest, testAlifHamzaAbove) { const uint8_t first[] = {0xd8, 0xa3, 0xd8, 0xad, 0xd9, 0x85, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xad, 0xd9, 0x85, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testAlifHamzaBelow) -{ +TEST_F(ArabicNormalizationFilterTest, testAlifHamzaBelow) { const uint8_t first[] = {0xd8, 0xa5, 0xd8, 0xb9, 0xd8, 0xa7, 0xd8, 0xb0}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xb9, 0xd8, 0xa7, 0xd8, 0xb0}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testAlifMaksura) -{ +TEST_F(ArabicNormalizationFilterTest, testAlifMaksura) { const uint8_t first[] = {0xd8, 0xa8, 0xd9, 0x86, 0xd9, 0x89}; const uint8_t second[] = {0xd8, 0xa8, 0xd9, 0x86, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } 
-BOOST_AUTO_TEST_CASE(testTehMarbuta) -{ +TEST_F(ArabicNormalizationFilterTest, testTehMarbuta) { const uint8_t first[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd8, 0xb7, 0xd9, 0x85, 0xd8, 0xa9}; const uint8_t second[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd8, 0xb7, 0xd9, 0x85, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testTatweel) -{ +TEST_F(ArabicNormalizationFilterTest, testTatweel) { const uint8_t first[] = {0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd9, 0x80, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, 0xd8, 0xaa}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testFatha) -{ +TEST_F(ArabicNormalizationFilterTest, testFatha) { const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8e, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7}; const uint8_t second[] = {0xd9, 0x85, 0xd8, 0xa8, 0xd9, 0x86, 0xd8, 0xa7}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testKasra) -{ +TEST_F(ArabicNormalizationFilterTest, testKasra) { const uint8_t first[] = {0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x90, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xb9, 0xd9, 0x84, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testDamma) -{ +TEST_F(ArabicNormalizationFilterTest, testDamma) { const uint8_t first[] = {0xd8, 0xa8, 0xd9, 0x8f, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xa7, 0xd8, 0xaa}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testFathatan) -{ +TEST_F(ArabicNormalizationFilterTest, testFathatan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x8b}; const uint8_t second[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd8, 0xa7}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testKasratan) -{ +TEST_F(ArabicNormalizationFilterTest, 
testKasratan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x8d}; const uint8_t second[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testDammatan) -{ +TEST_F(ArabicNormalizationFilterTest, testDammatan) { const uint8_t first[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf, 0xd9, 0x8c}; const uint8_t second[] = {0xd9, 0x88, 0xd9, 0x84, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testSukun) -{ +TEST_F(ArabicNormalizationFilterTest, testSukun) { const uint8_t first[] = {0xd9, 0x86, 0xd9, 0x84, 0xd9, 0x92, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd9, 0x86, 0xd9, 0x84, 0xd8, 0xb3, 0xd9, 0x88, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testShaddah) -{ +TEST_F(ArabicNormalizationFilterTest, testShaddah) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xaa, 0xd9, 0x85, 0xd9, 0x8a, 0xd9, 0x91}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xaa, 0xd9, 0x85, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/ar/ArabicStemFilterTest.cpp b/src/test/contrib/analyzers/common/analysis/ar/ArabicStemFilterTest.cpp index 7ee61d66..a5c0b6a0 100644 --- a/src/test/contrib/analyzers/common/analysis/ar/ArabicStemFilterTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/ar/ArabicStemFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,167 +12,139 @@ using namespace Lucene; -class ArabicStemFilterFixture : public BaseTokenStreamFixture -{ +class ArabicStemFilterTest : public BaseTokenStreamFixture { public: - virtual ~ArabicStemFilterFixture() - { + virtual ~ArabicStemFilterTest() { } public: - void check(const String& input, const String& expected) - { + void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); ArabicStemFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; -BOOST_FIXTURE_TEST_SUITE(ArabicStemFilterTest, ArabicStemFilterFixture) - -BOOST_AUTO_TEST_CASE(testAlPrefix) -{ +TEST_F(ArabicStemFilterTest, testAlPrefix) { const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testWalPrefix) -{ +TEST_F(ArabicStemFilterTest, testWalPrefix) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testBalPrefix) -{ +TEST_F(ArabicStemFilterTest, testBalPrefix) { const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testKalPrefix) -{ +TEST_F(ArabicStemFilterTest, testKalPrefix) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testFalPrefix) -{ 
+TEST_F(ArabicStemFilterTest, testFalPrefix) { const uint8_t first[] = {0xd9, 0x81, 0xd8, 0xa7, 0xd9, 0x84, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testLlPrefix) -{ +TEST_F(ArabicStemFilterTest, testLlPrefix) { const uint8_t first[] = {0xd9, 0x84, 0xd9, 0x84, 0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xb1}; const uint8_t second[] = {0xd8, 0xa7, 0xd8, 0xae, 0xd8, 0xb1}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testWaPrefix) -{ +TEST_F(ArabicStemFilterTest, testWaPrefix) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xad, 0xd8, 0xb3, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testAhSuffix) -{ +TEST_F(ArabicStemFilterTest, testAhSuffix) { const uint8_t first[] = {0xd8, 0xb2, 0xd9, 0x88, 0xd8, 0xac, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xb2, 0xd9, 0x88, 0xd8, 0xac}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testAnSuffix) -{ +TEST_F(ArabicStemFilterTest, testAnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa7, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testAtSuffix) -{ +TEST_F(ArabicStemFilterTest, testAtSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testWnSuffix) -{ +TEST_F(ArabicStemFilterTest, testWnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 
0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testYnSuffix) -{ +TEST_F(ArabicStemFilterTest, testYnSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testYhSuffix) -{ +TEST_F(ArabicStemFilterTest, testYhSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd9, 0x87}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testYpSuffix) -{ +TEST_F(ArabicStemFilterTest, testYpSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a, 0xd8, 0xa9}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testHSuffix) -{ +TEST_F(ArabicStemFilterTest, testHSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x87}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testPSuffix) -{ +TEST_F(ArabicStemFilterTest, testPSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd8, 0xa9}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testYSuffix) -{ +TEST_F(ArabicStemFilterTest, testYSuffix) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x8a}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testComboPrefSuf) -{ +TEST_F(ArabicStemFilterTest, 
testComboPrefSuf) { const uint8_t first[] = {0xd9, 0x88, 0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x88, 0xd9, 0x86}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testComboSuf) -{ +TEST_F(ArabicStemFilterTest, testComboSuf) { const uint8_t first[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0xd9, 0x87, 0xd8, 0xa7, 0xd8, 0xaa}; const uint8_t second[] = {0xd8, 0xb3, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testShouldntStem) -{ +TEST_F(ArabicStemFilterTest, testShouldntStem) { const uint8_t first[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x88}; const uint8_t second[] = {0xd8, 0xa7, 0xd9, 0x84, 0xd9, 0x88}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testNonArabic) -{ +TEST_F(ArabicStemFilterTest, testNonArabic) { check(L"English", L"English"); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/br/BrazilianStemmerTest.cpp b/src/test/contrib/analyzers/common/analysis/br/BrazilianStemmerTest.cpp index 68ad27a7..1fd6a2d8 100644 --- a/src/test/contrib/analyzers/common/analysis/br/BrazilianStemmerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/br/BrazilianStemmerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,32 +10,25 @@ using namespace Lucene; -class BrazilianStemmerFixture : public BaseTokenStreamFixture -{ +class BrazilianStemmerTest : public BaseTokenStreamFixture { public: - virtual ~BrazilianStemmerFixture() - { + virtual ~BrazilianStemmerTest() { } public: - void check(const String& input, const String& expected) - { + void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } - - void checkReuse(AnalyzerPtr a, const String& input, const String& expected) - { + + void checkReuse(const AnalyzerPtr& a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; -BOOST_FIXTURE_TEST_SUITE(BrazilianStemmerTest, BrazilianStemmerFixture) - /// Test the Brazilian Stem Filter, which only modifies the term text. /// It is very similar to the snowball Portuguese algorithm but not exactly the same. -BOOST_AUTO_TEST_CASE(testWithSnowballExamples) -{ +TEST_F(BrazilianStemmerTest, testWithSnowballExamples) { check(L"boa", L"boa"); check(L"boainain", L"boainain"); check(L"boas", L"boas"); @@ -134,8 +127,7 @@ BOOST_AUTO_TEST_CASE(testWithSnowballExamples) check(L"quiosque", L"quiosqu"); } -BOOST_AUTO_TEST_CASE(testNormalization) -{ +TEST_F(BrazilianStemmerTest, testNormalization) { check(L"Brasil", L"brasil"); // lowercase by default const uint8_t brasil[] = {0x42, 0x72, 0x61, 0x73, 0xc3, 0xad, 0x6c, 0x69, 0x61}; check(UTF8_TO_STRING(brasil), L"brasil"); // remove diacritics @@ -147,8 +139,7 @@ BOOST_AUTO_TEST_CASE(testNormalization) check(UTF8_TO_STRING(aaa), L"aaa"); // normally, diacritics are removed } -BOOST_AUTO_TEST_CASE(testReusableTokenStream) -{ +TEST_F(BrazilianStemmerTest, testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"boa", L"boa"); checkReuse(a, L"boainain", L"boainain"); @@ -157,8 +148,7 @@ 
BOOST_AUTO_TEST_CASE(testReusableTokenStream) checkReuse(a, UTF8_TO_STRING(boas), L"boas"); // removes diacritic: different from snowball Portuguese } -BOOST_AUTO_TEST_CASE(testStemExclusionTable) -{ +TEST_F(BrazilianStemmerTest, testStemExclusionTable) { BrazilianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); HashSet exclusions = HashSet::newInstance(); const uint8_t quintessencia[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0x65, 0x73, 0x73, 0xc3, 0xaa, 0x6e, 0x63, 0x69, 0x61}; @@ -168,8 +158,7 @@ BOOST_AUTO_TEST_CASE(testStemExclusionTable) } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. -BOOST_AUTO_TEST_CASE(testExclusionTableReuse) -{ +TEST_F(BrazilianStemmerTest, testExclusionTableReuse) { BrazilianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t quintessencia[] = {0x71, 0x75, 0x69, 0x6e, 0x74, 0x65, 0x73, 0x73, 0xc3, 0xaa, 0x6e, 0x63, 0x69, 0x61}; checkReuse(a, UTF8_TO_STRING(quintessencia), L"quintessente"); @@ -178,5 +167,3 @@ BOOST_AUTO_TEST_CASE(testExclusionTableReuse) a->setStemExclusionTable(exclusions); checkReuse(a, UTF8_TO_STRING(quintessencia), UTF8_TO_STRING(quintessencia)); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/cjk/CJKTokenizerTest.cpp b/src/test/contrib/analyzers/common/analysis/cjk/CJKTokenizerTest.cpp index fb38a5c2..b4f325a6 100644 --- a/src/test/contrib/analyzers/common/analysis/cjk/CJKTokenizerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/cjk/CJKTokenizerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,18 +11,14 @@ using namespace Lucene; -class CJKTokenizerFixture : public BaseTokenStreamFixture -{ +class CJKTokenizerTest : public BaseTokenStreamFixture { public: - virtual ~CJKTokenizerFixture() - { + virtual ~CJKTokenizerTest() { } public: - struct TestToken - { - TestToken(const String& termText = L"", int32_t start = 0, int32_t end = 0, int32_t type = 0) - { + struct TestToken { + TestToken(const String& termText = L"", int32_t start = 0, int32_t end = 0, int32_t type = 0) { this->termText = termText; this->start = start; this->end = end; @@ -34,16 +30,14 @@ class CJKTokenizerFixture : public BaseTokenStreamFixture int32_t end; String type; }; - - void checkCJKToken(const String& str, Collection out_tokens) - { + + void checkCJKToken(const String& str, Collection out_tokens) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); Collection terms = Collection::newInstance(out_tokens.size()); Collection startOffsets = Collection::newInstance(out_tokens.size()); Collection endOffsets = Collection::newInstance(out_tokens.size()); Collection types = Collection::newInstance(out_tokens.size()); - for (int32_t i = 0; i < out_tokens.size(); ++i) - { + for (int32_t i = 0; i < out_tokens.size(); ++i) { terms[i] = out_tokens[i].termText; startOffsets[i] = out_tokens[i].start; endOffsets[i] = out_tokens[i].end; @@ -51,15 +45,13 @@ class CJKTokenizerFixture : public BaseTokenStreamFixture } checkAnalyzesTo(analyzer, str, terms, startOffsets, endOffsets, types, Collection()); } - - void checkCJKTokenReusable(AnalyzerPtr analyzer, const String& str, Collection out_tokens) - { + + void checkCJKTokenReusable(const AnalyzerPtr& analyzer, const String& str, Collection out_tokens) { Collection terms = Collection::newInstance(out_tokens.size()); Collection startOffsets = Collection::newInstance(out_tokens.size()); Collection endOffsets = Collection::newInstance(out_tokens.size()); 
Collection types = Collection::newInstance(out_tokens.size()); - for (int32_t i = 0; i < out_tokens.size(); ++i) - { + for (int32_t i = 0; i < out_tokens.size(); ++i) { terms[i] = out_tokens[i].termText; startOffsets[i] = out_tokens[i].start; endOffsets[i] = out_tokens[i].end; @@ -69,14 +61,12 @@ class CJKTokenizerFixture : public BaseTokenStreamFixture } }; -BOOST_FIXTURE_TEST_SUITE(CJKTokenizerTest, CJKTokenizerFixture) - -BOOST_AUTO_TEST_CASE(testJa1) -{ +TEST_F(CJKTokenizerTest, testJa1) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b, 0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad, 0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab, - 0xe4, 0xb9, 0x9d, 0xe5, 0x8d, 0x81}; - + 0xe4, 0xb9, 0x9d, 0xe5, 0x8d, 0x81 + }; + const uint8_t token1[] = {0xe4, 0xb8, 0x80, 0xe4, 0xba, 0x8c}; const uint8_t token2[] = {0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89}; const uint8_t token3[] = {0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b}; @@ -86,26 +76,26 @@ BOOST_AUTO_TEST_CASE(testJa1) const uint8_t token7[] = {0xe4, 0xb8, 0x83, 0xe5, 0x85, 0xab}; const uint8_t token8[] = {0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d}; const uint8_t token9[] = {0xe4, 0xb9, 0x9d, 0xe5, 0x8d, 0x81}; - + Collection out_tokens = newCollection( - TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token5), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token6), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token7), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token8), 7, 9, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token9), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), + 
TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token5), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token6), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token7), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token8), 7, 9, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token9), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } -BOOST_AUTO_TEST_CASE(testJa2) -{ +TEST_F(CJKTokenizerTest, testJa2) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89, 0xe5, 0x9b, 0x9b, 0x20, 0xe4, 0xba, 0x94, 0xe5, 0x85, 0xad, 0xe4, 0xb8, - 0x83, 0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d, 0x20, 0xe5, 0x8d, 0x81}; + 0x83, 0xe5, 0x85, 0xab, 0xe4, 0xb9, 0x9d, 0x20, 0xe5, 0x8d, 0x81 + }; const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xe4, 0xba, 0x8c, 0xe4, 0xb8, 0x89}; @@ -117,38 +107,37 @@ BOOST_AUTO_TEST_CASE(testJa2) const uint8_t token8[] = {0xe5, 0x8d, 0x81}; Collection out_tokens = newCollection( - TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token2), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token3), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token4), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token5), 7, 9, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token8), 12, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token2), 
2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token3), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token4), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token5), 7, 9, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token8), 12, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } -BOOST_AUTO_TEST_CASE(testC) -{ +TEST_F(CJKTokenizerTest, testC) { String str = L"abc defgh ijklmn opqrstu vwxy z"; Collection out_tokens = newCollection( - TestToken(L"abc", 0, 3, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(L"defgh", 4, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(L"ijklmn", 10, 16, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(L"opqrstu", 17, 24, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(L"vwxy", 25, 29, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(L"z", 30, 31, CJKTokenizer::SINGLE_TOKEN_TYPE) - ); + TestToken(L"abc", 0, 3, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(L"defgh", 4, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(L"ijklmn", 10, 16, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(L"opqrstu", 17, 24, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(L"vwxy", 25, 29, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(L"z", 30, 31, CJKTokenizer::SINGLE_TOKEN_TYPE) + ); checkCJKToken(str, out_tokens); } -BOOST_AUTO_TEST_CASE(testMix) -{ +TEST_F(CJKTokenizerTest, testMix) { const uint8_t str[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0x63, 0xe3, 0x81, 0x8b, 0xe3, - 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; + 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93 + }; const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t token2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; @@ 
-158,26 +147,26 @@ BOOST_AUTO_TEST_CASE(testMix) const uint8_t token7[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t token8[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; const uint8_t token9[] = {0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; - + Collection out_tokens = newCollection( - TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(L"abc", 5, 8, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token9), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(L"abc", 5, 8, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token9), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } -BOOST_AUTO_TEST_CASE(testMix2) -{ +TEST_F(CJKTokenizerTest, testMix2) { const uint8_t str[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0xe3, 0x82, 0x93, 0x63, 0xe3, - 0x81, 0x8b, 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; + 0x81, 0x8b, 
0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91 + }; const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t token2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; @@ -187,51 +176,50 @@ BOOST_AUTO_TEST_CASE(testMix2) const uint8_t token8[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t token9[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t token10[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; - + Collection out_tokens = newCollection( - TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(L"ab", 5, 7, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token6), 7, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(L"c", 8, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token8), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token9), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token10), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(L"ab", 5, 7, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token6), 7, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(L"c", 8, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token8), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token9), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token10), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } 
-BOOST_AUTO_TEST_CASE(testSingleChar) -{ +TEST_F(CJKTokenizerTest, testSingleChar) { const uint8_t str[] = {0xe4, 0xb8, 0x80}; Collection out_tokens = newCollection( - TestToken(UTF8_TO_STRING(str), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(str), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } -/// Full-width text is normalized to half-width -BOOST_AUTO_TEST_CASE(testFullWidth) -{ +/// Full-width text is normalized to half-width +TEST_F(CJKTokenizerTest, testFullWidth) { const uint8_t str[] = {0xef, 0xbc, 0xb4, 0xef, 0xbd, 0x85, 0xef, 0xbd, 0x93, 0xef, 0xbd, 0x94, - 0x20, 0xef, 0xbc, 0x91, 0xef, 0xbc, 0x92, 0xef, 0xbc, 0x93, 0xef, 0xbc, 0x94}; + 0x20, 0xef, 0xbc, 0x91, 0xef, 0xbc, 0x92, 0xef, 0xbc, 0x93, 0xef, 0xbc, 0x94 + }; Collection out_tokens = newCollection( - TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(L"1234", 5, 9, CJKTokenizer::SINGLE_TOKEN_TYPE) - ); + TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(L"1234", 5, 9, CJKTokenizer::SINGLE_TOKEN_TYPE) + ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } -/// Non-english text (not just CJK) is treated the same as CJK: C1C2 C2C3 -BOOST_AUTO_TEST_CASE(testNonIdeographic) -{ +/// Non-english text (not just CJK) is treated the same as CJK: C1C2 C2C3 +TEST_F(CJKTokenizerTest, testNonIdeographic) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xd8, 0xb1, 0xd9, 0x88, 0xd8, 0xa8, 0xd8, 0xb1, - 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb1}; - + 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb1 + }; + const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xd8, 0xb1, 0xd9, 0x88}; const uint8_t token3[] = {0xd9, 0x88, 0xd8, 0xa8}; @@ -240,27 +228,27 @@ BOOST_AUTO_TEST_CASE(testNonIdeographic) const uint8_t token6[] = {0xd9, 0x85, 0xd9, 0x88}; const uint8_t token7[] = {0xd9, 0x88, 0xd9, 0x8a}; const uint8_t token8[] = {0xd9, 0x8a, 0xd8, 0xb1}; - 
+ Collection out_tokens = newCollection( - TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token2), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token3), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token4), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token5), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token2), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token3), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token4), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token5), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } -/// Non-english text with non-letters (non-spacing marks,etc) is treated as C1C2 C2C3, +/// Non-english text with non-letters (non-spacing marks,etc) is treated as C1C2 C2C3, /// except for words are split around non-letters. 
-BOOST_AUTO_TEST_CASE(testNonIdeographicNonLetter) -{ +TEST_F(CJKTokenizerTest, testNonIdeographicNonLetter) { const uint8_t str[] = {0xe4, 0xb8, 0x80, 0x20, 0xd8, 0xb1, 0xd9, 0x8f, 0xd9, 0x88, 0xd8, 0xa8, - 0xd8, 0xb1, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb1}; - + 0xd8, 0xb1, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x88, 0xd9, 0x8a, 0xd8, 0xb1 + }; + const uint8_t token1[] = {0xe4, 0xb8, 0x80}; const uint8_t token2[] = {0xd8, 0xb1}; const uint8_t token3[] = {0xd9, 0x88, 0xd8, 0xa8}; @@ -269,38 +257,37 @@ BOOST_AUTO_TEST_CASE(testNonIdeographicNonLetter) const uint8_t token6[] = {0xd9, 0x85, 0xd9, 0x88}; const uint8_t token7[] = {0xd9, 0x88, 0xd9, 0x8a}; const uint8_t token8[] = {0xd9, 0x8a, 0xd8, 0xb1}; - + Collection out_tokens = newCollection( - TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token2), 2, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token3), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token4), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token5), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token6), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token7), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token8), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(token1), 0, 1, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token2), 2, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token3), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token4), 5, 7, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token5), 6, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token6), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token7), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token8), 11, 13, 
CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKToken(UTF8_TO_STRING(str), out_tokens); } -BOOST_AUTO_TEST_CASE(testTokenStream) -{ +TEST_F(CJKTokenizerTest, testTokenStream) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); - + const uint8_t token1[] = {0xe4, 0xb8, 0x80, 0xe4, 0xb8, 0x81, 0xe4, 0xb8, 0x82}; const uint8_t token2[] = {0xe4, 0xb8, 0x80, 0xe4, 0xb8, 0x81}; const uint8_t token3[] = {0xe4, 0xb8, 0x81, 0xe4, 0xb8, 0x82}; - + checkAnalyzesTo(analyzer, UTF8_TO_STRING(token1), newCollection(UTF8_TO_STRING(token2), UTF8_TO_STRING(token3))); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream) -{ +TEST_F(CJKTokenizerTest, testReusableTokenStream) { AnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); - + const uint8_t first[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0x63, 0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d, - 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; + 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93 + }; const uint8_t firstToken1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t firstToken2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; @@ -312,21 +299,22 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream) const uint8_t firstToken9[] = {0xe3, 0x81, 0x91, 0xe3, 0x81, 0x93}; Collection out_tokens = newCollection( - TestToken(UTF8_TO_STRING(firstToken1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(firstToken2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(firstToken3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(firstToken4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(L"abc", 5, 8, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(firstToken6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(firstToken7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(firstToken8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), - 
TestToken(UTF8_TO_STRING(firstToken9), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(firstToken1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(firstToken2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(firstToken3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(firstToken4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(L"abc", 5, 8, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(firstToken6), 8, 10, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(firstToken7), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(firstToken8), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(firstToken9), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKTokenReusable(analyzer, UTF8_TO_STRING(first), out_tokens); - + const uint8_t second[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86, 0xe3, 0x81, 0x88, 0xe3, 0x81, 0x8a, 0x61, 0x62, 0xe3, 0x82, 0x93, 0x63, 0xe3, 0x81, 0x8b, - 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; + 0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91 + }; const uint8_t secondToken1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; const uint8_t secondToken2[] = {0xe3, 0x81, 0x84, 0xe3, 0x81, 0x86}; @@ -336,42 +324,39 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream) const uint8_t secondToken8[] = {0xe3, 0x81, 0x8b, 0xe3, 0x81, 0x8d}; const uint8_t secondToken9[] = {0xe3, 0x81, 0x8d, 0xe3, 0x81, 0x8f}; const uint8_t secondToken10[] = {0xe3, 0x81, 0x8f, 0xe3, 0x81, 0x91}; - + Collection out_tokens2 = newCollection( - TestToken(UTF8_TO_STRING(secondToken1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(secondToken2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(secondToken3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(secondToken4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(L"ab", 5, 7, 
CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(secondToken6), 7, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(L"c", 8, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(secondToken8), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(secondToken9), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(secondToken10), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) - ); + TestToken(UTF8_TO_STRING(secondToken1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(secondToken2), 1, 3, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(secondToken3), 2, 4, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(secondToken4), 3, 5, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(L"ab", 5, 7, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(secondToken6), 7, 8, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(L"c", 8, 9, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(secondToken8), 9, 11, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(secondToken9), 10, 12, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(secondToken10), 11, 13, CJKTokenizer::DOUBLE_TOKEN_TYPE) + ); checkCJKTokenReusable(analyzer, UTF8_TO_STRING(second), out_tokens2); } -BOOST_AUTO_TEST_CASE(testFinalOffset) -{ +TEST_F(CJKTokenizerTest, testFinalOffset) { const uint8_t token1[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84}; - checkCJKToken(UTF8_TO_STRING(token1), + checkCJKToken(UTF8_TO_STRING(token1), newCollection(TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE))); const uint8_t token2[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x20, 0x20, 0x20}; - checkCJKToken(UTF8_TO_STRING(token2), + checkCJKToken(UTF8_TO_STRING(token2), newCollection(TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE))); checkCJKToken(L"test", newCollection(TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE))); checkCJKToken(L"test ", 
newCollection(TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE))); const uint8_t token3[] = {0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x74, 0x65, 0x73, 0x74}; - checkCJKToken(UTF8_TO_STRING(token3), + checkCJKToken(UTF8_TO_STRING(token3), newCollection( - TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), - TestToken(L"test", 2, 6, CJKTokenizer::SINGLE_TOKEN_TYPE))); + TestToken(UTF8_TO_STRING(token1), 0, 2, CJKTokenizer::DOUBLE_TOKEN_TYPE), + TestToken(L"test", 2, 6, CJKTokenizer::SINGLE_TOKEN_TYPE))); const uint8_t token4[] = {0x74, 0x65, 0x73, 0x74, 0xe3, 0x81, 0x82, 0xe3, 0x81, 0x84, 0x20, 0x20, 0x20, 0x20}; - checkCJKToken(UTF8_TO_STRING(token4), + checkCJKToken(UTF8_TO_STRING(token4), newCollection( - TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE), - TestToken(UTF8_TO_STRING(token1), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE))); + TestToken(L"test", 0, 4, CJKTokenizer::SINGLE_TOKEN_TYPE), + TestToken(UTF8_TO_STRING(token1), 4, 6, CJKTokenizer::DOUBLE_TOKEN_TYPE))); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/cn/ChineseTokenizerTest.cpp b/src/test/contrib/analyzers/common/analysis/cn/ChineseTokenizerTest.cpp index 843abfe0..7be338c2 100644 --- a/src/test/contrib/analyzers/common/analysis/cn/ChineseTokenizerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/cn/ChineseTokenizerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -17,62 +17,54 @@ using namespace Lucene; /// Analyzer that just uses ChineseTokenizer, not ChineseFilter. 
/// Convenience to show the behaviour of the tokenizer -class JustChineseTokenizerAnalyzer : public Analyzer -{ +class JustChineseTokenizerAnalyzer : public Analyzer { public: - virtual ~JustChineseTokenizerAnalyzer() - { + virtual ~JustChineseTokenizerAnalyzer() { } public: - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) - { + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) { return newLucene(reader); } }; /// Analyzer that just uses ChineseFilter, not ChineseTokenizer. /// Convenience to show the behavior of the filter. -class JustChineseFilterAnalyzer : public Analyzer -{ +class JustChineseFilterAnalyzer : public Analyzer { public: - virtual ~JustChineseFilterAnalyzer() - { + virtual ~JustChineseFilterAnalyzer() { } public: - virtual TokenStreamPtr tokenStream(const String& fieldName, ReaderPtr reader) - { + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) { return newLucene(newLucene(reader)); } }; -BOOST_FIXTURE_TEST_SUITE(ChineseTokenizerTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture ChineseTokenizerTest; -BOOST_AUTO_TEST_CASE(testOtherLetterOffset) -{ +TEST_F(ChineseTokenizerTest, testOtherLetterOffset) { const uint8_t token[] = {0x61, 0xe5, 0xa4, 0xa9, 0x62}; ChineseTokenizerPtr tokenizer = newLucene(newLucene(UTF8_TO_STRING(token))); int32_t correctStartOffset = 0; int32_t correctEndOffset = 1; OffsetAttributePtr offsetAtt = tokenizer->getAttribute(); - while (tokenizer->incrementToken()) - { - BOOST_CHECK_EQUAL(correctStartOffset, offsetAtt->startOffset()); - BOOST_CHECK_EQUAL(correctEndOffset, offsetAtt->endOffset()); + while (tokenizer->incrementToken()) { + EXPECT_EQ(correctStartOffset, offsetAtt->startOffset()); + EXPECT_EQ(correctEndOffset, offsetAtt->endOffset()); ++correctStartOffset; ++correctEndOffset; } } -BOOST_AUTO_TEST_CASE(testReusableTokenStream1) -{ +TEST_F(ChineseTokenizerTest, testReusableTokenStream1) { AnalyzerPtr a = 
newLucene(); - + const uint8_t input[] = {0xe4, 0xb8, 0xad, 0xe5, 0x8d, 0x8e, 0xe4, 0xba, 0xba, 0xe6, 0xb0, - 0x91, 0xe5, 0x85, 0xb1, 0xe5, 0x92, 0x8c, 0xe5, 0x9b, 0xbd}; - + 0x91, 0xe5, 0x85, 0xb1, 0xe5, 0x92, 0x8c, 0xe5, 0x9b, 0xbd + }; + const uint8_t token1[] = {0xe4, 0xb8, 0xad}; const uint8_t token2[] = {0xe5, 0x8d, 0x8e}; const uint8_t token3[] = {0xe4, 0xba, 0xba}; @@ -80,54 +72,52 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream1) const uint8_t token5[] = {0xe5, 0x85, 0xb1}; const uint8_t token6[] = {0xe5, 0x92, 0x8c}; const uint8_t token7[] = {0xe5, 0x9b, 0xbd}; - + checkAnalyzesToReuse(a, UTF8_TO_STRING(input), - newCollection( - UTF8_TO_STRING(token1), - UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3), - UTF8_TO_STRING(token4), - UTF8_TO_STRING(token5), - UTF8_TO_STRING(token6), - UTF8_TO_STRING(token7) - ), - newCollection(0, 1, 2, 3, 4, 5, 6), - newCollection(1, 2, 3, 4, 5, 6, 7)); + newCollection( + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), + UTF8_TO_STRING(token3), + UTF8_TO_STRING(token4), + UTF8_TO_STRING(token5), + UTF8_TO_STRING(token6), + UTF8_TO_STRING(token7) + ), + newCollection(0, 1, 2, 3, 4, 5, 6), + newCollection(1, 2, 3, 4, 5, 6, 7)); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream2) -{ +TEST_F(ChineseTokenizerTest, testReusableTokenStream2) { AnalyzerPtr a = newLucene(); - + const uint8_t input[] = {0xe5, 0x8c, 0x97, 0xe4, 0xba, 0xac, 0xe5, 0xb8, 0x82}; const uint8_t token1[] = {0xe5, 0x8c, 0x97}; const uint8_t token2[] = {0xe4, 0xba, 0xac}; const uint8_t token3[] = {0xe5, 0xb8, 0x82}; - + checkAnalyzesToReuse(a, UTF8_TO_STRING(input), - newCollection( - UTF8_TO_STRING(token1), - UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3) - ), - newCollection(0, 1, 2), - newCollection(1, 2, 3)); + newCollection( + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), + UTF8_TO_STRING(token3) + ), + newCollection(0, 1, 2), + newCollection(1, 2, 3)); } /// ChineseTokenizer tokenizes numbers as one token, but they are filtered by ChineseFilter 
-BOOST_AUTO_TEST_CASE(testNumerics) -{ +TEST_F(ChineseTokenizerTest, testNumerics) { AnalyzerPtr justTokenizer = newLucene(); - + const uint8_t input[] = {0xe4, 0xb8, 0xad, 0x31, 0x32, 0x33, 0x34}; const uint8_t token1[] = {0xe4, 0xb8, 0xad}; - + checkAnalyzesTo(justTokenizer, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1), L"1234")); - // in this case the ChineseAnalyzer (which applies ChineseFilter) will remove the numeric token. + // in this case the ChineseAnalyzer (which applies ChineseFilter) will not remove the numeric token. AnalyzerPtr a = newLucene(); - checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1))); + checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1), L"1234")); } /// ChineseTokenizer tokenizes english similar to SimpleAnalyzer. @@ -135,8 +125,7 @@ BOOST_AUTO_TEST_CASE(testNumerics) /// /// ChineseFilter has an english stopword list, it also removes any single character tokens. /// The stopword list is case-sensitive. -BOOST_AUTO_TEST_CASE(testEnglish) -{ +TEST_F(ChineseTokenizerTest, testEnglish) { AnalyzerPtr chinese = newLucene(); checkAnalyzesTo(chinese, L"This is a Test. b c d", newCollection(L"test")); @@ -146,5 +135,3 @@ BOOST_AUTO_TEST_CASE(testEnglish) AnalyzerPtr justFilter = newLucene(); checkAnalyzesTo(justFilter, L"This is a Test. b c d", newCollection(L"This", L"Test.")); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/cz/CzechAnalyzerTest.cpp b/src/test/contrib/analyzers/common/analysis/cz/CzechAnalyzerTest.cpp index 81eebebe..20951d6f 100644 --- a/src/test/contrib/analyzers/common/analysis/cz/CzechAnalyzerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/cz/CzechAnalyzerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,28 +10,24 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(CzechAnalyzerTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture CzechAnalyzerTest; -BOOST_AUTO_TEST_CASE(testStopWord) -{ +TEST_F(CzechAnalyzerTest, testStopWord) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(analyzer, L"Pokud mluvime o volnem", newCollection(L"mluvime", L"volnem")); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream1) -{ +TEST_F(CzechAnalyzerTest, testReusableTokenStream1) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesToReuse(analyzer, L"Pokud mluvime o volnem", newCollection(L"mluvime", L"volnem")); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream2) -{ +TEST_F(CzechAnalyzerTest, testReusableTokenStream2) { CzechAnalyzerPtr analyzer = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t input[] = {0xc4, 0x8c, 0x65, 0x73, 0x6b, 0xc3, 0xa1, 0x20, 0x52, - 0x65, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x6b, 0x61}; + 0x65, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x6b, 0x61 + }; const uint8_t token1[] = {0xc4, 0x8d, 0x65, 0x73, 0x6b, 0xc3, 0xa1}; const uint8_t token2[] = {0x72, 0x65, 0x70, 0x75, 0x62, 0x6c, 0x69, 0x6b, 0x61}; checkAnalyzesToReuse(analyzer, UTF8_TO_STRING(input), newCollection(UTF8_TO_STRING(token1), UTF8_TO_STRING(token2))); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/de/GermanStemFilterTest.cpp b/src/test/contrib/analyzers/common/analysis/de/GermanStemFilterTest.cpp index 9edb223f..81fba752 100644 --- a/src/test/contrib/analyzers/common/analysis/de/GermanStemFilterTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/de/GermanStemFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 
2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -11,68 +11,61 @@ using namespace Lucene; -class GermanStemFilterFixture : public BaseTokenStreamFixture -{ +class GermanStemFilterTest : public BaseTokenStreamFixture { public: - virtual ~GermanStemFilterFixture() - { + virtual ~GermanStemFilterTest() { } public: - void check(const String& input, const String& expected) - { + void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } - - void checkReuse(AnalyzerPtr a, const String& input, const String& expected) - { + + void checkReuse(const AnalyzerPtr& a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; -BOOST_FIXTURE_TEST_SUITE(GermanStemFilterTest, GermanStemFilterFixture) - -/// Test the German stemmer. The stemming algorithm is known to work less than perfect, as it doesn't +/// Test the German stemmer. The stemming algorithm is known to work less than perfect, as it doesn't /// use any word lists with exceptions. We also check some of the cases where the algorithm is wrong. 
-BOOST_AUTO_TEST_CASE(testStemming) -{ +TEST_F(GermanStemFilterTest, testStemming) { const uint8_t haufig[] = {0x68, 0xc3, 0xa4, 0x75, 0x66, 0x69, 0x67}; check(UTF8_TO_STRING(haufig), L"haufig"); // German special characters are replaced - + const uint8_t abschliess1[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e}; check(UTF8_TO_STRING(abschliess1), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem - + const uint8_t abschliess2[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x72}; check(UTF8_TO_STRING(abschliess2), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem - + const uint8_t abschliess3[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x73}; check(UTF8_TO_STRING(abschliess3), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem - + const uint8_t abschliess4[] = {0x61, 0x62, 0x73, 0x63, 0x68, 0x6c, 0x69, 0x65, 0xc3, 0x9f, 0x65, 0x6e, 0x64, 0x65, 0x6e}; check(UTF8_TO_STRING(abschliess4), L"abschliess"); // here the stemmer works okay, it maps related words to the same stem - + check(L"Tisch", L"tisch"); check(L"Tische", L"tisch"); check(L"Tischen", L"tisch"); - + check(L"Haus", L"hau"); check(L"Hauses", L"hau"); - + const uint8_t hau1[] = {0x48, 0xc3, 0xa4, 0x75, 0x73, 0x65, 0x72}; check(UTF8_TO_STRING(hau1), L"hau"); - + const uint8_t hau2[] = {0x48, 0xc3, 0xa4, 0x75, 0x73, 0x65, 0x72, 0x6e}; check(UTF8_TO_STRING(hau2), L"hau"); // Here's a case where overstemming occurs, ie. a word is mapped to the same stem as unrelated words check(L"hauen", L"hau"); - - // Here's a case where understemming occurs, i.e. two related words are not mapped to the same stem. + + // Here's a case where understemming occurs, i.e. two related words are not mapped to the same stem. 
// This is the case with basically all irregular forms check(L"Drama", L"drama"); check(L"Dramen", L"dram"); - + const uint8_t ausmass[] = {0x41, 0x75, 0x73, 0x6d, 0x61, 0xc3, 0x9f}; check(UTF8_TO_STRING(ausmass), L"ausmass"); @@ -84,23 +77,22 @@ BOOST_AUTO_TEST_CASE(testStemming) check(L"xxxxxem", L"xxxxx"); check(L"xxxxxet", L"xxxxx"); check(L"xxxxxnd", L"xxxxx"); - + // The suffixes are also removed when combined check(L"xxxxxetende", L"xxxxx"); - + // Words that are shorter than four charcters are not changed check(L"xxe", L"xxe"); - + // -em and -er are not removed from words shorter than five characters check(L"xxem", L"xxem"); check(L"xxer", L"xxer"); - + // -nd is not removed from words shorter than six characters check(L"xxxnd", L"xxxnd"); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream) -{ +TEST_F(GermanStemFilterTest, testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"Tisch", L"tisch"); checkReuse(a, L"Tische", L"tisch"); @@ -108,8 +100,7 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream) } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. 
-BOOST_AUTO_TEST_CASE(testExclusionTableReuse) -{ +TEST_F(GermanStemFilterTest, testExclusionTableReuse) { GermanAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"tischen", L"tisch"); HashSet exclusions = HashSet::newInstance(); @@ -117,5 +108,3 @@ BOOST_AUTO_TEST_CASE(testExclusionTableReuse) a->setStemExclusionTable(exclusions); checkReuse(a, L"tischen", L"tischen"); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/el/GreekAnalyzerTest.cpp b/src/test/contrib/analyzers/common/analysis/el/GreekAnalyzerTest.cpp index 054c07ec..c89da0b1 100644 --- a/src/test/contrib/analyzers/common/analysis/el/GreekAnalyzerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/el/GreekAnalyzerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,204 +10,204 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(GreekAnalyzerTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture GreekAnalyzerTest; -BOOST_AUTO_TEST_CASE(testAnalyzer1) -{ +TEST_F(GreekAnalyzerTest, testAnalyzer1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - - const uint8_t input[] = - { - 0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20, 0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, - 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xac, 0x20, 0xce, 0xba, 0xce, 0xb1, - 0xce, 0xbb, 0xce, 0xae, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xcf, 0x80, 0xce, 0xbb, - 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20, 0xcf, 0x83, 0xce, 0xb5, 0xce, - 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20, 0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, - 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd, 0x20, 0xcf, 0x84, 0xce, 0xb7, 0xcf, - 0x82, 0x20, 0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, - 0xce, 0xae, 0xcf, 0x82, 0x20, 0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, + + const uint8_t input[] = { + 0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20, 0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, + 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xac, 0x20, 0xce, 0xba, 0xce, 0xb1, + 0xce, 0xbb, 0xce, 0xae, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xcf, 0x80, 0xce, 0xbb, + 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20, 0xcf, 0x83, 0xce, 0xb5, 0xce, + 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20, 0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, + 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd, 0x20, 0xcf, 0x84, 0xce, 0xb7, 0xcf, + 0x82, 0x20, 0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, + 0xce, 0xae, 0xcf, 0x82, 0x20, 0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 
0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x82 }; - + const uint8_t token1[] = {0xce, 0xbc, 0xce, 0xb9, 0xce, 0xb1}; - const uint8_t token2[] = {0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, - 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb1}; + const uint8_t token2[] = {0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, + 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb1 + }; const uint8_t token3[] = {0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xb7}; const uint8_t token4[] = {0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token5[] = {0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb1}; - const uint8_t token6[] = {0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, - 0xce, 0xb7, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd}; - const uint8_t token7[] = {0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, - 0xce, 0xb7, 0xcf, 0x83}; + const uint8_t token6[] = {0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, + 0xce, 0xb7, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd + }; + const uint8_t token7[] = {0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, + 0xce, 0xb7, 0xcf, 0x83 + }; const uint8_t token8[] = {0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x89, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x83}; - + // Verify the correct analysis of capitals and small accented letters checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( - UTF8_TO_STRING(token1), - UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3), - UTF8_TO_STRING(token4), - UTF8_TO_STRING(token5), - UTF8_TO_STRING(token6), - UTF8_TO_STRING(token7), + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), + UTF8_TO_STRING(token3), + UTF8_TO_STRING(token4), + UTF8_TO_STRING(token5), + UTF8_TO_STRING(token6), + UTF8_TO_STRING(token7), UTF8_TO_STRING(token8) - )); + )); } -BOOST_AUTO_TEST_CASE(testAnalyzer2) -{ +TEST_F(GreekAnalyzerTest, testAnalyzer2) { 
AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - - const uint8_t input[] = - { - 0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1, - 0x20, 0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5b, 0xcf, - 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad, 0xcf, - 0x82, 0x5d, 0x09, 0x2d, 0x09, 0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, + + const uint8_t input[] = { + 0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1, + 0x20, 0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5b, 0xcf, + 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad, 0xcf, + 0x82, 0x5d, 0x09, 0x2d, 0x09, 0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, 0x95, 0xce, 0xa3 }; - - const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, - 0xcf, 0x84, 0xce, 0xb1}; - const uint8_t token2[] = {0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, - 0xce, 0xbb, 0xce, 0xb5, 0xcf, 0x83}; - const uint8_t token3[] = {0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, - 0xcf, 0x83}; - + + const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, + 0xcf, 0x84, 0xce, 0xb1 + }; + const uint8_t token2[] = {0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, + 0xce, 0xbb, 0xce, 0xb5, 0xcf, 0x83 + }; + const uint8_t token3[] = {0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, + 0xcf, 0x83 + }; + // Verify the correct analysis of small letters with diaeresis and the elimination of punctuation marks checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( - UTF8_TO_STRING(token1), - UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3) - )); + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), 
+ UTF8_TO_STRING(token3) + )); } -BOOST_AUTO_TEST_CASE(testAnalyzer3) -{ +TEST_F(GreekAnalyzerTest, testAnalyzer3) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - - const uint8_t input[] = - { - 0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95, - 0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3, 0x20, 0x20, 0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, - 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20, 0xce, 0xbf, 0x20, 0xce, 0xbc, 0xce, 0xb5, 0xcf, - 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xce, + + const uint8_t input[] = { + 0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95, + 0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3, 0x20, 0x20, 0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, + 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20, 0xce, 0xbf, 0x20, 0xce, 0xbc, 0xce, 0xb5, 0xcf, + 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xce, 0xbf, 0xce, 0xb9, 0x20, 0xce, 0xac, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9 }; - - const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, - 0xce, 0xb8, 0xce, 0xb5, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83}; + + const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, + 0xce, 0xb8, 0xce, 0xb5, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83 + }; const uint8_t token2[] = {0xce, 0xb1, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token3[] = {0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token4[] = {0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9}; - + // Verify the correct analysis of capital accented letters and capital letters with diaeresis, // as well as the elimination of stop words checkAnalyzesTo(a, UTF8_TO_STRING(input), newCollection( - UTF8_TO_STRING(token1), - 
UTF8_TO_STRING(token2), + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), UTF8_TO_STRING(token3), UTF8_TO_STRING(token4) - )); + )); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream1) -{ +TEST_F(GreekAnalyzerTest, testReusableTokenStream1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - - const uint8_t input[] = - { - 0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20, 0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, - 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xac, 0x20, 0xce, 0xba, 0xce, 0xb1, - 0xce, 0xbb, 0xce, 0xae, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xcf, 0x80, 0xce, 0xbb, - 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20, 0xcf, 0x83, 0xce, 0xb5, 0xce, - 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20, 0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, - 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd, 0x20, 0xcf, 0x84, 0xce, 0xb7, 0xcf, - 0x82, 0x20, 0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, - 0xce, 0xae, 0xcf, 0x82, 0x20, 0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, + + const uint8_t input[] = { + 0xce, 0x9c, 0xce, 0xaf, 0xce, 0xb1, 0x20, 0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, + 0x81, 0xce, 0xb5, 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xac, 0x20, 0xce, 0xba, 0xce, 0xb1, + 0xce, 0xbb, 0xce, 0xae, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xcf, 0x80, 0xce, 0xbb, + 0xce, 0xbf, 0xcf, 0x8d, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1, 0x20, 0xcf, 0x83, 0xce, 0xb5, 0xce, + 0xb9, 0xcf, 0x81, 0xce, 0xac, 0x20, 0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, + 0xcf, 0x84, 0xce, 0xae, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd, 0x20, 0xcf, 0x84, 0xce, 0xb7, 0xcf, + 0x82, 0x20, 0xce, 0x95, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, + 0xce, 0xae, 0xcf, 0x82, 0x20, 0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x8e, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x82 }; - + const uint8_t token1[] = {0xce, 0xbc, 0xce, 
0xb9, 0xce, 0xb1}; - const uint8_t token2[] = {0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, - 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb1}; + const uint8_t token2[] = {0xce, 0xb5, 0xce, 0xbe, 0xce, 0xb1, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb5, + 0xcf, 0x84, 0xce, 0xb9, 0xce, 0xba, 0xce, 0xb1 + }; const uint8_t token3[] = {0xce, 0xba, 0xce, 0xb1, 0xce, 0xbb, 0xce, 0xb7}; const uint8_t token4[] = {0xcf, 0x80, 0xce, 0xbb, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x83, 0xce, 0xb9, 0xce, 0xb1}; const uint8_t token5[] = {0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x81, 0xce, 0xb1}; - const uint8_t token6[] = {0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, - 0xce, 0xb7, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd}; - const uint8_t token7[] = {0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, - 0xce, 0xb7, 0xcf, 0x83}; + const uint8_t token6[] = {0xcf, 0x87, 0xce, 0xb1, 0xcf, 0x81, 0xce, 0xb1, 0xce, 0xba, 0xcf, 0x84, + 0xce, 0xb7, 0xcf, 0x81, 0xcf, 0x89, 0xce, 0xbd + }; + const uint8_t token7[] = {0xce, 0xb5, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb7, 0xce, 0xbd, 0xce, 0xb9, 0xce, 0xba, + 0xce, 0xb7, 0xcf, 0x83 + }; const uint8_t token8[] = {0xce, 0xb3, 0xce, 0xbb, 0xcf, 0x89, 0xcf, 0x83, 0xcf, 0x83, 0xce, 0xb1, 0xcf, 0x83}; - + // Verify the correct analysis of capitals and small accented letters checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( - UTF8_TO_STRING(token1), - UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3), - UTF8_TO_STRING(token4), - UTF8_TO_STRING(token5), - UTF8_TO_STRING(token6), - UTF8_TO_STRING(token7), - UTF8_TO_STRING(token8) - )); + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), + UTF8_TO_STRING(token3), + UTF8_TO_STRING(token4), + UTF8_TO_STRING(token5), + UTF8_TO_STRING(token6), + UTF8_TO_STRING(token7), + UTF8_TO_STRING(token8) + )); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream2) -{ +TEST_F(GreekAnalyzerTest, testReusableTokenStream2) { AnalyzerPtr a = 
newLucene(LuceneVersion::LUCENE_CURRENT); - - const uint8_t input[] = - { - 0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1, - 0x20, 0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5b, 0xcf, - 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad, 0xcf, - 0x82, 0x5d, 0x09, 0x2d, 0x09, 0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, + + const uint8_t input[] = { + 0xce, 0xa0, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x8a, 0xcf, 0x8c, 0xce, 0xbd, 0xcf, 0x84, 0xce, 0xb1, + 0x20, 0x28, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x29, 0x20, 0x20, 0x20, 0x20, 0x20, 0x5b, 0xcf, + 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, 0xce, 0xbb, 0xce, 0xad, 0xcf, + 0x82, 0x5d, 0x09, 0x2d, 0x09, 0xce, 0x91, 0xce, 0x9d, 0xce, 0x91, 0xce, 0x93, 0xce, 0x9a, 0xce, 0x95, 0xce, 0xa3 }; - - const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, - 0xcf, 0x84, 0xce, 0xb1}; - const uint8_t token2[] = {0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, - 0xce, 0xbb, 0xce, 0xb5, 0xcf, 0x83}; - const uint8_t token3[] = {0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, - 0xcf, 0x83}; - + + const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xce, 0xb9, 0xce, 0xbf, 0xce, 0xbd, + 0xcf, 0x84, 0xce, 0xb1 + }; + const uint8_t token2[] = {0xcf, 0x80, 0xce, 0xbf, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xb1, 0xcf, 0x80, + 0xce, 0xbb, 0xce, 0xb5, 0xcf, 0x83 + }; + const uint8_t token3[] = {0xce, 0xb1, 0xce, 0xbd, 0xce, 0xb1, 0xce, 0xb3, 0xce, 0xba, 0xce, 0xb5, + 0xcf, 0x83 + }; + // Verify the correct analysis of small letters with diaeresis and the elimination of punctuation marks checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( - UTF8_TO_STRING(token1), - UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3) - )); + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), + 
UTF8_TO_STRING(token3) + )); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream3) -{ +TEST_F(GreekAnalyzerTest, testReusableTokenStream3) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - - const uint8_t input[] = - { - 0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95, - 0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3, 0x20, 0x20, 0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, - 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20, 0xce, 0xbf, 0x20, 0xce, 0xbc, 0xce, 0xb5, 0xcf, - 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xce, + + const uint8_t input[] = { + 0xce, 0xa0, 0xce, 0xa1, 0xce, 0x9f, 0xce, 0xab, 0xce, 0xa0, 0xce, 0x9f, 0xce, 0x98, 0xce, 0x95, + 0xce, 0xa3, 0xce, 0x95, 0xce, 0x99, 0xce, 0xa3, 0x20, 0x20, 0xce, 0x86, 0xcf, 0x88, 0xce, 0xbf, + 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x82, 0x2c, 0x20, 0xce, 0xbf, 0x20, 0xce, 0xbc, 0xce, 0xb5, 0xcf, + 0x83, 0xcf, 0x84, 0xcf, 0x8c, 0xcf, 0x82, 0x20, 0xce, 0xba, 0xce, 0xb1, 0xce, 0xb9, 0x20, 0xce, 0xbf, 0xce, 0xb9, 0x20, 0xce, 0xac, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9 }; - - const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, - 0xce, 0xb8, 0xce, 0xb5, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83}; + + const uint8_t token1[] = {0xcf, 0x80, 0xcf, 0x81, 0xce, 0xbf, 0xcf, 0x85, 0xcf, 0x80, 0xce, 0xbf, + 0xce, 0xb8, 0xce, 0xb5, 0xcf, 0x83, 0xce, 0xb5, 0xce, 0xb9, 0xcf, 0x83 + }; const uint8_t token2[] = {0xce, 0xb1, 0xcf, 0x88, 0xce, 0xbf, 0xce, 0xb3, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token3[] = {0xce, 0xbc, 0xce, 0xb5, 0xcf, 0x83, 0xcf, 0x84, 0xce, 0xbf, 0xcf, 0x83}; const uint8_t token4[] = {0xce, 0xb1, 0xce, 0xbb, 0xce, 0xbb, 0xce, 0xbf, 0xce, 0xb9}; - + // Verify the correct analysis of capital accented letters and capital letters with diaeresis, // as well as the elimination of stop words checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( - UTF8_TO_STRING(token1), 
- UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3), - UTF8_TO_STRING(token4) - )); + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), + UTF8_TO_STRING(token3), + UTF8_TO_STRING(token4) + )); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/fa/PersianAnalyzerTest.cpp b/src/test/contrib/analyzers/common/analysis/fa/PersianAnalyzerTest.cpp index 0e13a7b8..e9d41fd0 100644 --- a/src/test/contrib/analyzers/common/analysis/fa/PersianAnalyzerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/fa/PersianAnalyzerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,7 +10,7 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(PersianAnalyzerTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture PersianAnalyzerTest; /// These tests show how the combination of tokenization (breaking on zero-width /// non-joiner), normalization (such as treating arabic YEH and farsi YEH the @@ -19,8 +19,7 @@ BOOST_FIXTURE_TEST_SUITE(PersianAnalyzerTest, BaseTokenStreamFixture) /// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar /// active present indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs1) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; @@ -28,8 +27,7 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbs1) } /// active preterite indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs2) -{ 
+TEST_F(PersianAnalyzerTest, testBehaviorVerbs2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; @@ -37,8 +35,7 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbs2) } /// active imperfective preterite indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs3) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs3) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; @@ -46,129 +43,128 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbs3) } /// active future indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs4) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs4) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, - 0xd8, 0xb1, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, + 0xd8, 0xb1, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present progressive indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs5) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs5) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, - 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, + 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf + }; const uint8_t 
second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite progressive indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs6) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs6) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, - 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, + 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active perfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs7) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs7) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, - 0xd8, 0xb3, 0xd8, 0xaa}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, + 0xd8, 0xb3, 0xd8, 0xaa + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective perfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs8) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs8) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, - 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; + const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 
0xd8, 0xb1, 0xd8, 0xaf, + 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs9) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs9) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, - 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, + 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs10) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs10) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; + const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs11) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs11) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, - 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = 
{0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, + 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs12) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs12) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs13) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs13) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, - 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, + 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs14) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs14) { PersianAnalyzerPtr a = 
newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, - 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, + 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs15) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs15) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, - 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, + 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs16) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs16) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; @@ -176,145 +172,144 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbs16) } /// passive imperfective preterite indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs17) -{ 
+TEST_F(PersianAnalyzerTest, testBehaviorVerbs17) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, - 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, + 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive perfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs18) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs18) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective perfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs19) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs19) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, - 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 0xd8, - 0xb3, 0xd8, 0xaa}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, + 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0xe2, 0x80, 0x8c, 0xd8, 0xa7, 
0xd8, + 0xb3, 0xd8, 0xaa + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs20) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs20) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs21) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs21) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, - 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, - 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, + 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, + 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive future indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs22) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs22) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 
0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xae, 0xd9, 0x88, - 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xae, 0xd9, 0x88, + 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present progressive indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs23) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs23) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, - 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd9, - 0x88, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, + 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd9, + 0x88, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite progressive indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbs24) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs24) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, - 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, - 0xaf}; + const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, + 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, + 0xaf + }; const uint8_t second[] = 
{0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs25) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs25) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd9, 0x88, - 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd9, 0x88, + 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs26) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs26) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs27) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs27) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, - 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, - 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 
0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, + 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, + 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs28) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs28) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, - 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, + 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs29) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs29) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, - 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, - 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xdb, 0x8c, + 0xe2, 0x80, 0x8c, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, + 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 
0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbs30) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbs30) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; @@ -331,8 +326,7 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbs30) /// These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar /// active present subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective1) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; @@ -340,8 +334,7 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective1) } /// active preterite indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective2) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; @@ -349,8 +342,7 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective2) } /// active imperfective preterite indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective3) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective3) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; @@ -358,129 
+350,128 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective3) } /// active future indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective4) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective4) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, - 0xd8, 0xb1, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, + 0xd8, 0xb1, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present progressive indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective5) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective5) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, - 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, + 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite progressive indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective6) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective6) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, - 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, + 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf + }; const uint8_t 
second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active perfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective7) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective7) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, - 0xd8, 0xaa}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, + 0xd8, 0xaa + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective perfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective8) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective8) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, - 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; + const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, + 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective9) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective9) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, - 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, + 0xd8, 0xaf + }; const 
uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective10) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective10) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, - 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; + const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, + 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active preterite subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective11) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective11) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, - 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, + 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective preterite subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective12) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective12) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, - 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd9, 
0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, + 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active pluperfect subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective13) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective13) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, - 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, + 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active imperfective pluperfect subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective14) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective14) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, - 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, - 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, + 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, + 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present indicative 
-BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective15) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective15) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, - 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, + 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective16) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective16) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; @@ -488,142 +479,141 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective16) } /// passive imperfective preterite indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective17) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective17) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, - 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, + 0x20, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive perfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective18) -{ +TEST_F(PersianAnalyzerTest, 
testBehaviorVerbsDefective18) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective perfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective19) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective19) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, - 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, + 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa7, 0xd8, 0xb3, 0xd8, 0xaa + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective20) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective20) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf + }; const 
uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective21) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective21) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, - 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, + 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive future indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective22) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective22) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xae, 0xd9, 0x88, - 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xae, 0xd9, 0x88, + 0xd8, 0xa7, 0xd9, 0x87, 0xd8, 0xaf, 0x20, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present progressive indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective23) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective23) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xaf, 
0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, - 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, - 0xaf}; + const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb1, 0xd8, 0xaf, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, + 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd9, 0x88, 0xd8, + 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive preterite progressive indicative -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective24) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective24) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, - 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xaf, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaa, 0x20, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, + 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, 0x20, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive present subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective25) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective25) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd9, 0x88, - 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd9, 0x88, + 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), 
newCollection(UTF8_TO_STRING(second))); } /// passive preterite subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective26) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective26) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective preterite subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective27) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective27) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, - 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, - 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, + 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, + 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive pluperfect subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective28) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective28) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, - 0xd9, 0x87, 0x20, 0xd8, 
0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, - 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xb4, 0xd8, 0xaf, + 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd8, + 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// passive imperfective pluperfect subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective29) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective29) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, - 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, - 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, + 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, + 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } /// active present subjunctive -BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective30) -{ +TEST_F(PersianAnalyzerTest, testBehaviorVerbsDefective30) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf}; @@ -634,16 +624,14 @@ BOOST_AUTO_TEST_CASE(testBehaviorVerbsDefective30) /// non-joiner or space) and stopwords creates a light-stemming effect for /// 
nouns, removing the plural -ha. -BOOST_AUTO_TEST_CASE(testBehaviorNouns1) -{ +TEST_F(PersianAnalyzerTest, testBehaviorNouns1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0x20, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; checkAnalyzesTo(a, UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testBehaviorNouns2) -{ +TEST_F(PersianAnalyzerTest, testBehaviorNouns2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0xe2, 0x80, 0x8c, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; @@ -651,24 +639,22 @@ BOOST_AUTO_TEST_CASE(testBehaviorNouns2) } /// Test showing that non-Persian text is treated very much like SimpleAnalyzer (lowercased, etc) -BOOST_AUTO_TEST_CASE(testBehaviorNonPersian) -{ +TEST_F(PersianAnalyzerTest, testBehaviorNonPersian) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesTo(a, L"English test.", newCollection(L"english", L"test")); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream1) -{ +TEST_F(PersianAnalyzerTest, testReusableTokenStream1) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, - 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, - 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf}; + const uint8_t first[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd9, 0x85, 0xd9, 0x8a, + 0x20, 0xd8, 0xb4, 0xd8, 0xaf, 0xd9, 0x87, 0x20, 0xd8, 0xa8, 0xd9, 0x88, 0xd8, 0xaf, 0xd9, + 0x87, 0x20, 0xd8, 0xa8, 0xd8, 0xa7, 0xd8, 0xb4, 0xd8, 0xaf + }; const uint8_t second[] = {0xd8, 0xae, 0xd9, 0x88, 0xd8, 0xb1, 0xd8, 0xaf, 0xd9, 0x87}; checkAnalyzesToReuse(a, 
UTF8_TO_STRING(first), newCollection(UTF8_TO_STRING(second))); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream2) -{ +TEST_F(PersianAnalyzerTest, testReusableTokenStream2) { PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); const uint8_t first[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf, 0xe2, 0x80, 0x8c, 0xd9, 0x87, 0xd8, 0xa7}; const uint8_t second[] = {0xd8, 0xa8, 0xd8, 0xb1, 0xda, 0xaf}; @@ -676,11 +662,8 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream2) } /// Test that custom stopwords work, and are not case-sensitive. -BOOST_AUTO_TEST_CASE(testCustomStopwords) -{ +TEST_F(PersianAnalyzerTest, testCustomStopwords) { Collection stopWords = newCollection(L"the", L"and", L"a"); PersianAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, HashSet::newInstance(stopWords.begin(), stopWords.end())); checkAnalyzesTo(a, L"The quick brown fox.", newCollection(L"quick", L"brown", L"fox")); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/fa/PersianNormalizationFilterTest.cpp b/src/test/contrib/analyzers/common/analysis/fa/PersianNormalizationFilterTest.cpp index 22298569..c86a8faa 100644 --- a/src/test/contrib/analyzers/common/analysis/fa/PersianNormalizationFilterTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/fa/PersianNormalizationFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -12,64 +12,51 @@ using namespace Lucene; -class PersianNormalizationFilterFixture : public BaseTokenStreamFixture -{ +class PersianNormalizationFilterTest : public BaseTokenStreamFixture { public: - virtual ~PersianNormalizationFilterFixture() - { + virtual ~PersianNormalizationFilterTest() { } public: - void check(const String& input, const String& expected) - { + void check(const String& input, const String& expected) { ArabicLetterTokenizerPtr tokenStream = newLucene(newLucene(input)); PersianNormalizationFilterPtr filter = newLucene(tokenStream); checkTokenStreamContents(filter, newCollection(expected)); } }; -BOOST_FIXTURE_TEST_SUITE(PersianNormalizationFilterTest, PersianNormalizationFilterFixture) - -BOOST_AUTO_TEST_CASE(testFarsiYeh) -{ +TEST_F(PersianNormalizationFilterTest, testFarsiYeh) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xa7, 0xdb, 0x8c}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testYehBarree) -{ +TEST_F(PersianNormalizationFilterTest, testYehBarree) { const uint8_t first[] = {0xd9, 0x87, 0xd8, 0xa7, 0xdb, 0x92}; const uint8_t second[] = {0xd9, 0x87, 0xd8, 0xa7, 0xd9, 0x8a}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testKeheh) -{ +TEST_F(PersianNormalizationFilterTest, testKeheh) { const uint8_t first[] = {0xda, 0xa9, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x86}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xb4, 0xd8, 0xa7, 0xd9, 0x86, 0xd8, 0xaf, 0xd9, 0x86}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testHehYeh) -{ +TEST_F(PersianNormalizationFilterTest, testHehYeh) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xdb, 0x80}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87}; check(UTF8_TO_STRING(first), 
UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testHehHamzaAbove) -{ +TEST_F(PersianNormalizationFilterTest, testHehHamzaAbove) { const uint8_t first[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87, 0xd9, 0x94}; const uint8_t second[] = {0xd9, 0x83, 0xd8, 0xaa, 0xd8, 0xa7, 0xd8, 0xa8, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } -BOOST_AUTO_TEST_CASE(testHehGoal) -{ +TEST_F(PersianNormalizationFilterTest, testHehGoal) { const uint8_t first[] = {0xd8, 0xb2, 0xd8, 0xa7, 0xd8, 0xaf, 0xdb, 0x81}; const uint8_t second[] = {0xd8, 0xb2, 0xd8, 0xa7, 0xd8, 0xaf, 0xd9, 0x87}; check(UTF8_TO_STRING(first), UTF8_TO_STRING(second)); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/fr/ElisionTest.cpp b/src/test/contrib/analyzers/common/analysis/fr/ElisionTest.cpp index 60eb145a..3338df6b 100644 --- a/src/test/contrib/analyzers/common/analysis/fr/ElisionTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/fr/ElisionTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,28 +13,23 @@ using namespace Lucene; -class ElisionFixture : public BaseTokenStreamFixture -{ +class ElisionTest : public BaseTokenStreamFixture { public: - virtual ~ElisionFixture() - { + virtual ~ElisionTest() { } public: - Collection addTerms(TokenFilterPtr filter) - { + Collection addTerms(const TokenFilterPtr& filter) { Collection terms = Collection::newInstance(); TermAttributePtr termAtt = filter->getAttribute(); - while (filter->incrementToken()) + while (filter->incrementToken()) { terms.add(termAtt->term()); + } return terms; } }; -BOOST_FIXTURE_TEST_SUITE(ElisionTest, ElisionFixture) - -BOOST_AUTO_TEST_CASE(testElision) -{ +TEST_F(ElisionTest, testElision) { String test = L"Plop, juste pour voir l'embrouille avec O'brian. M'enfin."; TokenizerPtr tokenizer = newLucene(LuceneVersion::LUCENE_CURRENT, newLucene(test)); HashSet articles = HashSet::newInstance(); @@ -42,9 +37,7 @@ BOOST_AUTO_TEST_CASE(testElision) articles.add(L"M"); TokenFilterPtr filter = newLucene(tokenizer, articles); Collection terms = addTerms(filter); - BOOST_CHECK_EQUAL(L"embrouille", terms[4]); - BOOST_CHECK_EQUAL(L"O'brian", terms[6]); - BOOST_CHECK_EQUAL(L"enfin", terms[7]); + EXPECT_EQ(L"embrouille", terms[4]); + EXPECT_EQ(L"O'brian", terms[6]); + EXPECT_EQ(L"enfin", terms[7]); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/fr/FrenchAnalyzerTest.cpp b/src/test/contrib/analyzers/common/analysis/fr/FrenchAnalyzerTest.cpp index 761176a3..3908b325 100644 --- a/src/test/contrib/analyzers/common/analysis/fr/FrenchAnalyzerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/fr/FrenchAnalyzerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. 
// Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,12 +10,11 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(FrenchAnalyzerTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture FrenchAnalyzerTest; -BOOST_AUTO_TEST_CASE(testAnalyzer) -{ +TEST_F(FrenchAnalyzerTest, testAnalyzer) { AnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); - + checkAnalyzesTo(fa, L"", Collection::newInstance()); checkAnalyzesTo(fa, L"chien chat cheval", newCollection(L"chien", L"chat", L"cheval")); @@ -37,7 +36,7 @@ BOOST_AUTO_TEST_CASE(testAnalyzer) checkAnalyzesTo(fa, L"le la chien les aux chat du des \u00e0 cheval", newCollection(L"chien", L"chat", L"cheval")); // some nouns and adjectives - checkAnalyzesTo(fa, L"lances chismes habitable chiste \u00e9l\u00e9ments captifs", + checkAnalyzesTo(fa, L"lances chismes habitable chiste \u00e9l\u00e9ments captifs", newCollection( L"lanc", L"chism", L"habit", L"chist", L"\u00e9l\u00e9ment", L"captif")); // some verbs @@ -52,10 +51,9 @@ BOOST_AUTO_TEST_CASE(testAnalyzer) newCollection(L"33bis", L"1940-1945", L"1940", L"1945", L"i")); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream) -{ +TEST_F(FrenchAnalyzerTest, testReusableTokenStream) { AnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); - + // stopwords checkAnalyzesToReuse(fa, L"le la chien les aux chat du des \u00e0 cheval", newCollection(L"chien", L"chat", L"cheval")); @@ -66,8 +64,7 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream) } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. 
-BOOST_AUTO_TEST_CASE(testExclusionTableReuse) -{ +TEST_F(FrenchAnalyzerTest, testExclusionTableReuse) { FrenchAnalyzerPtr fa = newLucene(LuceneVersion::LUCENE_CURRENT); checkAnalyzesToReuse(fa, L"habitable", newCollection(L"habit")); HashSet exclusions = HashSet::newInstance(); @@ -75,5 +72,3 @@ BOOST_AUTO_TEST_CASE(testExclusionTableReuse) fa->setStemExclusionTable(exclusions); checkAnalyzesToReuse(fa, L"habitable", newCollection(L"habitable")); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/nl/DutchStemmerTest.cpp b/src/test/contrib/analyzers/common/analysis/nl/DutchStemmerTest.cpp index 0cf8d7ea..e58831ae 100644 --- a/src/test/contrib/analyzers/common/analysis/nl/DutchStemmerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/nl/DutchStemmerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,21 +11,17 @@ using namespace Lucene; -class DutchStemmerFixture : public BaseTokenStreamFixture -{ +class DutchStemmerTest : public BaseTokenStreamFixture { public: - virtual ~DutchStemmerFixture() - { + virtual ~DutchStemmerTest() { } public: - void check(const String& input, const String& expected) - { + void check(const String& input, const String& expected) { checkOneTerm(newLucene(LuceneVersion::LUCENE_CURRENT), input, expected); } - - void checkReuse(AnalyzerPtr a, const String& input, const String& expected) - { + + void checkReuse(const AnalyzerPtr& a, const String& input, const String& expected) { checkOneTermReuse(a, input, expected); } }; @@ -33,10 +29,7 @@ class DutchStemmerFixture : public BaseTokenStreamFixture /// Test the Dutch Stem Filter, which only modifies the term text. /// The code states that it uses the snowball algorithm, but tests reveal some differences. -BOOST_FIXTURE_TEST_SUITE(DutchStemmerTest, DutchStemmerFixture) - -BOOST_AUTO_TEST_CASE(testWithSnowballExamples) -{ +TEST_F(DutchStemmerTest, testWithSnowballExamples) { check(L"lichaamsziek", L"lichaamsziek"); check(L"lichamelijk", L"licham"); check(L"lichamelijke", L"licham"); @@ -119,9 +112,8 @@ BOOST_AUTO_TEST_CASE(testWithSnowballExamples) check(L"ophouden", L"ophoud"); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream) -{ - AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); +TEST_F(DutchStemmerTest, testReusableTokenStream) { + AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"lichaamsziek", L"lichaamsziek"); checkReuse(a, L"lichamelijk", L"licham"); checkReuse(a, L"lichamelijke", L"licham"); @@ -129,8 +121,7 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream) } /// Test that changes to the exclusion table are applied immediately when using reusable token streams. 
-BOOST_AUTO_TEST_CASE(testExclusionTableReuse) -{ +TEST_F(DutchStemmerTest, testExclusionTableReuse) { DutchAnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); checkReuse(a, L"lichamelijk", L"licham"); HashSet exclusions = HashSet::newInstance(); @@ -138,5 +129,3 @@ BOOST_AUTO_TEST_CASE(testExclusionTableReuse) a->setStemExclusionTable(exclusions); checkReuse(a, L"lichamelijk", L"lichamelijk"); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/reverse/ReverseStringFilterTest.cpp b/src/test/contrib/analyzers/common/analysis/reverse/ReverseStringFilterTest.cpp index cb1af25c..b436220c 100644 --- a/src/test/contrib/analyzers/common/analysis/reverse/ReverseStringFilterTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/reverse/ReverseStringFilterTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -13,42 +13,38 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(ReverseStringFilterTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture ReverseStringFilterTest; -BOOST_AUTO_TEST_CASE(testFilter) -{ +TEST_F(ReverseStringFilterTest, testFilter) { TokenStreamPtr stream = newLucene(newLucene(L"Do have a nice day")); // 1-4 length string ReverseStringFilterPtr filter = newLucene(stream); TermAttributePtr text = filter->getAttribute(); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(L"oD", text->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(L"evah", text->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(L"a", text->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(L"ecin", text->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(L"yad", text->term()); - BOOST_CHECK(!filter->incrementToken()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(L"oD", text->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(L"evah", text->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(L"a", text->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(L"ecin", text->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(L"yad", text->term()); + EXPECT_TRUE(!filter->incrementToken()); } -BOOST_AUTO_TEST_CASE(testFilterWithMark) -{ +TEST_F(ReverseStringFilterTest, testFilterWithMark) { TokenStreamPtr stream = newLucene(newLucene(L"Do have a nice day")); // 1-4 length string ReverseStringFilterPtr filter = newLucene(stream, (wchar_t)0x0001); TermAttributePtr text = filter->getAttribute(); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"oD", text->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"evah", text->term()); - BOOST_CHECK(filter->incrementToken()); - 
BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"a", text->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"ecin", text->term()); - BOOST_CHECK(filter->incrementToken()); - BOOST_CHECK_EQUAL(String(1, (wchar_t)0x0001) + L"yad", text->term()); - BOOST_CHECK(!filter->incrementToken()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(String(1, (wchar_t)0x0001) + L"oD", text->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(String(1, (wchar_t)0x0001) + L"evah", text->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(String(1, (wchar_t)0x0001) + L"a", text->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(String(1, (wchar_t)0x0001) + L"ecin", text->term()); + EXPECT_TRUE(filter->incrementToken()); + EXPECT_EQ(String(1, (wchar_t)0x0001) + L"yad", text->term()); + EXPECT_TRUE(!filter->incrementToken()); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/ru/RussianAnalyzerTest.cpp b/src/test/contrib/analyzers/common/analysis/ru/RussianAnalyzerTest.cpp index 1431194f..3a11f076 100644 --- a/src/test/contrib/analyzers/common/analysis/ru/RussianAnalyzerTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/ru/RussianAnalyzerTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -17,56 +17,52 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(RussianAnalyzerTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture RussianAnalyzerTest; -BOOST_AUTO_TEST_CASE(testUnicode) -{ +TEST_F(RussianAnalyzerTest, testUnicode) { RussianAnalyzerPtr ra = newLucene(LuceneVersion::LUCENE_CURRENT); - + String testFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"testUTF8.txt")); InputStreamReaderPtr inWords = newLucene(newLucene(testFile)); - + String sampleFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"resUTF8.htm")); InputStreamReaderPtr sampleUnicode = newLucene(newLucene(sampleFile)); - + TokenStreamPtr in = ra->tokenStream(L"all", inWords); RussianLetterTokenizerPtr sample = newLucene(sampleUnicode); TermAttributePtr text = in->getAttribute(); TermAttributePtr sampleText = sample->getAttribute(); - - while (true) - { - if (!in->incrementToken()) + + while (true) { + if (!in->incrementToken()) { break; + } sample->incrementToken(); - BOOST_CHECK_EQUAL(text->term(), sampleText->term()); + EXPECT_EQ(text->term(), sampleText->term()); } inWords->close(); sampleUnicode->close(); } -BOOST_AUTO_TEST_CASE(testDigitsInRussianCharset) -{ +TEST_F(RussianAnalyzerTest, testDigitsInRussianCharset) { ReaderPtr reader = newLucene(L"text 1000"); RussianAnalyzerPtr ra = newLucene(LuceneVersion::LUCENE_CURRENT); TokenStreamPtr stream = ra->tokenStream(L"", reader); TermAttributePtr termText = stream->getAttribute(); - BOOST_CHECK(stream->incrementToken()); - BOOST_CHECK_EQUAL(L"text", termText->term()); - BOOST_CHECK(stream->incrementToken()); - BOOST_CHECK_EQUAL(L"1000", termText->term()); - BOOST_CHECK(!stream->incrementToken()); + EXPECT_TRUE(stream->incrementToken()); + EXPECT_EQ(L"text", termText->term()); + EXPECT_TRUE(stream->incrementToken()); + EXPECT_EQ(L"1000", termText->term()); + EXPECT_TRUE(!stream->incrementToken()); } 
-BOOST_AUTO_TEST_CASE(testReusableTokenStream1) -{ +TEST_F(RussianAnalyzerTest, testReusableTokenStream1) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - - const uint8_t input[] = - { + + const uint8_t input[] = { 0xd0, 0x92, 0xd0, 0xbc, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xb5, 0x20, 0xd1, 0x81, 0x20, 0xd1, 0x82, 0xd0, 0xb5, 0xd0, 0xbc, 0x20, 0xd0, 0xbe, 0x20, 0xd1, 0x81, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xb5, 0x20, 0xd1, 0x8d, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xba, 0xd1, 0x82, 0xd1, 0x80, 0xd0, @@ -76,48 +72,46 @@ BOOST_AUTO_TEST_CASE(testReusableTokenStream1) 0xd1, 0x80, 0xd0, 0xb5, 0xd0, 0xb4, 0xd1, 0x81, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb2, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xb5, 0x20, 0xd0, 0xb5, 0xd1, 0x89, 0xd0, 0xb5 }; - + const uint8_t token1[] = {0xd0, 0xb2, 0xd0, 0xbc, 0xd0, 0xb5, 0xd1, 0x81, 0xd1, 0x82}; const uint8_t token2[] = {0xd1, 0x81, 0xd0, 0xb8, 0xd0, 0xbb}; const uint8_t token3[] = {0xd1, 0x8d, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xba, 0xd1, 0x82, 0xd1, 0x80, 0xd0, 0xbe, 0xd0, 0xbc, 0xd0, 0xb0, 0xd0, 0xb3, 0xd0, 0xbd, 0xd0, 0xb8, 0xd1, 0x82, - 0xd0, 0xbd}; + 0xd0, 0xbd + }; const uint8_t token4[] = {0xd1, 0x8d, 0xd0, 0xbd, 0xd0, 0xb5, 0xd1, 0x80, 0xd0, 0xb3}; const uint8_t token5[] = {0xd0, 0xb8, 0xd0, 0xbc, 0xd0, 0xb5, 0xd0, 0xbb}; const uint8_t token6[] = {0xd0, 0xbf, 0xd1, 0x80, 0xd0, 0xb5, 0xd0, 0xb4, 0xd1, 0x81, 0xd1, 0x82, 0xd0, - 0xb0, 0xd0, 0xb2, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xbd}; - + 0xb0, 0xd0, 0xb2, 0xd0, 0xbb, 0xd0, 0xb5, 0xd0, 0xbd + }; + checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( - UTF8_TO_STRING(token1), - UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3), - UTF8_TO_STRING(token4), - UTF8_TO_STRING(token5), - UTF8_TO_STRING(token6) - )); + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), + UTF8_TO_STRING(token3), + UTF8_TO_STRING(token4), + UTF8_TO_STRING(token5), + UTF8_TO_STRING(token6) + )); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream2) -{ +TEST_F(RussianAnalyzerTest, 
testReusableTokenStream2) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT); - - const uint8_t input[] = - { + + const uint8_t input[] = { 0xd0, 0x9d, 0xd0, 0xbe, 0x20, 0xd0, 0xb7, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xb5, 0x20, 0xd1, 0x8d, 0xd1, 0x82, 0xd0, 0xbe, 0x20, 0xd1, 0x85, 0xd1, 0x80, 0xd0, 0xb0, 0xd0, 0xbd, 0xd0, 0xb8, 0xd0, 0xbb, 0xd0, 0xbe, 0xd1, 0x81, 0xd1, 0x8c, 0x20, 0xd0, 0xb2, 0x20, 0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb9, 0xd0, 0xbd, 0xd0, 0xb5 }; - + const uint8_t token1[] = {0xd0, 0xb7, 0xd0, 0xbd, 0xd0, 0xb0, 0xd0, 0xbd}; const uint8_t token2[] = {0xd1, 0x85, 0xd1, 0x80, 0xd0, 0xb0, 0xd0, 0xbd}; const uint8_t token3[] = {0xd1, 0x82, 0xd0, 0xb0, 0xd0, 0xb9, 0xd0, 0xbd}; checkAnalyzesToReuse(a, UTF8_TO_STRING(input), newCollection( - UTF8_TO_STRING(token1), - UTF8_TO_STRING(token2), - UTF8_TO_STRING(token3) - )); + UTF8_TO_STRING(token1), + UTF8_TO_STRING(token2), + UTF8_TO_STRING(token3) + )); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/analyzers/common/analysis/ru/RussianStemTest.cpp b/src/test/contrib/analyzers/common/analysis/ru/RussianStemTest.cpp index 7cdb210b..27f77fb5 100644 --- a/src/test/contrib/analyzers/common/analysis/ru/RussianStemTest.cpp +++ b/src/test/contrib/analyzers/common/analysis/ru/RussianStemTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -15,32 +15,31 @@ using namespace Lucene; -class RussianStemmerFixture : public BaseTokenStreamFixture -{ +class RussianStemmerTest : public BaseTokenStreamFixture { public: - RussianStemmerFixture() - { + RussianStemmerTest() { words = Collection::newInstance(); stems = Collection::newInstance(); - + String wordsFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"wordsUTF8.txt")); String stemsFile(FileUtils::joinPath(FileUtils::joinPath(getTestDir(), L"russian"), L"stemsUTF8.txt")); - + BufferedReaderPtr inWords = newLucene(newLucene(newLucene(wordsFile))); String word; - while (inWords->readLine(word)) + while (inWords->readLine(word)) { words.add(word); + } inWords->close(); - + BufferedReaderPtr inStems = newLucene(newLucene(newLucene(stemsFile))); String stem; - while (inStems->readLine(stem)) + while (inStems->readLine(stem)) { stems.add(stem); + } inStems->close(); } - - virtual ~RussianStemmerFixture() - { + + virtual ~RussianStemmerTest() { } protected: @@ -48,16 +47,10 @@ class RussianStemmerFixture : public BaseTokenStreamFixture Collection stems; }; -BOOST_FIXTURE_TEST_SUITE(RussianStemTest, RussianStemmerFixture) - -BOOST_AUTO_TEST_CASE(testStem) -{ - BOOST_CHECK_EQUAL(words.size(), stems.size()); - for (int32_t i = 0; i < words.size(); ++i) - { +TEST_F(RussianStemmerTest, testStem) { + EXPECT_EQ(words.size(), stems.size()); + for (int32_t i = 0; i < words.size(); ++i) { String realStem = RussianStemmer::stemWord(words[i]); - BOOST_CHECK_EQUAL(stems[i], realStem); + EXPECT_EQ(stems[i], realStem); } } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/highlighter/HighlighterTest.cpp b/src/test/contrib/highlighter/HighlighterTest.cpp index 9f8a510a..beaa9ea4 100644 --- a/src/test/contrib/highlighter/HighlighterTest.cpp +++ b/src/test/contrib/highlighter/HighlighterTest.cpp @@ -1,5 +1,5 @@ 
///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -55,48 +55,47 @@ #include "TermQuery.h" using namespace Lucene; -class HighlighterTestFixture; - -namespace HighlighterTest -{ - class TestFormatter : public Formatter, public LuceneObject - { - public: - TestFormatter(HighlighterTestFixture* fixture); - virtual ~TestFormatter(); - - LUCENE_CLASS(TestFormatter); - - protected: - HighlighterTestFixture* fixture; - - public: - virtual String highlightTerm(const String& originalText, TokenGroupPtr tokenGroup); - }; -} - -class HighlighterTestFixture : public BaseTokenStreamFixture -{ -public: - HighlighterTestFixture() - { +class HighlighterTest; + +namespace HighlighterTestNS { + +class TestFormatter : public Formatter, public LuceneObject { +public: + TestFormatter(HighlighterTest* fixture); + virtual ~TestFormatter(); + + LUCENE_CLASS(TestFormatter); + +protected: + HighlighterTest* fixture; + +public: + virtual String highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup); +}; + +} + +class HighlighterTest : public BaseTokenStreamFixture { +public: + HighlighterTest() { numHighlights = 0; analyzer = newLucene(TEST_VERSION); texts = newCollection( - L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", - L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", - L"JFK has been shot", - L"John Kennedy has been shot", - L"This text has a typo in referring to Keneddy", - L"wordx wordy wordz wordx 
wordy wordx worda wordb wordy wordc", - L"y z x y z a b", - L"lets is a the lets is a the lets is a the lets" - ); - + L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot", + L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy", + L"JFK has been shot", + L"John Kennedy has been shot", + L"This text has a typo in referring to Keneddy", + L"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc", + L"y z x y z a b", + L"lets is a the lets is a the lets is a the lets" + ); + ramDir = newLucene(); IndexWriterPtr writer = newLucene(ramDir, newLucene(TEST_VERSION), true, IndexWriter::MaxFieldLengthUNLIMITED); - for (int32_t i = 0; i < texts.size(); ++i) + for (int32_t i = 0; i < texts.size(); ++i) { addDoc(writer, texts[i]); + } DocumentPtr doc = newLucene(); NumericFieldPtr nfield = newLucene(NUMERIC_FIELD_NAME, Field::STORE_YES, true); nfield->setIntValue(1); @@ -120,15 +119,14 @@ class HighlighterTestFixture : public BaseTokenStreamFixture writer->optimize(); writer->close(); reader = IndexReader::open(ramDir, true); - + dir = newLucene(); a = newLucene(); } - - virtual ~HighlighterTestFixture() - { + + virtual ~HighlighterTest() { } - + public: IndexReaderPtr reader; QueryPtr query; @@ -139,27 +137,25 @@ class HighlighterTestFixture : public BaseTokenStreamFixture int32_t numHighlights; AnalyzerPtr analyzer; TopDocsPtr hits; - + Collection texts; - + DirectoryPtr dir; AnalyzerPtr a; - + static const LuceneVersion::Version TEST_VERSION; static const String FIELD_NAME; static const String NUMERIC_FIELD_NAME; public: - void addDoc(IndexWriterPtr writer, const String& text) - { + void addDoc(const IndexWriterPtr& writer, const String& text) { DocumentPtr doc = newLucene(); FieldPtr field = newLucene(FIELD_NAME, text, Field::STORE_YES, Field::INDEX_ANALYZED); 
doc->add(field); writer->addDocument(doc); } - - String highlightField(QueryPtr query, const String& fieldName, const String& text) - { + + String highlightField(const QueryPtr& query, const String& fieldName, const String& text) { TokenStreamPtr tokenStream = newLucene(TEST_VERSION)->tokenStream(fieldName, newLucene(text)); // Assuming "", "" used to highlight SimpleHTMLFormatterPtr formatter = newLucene(); @@ -170,49 +166,45 @@ class HighlighterTestFixture : public BaseTokenStreamFixture String rv = highlighter->getBestFragments(tokenStream, text, 1, L"(FIELD TEXT TRUNCATED)"); return rv.empty() ? text : rv; } - - void doSearching(const String& queryString) - { + + void doSearching(const String& queryString) { QueryParserPtr parser = newLucene(TEST_VERSION, FIELD_NAME, analyzer); parser->setEnablePositionIncrements(true); parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); query = parser->parse(queryString); doSearching(query); } - - void doSearching(QueryPtr unReWrittenQuery) - { + + void doSearching(const QueryPtr& unReWrittenQuery) { searcher = newLucene(ramDir, true); // for any multi-term queries to work (prefix, wildcard, range,fuzzy etc) you must use a rewritten query query = unReWrittenQuery->rewrite(reader); hits = searcher->search(query, FilterPtr(), 1000); } - - void checkExpectedHighlightCount(int32_t maxNumFragmentsRequired, int32_t expectedHighlights, Collection expected) - { + + void checkExpectedHighlightCount(int32_t maxNumFragmentsRequired, int32_t expectedHighlights, Collection expected) { Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); - + 
HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); - - BOOST_CHECK_EQUAL(numHighlights, expectedHighlights); + + EXPECT_EQ(numHighlights, expectedHighlights); + } + + EXPECT_EQ(results.size(), expected.size()); + for (int32_t i = 0; i < results.size(); ++i) { + EXPECT_EQ(results[i], expected[i]); } - - BOOST_CHECK_EQUAL(results.size(), expected.size()); - for (int32_t i = 0; i < results.size(); ++i) - BOOST_CHECK_EQUAL(results[i], expected[i]); - } - - void makeIndex() - { + } + + void makeIndex() { IndexWriterPtr writer = newLucene(dir, a, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(doc(L"t_text1", L"random words for highlighting tests del")); writer->addDocument(doc(L"t_text1", L"more random words for second field del")); @@ -221,23 +213,20 @@ class HighlighterTestFixture : public BaseTokenStreamFixture writer->optimize(); writer->close(); } - - DocumentPtr doc(const String& f, const String& v) - { + + DocumentPtr doc(const String& f, const String& v) { DocumentPtr doc = newLucene(); doc->add(newLucene(f, v, Field::STORE_YES, Field::INDEX_ANALYZED)); return doc; } - - void deleteDocument() - { + + void deleteDocument() { IndexWriterPtr writer = newLucene(dir, a, false, IndexWriter::MaxFieldLengthLIMITED); writer->deleteDocuments(newLucene(L"t_text1", L"del")); writer->close(); } - - void searchIndex() - { + + void searchIndex() { String q = L"t_text1:random"; QueryParserPtr parser = newLucene(TEST_VERSION, L"t_text1", a ); QueryPtr query = parser->parse(q); @@ -247,149 +236,136 @@ class HighlighterTestFixture : public BaseTokenStreamFixture HighlighterPtr h = newLucene(scorer); TopDocsPtr hits = searcher->search(query, FilterPtr(), 10); - for (int32_t i = 0; i < hits->totalHits; ++i) - { + for (int32_t i = 0; i < hits->totalHits; ++i) { DocumentPtr doc = searcher->doc(hits->scoreDocs[i]->doc); 
String result = h->getBestFragment(a, L"t_text1", doc->get(L"t_text1")); - BOOST_CHECK_EQUAL(L"more random words for second field", result); + EXPECT_EQ(L"more random words for second field", result); } searcher->close(); } }; -const LuceneVersion::Version HighlighterTestFixture::TEST_VERSION = LuceneVersion::LUCENE_CURRENT; -const String HighlighterTestFixture::FIELD_NAME = L"contents"; -const String HighlighterTestFixture::NUMERIC_FIELD_NAME = L"nfield"; +const LuceneVersion::Version HighlighterTest::TEST_VERSION = LuceneVersion::LUCENE_CURRENT; +const String HighlighterTest::FIELD_NAME = L"contents"; +const String HighlighterTest::NUMERIC_FIELD_NAME = L"nfield"; + +namespace HighlighterTestNS { + +TestFormatter::TestFormatter(HighlighterTest* fixture) { + this->fixture = fixture; +} + +TestFormatter::~TestFormatter() { +} + +String TestFormatter::highlightTerm(const String& originalText, const TokenGroupPtr& tokenGroup) { + if (tokenGroup->getTotalScore() <= 0) { + return originalText; + } + ++fixture->numHighlights; // update stats used in assertions + return L"" + originalText + L""; +} -namespace HighlighterTest -{ - TestFormatter::TestFormatter(HighlighterTestFixture* fixture) - { +DECLARE_SHARED_PTR(TestHighlightRunner) + +class TestHighlightRunner : public LuceneObject { +public: + TestHighlightRunner(HighlighterTest* fixture) { this->fixture = fixture; + mode = QUERY; + frag = newLucene(20); } - - TestFormatter::~TestFormatter() - { - } - - String TestFormatter::highlightTerm(const String& originalText, TokenGroupPtr tokenGroup) - { - if (tokenGroup->getTotalScore() <= 0) - return originalText; - ++fixture->numHighlights; // update stats used in assertions - return L"" + originalText + L""; - } - - DECLARE_SHARED_PTR(TestHighlightRunner) - - class TestHighlightRunner : public LuceneObject - { - public: - TestHighlightRunner(HighlighterTestFixture* fixture) - { - this->fixture = fixture; - mode = QUERY; - frag = newLucene(20); - } - - virtual 
~TestHighlightRunner() - { - } - - LUCENE_CLASS(TestHighlightRunner); - - protected: - HighlighterTestFixture* fixture; - - static const int32_t QUERY; - static const int32_t QUERY_TERM; - - public: - int32_t mode; - FragmenterPtr frag; - - public: - virtual HighlighterPtr getHighlighter(QueryPtr query, const String& fieldName, TokenStreamPtr stream, FormatterPtr formatter) - { - return getHighlighter(query, fieldName, stream, formatter, true); - } - - virtual HighlighterPtr getHighlighter(QueryPtr query, const String& fieldName, TokenStreamPtr stream, FormatterPtr formatter, bool expanMultiTerm) - { - HighlighterScorerPtr scorer; - if (mode == QUERY) - { - scorer = newLucene(query, fieldName); - if (!expanMultiTerm) - boost::dynamic_pointer_cast(scorer)->setExpandMultiTermQuery(false); + + virtual ~TestHighlightRunner() { + } + + LUCENE_CLASS(TestHighlightRunner); + +protected: + HighlighterTest* fixture; + + static const int32_t QUERY; + static const int32_t QUERY_TERM; + +public: + int32_t mode; + FragmenterPtr frag; + +public: + virtual HighlighterPtr getHighlighter(const QueryPtr& query, const String& fieldName, const TokenStreamPtr& stream, const FormatterPtr& formatter) { + return getHighlighter(query, fieldName, stream, formatter, true); + } + + virtual HighlighterPtr getHighlighter(const QueryPtr& query, const String& fieldName, const TokenStreamPtr& stream, const FormatterPtr& formatter, bool expanMultiTerm) { + HighlighterScorerPtr scorer; + if (mode == QUERY) { + scorer = newLucene(query, fieldName); + if (!expanMultiTerm) { + boost::dynamic_pointer_cast(scorer)->setExpandMultiTermQuery(false); } - else if (mode == QUERY_TERM) - scorer = newLucene(query); - else - BOOST_FAIL("Unknown highlight mode"); - - return newLucene(formatter, scorer); + } else if (mode == QUERY_TERM) { + scorer = newLucene(query); + } else { + boost::throw_exception(IllegalArgumentException(L"Unknown highlight mode")); } - - virtual HighlighterPtr getHighlighter(Collection 
weightedTerms, FormatterPtr formatter) - { - if (mode == QUERY) - { - Collection weightedSpanTerms = Collection::newInstance(weightedTerms.size()); - for (int32_t i = 0; i < weightedTerms.size(); ++i) - weightedSpanTerms[i] = boost::dynamic_pointer_cast(weightedTerms[i]); - return newLucene(formatter, newLucene(weightedSpanTerms)); + + return newLucene(formatter, scorer); + } + + virtual HighlighterPtr getHighlighter(Collection weightedTerms, const FormatterPtr& formatter) { + if (mode == QUERY) { + Collection weightedSpanTerms = Collection::newInstance(weightedTerms.size()); + for (int32_t i = 0; i < weightedTerms.size(); ++i) { + weightedSpanTerms[i] = boost::dynamic_pointer_cast(weightedTerms[i]); } - else if (mode == QUERY_TERM) - return newLucene(formatter, newLucene(weightedTerms)); - else - BOOST_FAIL("Unknown highlight mode"); - return HighlighterPtr(); + return newLucene(formatter, newLucene(weightedSpanTerms)); + } else if (mode == QUERY_TERM) { + return newLucene(formatter, newLucene(weightedTerms)); + } else { + boost::throw_exception(IllegalArgumentException(L"Unknown highlight mode")); } - - virtual void doStandardHighlights(AnalyzerPtr analyzer, IndexSearcherPtr searcher, TopDocsPtr hits, QueryPtr query, FormatterPtr formatter, Collection expected, bool expandMT = false) - { - Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { - String text = searcher->doc(hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); - int32_t maxNumFragmentsRequired = 2; - String fragmentSeparator = L"..."; - HighlighterScorerPtr scorer; - TokenStreamPtr tokenStream = analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - if (mode == QUERY) - scorer = newLucene(query); - else if (mode == QUERY_TERM) - scorer = newLucene(query); - HighlighterPtr highlighter = newLucene(formatter, scorer); - highlighter->setTextFragmenter(frag); - results.add(highlighter->getBestFragments(tokenStream, text, 
maxNumFragmentsRequired, fragmentSeparator)); + return HighlighterPtr(); + } + + virtual void doStandardHighlights(const AnalyzerPtr& analyzer, const IndexSearcherPtr& searcher, const TopDocsPtr& hits, const QueryPtr& query, const FormatterPtr& formatter, Collection expected, bool expandMT = false) { + Collection results = Collection::newInstance(); + + for (int32_t i = 0; i < hits->totalHits; ++i) { + String text = searcher->doc(hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); + int32_t maxNumFragmentsRequired = 2; + String fragmentSeparator = L"..."; + HighlighterScorerPtr scorer; + TokenStreamPtr tokenStream = analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + if (mode == QUERY) { + scorer = newLucene(query); + } else if (mode == QUERY_TERM) { + scorer = newLucene(query); } - - BOOST_CHECK_EQUAL(results.size(), expected.size()); - for (int32_t i = 0; i < results.size(); ++i) - BOOST_CHECK_EQUAL(results[i], expected[i]); + HighlighterPtr highlighter = newLucene(formatter, scorer); + highlighter->setTextFragmenter(frag); + results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } - - virtual void run(Collection expected) = 0; - - virtual void start(Collection expected = Collection()) - { - run(expected); - mode = QUERY_TERM; - run(expected); + + EXPECT_EQ(results.size(), expected.size()); + for (int32_t i = 0; i < results.size(); ++i) { + EXPECT_EQ(results[i], expected[i]); } - }; - - const int32_t TestHighlightRunner::QUERY = 0; - const int32_t TestHighlightRunner::QUERY_TERM = 1; -} + } -BOOST_FIXTURE_TEST_SUITE(HighlighterTest, HighlighterTestFixture) + virtual void run(Collection expected) = 0; -BOOST_AUTO_TEST_CASE(testQueryScorerHits) -{ + virtual void start(Collection expected = Collection()) { + run(expected); + mode = QUERY_TERM; + run(expected); + } +}; + +const int32_t TestHighlightRunner::QUERY = 0; +const int32_t TestHighlightRunner::QUERY_TERM = 1; +} + 
+TEST_F(HighlighterTest, testQueryScorerHits) { AnalyzerPtr analyzer = newLucene(); QueryParserPtr qp = newLucene(TEST_VERSION, FIELD_NAME, analyzer); query = qp->parse(L"\"very long\""); @@ -399,9 +375,8 @@ BOOST_AUTO_TEST_CASE(testQueryScorerHits) QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(scorer); Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->scoreDocs.size(); ++i) - { + + for (int32_t i = 0; i < hits->scoreDocs.size(); ++i) { DocumentPtr doc = searcher->doc(hits->scoreDocs[i]->doc); String storedField = doc->get(FIELD_NAME); @@ -412,14 +387,13 @@ BOOST_AUTO_TEST_CASE(testQueryScorerHits) results.add(highlighter->getBestFragment(stream, storedField)); } - - BOOST_CHECK_EQUAL(results.size(), 2); - BOOST_CHECK_EQUAL(results[0], L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); + + EXPECT_EQ(results.size(), 2); + EXPECT_EQ(results[0], L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"); + EXPECT_EQ(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); } -BOOST_AUTO_TEST_CASE(testHighlightingWithDefaultField) -{ +TEST_F(HighlighterTest, testHighlightingWithDefaultField) { String s1 = L"I call our world Flatland, not because we call it so,"; QueryParserPtr parser = newLucene(TEST_VERSION, FIELD_NAME, newLucene(TEST_VERSION)); @@ -429,18 +403,17 @@ BOOST_AUTO_TEST_CASE(testHighlightingWithDefaultField) String expected = L"I call our world Flatland, not because we call it so,"; String observed = highlightField(q, L"SOME_FIELD_NAME", s1); - BOOST_CHECK_EQUAL(expected, observed); + EXPECT_EQ(expected, 
observed); - // Verify that a query against a named field does not result in any ighlighting when the query field name differs + // Verify that a query against a named field does not result in any ighlighting when the query field name differs // from the name of the field being highlighted, which in this example happens to be the default field name. q = parser->parse(L"text:\"world Flatland\"~3"); expected = s1; observed = highlightField(q, FIELD_NAME, s1); - BOOST_CHECK_EQUAL(s1, highlightField(q, FIELD_NAME, s1)); + EXPECT_EQ(s1, highlightField(q, FIELD_NAME, s1)); } -BOOST_AUTO_TEST_CASE(testSimpleSpanHighlighter) -{ +TEST_F(HighlighterTest, testSimpleSpanHighlighter) { doSearching(L"Kennedy"); int32_t maxNumFragmentsRequired = 2; @@ -448,24 +421,22 @@ BOOST_AUTO_TEST_CASE(testSimpleSpanHighlighter) QueryScorerPtr scorer = newLucene(query, FIELD_NAME); HighlighterPtr highlighter = newLucene(scorer); Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(results.size(), 3); - BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy... to Kennedy"); - BOOST_CHECK_EQUAL(results[2], L" kennedy has been shot"); + + EXPECT_EQ(results.size(), 3); + EXPECT_EQ(results[0], L"John Kennedy has been shot"); + EXPECT_EQ(results[1], L"This piece of text refers to Kennedy... 
to Kennedy"); + EXPECT_EQ(results[2], L" kennedy has been shot"); } -BOOST_AUTO_TEST_CASE(testRepeatingTermsInMultBooleans) -{ +TEST_F(HighlighterTest, testRepeatingTermsInMultBooleans) { String content = L"x y z a b c d e f g b c g"; String ph1 = L"\"a b c d\""; String ph2 = L"\"b c g\""; @@ -481,25 +452,23 @@ BOOST_AUTO_TEST_CASE(testRepeatingTermsInMultBooleans) QueryScorerPtr scorer = newLucene(query, f1); scorer->setExpandMultiTermQuery(false); - HighlighterPtr h = newLucene(newLucene(this), scorer); + HighlighterPtr h = newLucene(newLucene(this), scorer); h->getBestFragment(analyzer, f1, content); - BOOST_CHECK_EQUAL(numHighlights, 7); + EXPECT_EQ(numHighlights, 7); } -BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting) -{ +TEST_F(HighlighterTest, testSimpleQueryScorerPhraseHighlighting) { doSearching(L"\"very long and contains\""); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); @@ -507,11 +476,11 @@ BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting) results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(results.size(), 1); - BOOST_CHECK_EQUAL(results[0], L"Hello this is a piece of text that is very long and contains too much preamble"); - - BOOST_CHECK_EQUAL(numHighlights, 3); + + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], L"Hello this is a piece of text that is very long and contains too much preamble"); + + EXPECT_EQ(numHighlights, 3); numHighlights = 0; doSearching(L"\"This piece of 
text refers to Kennedy\""); @@ -519,11 +488,10 @@ BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting) maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); - highlighter = newLucene(newLucene(this), scorer); + highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); - for (int32_t i = 0; i < hits->totalHits; ++i) - { + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); @@ -531,11 +499,11 @@ BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting) results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(results.size(), 1); - BOOST_CHECK_EQUAL(results[0], L"This piece of text refers to Kennedy at the beginning then has a longer piece"); - - BOOST_CHECK_EQUAL(numHighlights, 4); + + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], L"This piece of text refers to Kennedy at the beginning then has a longer piece"); + + EXPECT_EQ(numHighlights, 4); numHighlights = 0; doSearching(L"\"lets is a the lets is a the lets is a the lets\""); @@ -543,11 +511,10 @@ BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting) maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); - highlighter = newLucene(newLucene(this), scorer); + highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); @@ -555,25 +522,22 @@ BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting) results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(results.size(), 1); - 
BOOST_CHECK_EQUAL(results[0], L"lets is a the lets is a the lets is a the lets"); - - BOOST_CHECK_EQUAL(numHighlights, 4); + + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], L"lets is a the lets is a the lets is a the lets"); + + EXPECT_EQ(numHighlights, 4); } -BOOST_AUTO_TEST_CASE(testSpanRegexQuery) -{ +TEST_F(HighlighterTest, testSpanRegexQuery) { // todo } -BOOST_AUTO_TEST_CASE(testRegexQuery) -{ +TEST_F(HighlighterTest, testRegexQuery) { // todo } -BOOST_AUTO_TEST_CASE(testNumericRangeQuery) -{ +TEST_F(HighlighterTest, testNumericRangeQuery) { // doesn't currently highlight, but make sure it doesn't cause exception either query = NumericRangeQuery::newIntRange(NUMERIC_FIELD_NAME, 2, 6, true, true); searcher = newLucene(ramDir, true); @@ -581,11 +545,10 @@ BOOST_AUTO_TEST_CASE(testNumericRangeQuery) int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(NUMERIC_FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); @@ -593,144 +556,134 @@ BOOST_AUTO_TEST_CASE(testNumericRangeQuery) results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(results.size(), 2); - BOOST_CHECK_EQUAL(results[0], L""); - BOOST_CHECK_EQUAL(results[1], L""); - - BOOST_CHECK_EQUAL(numHighlights, 0); + + EXPECT_EQ(results.size(), 2); + EXPECT_EQ(results[0], L""); + EXPECT_EQ(results[1], L""); + + EXPECT_EQ(numHighlights, 0); } -BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting2) -{ +TEST_F(HighlighterTest, testSimpleQueryScorerPhraseHighlighting2) { doSearching(L"\"text piece long\"~5"); int32_t 
maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); highlighter->setTextFragmenter(newLucene(40)); Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(results.size(), 2); - BOOST_CHECK_EQUAL(results[0], L"Hello this is a piece of text that is very long and contains too much preamble"); - BOOST_CHECK_EQUAL(results[1], L" at the beginning then has a longer piece of text that is very long in the middle"); - - BOOST_CHECK_EQUAL(numHighlights, 6); + + EXPECT_EQ(results.size(), 2); + EXPECT_EQ(results[0], L"Hello this is a piece of text that is very long and contains too much preamble"); + EXPECT_EQ(results[1], L" at the beginning then has a longer piece of text that is very long in the middle"); + + EXPECT_EQ(numHighlights, 6); } -BOOST_AUTO_TEST_CASE(testSimpleQueryScorerPhraseHighlighting3) -{ +TEST_F(HighlighterTest, testSimpleQueryScorerPhraseHighlighting3) { doSearching(L"\"x y z\""); int32_t maxNumFragmentsRequired = 2; Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); - + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + 
highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); - - BOOST_CHECK_EQUAL(numHighlights, 3); + + EXPECT_EQ(numHighlights, 3); } - - BOOST_CHECK_EQUAL(results.size(), 1); - BOOST_CHECK_EQUAL(results[0], L"y z x y z a b"); + + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], L"y z x y z a b"); } -BOOST_AUTO_TEST_CASE(testSimpleSpanFragmenter) -{ +TEST_F(HighlighterTest, testSimpleSpanFragmenter) { doSearching(L"\"piece of text that is very long\""); int32_t maxNumFragmentsRequired = 2; QueryScorerPtr scorer = newLucene(query, FIELD_NAME); - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(scorer, 5)); - + results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(results.size(), 2); - BOOST_CHECK_EQUAL(results[0], L" this is a piece of text"); - BOOST_CHECK_EQUAL(results[1], L" piece of text that is very long"); - + + EXPECT_EQ(results.size(), 2); + EXPECT_EQ(results[0], L" this is a piece of text"); + EXPECT_EQ(results[1], L" piece of text that is very long"); + doSearching(L"\"been shot\""); - + maxNumFragmentsRequired = 2; scorer = newLucene(query, FIELD_NAME); - highlighter = newLucene(newLucene(this), scorer); + highlighter = newLucene(newLucene(this), scorer); results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr 
tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); highlighter->setTextFragmenter(newLucene(scorer, 20)); - + results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(numHighlights, 14); - - BOOST_CHECK_EQUAL(results.size(), 3); - BOOST_CHECK_EQUAL(results[0], L"JFK has been shot"); - BOOST_CHECK_EQUAL(results[1], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[2], L" kennedy has been shot"); + + EXPECT_EQ(numHighlights, 14); + + EXPECT_EQ(results.size(), 3); + EXPECT_EQ(results[0], L"JFK has been shot"); + EXPECT_EQ(results[1], L"John Kennedy has been shot"); + EXPECT_EQ(results[2], L" kennedy has been shot"); } /// position sensitive query added after position insensitive query -BOOST_AUTO_TEST_CASE(testPosTermStdTerm) -{ +TEST_F(HighlighterTest, testPosTermStdTerm) { doSearching(L"y \"x y z\""); int32_t maxNumFragmentsRequired = 2; - + QueryScorerPtr scorer = newLucene(query, FIELD_NAME); - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); - + highlighter->setTextFragmenter(newLucene(40)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); - - BOOST_CHECK_EQUAL(numHighlights, 4); + + EXPECT_EQ(numHighlights, 4); } - - BOOST_CHECK_EQUAL(results.size(), 1); - BOOST_CHECK_EQUAL(results[0], L"y z x y z a b"); + + EXPECT_EQ(results.size(), 1); + EXPECT_EQ(results[0], L"y z x y z a b"); } -BOOST_AUTO_TEST_CASE(testQueryScorerMultiPhraseQueryHighlighting) -{ +TEST_F(HighlighterTest, testQueryScorerMultiPhraseQueryHighlighting) { MultiPhraseQueryPtr mpq = 
newLucene(); mpq->add(newCollection(newLucene(FIELD_NAME, L"wordx"), newLucene(FIELD_NAME, L"wordb"))); @@ -740,16 +693,15 @@ BOOST_AUTO_TEST_CASE(testQueryScorerMultiPhraseQueryHighlighting) int32_t maxNumFragmentsRequired = 2; Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx worda wordb wordy wordc"); - + checkExpectedHighlightCount(maxNumFragmentsRequired, 6, expected); } -BOOST_AUTO_TEST_CASE(testQueryScorerMultiPhraseQueryHighlightingWithGap) -{ +TEST_F(HighlighterTest, testQueryScorerMultiPhraseQueryHighlightingWithGap) { MultiPhraseQueryPtr mpq = newLucene(); // The toString of MultiPhraseQuery doesn't work so well with these out-of-order additions, but the Query itself seems to match accurately. - + mpq->add(newCollection(newLucene(FIELD_NAME, L"wordz")), 2); mpq->add(newCollection(newLucene(FIELD_NAME, L"wordx")), 0); @@ -757,182 +709,164 @@ BOOST_AUTO_TEST_CASE(testQueryScorerMultiPhraseQueryHighlightingWithGap) int32_t maxNumFragmentsRequired = 1; int32_t expectedHighlights = 2; - + Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx"); - + checkExpectedHighlightCount(maxNumFragmentsRequired, expectedHighlights, expected); } -namespace TestNearSpanSimpleQuery -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - mode = QUERY; - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - } - }; +namespace TestNearSpanSimpleQuery { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void 
run(Collection expected) { + mode = QUERY; + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + } +}; + } -BOOST_AUTO_TEST_CASE(testNearSpanSimpleQuery) -{ +TEST_F(HighlighterTest, testNearSpanSimpleQuery) { doSearching(newLucene(newCollection( - newLucene(newLucene(FIELD_NAME, L"beginning")), - newLucene(newLucene(FIELD_NAME, L"kennedy"))), 3, false)); + newLucene(newLucene(FIELD_NAME, L"beginning")), + newLucene(newLucene(FIELD_NAME, L"kennedy"))), 3, false)); + + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); - TestHighlightRunnerPtr helper = newLucene(this); - Collection expected = newCollection(L" refers to Kennedy at the beginning"); helper->run(expected); - - BOOST_CHECK_EQUAL(numHighlights, 2); + + EXPECT_EQ(numHighlights, 2); } -BOOST_AUTO_TEST_CASE(testSimpleQueryTermScorerHighlighter) -{ +TEST_F(HighlighterTest, testSimpleQueryTermScorerHighlighter) { doSearching(L"Kennedy"); HighlighterPtr highlighter = newLucene(newLucene(query)); highlighter->setTextFragmenter(newLucene(40)); - + int32_t maxNumFragmentsRequired = 2; Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"...")); } - - BOOST_CHECK_EQUAL(results.size(), 3); - BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy... to Kennedy"); - BOOST_CHECK_EQUAL(results[2], L" kennedy has been shot"); + + EXPECT_EQ(results.size(), 3); + EXPECT_EQ(results[0], L"John Kennedy has been shot"); + EXPECT_EQ(results[1], L"This piece of text refers to Kennedy... 
to Kennedy"); + EXPECT_EQ(results[2], L" kennedy has been shot"); } -namespace TestSpanHighlighting -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - mode = QUERY; - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - } - }; +namespace TestSpanHighlighting { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + mode = QUERY; + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + } +}; + } -BOOST_AUTO_TEST_CASE(testSpanHighlighting) -{ +TEST_F(HighlighterTest, testSpanHighlighting) { QueryPtr query1 = newLucene(newCollection( - newLucene(newLucene(FIELD_NAME, L"wordx")), - newLucene(newLucene(FIELD_NAME, L"wordy"))), 1, false); + newLucene(newLucene(FIELD_NAME, L"wordx")), + newLucene(newLucene(FIELD_NAME, L"wordy"))), 1, false); QueryPtr query2 = newLucene(newCollection( - newLucene(newLucene(FIELD_NAME, L"wordy")), - newLucene(newLucene(FIELD_NAME, L"wordc"))), 1, false); + newLucene(newLucene(FIELD_NAME, L"wordy")), + newLucene(newLucene(FIELD_NAME, L"wordc"))), 1, false); BooleanQueryPtr bquery = newLucene(); bquery->add(query1, BooleanClause::SHOULD); bquery->add(query2, BooleanClause::SHOULD); doSearching(bquery); - - TestHighlightRunnerPtr helper = newLucene(this); - + + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + Collection expected = newCollection(L"wordx wordy wordz wordx wordy wordx"); 
helper->run(expected); - - BOOST_CHECK_EQUAL(numHighlights, 7); + + EXPECT_EQ(numHighlights, 7); } -namespace TestNotSpanSimpleQuery -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - mode = QUERY; - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - } - }; +namespace TestNotSpanSimpleQuery { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + mode = QUERY; + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + } +}; + } -BOOST_AUTO_TEST_CASE(testNotSpanSimpleQuery) -{ +TEST_F(HighlighterTest, testNotSpanSimpleQuery) { doSearching(newLucene(newLucene(newCollection( - newLucene(newLucene(FIELD_NAME, L"shot")), - newLucene(newLucene(FIELD_NAME, L"kennedy"))), 3, false), - newLucene(newLucene(FIELD_NAME, L"john")))); - - TestHighlightRunnerPtr helper = newLucene(this); - + newLucene(newLucene(FIELD_NAME, L"shot")), + newLucene(newLucene(FIELD_NAME, L"kennedy"))), 3, false), + newLucene(newLucene(FIELD_NAME, L"john")))); + + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + Collection expected = newCollection( - L"John Kennedy has been shot", - L" kennedy has been shot" - ); + L"John Kennedy has been shot", + L" kennedy has been shot" + ); helper->run(expected); - - BOOST_CHECK_EQUAL(numHighlights, 4); + + EXPECT_EQ(numHighlights, 4); } -namespace TestGetBestFragmentsSimpleQuery -{ - class HelperHighlightRunner : public 
HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"Kennedy"); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - BOOST_CHECK_EQUAL(fixture->numHighlights, 4); - } - }; +namespace TestGetBestFragmentsSimpleQuery { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"Kennedy"); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + EXPECT_EQ(fixture->numHighlights, 4); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetBestFragmentsSimpleQuery) -{ - TestHighlightRunnerPtr helper = newLucene(this); - +TEST_F(HighlighterTest, testGetBestFragmentsSimpleQuery) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + helper->start( newCollection( L"John Kennedy has been shot", @@ -942,34 +876,30 @@ BOOST_AUTO_TEST_CASE(testGetBestFragmentsSimpleQuery) ); } -namespace TestGetFuzzyFragments -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"Kinnedy~"); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), 
expected, true); - BOOST_CHECK_EQUAL(fixture->numHighlights, 5); - } - }; +namespace TestGetFuzzyFragments { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"Kinnedy~"); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected, true); + EXPECT_EQ(fixture->numHighlights, 5); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetFuzzyFragments) -{ - TestHighlightRunnerPtr helper = newLucene(this); - +TEST_F(HighlighterTest, testGetFuzzyFragments) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + helper->start( newCollection( L"John Kennedy has been shot", @@ -980,34 +910,30 @@ BOOST_AUTO_TEST_CASE(testGetFuzzyFragments) ); } -namespace TestGetWildCardFragments -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"K?nnedy"); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - BOOST_CHECK_EQUAL(fixture->numHighlights, 4); - } - }; +namespace TestGetWildCardFragments { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"K?nnedy"); + 
doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + EXPECT_EQ(fixture->numHighlights, 4); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetWildCardFragments) -{ - TestHighlightRunnerPtr helper = newLucene(this); - +TEST_F(HighlighterTest, testGetWildCardFragments) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + helper->start( newCollection( L"John Kennedy has been shot", @@ -1017,34 +943,30 @@ BOOST_AUTO_TEST_CASE(testGetWildCardFragments) ); } -namespace TestGetMidWildCardFragments -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"K*dy"); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - BOOST_CHECK_EQUAL(fixture->numHighlights, 5); - } - }; +namespace TestGetMidWildCardFragments { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"K*dy"); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + EXPECT_EQ(fixture->numHighlights, 5); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetMidWildCardFragments) -{ - TestHighlightRunnerPtr helper = newLucene(this); - +TEST_F(HighlighterTest, testGetMidWildCardFragments) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + helper->start( newCollection( L" to Keneddy", @@ -1055,41 +977,37 
@@ BOOST_AUTO_TEST_CASE(testGetMidWildCardFragments) ); } -namespace TestGetRangeFragments -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - String queryString = HighlighterTestFixture::FIELD_NAME + L":[kannedy TO kznnedy]"; - - // Need to explicitly set the QueryParser property to use TermRangeQuery rather than RangeFilters - QueryParserPtr parser = newLucene(HighlighterTestFixture::TEST_VERSION, HighlighterTestFixture::FIELD_NAME, fixture->analyzer); - parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); - fixture->query = parser->parse(queryString); - fixture->doSearching(fixture->query); - - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - BOOST_CHECK_EQUAL(fixture->numHighlights, 5); - } - }; +namespace TestGetRangeFragments { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + String queryString = HighlighterTest::FIELD_NAME + L":[kannedy TO kznnedy]"; + + // Need to explicitly set the QueryParser property to use TermRangeQuery rather than RangeFilters + QueryParserPtr parser = newLucene(HighlighterTest::TEST_VERSION, HighlighterTest::FIELD_NAME, fixture->analyzer); + parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); + fixture->query = parser->parse(queryString); + fixture->doSearching(fixture->query); + + doStandardHighlights(fixture->analyzer, fixture->searcher, 
fixture->hits, fixture->query, newLucene(fixture), expected); + EXPECT_EQ(fixture->numHighlights, 5); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetRangeFragments) -{ - TestHighlightRunnerPtr helper = newLucene(this); - +TEST_F(HighlighterTest, testGetRangeFragments) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + helper->start( newCollection( L" to Keneddy", @@ -1100,8 +1018,7 @@ BOOST_AUTO_TEST_CASE(testGetRangeFragments) ); } -BOOST_AUTO_TEST_CASE(testConstantScoreMultiTermQuery) -{ +TEST_F(HighlighterTest, testConstantScoreMultiTermQuery) { numHighlights = 0; query = newLucene(newLucene(FIELD_NAME, L"ken*")); @@ -1110,306 +1027,279 @@ BOOST_AUTO_TEST_CASE(testConstantScoreMultiTermQuery) // can't rewrite ConstantScore if you want to highlight it - it rewrites to ConstantScoreQuery which cannot be highlighted // query = unReWrittenQuery.rewrite(reader); hits = searcher->search(query, FilterPtr(), 1000); - + Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; - + TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, FIELD_NAME); - - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); - + + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } - - BOOST_CHECK_EQUAL(numHighlights, 5); - - BOOST_CHECK_EQUAL(results.size(), 4); - BOOST_CHECK_EQUAL(results[0], L" kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L" refers to Kennedy... 
to Kennedy"); - BOOST_CHECK_EQUAL(results[2], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[3], L" to Keneddy"); - + + EXPECT_EQ(numHighlights, 5); + + EXPECT_EQ(results.size(), 4); + EXPECT_EQ(results[0], L" kennedy has been shot"); + EXPECT_EQ(results[1], L" refers to Kennedy... to Kennedy"); + EXPECT_EQ(results[2], L"John Kennedy has been shot"); + EXPECT_EQ(results[3], L" to Keneddy"); + // try null field - + hits = searcher->search(query, FilterPtr(), 1000); - + numHighlights = 0; - + results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; - + TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, L""); - - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); - + + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } - - BOOST_CHECK_EQUAL(numHighlights, 5); - - BOOST_CHECK_EQUAL(results.size(), 4); - BOOST_CHECK_EQUAL(results[0], L" kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L" refers to Kennedy... to Kennedy"); - BOOST_CHECK_EQUAL(results[2], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[3], L" to Keneddy"); - + + EXPECT_EQ(numHighlights, 5); + + EXPECT_EQ(results.size(), 4); + EXPECT_EQ(results[0], L" kennedy has been shot"); + EXPECT_EQ(results[1], L" refers to Kennedy... 
to Kennedy"); + EXPECT_EQ(results[2], L"John Kennedy has been shot"); + EXPECT_EQ(results[3], L" to Keneddy"); + // try default field - + hits = searcher->search(query, FilterPtr(), 1000); - + numHighlights = 0; - + results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = searcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); int32_t maxNumFragmentsRequired = 2; String fragmentSeparator = L"..."; - + TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); QueryScorerPtr scorer = newLucene(query, L"random_field", FIELD_NAME); - - HighlighterPtr highlighter = newLucene(newLucene(this), scorer); - + + HighlighterPtr highlighter = newLucene(newLucene(this), scorer); + highlighter->setTextFragmenter(newLucene(20)); results.add(highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, fragmentSeparator)); } - - BOOST_CHECK_EQUAL(numHighlights, 5); - - BOOST_CHECK_EQUAL(results.size(), 4); - BOOST_CHECK_EQUAL(results[0], L" kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L" refers to Kennedy... 
to Kennedy"); - BOOST_CHECK_EQUAL(results[2], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[3], L" to Keneddy"); -} - -namespace TestGetBestFragmentsPhrase -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"\"John Kennedy\""); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - - // Currently highlights "John" and "Kennedy" separately - BOOST_CHECK_EQUAL(fixture->numHighlights, 2); - } - }; + + EXPECT_EQ(numHighlights, 5); + + EXPECT_EQ(results.size(), 4); + EXPECT_EQ(results[0], L" kennedy has been shot"); + EXPECT_EQ(results[1], L" refers to Kennedy... to Kennedy"); + EXPECT_EQ(results[2], L"John Kennedy has been shot"); + EXPECT_EQ(results[3], L" to Keneddy"); } -BOOST_AUTO_TEST_CASE(testGetBestFragmentsPhrase) -{ - TestHighlightRunnerPtr helper = newLucene(this); +namespace TestGetBestFragmentsPhrase { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"\"John Kennedy\""); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + + // Currently highlights "John" and "Kennedy" separately + EXPECT_EQ(fixture->numHighlights, 2); + } +}; + +} + +TEST_F(HighlighterTest, testGetBestFragmentsPhrase) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been 
shot")); } -namespace TestGetBestFragmentsQueryScorer -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - Collection clauses = newCollection( - newLucene(newLucene(L"contents", L"john")), - newLucene(newLucene(L"contents", L"kennedy")) - ); - - SpanNearQueryPtr snq = newLucene(clauses, 1, true); - fixture->doSearching(snq); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - - // Currently highlights "John" and "Kennedy" separately - BOOST_CHECK_EQUAL(fixture->numHighlights, 2); - } - }; +namespace TestGetBestFragmentsQueryScorer { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + Collection clauses = newCollection( + newLucene(newLucene(L"contents", L"john")), + newLucene(newLucene(L"contents", L"kennedy")) + ); + + SpanNearQueryPtr snq = newLucene(clauses, 1, true); + fixture->doSearching(snq); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + + // Currently highlights "John" and "Kennedy" separately + EXPECT_EQ(fixture->numHighlights, 2); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetBestFragmentsQueryScorer) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testGetBestFragmentsQueryScorer) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } -namespace 
TestOffByOne -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - TermQueryPtr query = newLucene(newLucene(L"data", L"help")); - HighlighterPtr hg = newLucene(newLucene(), newLucene(query)); - hg->setTextFragmenter(newLucene()); - - String match = hg->getBestFragment(fixture->analyzer, L"data", L"help me [54-65]"); - BOOST_CHECK_EQUAL(L"help me [54-65]", match); - } - }; +namespace TestOffByOne { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + TermQueryPtr query = newLucene(newLucene(L"data", L"help")); + HighlighterPtr hg = newLucene(newLucene(), newLucene(query)); + hg->setTextFragmenter(newLucene()); + + String match = hg->getBestFragment(fixture->analyzer, L"data", L"help me [54-65]"); + EXPECT_EQ(L"help me [54-65]", match); + } +}; + } -BOOST_AUTO_TEST_CASE(testOffByOne) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testOffByOne) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestGetBestFragmentsFilteredQuery -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - TermRangeFilterPtr rf = newLucene(L"contents", L"john", L"john", true, true); - Collection clauses = newCollection( - 
newLucene(newLucene(L"contents", L"john")), - newLucene(newLucene(L"contents", L"kennedy")) - ); - SpanNearQueryPtr snq = newLucene(clauses, 1, true); - FilteredQueryPtr fq = newLucene(snq, rf); - - fixture->doSearching(fq); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - - // Currently highlights "John" and "Kennedy" separately - BOOST_CHECK_EQUAL(fixture->numHighlights, 2); - } - }; +namespace TestGetBestFragmentsFilteredQuery { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + TermRangeFilterPtr rf = newLucene(L"contents", L"john", L"john", true, true); + Collection clauses = newCollection( + newLucene(newLucene(L"contents", L"john")), + newLucene(newLucene(L"contents", L"kennedy")) + ); + SpanNearQueryPtr snq = newLucene(clauses, 1, true); + FilteredQueryPtr fq = newLucene(snq, rf); + + fixture->doSearching(fq); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + + // Currently highlights "John" and "Kennedy" separately + EXPECT_EQ(fixture->numHighlights, 2); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetBestFragmentsFilteredQuery) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testGetBestFragmentsFilteredQuery) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } -namespace TestGetBestFragmentsFilteredPhraseQuery -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual 
~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - TermRangeFilterPtr rf = newLucene(L"contents", L"john", L"john", true, true); - PhraseQueryPtr pq = newLucene(); - pq->add(newLucene(L"contents", L"john")); - pq->add(newLucene(L"contents", L"kennedy")); - FilteredQueryPtr fq = newLucene(pq, rf); - - fixture->doSearching(fq); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - - // Currently highlights "John" and "Kennedy" separately - BOOST_CHECK_EQUAL(fixture->numHighlights, 2); - } - }; +namespace TestGetBestFragmentsFilteredPhraseQuery { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + TermRangeFilterPtr rf = newLucene(L"contents", L"john", L"john", true, true); + PhraseQueryPtr pq = newLucene(); + pq->add(newLucene(L"contents", L"john")); + pq->add(newLucene(L"contents", L"kennedy")); + FilteredQueryPtr fq = newLucene(pq, rf); + + fixture->doSearching(fq); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + + // Currently highlights "John" and "Kennedy" separately + EXPECT_EQ(fixture->numHighlights, 2); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetBestFragmentsFilteredPhraseQuery) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testGetBestFragmentsFilteredPhraseQuery) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(newCollection(L"John Kennedy has been shot")); } -namespace TestGetBestFragmentsMultiTerm -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - 
HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"John Kenn*"); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - BOOST_CHECK_EQUAL(fixture->numHighlights, 5); - } - }; +namespace TestGetBestFragmentsMultiTerm { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"John Kenn*"); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + EXPECT_EQ(fixture->numHighlights, 5); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetBestFragmentsMultiTerm) -{ - TestHighlightRunnerPtr helper = newLucene(this); - +TEST_F(HighlighterTest, testGetBestFragmentsMultiTerm) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + helper->start( newCollection( L"John Kennedy has been shot", @@ -1419,34 +1309,30 @@ BOOST_AUTO_TEST_CASE(testGetBestFragmentsMultiTerm) ); } -namespace TestGetBestFragmentsWithOr -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"JFK OR Kennedy"); - doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); - 
BOOST_CHECK_EQUAL(fixture->numHighlights, 5); - } - }; +namespace TestGetBestFragmentsWithOr { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"JFK OR Kennedy"); + doStandardHighlights(fixture->analyzer, fixture->searcher, fixture->hits, fixture->query, newLucene(fixture), expected); + EXPECT_EQ(fixture->numHighlights, 5); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetBestFragmentsWithOr) -{ - TestHighlightRunnerPtr helper = newLucene(this); - +TEST_F(HighlighterTest, testGetBestFragmentsWithOr) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); + helper->start( newCollection( L"JFK has been shot", @@ -1457,643 +1343,578 @@ BOOST_AUTO_TEST_CASE(testGetBestFragmentsWithOr) ); } -namespace TestGetBestSingleFragment -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { +namespace TestGetBestSingleFragment { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->doSearching(L"Kennedy"); + fixture->numHighlights = 0; + Collection results = Collection::newInstance(); + + for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { + String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + HighlighterPtr highlighter = 
getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); + highlighter->setTextFragmenter(newLucene(40)); + results.add(highlighter->getBestFragment(tokenStream, text)); } - - virtual ~HelperHighlightRunner() - { + EXPECT_EQ(fixture->numHighlights, 4); + + EXPECT_EQ(results.size(), 3); + EXPECT_EQ(results[0], L"John Kennedy has been shot"); + EXPECT_EQ(results[1], L"This piece of text refers to Kennedy"); + EXPECT_EQ(results[2], L" kennedy has been shot"); + + fixture->numHighlights = 0; + results = Collection::newInstance(); + + for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { + String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); + results.add(highlighter->getBestFragment(fixture->analyzer, HighlighterTest::FIELD_NAME, text)); } - - public: - virtual void run(Collection expected) - { - fixture->doSearching(L"Kennedy"); - fixture->numHighlights = 0; - Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < fixture->hits->totalHits; ++i) - { - String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); - TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); - highlighter->setTextFragmenter(newLucene(40)); - results.add(highlighter->getBestFragment(tokenStream, text)); - } - BOOST_CHECK_EQUAL(fixture->numHighlights, 4); - - BOOST_CHECK_EQUAL(results.size(), 3); - BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L"This piece of text 
refers to Kennedy"); - BOOST_CHECK_EQUAL(results[2], L" kennedy has been shot"); - - fixture->numHighlights = 0; - results = Collection::newInstance(); - - for (int32_t i = 0; i < fixture->hits->totalHits; ++i) - { - String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); - TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); - results.add(highlighter->getBestFragment(fixture->analyzer, HighlighterTestFixture::FIELD_NAME, text)); - } - BOOST_CHECK_EQUAL(fixture->numHighlights, 4); - - BOOST_CHECK_EQUAL(results.size(), 3); - BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); - BOOST_CHECK_EQUAL(results[2], L" is really here which says kennedy has been shot"); - - fixture->numHighlights = 0; - results = Collection::newInstance(); - - for (int32_t i = 0; i < fixture->hits->totalHits; ++i) - { - String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); - TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); - highlighter->setTextFragmenter(newLucene(40)); - Collection result = highlighter->getBestFragments(fixture->analyzer, HighlighterTestFixture::FIELD_NAME, text, 10); - results.addAll(result.begin(), result.end()); - } - BOOST_CHECK_EQUAL(fixture->numHighlights, 4); - - BOOST_CHECK_EQUAL(results.size(), 3); - BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to 
Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy"); - BOOST_CHECK_EQUAL(results[2], L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"); + EXPECT_EQ(fixture->numHighlights, 4); + + EXPECT_EQ(results.size(), 3); + EXPECT_EQ(results[0], L"John Kennedy has been shot"); + EXPECT_EQ(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); + EXPECT_EQ(results[2], L" is really here which says kennedy has been shot"); + + fixture->numHighlights = 0; + results = Collection::newInstance(); + + for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { + String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); + highlighter->setTextFragmenter(newLucene(40)); + Collection result = highlighter->getBestFragments(fixture->analyzer, HighlighterTest::FIELD_NAME, text, 10); + results.addAll(result.begin(), result.end()); } - }; + EXPECT_EQ(fixture->numHighlights, 4); + + EXPECT_EQ(results.size(), 3); + EXPECT_EQ(results[0], L"John Kennedy has been shot"); + EXPECT_EQ(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very long in the middle and finally ends with another reference to Kennedy"); + EXPECT_EQ(results[2], L"Hello this is a piece of text that is very long and contains too much preamble and the meat is really here which says kennedy has been shot"); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetBestSingleFragment) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, 
testGetBestSingleFragment) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestGetBestSingleFragmentWithWeights -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - Collection wTerms = Collection::newInstance(2); - wTerms[0] = newLucene(10.0, L"hello"); - - Collection positionSpans = newCollection(newLucene(0, 0)); - boost::dynamic_pointer_cast(wTerms[0])->addPositionSpans(positionSpans); - - wTerms[1] = newLucene(1.0, L"kennedy"); - positionSpans = newCollection(newLucene(14, 14)); - boost::dynamic_pointer_cast(wTerms[1])->addPositionSpans(positionSpans); - - HighlighterPtr highlighter = getHighlighter(wTerms, newLucene(fixture)); - TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(fixture->texts[0])); - highlighter->setTextFragmenter(newLucene(2)); - - String result = highlighter->getBestFragment(tokenStream, fixture->texts[0]); - boost::trim(result); - - BOOST_CHECK_EQUAL(L"Hello", result); - - wTerms[1]->setWeight(50.0); - tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(fixture->texts[0])); - highlighter = getHighlighter(wTerms, newLucene(fixture)); - highlighter->setTextFragmenter(newLucene(2)); - - result = highlighter->getBestFragment(tokenStream, fixture->texts[0]); - boost::trim(result); - - BOOST_CHECK_EQUAL(L"kennedy", result); - } - }; +namespace TestGetBestSingleFragmentWithWeights { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void 
run(Collection expected) { + Collection wTerms = Collection::newInstance(2); + wTerms[0] = newLucene(10.0, L"hello"); + + Collection positionSpans = newCollection(newLucene(0, 0)); + boost::dynamic_pointer_cast(wTerms[0])->addPositionSpans(positionSpans); + + wTerms[1] = newLucene(1.0, L"kennedy"); + positionSpans = newCollection(newLucene(14, 14)); + boost::dynamic_pointer_cast(wTerms[1])->addPositionSpans(positionSpans); + + HighlighterPtr highlighter = getHighlighter(wTerms, newLucene(fixture)); + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(fixture->texts[0])); + highlighter->setTextFragmenter(newLucene(2)); + + String result = highlighter->getBestFragment(tokenStream, fixture->texts[0]); + boost::trim(result); + + EXPECT_EQ(L"Hello", result); + + wTerms[1]->setWeight(50.0); + tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(fixture->texts[0])); + highlighter = getHighlighter(wTerms, newLucene(fixture)); + highlighter->setTextFragmenter(newLucene(2)); + + result = highlighter->getBestFragment(tokenStream, fixture->texts[0]); + boost::trim(result); + + EXPECT_EQ(L"kennedy", result); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetBestSingleFragmentWithWeights) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testGetBestSingleFragmentWithWeights) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestOverlapAnalyzer -{ - class SynonymTokenizer : public TokenStream - { - public: - SynonymTokenizer(TokenStreamPtr realStream, MapStringString synonyms) - { - this->realStream = realStream; - this->synonyms = synonyms; - this->synonymToken = 0; - this->realTermAtt = realStream->addAttribute(); - this->realPosIncrAtt = realStream->addAttribute(); - this->realOffsetAtt = realStream->addAttribute(); - - this->termAtt = addAttribute(); - this->posIncrAtt = addAttribute(); - this->offsetAtt = addAttribute(); - } - 
- virtual ~SynonymTokenizer() - { - } - - protected: - TokenStreamPtr realStream; - TokenPtr currentRealToken; - TokenPtr cRealToken; - MapStringString synonyms; - Collection synonymTokens; - int32_t synonymToken; - TermAttributePtr realTermAtt; - PositionIncrementAttributePtr realPosIncrAtt; - OffsetAttributePtr realOffsetAtt; - TermAttributePtr termAtt; - PositionIncrementAttributePtr posIncrAtt; - OffsetAttributePtr offsetAtt; - - public: - virtual bool incrementToken() - { - if (!currentRealToken) - { - bool next = realStream->incrementToken(); - if (!next) - return false; - clearAttributes(); - termAtt->setTermBuffer(realTermAtt->term()); - offsetAtt->setOffset(realOffsetAtt->startOffset(), realOffsetAtt->endOffset()); - posIncrAtt->setPositionIncrement(realPosIncrAtt->getPositionIncrement()); - - if (!synonyms.contains(realTermAtt->term())) - return true; - String expansions = synonyms.get(realTermAtt->term()); - synonymTokens = StringUtils::split(expansions, L","); - synonymToken = 0; - if (!synonymTokens.empty()) - { - currentRealToken = newLucene(realOffsetAtt->startOffset(), realOffsetAtt->endOffset()); - currentRealToken->setTermBuffer(realTermAtt->term()); - } - return true; +namespace TestOverlapAnalyzer { + +class SynonymTokenizer : public TokenStream { +public: + SynonymTokenizer(const TokenStreamPtr& realStream, MapStringString synonyms) { + this->realStream = realStream; + this->synonyms = synonyms; + this->synonymToken = 0; + this->realTermAtt = realStream->addAttribute(); + this->realPosIncrAtt = realStream->addAttribute(); + this->realOffsetAtt = realStream->addAttribute(); + + this->termAtt = addAttribute(); + this->posIncrAtt = addAttribute(); + this->offsetAtt = addAttribute(); + } + + virtual ~SynonymTokenizer() { + } + +protected: + TokenStreamPtr realStream; + TokenPtr currentRealToken; + TokenPtr cRealToken; + MapStringString synonyms; + Collection synonymTokens; + int32_t synonymToken; + TermAttributePtr realTermAtt; + 
PositionIncrementAttributePtr realPosIncrAtt; + OffsetAttributePtr realOffsetAtt; + TermAttributePtr termAtt; + PositionIncrementAttributePtr posIncrAtt; + OffsetAttributePtr offsetAtt; + +public: + virtual bool incrementToken() { + if (!currentRealToken) { + bool next = realStream->incrementToken(); + if (!next) { + return false; } - else - { - String tok = synonymTokens[synonymToken++]; - clearAttributes(); - termAtt->setTermBuffer(tok); - offsetAtt->setOffset(currentRealToken->startOffset(), currentRealToken->endOffset()); - posIncrAtt->setPositionIncrement(0); - if (synonymToken == synonymTokens.size()) - { - currentRealToken.reset(); - synonymTokens.reset(); - synonymToken = 0; - } + clearAttributes(); + termAtt->setTermBuffer(realTermAtt->term()); + offsetAtt->setOffset(realOffsetAtt->startOffset(), realOffsetAtt->endOffset()); + posIncrAtt->setPositionIncrement(realPosIncrAtt->getPositionIncrement()); + + if (!synonyms.contains(realTermAtt->term())) { return true; } + String expansions = synonyms.get(realTermAtt->term()); + synonymTokens = StringUtils::split(expansions, L","); + synonymToken = 0; + if (!synonymTokens.empty()) { + currentRealToken = newLucene(realOffsetAtt->startOffset(), realOffsetAtt->endOffset()); + currentRealToken->setTermBuffer(realTermAtt->term()); + } + return true; + } else { + String tok = synonymTokens[synonymToken++]; + clearAttributes(); + termAtt->setTermBuffer(tok); + offsetAtt->setOffset(currentRealToken->startOffset(), currentRealToken->endOffset()); + posIncrAtt->setPositionIncrement(0); + if (synonymToken == synonymTokens.size()) { + currentRealToken.reset(); + synonymTokens.reset(); + synonymToken = 0; + } + return true; } - }; - - class SynonymAnalyzer : public Analyzer - { - public: - SynonymAnalyzer(MapStringString synonyms) - { - this->synonyms = synonyms; - } - - virtual ~SynonymAnalyzer() - { - } - - protected: - MapStringString synonyms; - - public: - virtual TokenStreamPtr tokenStream(const String& fieldName, 
ReaderPtr reader) - { - LowerCaseTokenizerPtr stream = newLucene(reader); - stream->addAttribute(); - stream->addAttribute(); - stream->addAttribute(); - return newLucene(stream, synonyms); - } - }; - - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - MapStringString synonyms = MapStringString::newInstance(); - synonyms.put(L"football", L"soccer,footie"); - AnalyzerPtr analyzer = newLucene(synonyms); - String srchkey = L"football"; - - String s = L"football-soccer in the euro 2004 footie competition"; - QueryParserPtr parser = newLucene(HighlighterTestFixture::TEST_VERSION, L"bookid", analyzer); - QueryPtr query = parser->parse(srchkey); - - TokenStreamPtr tokenStream = analyzer->tokenStream(L"", newLucene(s)); - - HighlighterPtr highlighter = getHighlighter(query, L"", tokenStream, newLucene(fixture)); - - // Get 3 best fragments and separate with a "..." 
- tokenStream = analyzer->tokenStream(L"", newLucene(s)); - - String result = highlighter->getBestFragments(tokenStream, s, 3, L"..."); - String expectedResult = L"football-soccer in the euro 2004 footie competition"; - - BOOST_CHECK_EQUAL(expectedResult, result); - } - }; + } +}; + +class SynonymAnalyzer : public Analyzer { +public: + SynonymAnalyzer(MapStringString synonyms) { + this->synonyms = synonyms; + } + + virtual ~SynonymAnalyzer() { + } + +protected: + MapStringString synonyms; + +public: + virtual TokenStreamPtr tokenStream(const String& fieldName, const ReaderPtr& reader) { + LowerCaseTokenizerPtr stream = newLucene(reader); + stream->addAttribute(); + stream->addAttribute(); + stream->addAttribute(); + return newLucene(stream, synonyms); + } +}; + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + MapStringString synonyms = MapStringString::newInstance(); + synonyms.put(L"football", L"soccer,footie"); + AnalyzerPtr analyzer = newLucene(synonyms); + String srchkey = L"football"; + + String s = L"football-soccer in the euro 2004 footie competition"; + QueryParserPtr parser = newLucene(HighlighterTest::TEST_VERSION, L"bookid", analyzer); + QueryPtr query = parser->parse(srchkey); + + TokenStreamPtr tokenStream = analyzer->tokenStream(L"", newLucene(s)); + + HighlighterPtr highlighter = getHighlighter(query, L"", tokenStream, newLucene(fixture)); + + // Get 3 best fragments and separate with a "..." 
+ tokenStream = analyzer->tokenStream(L"", newLucene(s)); + + String result = highlighter->getBestFragments(tokenStream, s, 3, L"..."); + String expectedResult = L"football-soccer in the euro 2004 footie competition"; + + EXPECT_EQ(expectedResult, result); + } +}; + } /// tests a "complex" analyzer that produces multiple overlapping tokens -BOOST_AUTO_TEST_CASE(testOverlapAnalyzer) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testOverlapAnalyzer) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestGetSimpleHighlight -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"Kennedy"); - - Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < fixture->hits->totalHits; ++i) - { - String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); - TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); - results.add(highlighter->getBestFragment(tokenStream, text)); - } - BOOST_CHECK_EQUAL(fixture->numHighlights, 4); - - BOOST_CHECK_EQUAL(results.size(), 3); - BOOST_CHECK_EQUAL(results[0], L"John Kennedy has been shot"); - BOOST_CHECK_EQUAL(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); - BOOST_CHECK_EQUAL(results[2], L" is really here which says kennedy has been shot"); +namespace TestGetSimpleHighlight { + +class HelperHighlightRunner : public 
HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"Kennedy"); + + Collection results = Collection::newInstance(); + + for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { + String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); + results.add(highlighter->getBestFragment(tokenStream, text)); } - }; + EXPECT_EQ(fixture->numHighlights, 4); + + EXPECT_EQ(results.size(), 3); + EXPECT_EQ(results[0], L"John Kennedy has been shot"); + EXPECT_EQ(results[1], L"This piece of text refers to Kennedy at the beginning then has a longer piece of text that is very"); + EXPECT_EQ(results[2], L" is really here which says kennedy has been shot"); + } +}; + } -BOOST_AUTO_TEST_CASE(testGetSimpleHighlight) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testGetSimpleHighlight) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestGetTextFragments -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->doSearching(L"Kennedy"); - - for (int32_t i = 0; i < fixture->hits->totalHits; ++i) - { - String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); - 
TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - - HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); - highlighter->setTextFragmenter(newLucene(20)); - Collection stringResults = highlighter->getBestFragments(tokenStream, text, 10); - - tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - Collection fragmentResults = highlighter->getBestTextFragments(tokenStream, text, true, 10); - - BOOST_CHECK_EQUAL(fragmentResults.size(), stringResults.size()); - for (int32_t j = 0; j < stringResults.size(); ++j) - BOOST_CHECK_EQUAL(fragmentResults[j]->toString(), stringResults[j]); +namespace TestGetTextFragments { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->doSearching(L"Kennedy"); + + for (int32_t i = 0; i < fixture->hits->totalHits; ++i) { + String text = fixture->searcher->doc(fixture->hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + + HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); + highlighter->setTextFragmenter(newLucene(20)); + Collection stringResults = highlighter->getBestFragments(tokenStream, text, 10); + + tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + Collection fragmentResults = highlighter->getBestTextFragments(tokenStream, text, true, 10); + + EXPECT_EQ(fragmentResults.size(), stringResults.size()); + for (int32_t j = 0; j < stringResults.size(); ++j) { + 
EXPECT_EQ(fragmentResults[j]->toString(), stringResults[j]); } } - }; + } +}; + } -BOOST_AUTO_TEST_CASE(testGetTextFragments) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testGetTextFragments) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestMaxSizeHighlight -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - fixture->doSearching(L"meat"); - - TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(fixture->texts[0])); - HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); - highlighter->setMaxDocCharsToAnalyze(30); - - highlighter->getBestFragment(tokenStream, fixture->texts[0]); - BOOST_CHECK_EQUAL(fixture->numHighlights, 0); - } - }; +namespace TestMaxSizeHighlight { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + fixture->doSearching(L"meat"); + + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(fixture->texts[0])); + HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); + highlighter->setMaxDocCharsToAnalyze(30); + + highlighter->getBestFragment(tokenStream, fixture->texts[0]); + EXPECT_EQ(fixture->numHighlights, 0); + } +}; + } -BOOST_AUTO_TEST_CASE(testMaxSizeHighlight) -{ - 
TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testMaxSizeHighlight) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestMaxSizeHighlightTruncates -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - String goodWord = L"goodtoken"; - HashSet stopWords = HashSet::newInstance(); - stopWords.add(L"stoppedtoken"); - - TermQueryPtr query = newLucene(newLucene(L"data", goodWord)); - - StringStream buffer; - buffer << goodWord; - - for (int32_t i = 0; i < 10000; ++i) - { - // only one stopword - buffer << L" " << *stopWords.begin(); - } - SimpleHTMLFormatterPtr fm = newLucene(); - HighlighterPtr hg = getHighlighter(query, L"data", newLucene(HighlighterTestFixture::TEST_VERSION, stopWords)->tokenStream(L"data", newLucene(buffer.str())), fm); - - hg->setTextFragmenter(newLucene()); - hg->setMaxDocCharsToAnalyze(100); - String match = hg->getBestFragment(newLucene(HighlighterTestFixture::TEST_VERSION, stopWords), L"data", buffer.str()); - BOOST_CHECK((int32_t)match.length() < hg->getMaxDocCharsToAnalyze()); - - // add another tokenized word to the overall length - but set way beyond the length of text under consideration - // (after a large slug of stop words + whitespace) - buffer << L" " << goodWord; - match = hg->getBestFragment(newLucene(HighlighterTestFixture::TEST_VERSION, stopWords), L"data", buffer.str()); - BOOST_CHECK((int32_t)match.length() < hg->getMaxDocCharsToAnalyze()); +namespace TestMaxSizeHighlightTruncates { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual 
~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + String goodWord = L"goodtoken"; + HashSet stopWords = HashSet::newInstance(); + stopWords.add(L"stoppedtoken"); + + TermQueryPtr query = newLucene(newLucene(L"data", goodWord)); + + StringStream buffer; + buffer << goodWord; + + for (int32_t i = 0; i < 10000; ++i) { + // only one stopword + buffer << L" " << *stopWords.begin(); } - }; + SimpleHTMLFormatterPtr fm = newLucene(); + HighlighterPtr hg = getHighlighter(query, L"data", newLucene(HighlighterTest::TEST_VERSION, stopWords)->tokenStream(L"data", newLucene(buffer.str())), fm); + + hg->setTextFragmenter(newLucene()); + hg->setMaxDocCharsToAnalyze(100); + String match = hg->getBestFragment(newLucene(HighlighterTest::TEST_VERSION, stopWords), L"data", buffer.str()); + EXPECT_TRUE((int32_t)match.length() < hg->getMaxDocCharsToAnalyze()); + + // add another tokenized word to the overall length - but set way beyond the length of text under consideration + // (after a large slug of stop words + whitespace) + buffer << L" " << goodWord; + match = hg->getBestFragment(newLucene(HighlighterTest::TEST_VERSION, stopWords), L"data", buffer.str()); + EXPECT_TRUE((int32_t)match.length() < hg->getMaxDocCharsToAnalyze()); + } +}; + } -BOOST_AUTO_TEST_CASE(testMaxSizeHighlightTruncates) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testMaxSizeHighlightTruncates) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestMaxSizeEndHighlight -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - HashSet stopWords = HashSet::newInstance(); - stopWords.add(L"in"); - stopWords.add(L"it"); - - TermQueryPtr query = 
newLucene(newLucene(L"text", L"searchterm")); - - String text = L"this is a text with searchterm in it"; - - SimpleHTMLFormatterPtr fm = newLucene(); - HighlighterPtr hg = getHighlighter(query, L"text", newLucene(HighlighterTestFixture::TEST_VERSION, stopWords)->tokenStream(L"text", newLucene(text)), fm); - - hg->setTextFragmenter(newLucene()); - hg->setMaxDocCharsToAnalyze(36); - String match = hg->getBestFragment(newLucene(HighlighterTestFixture::TEST_VERSION, stopWords), L"text", text); - BOOST_CHECK(boost::ends_with(match, L"in it")); - } - }; +namespace TestMaxSizeEndHighlight { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + HashSet stopWords = HashSet::newInstance(); + stopWords.add(L"in"); + stopWords.add(L"it"); + + TermQueryPtr query = newLucene(newLucene(L"text", L"searchterm")); + + String text = L"this is a text with searchterm in it"; + + SimpleHTMLFormatterPtr fm = newLucene(); + HighlighterPtr hg = getHighlighter(query, L"text", newLucene(HighlighterTest::TEST_VERSION, stopWords)->tokenStream(L"text", newLucene(text)), fm); + + hg->setTextFragmenter(newLucene()); + hg->setMaxDocCharsToAnalyze(36); + String match = hg->getBestFragment(newLucene(HighlighterTest::TEST_VERSION, stopWords), L"text", text); + EXPECT_TRUE(boost::ends_with(match, L"in it")); + } +}; + } -BOOST_AUTO_TEST_CASE(testMaxSizeEndHighlight) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testMaxSizeEndHighlight) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestUnRewrittenQuery -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : 
HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->numHighlights = 0; - // test to show how rewritten query can still be used - fixture->searcher = newLucene(fixture->ramDir, true); - AnalyzerPtr analyzer = newLucene(HighlighterTestFixture::TEST_VERSION); - - QueryParserPtr parser = newLucene(HighlighterTestFixture::TEST_VERSION, HighlighterTestFixture::FIELD_NAME, analyzer); - QueryPtr query = parser->parse(L"JF? or Kenned*"); - TopDocsPtr hits = fixture->searcher->search(query, FilterPtr(), 1000); - - int32_t maxNumFragmentsRequired = 3; - - for (int32_t i = 0; i < hits->totalHits; ++i) - { - String text = fixture->searcher->doc(hits->scoreDocs[i]->doc)->get(HighlighterTestFixture::FIELD_NAME); - TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - HighlighterPtr highlighter = getHighlighter(query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture), false); - - highlighter->setTextFragmenter(newLucene(40)); - - highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"..."); - } - - // We expect to have zero highlights if the query is multi-terms and is not rewritten - BOOST_CHECK_EQUAL(fixture->numHighlights, 0); +namespace TestUnRewrittenQuery { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->numHighlights = 0; + // test to show how rewritten query can still be used + fixture->searcher = newLucene(fixture->ramDir, true); + AnalyzerPtr analyzer = newLucene(HighlighterTest::TEST_VERSION); + + QueryParserPtr parser = newLucene(HighlighterTest::TEST_VERSION, HighlighterTest::FIELD_NAME, analyzer); + 
QueryPtr query = parser->parse(L"JF? or Kenned*"); + TopDocsPtr hits = fixture->searcher->search(query, FilterPtr(), 1000); + + int32_t maxNumFragmentsRequired = 3; + + for (int32_t i = 0; i < hits->totalHits; ++i) { + String text = fixture->searcher->doc(hits->scoreDocs[i]->doc)->get(HighlighterTest::FIELD_NAME); + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + HighlighterPtr highlighter = getHighlighter(query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture), false); + + highlighter->setTextFragmenter(newLucene(40)); + + highlighter->getBestFragments(tokenStream, text, maxNumFragmentsRequired, L"..."); } - }; + + // We expect to have zero highlights if the query is multi-terms and is not rewritten + EXPECT_EQ(fixture->numHighlights, 0); + } +}; + } -BOOST_AUTO_TEST_CASE(testUnRewrittenQuery) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testUnRewrittenQuery) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestNoFragments -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void run(Collection expected) - { - fixture->doSearching(L"AnInvalidQueryWhichShouldYieldNoResults"); - - for (int32_t i = 0; i < fixture->texts.size(); ++i) - { - String text = fixture->texts[i]; - TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTestFixture::FIELD_NAME, newLucene(text)); - HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTestFixture::FIELD_NAME, tokenStream, newLucene(fixture)); - String result = highlighter->getBestFragment(tokenStream, text); - BOOST_CHECK(result.empty()); - } +namespace TestNoFragments { + +class HelperHighlightRunner : public 
HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + fixture->doSearching(L"AnInvalidQueryWhichShouldYieldNoResults"); + + for (int32_t i = 0; i < fixture->texts.size(); ++i) { + String text = fixture->texts[i]; + TokenStreamPtr tokenStream = fixture->analyzer->tokenStream(HighlighterTest::FIELD_NAME, newLucene(text)); + HighlighterPtr highlighter = getHighlighter(fixture->query, HighlighterTest::FIELD_NAME, tokenStream, newLucene(fixture)); + String result = highlighter->getBestFragment(tokenStream, text); + EXPECT_TRUE(result.empty()); } - }; + } +}; + } -BOOST_AUTO_TEST_CASE(testNoFragments) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testNoFragments) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestEncoding -{ - class NullScorer : public HighlighterScorer, public LuceneObject - { - public: - virtual ~NullScorer() - { - } - - public: - virtual void startFragment(TextFragmentPtr newFragment) - { - } - - virtual double getTokenScore() - { - return 0.0; - } - - virtual double getFragmentScore() - { - return 1.0; - } - - virtual TokenStreamPtr init(TokenStreamPtr tokenStream) - { - return TokenStreamPtr(); - } - }; +namespace TestEncoding { + +class NullScorer : public HighlighterScorer, public LuceneObject { +public: + virtual ~NullScorer() { + } + +public: + virtual void startFragment(const TextFragmentPtr& newFragment) { + } + + virtual double getTokenScore() { + return 0.0; + } + + virtual double getFragmentScore() { + return 1.0; + } + + virtual TokenStreamPtr init(const TokenStreamPtr& tokenStream) { + return TokenStreamPtr(); + } +}; + } /// Demonstrates creation of an XHTML compliant doc using new encoding facilities. 
-BOOST_AUTO_TEST_CASE(testEncoding) -{ +TEST_F(HighlighterTest, testEncoding) { String rawDocContent = L"\"Smith & sons' prices < 3 and >4\" claims article"; - + // run the highlighter on the raw content (scorer does not score any tokens for // highlighting but scores a single fragment for selection - HighlighterPtr highlighter = newLucene(newLucene(this), newLucene(), newLucene()); - + HighlighterPtr highlighter = newLucene(newLucene(this), newLucene(), newLucene()); + highlighter->setTextFragmenter(newLucene(2000)); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(rawDocContent)); String encodedSnippet = highlighter->getBestFragments(tokenStream, rawDocContent, 1, L""); - BOOST_CHECK_EQUAL(encodedSnippet, L""Smith & sons' prices < 3 and >4" claims article"); + EXPECT_EQ(encodedSnippet, L""Smith & sons' prices < 3 and >4" claims article"); } -BOOST_AUTO_TEST_CASE(testMultiSearcher) -{ +TEST_F(HighlighterTest, testMultiSearcher) { // setup index 1 RAMDirectoryPtr ramDir1 = newLucene(); IndexWriterPtr writer1 = newLucene(ramDir1, newLucene(TEST_VERSION), true, IndexWriter::MaxFieldLengthUNLIMITED); @@ -2115,11 +1936,11 @@ BOOST_AUTO_TEST_CASE(testMultiSearcher) writer2->optimize(); writer2->close(); IndexReaderPtr reader2 = IndexReader::open(ramDir2, true); - + Collection searchers = newCollection( - newLucene(ramDir1, true), - newLucene(ramDir2, true) - ); + newLucene(ramDir1, true), + newLucene(ramDir2, true) + ); MultiSearcherPtr multiSearcher = newLucene(searchers); QueryParserPtr parser = newLucene(TEST_VERSION, FIELD_NAME, newLucene(TEST_VERSION)); parser->setMultiTermRewriteMethod(MultiTermQuery::SCORING_BOOLEAN_QUERY_REWRITE()); @@ -2128,318 +1949,294 @@ BOOST_AUTO_TEST_CASE(testMultiSearcher) hits = multiSearcher->search(query, FilterPtr(), 1000); Collection expandedQueries = newCollection( - query->rewrite(reader1), - query->rewrite(reader2) - ); + query->rewrite(reader1), + query->rewrite(reader2) + ); query = 
query->combine(expandedQueries); // create an instance of the highlighter with the tags used to surround highlighted text - HighlighterPtr highlighter = newLucene(newLucene(this), newLucene(query)); + HighlighterPtr highlighter = newLucene(newLucene(this), newLucene(query)); Collection results = Collection::newInstance(); - - for (int32_t i = 0; i < hits->totalHits; ++i) - { + + for (int32_t i = 0; i < hits->totalHits; ++i) { String text = multiSearcher->doc(hits->scoreDocs[i]->doc)->get(FIELD_NAME); TokenStreamPtr tokenStream = analyzer->tokenStream(FIELD_NAME, newLucene(text)); String highlightedText = highlighter->getBestFragment(tokenStream, text); results.add(highlightedText); } - BOOST_CHECK_EQUAL(results.size(), 2); - BOOST_CHECK_EQUAL(results[0], L"multiOne"); - BOOST_CHECK_EQUAL(results[1], L"multiTwo"); - - BOOST_CHECK_EQUAL(numHighlights, 2); + EXPECT_EQ(results.size(), 2); + EXPECT_EQ(results[0], L"multiOne"); + EXPECT_EQ(results[1], L"multiTwo"); + + EXPECT_EQ(numHighlights, 2); } -namespace TestFieldSpecificHighlighting -{ - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { +namespace TestFieldSpecificHighlighting { + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + String docMainText = L"fred is one of the people"; + QueryParserPtr parser = newLucene(HighlighterTest::TEST_VERSION, HighlighterTest::FIELD_NAME, fixture->analyzer); + QueryPtr query = parser->parse(L"fred category:people"); + + // highlighting respects fieldnames used in query + + HighlighterScorerPtr fieldSpecificScorer; + if (mode == QUERY) { + 
fieldSpecificScorer = newLucene(query, HighlighterTest::FIELD_NAME); + } else if (mode == QUERY_TERM) { + fieldSpecificScorer = newLucene(query, L"contents"); } - - public: - virtual void run(Collection expected) - { - String docMainText = L"fred is one of the people"; - QueryParserPtr parser = newLucene(HighlighterTestFixture::TEST_VERSION, HighlighterTestFixture::FIELD_NAME, fixture->analyzer); - QueryPtr query = parser->parse(L"fred category:people"); - - // highlighting respects fieldnames used in query - - HighlighterScorerPtr fieldSpecificScorer; - if (mode == QUERY) - fieldSpecificScorer = newLucene(query, HighlighterTestFixture::FIELD_NAME); - else if (mode == QUERY_TERM) - fieldSpecificScorer = newLucene(query, L"contents"); - - HighlighterPtr fieldSpecificHighlighter = newLucene(newLucene(), fieldSpecificScorer); - fieldSpecificHighlighter->setTextFragmenter(newLucene()); - String result = fieldSpecificHighlighter->getBestFragment(fixture->analyzer, HighlighterTestFixture::FIELD_NAME, docMainText); - BOOST_CHECK_EQUAL(result, L"fred is one of the people"); - - // highlighting does not respect fieldnames used in query - HighlighterScorerPtr fieldInSpecificScorer; - if (mode == QUERY) - fieldInSpecificScorer = newLucene(query, L""); - else if (mode == QUERY_TERM) - fieldInSpecificScorer = newLucene(query); - - HighlighterPtr fieldInSpecificHighlighter = newLucene(newLucene(), fieldInSpecificScorer); - fieldInSpecificHighlighter->setTextFragmenter(newLucene()); - result = fieldInSpecificHighlighter->getBestFragment(fixture->analyzer, HighlighterTestFixture::FIELD_NAME, docMainText); - BOOST_CHECK_EQUAL(result, L"fred is one of the people"); - - fixture->reader->close(); + + HighlighterPtr fieldSpecificHighlighter = newLucene(newLucene(), fieldSpecificScorer); + fieldSpecificHighlighter->setTextFragmenter(newLucene()); + String result = fieldSpecificHighlighter->getBestFragment(fixture->analyzer, HighlighterTest::FIELD_NAME, docMainText); + EXPECT_EQ(result, 
L"fred is one of the people"); + + // highlighting does not respect fieldnames used in query + HighlighterScorerPtr fieldInSpecificScorer; + if (mode == QUERY) { + fieldInSpecificScorer = newLucene(query, L""); + } else if (mode == QUERY_TERM) { + fieldInSpecificScorer = newLucene(query); } - }; + + HighlighterPtr fieldInSpecificHighlighter = newLucene(newLucene(), fieldInSpecificScorer); + fieldInSpecificHighlighter->setTextFragmenter(newLucene()); + result = fieldInSpecificHighlighter->getBestFragment(fixture->analyzer, HighlighterTest::FIELD_NAME, docMainText); + EXPECT_EQ(result, L"fred is one of the people"); + + fixture->reader->close(); + } +}; + } -BOOST_AUTO_TEST_CASE(testFieldSpecificHighlighting) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testFieldSpecificHighlighting) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -namespace TestOverlapAnalyzer2 -{ - class TS2 : public TokenStream - { - public: - TS2() - { - termAtt = addAttribute(); - posIncrAtt = addAttribute(); - offsetAtt = addAttribute(); - lst = Collection::newInstance(); - TokenPtr t = createToken(L"hi", 0, 2); - t->setPositionIncrement(1); - lst.add(t); - t = createToken(L"hispeed", 0, 8); - t->setPositionIncrement(1); - lst.add(t); - t = createToken(L"speed", 3, 8); - t->setPositionIncrement(0); - lst.add(t); - t = createToken(L"10", 8, 10); - t->setPositionIncrement(1); - lst.add(t); - t = createToken(L"foo", 11, 14); - t->setPositionIncrement(1); - lst.add(t); - tokenPos = 0; - } - - virtual ~TS2() - { - } - - protected: - Collection lst; - int32_t tokenPos; - TermAttributePtr termAtt; - PositionIncrementAttributePtr posIncrAtt; - OffsetAttributePtr offsetAtt; - - public: - virtual bool incrementToken() - { - if (tokenPos < (int32_t)lst.size()) - { - TokenPtr token = lst[tokenPos++]; - clearAttributes(); - termAtt->setTermBuffer(token->term()); - posIncrAtt->setPositionIncrement(token->getPositionIncrement()); - 
offsetAtt->setOffset(token->startOffset(), token->endOffset()); - return true; - } - return false; - } - - protected: - TokenPtr createToken(const String& term, int32_t start, int32_t offset) - { - TokenPtr token = newLucene(start, offset); - token->setTermBuffer(term); - return token; - } - }; - - /// same token-stream as above, but the bigger token comes first this time - class TS2a : public TokenStream - { - public: - TS2a() - { - termAtt = addAttribute(); - posIncrAtt = addAttribute(); - offsetAtt = addAttribute(); - lst = Collection::newInstance(); - TokenPtr t = createToken(L"hispeed", 0, 8); - t->setPositionIncrement(1); - lst.add(t); - t = createToken(L"hi", 0, 2); - t->setPositionIncrement(0); - lst.add(t); - t = createToken(L"speed", 3, 8); - t->setPositionIncrement(1); - lst.add(t); - t = createToken(L"10", 8, 10); - t->setPositionIncrement(1); - lst.add(t); - t = createToken(L"foo", 11, 14); - t->setPositionIncrement(1); - lst.add(t); - tokenPos = 0; - } - - virtual ~TS2a() - { - } - - protected: - Collection lst; - int32_t tokenPos; - TermAttributePtr termAtt; - PositionIncrementAttributePtr posIncrAtt; - OffsetAttributePtr offsetAtt; - - public: - virtual bool incrementToken() - { - if (tokenPos < (int32_t)lst.size()) - { - TokenPtr token = lst[tokenPos++]; - clearAttributes(); - termAtt->setTermBuffer(token->term()); - posIncrAtt->setPositionIncrement(token->getPositionIncrement()); - offsetAtt->setOffset(token->startOffset(), token->endOffset()); - return true; - } - return false; - } - - protected: - TokenPtr createToken(const String& term, int32_t start, int32_t offset) - { - TokenPtr token = newLucene(start, offset); - token->setTermBuffer(term); - return token; - } - }; - - class HelperHighlightRunner : public HighlighterTest::TestHighlightRunner - { - public: - HelperHighlightRunner(HighlighterTestFixture* fixture) : HighlighterTest::TestHighlightRunner(fixture) - { - } - - virtual ~HelperHighlightRunner() - { - } - - public: - virtual void 
run(Collection expected) - { - String s = L"Hi-Speed10 foo"; - - QueryPtr query; - HighlighterPtr highlighter; - String result; - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"foo"); - highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"10"); - highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hi"); - highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"speed"); - highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hispeed"); - highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hi speed"); - highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - // same tests, just put the bigger overlapping token first - query = newLucene(HighlighterTestFixture::TEST_VERSION, 
L"text", newLucene())->parse(L"foo"); - highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"10"); - highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hi"); - highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"speed"); - highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hispeed"); - highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - - query = newLucene(HighlighterTestFixture::TEST_VERSION, L"text", newLucene())->parse(L"hi speed"); - highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); - result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); - BOOST_CHECK_EQUAL(L"Hi-Speed10 foo", result); - } - - TokenStreamPtr getTS2() - { - return newLucene(); - } - - TokenStreamPtr getTS2a() - { - return newLucene(); - } - }; +namespace TestOverlapAnalyzer2 { + +class TS2 : public TokenStream { +public: + TS2() { + termAtt = addAttribute(); + posIncrAtt = addAttribute(); + offsetAtt = 
addAttribute(); + lst = Collection::newInstance(); + TokenPtr t = createToken(L"hi", 0, 2); + t->setPositionIncrement(1); + lst.add(t); + t = createToken(L"hispeed", 0, 8); + t->setPositionIncrement(1); + lst.add(t); + t = createToken(L"speed", 3, 8); + t->setPositionIncrement(0); + lst.add(t); + t = createToken(L"10", 8, 10); + t->setPositionIncrement(1); + lst.add(t); + t = createToken(L"foo", 11, 14); + t->setPositionIncrement(1); + lst.add(t); + tokenPos = 0; + } + + virtual ~TS2() { + } + +protected: + Collection lst; + int32_t tokenPos; + TermAttributePtr termAtt; + PositionIncrementAttributePtr posIncrAtt; + OffsetAttributePtr offsetAtt; + +public: + virtual bool incrementToken() { + if (tokenPos < (int32_t)lst.size()) { + TokenPtr token = lst[tokenPos++]; + clearAttributes(); + termAtt->setTermBuffer(token->term()); + posIncrAtt->setPositionIncrement(token->getPositionIncrement()); + offsetAtt->setOffset(token->startOffset(), token->endOffset()); + return true; + } + return false; + } + +protected: + TokenPtr createToken(const String& term, int32_t start, int32_t offset) { + TokenPtr token = newLucene(start, offset); + token->setTermBuffer(term); + return token; + } +}; + +/// same token-stream as above, but the bigger token comes first this time +class TS2a : public TokenStream { +public: + TS2a() { + termAtt = addAttribute(); + posIncrAtt = addAttribute(); + offsetAtt = addAttribute(); + lst = Collection::newInstance(); + TokenPtr t = createToken(L"hispeed", 0, 8); + t->setPositionIncrement(1); + lst.add(t); + t = createToken(L"hi", 0, 2); + t->setPositionIncrement(0); + lst.add(t); + t = createToken(L"speed", 3, 8); + t->setPositionIncrement(1); + lst.add(t); + t = createToken(L"10", 8, 10); + t->setPositionIncrement(1); + lst.add(t); + t = createToken(L"foo", 11, 14); + t->setPositionIncrement(1); + lst.add(t); + tokenPos = 0; + } + + virtual ~TS2a() { + } + +protected: + Collection lst; + int32_t tokenPos; + TermAttributePtr termAtt; + 
PositionIncrementAttributePtr posIncrAtt; + OffsetAttributePtr offsetAtt; + +public: + virtual bool incrementToken() { + if (tokenPos < (int32_t)lst.size()) { + TokenPtr token = lst[tokenPos++]; + clearAttributes(); + termAtt->setTermBuffer(token->term()); + posIncrAtt->setPositionIncrement(token->getPositionIncrement()); + offsetAtt->setOffset(token->startOffset(), token->endOffset()); + return true; + } + return false; + } + +protected: + TokenPtr createToken(const String& term, int32_t start, int32_t offset) { + TokenPtr token = newLucene(start, offset); + token->setTermBuffer(term); + return token; + } +}; + +class HelperHighlightRunner : public HighlighterTestNS::TestHighlightRunner { +public: + HelperHighlightRunner(HighlighterTest* fixture) : HighlighterTestNS::TestHighlightRunner(fixture) { + } + + virtual ~HelperHighlightRunner() { + } + +public: + virtual void run(Collection expected) { + String s = L"Hi-Speed10 foo"; + + QueryPtr query; + HighlighterPtr highlighter; + String result; + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"foo"); + highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"10"); + highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hi"); + highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"speed"); + highlighter = getHighlighter(query, L"text", getTS2(), 
newLucene(fixture)); + result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hispeed"); + highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hi speed"); + highlighter = getHighlighter(query, L"text", getTS2(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + // same tests, just put the bigger overlapping token first + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"foo"); + highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"10"); + highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hi"); + highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"speed"); + highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hispeed"); + highlighter = getHighlighter(query, L"text", 
getTS2a(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + + query = newLucene(HighlighterTest::TEST_VERSION, L"text", newLucene())->parse(L"hi speed"); + highlighter = getHighlighter(query, L"text", getTS2a(), newLucene(fixture)); + result = highlighter->getBestFragments(getTS2a(), s, 3, L"..."); + EXPECT_EQ(L"Hi-Speed10 foo", result); + } + + TokenStreamPtr getTS2() { + return newLucene(); + } + + TokenStreamPtr getTS2a() { + return newLucene(); + } +}; + } -BOOST_AUTO_TEST_CASE(testOverlapAnalyzer2) -{ - TestHighlightRunnerPtr helper = newLucene(this); +TEST_F(HighlighterTest, testOverlapAnalyzer2) { + HighlighterTestNS::TestHighlightRunnerPtr helper = newLucene(this); helper->start(); } -BOOST_AUTO_TEST_CASE(testWeightedTermsWithDeletes) -{ +TEST_F(HighlighterTest, testWeightedTermsWithDeletes) { makeIndex(); deleteDocument(); searchIndex(); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/memory/MemoryIndexTest.cpp b/src/test/contrib/memory/MemoryIndexTest.cpp index 50c1ff8c..9d4da606 100644 --- a/src/test/contrib/memory/MemoryIndexTest.cpp +++ b/src/test/contrib/memory/MemoryIndexTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -29,11 +29,9 @@ using namespace Lucene; /// Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour, /// returning the same results for queries on some randomish indexes. 
-class MemoryIndexTestFixture : public BaseTokenStreamFixture -{ +class MemoryIndexTest : public BaseTokenStreamFixture { public: - MemoryIndexTestFixture() - { + MemoryIndexTest() { fileDir = FileUtils::joinPath(getTestDir(), L"memory"); queries = HashSet::newInstance(); HashSet test1 = readQueries(L"testqueries.txt"); @@ -42,9 +40,9 @@ class MemoryIndexTestFixture : public BaseTokenStreamFixture queries.addAll(test2.begin(), test2.end()); random = newLucene(123); buffer = CharArray::newInstance(20); - - /// Some terms to be indexed, in addition to random words. - /// These terms are commonly used in the queries. + + /// Some terms to be indexed, in addition to random words. + /// These terms are commonly used in the queries. TEST_TERMS = Collection::newInstance(); TEST_TERMS.add(L"term"); TEST_TERMS.add(L"tErm"); @@ -69,9 +67,8 @@ class MemoryIndexTestFixture : public BaseTokenStreamFixture TEST_TERMS.add(L"copyright"); TEST_TERMS.add(L"Copyright"); } - - virtual ~MemoryIndexTestFixture() - { + + virtual ~MemoryIndexTest() { } protected: @@ -79,44 +76,44 @@ class MemoryIndexTestFixture : public BaseTokenStreamFixture HashSet queries; RandomPtr random; CharArray buffer; - + static const int32_t ITERATIONS; Collection TEST_TERMS; - + public: /// read a set of queries from a resource file - HashSet readQueries(const String& resource) - { + HashSet readQueries(const String& resource) { HashSet queries = HashSet::newInstance(); BufferedReaderPtr reader = newLucene(newLucene(FileUtils::joinPath(fileDir, resource))); String line; - while (reader->readLine(line)) - { + while (reader->readLine(line)) { boost::trim(line); - if (!line.empty() && !boost::starts_with(line, L"#") && !boost::starts_with(line, L"//")) + if (!line.empty() && !boost::starts_with(line, L"#") && !boost::starts_with(line, L"//")) { queries.add(line); + } } reader->close(); - + return queries; } - + /// Build a randomish document for both RAMDirectory and MemoryIndex, and run all the queries against 
it. - void checkAgainstRAMDirectory() - { + void checkAgainstRAMDirectory() { StringStream fooField; StringStream termField; - + // add up to 250 terms to field "foo" int32_t fieldCount = random->nextInt(250) + 1; - for (int32_t i = 0; i < fieldCount; ++i) + for (int32_t i = 0; i < fieldCount; ++i) { fooField << L" " << randomTerm(); - + } + // add up to 250 terms to field "foo" int32_t termCount = random->nextInt(250) + 1; - for (int32_t i = 0; i < termCount; ++i) + for (int32_t i = 0; i < termCount; ++i) { termField << L" " << randomTerm(); - + } + RAMDirectoryPtr ramdir = newLucene(); AnalyzerPtr analyzer = randomAnalyzer(); IndexWriterPtr writer = newLucene(ramdir, analyzer, IndexWriter::MaxFieldLengthUNLIMITED); @@ -131,101 +128,86 @@ class MemoryIndexTestFixture : public BaseTokenStreamFixture MemoryIndexPtr memory = newLucene(); memory->addField(L"foo", fooField.str(), analyzer); memory->addField(L"term", termField.str(), analyzer); - checkAllQueries(memory, ramdir, analyzer); + checkAllQueries(memory, ramdir, analyzer); } - - void checkAllQueries(MemoryIndexPtr memory, RAMDirectoryPtr ramdir, AnalyzerPtr analyzer) - { + + void checkAllQueries(const MemoryIndexPtr& memory, const RAMDirectoryPtr& ramdir, const AnalyzerPtr& analyzer) { IndexSearcherPtr ram = newLucene(ramdir); IndexSearcherPtr mem = memory->createSearcher(); QueryParserPtr qp = newLucene(LuceneVersion::LUCENE_CURRENT, L"foo", analyzer); - for (HashSet::iterator query = queries.begin(); query != queries.end(); ++query) - { + for (HashSet::iterator query = queries.begin(); query != queries.end(); ++query) { TopDocsPtr ramDocs = ram->search(qp->parse(*query), 1); TopDocsPtr memDocs = mem->search(qp->parse(*query), 1); - BOOST_CHECK_EQUAL(ramDocs->totalHits, memDocs->totalHits); + EXPECT_EQ(ramDocs->totalHits, memDocs->totalHits); } } - - AnalyzerPtr randomAnalyzer() - { - switch (random->nextInt(3)) - { - case 0: - return newLucene(); - case 1: - return newLucene(LuceneVersion::LUCENE_CURRENT); - 
default: - return newLucene(LuceneVersion::LUCENE_CURRENT); + + AnalyzerPtr randomAnalyzer() { + switch (random->nextInt(3)) { + case 0: + return newLucene(); + case 1: + return newLucene(LuceneVersion::LUCENE_CURRENT); + default: + return newLucene(LuceneVersion::LUCENE_CURRENT); } } - + /// half of the time, returns a random term from TEST_TERMS. /// the other half of the time, returns a random unicode string. - String randomTerm() - { - if (random->nextInt() % 2 == 1) - { + String randomTerm() { + if (random->nextInt() % 2 == 1) { // return a random TEST_TERM return TEST_TERMS[random->nextInt(TEST_TERMS.size())]; - } - else - { + } else { // return a random unicode term return randomString(); } } - + /// Return a random unicode term, like StressIndexingTest. - String randomString() - { + String randomString() { int32_t end = random->nextInt(20); - if (buffer.size() < 1 + end) + if (buffer.size() < 1 + end) { buffer.resize((int32_t)((double)(1 + end) * 1.25)); - - for (int32_t i = 0; i < end; ++i) - { + } + + for (int32_t i = 0; i < end; ++i) { int32_t t = random->nextInt(5); - if (t == 0 && i < end - 1) - { - #ifdef LPP_UNICODE_CHAR_SIZE_2 + if (t == 0 && i < end - 1) { +#ifdef LPP_UNICODE_CHAR_SIZE_2 // Make a surrogate pair // High surrogate buffer[i++] = (wchar_t)nextInt(0xd800, 0xdc00); // Low surrogate buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); - #else +#else buffer[i] = (wchar_t)nextInt(0xdc00, 0xe000); - #endif - } - else if (t <= 1) +#endif + } else if (t <= 1) { buffer[i] = (wchar_t)nextInt(0x01, 0x80); - else if (t == 2) + } else if (t == 2) { buffer[i] = (wchar_t)nextInt(0x80, 0x800); - else if (t == 3) + } else if (t == 3) { buffer[i] = (wchar_t)nextInt(0x800, 0xd800); - else if (t == 4) + } else if (t == 4) { buffer[i] = (wchar_t)nextInt(0xe000, 0xfff0); + } } return String(buffer.get(), end); } - + /// start is inclusive and end is exclusive - int32_t nextInt(int32_t start, int32_t end) - { + int32_t nextInt(int32_t start, int32_t end) { return 
start + random->nextInt(end - start); } }; -const int32_t MemoryIndexTestFixture::ITERATIONS = 100; - -BOOST_FIXTURE_TEST_SUITE(MemoryIndexTest, MemoryIndexTestFixture) +const int32_t MemoryIndexTest::ITERATIONS = 100; /// runs random tests, up to ITERATIONS times. -BOOST_AUTO_TEST_CASE(testRandomQueries) -{ - for (int32_t i = 0; i < ITERATIONS; ++i) +TEST_F(MemoryIndexTest, testRandomQueries) { + for (int32_t i = 0; i < ITERATIONS; ++i) { checkAgainstRAMDirectory(); + } } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/contrib/snowball/SnowballTest.cpp b/src/test/contrib/snowball/SnowballTest.cpp index 5149da56..1a79f26b 100644 --- a/src/test/contrib/snowball/SnowballTest.cpp +++ b/src/test/contrib/snowball/SnowballTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -11,26 +11,21 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(SnowballTest, BaseTokenStreamFixture) +typedef BaseTokenStreamFixture SnowballTest; -BOOST_AUTO_TEST_CASE(testEnglish) -{ +TEST_F(SnowballTest, testEnglish) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english"); checkAnalyzesTo(a, L"he abhorred accents", newCollection(L"he", L"abhor", L"accent")); } -BOOST_AUTO_TEST_CASE(testStopwords) -{ +TEST_F(SnowballTest, testStopwords) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english", StopAnalyzer::ENGLISH_STOP_WORDS_SET()); checkAnalyzesTo(a, L"the quick brown fox jumped", newCollection(L"quick", L"brown", L"fox", L"jump")); } -BOOST_AUTO_TEST_CASE(testReusableTokenStream) -{ +TEST_F(SnowballTest, testReusableTokenStream) { AnalyzerPtr a = newLucene(LuceneVersion::LUCENE_CURRENT, L"english"); - + checkAnalyzesToReuse(a, L"he abhorred accents", newCollection(L"he", L"abhor", L"accent")); checkAnalyzesToReuse(a, L"she abhorred him", newCollection(L"she", L"abhor", L"him")); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/document/BinaryDocumentTest.cpp b/src/test/document/BinaryDocumentTest.cpp index 05de12eb..b5776e5f 100644 --- a/src/test/document/BinaryDocumentTest.cpp +++ b/src/test/document/BinaryDocumentTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -16,21 +16,24 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(BinaryDocumentTest, LuceneTestFixture) +typedef LuceneTestFixture BinaryDocumentTest; static String binaryValStored = L"this text will be stored as a byte array in the index"; static String binaryValCompressed = L"this text will be also stored and compressed as a byte array in the index"; -BOOST_AUTO_TEST_CASE(testBinaryFieldInIndex) -{ +TEST_F(BinaryDocumentTest, testBinaryFieldInIndex) { ByteArray binaryStored = ByteArray::newInstance(binaryValStored.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryStored.get(), binaryValStored.c_str(), binaryValStored.length()); - + FieldablePtr binaryFldStored = newLucene(L"binaryStored", binaryStored, Field::STORE_YES); FieldablePtr stringFldStored = newLucene(L"stringStored", binaryValStored, Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_NO); - + // binary fields with store off are not allowed - BOOST_CHECK_EXCEPTION(newLucene(L"fail", binaryStored, Field::STORE_NO), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); + try { + newLucene(L"fail", binaryStored, Field::STORE_NO); + } catch (IllegalArgumentException& e) { + EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); + } DocumentPtr doc = newLucene(); @@ -38,7 +41,7 @@ BOOST_AUTO_TEST_CASE(testBinaryFieldInIndex) doc->add(stringFldStored); // test for field count - BOOST_CHECK_EQUAL(2, doc->getFields().size()); + EXPECT_EQ(2, doc->getFields().size()); // add the doc to a ram index MockRAMDirectoryPtr dir = newLucene(); @@ -49,33 +52,32 @@ BOOST_AUTO_TEST_CASE(testBinaryFieldInIndex) // open a reader and fetch the document IndexReaderPtr reader = IndexReader::open(dir, false); DocumentPtr docFromReader = reader->document(0); - BOOST_CHECK(docFromReader); + EXPECT_TRUE(docFromReader); // fetch the binary stored field and compare it's content with the original one 
ByteArray storedTest = docFromReader->getBinaryValue(L"binaryStored"); String binaryFldStoredTest((wchar_t*)storedTest.get(), storedTest.size() / sizeof(wchar_t)); - BOOST_CHECK_EQUAL(binaryFldStoredTest, binaryValStored); + EXPECT_EQ(binaryFldStoredTest, binaryValStored); // fetch the string field and compare it's content with the original one String stringFldStoredTest = docFromReader->get(L"stringStored"); - BOOST_CHECK_EQUAL(stringFldStoredTest, binaryValStored); + EXPECT_EQ(stringFldStoredTest, binaryValStored); // delete the document from index reader->deleteDocument(0); - BOOST_CHECK_EQUAL(0, reader->numDocs()); + EXPECT_EQ(0, reader->numDocs()); reader->close(); dir->close(); } -BOOST_AUTO_TEST_CASE(testCompressionTools) -{ +TEST_F(BinaryDocumentTest, testCompressionTools) { ByteArray binaryCompressed = ByteArray::newInstance(binaryValCompressed.length() * sizeof(wchar_t)); std::wcsncpy((wchar_t*)binaryCompressed.get(), binaryValCompressed.c_str(), binaryValCompressed.length()); - + FieldablePtr binaryFldCompressed = newLucene(L"binaryCompressed", CompressionTools::compress(binaryCompressed), Field::STORE_YES); FieldablePtr stringFldCompressed = newLucene(L"stringCompressed", CompressionTools::compressString(binaryValCompressed), Field::STORE_YES); - + DocumentPtr doc = newLucene(); doc->add(binaryFldCompressed); @@ -90,17 +92,15 @@ BOOST_AUTO_TEST_CASE(testCompressionTools) // open a reader and fetch the document IndexReaderPtr reader = IndexReader::open(dir, false); DocumentPtr docFromReader = reader->document(0); - BOOST_CHECK(docFromReader); + EXPECT_TRUE(docFromReader); // fetch the binary compressed field and compare it's content with the original one ByteArray compressTest = CompressionTools::decompress(docFromReader->getBinaryValue(L"binaryCompressed")); String binaryFldCompressedTest((wchar_t*)compressTest.get(), compressTest.size() / sizeof(wchar_t)); - BOOST_CHECK_EQUAL(binaryFldCompressedTest, binaryValCompressed); - - 
BOOST_CHECK_EQUAL(CompressionTools::decompressString(docFromReader->getBinaryValue(L"stringCompressed")), binaryValCompressed); + EXPECT_EQ(binaryFldCompressedTest, binaryValCompressed); + + EXPECT_EQ(CompressionTools::decompressString(docFromReader->getBinaryValue(L"stringCompressed")), binaryValCompressed); reader->close(); dir->close(); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/document/DateFieldTest.cpp b/src/test/document/DateFieldTest.cpp index e8f3ed96..7c35f6e9 100644 --- a/src/test/document/DateFieldTest.cpp +++ b/src/test/document/DateFieldTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -10,31 +10,24 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(DateFieldTest, LuceneTestFixture) +typedef LuceneTestFixture DateFieldTest; -BOOST_AUTO_TEST_CASE(testMinDate) -{ - BOOST_CHECK_EQUAL(DateField::MIN_DATE_STRING(), L"000000000"); +TEST_F(DateFieldTest, testMinDate) { + EXPECT_EQ(DateField::MIN_DATE_STRING(), L"000000000"); } -BOOST_AUTO_TEST_CASE(testMaxDate) -{ - BOOST_CHECK_EQUAL(DateField::MAX_DATE_STRING(), L"zzzzzzzzz"); +TEST_F(DateFieldTest, testMaxDate) { + EXPECT_EQ(DateField::MAX_DATE_STRING(), L"zzzzzzzzz"); } -BOOST_AUTO_TEST_CASE(testDateToString) -{ - BOOST_CHECK_EQUAL(DateField::dateToString(boost::posix_time::ptime(boost::gregorian::date(2010, boost::gregorian::Jan, 14))), L"0g4erxmo0"); +TEST_F(DateFieldTest, testDateToString) { + EXPECT_EQ(DateField::dateToString(boost::posix_time::ptime(boost::gregorian::date(2010, boost::gregorian::Jan, 14))), L"0g4erxmo0"); } -BOOST_AUTO_TEST_CASE(testTimeToString) -{ - 
BOOST_CHECK_EQUAL(DateField::timeToString(1263427200000LL), L"0g4erxmo0"); +TEST_F(DateFieldTest, testTimeToString) { + EXPECT_EQ(DateField::timeToString(1263427200000LL), L"0g4erxmo0"); } -BOOST_AUTO_TEST_CASE(testStringToTime) -{ - BOOST_CHECK_EQUAL(DateField::stringToTime(L"0g4erxmo0"), 1263427200000LL); +TEST_F(DateFieldTest, testStringToTime) { + EXPECT_EQ(DateField::stringToTime(L"0g4erxmo0"), 1263427200000LL); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/document/DateToolsTest.cpp b/src/test/document/DateToolsTest.cpp index 8cba05b2..59612ad3 100644 --- a/src/test/document/DateToolsTest.cpp +++ b/src/test/document/DateToolsTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. ///////////////////////////////////////////////////////////////////////////// @@ -14,149 +14,131 @@ using namespace Lucene; using namespace boost::posix_time; using namespace boost::gregorian; -BOOST_FIXTURE_TEST_SUITE(DateToolsTest, LuceneTestFixture) +typedef LuceneTestFixture DateToolsTest; -BOOST_AUTO_TEST_CASE(testDateToString) -{ - BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_YEAR), L"2010"); - BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_MONTH), L"201001"); - BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_DAY), L"20100114"); - BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_HOUR), L"2010011403"); - BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_MINUTE), L"201001140341"); - 
BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_SECOND), L"20100114034105"); - BOOST_CHECK_EQUAL(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MILLISECOND), L"20100114034105123"); +TEST_F(DateToolsTest, testDateToString) { + EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_YEAR), L"2010"); + EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_MONTH), L"201001"); + EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14)), DateTools::RESOLUTION_DAY), L"20100114"); + EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_HOUR), L"2010011403"); + EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_MINUTE), L"201001140341"); + EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5)), DateTools::RESOLUTION_SECOND), L"20100114034105"); + EXPECT_EQ(DateTools::dateToString(ptime(date(2010, Jan, 14), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MILLISECOND), L"20100114034105123"); } -BOOST_AUTO_TEST_CASE(testTimeToString) -{ - BOOST_CHECK_EQUAL(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_YEAR), L"2010"); - BOOST_CHECK_EQUAL(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_MONTH), L"201001"); - BOOST_CHECK_EQUAL(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_DAY), L"20100114"); - BOOST_CHECK_EQUAL(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_HOUR), L"2010011403"); - BOOST_CHECK_EQUAL(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_MINUTE), L"201001140341"); - BOOST_CHECK_EQUAL(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_SECOND), 
L"20100114034105"); - BOOST_CHECK_EQUAL(DateTools::timeToString(1263440465123LL, DateTools::RESOLUTION_MILLISECOND), L"20100114034105123"); +TEST_F(DateToolsTest, testTimeToString) { + EXPECT_EQ(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_YEAR), L"2010"); + EXPECT_EQ(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_MONTH), L"201001"); + EXPECT_EQ(DateTools::timeToString(1263427200000LL, DateTools::RESOLUTION_DAY), L"20100114"); + EXPECT_EQ(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_HOUR), L"2010011403"); + EXPECT_EQ(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_MINUTE), L"201001140341"); + EXPECT_EQ(DateTools::timeToString(1263440465000LL, DateTools::RESOLUTION_SECOND), L"20100114034105"); + EXPECT_EQ(DateTools::timeToString(1263440465123LL, DateTools::RESOLUTION_MILLISECOND), L"20100114034105123"); } -BOOST_AUTO_TEST_CASE(testStringToTime) -{ - BOOST_CHECK_EQUAL(DateTools::stringToTime(L"2010"), 1262304000000LL); - BOOST_CHECK_EQUAL(DateTools::stringToTime(L"201001"), 1262304000000LL); - BOOST_CHECK_EQUAL(DateTools::stringToTime(L"20100114"), 1263427200000LL); - BOOST_CHECK_EQUAL(DateTools::stringToTime(L"2010011403"), 1263438000000LL); - BOOST_CHECK_EQUAL(DateTools::stringToTime(L"201001140341"), 1263440460000LL); - BOOST_CHECK_EQUAL(DateTools::stringToTime(L"20100114034105"), 1263440465000LL); - BOOST_CHECK_EQUAL(DateTools::stringToTime(L"20100114034105123"), 1263440465123LL); +TEST_F(DateToolsTest, testStringToTime) { + EXPECT_EQ(DateTools::stringToTime(L"2010"), 1262304000000LL); + EXPECT_EQ(DateTools::stringToTime(L"201001"), 1262304000000LL); + EXPECT_EQ(DateTools::stringToTime(L"20100114"), 1263427200000LL); + EXPECT_EQ(DateTools::stringToTime(L"2010011403"), 1263438000000LL); + EXPECT_EQ(DateTools::stringToTime(L"201001140341"), 1263440460000LL); + EXPECT_EQ(DateTools::stringToTime(L"20100114034105"), 1263440465000LL); + EXPECT_EQ(DateTools::stringToTime(L"20100114034105123"), 
1263440465123LL); } -BOOST_AUTO_TEST_CASE(testDateRound) -{ - BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_YEAR), ptime(date(2010, Jan, 1))); - BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MONTH), ptime(date(2010, Feb, 1))); - BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_DAY), ptime(date(2010, Feb, 16))); - BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_HOUR), ptime(date(2010, Feb, 16), hours(3))); - BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MINUTE), ptime(date(2010, Feb, 16), hours(3) + minutes(41))); - BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_SECOND), ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5))); - BOOST_CHECK_EQUAL(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MILLISECOND), ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123))); +TEST_F(DateToolsTest, testDateRound) { + EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_YEAR), ptime(date(2010, Jan, 1))); + EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MONTH), ptime(date(2010, Feb, 1))); + EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_DAY), ptime(date(2010, 
Feb, 16))); + EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_HOUR), ptime(date(2010, Feb, 16), hours(3))); + EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MINUTE), ptime(date(2010, Feb, 16), hours(3) + minutes(41))); + EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_SECOND), ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5))); + EXPECT_EQ(DateTools::round(ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123)), DateTools::RESOLUTION_MILLISECOND), ptime(date(2010, Feb, 16), hours(3) + minutes(41) + seconds(5) + milliseconds(123))); } -BOOST_AUTO_TEST_CASE(testParseDateGB) -{ +TEST_F(DateToolsTest, testParseDateGB) { DateTools::setDateOrder(DateTools::DATEORDER_DMY); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01122005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"011205"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/12/2005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/12/05"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/12/2005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/12/05"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/1/05"), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/Jan/05"), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/Jan/05"), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/Jan/2005"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01122005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"011205"), ptime(date(2005, 12, 01))); + 
EXPECT_EQ(DateTools::parseDate(L"01/12/2005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"01/12/05"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/12/2005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/12/05"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/1/05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/Jan/05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01/Jan/05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01/Jan/2005"), ptime(date(2005, 01, 01))); } -BOOST_AUTO_TEST_CASE(testParseDateUS) -{ +TEST_F(DateToolsTest, testParseDateUS) { DateTools::setDateOrder(DateTools::DATEORDER_MDY); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12012005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"120105"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/01/2005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/01/05"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/1/2005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/1/05"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/1/05"), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/1/05"), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/01/05"), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/01/2005"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"12012005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"120105"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"12/01/2005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"12/01/05"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"12/1/2005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"12/1/05"), 
ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/1/05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"Jan/1/05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"Jan/01/05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"Jan/01/2005"), ptime(date(2005, 01, 01))); } -BOOST_AUTO_TEST_CASE(testParseDateLocale) -{ +TEST_F(DateToolsTest, testParseDateLocale) { bool hasThisLocale = false; - - try - { + + try { std::locale("en_GB.UTF-8"); hasThisLocale = true; - } - catch (...) - { + } catch (...) { } - if (hasThisLocale) - { + if (hasThisLocale) { DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01122005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"011205", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/12/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/12/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/12/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/12/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/1/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/Jan/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/Jan/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01/Jan/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01122005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"011205", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); + 
EXPECT_EQ(DateTools::parseDate(L"01/12/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"01/12/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/12/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/12/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/1/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/Jan/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01/Jan/05", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01/Jan/2005", std::locale("en_GB.UTF-8")), ptime(date(2005, 01, 01))); } - try - { + try { std::locale("en_US.UTF-8"); hasThisLocale = true; - } - catch (...) - { + } catch (...) { hasThisLocale = false; } - if (hasThisLocale) - { + if (hasThisLocale) { DateTools::setDateOrder(DateTools::DATEORDER_LOCALE); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12012005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"120105", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/01/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/01/05", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/1/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"12/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); - 
BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/01/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"Jan/01/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"12012005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"120105", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"12/01/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"12/01/05", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"12/1/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"12/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"Jan/1/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"Jan/01/05", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"Jan/01/2005", std::locale("en_US.UTF-8")), ptime(date(2005, 01, 01))); } } -BOOST_AUTO_TEST_CASE(testParseDateSeparator) -{ +TEST_F(DateToolsTest, testParseDateSeparator) { DateTools::setDateOrder(DateTools::DATEORDER_DMY); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01122005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"011205"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01-12-2005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01 12 05"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1.12.2005"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1.12.05"), ptime(date(2005, 12, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"1 1 05"), ptime(date(2005, 01, 01))); - 
BOOST_CHECK_EQUAL(DateTools::parseDate(L"1 Jan 05"), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01-Jan-05"), ptime(date(2005, 01, 01))); - BOOST_CHECK_EQUAL(DateTools::parseDate(L"01,Jan,2005"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01122005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"011205"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"01-12-2005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"01 12 05"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1.12.2005"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1.12.05"), ptime(date(2005, 12, 01))); + EXPECT_EQ(DateTools::parseDate(L"1 1 05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"1 Jan 05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01-Jan-05"), ptime(date(2005, 01, 01))); + EXPECT_EQ(DateTools::parseDate(L"01,Jan,2005"), ptime(date(2005, 01, 01))); } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/document/DocumentTest.cpp b/src/test/document/DocumentTest.cpp index ae0b61f4..274ed72e 100644 --- a/src/test/document/DocumentTest.cpp +++ b/src/test/document/DocumentTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -19,13 +19,12 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(DocumentTest, LuceneTestFixture) +typedef LuceneTestFixture DocumentTest; static String binaryVal = L"this text will be stored as a byte array in the index"; static String binaryVal2 = L"this text will be also stored as a byte array in the index"; -static DocumentPtr makeDocumentWithFields() -{ +static DocumentPtr makeDocumentWithFields() { DocumentPtr doc = newLucene(); doc->add(newLucene(L"keyword", L"test1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); doc->add(newLucene(L"keyword", L"test2", Field::STORE_YES, Field::INDEX_NOT_ANALYZED)); @@ -38,36 +37,34 @@ static DocumentPtr makeDocumentWithFields() return doc; } -static void checkDocument(DocumentPtr doc, bool fromIndex) -{ +static void checkDocument(const DocumentPtr& doc, bool fromIndex) { Collection keywordFieldValues = doc->getValues(L"keyword"); Collection textFieldValues = doc->getValues(L"text"); Collection unindexedFieldValues = doc->getValues(L"unindexed"); Collection unstoredFieldValues = doc->getValues(L"unstored"); - BOOST_CHECK_EQUAL(keywordFieldValues.size(), 2); - BOOST_CHECK_EQUAL(textFieldValues.size(), 2); - BOOST_CHECK_EQUAL(unindexedFieldValues.size(), 2); + EXPECT_EQ(keywordFieldValues.size(), 2); + EXPECT_EQ(textFieldValues.size(), 2); + EXPECT_EQ(unindexedFieldValues.size(), 2); // this test cannot work for documents retrieved from the index since unstored fields will obviously not be returned - if (!fromIndex) - BOOST_CHECK_EQUAL(unstoredFieldValues.size(), 2); - - BOOST_CHECK_EQUAL(keywordFieldValues[0], L"test1"); - BOOST_CHECK_EQUAL(keywordFieldValues[1], L"test2"); - BOOST_CHECK_EQUAL(textFieldValues[0], L"test1"); - BOOST_CHECK_EQUAL(textFieldValues[1], L"test2"); - BOOST_CHECK_EQUAL(unindexedFieldValues[0], L"test1"); - BOOST_CHECK_EQUAL(unindexedFieldValues[1], L"test2"); + if (!fromIndex) { + EXPECT_EQ(unstoredFieldValues.size(), 
2); + } + + EXPECT_EQ(keywordFieldValues[0], L"test1"); + EXPECT_EQ(keywordFieldValues[1], L"test2"); + EXPECT_EQ(textFieldValues[0], L"test1"); + EXPECT_EQ(textFieldValues[1], L"test2"); + EXPECT_EQ(unindexedFieldValues[0], L"test1"); + EXPECT_EQ(unindexedFieldValues[1], L"test2"); // this test cannot work for documents retrieved from the index since unstored fields will obviously not be returned - if (!fromIndex) - { - BOOST_CHECK_EQUAL(unstoredFieldValues[0], L"test1"); - BOOST_CHECK_EQUAL(unstoredFieldValues[1], L"test2"); + if (!fromIndex) { + EXPECT_EQ(unstoredFieldValues[0], L"test1"); + EXPECT_EQ(unstoredFieldValues[1], L"test2"); } } - -BOOST_AUTO_TEST_CASE(testBinaryField) -{ + +TEST_F(DocumentTest, testBinaryField) { DocumentPtr doc = newLucene(); FieldablePtr stringFld = newLucene(L"string", binaryVal, Field::STORE_YES, Field::INDEX_NO); @@ -82,93 +79,97 @@ BOOST_AUTO_TEST_CASE(testBinaryField) doc->add(stringFld); doc->add(binaryFld); - BOOST_CHECK_EQUAL(2, doc->getFields().size()); + EXPECT_EQ(2, doc->getFields().size()); - BOOST_CHECK(binaryFld->isBinary()); - BOOST_CHECK(binaryFld->isStored()); - BOOST_CHECK(!binaryFld->isIndexed()); - BOOST_CHECK(!binaryFld->isTokenized()); + EXPECT_TRUE(binaryFld->isBinary()); + EXPECT_TRUE(binaryFld->isStored()); + EXPECT_TRUE(!binaryFld->isIndexed()); + EXPECT_TRUE(!binaryFld->isTokenized()); ByteArray bytesTest = doc->getBinaryValue(L"binary"); String binaryTest((wchar_t*)bytesTest.get(), bytesTest.size() / sizeof(wchar_t)); - BOOST_CHECK_EQUAL(binaryTest, binaryVal); + EXPECT_EQ(binaryTest, binaryVal); String stringTest = doc->get(L"string"); - BOOST_CHECK_EQUAL(binaryTest, stringTest); + EXPECT_EQ(binaryTest, stringTest); doc->add(binaryFld2); - BOOST_CHECK_EQUAL(3, doc->getFields().size()); + EXPECT_EQ(3, doc->getFields().size()); Collection binaryTests = doc->getBinaryValues(L"binary"); - BOOST_CHECK_EQUAL(2, binaryTests.size()); + EXPECT_EQ(2, binaryTests.size()); bytesTest = binaryTests[0]; binaryTest = 
String((wchar_t*)bytesTest.get(), bytesTest.size() / sizeof(wchar_t)); - + ByteArray bytesTest2 = binaryTests[1]; String binaryTest2((wchar_t*)bytesTest2.get(), bytesTest2.size() / sizeof(wchar_t)); - - BOOST_CHECK_NE(binaryTest, binaryTest2); - BOOST_CHECK_EQUAL(binaryTest, binaryVal); - BOOST_CHECK_EQUAL(binaryTest2, binaryVal2); + EXPECT_NE(binaryTest, binaryTest2); + + EXPECT_EQ(binaryTest, binaryVal); + EXPECT_EQ(binaryTest2, binaryVal2); doc->removeField(L"string"); - BOOST_CHECK_EQUAL(2, doc->getFields().size()); + EXPECT_EQ(2, doc->getFields().size()); doc->removeFields(L"binary"); - BOOST_CHECK_EQUAL(0, doc->getFields().size()); + EXPECT_EQ(0, doc->getFields().size()); } /// Tests {@link Document#removeField(String)} method for a brand new Document that has not been indexed yet. -BOOST_AUTO_TEST_CASE(testRemoveForNewDocument) -{ +TEST_F(DocumentTest, testRemoveForNewDocument) { DocumentPtr doc = makeDocumentWithFields(); - BOOST_CHECK_EQUAL(8, doc->getFields().size()); + EXPECT_EQ(8, doc->getFields().size()); doc->removeFields(L"keyword"); - BOOST_CHECK_EQUAL(6, doc->getFields().size()); + EXPECT_EQ(6, doc->getFields().size()); doc->removeFields(L"doesnotexists"); // removing non-existing fields is silently ignored doc->removeFields(L"keyword"); // removing a field more than once - BOOST_CHECK_EQUAL(6, doc->getFields().size()); + EXPECT_EQ(6, doc->getFields().size()); doc->removeField(L"text"); - BOOST_CHECK_EQUAL(5, doc->getFields().size()); + EXPECT_EQ(5, doc->getFields().size()); doc->removeField(L"text"); - BOOST_CHECK_EQUAL(4, doc->getFields().size()); + EXPECT_EQ(4, doc->getFields().size()); doc->removeField(L"text"); - BOOST_CHECK_EQUAL(4, doc->getFields().size()); + EXPECT_EQ(4, doc->getFields().size()); doc->removeField(L"doesnotexists"); // removing non-existing fields is silently ignored - BOOST_CHECK_EQUAL(4, doc->getFields().size()); + EXPECT_EQ(4, doc->getFields().size()); doc->removeFields(L"unindexed"); - BOOST_CHECK_EQUAL(2, 
doc->getFields().size()); + EXPECT_EQ(2, doc->getFields().size()); doc->removeFields(L"unstored"); - BOOST_CHECK_EQUAL(0, doc->getFields().size()); + EXPECT_EQ(0, doc->getFields().size()); doc->removeFields(L"doesnotexists"); // removing non-existing fields is silently ignored - BOOST_CHECK_EQUAL(0, doc->getFields().size()); + EXPECT_EQ(0, doc->getFields().size()); } -BOOST_AUTO_TEST_CASE(testConstructorExceptions) -{ +TEST_F(DocumentTest, testConstructorExceptions) { newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO); // ok newLucene(L"name", L"value", Field::STORE_NO, Field::INDEX_NOT_ANALYZED); // ok - - BOOST_CHECK_EXCEPTION(newLucene(L"name", L"value", Field::STORE_NO, Field::INDEX_NO), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); - + + try { + newLucene(L"name", L"value", Field::STORE_NO, Field::INDEX_NO); + } catch (IllegalArgumentException& e) { + EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); + } + newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_NO); // ok - - BOOST_CHECK_EXCEPTION(newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_YES), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); + + try { + newLucene(L"name", L"value", Field::STORE_YES, Field::INDEX_NO, Field::TERM_VECTOR_YES); + } catch (IllegalArgumentException& e) { + EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); + } } /// Tests {@link Document#getValues(String)} method for a brand new Document that has not been indexed yet. -BOOST_AUTO_TEST_CASE(testGetValuesForNewDocument) -{ +TEST_F(DocumentTest, testGetValuesForNewDocument) { checkDocument(makeDocumentWithFields(), false); } /// Tests {@link Document#getValues(String)} method for a Document retrieved from an index. 
-BOOST_AUTO_TEST_CASE(testGetValuesForIndexedDocument) -{ +TEST_F(DocumentTest, testGetValuesForIndexedDocument) { RAMDirectoryPtr dir = newLucene(); IndexWriterPtr writer = newLucene(dir, newLucene(LuceneVersion::LUCENE_CURRENT), true, IndexWriter::MaxFieldLengthLIMITED); writer->addDocument(makeDocumentWithFields()); @@ -181,14 +182,13 @@ BOOST_AUTO_TEST_CASE(testGetValuesForIndexedDocument) // ensure that queries return expected results without DateFilter first Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; - BOOST_CHECK_EQUAL(1, hits.size()); + EXPECT_EQ(1, hits.size()); checkDocument(searcher->doc(hits[0]->doc), true); searcher->close(); } -BOOST_AUTO_TEST_CASE(testFieldSetValue) -{ +TEST_F(DocumentTest, testFieldSetValue) { FieldPtr field = newLucene(L"id", L"id1", Field::STORE_YES, Field::INDEX_NOT_ANALYZED); DocumentPtr doc = newLucene(); doc->add(field); @@ -204,38 +204,44 @@ BOOST_AUTO_TEST_CASE(testFieldSetValue) writer->close(); SearcherPtr searcher = newLucene(dir, true); - + QueryPtr query = newLucene(newLucene(L"keyword", L"test")); // ensure that queries return expected results without DateFilter first Collection hits = searcher->search(query, FilterPtr(), 1000)->scoreDocs; - BOOST_CHECK_EQUAL(3, hits.size()); + EXPECT_EQ(3, hits.size()); int32_t result = 0; - for (int32_t i = 0; i < 3; ++i) - { + for (int32_t i = 0; i < 3; ++i) { DocumentPtr doc2 = searcher->doc(hits[i]->doc); FieldPtr f = doc2->getField(L"id"); - if (f->stringValue() == L"id1") + if (f->stringValue() == L"id1") { result |= 1; - else if (f->stringValue() == L"id2") + } else if (f->stringValue() == L"id2") { result |= 2; - else if (f->stringValue() == L"id3") + } else if (f->stringValue() == L"id3") { result |= 4; - else - BOOST_FAIL("unexpected id field"); + } else { + FAIL() << "unexpected id field"; + } } searcher->close(); dir->close(); - BOOST_CHECK_EQUAL(7, result); + EXPECT_EQ(7, result); } -BOOST_AUTO_TEST_CASE(testFieldSetValueChangeBinary) -{ 
+TEST_F(DocumentTest, testFieldSetValueChangeBinary) { FieldPtr field1 = newLucene(L"field1", ByteArray::newInstance(0), Field::STORE_YES); FieldPtr field2 = newLucene(L"field2", L"", Field::STORE_YES, Field::INDEX_ANALYZED); - BOOST_CHECK_EXCEPTION(field1->setValue(L"abc"), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); - BOOST_CHECK_EXCEPTION(field2->setValue(ByteArray::newInstance(0)), IllegalArgumentException, check_exception(LuceneException::IllegalArgument)); -} + try { + field1->setValue(L"abc"); + } catch (IllegalArgumentException& e) { + EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); + } -BOOST_AUTO_TEST_SUITE_END() + try { + field2->setValue(ByteArray::newInstance(0)); + } catch (IllegalArgumentException& e) { + EXPECT_TRUE(check_exception(LuceneException::IllegalArgument)(e)); + } +} diff --git a/src/test/document/NumberToolsTest.cpp b/src/test/document/NumberToolsTest.cpp index 7bc5f78f..c4190c12 100644 --- a/src/test/document/NumberToolsTest.cpp +++ b/src/test/document/NumberToolsTest.cpp @@ -1,5 +1,5 @@ ///////////////////////////////////////////////////////////////////////////// -// Copyright (c) 2009-2011 Alan Wright. All rights reserved. +// Copyright (c) 2009-2014 Alan Wright. All rights reserved. // Distributable under the terms of either the Apache License (Version 2.0) // or the GNU Lesser General Public License. 
///////////////////////////////////////////////////////////////////////////// @@ -10,47 +10,50 @@ using namespace Lucene; -BOOST_FIXTURE_TEST_SUITE(NumberToolsTest, LuceneTestFixture) +typedef LuceneTestFixture NumberToolsTest; -BOOST_AUTO_TEST_CASE(testMinValue) -{ - BOOST_CHECK_EQUAL(NumberTools::MIN_STRING_VALUE(), L"-0000000000000"); +TEST_F(NumberToolsTest, testMinValue) { + EXPECT_EQ(NumberTools::MIN_STRING_VALUE(), L"-0000000000000"); } -BOOST_AUTO_TEST_CASE(testMaxValue) -{ - BOOST_CHECK_EQUAL(NumberTools::MAX_STRING_VALUE(), L"01y2p0ij32e8e7"); +TEST_F(NumberToolsTest, testMaxValue) { + EXPECT_EQ(NumberTools::MAX_STRING_VALUE(), L"01y2p0ij32e8e7"); } -BOOST_AUTO_TEST_CASE(testValueSize) -{ - BOOST_CHECK_EQUAL(NumberTools::STR_SIZE(), 14); +TEST_F(NumberToolsTest, testValueSize) { + EXPECT_EQ(NumberTools::STR_SIZE(), 14); } -BOOST_AUTO_TEST_CASE(testLongToString) -{ - BOOST_CHECK_EQUAL(NumberTools::longToString(LLONG_MIN), L"-0000000000000"); - BOOST_CHECK_EQUAL(NumberTools::longToString(LLONG_MAX), L"01y2p0ij32e8e7"); - BOOST_CHECK_EQUAL(NumberTools::longToString(1LL), L"00000000000001"); - BOOST_CHECK_EQUAL(NumberTools::longToString(999LL), L"000000000000rr"); - BOOST_CHECK_EQUAL(NumberTools::longToString(34234LL), L"00000000000qey"); - BOOST_CHECK_EQUAL(NumberTools::longToString(4345325254LL), L"00000001zv3efa"); - BOOST_CHECK_EQUAL(NumberTools::longToString(986778657657575LL), L"00009ps7uuwdlz"); - BOOST_CHECK_EQUAL(NumberTools::longToString(23232143543434234LL), L"0006cr3vell8my"); +TEST_F(NumberToolsTest, testLongToString) { + EXPECT_EQ(NumberTools::longToString(LLONG_MIN), L"-0000000000000"); + EXPECT_EQ(NumberTools::longToString(LLONG_MAX), L"01y2p0ij32e8e7"); + EXPECT_EQ(NumberTools::longToString(1LL), L"00000000000001"); + EXPECT_EQ(NumberTools::longToString(999LL), L"000000000000rr"); + EXPECT_EQ(NumberTools::longToString(34234LL), L"00000000000qey"); + EXPECT_EQ(NumberTools::longToString(4345325254LL), L"00000001zv3efa"); + 
EXPECT_EQ(NumberTools::longToString(986778657657575LL), L"00009ps7uuwdlz"); + EXPECT_EQ(NumberTools::longToString(23232143543434234LL), L"0006cr3vell8my"); } -BOOST_AUTO_TEST_CASE(testStringToLong) -{ - BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"-0000000000000"), LLONG_MIN); - BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"01y2p0ij32e8e7"), LLONG_MAX); - BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"00000000000001"), 1LL); - BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"000000000000rr"), 999LL); - BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"00000000000qey"), 34234LL); - BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"00000001zv3efa"), 4345325254LL); - BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"00009ps7uuwdlz"), 986778657657575LL); - BOOST_CHECK_EQUAL(NumberTools::stringToLong(L"0006cr3vell8my"), 23232143543434234LL); - BOOST_CHECK_EXCEPTION(NumberTools::stringToLong(L"32132"), LuceneException, check_exception(LuceneException::NumberFormat)); // wrong length - BOOST_CHECK_EXCEPTION(NumberTools::stringToLong(L"9006cr3vell8my"), LuceneException, check_exception(LuceneException::NumberFormat)); // wrong prefix +TEST_F(NumberToolsTest, testStringToLong) { + EXPECT_EQ(NumberTools::stringToLong(L"-0000000000000"), LLONG_MIN); + EXPECT_EQ(NumberTools::stringToLong(L"01y2p0ij32e8e7"), LLONG_MAX); + EXPECT_EQ(NumberTools::stringToLong(L"00000000000001"), 1LL); + EXPECT_EQ(NumberTools::stringToLong(L"000000000000rr"), 999LL); + EXPECT_EQ(NumberTools::stringToLong(L"00000000000qey"), 34234LL); + EXPECT_EQ(NumberTools::stringToLong(L"00000001zv3efa"), 4345325254LL); + EXPECT_EQ(NumberTools::stringToLong(L"00009ps7uuwdlz"), 986778657657575LL); + EXPECT_EQ(NumberTools::stringToLong(L"0006cr3vell8my"), 23232143543434234LL); + + try { + NumberTools::stringToLong(L"32132"); + } catch (LuceneException& e) { + EXPECT_TRUE(check_exception(LuceneException::NumberFormat)(e)); // wrong length + } + + try { + NumberTools::stringToLong(L"9006cr3vell8my"); + } catch (LuceneException& e) 
{ + EXPECT_TRUE(check_exception(LuceneException::NumberFormat)(e)); // wrong prefix + } } - -BOOST_AUTO_TEST_SUITE_END() diff --git a/src/test/gtest/.clang-format b/src/test/gtest/.clang-format new file mode 100644 index 00000000..5b9bfe6d --- /dev/null +++ b/src/test/gtest/.clang-format @@ -0,0 +1,4 @@ +# Run manually to reformat a file: +# clang-format -i --style=file +Language: Cpp +BasedOnStyle: Google diff --git a/src/test/gtest/.gitignore b/src/test/gtest/.gitignore new file mode 100644 index 00000000..f08cb72a --- /dev/null +++ b/src/test/gtest/.gitignore @@ -0,0 +1,84 @@ +# Ignore CI build directory +build/ +xcuserdata +cmake-build-debug/ +.idea/ +bazel-bin +bazel-genfiles +bazel-googletest +bazel-out +bazel-testlogs +# python +*.pyc + +# Visual Studio files +.vs +*.sdf +*.opensdf +*.VC.opendb +*.suo +*.user +_ReSharper.Caches/ +Win32-Debug/ +Win32-Release/ +x64-Debug/ +x64-Release/ + +# Ignore autoconf / automake files +Makefile.in +aclocal.m4 +configure +build-aux/ +autom4te.cache/ +googletest/m4/libtool.m4 +googletest/m4/ltoptions.m4 +googletest/m4/ltsugar.m4 +googletest/m4/ltversion.m4 +googletest/m4/lt~obsolete.m4 +googlemock/m4 + +# Ignore generated directories. +googlemock/fused-src/ +googletest/fused-src/ + +# macOS files +.DS_Store +googletest/.DS_Store +googletest/xcode/.DS_Store + +# Ignore cmake generated directories and files. 
+CMakeFiles +CTestTestfile.cmake +Makefile +cmake_install.cmake +googlemock/CMakeFiles +googlemock/CTestTestfile.cmake +googlemock/Makefile +googlemock/cmake_install.cmake +googlemock/gtest +/bin +/googlemock/gmock.dir +/googlemock/gmock_main.dir +/googlemock/RUN_TESTS.vcxproj.filters +/googlemock/RUN_TESTS.vcxproj +/googlemock/INSTALL.vcxproj.filters +/googlemock/INSTALL.vcxproj +/googlemock/gmock_main.vcxproj.filters +/googlemock/gmock_main.vcxproj +/googlemock/gmock.vcxproj.filters +/googlemock/gmock.vcxproj +/googlemock/gmock.sln +/googlemock/ALL_BUILD.vcxproj.filters +/googlemock/ALL_BUILD.vcxproj +/lib +/Win32 +/ZERO_CHECK.vcxproj.filters +/ZERO_CHECK.vcxproj +/RUN_TESTS.vcxproj.filters +/RUN_TESTS.vcxproj +/INSTALL.vcxproj.filters +/INSTALL.vcxproj +/googletest-distribution.sln +/CMakeCache.txt +/ALL_BUILD.vcxproj.filters +/ALL_BUILD.vcxproj diff --git a/src/test/gtest/.travis.yml b/src/test/gtest/.travis.yml new file mode 100644 index 00000000..04b51dde --- /dev/null +++ b/src/test/gtest/.travis.yml @@ -0,0 +1,73 @@ +# Build matrix / environment variable are explained on: +# https://docs.travis-ci.com/user/customizing-the-build/ +# This file can be validated on: +# http://lint.travis-ci.org/ + +language: cpp + +# Define the matrix explicitly, manually expanding the combinations of (os, compiler, env). +# It is more tedious, but grants us far more flexibility. 
+matrix: + include: + - os: linux + before_install: chmod -R +x ./ci/*platformio.sh + install: ./ci/install-platformio.sh + script: ./ci/build-platformio.sh + - os: linux + dist: xenial + compiler: gcc + install: ./ci/install-linux.sh && ./ci/log-config.sh + script: ./ci/build-linux-bazel.sh + - os: linux + dist: xenial + compiler: clang + install: ./ci/install-linux.sh && ./ci/log-config.sh + script: ./ci/build-linux-bazel.sh + - os: linux + compiler: gcc + env: BUILD_TYPE=Debug VERBOSE=1 CXX_FLAGS=-std=c++11 + - os: linux + compiler: clang + env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 -Wgnu-zero-variadic-macro-arguments + - os: linux + compiler: clang + env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 NO_EXCEPTION=ON NO_RTTI=ON COMPILER_IS_GNUCXX=ON + - os: osx + compiler: gcc + env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 HOMEBREW_LOGS=~/homebrew-logs HOMEBREW_TEMP=~/homebrew-temp + - os: osx + compiler: clang + env: BUILD_TYPE=Release VERBOSE=1 CXX_FLAGS=-std=c++11 HOMEBREW_LOGS=~/homebrew-logs HOMEBREW_TEMP=~/homebrew-temp + +# These are the install and build (script) phases for the most common entries in the matrix. They could be included +# in each entry in the matrix, but that is just repetitive. +install: + - ./ci/install-${TRAVIS_OS_NAME}.sh + - . ./ci/env-${TRAVIS_OS_NAME}.sh + - ./ci/log-config.sh + +script: ./ci/travis.sh + +# This section installs the necessary dependencies. 
+addons: + apt: + # List of whitelisted in travis packages for ubuntu-precise can be found here: + # https://github.com/travis-ci/apt-package-whitelist/blob/master/ubuntu-precise + # List of whitelisted in travis apt-sources: + # https://github.com/travis-ci/apt-source-whitelist/blob/master/ubuntu.json + sources: + - ubuntu-toolchain-r-test + - llvm-toolchain-precise-3.9 + packages: + - g++-4.9 + - clang-3.9 + update: true + homebrew: + packages: + - ccache + - gcc@4.9 + - llvm@4 + update: true + +notifications: + email: false diff --git a/src/test/gtest/BUILD.bazel b/src/test/gtest/BUILD.bazel new file mode 100644 index 00000000..9b48aee5 --- /dev/null +++ b/src/test/gtest/BUILD.bazel @@ -0,0 +1,179 @@ +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Bazel Build for Google C++ Testing Framework(Google Test) + +load("@rules_cc//cc:defs.bzl", "cc_library", "cc_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +config_setting( + name = "windows", + constraint_values = ["@bazel_tools//platforms:windows"], +) + +config_setting( + name = "has_absl", + values = {"define": "absl=1"}, +) + +# Library that defines the FRIEND_TEST macro. +cc_library( + name = "gtest_prod", + hdrs = ["googletest/include/gtest/gtest_prod.h"], + includes = ["googletest/include"], +) + +# Google Test including Google Mock +cc_library( + name = "gtest", + srcs = glob( + include = [ + "googletest/src/*.cc", + "googletest/src/*.h", + "googletest/include/gtest/**/*.h", + "googlemock/src/*.cc", + "googlemock/include/gmock/**/*.h", + ], + exclude = [ + "googletest/src/gtest-all.cc", + "googletest/src/gtest_main.cc", + "googlemock/src/gmock-all.cc", + "googlemock/src/gmock_main.cc", + ], + ), + hdrs = glob([ + "googletest/include/gtest/*.h", + "googlemock/include/gmock/*.h", + ]), + copts = select({ + ":windows": [], + "//conditions:default": ["-pthread"], + }), + defines = select({ + ":has_absl": ["GTEST_HAS_ABSL=1"], + "//conditions:default": [], + }), + features = select({ + ":windows": ["windows_export_all_symbols"], + "//conditions:default": [], + }), + includes = [ + "googlemock", + "googlemock/include", + "googletest", + "googletest/include", + ], + linkopts = select({ + 
":windows": [], + "//conditions:default": ["-pthread"], + }), + deps = select({ + ":has_absl": [ + "@com_google_absl//absl/debugging:failure_signal_handler", + "@com_google_absl//absl/debugging:stacktrace", + "@com_google_absl//absl/debugging:symbolize", + "@com_google_absl//absl/strings", + "@com_google_absl//absl/types:optional", + "@com_google_absl//absl/types:variant", + ], + "//conditions:default": [], + }), +) + +cc_library( + name = "gtest_main", + srcs = ["googlemock/src/gmock_main.cc"], + features = select({ + ":windows": ["windows_export_all_symbols"], + "//conditions:default": [], + }), + deps = [":gtest"], +) + +# The following rules build samples of how to use gTest. +cc_library( + name = "gtest_sample_lib", + srcs = [ + "googletest/samples/sample1.cc", + "googletest/samples/sample2.cc", + "googletest/samples/sample4.cc", + ], + hdrs = [ + "googletest/samples/prime_tables.h", + "googletest/samples/sample1.h", + "googletest/samples/sample2.h", + "googletest/samples/sample3-inl.h", + "googletest/samples/sample4.h", + ], + features = select({ + ":windows": ["windows_export_all_symbols"], + "//conditions:default": [], + }), +) + +cc_test( + name = "gtest_samples", + size = "small", + # All Samples except: + # sample9 (main) + # sample10 (main and takes a command line option and needs to be separate) + srcs = [ + "googletest/samples/sample1_unittest.cc", + "googletest/samples/sample2_unittest.cc", + "googletest/samples/sample3_unittest.cc", + "googletest/samples/sample4_unittest.cc", + "googletest/samples/sample5_unittest.cc", + "googletest/samples/sample6_unittest.cc", + "googletest/samples/sample7_unittest.cc", + "googletest/samples/sample8_unittest.cc", + ], + linkstatic = 0, + deps = [ + "gtest_sample_lib", + ":gtest_main", + ], +) + +cc_test( + name = "sample9_unittest", + size = "small", + srcs = ["googletest/samples/sample9_unittest.cc"], + deps = [":gtest"], +) + +cc_test( + name = "sample10_unittest", + size = "small", + srcs = 
["googletest/samples/sample10_unittest.cc"], + deps = [":gtest"], +) diff --git a/src/test/gtest/CMakeLists.txt b/src/test/gtest/CMakeLists.txt new file mode 100644 index 00000000..f11bbb52 --- /dev/null +++ b/src/test/gtest/CMakeLists.txt @@ -0,0 +1,36 @@ +# Note: CMake support is community-based. The maintainers do not use CMake +# internally. + +cmake_minimum_required(VERSION 2.8.8) + +if (POLICY CMP0048) + cmake_policy(SET CMP0048 NEW) +endif (POLICY CMP0048) + +project(googletest-distribution) +set(GOOGLETEST_VERSION 1.10.0) + +if (CMAKE_VERSION VERSION_LESS "3.1") + add_definitions(-std=c++11) +else() + set(CMAKE_CXX_STANDARD 11) + set(CMAKE_CXX_STANDARD_REQUIRED ON) + if(NOT CYGWIN) + set(CMAKE_CXX_EXTENSIONS OFF) + endif() +endif() + +enable_testing() + +include(CMakeDependentOption) +include(GNUInstallDirs) + +#Note that googlemock target already builds googletest +option(BUILD_GMOCK "Builds the googlemock subproject" ON) +option(INSTALL_GTEST "Enable installation of googletest. (Projects embedding googletest may want to turn this OFF.)" ON) + +if(BUILD_GMOCK) + add_subdirectory( googlemock ) +else() + add_subdirectory( googletest ) +endif() diff --git a/src/test/gtest/CONTRIBUTING.md b/src/test/gtest/CONTRIBUTING.md new file mode 100644 index 00000000..30c8d890 --- /dev/null +++ b/src/test/gtest/CONTRIBUTING.md @@ -0,0 +1,142 @@ +# How to become a contributor and submit your own code + +## Contributor License Agreements + +We'd love to accept your patches! Before we can take them, we have to jump a +couple of legal hurdles. + +Please fill out either the individual or corporate Contributor License Agreement +(CLA). + +* If you are an individual writing original source code and you're sure you + own the intellectual property, then you'll need to sign an + [individual CLA](https://developers.google.com/open-source/cla/individual). 
+* If you work for a company that wants to allow you to contribute your work, + then you'll need to sign a + [corporate CLA](https://developers.google.com/open-source/cla/corporate). + +Follow either of the two links above to access the appropriate CLA and +instructions for how to sign and return it. Once we receive it, we'll be able to +accept your pull requests. + +## Are you a Googler? + +If you are a Googler, please make an attempt to submit an internal change rather +than a GitHub Pull Request. If you are not able to submit an internal change a +PR is acceptable as an alternative. + +## Contributing A Patch + +1. Submit an issue describing your proposed change to the + [issue tracker](https://github.com/google/googletest). +2. Please don't mix more than one logical change per submittal, because it + makes the history hard to follow. If you want to make a change that doesn't + have a corresponding issue in the issue tracker, please create one. +3. Also, coordinate with team members that are listed on the issue in question. + This ensures that work isn't being duplicated and communicating your plan + early also generally leads to better patches. +4. If your proposed change is accepted, and you haven't already done so, sign a + Contributor License Agreement (see details above). +5. Fork the desired repo, develop and test your code changes. +6. Ensure that your code adheres to the existing style in the sample to which + you are contributing. +7. Ensure that your code has an appropriate set of unit tests which all pass. +8. Submit a pull request. + +## The Google Test and Google Mock Communities + +The Google Test community exists primarily through the +[discussion group](http://groups.google.com/group/googletestframework) and the +GitHub repository. Likewise, the Google Mock community exists primarily through +their own [discussion group](http://groups.google.com/group/googlemock). 
You are +definitely encouraged to contribute to the discussion and you can also help us +to keep the effectiveness of the group high by following and promoting the +guidelines listed here. + +### Please Be Friendly + +Showing courtesy and respect to others is a vital part of the Google culture, +and we strongly encourage everyone participating in Google Test development to +join us in accepting nothing less. Of course, being courteous is not the same as +failing to constructively disagree with each other, but it does mean that we +should be respectful of each other when enumerating the 42 technical reasons +that a particular proposal may not be the best choice. There's never a reason to +be antagonistic or dismissive toward anyone who is sincerely trying to +contribute to a discussion. + +Sure, C++ testing is serious business and all that, but it's also a lot of fun. +Let's keep it that way. Let's strive to be one of the friendliest communities in +all of open source. + +As always, discuss Google Test in the official GoogleTest discussion group. You +don't have to actually submit code in order to sign up. Your participation +itself is a valuable contribution. + +## Style + +To keep the source consistent, readable, diffable and easy to merge, we use a +fairly rigid coding style, as defined by the +[google-styleguide](https://github.com/google/styleguide) project. All patches +will be expected to conform to the style outlined +[here](https://google.github.io/styleguide/cppguide.html). 
Use +[.clang-format](https://github.com/google/googletest/blob/master/.clang-format) +to check your formatting + +## Requirements for Contributors + +If you plan to contribute a patch, you need to build Google Test, Google Mock, +and their own tests from a git checkout, which has further requirements: + +* [Python](https://www.python.org/) v2.3 or newer (for running some of the + tests and re-generating certain source files from templates) +* [CMake](https://cmake.org/) v2.6.4 or newer + +## Developing Google Test and Google Mock + +This section discusses how to make your own changes to the Google Test project. + +### Testing Google Test and Google Mock Themselves + +To make sure your changes work as intended and don't break existing +functionality, you'll want to compile and run Google Test and GoogleMock's own +tests. For that you can use CMake: + + mkdir mybuild + cd mybuild + cmake -Dgtest_build_tests=ON -Dgmock_build_tests=ON ${GTEST_REPO_DIR} + +To choose between building only Google Test or Google Mock, you may modify your +cmake command to be one of each + + cmake -Dgtest_build_tests=ON ${GTEST_DIR} # sets up Google Test tests + cmake -Dgmock_build_tests=ON ${GMOCK_DIR} # sets up Google Mock tests + +Make sure you have Python installed, as some of Google Test's tests are written +in Python. If the cmake command complains about not being able to find Python +(`Could NOT find PythonInterp (missing: PYTHON_EXECUTABLE)`), try telling it +explicitly where your Python executable can be found: + + cmake -DPYTHON_EXECUTABLE=path/to/python ... + +Next, you can build Google Test and / or Google Mock and all desired tests. On +\*nix, this is usually done by + + make + +To run the tests, do + + make test + +All tests should pass. + +### Regenerating Source Files + +Some of Google Test's source files are generated from templates (not in the C++ +sense) using a script. 
For example, the file +include/gtest/internal/gtest-type-util.h.pump is used to generate +gtest-type-util.h in the same directory. + +You don't need to worry about regenerating the source files unless you need to +modify them. You would then modify the corresponding `.pump` files and run the +'[pump.py](googletest/scripts/pump.py)' generator script. See the +[Pump Manual](googletest/docs/pump_manual.md). diff --git a/src/test/gtest/LICENSE b/src/test/gtest/LICENSE new file mode 100644 index 00000000..1941a11f --- /dev/null +++ b/src/test/gtest/LICENSE @@ -0,0 +1,28 @@ +Copyright 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/test/gtest/README.md b/src/test/gtest/README.md new file mode 100644 index 00000000..5b417fa8 --- /dev/null +++ b/src/test/gtest/README.md @@ -0,0 +1,134 @@ +# Google Test + +#### OSS Builds Status: + +[![Build Status](https://api.travis-ci.org/google/googletest.svg?branch=master)](https://travis-ci.org/google/googletest) +[![Build status](https://ci.appveyor.com/api/projects/status/4o38plt0xbo1ubc8/branch/master?svg=true)](https://ci.appveyor.com/project/GoogleTestAppVeyor/googletest/branch/master) + +### Future Plans + +#### 1.8.x Release: + +[the 1.8.x](https://github.com/google/googletest/releases/tag/release-1.8.1) is +the last release that works with pre-C++11 compilers. The 1.8.x will not accept +any requests for any new features and any bugfix requests will only be accepted +if proven "critical" + +#### Post 1.8.x: + +On-going work to improve/cleanup/pay technical debt. When this work is completed +there will be a 1.9.x tagged release + +#### Post 1.9.x + +Post 1.9.x googletest will follow +[Abseil Live at Head philosophy](https://abseil.io/about/philosophy) + +## Welcome to **Google Test**, Google's C++ test framework! + +This repository is a merger of the formerly separate GoogleTest and GoogleMock +projects. These were so closely related that it makes sense to maintain and +release them together. + +Please subscribe to the mailing list at googletestframework@googlegroups.com for +questions, discussions, and development. 
+ +### Getting started: + +The information for **Google Test** is available in the +[Google Test Primer](googletest/docs/primer.md) documentation. + +**Google Mock** is an extension to Google Test for writing and using C++ mock +classes. See the separate [Google Mock documentation](googlemock/README.md). + +More detailed documentation for googletest is in its interior +[googletest/README.md](googletest/README.md) file. + +## Features + +* An [xUnit](https://en.wikipedia.org/wiki/XUnit) test framework. +* Test discovery. +* A rich set of assertions. +* User-defined assertions. +* Death tests. +* Fatal and non-fatal failures. +* Value-parameterized tests. +* Type-parameterized tests. +* Various options for running the tests. +* XML test report generation. + +## Platforms + +Google Test has been used on a variety of platforms: + +* Linux +* Mac OS X +* Windows +* Cygwin +* MinGW +* Windows Mobile +* Symbian +* PlatformIO + +## Who Is Using Google Test? + +In addition to many internal projects at Google, Google Test is also used by the +following notable projects: + +* The [Chromium projects](http://www.chromium.org/) (behind the Chrome browser + and Chrome OS). +* The [LLVM](http://llvm.org/) compiler. +* [Protocol Buffers](https://github.com/google/protobuf), Google's data + interchange format. +* The [OpenCV](http://opencv.org/) computer vision library. +* [tiny-dnn](https://github.com/tiny-dnn/tiny-dnn): header only, + dependency-free deep learning framework in C++11. + +## Related Open Source Projects + +[GTest Runner](https://github.com/nholthaus/gtest-runner) is a Qt5 based +automated test-runner and Graphical User Interface with powerful features for +Windows and Linux platforms. + +[Google Test UI](https://github.com/ospector/gtest-gbar) is a test runner that +runs your test binary, allows you to track its progress via a progress bar, and +displays a list of test failures. Clicking on one shows failure text. Google +Test UI is written in C#. 
+ +[GTest TAP Listener](https://github.com/kinow/gtest-tap-listener) is an event +listener for Google Test that implements the +[TAP protocol](https://en.wikipedia.org/wiki/Test_Anything_Protocol) for test +result output. If your test runner understands TAP, you may find it useful. + +[gtest-parallel](https://github.com/google/gtest-parallel) is a test runner that +runs tests from your binary in parallel to provide significant speed-up. + +[GoogleTest Adapter](https://marketplace.visualstudio.com/items?itemName=DavidSchuldenfrei.gtest-adapter) +is a VS Code extension allowing you to view Google Tests in a tree view, and +run/debug your tests. + +## Requirements + +Google Test is designed to have fairly minimal requirements to build and use +with your projects, but there are some. If you notice any problems on your +platform, please notify +[googletestframework@googlegroups.com](https://groups.google.com/forum/#!forum/googletestframework). +Patches for fixing them are welcome! + +### Build Requirements + +These are the base requirements to build and use Google Test from a source +package: + +* [Bazel](https://bazel.build/) or [CMake](https://cmake.org/). NOTE: Bazel is + the build system that googletest is using internally and tests against. + CMake is community-supported. + +* a C++11-standard-compliant compiler + +## Contributing Changes + +Please read the [`CONTRIBUTING.md`](CONTRIBUTING.md) for details on how to +contribute to this project. + +Happy testing! 
diff --git a/src/test/gtest/WORKSPACE b/src/test/gtest/WORKSPACE new file mode 100644 index 00000000..2289bdb7 --- /dev/null +++ b/src/test/gtest/WORKSPACE @@ -0,0 +1,23 @@ +workspace(name = "com_google_googletest") + +load("@bazel_tools//tools/build_defs/repo:http.bzl", "http_archive") + +# Abseil +http_archive( + name = "com_google_absl", + urls = ["https://github.com/abseil/abseil-cpp/archive/master.zip"], + strip_prefix = "abseil-cpp-master", +) + +http_archive( + name = "rules_cc", + strip_prefix = "rules_cc-master", + urls = ["https://github.com/bazelbuild/rules_cc/archive/master.zip"], +) + +http_archive( + name = "rules_python", + strip_prefix = "rules_python-master", + urls = ["https://github.com/bazelbuild/rules_python/archive/master.zip"], +) + diff --git a/src/test/gtest/appveyor.yml b/src/test/gtest/appveyor.yml new file mode 100644 index 00000000..a58b7687 --- /dev/null +++ b/src/test/gtest/appveyor.yml @@ -0,0 +1,154 @@ +version: '{build}' + +os: Visual Studio 2015 + +environment: + matrix: + - compiler: msvc-15-seh + generator: "Visual Studio 15 2017" + build_system: cmake + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + + - compiler: msvc-15-seh + generator: "Visual Studio 15 2017 Win64" + build_system: cmake + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + enabled_on_pr: yes + + - compiler: msvc-15-seh + build_system: bazel + APPVEYOR_BUILD_WORKER_IMAGE: Visual Studio 2017 + enabled_on_pr: yes + + - compiler: msvc-14-seh + build_system: cmake + generator: "Visual Studio 14 2015" + enabled_on_pr: yes + + - compiler: msvc-14-seh + build_system: cmake + generator: "Visual Studio 14 2015 Win64" + + - compiler: gcc-6.3.0-posix + build_system: cmake + generator: "MinGW Makefiles" + cxx_path: 'C:\mingw-w64\i686-6.3.0-posix-dwarf-rt_v5-rev1\mingw32\bin' + enabled_on_pr: yes + +configuration: + - Debug + +build: + verbosity: minimal + +install: +- ps: | + Write-Output "Compiler: $env:compiler" + Write-Output "Generator: $env:generator" + Write-Output 
"Env:Configuration: $env:configuration" + Write-Output "Env: $env" + if (-not (Test-Path env:APPVEYOR_PULL_REQUEST_NUMBER)) { + Write-Output "This is *NOT* a pull request build" + } else { + Write-Output "This is a pull request build" + if (-not (Test-Path env:enabled_on_pr) -or $env:enabled_on_pr -ne "yes") { + Write-Output "PR builds are *NOT* explicitly enabled" + } + } + + # install Bazel + if ($env:build_system -eq "bazel") { + appveyor DownloadFile https://github.com/bazelbuild/bazel/releases/download/0.28.1/bazel-0.28.1-windows-x86_64.exe -FileName bazel.exe + } + + if ($env:build_system -eq "cmake") { + # git bash conflicts with MinGW makefiles + if ($env:generator -eq "MinGW Makefiles") { + $env:path = $env:path.replace("C:\Program Files\Git\usr\bin;", "") + if ($env:cxx_path -ne "") { + $env:path += ";$env:cxx_path" + } + } + } + +before_build: +- ps: | + $env:root=$env:APPVEYOR_BUILD_FOLDER + Write-Output "env:root: $env:root" + +build_script: +- ps: | + # Only enable some builds for pull requests, the AppVeyor queue is too long. 
+ if ((Test-Path env:APPVEYOR_PULL_REQUEST_NUMBER) -And (-not (Test-Path env:enabled_on_pr) -or $env:enabled_on_pr -ne "yes")) { + return + } else { + # special case - build with Bazel + if ($env:build_system -eq "bazel") { + & $env:root\bazel.exe build -c opt //:gtest_samples + if ($LastExitCode -eq 0) { # bazel writes to StdErr and PowerShell interprets it as an error + $host.SetShouldExit(0) + } else { # a real error + throw "Exec: $ErrorMessage" + } + return + } + } + # by default build with CMake + md _build -Force | Out-Null + cd _build + + $conf = if ($env:generator -eq "MinGW Makefiles") {"-DCMAKE_BUILD_TYPE=$env:configuration"} else {"-DCMAKE_CONFIGURATION_TYPES=Debug;Release"} + # Disable test for MinGW (gtest tests fail, gmock tests can not build) + $gtest_build_tests = if ($env:generator -eq "MinGW Makefiles") {"-Dgtest_build_tests=OFF"} else {"-Dgtest_build_tests=ON"} + $gmock_build_tests = if ($env:generator -eq "MinGW Makefiles") {"-Dgmock_build_tests=OFF"} else {"-Dgmock_build_tests=ON"} + & cmake -G "$env:generator" $conf -Dgtest_build_samples=ON $gtest_build_tests $gmock_build_tests .. + if ($LastExitCode -ne 0) { + throw "Exec: $ErrorMessage" + } + $cmake_parallel = if ($env:generator -eq "MinGW Makefiles") {"-j2"} else {"/m"} + & cmake --build . --config $env:configuration -- $cmake_parallel + if ($LastExitCode -ne 0) { + throw "Exec: $ErrorMessage" + } + + +skip_commits: + files: + - '**/*.md' + +test_script: +- ps: | + # Only enable some builds for pull requests, the AppVeyor queue is too long. 
+ if ((Test-Path env:APPVEYOR_PULL_REQUEST_NUMBER) -And (-not (Test-Path env:enabled_on_pr) -or $env:enabled_on_pr -ne "yes")) { + return + } + if ($env:build_system -eq "bazel") { + # special case - testing with Bazel + & $env:root\bazel.exe test //:gtest_samples + if ($LastExitCode -eq 0) { # bazel writes to StdErr and PowerShell interprets it as an error + $host.SetShouldExit(0) + } else { # a real error + throw "Exec: $ErrorMessage" + } + } + if ($env:build_system -eq "cmake") { + # built with CMake - test with CTest + if ($env:generator -eq "MinGW Makefiles") { + return # No test available for MinGW + } + + & ctest -C $env:configuration --timeout 600 --output-on-failure + if ($LastExitCode -ne 0) { + throw "Exec: $ErrorMessage" + } + } + +artifacts: + - path: '_build/CMakeFiles/*.log' + name: logs + - path: '_build/Testing/**/*.xml' + name: test_results + - path: 'bazel-testlogs/**/test.log' + name: test_logs + - path: 'bazel-testlogs/**/test.xml' + name: test_results diff --git a/src/test/gtest/ci/build-linux-bazel.sh b/src/test/gtest/ci/build-linux-bazel.sh new file mode 100755 index 00000000..ae8fb758 --- /dev/null +++ b/src/test/gtest/ci/build-linux-bazel.sh @@ -0,0 +1,37 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -e + +bazel version +bazel build --curses=no //...:all +bazel test --curses=no //...:all +bazel test --curses=no //...:all --define absl=1 diff --git a/src/test/gtest/ci/build-platformio.sh b/src/test/gtest/ci/build-platformio.sh new file mode 100644 index 00000000..1d7658d8 --- /dev/null +++ b/src/test/gtest/ci/build-platformio.sh @@ -0,0 +1,2 @@ +# run PlatformIO builds +platformio run diff --git a/src/test/gtest/ci/env-linux.sh b/src/test/gtest/ci/env-linux.sh new file mode 100755 index 00000000..37800d6a --- /dev/null +++ b/src/test/gtest/ci/env-linux.sh @@ -0,0 +1,41 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# This file should be sourced, and not executed as a standalone script. +# + +# TODO() - we can check if this is being sourced using $BASH_VERSION and $BASH_SOURCE[0] != ${0}. + +if [ "${TRAVIS_OS_NAME}" = "linux" ]; then + if [ "$CXX" = "g++" ]; then export CXX="g++-4.9" CC="gcc-4.9"; fi + if [ "$CXX" = "clang++" ]; then export CXX="clang++-3.9" CC="clang-3.9"; fi +fi diff --git a/src/test/gtest/ci/env-osx.sh b/src/test/gtest/ci/env-osx.sh new file mode 100755 index 00000000..9c421e14 --- /dev/null +++ b/src/test/gtest/ci/env-osx.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# +# This file should be sourced, and not executed as a standalone script. +# + +# TODO() - we can check if this is being sourced using $BASH_VERSION and $BASH_SOURCE[0] != ${0}. +# + +if [ "${TRAVIS_OS_NAME}" = "osx" ]; then + if [ "$CXX" = "clang++" ]; then + # $PATH needs to be adjusted because the llvm tap doesn't install the + # package to /usr/local/bin, etc, like the gcc tap does. 
+ # See: https://github.com/Homebrew/legacy-homebrew/issues/29733 + clang_version=3.9 + export PATH="/usr/local/opt/llvm@${clang_version}/bin:$PATH"; + fi +fi diff --git a/src/test/gtest/ci/get-nprocessors.sh b/src/test/gtest/ci/get-nprocessors.sh new file mode 100755 index 00000000..43635e76 --- /dev/null +++ b/src/test/gtest/ci/get-nprocessors.sh @@ -0,0 +1,48 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# This file is typically sourced by another script. 
+# if possible, ask for the precise number of processors, +# otherwise take 2 processors as reasonable default; see +# https://docs.travis-ci.com/user/speeding-up-the-build/#Makefile-optimization +if [ -x /usr/bin/getconf ]; then + NPROCESSORS=$(/usr/bin/getconf _NPROCESSORS_ONLN) +else + NPROCESSORS=2 +fi + +# as of 2017-09-04 Travis CI reports 32 processors, but GCC build +# crashes if parallelized too much (maybe memory consumption problem), +# so limit to 4 processors for the time being. +if [ $NPROCESSORS -gt 4 ] ; then + echo "$0:Note: Limiting processors to use by make from $NPROCESSORS to 4." + NPROCESSORS=4 +fi diff --git a/src/test/gtest/ci/install-linux.sh b/src/test/gtest/ci/install-linux.sh new file mode 100755 index 00000000..05e2cb28 --- /dev/null +++ b/src/test/gtest/ci/install-linux.sh @@ -0,0 +1,49 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -eu + +if [ "${TRAVIS_OS_NAME}" != linux ]; then + echo "Not a Linux build; skipping installation" + exit 0 +fi + + +if [ "${TRAVIS_SUDO}" = "true" ]; then + echo "deb [arch=amd64] http://storage.googleapis.com/bazel-apt stable jdk1.8" | \ + sudo tee /etc/apt/sources.list.d/bazel.list + curl https://bazel.build/bazel-release.pub.gpg | sudo apt-key add - + sudo apt-get update && sudo apt-get install -y bazel gcc-4.9 g++-4.9 clang-3.9 +elif [ "${CXX}" = "clang++" ]; then + # Use ccache, assuming $HOME/bin is in the path, which is true in the Travis build environment. + ln -sf /usr/bin/ccache $HOME/bin/${CXX}; + ln -sf /usr/bin/ccache $HOME/bin/${CC}; +fi diff --git a/src/test/gtest/ci/install-osx.sh b/src/test/gtest/ci/install-osx.sh new file mode 100755 index 00000000..cc475082 --- /dev/null +++ b/src/test/gtest/ci/install-osx.sh @@ -0,0 +1,40 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. +# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. 
+# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +set -eu + +if [ "${TRAVIS_OS_NAME}" != "osx" ]; then + echo "Not a macOS build; skipping installation" + exit 0 +fi + +brew update +brew install ccache gcc@4.9 diff --git a/src/test/gtest/ci/install-platformio.sh b/src/test/gtest/ci/install-platformio.sh new file mode 100644 index 00000000..4d7860a5 --- /dev/null +++ b/src/test/gtest/ci/install-platformio.sh @@ -0,0 +1,5 @@ +# install PlatformIO +sudo pip install -U platformio + +# update PlatformIO +platformio update diff --git a/src/test/gtest/ci/log-config.sh b/src/test/gtest/ci/log-config.sh new file mode 100755 index 00000000..5fef1194 --- /dev/null +++ b/src/test/gtest/ci/log-config.sh @@ -0,0 +1,51 @@ +#!/usr/bin/env bash +# Copyright 2017 Google Inc. +# All Rights Reserved. 
+# +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +set -e + +# ccache on OS X needs installation first +# reset ccache statistics +ccache --zero-stats + +echo PATH=${PATH} + +echo "Compiler configuration:" +echo CXX=${CXX} +echo CC=${CC} +echo CXXFLAGS=${CXXFLAGS} + +echo "C++ compiler version:" +${CXX} --version || echo "${CXX} does not seem to support the --version flag" +${CXX} -v || echo "${CXX} does not seem to support the -v flag" + +echo "C compiler version:" +${CC} --version || echo "${CC} does not seem to support the --version flag" +${CC} -v || echo "${CC} does not seem to support the -v flag" diff --git a/src/test/gtest/ci/travis.sh b/src/test/gtest/ci/travis.sh new file mode 100755 index 00000000..9ff3bad3 --- /dev/null +++ b/src/test/gtest/ci/travis.sh @@ -0,0 +1,44 @@ +#!/usr/bin/env sh +set -evx + +. ci/get-nprocessors.sh + +# if possible, ask for the precise number of processors, +# otherwise take 2 processors as reasonable default; see +# https://docs.travis-ci.com/user/speeding-up-the-build/#Makefile-optimization +if [ -x /usr/bin/getconf ]; then + NPROCESSORS=$(/usr/bin/getconf _NPROCESSORS_ONLN) +else + NPROCESSORS=2 +fi +# as of 2017-09-04 Travis CI reports 32 processors, but GCC build +# crashes if parallelized too much (maybe memory consumption problem), +# so limit to 4 processors for the time being. +if [ $NPROCESSORS -gt 4 ] ; then + echo "$0:Note: Limiting processors to use by make from $NPROCESSORS to 4." + NPROCESSORS=4 +fi +# Tell make to use the processors. No preceding '-' required. +MAKEFLAGS="j${NPROCESSORS}" +export MAKEFLAGS + +env | sort + +# Set default values to OFF for these variables if not specified. 
+: "${NO_EXCEPTION:=OFF}" +: "${NO_RTTI:=OFF}" +: "${COMPILER_IS_GNUCXX:=OFF}" + +mkdir build || true +cd build +cmake -Dgtest_build_samples=ON \ + -Dgtest_build_tests=ON \ + -Dgmock_build_tests=ON \ + -Dcxx_no_exception=$NO_EXCEPTION \ + -Dcxx_no_rtti=$NO_RTTI \ + -DCMAKE_COMPILER_IS_GNUCXX=$COMPILER_IS_GNUCXX \ + -DCMAKE_CXX_FLAGS=$CXX_FLAGS \ + -DCMAKE_BUILD_TYPE=$BUILD_TYPE \ + .. +make +CTEST_OUTPUT_ON_FAILURE=1 make test diff --git a/src/test/gtest/googlemock/CMakeLists.txt b/src/test/gtest/googlemock/CMakeLists.txt new file mode 100644 index 00000000..d32b70b5 --- /dev/null +++ b/src/test/gtest/googlemock/CMakeLists.txt @@ -0,0 +1,233 @@ +######################################################################## +# Note: CMake support is community-based. The maintainers do not use CMake +# internally. +# +# CMake build script for Google Mock. +# +# To run the tests for Google Mock itself on Linux, use 'make test' or +# ctest. You can select which tests to run using 'ctest -R regex'. +# For more options, run 'ctest --help'. + +option(gmock_build_tests "Build all of Google Mock's own tests." OFF) + +# A directory to find Google Test sources. +if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/gtest/CMakeLists.txt") + set(gtest_dir gtest) +else() + set(gtest_dir ../googletest) +endif() + +# Defines pre_project_set_up_hermetic_build() and set_up_hermetic_build(). +include("${gtest_dir}/cmake/hermetic_build.cmake" OPTIONAL) + +if (COMMAND pre_project_set_up_hermetic_build) + # Google Test also calls hermetic setup functions from add_subdirectory, + # although its changes will not affect things at the current scope. + pre_project_set_up_hermetic_build() +endif() + +######################################################################## +# +# Project-wide settings + +# Name of the project. +# +# CMake files in this project can refer to the root source directory +# as ${gmock_SOURCE_DIR} and to the root binary directory as +# ${gmock_BINARY_DIR}. 
+# Language "C" is required for find_package(Threads). +if (CMAKE_VERSION VERSION_LESS 3.0) + project(gmock CXX C) +else() + cmake_policy(SET CMP0048 NEW) + project(gmock VERSION ${GOOGLETEST_VERSION} LANGUAGES CXX C) +endif() +cmake_minimum_required(VERSION 2.6.4) + +if (COMMAND set_up_hermetic_build) + set_up_hermetic_build() +endif() + +# Instructs CMake to process Google Test's CMakeLists.txt and add its +# targets to the current scope. We are placing Google Test's binary +# directory in a subdirectory of our own as VC compilation may break +# if they are the same (the default). +add_subdirectory("${gtest_dir}" "${gmock_BINARY_DIR}/${gtest_dir}") + + +# These commands only run if this is the main project +if(CMAKE_PROJECT_NAME STREQUAL "gmock" OR CMAKE_PROJECT_NAME STREQUAL "googletest-distribution") + # BUILD_SHARED_LIBS is a standard CMake variable, but we declare it here to + # make it prominent in the GUI. + option(BUILD_SHARED_LIBS "Build shared libraries (DLLs)." OFF) +else() + mark_as_advanced(gmock_build_tests) +endif() + +# Although Google Test's CMakeLists.txt calls this function, the +# changes there don't affect the current scope. Therefore we have to +# call it again here. +config_compiler_and_linker() # from ${gtest_dir}/cmake/internal_utils.cmake + +# Adds Google Mock's and Google Test's header directories to the search path. +set(gmock_build_include_dirs + "${gmock_SOURCE_DIR}/include" + "${gmock_SOURCE_DIR}" + "${gtest_SOURCE_DIR}/include" + # This directory is needed to build directly from Google Test sources. + "${gtest_SOURCE_DIR}") +include_directories(${gmock_build_include_dirs}) + +######################################################################## +# +# Defines the gmock & gmock_main libraries. User tests should link +# with one of them. + +# Google Mock libraries. We build them using more strict warnings than what +# are used for other targets, to ensure that Google Mock can be compiled by +# a user aggressive about warnings. 
+if (MSVC) + cxx_library(gmock + "${cxx_strict}" + "${gtest_dir}/src/gtest-all.cc" + src/gmock-all.cc) + + cxx_library(gmock_main + "${cxx_strict}" + "${gtest_dir}/src/gtest-all.cc" + src/gmock-all.cc + src/gmock_main.cc) +else() + cxx_library(gmock "${cxx_strict}" src/gmock-all.cc) + target_link_libraries(gmock PUBLIC gtest) + cxx_library(gmock_main "${cxx_strict}" src/gmock_main.cc) + target_link_libraries(gmock_main PUBLIC gmock) +endif() +# If the CMake version supports it, attach header directory information +# to the targets for when we are part of a parent build (ie being pulled +# in via add_subdirectory() rather than being a standalone build). +if (DEFINED CMAKE_VERSION AND NOT "${CMAKE_VERSION}" VERSION_LESS "2.8.11") + target_include_directories(gmock SYSTEM INTERFACE + "$" + "$/${CMAKE_INSTALL_INCLUDEDIR}>") + target_include_directories(gmock_main SYSTEM INTERFACE + "$" + "$/${CMAKE_INSTALL_INCLUDEDIR}>") +endif() + +######################################################################## +# +# Install rules +install_project(gmock gmock_main) + +######################################################################## +# +# Google Mock's own tests. +# +# You can skip this section if you aren't interested in testing +# Google Mock itself. +# +# The tests are not built by default. To build them, set the +# gmock_build_tests option to ON. You can do it by running ccmake +# or specifying the -Dgmock_build_tests=ON flag when running cmake. + +if (gmock_build_tests) + # This must be set in the root directory for the tests to be run by + # 'make test' or ctest. 
+ enable_testing() + + if (WIN32) + file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/$/RunTest.ps1" + CONTENT +"$project_bin = \"${CMAKE_BINARY_DIR}/bin/$\" +$env:Path = \"$project_bin;$env:Path\" +& $args") + elseif (MINGW OR CYGWIN) + file(GENERATE OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/RunTest.ps1" + CONTENT +"$project_bin = (cygpath --windows ${CMAKE_BINARY_DIR}/bin) +$env:Path = \"$project_bin;$env:Path\" +& $args") + endif() + + if (MINGW OR CYGWIN) + if (CMAKE_VERSION VERSION_LESS "2.8.12") + add_compile_options("-Wa,-mbig-obj") + else() + add_definitions("-Wa,-mbig-obj") + endif() + endif() + + ############################################################ + # C++ tests built with standard compiler flags. + + cxx_test(gmock-actions_test gmock_main) + cxx_test(gmock-cardinalities_test gmock_main) + cxx_test(gmock_ex_test gmock_main) + cxx_test(gmock-function-mocker_test gmock_main) + cxx_test(gmock-generated-actions_test gmock_main) + cxx_test(gmock-generated-function-mockers_test gmock_main) + cxx_test(gmock-generated-matchers_test gmock_main) + cxx_test(gmock-internal-utils_test gmock_main) + cxx_test(gmock-matchers_test gmock_main) + cxx_test(gmock-more-actions_test gmock_main) + cxx_test(gmock-nice-strict_test gmock_main) + cxx_test(gmock-port_test gmock_main) + cxx_test(gmock-spec-builders_test gmock_main) + cxx_test(gmock_link_test gmock_main test/gmock_link2_test.cc) + cxx_test(gmock_test gmock_main) + + if (DEFINED GTEST_HAS_PTHREAD) + cxx_test(gmock_stress_test gmock) + endif() + + # gmock_all_test is commented to save time building and running tests. + # Uncomment if necessary. + # cxx_test(gmock_all_test gmock_main) + + ############################################################ + # C++ tests built with non-standard compiler flags. 
+ + if (MSVC) + cxx_library(gmock_main_no_exception "${cxx_no_exception}" + "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) + + cxx_library(gmock_main_no_rtti "${cxx_no_rtti}" + "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) + + else() + cxx_library(gmock_main_no_exception "${cxx_no_exception}" src/gmock_main.cc) + target_link_libraries(gmock_main_no_exception PUBLIC gmock) + + cxx_library(gmock_main_no_rtti "${cxx_no_rtti}" src/gmock_main.cc) + target_link_libraries(gmock_main_no_rtti PUBLIC gmock) + endif() + cxx_test_with_flags(gmock-more-actions_no_exception_test "${cxx_no_exception}" + gmock_main_no_exception test/gmock-more-actions_test.cc) + + cxx_test_with_flags(gmock_no_rtti_test "${cxx_no_rtti}" + gmock_main_no_rtti test/gmock-spec-builders_test.cc) + + cxx_shared_library(shared_gmock_main "${cxx_default}" + "${gtest_dir}/src/gtest-all.cc" src/gmock-all.cc src/gmock_main.cc) + + # Tests that a binary can be built with Google Mock as a shared library. On + # some system configurations, it may not possible to run the binary without + # knowing more details about the system configurations. We do not try to run + # this binary. To get a more robust shared library coverage, configure with + # -DBUILD_SHARED_LIBS=ON. + cxx_executable_with_flags(shared_gmock_test_ "${cxx_default}" + shared_gmock_main test/gmock-spec-builders_test.cc) + set_target_properties(shared_gmock_test_ + PROPERTIES + COMPILE_DEFINITIONS "GTEST_LINKED_AS_SHARED_LIBRARY=1") + + ############################################################ + # Python tests. 
+ + cxx_executable(gmock_leak_test_ test gmock_main) + py_test(gmock_leak_test) + + cxx_executable(gmock_output_test_ test gmock) + py_test(gmock_output_test) +endif() diff --git a/src/test/gtest/googlemock/CONTRIBUTORS b/src/test/gtest/googlemock/CONTRIBUTORS new file mode 100644 index 00000000..6e9ae362 --- /dev/null +++ b/src/test/gtest/googlemock/CONTRIBUTORS @@ -0,0 +1,40 @@ +# This file contains a list of people who've made non-trivial +# contribution to the Google C++ Mocking Framework project. People +# who commit code to the project are encouraged to add their names +# here. Please keep the list sorted by first names. + +Benoit Sigoure +Bogdan Piloca +Chandler Carruth +Dave MacLachlan +David Anderson +Dean Sturtevant +Gene Volovich +Hal Burch +Jeffrey Yasskin +Jim Keller +Joe Walnes +Jon Wray +Keir Mierle +Keith Ray +Kostya Serebryany +Lev Makhlis +Manuel Klimek +Mario Tanev +Mark Paskin +Markus Heule +Matthew Simmons +Mike Bland +Neal Norwitz +Nermin Ozkiranartli +Owen Carlsen +Paneendra Ba +Paul Menage +Piotr Kaminski +Russ Rufer +Sverre Sundsdal +Takeshi Yoshino +Vadim Berman +Vlad Losev +Wolfgang Klier +Zhanyong Wan diff --git a/src/test/gtest/googlemock/LICENSE b/src/test/gtest/googlemock/LICENSE new file mode 100644 index 00000000..1941a11f --- /dev/null +++ b/src/test/gtest/googlemock/LICENSE @@ -0,0 +1,28 @@ +Copyright 2008, Google Inc. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + + * Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following disclaimer +in the documentation and/or other materials provided with the +distribution. + * Neither the name of Google Inc. 
nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/src/test/gtest/googlemock/README.md b/src/test/gtest/googlemock/README.md new file mode 100644 index 00000000..183fdb81 --- /dev/null +++ b/src/test/gtest/googlemock/README.md @@ -0,0 +1,44 @@ +# Googletest Mocking (gMock) Framework + +### Overview + +Google's framework for writing and using C++ mock classes. It can help you +derive better designs of your system and write better tests. + +It is inspired by: + +* [jMock](http://www.jmock.org/), +* [EasyMock](http://www.easymock.org/), and +* [Hamcrest](http://code.google.com/p/hamcrest/), + +and designed with C++'s specifics in mind. 
+ +gMock: + +- provides a declarative syntax for defining mocks, +- can define partial (hybrid) mocks, which are a cross of real and mock + objects, +- handles functions of arbitrary types and overloaded functions, +- comes with a rich set of matchers for validating function arguments, +- uses an intuitive syntax for controlling the behavior of a mock, +- does automatic verification of expectations (no record-and-replay needed), +- allows arbitrary (partial) ordering constraints on function calls to be + expressed, +- lets a user extend it by defining new matchers and actions. +- does not use exceptions, and +- is easy to learn and use. + +Details and examples can be found here: + +* [gMock for Dummies](docs/for_dummies.md) +* [Legacy gMock FAQ](docs/gmock_faq.md) +* [gMock Cookbook](docs/cook_book.md) +* [gMock Cheat Sheet](docs/cheat_sheet.md) + +Please note that code under scripts/generator/ is from the [cppclean +project](http://code.google.com/p/cppclean/) and under the Apache +License, which is different from Google Mock's license. + +Google Mock is a part of +[Google Test C++ testing framework](http://github.com/google/googletest/) and a +subject to the same requirements. diff --git a/src/test/gtest/googlemock/cmake/gmock.pc.in b/src/test/gtest/googlemock/cmake/gmock.pc.in new file mode 100644 index 00000000..08e04547 --- /dev/null +++ b/src/test/gtest/googlemock/cmake/gmock.pc.in @@ -0,0 +1,11 @@ +prefix=${pcfiledir}/../.. 
+libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ + +Name: gmock +Description: GoogleMock (without main() function) +Version: @PROJECT_VERSION@ +URL: https://github.com/google/googletest +Requires: gtest +Libs: -L${libdir} -lgmock @CMAKE_THREAD_LIBS_INIT@ +Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ @CMAKE_THREAD_LIBS_INIT@ diff --git a/src/test/gtest/googlemock/cmake/gmock_main.pc.in b/src/test/gtest/googlemock/cmake/gmock_main.pc.in new file mode 100644 index 00000000..b22fe614 --- /dev/null +++ b/src/test/gtest/googlemock/cmake/gmock_main.pc.in @@ -0,0 +1,11 @@ +prefix=${pcfiledir}/../.. +libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@ +includedir=${prefix}/@CMAKE_INSTALL_INCLUDEDIR@ + +Name: gmock_main +Description: GoogleMock (with main() function) +Version: @PROJECT_VERSION@ +URL: https://github.com/google/googletest +Requires: gmock +Libs: -L${libdir} -lgmock_main @CMAKE_THREAD_LIBS_INIT@ +Cflags: -I${includedir} @GTEST_HAS_PTHREAD_MACRO@ @CMAKE_THREAD_LIBS_INIT@ diff --git a/src/test/gtest/googlemock/docs/cheat_sheet.md b/src/test/gtest/googlemock/docs/cheat_sheet.md new file mode 100644 index 00000000..850963af --- /dev/null +++ b/src/test/gtest/googlemock/docs/cheat_sheet.md @@ -0,0 +1,781 @@ +## gMock Cheat Sheet + + + + + +### Defining a Mock Class + +#### Mocking a Normal Class {#MockClass} + +Given + +```cpp +class Foo { + ... + virtual ~Foo(); + virtual int GetSize() const = 0; + virtual string Describe(const char* name) = 0; + virtual string Describe(int type) = 0; + virtual bool Process(Bar elem, int count) = 0; +}; +``` + +(note that `~Foo()` **must** be virtual) we can define its mock as + +```cpp +#include "gmock/gmock.h" + +class MockFoo : public Foo { + ... 
+ MOCK_METHOD(int, GetSize, (), (const, override)); + MOCK_METHOD(string, Describe, (const char* name), (override)); + MOCK_METHOD(string, Describe, (int type), (override)); + MOCK_METHOD(bool, Process, (Bar elem, int count), (override)); +}; +``` + +To create a "nice" mock, which ignores all uninteresting calls, a "naggy" mock, +which warns on all uninteresting calls, or a "strict" mock, which treats them as +failures: + +```cpp +using ::testing::NiceMock; +using ::testing::NaggyMock; +using ::testing::StrictMock; + +NiceMock nice_foo; // The type is a subclass of MockFoo. +NaggyMock naggy_foo; // The type is a subclass of MockFoo. +StrictMock strict_foo; // The type is a subclass of MockFoo. +``` + +**Note:** A mock object is currently naggy by default. We may make it nice by +default in the future. + +#### Mocking a Class Template {#MockTemplate} + +Class templates can be mocked just like any class. + +To mock + +```cpp +template +class StackInterface { + ... + virtual ~StackInterface(); + virtual int GetSize() const = 0; + virtual void Push(const Elem& x) = 0; +}; +``` + +(note that all member functions that are mocked, including `~StackInterface()` +**must** be virtual). + +```cpp +template +class MockStack : public StackInterface { + ... + MOCK_METHOD(int, GetSize, (), (const, override)); + MOCK_METHOD(void, Push, (const Elem& x), (override)); +}; +``` + +#### Specifying Calling Conventions for Mock Functions + +If your mock function doesn't use the default calling convention, you can +specify it by adding `Calltype(convention)` to `MOCK_METHOD`'s 4th parameter. +For example, + +```cpp + MOCK_METHOD(bool, Foo, (int n), (Calltype(STDMETHODCALLTYPE))); + MOCK_METHOD(int, Bar, (double x, double y), + (const, Calltype(STDMETHODCALLTYPE))); +``` + +where `STDMETHODCALLTYPE` is defined by `` on Windows. + +### Using Mocks in Tests {#UsingMocks} + +The typical work flow is: + +1. Import the gMock names you need to use. 
All gMock symbols are in the + `testing` namespace unless they are macros or otherwise noted. +2. Create the mock objects. +3. Optionally, set the default actions of the mock objects. +4. Set your expectations on the mock objects (How will they be called? What + will they do?). +5. Exercise code that uses the mock objects; if necessary, check the result + using googletest assertions. +6. When a mock object is destructed, gMock automatically verifies that all + expectations on it have been satisfied. + +Here's an example: + +```cpp +using ::testing::Return; // #1 + +TEST(BarTest, DoesThis) { + MockFoo foo; // #2 + + ON_CALL(foo, GetSize()) // #3 + .WillByDefault(Return(1)); + // ... other default actions ... + + EXPECT_CALL(foo, Describe(5)) // #4 + .Times(3) + .WillRepeatedly(Return("Category 5")); + // ... other expectations ... + + EXPECT_EQ("good", MyProductionFunction(&foo)); // #5 +} // #6 +``` + +### Setting Default Actions {#OnCall} + +gMock has a **built-in default action** for any function that returns `void`, +`bool`, a numeric value, or a pointer. In C++11, it will additionally returns +the default-constructed value, if one exists for the given type. + +To customize the default action for functions with return type *`T`*: + +```cpp +using ::testing::DefaultValue; + +// Sets the default value to be returned. T must be CopyConstructible. +DefaultValue::Set(value); +// Sets a factory. Will be invoked on demand. T must be MoveConstructible. +// T MakeT(); +DefaultValue::SetFactory(&MakeT); +// ... use the mocks ... +// Resets the default value. +DefaultValue::Clear(); +``` + +Example usage: + +```cpp + // Sets the default action for return type std::unique_ptr to + // creating a new Buzz every time. + DefaultValue>::SetFactory( + [] { return MakeUnique(AccessLevel::kInternal); }); + + // When this fires, the default action of MakeBuzz() will run, which + // will return a new Buzz object. 
+ EXPECT_CALL(mock_buzzer_, MakeBuzz("hello")).Times(AnyNumber()); + + auto buzz1 = mock_buzzer_.MakeBuzz("hello"); + auto buzz2 = mock_buzzer_.MakeBuzz("hello"); + EXPECT_NE(nullptr, buzz1); + EXPECT_NE(nullptr, buzz2); + EXPECT_NE(buzz1, buzz2); + + // Resets the default action for return type std::unique_ptr, + // to avoid interfere with other tests. + DefaultValue>::Clear(); +``` + +To customize the default action for a particular method of a specific mock +object, use `ON_CALL()`. `ON_CALL()` has a similar syntax to `EXPECT_CALL()`, +but it is used for setting default behaviors (when you do not require that the +mock method is called). See [here](cook_book.md#UseOnCall) for a more detailed +discussion. + +```cpp +ON_CALL(mock-object, method(matchers)) + .With(multi-argument-matcher) ? + .WillByDefault(action); +``` + +### Setting Expectations {#ExpectCall} + +`EXPECT_CALL()` sets **expectations** on a mock method (How will it be called? +What will it do?): + +```cpp +EXPECT_CALL(mock-object, method (matchers)?) + .With(multi-argument-matcher) ? + .Times(cardinality) ? + .InSequence(sequences) * + .After(expectations) * + .WillOnce(action) * + .WillRepeatedly(action) ? + .RetiresOnSaturation(); ? +``` + +For each item above, `?` means it can be used at most once, while `*` means it +can be used any number of times. + +In order to pass, `EXPECT_CALL` must be used before the calls are actually made. + +The `(matchers)` is a comma-separated list of matchers that correspond to each +of the arguments of `method`, and sets the expectation only for calls of +`method` that matches all of the matchers. + +If `(matchers)` is omitted, the expectation is the same as if the matchers were +set to anything matchers (for example, `(_, _, _, _)` for a four-arg method). 
+ +If `Times()` is omitted, the cardinality is assumed to be: + +* `Times(1)` when there is neither `WillOnce()` nor `WillRepeatedly()`; +* `Times(n)` when there are `n` `WillOnce()`s but no `WillRepeatedly()`, where + `n` >= 1; or +* `Times(AtLeast(n))` when there are `n` `WillOnce()`s and a + `WillRepeatedly()`, where `n` >= 0. + +A method with no `EXPECT_CALL()` is free to be invoked *any number of times*, +and the default action will be taken each time. + +### Matchers {#MatcherList} + + + +A **matcher** matches a *single* argument. You can use it inside `ON_CALL()` or +`EXPECT_CALL()`, or use it to validate a value directly using two macros: + + +| Macro | Description | +| :----------------------------------- | :------------------------------------ | +| `EXPECT_THAT(actual_value, matcher)` | Asserts that `actual_value` matches `matcher`. | +| `ASSERT_THAT(actual_value, matcher)` | The same as `EXPECT_THAT(actual_value, matcher)`, except that it generates a **fatal** failure. | + + +Built-in matchers (where `argument` is the function argument, e.g. +`actual_value` in the example above, or when used in the context of +`EXPECT_CALL(mock_object, method(matchers))`, the arguments of `method`) are +divided into several categories: + +#### Wildcard + +Matcher | Description +:-------------------------- | :----------------------------------------------- +`_` | `argument` can be any value of the correct type. +`A()` or `An()` | `argument` can be any value of type `type`. + +#### Generic Comparison + + +| Matcher | Description | +| :--------------------- | :-------------------------------------------------- | +| `Eq(value)` or `value` | `argument == value` | +| `Ge(value)` | `argument >= value` | +| `Gt(value)` | `argument > value` | +| `Le(value)` | `argument <= value` | +| `Lt(value)` | `argument < value` | +| `Ne(value)` | `argument != value` | +| `IsFalse()` | `argument` evaluates to `false` in a Boolean context. 
| +| `IsTrue()` | `argument` evaluates to `true` in a Boolean context. | +| `IsNull()` | `argument` is a `NULL` pointer (raw or smart). | +| `NotNull()` | `argument` is a non-null pointer (raw or smart). | +| `Optional(m)` | `argument` is `optional<>` that contains a value matching `m`. | +| `VariantWith(m)` | `argument` is `variant<>` that holds the alternative of type T with a value matching `m`. | +| `Ref(variable)` | `argument` is a reference to `variable`. | +| `TypedEq(value)` | `argument` has type `type` and is equal to `value`. You may need to use this instead of `Eq(value)` when the mock function is overloaded. | + + +Except `Ref()`, these matchers make a *copy* of `value` in case it's modified or +destructed later. If the compiler complains that `value` doesn't have a public +copy constructor, try wrap it in `ByRef()`, e.g. +`Eq(ByRef(non_copyable_value))`. If you do that, make sure `non_copyable_value` +is not changed afterwards, or the meaning of your matcher will be changed. + +#### Floating-Point Matchers {#FpMatchers} + + +| Matcher | Description | +| :------------------------------- | :--------------------------------- | +| `DoubleEq(a_double)` | `argument` is a `double` value approximately equal to `a_double`, treating two NaNs as unequal. | +| `FloatEq(a_float)` | `argument` is a `float` value approximately equal to `a_float`, treating two NaNs as unequal. | +| `NanSensitiveDoubleEq(a_double)` | `argument` is a `double` value approximately equal to `a_double`, treating two NaNs as equal. | +| `NanSensitiveFloatEq(a_float)` | `argument` is a `float` value approximately equal to `a_float`, treating two NaNs as equal. | + + +The above matchers use ULP-based comparison (the same as used in googletest). +They automatically pick a reasonable error bound based on the absolute value of +the expected value. `DoubleEq()` and `FloatEq()` conform to the IEEE standard, +which requires comparing two NaNs for equality to return false. 
The +`NanSensitive*` version instead treats two NaNs as equal, which is often what a +user wants. + + +| Matcher | Description | +| :------------------------------------------------ | :----------------------- | +| `DoubleNear(a_double, max_abs_error)` | `argument` is a `double` value close to `a_double` (absolute error <= `max_abs_error`), treating two NaNs as unequal. | +| `FloatNear(a_float, max_abs_error)` | `argument` is a `float` value close to `a_float` (absolute error <= `max_abs_error`), treating two NaNs as unequal. | +| `NanSensitiveDoubleNear(a_double, max_abs_error)` | `argument` is a `double` value close to `a_double` (absolute error <= `max_abs_error`), treating two NaNs as equal. | +| `NanSensitiveFloatNear(a_float, max_abs_error)` | `argument` is a `float` value close to `a_float` (absolute error <= `max_abs_error`), treating two NaNs as equal. | + + +#### String Matchers + +The `argument` can be either a C string or a C++ string object: + + +| Matcher | Description | +| :---------------------- | :------------------------------------------------- | +| `ContainsRegex(string)` | `argument` matches the given regular expression. | +| `EndsWith(suffix)` | `argument` ends with string `suffix`. | +| `HasSubstr(string)` | `argument` contains `string` as a sub-string. | +| `MatchesRegex(string)` | `argument` matches the given regular expression with the match starting at the first character and ending at the last character. | +| `StartsWith(prefix)` | `argument` starts with string `prefix`. | +| `StrCaseEq(string)` | `argument` is equal to `string`, ignoring case. | +| `StrCaseNe(string)` | `argument` is not equal to `string`, ignoring case. | +| `StrEq(string)` | `argument` is equal to `string`. | +| `StrNe(string)` | `argument` is not equal to `string`. | + + +`ContainsRegex()` and `MatchesRegex()` take ownership of the `RE` object. They +use the regular expression syntax defined +[here](../../googletest/docs/advanced.md#regular-expression-syntax). 
+`StrCaseEq()`, `StrCaseNe()`, `StrEq()`, and `StrNe()` work for wide strings as +well. + +#### Container Matchers + +Most STL-style containers support `==`, so you can use `Eq(expected_container)` +or simply `expected_container` to match a container exactly. If you want to +write the elements in-line, match them more flexibly, or get more informative +messages, you can use: + + +| Matcher | Description | +| :---------------------------------------- | :------------------------------- | +| `BeginEndDistanceIs(m)` | `argument` is a container whose `begin()` and `end()` iterators are separated by a number of increments matching `m`. E.g. `BeginEndDistanceIs(2)` or `BeginEndDistanceIs(Lt(2))`. For containers that define a `size()` method, `SizeIs(m)` may be more efficient. | +| `ContainerEq(container)` | The same as `Eq(container)` except that the failure message also includes which elements are in one container but not the other. | +| `Contains(e)` | `argument` contains an element that matches `e`, which can be either a value or a matcher. | +| `Each(e)` | `argument` is a container where *every* element matches `e`, which can be either a value or a matcher. | +| `ElementsAre(e0, e1, ..., en)` | `argument` has `n + 1` elements, where the *i*-th element matches `ei`, which can be a value or a matcher. | +| `ElementsAreArray({e0, e1, ..., en})`, `ElementsAreArray(a_container)`, `ElementsAreArray(begin, end)`, `ElementsAreArray(array)`, or `ElementsAreArray(array, count)` | The same as `ElementsAre()` except that the expected element values/matchers come from an initializer list, STL-style container, iterator range, or C-style array. | +| `IsEmpty()` | `argument` is an empty container (`container.empty()`). | +| `IsSubsetOf({e0, e1, ..., en})`, `IsSubsetOf(a_container)`, `IsSubsetOf(begin, end)`, `IsSubsetOf(array)`, or `IsSubsetOf(array, count)` | `argument` matches `UnorderedElementsAre(x0, x1, ..., xk)` for some subset `{x0, x1, ..., xk}` of the expected matchers. 
| +| `IsSupersetOf({e0, e1, ..., en})`, `IsSupersetOf(a_container)`, `IsSupersetOf(begin, end)`, `IsSupersetOf(array)`, or `IsSupersetOf(array, count)` | Some subset of `argument` matches `UnorderedElementsAre(`expected matchers`)`. | +| `Pointwise(m, container)`, `Pointwise(m, {e0, e1, ..., en})` | `argument` contains the same number of elements as in `container`, and for all i, (the i-th element in `argument`, the i-th element in `container`) match `m`, which is a matcher on 2-tuples. E.g. `Pointwise(Le(), upper_bounds)` verifies that each element in `argument` doesn't exceed the corresponding element in `upper_bounds`. See more detail below. | +| `SizeIs(m)` | `argument` is a container whose size matches `m`. E.g. `SizeIs(2)` or `SizeIs(Lt(2))`. | +| `UnorderedElementsAre(e0, e1, ..., en)` | `argument` has `n + 1` elements, and under *some* permutation of the elements, each element matches an `ei` (for a different `i`), which can be a value or a matcher. | +| `UnorderedElementsAreArray({e0, e1, ..., en})`, `UnorderedElementsAreArray(a_container)`, `UnorderedElementsAreArray(begin, end)`, `UnorderedElementsAreArray(array)`, or `UnorderedElementsAreArray(array, count)` | The same as `UnorderedElementsAre()` except that the expected element values/matchers come from an initializer list, STL-style container, iterator range, or C-style array. | +| `UnorderedPointwise(m, container)`, `UnorderedPointwise(m, {e0, e1, ..., en})` | Like `Pointwise(m, container)`, but ignores the order of elements. | +| `WhenSorted(m)` | When `argument` is sorted using the `<` operator, it matches container matcher `m`. E.g. `WhenSorted(ElementsAre(1, 2, 3))` verifies that `argument` contains elements 1, 2, and 3, ignoring order. | +| `WhenSortedBy(comparator, m)` | The same as `WhenSorted(m)`, except that the given comparator instead of `<` is used to sort `argument`. E.g. `WhenSortedBy(std::greater(), ElementsAre(3, 2, 1))`. | + + +**Notes:** + +* These matchers can also match: + 1. 
a native array passed by reference (e.g. in `Foo(const int (&a)[5])`), + and + 2. an array passed as a pointer and a count (e.g. in `Bar(const T* buffer, + int len)` -- see [Multi-argument Matchers](#MultiArgMatchers)). +* The array being matched may be multi-dimensional (i.e. its elements can be + arrays). +* `m` in `Pointwise(m, ...)` should be a matcher for `::std::tuple` + where `T` and `U` are the element type of the actual container and the + expected container, respectively. For example, to compare two `Foo` + containers where `Foo` doesn't support `operator==`, one might write: + + ```cpp + using ::std::get; + MATCHER(FooEq, "") { + return std::get<0>(arg).Equals(std::get<1>(arg)); + } + ... + EXPECT_THAT(actual_foos, Pointwise(FooEq(), expected_foos)); + ``` + +#### Member Matchers + + +| Matcher | Description | +| :------------------------------ | :----------------------------------------- | +| `Field(&class::field, m)` | `argument.field` (or `argument->field` when `argument` is a plain pointer) matches matcher `m`, where `argument` is an object of type _class_. | +| `Key(e)` | `argument.first` matches `e`, which can be either a value or a matcher. E.g. `Contains(Key(Le(5)))` can verify that a `map` contains a key `<= 5`. | +| `Pair(m1, m2)` | `argument` is an `std::pair` whose `first` field matches `m1` and `second` field matches `m2`. | +| `Property(&class::property, m)` | `argument.property()` (or `argument->property()` when `argument` is a plain pointer) matches matcher `m`, where `argument` is an object of type _class_. | + + +#### Matching the Result of a Function, Functor, or Callback + + +| Matcher | Description | +| :--------------- | :------------------------------------------------ | +| `ResultOf(f, m)` | `f(argument)` matches matcher `m`, where `f` is a function or functor. 
| + + +#### Pointer Matchers + + +| Matcher | Description | +| :------------------------ | :---------------------------------------------- | +| `Pointee(m)` | `argument` (either a smart pointer or a raw pointer) points to a value that matches matcher `m`. | +| `WhenDynamicCastTo(m)` | when `argument` is passed through `dynamic_cast()`, it matches matcher `m`. | + + + + + + +#### Multi-argument Matchers {#MultiArgMatchers} + +Technically, all matchers match a *single* value. A "multi-argument" matcher is +just one that matches a *tuple*. The following matchers can be used to match a +tuple `(x, y)`: + +Matcher | Description +:------ | :---------- +`Eq()` | `x == y` +`Ge()` | `x >= y` +`Gt()` | `x > y` +`Le()` | `x <= y` +`Lt()` | `x < y` +`Ne()` | `x != y` + +You can use the following selectors to pick a subset of the arguments (or +reorder them) to participate in the matching: + + +| Matcher | Description | +| :------------------------- | :---------------------------------------------- | +| `AllArgs(m)` | Equivalent to `m`. Useful as syntactic sugar in `.With(AllArgs(m))`. | +| `Args(m)` | The tuple of the `k` selected (using 0-based indices) arguments matches `m`, e.g. `Args<1, 2>(Eq())`. | + + +#### Composite Matchers + +You can make a matcher from one or more other matchers: + + +| Matcher | Description | +| :------------------------------- | :-------------------------------------- | +| `AllOf(m1, m2, ..., mn)` | `argument` matches all of the matchers `m1` to `mn`. | +| `AllOfArray({m0, m1, ..., mn})`, `AllOfArray(a_container)`, `AllOfArray(begin, end)`, `AllOfArray(array)`, or `AllOfArray(array, count)` | The same as `AllOf()` except that the matchers come from an initializer list, STL-style container, iterator range, or C-style array. | +| `AnyOf(m1, m2, ..., mn)` | `argument` matches at least one of the matchers `m1` to `mn`. 
| +| `AnyOfArray({m0, m1, ..., mn})`, `AnyOfArray(a_container)`, `AnyOfArray(begin, end)`, `AnyOfArray(array)`, or `AnyOfArray(array, count)` | The same as `AnyOf()` except that the matchers come from an initializer list, STL-style container, iterator range, or C-style array. | +| `Not(m)` | `argument` doesn't match matcher `m`. | + + + + +#### Adapters for Matchers + + +| Matcher | Description | +| :---------------------- | :------------------------------------ | +| `MatcherCast(m)` | casts matcher `m` to type `Matcher`. | +| `SafeMatcherCast(m)` | [safely casts](cook_book.md#casting-matchers) matcher `m` to type `Matcher`. | +| `Truly(predicate)` | `predicate(argument)` returns something considered by C++ to be true, where `predicate` is a function or functor. | + + +`AddressSatisfies(callback)` and `Truly(callback)` take ownership of `callback`, +which must be a permanent callback. + +#### Using Matchers as Predicates {#MatchersAsPredicatesCheat} + + +| Matcher | Description | +| :---------------------------- | :------------------------------------------ | +| `Matches(m)(value)` | evaluates to `true` if `value` matches `m`. You can use `Matches(m)` alone as a unary functor. | +| `ExplainMatchResult(m, value, result_listener)` | evaluates to `true` if `value` matches `m`, explaining the result to `result_listener`. | +| `Value(value, m)` | evaluates to `true` if `value` matches `m`. | + + +#### Defining Matchers + + +| Matcher | Description | +| :----------------------------------- | :------------------------------------ | +| `MATCHER(IsEven, "") { return (arg % 2) == 0; }` | Defines a matcher `IsEven()` to match an even number. | +| `MATCHER_P(IsDivisibleBy, n, "") { *result_listener << "where the remainder is " << (arg % n); return (arg % n) == 0; }` | Defines a macher `IsDivisibleBy(n)` to match a number divisible by `n`. | +| `MATCHER_P2(IsBetween, a, b, std::string(negation ? 
"isn't" : "is") + " between " + PrintToString(a) + " and " + PrintToString(b)) { return a <= arg && arg <= b; }` | Defines a matcher `IsBetween(a, b)` to match a value in the range [`a`, `b`]. | + + +**Notes:** + +1. The `MATCHER*` macros cannot be used inside a function or class. +2. The matcher body must be *purely functional* (i.e. it cannot have any side + effect, and the result must not depend on anything other than the value + being matched and the matcher parameters). +3. You can use `PrintToString(x)` to convert a value `x` of any type to a + string. + +### Actions {#ActionList} + +**Actions** specify what a mock function should do when invoked. + +#### Returning a Value + + +| | | +| :-------------------------- | :-------------------------------------------- | +| `Return()` | Return from a `void` mock function. | +| `Return(value)` | Return `value`. If the type of `value` is different to the mock function's return type, `value` is converted to the latter type at the time the expectation is set, not when the action is executed. | +| `ReturnArg()` | Return the `N`-th (0-based) argument. | +| `ReturnNew(a1, ..., ak)` | Return `new T(a1, ..., ak)`; a different object is created each time. | +| `ReturnNull()` | Return a null pointer. | +| `ReturnPointee(ptr)` | Return the value pointed to by `ptr`. | +| `ReturnRef(variable)` | Return a reference to `variable`. | +| `ReturnRefOfCopy(value)` | Return a reference to a copy of `value`; the copy lives as long as the action. | + + +#### Side Effects + + +| | | +| :--------------------------------- | :-------------------------------------- | +| `Assign(&variable, value)` | Assign `value` to variable. | +| `DeleteArg()` | Delete the `N`-th (0-based) argument, which must be a pointer. | +| `SaveArg(pointer)` | Save the `N`-th (0-based) argument to `*pointer`. | +| `SaveArgPointee(pointer)` | Save the value pointed to by the `N`-th (0-based) argument to `*pointer`. 
| +| `SetArgReferee(value)` | Assign value to the variable referenced by the `N`-th (0-based) argument. | +| `SetArgPointee(value)` | Assign `value` to the variable pointed by the `N`-th (0-based) argument. | +| `SetArgumentPointee(value)` | Same as `SetArgPointee(value)`. Deprecated. Will be removed in v1.7.0. | +| `SetArrayArgument(first, last)` | Copies the elements in source range [`first`, `last`) to the array pointed to by the `N`-th (0-based) argument, which can be either a pointer or an iterator. The action does not take ownership of the elements in the source range. | +| `SetErrnoAndReturn(error, value)` | Set `errno` to `error` and return `value`. | +| `Throw(exception)` | Throws the given exception, which can be any copyable value. Available since v1.1.0. | + + +#### Using a Function, Functor, or Lambda as an Action + +In the following, by "callable" we mean a free function, `std::function`, +functor, or lambda. + + +| | | +| :---------------------------------- | :------------------------------------- | +| `f` | Invoke f with the arguments passed to the mock function, where f is a callable. | +| `Invoke(f)` | Invoke `f` with the arguments passed to the mock function, where `f` can be a global/static function or a functor. | +| `Invoke(object_pointer, &class::method)` | Invoke the method on the object with the arguments passed to the mock function. | +| `InvokeWithoutArgs(f)` | Invoke `f`, which can be a global/static function or a functor. `f` must take no arguments. | +| `InvokeWithoutArgs(object_pointer, &class::method)` | Invoke the method on the object, which takes no arguments. | +| `InvokeArgument(arg1, arg2, ..., argk)` | Invoke the mock function's `N`-th (0-based) argument, which must be a function or a functor, with the `k` arguments. | + + +The return value of the invoked function is used as the return value of the +action. 
+ +When defining a callable to be used with `Invoke*()`, you can declare any unused +parameters as `Unused`: + +```cpp +using ::testing::Invoke; +double Distance(Unused, double x, double y) { return sqrt(x*x + y*y); } +... +EXPECT_CALL(mock, Foo("Hi", _, _)).WillOnce(Invoke(Distance)); +``` + +`Invoke(callback)` and `InvokeWithoutArgs(callback)` take ownership of +`callback`, which must be permanent. The type of `callback` must be a base +callback type instead of a derived one, e.g. + +```cpp + BlockingClosure* done = new BlockingClosure; + ... Invoke(done) ...; // This won't compile! + + Closure* done2 = new BlockingClosure; + ... Invoke(done2) ...; // This works. +``` + +In `InvokeArgument(...)`, if an argument needs to be passed by reference, +wrap it inside `ByRef()`. For example, + +```cpp +using ::testing::ByRef; +using ::testing::InvokeArgument; +... +InvokeArgument<2>(5, string("Hi"), ByRef(foo)) +``` + +calls the mock function's #2 argument, passing to it `5` and `string("Hi")` by +value, and `foo` by reference. + +#### Default Action + + +| Matcher | Description | +| :------------ | :----------------------------------------------------- | +| `DoDefault()` | Do the default action (specified by `ON_CALL()` or the built-in one). | + + +**Note:** due to technical reasons, `DoDefault()` cannot be used inside a +composite action - trying to do so will result in a run-time error. + + + +#### Composite Actions + + +| | | +| :----------------------------- | :------------------------------------------ | +| `DoAll(a1, a2, ..., an)` | Do all actions `a1` to `an` and return the result of `an` in each invocation. The first `n - 1` sub-actions must return void. | +| `IgnoreResult(a)` | Perform action `a` and ignore its result. `a` must not return void. | +| `WithArg(a)` | Pass the `N`-th (0-based) argument of the mock function to action `a` and perform it. | +| `WithArgs(a)` | Pass the selected (0-based) arguments of the mock function to action `a` and perform it. 
| +| `WithoutArgs(a)` | Perform action `a` without any arguments. | + + +#### Defining Actions + + + + + + + +
`struct SumAction {`
+  `template <typename T>`
+  `T operator()(T x, T y) { return x + y; }`
+ `};` +
Defines a generic functor that can be used as an action summing its + arguments.
+ + +| | | +| :--------------------------------- | :-------------------------------------- | +| `ACTION(Sum) { return arg0 + arg1; }` | Defines an action `Sum()` to return the sum of the mock function's argument #0 and #1. | +| `ACTION_P(Plus, n) { return arg0 + n; }` | Defines an action `Plus(n)` to return the sum of the mock function's argument #0 and `n`. | +| `ACTION_Pk(Foo, p1, ..., pk) { statements; }` | Defines a parameterized action `Foo(p1, ..., pk)` to execute the given `statements`. | + + +The `ACTION*` macros cannot be used inside a function or class. + +### Cardinalities {#CardinalityList} + +These are used in `Times()` to specify how many times a mock function will be +called: + + +| | | +| :---------------- | :----------------------------------------------------- | +| `AnyNumber()` | The function can be called any number of times. | +| `AtLeast(n)` | The call is expected at least `n` times. | +| `AtMost(n)` | The call is expected at most `n` times. | +| `Between(m, n)` | The call is expected between `m` and `n` (inclusive) times. | +| `Exactly(n) or n` | The call is expected exactly `n` times. In particular, the call should never happen when `n` is 0. | + + +### Expectation Order + +By default, the expectations can be matched in *any* order. If some or all +expectations must be matched in a given order, there are two ways to specify it. +They can be used either independently or together. + +#### The After Clause {#AfterClause} + +```cpp +using ::testing::Expectation; +... +Expectation init_x = EXPECT_CALL(foo, InitX()); +Expectation init_y = EXPECT_CALL(foo, InitY()); +EXPECT_CALL(foo, Bar()) + .After(init_x, init_y); +``` + +says that `Bar()` can be called only after both `InitX()` and `InitY()` have +been called. + +If you don't know how many pre-requisites an expectation has when you write it, +you can use an `ExpectationSet` to collect them: + +```cpp +using ::testing::ExpectationSet; +... 
+ExpectationSet all_inits; +for (int i = 0; i < element_count; i++) { + all_inits += EXPECT_CALL(foo, InitElement(i)); +} +EXPECT_CALL(foo, Bar()) + .After(all_inits); +``` + +says that `Bar()` can be called only after all elements have been initialized +(but we don't care about which elements get initialized before the others). + +Modifying an `ExpectationSet` after using it in an `.After()` doesn't affect the +meaning of the `.After()`. + +#### Sequences {#UsingSequences} + +When you have a long chain of sequential expectations, it's easier to specify +the order using **sequences**, which don't require you to given each expectation +in the chain a different name. *All expected calls* in the same sequence must +occur in the order they are specified. + +```cpp +using ::testing::Return; +using ::testing::Sequence; +Sequence s1, s2; +... +EXPECT_CALL(foo, Reset()) + .InSequence(s1, s2) + .WillOnce(Return(true)); +EXPECT_CALL(foo, GetSize()) + .InSequence(s1) + .WillOnce(Return(1)); +EXPECT_CALL(foo, Describe(A())) + .InSequence(s2) + .WillOnce(Return("dummy")); +``` + +says that `Reset()` must be called before *both* `GetSize()` *and* `Describe()`, +and the latter two can occur in any order. + +To put many expectations in a sequence conveniently: + +```cpp +using ::testing::InSequence; +{ + InSequence seq; + + EXPECT_CALL(...)...; + EXPECT_CALL(...)...; + ... + EXPECT_CALL(...)...; +} +``` + +says that all expected calls in the scope of `seq` must occur in strict order. +The name `seq` is irrelevant. + +### Verifying and Resetting a Mock + +gMock will verify the expectations on a mock object when it is destructed, or +you can do it earlier: + +```cpp +using ::testing::Mock; +... +// Verifies and removes the expectations on mock_obj; +// returns true if and only if successful. +Mock::VerifyAndClearExpectations(&mock_obj); +... 
+// Verifies and removes the expectations on mock_obj; +// also removes the default actions set by ON_CALL(); +// returns true if and only if successful. +Mock::VerifyAndClear(&mock_obj); +``` + +You can also tell gMock that a mock object can be leaked and doesn't need to be +verified: + +```cpp +Mock::AllowLeak(&mock_obj); +``` + +### Mock Classes + +gMock defines a convenient mock class template + +```cpp +class MockFunction { + public: + MOCK_METHOD(R, Call, (A1, ..., An)); +}; +``` + +See this [recipe](cook_book.md#using-check-points) for one application of it. + +### Flags + + +| Flag | Description | +| :----------------------------- | :---------------------------------------- | +| `--gmock_catch_leaked_mocks=0` | Don't report leaked mock objects as failures. | +| `--gmock_verbose=LEVEL` | Sets the default verbosity level (`info`, `warning`, or `error`) of Google Mock messages. | + diff --git a/src/test/gtest/googlemock/docs/cook_book.md b/src/test/gtest/googlemock/docs/cook_book.md new file mode 100644 index 00000000..ea55ab35 --- /dev/null +++ b/src/test/gtest/googlemock/docs/cook_book.md @@ -0,0 +1,4270 @@ +# gMock Cookbook + + + +You can find recipes for using gMock here. If you haven't yet, please read +[this](for_dummies.md) first to make sure you understand the basics. + +**Note:** gMock lives in the `testing` name space. For readability, it is +recommended to write `using ::testing::Foo;` once in your file before using the +name `Foo` defined by gMock. We omit such `using` statements in this section for +brevity, but you should do it in your own code. + +## Creating Mock Classes + +Mock classes are defined as normal classes, using the `MOCK_METHOD` macro to +generate mocked methods. The macro gets 3 or 4 parameters: + +```cpp +class MyMock { + public: + MOCK_METHOD(ReturnType, MethodName, (Args...)); + MOCK_METHOD(ReturnType, MethodName, (Args...), (Specs...)); +}; +``` + +The first 3 parameters are simply the method declaration, split into 3 parts. 
+The 4th parameter accepts a closed list of qualifiers, which affect the +generated method: + +* **`const`** - Makes the mocked method a `const` method. Required if + overriding a `const` method. +* **`override`** - Marks the method with `override`. Recommended if overriding + a `virtual` method. +* **`noexcept`** - Marks the method with `noexcept`. Required if overriding a + `noexcept` method. +* **`Calltype(...)`** - Sets the call type for the method (e.g. to + `STDMETHODCALLTYPE`), useful in Windows. + +### Dealing with unprotected commas + +Unprotected commas, i.e. commas which are not surrounded by parentheses, prevent +`MOCK_METHOD` from parsing its arguments correctly: + +```cpp {.bad} +class MockFoo { + public: + MOCK_METHOD(std::pair, GetPair, ()); // Won't compile! + MOCK_METHOD(bool, CheckMap, (std::map, bool)); // Won't compile! +}; +``` + +Solution 1 - wrap with parentheses: + +```cpp {.good} +class MockFoo { + public: + MOCK_METHOD((std::pair), GetPair, ()); + MOCK_METHOD(bool, CheckMap, ((std::map), bool)); +}; +``` + +Note that wrapping a return or argument type with parentheses is, in general, +invalid C++. `MOCK_METHOD` removes the parentheses. + +Solution 2 - define an alias: + +```cpp {.good} +class MockFoo { + public: + using BoolAndInt = std::pair; + MOCK_METHOD(BoolAndInt, GetPair, ()); + using MapIntDouble = std::map; + MOCK_METHOD(bool, CheckMap, (MapIntDouble, bool)); +}; +``` + +### Mocking Private or Protected Methods + +You must always put a mock method definition (`MOCK_METHOD`) in a `public:` +section of the mock class, regardless of the method being mocked being `public`, +`protected`, or `private` in the base class. This allows `ON_CALL` and +`EXPECT_CALL` to reference the mock function from outside of the mock class. +(Yes, C++ allows a subclass to change the access level of a virtual function in +the base class.) Example: + +```cpp +class Foo { + public: + ... 
+ virtual bool Transform(Gadget* g) = 0; + + protected: + virtual void Resume(); + + private: + virtual int GetTimeOut(); +}; + +class MockFoo : public Foo { + public: + ... + MOCK_METHOD(bool, Transform, (Gadget* g), (override)); + + // The following must be in the public section, even though the + // methods are protected or private in the base class. + MOCK_METHOD(void, Resume, (), (override)); + MOCK_METHOD(int, GetTimeOut, (), (override)); +}; +``` + +### Mocking Overloaded Methods + +You can mock overloaded functions as usual. No special attention is required: + +```cpp +class Foo { + ... + + // Must be virtual as we'll inherit from Foo. + virtual ~Foo(); + + // Overloaded on the types and/or numbers of arguments. + virtual int Add(Element x); + virtual int Add(int times, Element x); + + // Overloaded on the const-ness of this object. + virtual Bar& GetBar(); + virtual const Bar& GetBar() const; +}; + +class MockFoo : public Foo { + ... + MOCK_METHOD(int, Add, (Element x), (override)); + MOCK_METHOD(int, Add, (int times, Element x), (override)); + + MOCK_METHOD(Bar&, GetBar, (), (override)); + MOCK_METHOD(const Bar&, GetBar, (), (const, override)); +}; +``` + +**Note:** if you don't mock all versions of the overloaded method, the compiler +will give you a warning about some methods in the base class being hidden. To +fix that, use `using` to bring them in scope: + +```cpp +class MockFoo : public Foo { + ... + using Foo::Add; + MOCK_METHOD(int, Add, (Element x), (override)); + // We don't want to mock int Add(int times, Element x); + ... +}; +``` + +### Mocking Class Templates + +You can mock class templates just like any class. + +```cpp +template +class StackInterface { + ... + // Must be virtual as we'll inherit from StackInterface. + virtual ~StackInterface(); + + virtual int GetSize() const = 0; + virtual void Push(const Elem& x) = 0; +}; + +template +class MockStack : public StackInterface { + ... 
+ MOCK_METHOD(int, GetSize, (), (override)); + MOCK_METHOD(void, Push, (const Elem& x), (override)); +}; +``` + +### Mocking Non-virtual Methods {#MockingNonVirtualMethods} + +gMock can mock non-virtual functions to be used in Hi-perf dependency +injection. + +In this case, instead of sharing a common base class with the real class, your +mock class will be *unrelated* to the real class, but contain methods with the +same signatures. The syntax for mocking non-virtual methods is the *same* as +mocking virtual methods (just don't add `override`): + +```cpp +// A simple packet stream class. None of its members is virtual. +class ConcretePacketStream { + public: + void AppendPacket(Packet* new_packet); + const Packet* GetPacket(size_t packet_number) const; + size_t NumberOfPackets() const; + ... +}; + +// A mock packet stream class. It inherits from no other, but defines +// GetPacket() and NumberOfPackets(). +class MockPacketStream { + public: + MOCK_METHOD(const Packet*, GetPacket, (size_t packet_number), (const)); + MOCK_METHOD(size_t, NumberOfPackets, (), (const)); + ... +}; +``` + +Note that the mock class doesn't define `AppendPacket()`, unlike the real class. +That's fine as long as the test doesn't need to call it. + +Next, you need a way to say that you want to use `ConcretePacketStream` in +production code, and use `MockPacketStream` in tests. Since the functions are +not virtual and the two classes are unrelated, you must specify your choice at +*compile time* (as opposed to run time). + +One way to do it is to templatize your code that needs to use a packet stream. +More specifically, you will give your code a template type argument for the type +of the packet stream. In production, you will instantiate your template with +`ConcretePacketStream` as the type argument. In tests, you will instantiate the +same template with `MockPacketStream`. For example, you may write: + +```cpp +template +void CreateConnection(PacketStream* stream) { ... 
} + +template +class PacketReader { + public: + void ReadPackets(PacketStream* stream, size_t packet_num); +}; +``` + +Then you can use `CreateConnection()` and +`PacketReader` in production code, and use +`CreateConnection()` and `PacketReader` in +tests. + +```cpp + MockPacketStream mock_stream; + EXPECT_CALL(mock_stream, ...)...; + .. set more expectations on mock_stream ... + PacketReader reader(&mock_stream); + ... exercise reader ... +``` + +### Mocking Free Functions + +It's possible to use gMock to mock a free function (i.e. a C-style function or a +static method). You just need to rewrite your code to use an interface (abstract +class). + +Instead of calling a free function (say, `OpenFile`) directly, introduce an +interface for it and have a concrete subclass that calls the free function: + +```cpp +class FileInterface { + public: + ... + virtual bool Open(const char* path, const char* mode) = 0; +}; + +class File : public FileInterface { + public: + ... + virtual bool Open(const char* path, const char* mode) { + return OpenFile(path, mode); + } +}; +``` + +Your code should talk to `FileInterface` to open a file. Now it's easy to mock +out the function. + +This may seem like a lot of hassle, but in practice you often have multiple +related functions that you can put in the same interface, so the per-function +syntactic overhead will be much lower. + +If you are concerned about the performance overhead incurred by virtual +functions, and profiling confirms your concern, you can combine this with the +recipe for [mocking non-virtual methods](#MockingNonVirtualMethods). + +### Old-Style `MOCK_METHODn` Macros + +Before the generic `MOCK_METHOD` macro was introduced, mocks where created using +a family of macros collectively called `MOCK_METHODn`. These macros are still +supported, though migration to the new `MOCK_METHOD` is recommended. 
+ +The macros in the `MOCK_METHODn` family differ from `MOCK_METHOD`: + +* The general structure is `MOCK_METHODn(MethodName, ReturnType(Args))`, + instead of `MOCK_METHOD(ReturnType, MethodName, (Args))`. +* The number `n` must equal the number of arguments. +* When mocking a const method, one must use `MOCK_CONST_METHODn`. +* When mocking a class template, the macro name must be suffixed with `_T`. +* In order to specify the call type, the macro name must be suffixed with + `_WITH_CALLTYPE`, and the call type is the first macro argument. + +Old macros and their new equivalents: + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Simple
Old `MOCK_METHOD1(Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int))`
Const Method
Old +`MOCK_CONST_METHOD1(Foo, bool(int))`
New +`MOCK_METHOD(bool, Foo, (int), (const))`
Method in a Class Template
Old `MOCK_METHOD1_T(Foo, bool(int))`
New +`MOCK_METHOD(bool, Foo, (int))`
Const Method in a Class Template
Old + `MOCK_CONST_METHOD1_T(Foo, bool(int))`
New + `MOCK_METHOD(bool, Foo, (int), (const))`
Method with Call Type
Old +`MOCK_METHOD1_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int), +(Calltype(STDMETHODCALLTYPE)))`
Const Method with Call Type
Old `MOCK_CONST_METHOD1_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, (int), (const, +Calltype(STDMETHODCALLTYPE)))`
Method with Call Type in a Class Template
Old `MOCK_METHOD1_T_WITH_CALLTYPE(STDMETHODCALLTYPE, Foo, +bool(int))`
New `MOCK_METHOD(bool, Foo, (int), +(Calltype(STDMETHODCALLTYPE)))`
Const Method with Call Type in a Class Template
Old `MOCK_CONST_METHOD1_T_WITH_CALLTYPE(STDMETHODCALLTYPE, +Foo, bool(int))`
New `MOCK_METHOD(bool, Foo, +(int), (const, Calltype(STDMETHODCALLTYPE)))`
+ +### The Nice, the Strict, and the Naggy {#NiceStrictNaggy} + +If a mock method has no `EXPECT_CALL` spec but is called, we say that it's an +"uninteresting call", and the default action (which can be specified using +`ON_CALL()`) of the method will be taken. Currently, an uninteresting call will +also by default cause gMock to print a warning. (In the future, we might remove +this warning by default.) + +However, sometimes you may want to ignore these uninteresting calls, and +sometimes you may want to treat them as errors. gMock lets you make the decision +on a per-mock-object basis. + +Suppose your test uses a mock class `MockFoo`: + +```cpp +TEST(...) { + MockFoo mock_foo; + EXPECT_CALL(mock_foo, DoThis()); + ... code that uses mock_foo ... +} +``` + +If a method of `mock_foo` other than `DoThis()` is called, you will get a +warning. However, if you rewrite your test to use `NiceMock` instead, +you can suppress the warning: + +```cpp +using ::testing::NiceMock; + +TEST(...) { + NiceMock mock_foo; + EXPECT_CALL(mock_foo, DoThis()); + ... code that uses mock_foo ... +} +``` + +`NiceMock` is a subclass of `MockFoo`, so it can be used wherever +`MockFoo` is accepted. + +It also works if `MockFoo`'s constructor takes some arguments, as +`NiceMock` "inherits" `MockFoo`'s constructors: + +```cpp +using ::testing::NiceMock; + +TEST(...) { + NiceMock mock_foo(5, "hi"); // Calls MockFoo(5, "hi"). + EXPECT_CALL(mock_foo, DoThis()); + ... code that uses mock_foo ... +} +``` + +The usage of `StrictMock` is similar, except that it makes all uninteresting +calls failures: + +```cpp +using ::testing::StrictMock; + +TEST(...) { + StrictMock mock_foo; + EXPECT_CALL(mock_foo, DoThis()); + ... code that uses mock_foo ... + + // The test will fail if a method of mock_foo other than DoThis() + // is called. 
+} +``` + +NOTE: `NiceMock` and `StrictMock` only affect *uninteresting* calls (calls of +*methods* with no expectations); they do not affect *unexpected* calls (calls of +methods with expectations, but they don't match). See +[Understanding Uninteresting vs Unexpected Calls](#uninteresting-vs-unexpected). + +There are some caveats though (I dislike them just as much as the next guy, but +sadly they are side effects of C++'s limitations): + +1. `NiceMock` and `StrictMock` only work for mock methods + defined using the `MOCK_METHOD` macro **directly** in the `MockFoo` class. + If a mock method is defined in a **base class** of `MockFoo`, the "nice" or + "strict" modifier may not affect it, depending on the compiler. In + particular, nesting `NiceMock` and `StrictMock` (e.g. + `NiceMock >`) is **not** supported. +2. `NiceMock` and `StrictMock` may not work correctly if the + destructor of `MockFoo` is not virtual. We would like to fix this, but it + requires cleaning up existing tests. http://b/28934720 tracks the issue. +3. During the constructor or destructor of `MockFoo`, the mock object is *not* + nice or strict. This may cause surprises if the constructor or destructor + calls a mock method on `this` object. (This behavior, however, is consistent + with C++'s general rule: if a constructor or destructor calls a virtual + method of `this` object, that method is treated as non-virtual. In other + words, to the base class's constructor or destructor, `this` object behaves + like an instance of the base class, not the derived class. This rule is + required for safety. Otherwise a base constructor may use members of a + derived class before they are initialized, or a base destructor may use + members of a derived class after they have been destroyed.) + +Finally, you should be **very cautious** about when to use naggy or strict +mocks, as they tend to make tests more brittle and harder to maintain. 
When you +refactor your code without changing its externally visible behavior, ideally you +shouldn't need to update any tests. If your code interacts with a naggy mock, +however, you may start to get spammed with warnings as the result of your +change. Worse, if your code interacts with a strict mock, your tests may start +to fail and you'll be forced to fix them. Our general recommendation is to use +nice mocks (not yet the default) most of the time, use naggy mocks (the current +default) when developing or debugging tests, and use strict mocks only as the +last resort. + +### Simplifying the Interface without Breaking Existing Code {#SimplerInterfaces} + +Sometimes a method has a long list of arguments that is mostly uninteresting. +For example: + +```cpp +class LogSink { + public: + ... + virtual void send(LogSeverity severity, const char* full_filename, + const char* base_filename, int line, + const struct tm* tm_time, + const char* message, size_t message_len) = 0; +}; +``` + +This method's argument list is lengthy and hard to work with (the `message` +argument is not even 0-terminated). If we mock it as is, using the mock will be +awkward. If, however, we try to simplify this interface, we'll need to fix all +clients depending on it, which is often infeasible. + +The trick is to redispatch the method in the mock class: + +```cpp +class ScopedMockLog : public LogSink { + public: + ... + virtual void send(LogSeverity severity, const char* full_filename, + const char* base_filename, int line, const tm* tm_time, + const char* message, size_t message_len) { + // We are only interested in the log severity, full file name, and + // log message. 
+ Log(severity, full_filename, std::string(message, message_len)); + } + + // Implements the mock method: + // + // void Log(LogSeverity severity, + // const string& file_path, + // const string& message); + MOCK_METHOD(void, Log, + (LogSeverity severity, const string& file_path, + const string& message)); +}; +``` + +By defining a new mock method with a trimmed argument list, we make the mock +class more user-friendly. + +This technique may also be applied to make overloaded methods more amenable to +mocking. For example, when overloads have been used to implement default +arguments: + +```cpp +class MockTurtleFactory : public TurtleFactory { + public: + Turtle* MakeTurtle(int length, int weight) override { ... } + Turtle* MakeTurtle(int length, int weight, int speed) override { ... } + + // the above methods delegate to this one: + MOCK_METHOD(Turtle*, DoMakeTurtle, ()); +}; +``` + +This allows tests that don't care which overload was invoked to avoid specifying +argument matchers: + +```cpp +ON_CALL(factory, DoMakeTurtle) + .WillByDefault(MakeMockTurtle()); +``` + +### Alternative to Mocking Concrete Classes + +Often you may find yourself using classes that don't implement interfaces. In +order to test your code that uses such a class (let's call it `Concrete`), you +may be tempted to make the methods of `Concrete` virtual and then mock it. + +Try not to do that. + +Making a non-virtual function virtual is a big decision. It creates an extension +point where subclasses can tweak your class' behavior. This weakens your control +on the class because now it's harder to maintain the class invariants. You +should make a function virtual only when there is a valid reason for a subclass +to override it. + +Mocking concrete classes directly is problematic as it creates a tight coupling +between the class and the tests - any small change in the class may invalidate +your tests and make test maintenance a pain. 
+ +To avoid such problems, many programmers have been practicing "coding to +interfaces": instead of talking to the `Concrete` class, your code would define +an interface and talk to it. Then you implement that interface as an adaptor on +top of `Concrete`. In tests, you can easily mock that interface to observe how +your code is doing. + +This technique incurs some overhead: + +* You pay the cost of virtual function calls (usually not a problem). +* There is more abstraction for the programmers to learn. + +However, it can also bring significant benefits in addition to better +testability: + +* `Concrete`'s API may not fit your problem domain very well, as you may not + be the only client it tries to serve. By designing your own interface, you + have a chance to tailor it to your need - you may add higher-level + functionalities, rename stuff, etc instead of just trimming the class. This + allows you to write your code (user of the interface) in a more natural way, + which means it will be more readable, more maintainable, and you'll be more + productive. +* If `Concrete`'s implementation ever has to change, you don't have to rewrite + everywhere it is used. Instead, you can absorb the change in your + implementation of the interface, and your other code and tests will be + insulated from this change. + +Some people worry that if everyone is practicing this technique, they will end +up writing lots of redundant code. This concern is totally understandable. +However, there are two reasons why it may not be the case: + +* Different projects may need to use `Concrete` in different ways, so the best + interfaces for them will be different. Therefore, each of them will have its + own domain-specific interface on top of `Concrete`, and they will not be the + same code. +* If enough projects want to use the same interface, they can always share it, + just like they have been sharing `Concrete`. 
You can check in the interface + and the adaptor somewhere near `Concrete` (perhaps in a `contrib` + sub-directory) and let many projects use it. + +You need to weigh the pros and cons carefully for your particular problem, but +I'd like to assure you that the Java community has been practicing this for a +long time and it's a proven effective technique applicable in a wide variety of +situations. :-) + +### Delegating Calls to a Fake {#DelegatingToFake} + +Some times you have a non-trivial fake implementation of an interface. For +example: + +```cpp +class Foo { + public: + virtual ~Foo() {} + virtual char DoThis(int n) = 0; + virtual void DoThat(const char* s, int* p) = 0; +}; + +class FakeFoo : public Foo { + public: + char DoThis(int n) override { + return (n > 0) ? '+' : + (n < 0) ? '-' : '0'; + } + + void DoThat(const char* s, int* p) override { + *p = strlen(s); + } +}; +``` + +Now you want to mock this interface such that you can set expectations on it. +However, you also want to use `FakeFoo` for the default behavior, as duplicating +it in the mock object is, well, a lot of work. + +When you define the mock class using gMock, you can have it delegate its default +action to a fake class you already have, using this pattern: + +```cpp +class MockFoo : public Foo { + public: + // Normal mock method definitions using gMock. + MOCK_METHOD(char, DoThis, (int n), (override)); + MOCK_METHOD(void, DoThat, (const char* s, int* p), (override)); + + // Delegates the default actions of the methods to a FakeFoo object. + // This must be called *before* the custom ON_CALL() statements. + void DelegateToFake() { + ON_CALL(*this, DoThis).WillByDefault([this](int n) { + return fake_.DoThis(n); + }); + ON_CALL(*this, DoThat).WillByDefault([this](const char* s, int* p) { + fake_.DoThat(s, p); + }); + } + + private: + FakeFoo fake_; // Keeps an instance of the fake in the mock. +}; +``` + +With that, you can use `MockFoo` in your tests as usual. 
Just remember that if
+you don't explicitly set an action in an `ON_CALL()` or `EXPECT_CALL()`, the
+fake will be called upon to do it:
+
+```cpp
+using ::testing::_;
+
+TEST(AbcTest, Xyz) {
+ MockFoo foo;
+
+ foo.DelegateToFake(); // Enables the fake for delegation.
+
+ // Put your ON_CALL(foo, ...)s here, if any.
+
+ // No action specified, meaning to use the default action.
+ EXPECT_CALL(foo, DoThis(5));
+ EXPECT_CALL(foo, DoThat(_, _));
+
+ int n = 0;
+ EXPECT_EQ('+', foo.DoThis(5)); // FakeFoo::DoThis() is invoked.
+ foo.DoThat("Hi", &n); // FakeFoo::DoThat() is invoked.
+ EXPECT_EQ(2, n);
+}
+```
+
+**Some tips:**
+
+* If you want, you can still override the default action by providing your own
+ `ON_CALL()` or using `.WillOnce()` / `.WillRepeatedly()` in `EXPECT_CALL()`.
+* In `DelegateToFake()`, you only need to delegate the methods whose fake
+ implementation you intend to use.
+
+* The general technique discussed here works for overloaded methods, but
+ you'll need to tell the compiler which version you mean. To disambiguate a
+ mock function (the one you specify inside the parentheses of `ON_CALL()`),
+ use [this technique](#SelectOverload); to disambiguate a fake function (the
+ one you place inside `Invoke()`), use a `static_cast` to specify the
+ function's type. For instance, if class `Foo` has methods `char DoThis(int
+ n)` and `bool DoThis(double x) const`, and you want to invoke the latter,
+ you need to write `Invoke(&fake_, static_cast<bool (FakeFoo::*)(double) const>(&FakeFoo::DoThis))` instead of `Invoke(&fake_, &FakeFoo::DoThis)`
+ (The strange-looking thing inside the angled brackets of `static_cast` is
+ the type of a function pointer to the second `DoThis()` method.).
+
+* Having to mix a mock and a fake is often a sign of something gone wrong.
+ Perhaps you haven't got used to the interaction-based way of testing yet. Or
+ perhaps your interface is taking on too many roles and should be split up.
+ Therefore, **don't abuse this**. 
We would only recommend to do it as an + intermediate step when you are refactoring your code. + +Regarding the tip on mixing a mock and a fake, here's an example on why it may +be a bad sign: Suppose you have a class `System` for low-level system +operations. In particular, it does file and I/O operations. And suppose you want +to test how your code uses `System` to do I/O, and you just want the file +operations to work normally. If you mock out the entire `System` class, you'll +have to provide a fake implementation for the file operation part, which +suggests that `System` is taking on too many roles. + +Instead, you can define a `FileOps` interface and an `IOOps` interface and split +`System`'s functionalities into the two. Then you can mock `IOOps` without +mocking `FileOps`. + +### Delegating Calls to a Real Object + +When using testing doubles (mocks, fakes, stubs, and etc), sometimes their +behaviors will differ from those of the real objects. This difference could be +either intentional (as in simulating an error such that you can test the error +handling code) or unintentional. If your mocks have different behaviors than the +real objects by mistake, you could end up with code that passes the tests but +fails in production. + +You can use the *delegating-to-real* technique to ensure that your mock has the +same behavior as the real object while retaining the ability to validate calls. +This technique is very similar to the [delegating-to-fake](#DelegatingToFake) +technique, the difference being that we use a real object instead of a fake. +Here's an example: + +```cpp +using ::testing::AtLeast; + +class MockFoo : public Foo { + public: + MockFoo() { + // By default, all calls are delegated to the real object. + ON_CALL(*this, DoThis).WillByDefault([this](int n) { + return real_.DoThis(n); + }); + ON_CALL(*this, DoThat).WillByDefault([this](const char* s, int* p) { + real_.DoThat(s, p); + }); + ... 
+ } + MOCK_METHOD(char, DoThis, ...); + MOCK_METHOD(void, DoThat, ...); + ... + private: + Foo real_; +}; + +... + MockFoo mock; + EXPECT_CALL(mock, DoThis()) + .Times(3); + EXPECT_CALL(mock, DoThat("Hi")) + .Times(AtLeast(1)); + ... use mock in test ... +``` + +With this, gMock will verify that your code made the right calls (with the right +arguments, in the right order, called the right number of times, etc), and a +real object will answer the calls (so the behavior will be the same as in +production). This gives you the best of both worlds. + +### Delegating Calls to a Parent Class + +Ideally, you should code to interfaces, whose methods are all pure virtual. In +reality, sometimes you do need to mock a virtual method that is not pure (i.e, +it already has an implementation). For example: + +```cpp +class Foo { + public: + virtual ~Foo(); + + virtual void Pure(int n) = 0; + virtual int Concrete(const char* str) { ... } +}; + +class MockFoo : public Foo { + public: + // Mocking a pure method. + MOCK_METHOD(void, Pure, (int n), (override)); + // Mocking a concrete method. Foo::Concrete() is shadowed. + MOCK_METHOD(int, Concrete, (const char* str), (override)); +}; +``` + +Sometimes you may want to call `Foo::Concrete()` instead of +`MockFoo::Concrete()`. Perhaps you want to do it as part of a stub action, or +perhaps your test doesn't need to mock `Concrete()` at all (but it would be +oh-so painful to have to define a new mock class whenever you don't need to mock +one of its methods). + +The trick is to leave a back door in your mock class for accessing the real +methods in the base class: + +```cpp +class MockFoo : public Foo { + public: + // Mocking a pure method. + MOCK_METHOD(void, Pure, (int n), (override)); + // Mocking a concrete method. Foo::Concrete() is shadowed. + MOCK_METHOD(int, Concrete, (const char* str), (override)); + + // Use this to call Concrete() defined in Foo. 
+ int FooConcrete(const char* str) { return Foo::Concrete(str); } +}; +``` + +Now, you can call `Foo::Concrete()` inside an action by: + +```cpp +... + EXPECT_CALL(foo, Concrete).WillOnce([&foo](const char* str) { + return foo.FooConcrete(str); + }); +``` + +or tell the mock object that you don't want to mock `Concrete()`: + +```cpp +... + ON_CALL(foo, Concrete).WillByDefault([&foo](const char* str) { + return foo.FooConcrete(str); + }); +``` + +(Why don't we just write `{ return foo.Concrete(str); }`? If you do that, +`MockFoo::Concrete()` will be called (and cause an infinite recursion) since +`Foo::Concrete()` is virtual. That's just how C++ works.) + +## Using Matchers + +### Matching Argument Values Exactly + +You can specify exactly which arguments a mock method is expecting: + +```cpp +using ::testing::Return; +... + EXPECT_CALL(foo, DoThis(5)) + .WillOnce(Return('a')); + EXPECT_CALL(foo, DoThat("Hello", bar)); +``` + +### Using Simple Matchers + +You can use matchers to match arguments that have a certain property: + +```cpp +using ::testing::NotNull; +using ::testing::Return; +... + EXPECT_CALL(foo, DoThis(Ge(5))) // The argument must be >= 5. + .WillOnce(Return('a')); + EXPECT_CALL(foo, DoThat("Hello", NotNull())); + // The second argument must not be NULL. +``` + +A frequently used matcher is `_`, which matches anything: + +```cpp + EXPECT_CALL(foo, DoThat(_, NotNull())); +``` + + +### Combining Matchers {#CombiningMatchers} + +You can build complex matchers from existing ones using `AllOf()`, +`AllOfArray()`, `AnyOf()`, `AnyOfArray()` and `Not()`: + +```cpp +using ::testing::AllOf; +using ::testing::Gt; +using ::testing::HasSubstr; +using ::testing::Ne; +using ::testing::Not; +... + // The argument must be > 5 and != 10. + EXPECT_CALL(foo, DoThis(AllOf(Gt(5), + Ne(10)))); + + // The first argument must not contain sub-string "blah". 
+ EXPECT_CALL(foo, DoThat(Not(HasSubstr("blah")),
+ NULL));
+```
+
+### Casting Matchers {#SafeMatcherCast}
+
+gMock matchers are statically typed, meaning that the compiler can catch your
+mistake if you use a matcher of the wrong type (for example, if you use `Eq(5)`
+to match a `string` argument). Good for you!
+
+Sometimes, however, you know what you're doing and want the compiler to give you
+some slack. One example is that you have a matcher for `long` and the argument
+you want to match is `int`. While the two types aren't exactly the same, there
+is nothing really wrong with using a `Matcher<long>` to match an `int` - after
+all, we can first convert the `int` argument to a `long` losslessly before
+giving it to the matcher.
+
+To support this need, gMock gives you the `SafeMatcherCast<T>(m)` function. It
+casts a matcher `m` to type `Matcher<T>`. To ensure safety, gMock checks that
+(let `U` be the type `m` accepts):
+
+1. Type `T` can be *implicitly* cast to type `U`;
+2. When both `T` and `U` are built-in arithmetic types (`bool`, integers, and
+ floating-point numbers), the conversion from `T` to `U` is not lossy (in
+ other words, any value representable by `T` can also be represented by `U`);
+ and
+3. When `U` is a reference, `T` must also be a reference (as the underlying
+ matcher may be interested in the address of the `U` value).
+
+The code won't compile if any of these conditions isn't met.
+
+Here's one example:
+
+```cpp
+using ::testing::SafeMatcherCast;
+
+// A base class and a child class.
+class Base { ... };
+class Derived : public Base { ... };
+
+class MockFoo : public Foo {
+ public:
+ MOCK_METHOD(void, DoThis, (Derived* derived), (override));
+};
+
+...
+ MockFoo foo;
+ // m is a Matcher<Base*> we got from somewhere.
+ EXPECT_CALL(foo, DoThis(SafeMatcherCast<Derived*>(m)));
+```
+
+If you find `SafeMatcherCast<T>(m)` too limiting, you can use a similar function
+`MatcherCast<T>(m)`. 
The difference is that `MatcherCast` works as long as you +can `static_cast` type `T` to type `U`. + +`MatcherCast` essentially lets you bypass C++'s type system (`static_cast` isn't +always safe as it could throw away information, for example), so be careful not +to misuse/abuse it. + +### Selecting Between Overloaded Functions {#SelectOverload} + +If you expect an overloaded function to be called, the compiler may need some +help on which overloaded version it is. + +To disambiguate functions overloaded on the const-ness of this object, use the +`Const()` argument wrapper. + +```cpp +using ::testing::ReturnRef; + +class MockFoo : public Foo { + ... + MOCK_METHOD(Bar&, GetBar, (), (override)); + MOCK_METHOD(const Bar&, GetBar, (), (const, override)); +}; + +... + MockFoo foo; + Bar bar1, bar2; + EXPECT_CALL(foo, GetBar()) // The non-const GetBar(). + .WillOnce(ReturnRef(bar1)); + EXPECT_CALL(Const(foo), GetBar()) // The const GetBar(). + .WillOnce(ReturnRef(bar2)); +``` + +(`Const()` is defined by gMock and returns a `const` reference to its argument.) 
+
+To disambiguate overloaded functions with the same number of arguments but
+different argument types, you may need to specify the exact type of a matcher,
+either by wrapping your matcher in `Matcher<type>()`, or using a matcher whose
+type is fixed (`TypedEq<type>`, `An<type>()`, etc):
+
+```cpp
+using ::testing::An;
+using ::testing::Matcher;
+using ::testing::TypedEq;
+
+class MockPrinter : public Printer {
+ public:
+ MOCK_METHOD(void, Print, (int n), (override));
+ MOCK_METHOD(void, Print, (char c), (override));
+};
+
+TEST(PrinterTest, Print) {
+ MockPrinter printer;
+
+ EXPECT_CALL(printer, Print(An<int>())); // void Print(int);
+ EXPECT_CALL(printer, Print(Matcher<int>(Lt(5)))); // void Print(int);
+ EXPECT_CALL(printer, Print(TypedEq<char>('a'))); // void Print(char);
+
+ printer.Print(3);
+ printer.Print(6);
+ printer.Print('a');
+}
+```
+
+### Performing Different Actions Based on the Arguments
+
+When a mock method is called, the *last* matching expectation that's still
+active will be selected (think "newer overrides older"). So, you can make a
+method do different things depending on its argument values like this:
+
+```cpp
+using ::testing::_;
+using ::testing::Lt;
+using ::testing::Return;
+...
+ // The default case.
+ EXPECT_CALL(foo, DoThis(_))
+ .WillRepeatedly(Return('b'));
+ // The more specific case.
+ EXPECT_CALL(foo, DoThis(Lt(5)))
+ .WillRepeatedly(Return('a'));
+```
+
+Now, if `foo.DoThis()` is called with a value less than 5, `'a'` will be
+returned; otherwise `'b'` will be returned.
+
+### Matching Multiple Arguments as a Whole
+
+Sometimes it's not enough to match the arguments individually. For example, we
+may want to say that the first argument must be less than the second argument.
+The `With()` clause allows us to match all arguments of a mock function as a
+whole. For example,
+
+```cpp
+using ::testing::_;
+using ::testing::Ne;
+using ::testing::Lt;
+...
+ EXPECT_CALL(foo, InRange(Ne(0), _))
+ .With(Lt());
+```
+
+says that the first argument of `InRange()` must not be 0, and must be less than
+the second argument.
+
+The expression inside `With()` must be a matcher of type
+`Matcher< ::std::tuple<A1, ..., An> >`, where `A1`, ..., `An` are the types of
+the function arguments.
+
+You can also write `AllArgs(m)` instead of `m` inside `.With()`. The two forms
+are equivalent, but `.With(AllArgs(Lt()))` is more readable than `.With(Lt())`.
+
+You can use `Args<k1, ..., kn>(m)` to match the `n` selected arguments (as a
+tuple) against `m`. For example,
+
+```cpp
+using ::testing::_;
+using ::testing::AllOf;
+using ::testing::Args;
+using ::testing::Lt;
+...
+ EXPECT_CALL(foo, Blah)
+ .With(AllOf(Args<0, 1>(Lt()), Args<1, 2>(Lt())));
+```
+
+says that `Blah` will be called with arguments `x`, `y`, and `z` where `x < y <
+z`. Note that in this example, it wasn't necessary to specify the positional
+matchers.
+
+As a convenience and example, gMock provides some matchers for 2-tuples,
+including the `Lt()` matcher above. See [here](#MultiArgMatchers) for the
+complete list.
+
+Note that if you want to pass the arguments to a predicate of your own (e.g.
+`.With(Args<0, 1>(Truly(&MyPredicate)))`), that predicate MUST be written to
+take a `::std::tuple` as its argument; gMock will pass the `n` selected
+arguments as *one* single tuple to the predicate.
+
+### Using Matchers as Predicates
+
+Have you noticed that a matcher is just a fancy predicate that also knows how to
+describe itself? Many existing algorithms take predicates as arguments (e.g.
+those defined in STL's `<algorithm>` header), and it would be a shame if gMock
+matchers were not allowed to participate.
+
+Luckily, you can use a matcher where a unary predicate functor is expected by
+wrapping it inside the `Matches()` function. For example,
+
+```cpp
+#include <algorithm>
+#include <vector>
+
+using ::testing::Matches;
+using ::testing::Ge;
+
+vector<int> v;
+...
+// How many elements in v are >= 10? 
+const int count = count_if(v.begin(), v.end(), Matches(Ge(10))); +``` + +Since you can build complex matchers from simpler ones easily using gMock, this +gives you a way to conveniently construct composite predicates (doing the same +using STL's `` header is just painful). For example, here's a +predicate that's satisfied by any number that is >= 0, <= 100, and != 50: + +```cpp +using testing::AllOf; +using testing::Ge; +using testing::Le; +using testing::Matches; +using testing::Ne; +... +Matches(AllOf(Ge(0), Le(100), Ne(50))) +``` + +### Using Matchers in googletest Assertions + +Since matchers are basically predicates that also know how to describe +themselves, there is a way to take advantage of them in googletest assertions. +It's called `ASSERT_THAT` and `EXPECT_THAT`: + +```cpp + ASSERT_THAT(value, matcher); // Asserts that value matches matcher. + EXPECT_THAT(value, matcher); // The non-fatal version. +``` + +For example, in a googletest test you can write: + +```cpp +#include "gmock/gmock.h" + +using ::testing::AllOf; +using ::testing::Ge; +using ::testing::Le; +using ::testing::MatchesRegex; +using ::testing::StartsWith; + +... + EXPECT_THAT(Foo(), StartsWith("Hello")); + EXPECT_THAT(Bar(), MatchesRegex("Line \\d+")); + ASSERT_THAT(Baz(), AllOf(Ge(5), Le(10))); +``` + +which (as you can probably guess) executes `Foo()`, `Bar()`, and `Baz()`, and +verifies that: + +* `Foo()` returns a string that starts with `"Hello"`. +* `Bar()` returns a string that matches regular expression `"Line \\d+"`. +* `Baz()` returns a number in the range [5, 10]. + +The nice thing about these macros is that *they read like English*. They +generate informative messages too. For example, if the first `EXPECT_THAT()` +above fails, the message will be something like: + +```cpp +Value of: Foo() + Actual: "Hi, world!" 
+Expected: starts with "Hello"
+```
+
+**Credit:** The idea of `(ASSERT|EXPECT)_THAT` was borrowed from Joe Walnes'
+Hamcrest project, which adds `assertThat()` to JUnit.
+
+### Using Predicates as Matchers
+
+gMock provides a [built-in set](#MatcherList) of matchers. In case you find them
+lacking, you can use an arbitrary unary predicate function or functor as a
+matcher - as long as the predicate accepts a value of the type you want. You do
+this by wrapping the predicate inside the `Truly()` function, for example:
+
+```cpp
+using ::testing::Truly;
+
+int IsEven(int n) { return (n % 2) == 0 ? 1 : 0; }
+...
+ // Bar() must be called with an even number.
+ EXPECT_CALL(foo, Bar(Truly(IsEven)));
+```
+
+Note that the predicate function / functor doesn't have to return `bool`. It
+works as long as the return value can be used as the condition in the statement
+`if (condition) ...`.
+
+
+
+### Matching Arguments that Are Not Copyable
+
+When you do an `EXPECT_CALL(mock_obj, Foo(bar))`, gMock saves away a copy of
+`bar`. When `Foo()` is called later, gMock compares the argument to `Foo()` with
+the saved copy of `bar`. This way, you don't need to worry about `bar` being
+modified or destroyed after the `EXPECT_CALL()` is executed. The same is true
+when you use matchers like `Eq(bar)`, `Le(bar)`, and so on.
+
+But what if `bar` cannot be copied (i.e. has no copy constructor)? You could
+define your own matcher function or callback and use it with `Truly()`, as the
+previous couple of recipes have shown. Or, you may be able to get away from it
+if you can guarantee that `bar` won't be changed after the `EXPECT_CALL()` is
+executed. Just tell gMock that it should save a reference to `bar`, instead of a
+copy of it. Here's how:
+
+```cpp
+using ::testing::ByRef;
+using ::testing::Eq;
+using ::testing::Lt;
+...
+ // Expects that Foo()'s argument == bar.
+ EXPECT_CALL(mock_obj, Foo(Eq(ByRef(bar))));
+
+ // Expects that Foo()'s argument < bar. 
+ EXPECT_CALL(mock_obj, Foo(Lt(ByRef(bar)))); +``` + +Remember: if you do this, don't change `bar` after the `EXPECT_CALL()`, or the +result is undefined. + +### Validating a Member of an Object + +Often a mock function takes a reference to object as an argument. When matching +the argument, you may not want to compare the entire object against a fixed +object, as that may be over-specification. Instead, you may need to validate a +certain member variable or the result of a certain getter method of the object. +You can do this with `Field()` and `Property()`. More specifically, + +```cpp +Field(&Foo::bar, m) +``` + +is a matcher that matches a `Foo` object whose `bar` member variable satisfies +matcher `m`. + +```cpp +Property(&Foo::baz, m) +``` + +is a matcher that matches a `Foo` object whose `baz()` method returns a value +that satisfies matcher `m`. + +For example: + + +| Expression | Description | +| :--------------------------- | :--------------------------------------- | +| `Field(&Foo::number, Ge(3))` | Matches `x` where `x.number >= 3`. | +| `Property(&Foo::name, StartsWith("John "))` | Matches `x` where `x.name()` starts with `"John "`. | + + +Note that in `Property(&Foo::baz, ...)`, method `baz()` must take no argument +and be declared as `const`. + +BTW, `Field()` and `Property()` can also match plain pointers to objects. For +instance, + +```cpp +using ::testing::Field; +using ::testing::Ge; +... +Field(&Foo::number, Ge(3)) +``` + +matches a plain pointer `p` where `p->number >= 3`. If `p` is `NULL`, the match +will always fail regardless of the inner matcher. + +What if you want to validate more than one members at the same time? Remember +that there are [`AllOf()` and `AllOfArray()`](#CombiningMatchers). + +Finally `Field()` and `Property()` provide overloads that take the field or +property names as the first argument to include it in the error message. This +can be useful when creating combined matchers. 
+ +```cpp +using ::testing::AllOf; +using ::testing::Field; +using ::testing::Matcher; +using ::testing::SafeMatcherCast; + +Matcher IsFoo(const Foo& foo) { + return AllOf(Field("some_field", &Foo::some_field, foo.some_field), + Field("other_field", &Foo::other_field, foo.other_field), + Field("last_field", &Foo::last_field, foo.last_field)); +} +``` + +### Validating the Value Pointed to by a Pointer Argument + +C++ functions often take pointers as arguments. You can use matchers like +`IsNull()`, `NotNull()`, and other comparison matchers to match a pointer, but +what if you want to make sure the value *pointed to* by the pointer, instead of +the pointer itself, has a certain property? Well, you can use the `Pointee(m)` +matcher. + +`Pointee(m)` matches a pointer if and only if `m` matches the value the pointer +points to. For example: + +```cpp +using ::testing::Ge; +using ::testing::Pointee; +... + EXPECT_CALL(foo, Bar(Pointee(Ge(3)))); +``` + +expects `foo.Bar()` to be called with a pointer that points to a value greater +than or equal to 3. + +One nice thing about `Pointee()` is that it treats a `NULL` pointer as a match +failure, so you can write `Pointee(m)` instead of + +```cpp +using ::testing::AllOf; +using ::testing::NotNull; +using ::testing::Pointee; +... + AllOf(NotNull(), Pointee(m)) +``` + +without worrying that a `NULL` pointer will crash your test. + +Also, did we tell you that `Pointee()` works with both raw pointers **and** +smart pointers (`std::unique_ptr`, `std::shared_ptr`, etc)? + +What if you have a pointer to pointer? You guessed it - you can use nested +`Pointee()` to probe deeper inside the value. For example, +`Pointee(Pointee(Lt(3)))` matches a pointer that points to a pointer that points +to a number less than 3 (what a mouthful...). + +### Testing a Certain Property of an Object + +Sometimes you want to specify that an object argument has a certain property, +but there is no existing matcher that does this. 
If you want good error
+messages, you should [define a matcher](#NewMatchers). If you want to do it
+quick and dirty, you could get away with writing an ordinary function.
+
+Let's say you have a mock function that takes an object of type `Foo`, which has
+an `int bar()` method and an `int baz()` method, and you want to constrain that
+the argument's `bar()` value plus its `baz()` value is a given number. Here's
+how you can define a matcher to do it:
+
+```cpp
+using ::testing::Matcher;
+using ::testing::MatcherInterface;
+using ::testing::MatchResultListener;
+
+class BarPlusBazEqMatcher : public MatcherInterface<const Foo&> {
+ public:
+ explicit BarPlusBazEqMatcher(int expected_sum)
+ : expected_sum_(expected_sum) {}
+
+ bool MatchAndExplain(const Foo& foo,
+ MatchResultListener* /* listener */) const override {
+ return (foo.bar() + foo.baz()) == expected_sum_;
+ }
+
+ void DescribeTo(::std::ostream* os) const override {
+ *os << "bar() + baz() equals " << expected_sum_;
+ }
+
+ void DescribeNegationTo(::std::ostream* os) const override {
+ *os << "bar() + baz() does not equal " << expected_sum_;
+ }
+ private:
+ const int expected_sum_;
+};
+
+Matcher<const Foo&> BarPlusBazEq(int expected_sum) {
+ return MakeMatcher(new BarPlusBazEqMatcher(expected_sum));
+}
+
+...
+ EXPECT_CALL(..., DoThis(BarPlusBazEq(5)))...;
+```
+
+### Matching Containers
+
+Sometimes an STL container (e.g. list, vector, map, ...) is passed to a mock
+function and you may want to validate it. Since most STL containers support the
+`==` operator, you can write `Eq(expected_container)` or simply
+`expected_container` to match a container exactly.
+
+Sometimes, though, you may want to be more flexible (for example, the first
+element must be an exact match, but the second element can be any positive
+number, and so on). Also, containers used in tests often have a small number of
+elements, and having to define the expected container out-of-line is a bit of a
+hassle. 
+
+You can use the `ElementsAre()` or `UnorderedElementsAre()` matcher in such
+cases:
+
+```cpp
+using ::testing::_;
+using ::testing::ElementsAre;
+using ::testing::Gt;
+...
+ MOCK_METHOD(void, Foo, (const vector<int>& numbers), (override));
+...
+ EXPECT_CALL(mock, Foo(ElementsAre(1, Gt(0), _, 5)));
+```
+
+The above matcher says that the container must have 4 elements, which must be 1,
+greater than 0, anything, and 5 respectively.
+
+If you instead write:
+
+```cpp
+using ::testing::_;
+using ::testing::Gt;
+using ::testing::UnorderedElementsAre;
+...
+ MOCK_METHOD(void, Foo, (const vector<int>& numbers), (override));
+...
+ EXPECT_CALL(mock, Foo(UnorderedElementsAre(1, Gt(0), _, 5)));
+```
+
+It means that the container must have 4 elements, which (under some permutation)
+must be 1, greater than 0, anything, and 5 respectively.
+
+As an alternative you can place the arguments in a C-style array and use
+`ElementsAreArray()` or `UnorderedElementsAreArray()` instead:
+
+```cpp
+using ::testing::ElementsAreArray;
+...
+ // ElementsAreArray accepts an array of element values.
+ const int expected_vector1[] = {1, 5, 2, 4, ...};
+ EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector1)));
+
+ // Or, an array of element matchers.
+ Matcher<int> expected_vector2[] = {1, Gt(2), _, 3, ...};
+ EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector2)));
+```
+
+In case the array needs to be dynamically created (and therefore the array size
+cannot be inferred by the compiler), you can give `ElementsAreArray()` an
+additional argument to specify the array size:
+
+```cpp
+using ::testing::ElementsAreArray;
+...
+ int* const expected_vector3 = new int[count];
+ ... fill expected_vector3 with values ...
+ EXPECT_CALL(mock, Foo(ElementsAreArray(expected_vector3, count)));
+```
+
+Use `Pair` when comparing maps or other associative containers.
+
+```cpp
+using testing::ElementsAre;
+using testing::Pair;
+...
+ std::map<std::string, int> m = {{"a", 1}, {"b", 2}, {"c", 3}};
+ EXPECT_THAT(m, ElementsAre(Pair("a", 1), Pair("b", 2), Pair("c", 3)));
+```
+
+**Tips:**
+
+* `ElementsAre*()` can be used to match *any* container that implements the
+ STL iterator pattern (i.e. it has a `const_iterator` type and supports
+ `begin()/end()`), not just the ones defined in STL. It will even work with
+ container types yet to be written - as long as they follow the above
+ pattern.
+* You can use nested `ElementsAre*()` to match nested (multi-dimensional)
+ containers.
+* If the container is passed by pointer instead of by reference, just write
+ `Pointee(ElementsAre*(...))`.
+* The order of elements *matters* for `ElementsAre*()`. If you are using it
+ with containers whose element order is undefined (e.g. `hash_map`) you
+ should use `WhenSorted` around `ElementsAre`.
+
+### Sharing Matchers
+
+Under the hood, a gMock matcher object consists of a pointer to a ref-counted
+implementation object. Copying matchers is allowed and very efficient, as only
+the pointer is copied. When the last matcher that references the implementation
+object dies, the implementation object will be deleted.
+
+Therefore, if you have some complex matcher that you want to use again and
+again, there is no need to build it every time. Just assign it to a matcher
+variable and use that variable repeatedly! For example,
+
+```cpp
+using ::testing::AllOf;
+using ::testing::Gt;
+using ::testing::Le;
+using ::testing::Matcher;
+...
+ Matcher<int> in_range = AllOf(Gt(5), Le(10));
+ ... use in_range as a matcher in multiple EXPECT_CALLs ...
+```
+
+### Matchers must have no side-effects {#PureMatchers}
+
+WARNING: gMock does not guarantee when or how many times a matcher will be
+invoked. Therefore, all matchers must be *purely functional*: they cannot have
+any side effects, and the match result must not depend on anything other than
+the matcher's parameters and the value being matched. 
+ +This requirement must be satisfied no matter how a matcher is defined (e.g., if +it is one of the standard matchers, or a custom matcher). In particular, a +matcher can never call a mock function, as that will affect the state of the +mock object and gMock. + +## Setting Expectations + +### Knowing When to Expect {#UseOnCall} + + + +**`ON_CALL`** is likely the *single most under-utilized construct* in gMock. + +There are basically two constructs for defining the behavior of a mock object: +`ON_CALL` and `EXPECT_CALL`. The difference? `ON_CALL` defines what happens when +a mock method is called, but doesn't imply any expectation on the method +being called. `EXPECT_CALL` not only defines the behavior, but also sets an +expectation that the method will be called with the given arguments, for the +given number of times (and *in the given order* when you specify the order +too). + +Since `EXPECT_CALL` does more, isn't it better than `ON_CALL`? Not really. Every +`EXPECT_CALL` adds a constraint on the behavior of the code under test. Having +more constraints than necessary is *baaad* - even worse than not having enough +constraints. + +This may be counter-intuitive. How could tests that verify more be worse than +tests that verify less? Isn't verification the whole point of tests? + +The answer lies in *what* a test should verify. **A good test verifies the +contract of the code.** If a test over-specifies, it doesn't leave enough +freedom to the implementation. As a result, changing the implementation without +breaking the contract (e.g. refactoring and optimization), which should be +perfectly fine to do, can break such tests. Then you have to spend time fixing +them, only to see them broken again the next time the implementation is changed. + +Keep in mind that one doesn't have to verify more than one property in one test. 
+In fact, **it's a good style to verify only one thing in one test.** If you do +that, a bug will likely break only one or two tests instead of dozens (which +case would you rather debug?). If you are also in the habit of giving tests +descriptive names that tell what they verify, you can often easily guess what's +wrong just from the test log itself. + +So use `ON_CALL` by default, and only use `EXPECT_CALL` when you actually intend +to verify that the call is made. For example, you may have a bunch of `ON_CALL`s +in your test fixture to set the common mock behavior shared by all tests in the +same group, and write (scarcely) different `EXPECT_CALL`s in different `TEST_F`s +to verify different aspects of the code's behavior. Compared with the style +where each `TEST` has many `EXPECT_CALL`s, this leads to tests that are more +resilient to implementational changes (and thus less likely to require +maintenance) and makes the intent of the tests more obvious (so they are easier +to maintain when you do need to maintain them). + +If you are bothered by the "Uninteresting mock function call" message printed +when a mock method without an `EXPECT_CALL` is called, you may use a `NiceMock` +instead to suppress all such messages for the mock object, or suppress the +message for specific methods by adding `EXPECT_CALL(...).Times(AnyNumber())`. DO +NOT suppress it by blindly adding an `EXPECT_CALL(...)`, or you'll have a test +that's a pain to maintain. + +### Ignoring Uninteresting Calls + +If you are not interested in how a mock method is called, just don't say +anything about it. In this case, if the method is ever called, gMock will +perform its default action to allow the test program to continue. If you are not +happy with the default action taken by gMock, you can override it using +`DefaultValue::Set()` (described [here](#DefaultValue)) or `ON_CALL()`. 
+ +Please note that once you expressed interest in a particular mock method (via +`EXPECT_CALL()`), all invocations to it must match some expectation. If this +function is called but the arguments don't match any `EXPECT_CALL()` statement, +it will be an error. + +### Disallowing Unexpected Calls + +If a mock method shouldn't be called at all, explicitly say so: + +```cpp +using ::testing::_; +... + EXPECT_CALL(foo, Bar(_)) + .Times(0); +``` + +If some calls to the method are allowed, but the rest are not, just list all the +expected calls: + +```cpp +using ::testing::AnyNumber; +using ::testing::Gt; +... + EXPECT_CALL(foo, Bar(5)); + EXPECT_CALL(foo, Bar(Gt(10))) + .Times(AnyNumber()); +``` + +A call to `foo.Bar()` that doesn't match any of the `EXPECT_CALL()` statements +will be an error. + +### Understanding Uninteresting vs Unexpected Calls {#uninteresting-vs-unexpected} + +*Uninteresting* calls and *unexpected* calls are different concepts in gMock. +*Very* different. + +A call `x.Y(...)` is **uninteresting** if there's *not even a single* +`EXPECT_CALL(x, Y(...))` set. In other words, the test isn't interested in the +`x.Y()` method at all, as evident in that the test doesn't care to say anything +about it. + +A call `x.Y(...)` is **unexpected** if there are *some* `EXPECT_CALL(x, +Y(...))`s set, but none of them matches the call. Put another way, the test is +interested in the `x.Y()` method (therefore it explicitly sets some +`EXPECT_CALL` to verify how it's called); however, the verification fails as the +test doesn't expect this particular call to happen. + +**An unexpected call is always an error,** as the code under test doesn't behave +the way the test expects it to behave. + +**By default, an uninteresting call is not an error,** as it violates no +constraint specified by the test. (gMock's philosophy is that saying nothing +means there is no constraint.) However, it leads to a warning, as it *might* +indicate a problem (e.g. 
the test author might have forgotten to specify a +constraint). + +In gMock, `NiceMock` and `StrictMock` can be used to make a mock class "nice" or +"strict". How does this affect uninteresting calls and unexpected calls? + +A **nice mock** suppresses uninteresting call *warnings*. It is less chatty than +the default mock, but otherwise is the same. If a test fails with a default +mock, it will also fail using a nice mock instead. And vice versa. Don't expect +making a mock nice to change the test's result. + +A **strict mock** turns uninteresting call warnings into errors. So making a +mock strict may change the test's result. + +Let's look at an example: + +```cpp +TEST(...) { + NiceMock mock_registry; + EXPECT_CALL(mock_registry, GetDomainOwner("google.com")) + .WillRepeatedly(Return("Larry Page")); + + // Use mock_registry in code under test. + ... &mock_registry ... +} +``` + +The sole `EXPECT_CALL` here says that all calls to `GetDomainOwner()` must have +`"google.com"` as the argument. If `GetDomainOwner("yahoo.com")` is called, it +will be an unexpected call, and thus an error. *Having a nice mock doesn't +change the severity of an unexpected call.* + +So how do we tell gMock that `GetDomainOwner()` can be called with some other +arguments as well? The standard technique is to add a "catch all" `EXPECT_CALL`: + +```cpp + EXPECT_CALL(mock_registry, GetDomainOwner(_)) + .Times(AnyNumber()); // catches all other calls to this method. + EXPECT_CALL(mock_registry, GetDomainOwner("google.com")) + .WillRepeatedly(Return("Larry Page")); +``` + +Remember that `_` is the wildcard matcher that matches anything. With this, if +`GetDomainOwner("google.com")` is called, it will do what the second +`EXPECT_CALL` says; if it is called with a different argument, it will do what +the first `EXPECT_CALL` says. + +Note that the order of the two `EXPECT_CALL`s is important, as a newer +`EXPECT_CALL` takes precedence over an older one. 
+ +For more on uninteresting calls, nice mocks, and strict mocks, read +["The Nice, the Strict, and the Naggy"](#NiceStrictNaggy). + +### Ignoring Uninteresting Arguments {#ParameterlessExpectations} + +If your test doesn't care about the parameters (it only cares about the number +or order of calls), you can often simply omit the parameter list: + +```cpp + // Expect foo.Bar( ... ) twice with any arguments. + EXPECT_CALL(foo, Bar).Times(2); + + // Delegate to the given method whenever the factory is invoked. + ON_CALL(foo_factory, MakeFoo) + .WillByDefault(&BuildFooForTest); +``` + +This functionality is only available when a method is not overloaded; to prevent +unexpected behavior it is a compilation error to try to set an expectation on a +method where the specific overload is ambiguous. You can work around this by +supplying a [simpler mock interface](#SimplerInterfaces) than the mocked class +provides. + +This pattern is also useful when the arguments are interesting, but match logic +is substantially complex. You can leave the argument list unspecified and use +SaveArg actions to [save the values for later verification](#SaveArgVerify). If +you do that, you can easily differentiate calling the method the wrong number of +times from calling it with the wrong arguments. + +### Expecting Ordered Calls {#OrderedCalls} + +Although an `EXPECT_CALL()` statement defined earlier takes precedence when +gMock tries to match a function call with an expectation, by default calls don't +have to happen in the order `EXPECT_CALL()` statements are written. For example, +if the arguments match the matchers in the third `EXPECT_CALL()`, but not those +in the first two, then the third expectation will be used. 
+ +If you would rather have all calls occur in the order of the expectations, put +the `EXPECT_CALL()` statements in a block where you define a variable of type +`InSequence`: + +```cpp +using ::testing::_; +using ::testing::InSequence; + + { + InSequence s; + + EXPECT_CALL(foo, DoThis(5)); + EXPECT_CALL(bar, DoThat(_)) + .Times(2); + EXPECT_CALL(foo, DoThis(6)); + } +``` + +In this example, we expect a call to `foo.DoThis(5)`, followed by two calls to +`bar.DoThat()` where the argument can be anything, which are in turn followed by +a call to `foo.DoThis(6)`. If a call occurred out-of-order, gMock will report an +error. + +### Expecting Partially Ordered Calls {#PartialOrder} + +Sometimes requiring everything to occur in a predetermined order can lead to +brittle tests. For example, we may care about `A` occurring before both `B` and +`C`, but aren't interested in the relative order of `B` and `C`. In this case, +the test should reflect our real intent, instead of being overly constraining. + +gMock allows you to impose an arbitrary DAG (directed acyclic graph) on the +calls. One way to express the DAG is to use the [After](#AfterClause) clause of +`EXPECT_CALL`. + +Another way is via the `InSequence()` clause (not the same as the `InSequence` +class), which we borrowed from jMock 2. It's less flexible than `After()`, but +more convenient when you have long chains of sequential calls, as it doesn't +require you to come up with different names for the expectations in the chains. +Here's how it works: + +If we view `EXPECT_CALL()` statements as nodes in a graph, and add an edge from +node A to node B wherever A must occur before B, we can get a DAG. We use the +term "sequence" to mean a directed path in this DAG. Now, if we decompose the +DAG into sequences, we just need to know which sequences each `EXPECT_CALL()` +belongs to in order to be able to reconstruct the original DAG. 
+ +So, to specify the partial order on the expectations we need to do two things: +first to define some `Sequence` objects, and then for each `EXPECT_CALL()` say +which `Sequence` objects it is part of. + +Expectations in the same sequence must occur in the order they are written. For +example, + +```cpp +using ::testing::Sequence; +... + Sequence s1, s2; + + EXPECT_CALL(foo, A()) + .InSequence(s1, s2); + EXPECT_CALL(bar, B()) + .InSequence(s1); + EXPECT_CALL(bar, C()) + .InSequence(s2); + EXPECT_CALL(foo, D()) + .InSequence(s2); +``` + +specifies the following DAG (where `s1` is `A -> B`, and `s2` is `A -> C -> D`): + +```text + +---> B + | + A ---| + | + +---> C ---> D +``` + +This means that A must occur before B and C, and C must occur before D. There's +no restriction about the order other than these. + +### Controlling When an Expectation Retires + +When a mock method is called, gMock only considers expectations that are still +active. An expectation is active when created, and becomes inactive (aka +*retires*) when a call that has to occur later has occurred. For example, in + +```cpp +using ::testing::_; +using ::testing::Sequence; +... + Sequence s1, s2; + + EXPECT_CALL(log, Log(WARNING, _, "File too large.")) // #1 + .Times(AnyNumber()) + .InSequence(s1, s2); + EXPECT_CALL(log, Log(WARNING, _, "Data set is empty.")) // #2 + .InSequence(s1); + EXPECT_CALL(log, Log(WARNING, _, "User not found.")) // #3 + .InSequence(s2); +``` + +as soon as either #2 or #3 is matched, #1 will retire. If a warning `"File too +large."` is logged after this, it will be an error. + +Note that an expectation doesn't retire automatically when it's saturated. For +example, + +```cpp +using ::testing::_; +... + EXPECT_CALL(log, Log(WARNING, _, _)); // #1 + EXPECT_CALL(log, Log(WARNING, _, "File too large.")); // #2 +``` + +says that there will be exactly one warning with the message `"File too +large."`. 
If the second warning contains this message too, #2 will match again +and result in an upper-bound-violated error. + +If this is not what you want, you can ask an expectation to retire as soon as it +becomes saturated: + +```cpp +using ::testing::_; +... + EXPECT_CALL(log, Log(WARNING, _, _)); // #1 + EXPECT_CALL(log, Log(WARNING, _, "File too large.")) // #2 + .RetiresOnSaturation(); +``` + +Here #2 can be used only once, so if you have two warnings with the message +`"File too large."`, the first will match #2 and the second will match #1 - +there will be no error. + +## Using Actions + +### Returning References from Mock Methods + +If a mock function's return type is a reference, you need to use `ReturnRef()` +instead of `Return()` to return a result: + +```cpp +using ::testing::ReturnRef; + +class MockFoo : public Foo { + public: + MOCK_METHOD(Bar&, GetBar, (), (override)); +}; +... + MockFoo foo; + Bar bar; + EXPECT_CALL(foo, GetBar()) + .WillOnce(ReturnRef(bar)); +... +``` + +### Returning Live Values from Mock Methods + +The `Return(x)` action saves a copy of `x` when the action is created, and +always returns the same value whenever it's executed. Sometimes you may want to +instead return the *live* value of `x` (i.e. its value at the time when the +action is *executed*.). Use either `ReturnRef()` or `ReturnPointee()` for this +purpose. + +If the mock function's return type is a reference, you can do it using +`ReturnRef(x)`, as shown in the previous recipe ("Returning References from Mock +Methods"). However, gMock doesn't let you use `ReturnRef()` in a mock function +whose return type is not a reference, as doing that usually indicates a user +error. So, what shall you do? + +Though you may be tempted, DO NOT use `ByRef()`: + +```cpp +using testing::ByRef; +using testing::Return; + +class MockFoo : public Foo { + public: + MOCK_METHOD(int, GetValue, (), (override)); +}; +... 
+  int x = 0;
+  MockFoo foo;
+  EXPECT_CALL(foo, GetValue())
+      .WillRepeatedly(Return(ByRef(x)));  // Wrong!
+  x = 42;
+  EXPECT_EQ(42, foo.GetValue());
+```
+
+Unfortunately, it doesn't work here. The above code will fail with error:
+
+```text
+Value of: foo.GetValue()
+  Actual: 0
+Expected: 42
+```
+
+The reason is that `Return(*value*)` converts `value` to the actual return type
+of the mock function at the time when the action is *created*, not when it is
+*executed*. (This behavior was chosen for the action to be safe when `value` is
+a proxy object that references some temporary objects.) As a result, `ByRef(x)`
+is converted to an `int` value (instead of a `const int&`) when the expectation
+is set, and `Return(ByRef(x))` will always return 0.
+
+`ReturnPointee(pointer)` was provided to solve this problem specifically. It
+returns the value pointed to by `pointer` at the time the action is *executed*:
+
+```cpp
+using testing::ReturnPointee;
+...
+  int x = 0;
+  MockFoo foo;
+  EXPECT_CALL(foo, GetValue())
+      .WillRepeatedly(ReturnPointee(&x));  // Note the & here.
+  x = 42;
+  EXPECT_EQ(42, foo.GetValue());  // This will succeed now.
+```
+
+### Combining Actions
+
+Want to do more than one thing when a function is called? That's fine. `DoAll()`
+allows you to do a sequence of actions every time. Only the return value of the
+last action in the sequence will be used.
+
+```cpp
+using ::testing::_;
+using ::testing::DoAll;
+
+class MockFoo : public Foo {
+ public:
+  MOCK_METHOD(bool, Bar, (int n), (override));
+};
+...
+  EXPECT_CALL(foo, Bar(_))
+      .WillOnce(DoAll(action_1,
+                      action_2,
+                      ...
+                      action_n));
+```
+
+### Verifying Complex Arguments {#SaveArgVerify}
+
+If you want to verify that a method is called with a particular argument but the
+match criteria is complex, it can be difficult to distinguish between
+cardinality failures (calling the method the wrong number of times) and argument
+match failures. Similarly, if you are matching multiple parameters, it may not
+be easy to distinguish which argument failed to match. For example:
+
+```cpp
+  // Not ideal: this could fail because of a problem with arg1 or arg2, or maybe
+  // just the method wasn't called.
+  EXPECT_CALL(foo, SendValues(_, ElementsAre(1, 4, 4, 7), EqualsProto( ... )));
+```
+
+You can instead save the arguments and test them individually:
+
+```cpp
+  EXPECT_CALL(foo, SendValues)
+      .WillOnce(DoAll(SaveArg<1>(&actual_array), SaveArg<2>(&actual_proto)));
+  ... run the test
+  EXPECT_THAT(actual_array, ElementsAre(1, 4, 4, 7));
+  EXPECT_THAT(actual_proto, EqualsProto( ... ));
+```
+
+### Mocking Side Effects {#MockingSideEffects}
+
+Sometimes a method exhibits its effect not via returning a value but via side
+effects. For example, it may change some global state or modify an output
+argument. To mock side effects, in general you can define your own action by
+implementing `::testing::ActionInterface`.
+
+If all you need to do is to change an output argument, the built-in
+`SetArgPointee()` action is convenient:
+
+```cpp
+using ::testing::_;
+using ::testing::SetArgPointee;
+
+class MockMutator : public Mutator {
+ public:
+  MOCK_METHOD(void, Mutate, (bool mutate, int* value), (override));
+  ...
+}
+...
+  MockMutator mutator;
+  EXPECT_CALL(mutator, Mutate(true, _))
+      .WillOnce(SetArgPointee<1>(5));
+```
+
+In this example, when `mutator.Mutate()` is called, we will assign 5 to the
+`int` variable pointed to by argument #1 (0-based).
+
+`SetArgPointee()` conveniently makes an internal copy of the value you pass to
+it, removing the need to keep the value in scope and alive. The implication
+however is that the value must have a copy constructor and assignment operator.
+ +If the mock method also needs to return a value as well, you can chain +`SetArgPointee()` with `Return()` using `DoAll()`, remembering to put the +`Return()` statement last: + +```cpp +using ::testing::_; +using ::testing::Return; +using ::testing::SetArgPointee; + +class MockMutator : public Mutator { + public: + ... + MOCK_METHOD(bool, MutateInt, (int* value), (override)); +} +... + MockMutator mutator; + EXPECT_CALL(mutator, MutateInt(_)) + .WillOnce(DoAll(SetArgPointee<0>(5), + Return(true))); +``` + +Note, however, that if you use the `ReturnOKWith()` method, it will override the +values provided by `SetArgPointee()` in the response parameters of your function +call. + +If the output argument is an array, use the `SetArrayArgument(first, last)` +action instead. It copies the elements in source range `[first, last)` to the +array pointed to by the `N`-th (0-based) argument: + +```cpp +using ::testing::NotNull; +using ::testing::SetArrayArgument; + +class MockArrayMutator : public ArrayMutator { + public: + MOCK_METHOD(void, Mutate, (int* values, int num_values), (override)); + ... +} +... + MockArrayMutator mutator; + int values[5] = {1, 2, 3, 4, 5}; + EXPECT_CALL(mutator, Mutate(NotNull(), 5)) + .WillOnce(SetArrayArgument<0>(values, values + 5)); +``` + +This also works when the argument is an output iterator: + +```cpp +using ::testing::_; +using ::testing::SetArrayArgument; + +class MockRolodex : public Rolodex { + public: + MOCK_METHOD(void, GetNames, (std::back_insert_iterator>), + (override)); + ... +} +... 
+ MockRolodex rolodex; + vector names; + names.push_back("George"); + names.push_back("John"); + names.push_back("Thomas"); + EXPECT_CALL(rolodex, GetNames(_)) + .WillOnce(SetArrayArgument<0>(names.begin(), names.end())); +``` + +### Changing a Mock Object's Behavior Based on the State + +If you expect a call to change the behavior of a mock object, you can use +`::testing::InSequence` to specify different behaviors before and after the +call: + +```cpp +using ::testing::InSequence; +using ::testing::Return; + +... + { + InSequence seq; + EXPECT_CALL(my_mock, IsDirty()) + .WillRepeatedly(Return(true)); + EXPECT_CALL(my_mock, Flush()); + EXPECT_CALL(my_mock, IsDirty()) + .WillRepeatedly(Return(false)); + } + my_mock.FlushIfDirty(); +``` + +This makes `my_mock.IsDirty()` return `true` before `my_mock.Flush()` is called +and return `false` afterwards. + +If the behavior change is more complex, you can store the effects in a variable +and make a mock method get its return value from that variable: + +```cpp +using ::testing::_; +using ::testing::SaveArg; +using ::testing::Return; + +ACTION_P(ReturnPointee, p) { return *p; } +... + int previous_value = 0; + EXPECT_CALL(my_mock, GetPrevValue) + .WillRepeatedly(ReturnPointee(&previous_value)); + EXPECT_CALL(my_mock, UpdateValue) + .WillRepeatedly(SaveArg<0>(&previous_value)); + my_mock.DoSomethingToUpdateValue(); +``` + +Here `my_mock.GetPrevValue()` will always return the argument of the last +`UpdateValue()` call. + +### Setting the Default Value for a Return Type {#DefaultValue} + +If a mock method's return type is a built-in C++ type or pointer, by default it +will return 0 when invoked. Also, in C++ 11 and above, a mock method whose +return type has a default constructor will return a default-constructed value by +default. You only need to specify an action if this default value doesn't work +for you. 
+ +Sometimes, you may want to change this default value, or you may want to specify +a default value for types gMock doesn't know about. You can do this using the +`::testing::DefaultValue` class template: + +```cpp +using ::testing::DefaultValue; + +class MockFoo : public Foo { + public: + MOCK_METHOD(Bar, CalculateBar, (), (override)); +}; + + +... + Bar default_bar; + // Sets the default return value for type Bar. + DefaultValue::Set(default_bar); + + MockFoo foo; + + // We don't need to specify an action here, as the default + // return value works for us. + EXPECT_CALL(foo, CalculateBar()); + + foo.CalculateBar(); // This should return default_bar. + + // Unsets the default return value. + DefaultValue::Clear(); +``` + +Please note that changing the default value for a type can make you tests hard +to understand. We recommend you to use this feature judiciously. For example, +you may want to make sure the `Set()` and `Clear()` calls are right next to the +code that uses your mock. + +### Setting the Default Actions for a Mock Method + +You've learned how to change the default value of a given type. However, this +may be too coarse for your purpose: perhaps you have two mock methods with the +same return type and you want them to have different behaviors. The `ON_CALL()` +macro allows you to customize your mock's behavior at the method level: + +```cpp +using ::testing::_; +using ::testing::AnyNumber; +using ::testing::Gt; +using ::testing::Return; +... + ON_CALL(foo, Sign(_)) + .WillByDefault(Return(-1)); + ON_CALL(foo, Sign(0)) + .WillByDefault(Return(0)); + ON_CALL(foo, Sign(Gt(0))) + .WillByDefault(Return(1)); + + EXPECT_CALL(foo, Sign(_)) + .Times(AnyNumber()); + + foo.Sign(5); // This should return 1. + foo.Sign(-9); // This should return -1. + foo.Sign(0); // This should return 0. +``` + +As you may have guessed, when there are more than one `ON_CALL()` statements, +the newer ones in the order take precedence over the older ones. 
In other words,
+the **last** one that matches the function arguments will be used. This matching
+order allows you to set up the common behavior in a mock object's constructor or
+the test fixture's set-up phase and specialize the mock's behavior later.
+
+Note that both `ON_CALL` and `EXPECT_CALL` have the same "later statements take
+precedence" rule, but they don't interact. That is, `EXPECT_CALL`s have their
+own precedence order distinct from the `ON_CALL` precedence order.
+
+### Using Functions/Methods/Functors/Lambdas as Actions {#FunctionsAsActions}
+
+If the built-in actions don't suit you, you can use an existing callable
+(function, `std::function`, method, functor, lambda) as an action.
+
+
+
+```cpp
+using ::testing::_; using ::testing::Invoke;
+
+class MockFoo : public Foo {
+ public:
+  MOCK_METHOD(int, Sum, (int x, int y), (override));
+  MOCK_METHOD(bool, ComplexJob, (int x), (override));
+};
+
+int CalculateSum(int x, int y) { return x + y; }
+int Sum3(int x, int y, int z) { return x + y + z; }
+
+class Helper {
+ public:
+  bool ComplexJob(int x);
+};
+
+...
+  MockFoo foo;
+  Helper helper;
+  EXPECT_CALL(foo, Sum(_, _))
+      .WillOnce(&CalculateSum)
+      .WillRepeatedly(Invoke(NewPermanentCallback(Sum3, 1)));
+  EXPECT_CALL(foo, ComplexJob(_))
+      .WillOnce(Invoke(&helper, &Helper::ComplexJob))
+      .WillRepeatedly([](int x) { return x > 0; });
+
+  foo.Sum(5, 6);         // Invokes CalculateSum(5, 6).
+  foo.Sum(2, 3);         // Invokes Sum3(1, 2, 3).
+  foo.ComplexJob(10);    // Invokes helper.ComplexJob(10).
+  foo.ComplexJob(-1);    // Invokes the inline lambda.
+```
+
+The only requirement is that the type of the function, etc must be *compatible*
+with the signature of the mock function, meaning that the latter's arguments can
+be implicitly converted to the corresponding arguments of the former, and the
+former's return type can be implicitly converted to that of the latter. 
So, you +can invoke something whose type is *not* exactly the same as the mock function, +as long as it's safe to do so - nice, huh? + +**`Note:`{.escaped}** + +* The action takes ownership of the callback and will delete it when the + action itself is destructed. +* If the type of a callback is derived from a base callback type `C`, you need + to implicitly cast it to `C` to resolve the overloading, e.g. + + ```cpp + using ::testing::Invoke; + ... + ResultCallback* is_ok = ...; + ... Invoke(is_ok) ...; // This works. + + BlockingClosure* done = new BlockingClosure; + ... Invoke(implicit_cast(done)) ...; // The cast is necessary. + ``` + +### Using Functions with Extra Info as Actions + +The function or functor you call using `Invoke()` must have the same number of +arguments as the mock function you use it for. Sometimes you may have a function +that takes more arguments, and you are willing to pass in the extra arguments +yourself to fill the gap. You can do this in gMock using callbacks with +pre-bound arguments. Here's an example: + +```cpp +using ::testing::Invoke; + +class MockFoo : public Foo { + public: + MOCK_METHOD(char, DoThis, (int n), (override)); +}; + +char SignOfSum(int x, int y) { + const int sum = x + y; + return (sum > 0) ? '+' : (sum < 0) ? '-' : '0'; +} + +TEST_F(FooTest, Test) { + MockFoo foo; + + EXPECT_CALL(foo, DoThis(2)) + .WillOnce(Invoke(NewPermanentCallback(SignOfSum, 5))); + EXPECT_EQ('+', foo.DoThis(2)); // Invokes SignOfSum(5, 2). +} +``` + +### Invoking a Function/Method/Functor/Lambda/Callback Without Arguments + +`Invoke()` is very useful for doing actions that are more complex. It passes the +mock function's arguments to the function, etc being invoked such that the +callee has the full context of the call to work with. If the invoked function is +not interested in some or all of the arguments, it can simply ignore them. 
+ +Yet, a common pattern is that a test author wants to invoke a function without +the arguments of the mock function. `Invoke()` allows her to do that using a +wrapper function that throws away the arguments before invoking an underlining +nullary function. Needless to say, this can be tedious and obscures the intent +of the test. + +`InvokeWithoutArgs()` solves this problem. It's like `Invoke()` except that it +doesn't pass the mock function's arguments to the callee. Here's an example: + +```cpp +using ::testing::_; +using ::testing::InvokeWithoutArgs; + +class MockFoo : public Foo { + public: + MOCK_METHOD(bool, ComplexJob, (int n), (override)); +}; + +bool Job1() { ... } +bool Job2(int n, char c) { ... } + +... + MockFoo foo; + EXPECT_CALL(foo, ComplexJob(_)) + .WillOnce(InvokeWithoutArgs(Job1)) + .WillOnce(InvokeWithoutArgs(NewPermanentCallback(Job2, 5, 'a'))); + + foo.ComplexJob(10); // Invokes Job1(). + foo.ComplexJob(20); // Invokes Job2(5, 'a'). +``` + +**`Note:`{.escaped}** + +* The action takes ownership of the callback and will delete it when the + action itself is destructed. +* If the type of a callback is derived from a base callback type `C`, you need + to implicitly cast it to `C` to resolve the overloading, e.g. + + ```cpp + using ::testing::InvokeWithoutArgs; + ... + ResultCallback* is_ok = ...; + ... InvokeWithoutArgs(is_ok) ...; // This works. + + BlockingClosure* done = ...; + ... InvokeWithoutArgs(implicit_cast(done)) ...; + // The cast is necessary. + ``` + +### Invoking an Argument of the Mock Function + +Sometimes a mock function will receive a function pointer, a functor (in other +words, a "callable") as an argument, e.g. + +```cpp +class MockFoo : public Foo { + public: + MOCK_METHOD(bool, DoThis, (int n, (ResultCallback1* callback)), + (override)); +}; +``` + +and you may want to invoke this callable argument: + +```cpp +using ::testing::_; +... 
+ MockFoo foo; + EXPECT_CALL(foo, DoThis(_, _)) + .WillOnce(...); + // Will execute callback->Run(5), where callback is the + // second argument DoThis() receives. +``` + +NOTE: The section below is legacy documentation from before C++ had lambdas: + +Arghh, you need to refer to a mock function argument but C++ has no lambda +(yet), so you have to define your own action. :-( Or do you really? + +Well, gMock has an action to solve *exactly* this problem: + +```cpp +InvokeArgument(arg_1, arg_2, ..., arg_m) +``` + +will invoke the `N`-th (0-based) argument the mock function receives, with +`arg_1`, `arg_2`, ..., and `arg_m`. No matter if the argument is a function +pointer, a functor, or a callback. gMock handles them all. + +With that, you could write: + +```cpp +using ::testing::_; +using ::testing::InvokeArgument; +... + EXPECT_CALL(foo, DoThis(_, _)) + .WillOnce(InvokeArgument<1>(5)); + // Will execute callback->Run(5), where callback is the + // second argument DoThis() receives. +``` + +What if the callable takes an argument by reference? No problem - just wrap it +inside `ByRef()`: + +```cpp + ... + MOCK_METHOD(bool, Bar, + ((ResultCallback2* callback)), + (override)); + ... + using ::testing::_; + using ::testing::ByRef; + using ::testing::InvokeArgument; + ... + MockFoo foo; + Helper helper; + ... + EXPECT_CALL(foo, Bar(_)) + .WillOnce(InvokeArgument<0>(5, ByRef(helper))); + // ByRef(helper) guarantees that a reference to helper, not a copy of it, + // will be passed to the callback. +``` + +What if the callable takes an argument by reference and we do **not** wrap the +argument in `ByRef()`? Then `InvokeArgument()` will *make a copy* of the +argument, and pass a *reference to the copy*, instead of a reference to the +original value, to the callable. This is especially handy when the argument is a +temporary value: + +```cpp + ... + MOCK_METHOD(bool, DoThat, (bool (*f)(const double& x, const string& s)), + (override)); + ... 
+ using ::testing::_; + using ::testing::InvokeArgument; + ... + MockFoo foo; + ... + EXPECT_CALL(foo, DoThat(_)) + .WillOnce(InvokeArgument<0>(5.0, string("Hi"))); + // Will execute (*f)(5.0, string("Hi")), where f is the function pointer + // DoThat() receives. Note that the values 5.0 and string("Hi") are + // temporary and dead once the EXPECT_CALL() statement finishes. Yet + // it's fine to perform this action later, since a copy of the values + // are kept inside the InvokeArgument action. +``` + +### Ignoring an Action's Result + +Sometimes you have an action that returns *something*, but you need an action +that returns `void` (perhaps you want to use it in a mock function that returns +`void`, or perhaps it needs to be used in `DoAll()` and it's not the last in the +list). `IgnoreResult()` lets you do that. For example: + +```cpp +using ::testing::_; +using ::testing::DoAll; +using ::testing::IgnoreResult; +using ::testing::Return; + +int Process(const MyData& data); +string DoSomething(); + +class MockFoo : public Foo { + public: + MOCK_METHOD(void, Abc, (const MyData& data), (override)); + MOCK_METHOD(bool, Xyz, (), (override)); +}; + + ... + MockFoo foo; + EXPECT_CALL(foo, Abc(_)) + // .WillOnce(Invoke(Process)); + // The above line won't compile as Process() returns int but Abc() needs + // to return void. + .WillOnce(IgnoreResult(Process)); + EXPECT_CALL(foo, Xyz()) + .WillOnce(DoAll(IgnoreResult(DoSomething), + // Ignores the string DoSomething() returns. + Return(true))); +``` + +Note that you **cannot** use `IgnoreResult()` on an action that already returns +`void`. Doing so will lead to ugly compiler errors. + +### Selecting an Action's Arguments {#SelectingArgs} + +Say you have a mock function `Foo()` that takes seven arguments, and you have a +custom action that you want to invoke when `Foo()` is called. Trouble is, the +custom action only wants three arguments: + +```cpp +using ::testing::_; +using ::testing::Invoke; +... 
+ MOCK_METHOD(bool, Foo, + (bool visible, const string& name, int x, int y, + (const map>), double& weight, double min_weight, + double max_wight)); +... +bool IsVisibleInQuadrant1(bool visible, int x, int y) { + return visible && x >= 0 && y >= 0; +} +... + EXPECT_CALL(mock, Foo) + .WillOnce(Invoke(IsVisibleInQuadrant1)); // Uh, won't compile. :-( +``` + +To please the compiler God, you need to define an "adaptor" that has the same +signature as `Foo()` and calls the custom action with the right arguments: + +```cpp +using ::testing::_; +using ::testing::Invoke; +... +bool MyIsVisibleInQuadrant1(bool visible, const string& name, int x, int y, + const map, double>& weight, + double min_weight, double max_wight) { + return IsVisibleInQuadrant1(visible, x, y); +} +... + EXPECT_CALL(mock, Foo) + .WillOnce(Invoke(MyIsVisibleInQuadrant1)); // Now it works. +``` + +But isn't this awkward? + +gMock provides a generic *action adaptor*, so you can spend your time minding +more important business than writing your own adaptors. Here's the syntax: + +```cpp +WithArgs(action) +``` + +creates an action that passes the arguments of the mock function at the given +indices (0-based) to the inner `action` and performs it. Using `WithArgs`, our +original example can be written as: + +```cpp +using ::testing::_; +using ::testing::Invoke; +using ::testing::WithArgs; +... + EXPECT_CALL(mock, Foo) + .WillOnce(WithArgs<0, 2, 3>(Invoke(IsVisibleInQuadrant1))); // No need to define your own adaptor. +``` + +For better readability, gMock also gives you: + +* `WithoutArgs(action)` when the inner `action` takes *no* argument, and +* `WithArg(action)` (no `s` after `Arg`) when the inner `action` takes + *one* argument. + +As you may have realized, `InvokeWithoutArgs(...)` is just syntactic sugar for +`WithoutArgs(Invoke(...))`. + +Here are more tips: + +* The inner action used in `WithArgs` and friends does not have to be + `Invoke()` -- it can be anything. 
+* You can repeat an argument in the argument list if necessary, e.g. + `WithArgs<2, 3, 3, 5>(...)`. +* You can change the order of the arguments, e.g. `WithArgs<3, 2, 1>(...)`. +* The types of the selected arguments do *not* have to match the signature of + the inner action exactly. It works as long as they can be implicitly + converted to the corresponding arguments of the inner action. For example, + if the 4-th argument of the mock function is an `int` and `my_action` takes + a `double`, `WithArg<4>(my_action)` will work. + +### Ignoring Arguments in Action Functions + +The [selecting-an-action's-arguments](#SelectingArgs) recipe showed us one way +to make a mock function and an action with incompatible argument lists fit +together. The downside is that wrapping the action in `WithArgs<...>()` can get +tedious for people writing the tests. + +If you are defining a function (or method, functor, lambda, callback) to be used +with `Invoke*()`, and you are not interested in some of its arguments, an +alternative to `WithArgs` is to declare the uninteresting arguments as `Unused`. +This makes the definition less cluttered and less fragile in case the types of +the uninteresting arguments change. It could also increase the chance the action +function can be reused. For example, given + +```cpp + public: + MOCK_METHOD(double, Foo, double(const string& label, double x, double y), + (override)); + MOCK_METHOD(double, Bar, (int index, double x, double y), (override)); +``` + +instead of + +```cpp +using ::testing::_; +using ::testing::Invoke; + +double DistanceToOriginWithLabel(const string& label, double x, double y) { + return sqrt(x*x + y*y); +} +double DistanceToOriginWithIndex(int index, double x, double y) { + return sqrt(x*x + y*y); +} +... 
+ EXPECT_CALL(mock, Foo("abc", _, _)) + .WillOnce(Invoke(DistanceToOriginWithLabel)); + EXPECT_CALL(mock, Bar(5, _, _)) + .WillOnce(Invoke(DistanceToOriginWithIndex)); +``` + +you could write + +```cpp +using ::testing::_; +using ::testing::Invoke; +using ::testing::Unused; + +double DistanceToOrigin(Unused, double x, double y) { + return sqrt(x*x + y*y); +} +... + EXPECT_CALL(mock, Foo("abc", _, _)) + .WillOnce(Invoke(DistanceToOrigin)); + EXPECT_CALL(mock, Bar(5, _, _)) + .WillOnce(Invoke(DistanceToOrigin)); +``` + +### Sharing Actions + +Just like matchers, a gMock action object consists of a pointer to a ref-counted +implementation object. Therefore copying actions is also allowed and very +efficient. When the last action that references the implementation object dies, +the implementation object will be deleted. + +If you have some complex action that you want to use again and again, you may +not have to build it from scratch everytime. If the action doesn't have an +internal state (i.e. if it always does the same thing no matter how many times +it has been called), you can assign it to an action variable and use that +variable repeatedly. For example: + +```cpp +using ::testing::Action; +using ::testing::DoAll; +using ::testing::Return; +using ::testing::SetArgPointee; +... + Action set_flag = DoAll(SetArgPointee<0>(5), + Return(true)); + ... use set_flag in .WillOnce() and .WillRepeatedly() ... +``` + +However, if the action has its own state, you may be surprised if you share the +action object. Suppose you have an action factory `IncrementCounter(init)` which +creates an action that increments and returns a counter whose initial value is +`init`, using two actions created from the same expression and using a shared +action will exhibit different behaviors. Example: + +```cpp + EXPECT_CALL(foo, DoThis()) + .WillRepeatedly(IncrementCounter(0)); + EXPECT_CALL(foo, DoThat()) + .WillRepeatedly(IncrementCounter(0)); + foo.DoThis(); // Returns 1. 
+  foo.DoThis();  // Returns 2.
+  foo.DoThat();  // Returns 1 - DoThat() uses a different
+                 // counter than DoThis()'s.
+```
+
+versus
+
+```cpp
+using ::testing::Action;
+...
+  Action<int()> increment = IncrementCounter(0);
+  EXPECT_CALL(foo, DoThis())
+      .WillRepeatedly(increment);
+  EXPECT_CALL(foo, DoThat())
+      .WillRepeatedly(increment);
+  foo.DoThis();  // Returns 1.
+  foo.DoThis();  // Returns 2.
+  foo.DoThat();  // Returns 3 - the counter is shared.
+```
+
+### Testing Asynchronous Behavior
+
+One oft-encountered problem with gMock is that it can be hard to test
+asynchronous behavior. Suppose you had a `EventQueue` class that you wanted to
+test, and you created a separate `EventDispatcher` interface so that you could
+easily mock it out. However, the implementation of the class fired all the
+events on a background thread, which made test timings difficult. You could just
+insert `sleep()` statements and hope for the best, but that makes your test
+behavior nondeterministic. A better way is to use gMock actions and
+`Notification` objects to force your asynchronous test to behave synchronously.
+
+```cpp
+using ::testing::DoAll;
+using ::testing::InvokeWithoutArgs;
+using ::testing::Return;
+
+class MockEventDispatcher : public EventDispatcher {
+  MOCK_METHOD(bool, DispatchEvent, (int32), (override));
+};
+
+ACTION_P(Notify, notification) {
+  notification->Notify();
+}
+
+TEST(EventQueueTest, EnqueueEventTest) {
+  MockEventDispatcher mock_event_dispatcher;
+  EventQueue event_queue(&mock_event_dispatcher);
+
+  const int32 kEventId = 321;
+  Notification done;
+  EXPECT_CALL(mock_event_dispatcher, DispatchEvent(kEventId))
+      .WillOnce(Notify(&done));
+
+  event_queue.EnqueueEvent(kEventId);
+  done.WaitForNotification();
+}
+```
+
+In the example above, we set our normal gMock expectations, but then add an
+additional action to notify the `Notification` object.
Now we can just call
+`Notification::WaitForNotification()` in the main thread to wait for the
+asynchronous call to finish. After that, our test suite is complete and we can
+safely exit.
+
+Note: this example has a downside: namely, if the expectation is not satisfied,
+our test will run forever. It will eventually time-out and fail, but it will
+take longer and be slightly harder to debug. To alleviate this problem, you can
+use `WaitForNotificationWithTimeout(ms)` instead of `WaitForNotification()`.
+
+## Misc Recipes on Using gMock
+
+### Mocking Methods That Use Move-Only Types
+
+C++11 introduced *move-only types*. A move-only-typed value can be moved from
+one object to another, but cannot be copied. `std::unique_ptr` is probably
+the most commonly used move-only type.
+
+Mocking a method that takes and/or returns move-only types presents some
+challenges, but nothing insurmountable. This recipe shows you how you can do it.
+Note that the support for move-only method arguments was only introduced to
+gMock in April 2017; in older code, you may find more complex
+[workarounds](#LegacyMoveOnly) for lack of this feature.
+
+Let’s say we are working on a fictional project that lets one post and share
+snippets called “buzzes”. Your code uses these types:
+
+```cpp
+enum class AccessLevel { kInternal, kPublic };
+
+class Buzz {
+ public:
+  explicit Buzz(AccessLevel access) { ... }
+  ...
+};
+
+class Buzzer {
+ public:
+  virtual ~Buzzer() {}
+  virtual std::unique_ptr<Buzz> MakeBuzz(StringPiece text) = 0;
+  virtual bool ShareBuzz(std::unique_ptr<Buzz> buzz, int64_t timestamp) = 0;
+  ...
+};
+```
+
+A `Buzz` object represents a snippet being posted. A class that implements the
+`Buzzer` interface is capable of creating and sharing `Buzz`es. Methods in
+`Buzzer` may return a `unique_ptr<Buzz>` or take a `unique_ptr<Buzz>`. Now we
+need to mock `Buzzer` in our tests.
+
+To mock a method that accepts or returns move-only types, you just use the
+familiar `MOCK_METHOD` syntax as usual:
+
+```cpp
+class MockBuzzer : public Buzzer {
+ public:
+  MOCK_METHOD(std::unique_ptr<Buzz>, MakeBuzz, (StringPiece text), (override));
+  MOCK_METHOD(bool, ShareBuzz, (std::unique_ptr<Buzz> buzz, int64_t timestamp),
+              (override));
+};
+```
+
+Now that we have the mock class defined, we can use it in tests. In the
+following code examples, we assume that we have defined a `MockBuzzer` object
+named `mock_buzzer_`:
+
+```cpp
+  MockBuzzer mock_buzzer_;
+```
+
+First let’s see how we can set expectations on the `MakeBuzz()` method, which
+returns a `unique_ptr<Buzz>`.
+
+As usual, if you set an expectation without an action (i.e. the `.WillOnce()` or
+`.WillRepeatedly()` clause), when that expectation fires, the default action for
+that method will be taken. Since `unique_ptr<>` has a default constructor that
+returns a null `unique_ptr`, that’s what you’ll get if you don’t specify an
+action:
+
+```cpp
+  // Use the default action.
+  EXPECT_CALL(mock_buzzer_, MakeBuzz("hello"));
+
+  // Triggers the previous EXPECT_CALL.
+  EXPECT_EQ(nullptr, mock_buzzer_.MakeBuzz("hello"));
+```
+
+If you are not happy with the default action, you can tweak it as usual; see
+[Setting Default Actions](#OnCall).
+
+If you just need to return a pre-defined move-only value, you can use the
+`Return(ByMove(...))` action:
+
+```cpp
+  // When this fires, the unique_ptr<> specified by ByMove(...) will
+  // be returned.
+  EXPECT_CALL(mock_buzzer_, MakeBuzz("world"))
+      .WillOnce(Return(ByMove(MakeUnique<Buzz>(AccessLevel::kInternal))));
+
+  EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("world"));
+```
+
+Note that `ByMove()` is essential here - if you drop it, the code won’t compile.
+
+Quiz time! What do you think will happen if a `Return(ByMove(...))` action is
+performed more than once (e.g. you write `...
+.WillRepeatedly(Return(ByMove(...)));`)?
Come think of it, after the first time
+the action runs, the source value will be consumed (since it’s a move-only
+value), so the next time around, there’s no value to move from -- you’ll get a
+run-time error that `Return(ByMove(...))` can only be run once.
+
+If you need your mock method to do more than just moving a pre-defined value,
+remember that you can always use a lambda or a callable object, which can do
+pretty much anything you want:
+
+```cpp
+  EXPECT_CALL(mock_buzzer_, MakeBuzz("x"))
+      .WillRepeatedly([](StringPiece text) {
+        return MakeUnique<Buzz>(AccessLevel::kInternal);
+      });
+
+  EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("x"));
+  EXPECT_NE(nullptr, mock_buzzer_.MakeBuzz("x"));
+```
+
+Every time this `EXPECT_CALL` fires, a new `unique_ptr<Buzz>` will be created
+and returned. You cannot do this with `Return(ByMove(...))`.
+
+That covers returning move-only values; but how do we work with methods
+accepting move-only arguments? The answer is that they work normally, although
+some actions will not compile when any of method's arguments are move-only. You
+can always use `Return`, or a [lambda or functor](#FunctionsAsActions):
+
+```cpp
+  using ::testing::Unused;
+
+  EXPECT_CALL(mock_buzzer_, ShareBuzz(NotNull(), _)).WillOnce(Return(true));
+  EXPECT_TRUE(mock_buzzer_.ShareBuzz(MakeUnique<Buzz>(AccessLevel::kInternal),
+                                     0));
+
+  EXPECT_CALL(mock_buzzer_, ShareBuzz(_, _)).WillOnce(
+      [](std::unique_ptr<Buzz> buzz, Unused) { return buzz != nullptr; });
+  EXPECT_FALSE(mock_buzzer_.ShareBuzz(nullptr, 0));
+```
+
+Many built-in actions (`WithArgs`, `WithoutArgs`,`DeleteArg`, `SaveArg`, ...)
+could in principle support move-only arguments, but the support for this is not
+implemented yet. If this is blocking you, please file a bug.
+
+A few actions (e.g. `DoAll`) copy their arguments internally, so they can never
+work with non-copyable objects; you'll have to use functors instead.
+
+#### Legacy workarounds for move-only types {#LegacyMoveOnly}
+
+Support for move-only function arguments was only introduced to gMock in April
+2017. In older code, you may encounter the following workaround for the lack of
+this feature (it is no longer necessary - we're including it just for
+reference):
+
+```cpp
+class MockBuzzer : public Buzzer {
+ public:
+  MOCK_METHOD(bool, DoShareBuzz, (Buzz* buzz, Time timestamp));
+  bool ShareBuzz(std::unique_ptr<Buzz> buzz, Time timestamp) override {
+    return DoShareBuzz(buzz.get(), timestamp);
+  }
+};
+```
+
+The trick is to delegate the `ShareBuzz()` method to a mock method (let’s call
+it `DoShareBuzz()`) that does not take move-only parameters. Then, instead of
+setting expectations on `ShareBuzz()`, you set them on the `DoShareBuzz()` mock
+method:
+
+```cpp
+  MockBuzzer mock_buzzer_;
+  EXPECT_CALL(mock_buzzer_, DoShareBuzz(NotNull(), _));
+
+  // When one calls ShareBuzz() on the MockBuzzer like this, the call is
+  // forwarded to DoShareBuzz(), which is mocked. Therefore this statement
+  // will trigger the above EXPECT_CALL.
+  mock_buzzer_.ShareBuzz(MakeUnique<Buzz>(AccessLevel::kInternal), 0);
+```
+
+### Making the Compilation Faster
+
+Believe it or not, the *vast majority* of the time spent on compiling a mock
+class is in generating its constructor and destructor, as they perform
+non-trivial tasks (e.g. verification of the expectations). What's more, mock
+methods with different signatures have different types and thus their
+constructors/destructors need to be generated by the compiler separately. As a
+result, if you mock many different types of methods, compiling your mock class
+can get really slow.
+
+If you are experiencing slow compilation, you can move the definition of your
+mock class' constructor and destructor out of the class body and into a `.cc`
+file.
This way, even if you `#include` your mock class in N files, the compiler +only needs to generate its constructor and destructor once, resulting in a much +faster compilation. + +Let's illustrate the idea using an example. Here's the definition of a mock +class before applying this recipe: + +```cpp +// File mock_foo.h. +... +class MockFoo : public Foo { + public: + // Since we don't declare the constructor or the destructor, + // the compiler will generate them in every translation unit + // where this mock class is used. + + MOCK_METHOD(int, DoThis, (), (override)); + MOCK_METHOD(bool, DoThat, (const char* str), (override)); + ... more mock methods ... +}; +``` + +After the change, it would look like: + +```cpp +// File mock_foo.h. +... +class MockFoo : public Foo { + public: + // The constructor and destructor are declared, but not defined, here. + MockFoo(); + virtual ~MockFoo(); + + MOCK_METHOD(int, DoThis, (), (override)); + MOCK_METHOD(bool, DoThat, (const char* str), (override)); + ... more mock methods ... +}; +``` + +and + +```cpp +// File mock_foo.cc. +#include "path/to/mock_foo.h" + +// The definitions may appear trivial, but the functions actually do a +// lot of things through the constructors/destructors of the member +// variables used to implement the mock methods. +MockFoo::MockFoo() {} +MockFoo::~MockFoo() {} +``` + +### Forcing a Verification + +When it's being destroyed, your friendly mock object will automatically verify +that all expectations on it have been satisfied, and will generate googletest +failures if not. This is convenient as it leaves you with one less thing to +worry about. That is, unless you are not sure if your mock object will be +destroyed. + +How could it be that your mock object won't eventually be destroyed? Well, it +might be created on the heap and owned by the code you are testing. 
Suppose +there's a bug in that code and it doesn't delete the mock object properly - you +could end up with a passing test when there's actually a bug. + +Using a heap checker is a good idea and can alleviate the concern, but its +implementation is not 100% reliable. So, sometimes you do want to *force* gMock +to verify a mock object before it is (hopefully) destructed. You can do this +with `Mock::VerifyAndClearExpectations(&mock_object)`: + +```cpp +TEST(MyServerTest, ProcessesRequest) { + using ::testing::Mock; + + MockFoo* const foo = new MockFoo; + EXPECT_CALL(*foo, ...)...; + // ... other expectations ... + + // server now owns foo. + MyServer server(foo); + server.ProcessRequest(...); + + // In case that server's destructor will forget to delete foo, + // this will verify the expectations anyway. + Mock::VerifyAndClearExpectations(foo); +} // server is destroyed when it goes out of scope here. +``` + +**Tip:** The `Mock::VerifyAndClearExpectations()` function returns a `bool` to +indicate whether the verification was successful (`true` for yes), so you can +wrap that function call inside a `ASSERT_TRUE()` if there is no point going +further when the verification has failed. + +### Using Check Points {#UsingCheckPoints} + +Sometimes you may want to "reset" a mock object at various check points in your +test: at each check point, you verify that all existing expectations on the mock +object have been satisfied, and then you set some new expectations on it as if +it's newly created. This allows you to work with a mock object in "phases" whose +sizes are each manageable. + +One such scenario is that in your test's `SetUp()` function, you may want to put +the object you are testing into a certain state, with the help from a mock +object. Once in the desired state, you want to clear all expectations on the +mock, such that in the `TEST_F` body you can set fresh expectations on it. 
+
+As you may have figured out, the `Mock::VerifyAndClearExpectations()` function
+we saw in the previous recipe can help you here. Or, if you are using
+`ON_CALL()` to set default actions on the mock object and want to clear the
+default actions as well, use `Mock::VerifyAndClear(&mock_object)` instead. This
+function does what `Mock::VerifyAndClearExpectations(&mock_object)` does and
+returns the same `bool`, **plus** it clears the `ON_CALL()` statements on
+`mock_object` too.
+
+Another trick you can use to achieve the same effect is to put the expectations
+in sequences and insert calls to a dummy "check-point" function at specific
+places. Then you can verify that the mock function calls do happen at the right
+time. For example, if you are exercising code:
+
+```cpp
+  Foo(1);
+  Foo(2);
+  Foo(3);
+```
+
+and want to verify that `Foo(1)` and `Foo(3)` both invoke `mock.Bar("a")`, but
+`Foo(2)` doesn't invoke anything. You can write:
+
+```cpp
+using ::testing::MockFunction;
+
+TEST(FooTest, InvokesBarCorrectly) {
+  MyMock mock;
+  // Class MockFunction<F> has exactly one mock method. It is named
+  // Call() and has type F.
+  MockFunction<void(string check_point_name)> check;
+  {
+    InSequence s;
+
+    EXPECT_CALL(mock, Bar("a"));
+    EXPECT_CALL(check, Call("1"));
+    EXPECT_CALL(check, Call("2"));
+    EXPECT_CALL(mock, Bar("a"));
+  }
+  Foo(1);
+  check.Call("1");
+  Foo(2);
+  check.Call("2");
+  Foo(3);
+}
+```
+
+The expectation spec says that the first `Bar("a")` must happen before check
+point "1", the second `Bar("a")` must happen after check point "2", and nothing
+should happen between the two check points. The explicit check points make it
+easy to tell which `Bar("a")` is called by which call to `Foo()`.
+
+### Mocking Destructors
+
+Sometimes you want to make sure a mock object is destructed at the right time,
+e.g. after `bar->A()` is called but before `bar->B()` is called.
We already know +that you can specify constraints on the [order](#OrderedCalls) of mock function +calls, so all we need to do is to mock the destructor of the mock function. + +This sounds simple, except for one problem: a destructor is a special function +with special syntax and special semantics, and the `MOCK_METHOD` macro doesn't +work for it: + +```cpp +MOCK_METHOD(void, ~MockFoo, ()); // Won't compile! +``` + +The good news is that you can use a simple pattern to achieve the same effect. +First, add a mock function `Die()` to your mock class and call it in the +destructor, like this: + +```cpp +class MockFoo : public Foo { + ... + // Add the following two lines to the mock class. + MOCK_METHOD(void, Die, ()); + virtual ~MockFoo() { Die(); } +}; +``` + +(If the name `Die()` clashes with an existing symbol, choose another name.) Now, +we have translated the problem of testing when a `MockFoo` object dies to +testing when its `Die()` method is called: + +```cpp + MockFoo* foo = new MockFoo; + MockBar* bar = new MockBar; + ... + { + InSequence s; + + // Expects *foo to die after bar->A() and before bar->B(). + EXPECT_CALL(*bar, A()); + EXPECT_CALL(*foo, Die()); + EXPECT_CALL(*bar, B()); + } +``` + +And that's that. + +### Using gMock and Threads {#UsingThreads} + +In a **unit** test, it's best if you could isolate and test a piece of code in a +single-threaded context. That avoids race conditions and dead locks, and makes +debugging your test much easier. + +Yet most programs are multi-threaded, and sometimes to test something we need to +pound on it from more than one thread. gMock works for this purpose too. + +Remember the steps for using a mock: + +1. Create a mock object `foo`. +2. Set its default actions and expectations using `ON_CALL()` and + `EXPECT_CALL()`. +3. The code under test calls methods of `foo`. +4. Optionally, verify and reset the mock. +5. Destroy the mock yourself, or let the code under test destroy it. 
The + destructor will automatically verify it. + +If you follow the following simple rules, your mocks and threads can live +happily together: + +* Execute your *test code* (as opposed to the code being tested) in *one* + thread. This makes your test easy to follow. +* Obviously, you can do step #1 without locking. +* When doing step #2 and #5, make sure no other thread is accessing `foo`. + Obvious too, huh? +* #3 and #4 can be done either in one thread or in multiple threads - anyway + you want. gMock takes care of the locking, so you don't have to do any - + unless required by your test logic. + +If you violate the rules (for example, if you set expectations on a mock while +another thread is calling its methods), you get undefined behavior. That's not +fun, so don't do it. + +gMock guarantees that the action for a mock function is done in the same thread +that called the mock function. For example, in + +```cpp + EXPECT_CALL(mock, Foo(1)) + .WillOnce(action1); + EXPECT_CALL(mock, Foo(2)) + .WillOnce(action2); +``` + +if `Foo(1)` is called in thread 1 and `Foo(2)` is called in thread 2, gMock will +execute `action1` in thread 1 and `action2` in thread 2. + +gMock does *not* impose a sequence on actions performed in different threads +(doing so may create deadlocks as the actions may need to cooperate). This means +that the execution of `action1` and `action2` in the above example *may* +interleave. If this is a problem, you should add proper synchronization logic to +`action1` and `action2` to make the test thread-safe. + +Also, remember that `DefaultValue` is a global resource that potentially +affects *all* living mock objects in your program. Naturally, you won't want to +mess with it from multiple threads or when there still are mocks in action. + +### Controlling How Much Information gMock Prints + +When gMock sees something that has the potential of being an error (e.g. a mock +function with no expectation is called, a.k.a. 
an uninteresting call, which is +allowed but perhaps you forgot to explicitly ban the call), it prints some +warning messages, including the arguments of the function, the return value, and +the stack trace. Hopefully this will remind you to take a look and see if there +is indeed a problem. + +Sometimes you are confident that your tests are correct and may not appreciate +such friendly messages. Some other times, you are debugging your tests or +learning about the behavior of the code you are testing, and wish you could +observe every mock call that happens (including argument values, the return +value, and the stack trace). Clearly, one size doesn't fit all. + +You can control how much gMock tells you using the `--gmock_verbose=LEVEL` +command-line flag, where `LEVEL` is a string with three possible values: + +* `info`: gMock will print all informational messages, warnings, and errors + (most verbose). At this setting, gMock will also log any calls to the + `ON_CALL/EXPECT_CALL` macros. It will include a stack trace in + "uninteresting call" warnings. +* `warning`: gMock will print both warnings and errors (less verbose); it will + omit the stack traces in "uninteresting call" warnings. This is the default. +* `error`: gMock will print errors only (least verbose). + +Alternatively, you can adjust the value of that flag from within your tests like +so: + +```cpp + ::testing::FLAGS_gmock_verbose = "error"; +``` + +If you find gMock printing too many stack frames with its informational or +warning messages, remember that you can control their amount with the +`--gtest_stack_trace_depth=max_depth` flag. + +Now, judiciously use the right flag to enable gMock serve you better! + +### Gaining Super Vision into Mock Calls + +You have a test using gMock. It fails: gMock tells you some expectations aren't +satisfied. However, you aren't sure why: Is there a typo somewhere in the +matchers? Did you mess up the order of the `EXPECT_CALL`s? 
Or is the code under +test doing something wrong? How can you find out the cause? + +Won't it be nice if you have X-ray vision and can actually see the trace of all +`EXPECT_CALL`s and mock method calls as they are made? For each call, would you +like to see its actual argument values and which `EXPECT_CALL` gMock thinks it +matches? If you still need some help to figure out who made these calls, how +about being able to see the complete stack trace at each mock call? + +You can unlock this power by running your test with the `--gmock_verbose=info` +flag. For example, given the test program: + +```cpp +#include "gmock/gmock.h" + +using testing::_; +using testing::HasSubstr; +using testing::Return; + +class MockFoo { + public: + MOCK_METHOD(void, F, (const string& x, const string& y)); +}; + +TEST(Foo, Bar) { + MockFoo mock; + EXPECT_CALL(mock, F(_, _)).WillRepeatedly(Return()); + EXPECT_CALL(mock, F("a", "b")); + EXPECT_CALL(mock, F("c", HasSubstr("d"))); + + mock.F("a", "good"); + mock.F("a", "b"); +} +``` + +if you run it with `--gmock_verbose=info`, you will see this output: + +```shell +[ RUN ] Foo.Bar + +foo_test.cc:14: EXPECT_CALL(mock, F(_, _)) invoked +Stack trace: ... + +foo_test.cc:15: EXPECT_CALL(mock, F("a", "b")) invoked +Stack trace: ... + +foo_test.cc:16: EXPECT_CALL(mock, F("c", HasSubstr("d"))) invoked +Stack trace: ... + +foo_test.cc:14: Mock function call matches EXPECT_CALL(mock, F(_, _))... + Function call: F(@0x7fff7c8dad40"a",@0x7fff7c8dad10"good") +Stack trace: ... + +foo_test.cc:15: Mock function call matches EXPECT_CALL(mock, F("a", "b"))... + Function call: F(@0x7fff7c8dada0"a",@0x7fff7c8dad70"b") +Stack trace: ... + +foo_test.cc:16: Failure +Actual function call count doesn't match EXPECT_CALL(mock, F("c", HasSubstr("d")))... + Expected: to be called once + Actual: never called - unsatisfied and active +[ FAILED ] Foo.Bar +``` + +Suppose the bug is that the `"c"` in the third `EXPECT_CALL` is a typo and +should actually be `"a"`. 
With the above message, you should see that the actual
+`F("a", "good")` call is matched by the first `EXPECT_CALL`, not the third as
+you thought. From that it should be obvious that the third `EXPECT_CALL` is
+written wrong. Case solved.
+
+If you are interested in the mock call trace but not the stack traces, you can
+combine `--gmock_verbose=info` with `--gtest_stack_trace_depth=0` on the test
+command line.
+
+
+
+### Running Tests in Emacs
+
+If you build and run your tests in Emacs using the `M-x google-compile` command
+(as many googletest users do), the source file locations of gMock and googletest
+errors will be highlighted. Just press `<Enter>` on one of them and you'll be
+taken to the offending line. Or, you can just type `C-x `` to jump to the next
+error.
+
+To make it even easier, you can add the following lines to your `~/.emacs` file:
+
+```text
+(global-set-key "\M-m" 'google-compile)  ; m is for make
+(global-set-key [M-down] 'next-error)
+(global-set-key [M-up] '(lambda () (interactive) (next-error -1)))
+```
+
+Then you can type `M-m` to start a build (if you want to run the test as well,
+just make sure `foo_test.run` or `runtests` is in the build command you supply
+after typing `M-m`), or `M-up`/`M-down` to move back and forth between errors.
+
+## Extending gMock
+
+### Writing New Matchers Quickly {#NewMatchers}
+
+WARNING: gMock does not guarantee when or how many times a matcher will be
+invoked. Therefore, all matchers must be functionally pure. See
+[this section](#PureMatchers) for more details.
+
+The `MATCHER*` family of macros can be used to define custom matchers easily.
+The syntax:
+
+```cpp
+MATCHER(name, description_string_expression) { statements; }
+```
+
+will define a matcher with the given name that executes the statements, which
+must return a `bool` to indicate if the match succeeds. Inside the statements,
+you can refer to the value being matched by `arg`, and refer to its type by
+`arg_type`.
+ +The *description string* is a `string`-typed expression that documents what the +matcher does, and is used to generate the failure message when the match fails. +It can (and should) reference the special `bool` variable `negation`, and should +evaluate to the description of the matcher when `negation` is `false`, or that +of the matcher's negation when `negation` is `true`. + +For convenience, we allow the description string to be empty (`""`), in which +case gMock will use the sequence of words in the matcher name as the +description. + +For example: + +```cpp +MATCHER(IsDivisibleBy7, "") { return (arg % 7) == 0; } +``` + +allows you to write + +```cpp + // Expects mock_foo.Bar(n) to be called where n is divisible by 7. + EXPECT_CALL(mock_foo, Bar(IsDivisibleBy7())); +``` + +or, + +```cpp + using ::testing::Not; + ... + // Verifies that two values are divisible by 7. + EXPECT_THAT(some_expression, IsDivisibleBy7()); + EXPECT_THAT(some_other_expression, Not(IsDivisibleBy7())); +``` + +If the above assertions fail, they will print something like: + +```shell + Value of: some_expression + Expected: is divisible by 7 + Actual: 27 + ... + Value of: some_other_expression + Expected: not (is divisible by 7) + Actual: 21 +``` + +where the descriptions `"is divisible by 7"` and `"not (is divisible by 7)"` are +automatically calculated from the matcher name `IsDivisibleBy7`. + +As you may have noticed, the auto-generated descriptions (especially those for +the negation) may not be so great. You can always override them with a `string` +expression of your own: + +```cpp +MATCHER(IsDivisibleBy7, + absl::StrCat(negation ? "isn't" : "is", " divisible by 7")) { + return (arg % 7) == 0; +} +``` + +Optionally, you can stream additional information to a hidden argument named +`result_listener` to explain the match result. 
For example, a better definition +of `IsDivisibleBy7` is: + +```cpp +MATCHER(IsDivisibleBy7, "") { + if ((arg % 7) == 0) + return true; + + *result_listener << "the remainder is " << (arg % 7); + return false; +} +``` + +With this definition, the above assertion will give a better message: + +```shell + Value of: some_expression + Expected: is divisible by 7 + Actual: 27 (the remainder is 6) +``` + +You should let `MatchAndExplain()` print *any additional information* that can +help a user understand the match result. Note that it should explain why the +match succeeds in case of a success (unless it's obvious) - this is useful when +the matcher is used inside `Not()`. There is no need to print the argument value +itself, as gMock already prints it for you. + +NOTE: The type of the value being matched (`arg_type`) is determined by the +context in which you use the matcher and is supplied to you by the compiler, so +you don't need to worry about declaring it (nor can you). This allows the +matcher to be polymorphic. For example, `IsDivisibleBy7()` can be used to match +any type where the value of `(arg % 7) == 0` can be implicitly converted to a +`bool`. In the `Bar(IsDivisibleBy7())` example above, if method `Bar()` takes an +`int`, `arg_type` will be `int`; if it takes an `unsigned long`, `arg_type` will +be `unsigned long`; and so on. + +### Writing New Parameterized Matchers Quickly + +Sometimes you'll want to define a matcher that has parameters. For that you can +use the macro: + +```cpp +MATCHER_P(name, param_name, description_string) { statements; } +``` + +where the description string can be either `""` or a `string` expression that +references `negation` and `param_name`. 
+ +For example: + +```cpp +MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } +``` + +will allow you to write: + +```cpp + EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); +``` + +which may lead to this message (assuming `n` is 10): + +```shell + Value of: Blah("a") + Expected: has absolute value 10 + Actual: -9 +``` + +Note that both the matcher description and its parameter are printed, making the +message human-friendly. + +In the matcher definition body, you can write `foo_type` to reference the type +of a parameter named `foo`. For example, in the body of +`MATCHER_P(HasAbsoluteValue, value)` above, you can write `value_type` to refer +to the type of `value`. + +gMock also provides `MATCHER_P2`, `MATCHER_P3`, ..., up to `MATCHER_P10` to +support multi-parameter matchers: + +```cpp +MATCHER_Pk(name, param_1, ..., param_k, description_string) { statements; } +``` + +Please note that the custom description string is for a particular *instance* of +the matcher, where the parameters have been bound to actual values. Therefore +usually you'll want the parameter values to be part of the description. gMock +lets you do that by referencing the matcher parameters in the description string +expression. + +For example, + +```cpp +using ::testing::PrintToString; +MATCHER_P2(InClosedRange, low, hi, + absl::StrFormat("%s in range [%s, %s]", negation ? "isn't" : "is", + PrintToString(low), PrintToString(hi))) { + return low <= arg && arg <= hi; +} +... +EXPECT_THAT(3, InClosedRange(4, 6)); +``` + +would generate a failure that contains the message: + +```shell + Expected: is in range [4, 6] +``` + +If you specify `""` as the description, the failure message will contain the +sequence of words in the matcher name followed by the parameter values printed +as a tuple. For example, + +```cpp + MATCHER_P2(InClosedRange, low, hi, "") { ... } + ... 
+  EXPECT_THAT(3, InClosedRange(4, 6));
+```
+
+would generate a failure that contains the text:
+
+```shell
+  Expected: in closed range (4, 6)
+```
+
+For the purpose of typing, you can view
+
+```cpp
+MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... }
+```
+
+as shorthand for
+
+```cpp
+template <typename p1_type, ..., typename pk_type>
+FooMatcherPk<p1_type, ..., pk_type>
+Foo(p1_type p1, ..., pk_type pk) { ... }
+```
+
+When you write `Foo(v1, ..., vk)`, the compiler infers the types of the
+parameters `v1`, ..., and `vk` for you. If you are not happy with the result of
+the type inference, you can specify the types by explicitly instantiating the
+template, as in `Foo<long, bool>(5, false)`. As said earlier, you don't get to
+(or need to) specify `arg_type` as that's determined by the context in which the
+matcher is used.
+
+You can assign the result of expression `Foo(p1, ..., pk)` to a variable of type
+`FooMatcherPk<p1_type, ..., pk_type>`. This can be useful when composing
+matchers. Matchers that don't have a parameter or have only one parameter have
+special types: you can assign `Foo()` to a `FooMatcher`-typed variable, and
+assign `Foo(p)` to a `FooMatcherP<p_type>`-typed variable.
+
+While you can instantiate a matcher template with reference types, passing the
+parameters by pointer usually makes your code more readable. If, however, you
+still want to pass a parameter by reference, be aware that in the failure
+message generated by the matcher you will see the value of the referenced object
+but not its address.
+
+You can overload matchers with different numbers of parameters:
+
+```cpp
+MATCHER_P(Blah, a, description_string_1) { ... }
+MATCHER_P2(Blah, a, b, description_string_2) { ... }
+```
+
+While it's tempting to always use the `MATCHER*` macros when defining a new
+matcher, you should also consider implementing `MatcherInterface` or using
+`MakePolymorphicMatcher()` instead (see the recipes that follow), especially if
+you need to use the matcher a lot.
While these approaches require more work,
+they give you more control on the types of the value being matched and the
+matcher parameters, which in general leads to better compiler error messages
+that pay off in the long run. They also allow overloading matchers based on
+parameter types (as opposed to just based on the number of parameters).
+
+### Writing New Monomorphic Matchers
+
+A matcher of argument type `T` implements `::testing::MatcherInterface<T>` and
+does two things: it tests whether a value of type `T` matches the matcher, and
+can describe what kind of values it matches. The latter ability is used for
+generating readable error messages when expectations are violated.
+
+The interface looks like this:
+
+```cpp
+class MatchResultListener {
+ public:
+  ...
+  // Streams x to the underlying ostream; does nothing if the ostream
+  // is NULL.
+  template <typename T>
+  MatchResultListener& operator<<(const T& x);
+
+  // Returns the underlying ostream.
+  ::std::ostream* stream();
+};
+
+template <typename T>
+class MatcherInterface {
+ public:
+  virtual ~MatcherInterface();
+
+  // Returns true if and only if the matcher matches x; also explains the match
+  // result to 'listener'.
+  virtual bool MatchAndExplain(T x, MatchResultListener* listener) const = 0;
+
+  // Describes this matcher to an ostream.
+  virtual void DescribeTo(::std::ostream* os) const = 0;
+
+  // Describes the negation of this matcher to an ostream.
+  virtual void DescribeNegationTo(::std::ostream* os) const;
+};
+```
+
+If you need a custom matcher but `Truly()` is not a good option (for example,
+you may not be happy with the way `Truly(predicate)` describes itself, or you
+may want your matcher to be polymorphic as `Eq(value)` is), you can define a
+matcher to do whatever you want in two steps: first implement the matcher
+interface, and then define a factory function to create a matcher instance. The
+second step is not strictly needed but it makes the syntax of using the matcher
+nicer.
+
+For example, you can define a matcher to test whether an `int` is divisible by 7
+and then use it like this:
+
+```cpp
+using ::testing::MakeMatcher;
+using ::testing::Matcher;
+using ::testing::MatcherInterface;
+using ::testing::MatchResultListener;
+
+class DivisibleBy7Matcher : public MatcherInterface<int> {
+ public:
+  bool MatchAndExplain(int n,
+                       MatchResultListener* /* listener */) const override {
+    return (n % 7) == 0;
+  }
+
+  void DescribeTo(::std::ostream* os) const override {
+    *os << "is divisible by 7";
+  }
+
+  void DescribeNegationTo(::std::ostream* os) const override {
+    *os << "is not divisible by 7";
+  }
+};
+
+Matcher<int> DivisibleBy7() {
+  return MakeMatcher(new DivisibleBy7Matcher);
+}
+
+...
+  EXPECT_CALL(foo, Bar(DivisibleBy7()));
+```
+
+You may improve the matcher message by streaming additional information to the
+`listener` argument in `MatchAndExplain()`:
+
+```cpp
+class DivisibleBy7Matcher : public MatcherInterface<int> {
+ public:
+  bool MatchAndExplain(int n,
+                       MatchResultListener* listener) const override {
+    const int remainder = n % 7;
+    if (remainder != 0) {
+      *listener << "the remainder is " << remainder;
+    }
+    return remainder == 0;
+  }
+  ...
+};
+```
+
+Then, `EXPECT_THAT(x, DivisibleBy7());` may generate a message like this:
+
+```shell
+Value of: x
+Expected: is divisible by 7
+  Actual: 23 (the remainder is 2)
+```
+
+### Writing New Polymorphic Matchers
+
+You've learned how to write your own matchers in the previous recipe. Just one
+problem: a matcher created using `MakeMatcher()` only works for one particular
+type of arguments. If you want a *polymorphic* matcher that works with arguments
+of several types (for instance, `Eq(x)` can be used to match a *`value`* as long
+as `value == x` compiles -- *`value`* and `x` don't have to share the same
+type), you can learn the trick from `testing/base/public/gmock-matchers.h` but
+it's a bit involved.
+
+Fortunately, most of the time you can define a polymorphic matcher easily with
+the help of `MakePolymorphicMatcher()`. Here's how you can define `NotNull()` as
+an example:
+
+```cpp
+using ::testing::MakePolymorphicMatcher;
+using ::testing::MatchResultListener;
+using ::testing::PolymorphicMatcher;
+
+class NotNullMatcher {
+ public:
+  // To implement a polymorphic matcher, first define a COPYABLE class
+  // that has three members MatchAndExplain(), DescribeTo(), and
+  // DescribeNegationTo(), like the following.
+
+  // In this example, we want to use NotNull() with any pointer, so
+  // MatchAndExplain() accepts a pointer of any type as its first argument.
+  // In general, you can define MatchAndExplain() as an ordinary method or
+  // a method template, or even overload it.
+  template <typename T>
+  bool MatchAndExplain(T* p,
+                       MatchResultListener* /* listener */) const {
+    return p != NULL;
+  }
+
+  // Describes the property of a value matching this matcher.
+  void DescribeTo(std::ostream* os) const { *os << "is not NULL"; }
+
+  // Describes the property of a value NOT matching this matcher.
+  void DescribeNegationTo(std::ostream* os) const { *os << "is NULL"; }
+};
+
+// To construct a polymorphic matcher, pass an instance of the class
+// to MakePolymorphicMatcher(). Note the return type.
+PolymorphicMatcher<NotNullMatcher> NotNull() {
+  return MakePolymorphicMatcher(NotNullMatcher());
+}
+
+...
+
+  EXPECT_CALL(foo, Bar(NotNull()));  // The argument must be a non-NULL pointer.
+```
+
+**Note:** Your polymorphic matcher class does **not** need to inherit from
+`MatcherInterface` or any other class, and its methods do **not** need to be
+virtual.
+
+Like in a monomorphic matcher, you may explain the match result by streaming
+additional information to the `listener` argument in `MatchAndExplain()`.
+
+### Writing New Cardinalities
+
+A cardinality is used in `Times()` to tell gMock how many times you expect a
+call to occur. It doesn't have to be exact.
For example, you can say +`AtLeast(5)` or `Between(2, 4)`. + +If the [built-in set](cheat_sheet.md#CardinalityList) of cardinalities doesn't +suit you, you are free to define your own by implementing the following +interface (in namespace `testing`): + +```cpp +class CardinalityInterface { + public: + virtual ~CardinalityInterface(); + + // Returns true if and only if call_count calls will satisfy this cardinality. + virtual bool IsSatisfiedByCallCount(int call_count) const = 0; + + // Returns true if and only if call_count calls will saturate this + // cardinality. + virtual bool IsSaturatedByCallCount(int call_count) const = 0; + + // Describes self to an ostream. + virtual void DescribeTo(std::ostream* os) const = 0; +}; +``` + +For example, to specify that a call must occur even number of times, you can +write + +```cpp +using ::testing::Cardinality; +using ::testing::CardinalityInterface; +using ::testing::MakeCardinality; + +class EvenNumberCardinality : public CardinalityInterface { + public: + bool IsSatisfiedByCallCount(int call_count) const override { + return (call_count % 2) == 0; + } + + bool IsSaturatedByCallCount(int call_count) const override { + return false; + } + + void DescribeTo(std::ostream* os) const { + *os << "called even number of times"; + } +}; + +Cardinality EvenNumber() { + return MakeCardinality(new EvenNumberCardinality); +} + +... + EXPECT_CALL(foo, Bar(3)) + .Times(EvenNumber()); +``` + +### Writing New Actions Quickly {#QuickNewActions} + +If the built-in actions don't work for you, you can easily define your own one. +Just define a functor class with a (possibly templated) call operator, matching +the signature of your action. 
+
+```cpp
+struct Increment {
+  template <typename T>
+  T operator()(T* arg) {
+    return ++(*arg);
+  }
+};
+```
+
+The same approach works with stateful functors (or any callable, really):
+
+```
+struct MultiplyBy {
+  template <typename T>
+  T operator()(T arg) { return arg * multiplier; }
+
+  int multiplier;
+};
+
+// Then use:
+// EXPECT_CALL(...).WillOnce(MultiplyBy{7});
+```
+
+#### Legacy macro-based Actions
+
+Before C++11, the functor-based actions were not supported; the old way of
+writing actions was through a set of `ACTION*` macros. We suggest to avoid them
+in new code; they hide a lot of logic behind the macro, potentially leading to
+harder-to-understand compiler errors. Nevertheless, we cover them here for
+completeness.
+
+By writing
+
+```cpp
+ACTION(name) { statements; }
+```
+
+in a namespace scope (i.e. not inside a class or function), you will define an
+action with the given name that executes the statements. The value returned by
+`statements` will be used as the return value of the action. Inside the
+statements, you can refer to the K-th (0-based) argument of the mock function as
+`argK`. For example:
+
+```cpp
+ACTION(IncrementArg1) { return ++(*arg1); }
+```
+
+allows you to write
+
+```cpp
+... WillOnce(IncrementArg1());
+```
+
+Note that you don't need to specify the types of the mock function arguments.
+Rest assured that your code is type-safe though: you'll get a compiler error if
+`*arg1` doesn't support the `++` operator, or if the type of `++(*arg1)` isn't
+compatible with the mock function's return type.
+
+Another example:
+
+```cpp
+ACTION(Foo) {
+  (*arg2)(5);
+  Blah();
+  *arg1 = 0;
+  return arg0;
+}
+```
+
+defines an action `Foo()` that invokes argument #2 (a function pointer) with 5,
+calls function `Blah()`, sets the value pointed to by argument #1 to 0, and
+returns argument #0.
+ +For more convenience and flexibility, you can also use the following pre-defined +symbols in the body of `ACTION`: + +`argK_type` | The type of the K-th (0-based) argument of the mock function +:-------------- | :----------------------------------------------------------- +`args` | All arguments of the mock function as a tuple +`args_type` | The type of all arguments of the mock function as a tuple +`return_type` | The return type of the mock function +`function_type` | The type of the mock function + +For example, when using an `ACTION` as a stub action for mock function: + +```cpp +int DoSomething(bool flag, int* ptr); +``` + +we have: + +Pre-defined Symbol | Is Bound To +------------------ | --------------------------------- +`arg0` | the value of `flag` +`arg0_type` | the type `bool` +`arg1` | the value of `ptr` +`arg1_type` | the type `int*` +`args` | the tuple `(flag, ptr)` +`args_type` | the type `std::tuple` +`return_type` | the type `int` +`function_type` | the type `int(bool, int*)` + +#### Legacy macro-based parameterized Actions + +Sometimes you'll want to parameterize an action you define. For that we have +another macro + +```cpp +ACTION_P(name, param) { statements; } +``` + +For example, + +```cpp +ACTION_P(Add, n) { return arg0 + n; } +``` + +will allow you to write + +```cpp +// Returns argument #0 + 5. +... WillOnce(Add(5)); +``` + +For convenience, we use the term *arguments* for the values used to invoke the +mock function, and the term *parameters* for the values used to instantiate an +action. + +Note that you don't need to provide the type of the parameter either. Suppose +the parameter is named `param`, you can also use the gMock-defined symbol +`param_type` to refer to the type of the parameter as inferred by the compiler. +For example, in the body of `ACTION_P(Add, n)` above, you can write `n_type` for +the type of `n`. + +gMock also provides `ACTION_P2`, `ACTION_P3`, and etc to support multi-parameter +actions. 
For example, + +```cpp +ACTION_P2(ReturnDistanceTo, x, y) { + double dx = arg0 - x; + double dy = arg1 - y; + return sqrt(dx*dx + dy*dy); +} +``` + +lets you write + +```cpp +... WillOnce(ReturnDistanceTo(5.0, 26.5)); +``` + +You can view `ACTION` as a degenerated parameterized action where the number of +parameters is 0. + +You can also easily define actions overloaded on the number of parameters: + +```cpp +ACTION_P(Plus, a) { ... } +ACTION_P2(Plus, a, b) { ... } +``` + +### Restricting the Type of an Argument or Parameter in an ACTION + +For maximum brevity and reusability, the `ACTION*` macros don't ask you to +provide the types of the mock function arguments and the action parameters. +Instead, we let the compiler infer the types for us. + +Sometimes, however, we may want to be more explicit about the types. There are +several tricks to do that. For example: + +```cpp +ACTION(Foo) { + // Makes sure arg0 can be converted to int. + int n = arg0; + ... use n instead of arg0 here ... +} + +ACTION_P(Bar, param) { + // Makes sure the type of arg1 is const char*. + ::testing::StaticAssertTypeEq(); + + // Makes sure param can be converted to bool. + bool flag = param; +} +``` + +where `StaticAssertTypeEq` is a compile-time assertion in googletest that +verifies two types are the same. + +### Writing New Action Templates Quickly + +Sometimes you want to give an action explicit template parameters that cannot be +inferred from its value parameters. `ACTION_TEMPLATE()` supports that and can be +viewed as an extension to `ACTION()` and `ACTION_P*()`. + +The syntax: + +```cpp +ACTION_TEMPLATE(ActionName, + HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), + AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } +``` + +defines an action template that takes *m* explicit template parameters and *n* +value parameters, where *m* is in [1, 10] and *n* is in [0, 10]. 
`name_i` is the
+name of the *i*-th template parameter, and `kind_i` specifies whether it's a
+`typename`, an integral constant, or a template. `p_i` is the name of the *i*-th
+value parameter.
+
+Example:
+
+```cpp
+// DuplicateArg<k, T>(output) converts the k-th argument of the mock
+// function to type T and copies it to *output.
+ACTION_TEMPLATE(DuplicateArg,
+                // Note the comma between int and k:
+                HAS_2_TEMPLATE_PARAMS(int, k, typename, T),
+                AND_1_VALUE_PARAMS(output)) {
+  *output = T(::std::get<k>(args));
+}
+```
+
+To create an instance of an action template, write:
+
+```cpp
+ActionName<t1, ..., t_m>(v1, ..., v_n)
+```
+
+where the `t`s are the template arguments and the `v`s are the value arguments.
+The value argument types are inferred by the compiler. For example:
+
+```cpp
+using ::testing::_;
+...
+  int n;
+  EXPECT_CALL(mock, Foo).WillOnce(DuplicateArg<1, unsigned char>(&n));
+```
+
+If you want to explicitly specify the value argument types, you can provide
+additional template arguments:
+
+```cpp
+ActionName<t1, ..., t_m, u1, ..., u_k>(v1, ..., v_n)
+```
+
+where `u_i` is the desired type of `v_i`.
+
+`ACTION_TEMPLATE` and `ACTION`/`ACTION_P*` can be overloaded on the number of
+value parameters, but not on the number of template parameters. Without the
+restriction, the meaning of the following is unclear:
+
+```cpp
+  OverloadedAction<bool>(x);
+```
+
+Are we using a single-template-parameter action where `bool` refers to the type
+of `x`, or a two-template-parameter action where the compiler is asked to infer
+the type of `x`?
+
+### Using the ACTION Object's Type
+
+If you are writing a function that returns an `ACTION` object, you'll need to
+know its type. The type depends on the macro used to define the action and the
+parameter types.
The rule is relatively simple:
+
+| Given Definition | Expression | Has Type |
+| ----------------------------- | ------------------- | --------------------- |
+| `ACTION(Foo)` | `Foo()` | `FooAction` |
+| `ACTION_TEMPLATE(Foo, HAS_m_TEMPLATE_PARAMS(...), AND_0_VALUE_PARAMS())` | `Foo<t1, ..., t_m>()` | `FooAction<t1, ..., t_m>` |
+| `ACTION_P(Bar, param)` | `Bar(int_value)` | `BarActionP<int>` |
+| `ACTION_TEMPLATE(Bar, HAS_m_TEMPLATE_PARAMS(...), AND_1_VALUE_PARAMS(p1))` | `Bar<t1, ..., t_m>(int_value)` | `BarActionP<t1, ..., t_m, int>` |
+| `ACTION_P2(Baz, p1, p2)` | `Baz(bool_value, int_value)` | `BazActionP2<bool, int>` |
+| `ACTION_TEMPLATE(Baz, HAS_m_TEMPLATE_PARAMS(...), AND_2_VALUE_PARAMS(p1, p2))` | `Baz<t1, ..., t_m>(bool_value, int_value)` | `BazActionP2<t1, ..., t_m, bool, int>` |
+| ... | ... | ... |
+
+Note that we have to pick different suffixes (`Action`, `ActionP`, `ActionP2`,
+and etc) for actions with different numbers of value parameters, or the action
+definitions cannot be overloaded on the number of them.
+
+### Writing New Monomorphic Actions {#NewMonoActions}
+
+While the `ACTION*` macros are very convenient, sometimes they are
+inappropriate. For example, despite the tricks shown in the previous recipes,
+they don't let you directly specify the types of the mock function arguments and
+the action parameters, which in general leads to unoptimized compiler error
+messages that can baffle unfamiliar users. They also don't allow overloading
+actions based on parameter types without jumping through some hoops.
+
+An alternative to the `ACTION*` macros is to implement
+`::testing::ActionInterface<F>`, where `F` is the type of the mock function in
+which the action will be used. For example:
+
+```cpp
+template <typename F>
+class ActionInterface {
+ public:
+  virtual ~ActionInterface();
+
+  // Performs the action. Result is the return type of function type
+  // F, and ArgumentTuple is the tuple of arguments of F.
+  //
+
+  // For example, if F is int(bool, const string&), then Result would
+  // be int, and ArgumentTuple would be ::std::tuple<bool, const string&>.
+  virtual Result Perform(const ArgumentTuple& args) = 0;
+};
+```
+
+```cpp
+using ::testing::_;
+using ::testing::Action;
+using ::testing::ActionInterface;
+using ::testing::MakeAction;
+
+typedef int IncrementMethod(int*);
+
+class IncrementArgumentAction : public ActionInterface<IncrementMethod> {
+ public:
+  int Perform(const ::std::tuple<int*>& args) override {
+    int* p = ::std::get<0>(args); // Grabs the first argument.
+    return (*p)++;
+  }
+};
+
+Action<IncrementMethod> IncrementArgument() {
+  return MakeAction(new IncrementArgumentAction);
+}
+
+...
+  EXPECT_CALL(foo, Baz(_))
+      .WillOnce(IncrementArgument());
+
+  int n = 5;
+  foo.Baz(&n); // Should return 5 and change n to 6.
+```
+
+### Writing New Polymorphic Actions {#NewPolyActions}
+
+The previous recipe showed you how to define your own action. This is all good,
+except that you need to know the type of the function in which the action will
+be used. Sometimes that can be a problem. For example, if you want to use the
+action in functions with *different* types (e.g. like `Return()` and
+`SetArgPointee()`).
+
+If an action can be used in several types of mock functions, we say it's
+*polymorphic*. The `MakePolymorphicAction()` function template makes it easy to
+define such an action:
+
+```cpp
+namespace testing {
+template <typename Impl>
+PolymorphicAction<Impl> MakePolymorphicAction(const Impl& impl);
+} // namespace testing
+```
+
+As an example, let's define an action that returns the second argument in the
+mock function's argument list. The first step is to define an implementation
+class:
+
+```cpp
+class ReturnSecondArgumentAction {
+ public:
+  template <typename Result, typename ArgumentTuple>
+  Result Perform(const ArgumentTuple& args) const {
+    // To get the i-th (0-based) argument, use ::std::get<i>(args).
+    return ::std::get<1>(args);
+  }
+};
+```
+
+This implementation class does *not* need to inherit from any particular class.
+What matters is that it must have a `Perform()` method template.
This method
+template takes the mock function's arguments as a tuple in a **single**
+argument, and returns the result of the action. It can be either `const` or not,
+but must be invokable with exactly one template argument, which is the result
+type. In other words, you must be able to call `Perform<R>(args)` where `R` is
+the mock function's return type and `args` is its arguments in a tuple.
+
+Next, we use `MakePolymorphicAction()` to turn an instance of the implementation
+class into the polymorphic action we need. It will be convenient to have a
+wrapper for this:
+
+```cpp
+using ::testing::MakePolymorphicAction;
+using ::testing::PolymorphicAction;
+
+PolymorphicAction<ReturnSecondArgumentAction> ReturnSecondArgument() {
+  return MakePolymorphicAction(ReturnSecondArgumentAction());
+}
+```
+
+Now, you can use this polymorphic action the same way you use the built-in ones:
+
+```cpp
+using ::testing::_;
+
+class MockFoo : public Foo {
+ public:
+  MOCK_METHOD(int, DoThis, (bool flag, int n), (override));
+  MOCK_METHOD(string, DoThat, (int x, const char* str1, const char* str2),
+              (override));
+};
+
+  ...
+  MockFoo foo;
+  EXPECT_CALL(foo, DoThis).WillOnce(ReturnSecondArgument());
+  EXPECT_CALL(foo, DoThat).WillOnce(ReturnSecondArgument());
+  ...
+  foo.DoThis(true, 5); // Will return 5.
+  foo.DoThat(1, "Hi", "Bye"); // Will return "Hi".
+```
+
+### Teaching gMock How to Print Your Values
+
+When an uninteresting or unexpected call occurs, gMock prints the argument
+values and the stack trace to help you debug. Assertion macros like
+`EXPECT_THAT` and `EXPECT_EQ` also print the values in question when the
+assertion fails. gMock and googletest do this using googletest's user-extensible
+value printer.
+
+This printer knows how to print built-in C++ types, native arrays, STL
+containers, and any type that supports the `<<` operator. For other types, it
+prints the raw bytes in the value and hopes that you the user can figure it out.
+[googletest's advanced guide](../../googletest/docs/advanced.md#teaching-googletest-how-to-print-your-values)
+explains how to extend the printer to do a better job at printing your
+particular type than to dump the bytes.
+
+## Useful Mocks Created Using gMock
+
+
+
+### Mock std::function {#MockFunction}
+
+`std::function` is a general function type introduced in C++11. It is a
+preferred way of passing callbacks to new interfaces. Functions are copiable,
+and are not usually passed around by pointer, which makes them tricky to mock.
+But fear not - `MockFunction` can help you with that.
+
+`MockFunction<R(T1, ..., Tn)>` has a mock method `Call()` with the signature:
+
+```cpp
+  R Call(T1, ..., Tn);
+```
+
+It also has a `AsStdFunction()` method, which creates a `std::function` proxy
+forwarding to Call:
+
+```cpp
+  std::function<R(T1, ..., Tn)> AsStdFunction();
+```
+
+To use `MockFunction`, first create `MockFunction` object and set up
+expectations on its `Call` method. Then pass proxy obtained from
+`AsStdFunction()` to the code you are testing. For example:
+
+```cpp
+TEST(FooTest, RunsCallbackWithBarArgument) {
+  // 1. Create a mock object.
+  MockFunction<int(std::string)> mock_function;
+
+  // 2. Set expectations on Call() method.
+  EXPECT_CALL(mock_function, Call("bar")).WillOnce(Return(1));
+
+  // 3. Exercise code that uses std::function.
+  Foo(mock_function.AsStdFunction());
+  // Foo's signature can be either of:
+  // void Foo(const std::function<int(std::string)>& fun);
+  // void Foo(std::function<int(std::string)> fun);
+
+  // 4. All expectations will be verified when mock_function
+  //     goes out of scope and is destroyed.
+}
+```
+
+Remember that function objects created with `AsStdFunction()` are just
+forwarders. If you create multiple of them, they will share the same set of
+expectations.
+
+Although `std::function` supports unlimited number of arguments, `MockFunction`
+implementation is limited to ten. If you ever hit that limit... well, your
+callback has bigger problems than being mockable.
:-) + + diff --git a/src/test/gtest/googlemock/docs/for_dummies.md b/src/test/gtest/googlemock/docs/for_dummies.md new file mode 100644 index 00000000..e11c18d9 --- /dev/null +++ b/src/test/gtest/googlemock/docs/for_dummies.md @@ -0,0 +1,700 @@ +## gMock for Dummies {#GMockForDummies} + + + +### What Is gMock? + +When you write a prototype or test, often it's not feasible or wise to rely on +real objects entirely. A **mock object** implements the same interface as a real +object (so it can be used as one), but lets you specify at run time how it will +be used and what it should do (which methods will be called? in which order? how +many times? with what arguments? what will they return? etc). + +**Note:** It is easy to confuse the term *fake objects* with mock objects. Fakes +and mocks actually mean very different things in the Test-Driven Development +(TDD) community: + +* **Fake** objects have working implementations, but usually take some + shortcut (perhaps to make the operations less expensive), which makes them + not suitable for production. An in-memory file system would be an example of + a fake. +* **Mocks** are objects pre-programmed with *expectations*, which form a + specification of the calls they are expected to receive. + +If all this seems too abstract for you, don't worry - the most important thing +to remember is that a mock allows you to check the *interaction* between itself +and code that uses it. The difference between fakes and mocks shall become much +clearer once you start to use mocks. + +**gMock** is a library (sometimes we also call it a "framework" to make it sound +cool) for creating mock classes and using them. It does to C++ what +jMock/EasyMock does to Java (well, more or less). + +When using gMock, + +1. first, you use some simple macros to describe the interface you want to + mock, and they will expand to the implementation of your mock class; +2. 
next, you create some mock objects and specify its expectations and behavior + using an intuitive syntax; +3. then you exercise code that uses the mock objects. gMock will catch any + violation to the expectations as soon as it arises. + +### Why gMock? + +While mock objects help you remove unnecessary dependencies in tests and make +them fast and reliable, using mocks manually in C++ is *hard*: + +* Someone has to implement the mocks. The job is usually tedious and + error-prone. No wonder people go great distance to avoid it. +* The quality of those manually written mocks is a bit, uh, unpredictable. You + may see some really polished ones, but you may also see some that were + hacked up in a hurry and have all sorts of ad hoc restrictions. +* The knowledge you gained from using one mock doesn't transfer to the next + one. + +In contrast, Java and Python programmers have some fine mock frameworks (jMock, +EasyMock, [Mox](http://wtf/mox), etc), which automate the creation of mocks. As +a result, mocking is a proven effective technique and widely adopted practice in +those communities. Having the right tool absolutely makes the difference. + +gMock was built to help C++ programmers. It was inspired by jMock and EasyMock, +but designed with C++'s specifics in mind. It is your friend if any of the +following problems is bothering you: + +* You are stuck with a sub-optimal design and wish you had done more + prototyping before it was too late, but prototyping in C++ is by no means + "rapid". +* Your tests are slow as they depend on too many libraries or use expensive + resources (e.g. a database). +* Your tests are brittle as some resources they use are unreliable (e.g. the + network). +* You want to test how your code handles a failure (e.g. a file checksum + error), but it's not easy to cause one. 
+* You need to make sure that your module interacts with other modules in the + right way, but it's hard to observe the interaction; therefore you resort to + observing the side effects at the end of the action, but it's awkward at + best. +* You want to "mock out" your dependencies, except that they don't have mock + implementations yet; and, frankly, you aren't thrilled by some of those + hand-written mocks. + +We encourage you to use gMock as + +* a *design* tool, for it lets you experiment with your interface design early + and often. More iterations lead to better designs! +* a *testing* tool to cut your tests' outbound dependencies and probe the + interaction between your module and its collaborators. + +### Getting Started + +gMock is bundled with googletest. + +### A Case for Mock Turtles + +Let's look at an example. Suppose you are developing a graphics program that +relies on a [LOGO](http://en.wikipedia.org/wiki/Logo_programming_language)-like +API for drawing. How would you test that it does the right thing? Well, you can +run it and compare the screen with a golden screen snapshot, but let's admit it: +tests like this are expensive to run and fragile (What if you just upgraded to a +shiny new graphics card that has better anti-aliasing? Suddenly you have to +update all your golden images.). It would be too painful if all your tests are +like this. Fortunately, you learned about +[Dependency Injection](http://en.wikipedia.org/wiki/Dependency_injection) and know the right thing +to do: instead of having your application talk to the system API directly, wrap +the API in an interface (say, `Turtle`) and code to that interface: + +```cpp +class Turtle { + ... 
+ virtual ~Turtle() {}; + virtual void PenUp() = 0; + virtual void PenDown() = 0; + virtual void Forward(int distance) = 0; + virtual void Turn(int degrees) = 0; + virtual void GoTo(int x, int y) = 0; + virtual int GetX() const = 0; + virtual int GetY() const = 0; +}; +``` + +(Note that the destructor of `Turtle` **must** be virtual, as is the case for +**all** classes you intend to inherit from - otherwise the destructor of the +derived class will not be called when you delete an object through a base +pointer, and you'll get corrupted program states like memory leaks.) + +You can control whether the turtle's movement will leave a trace using `PenUp()` +and `PenDown()`, and control its movement using `Forward()`, `Turn()`, and +`GoTo()`. Finally, `GetX()` and `GetY()` tell you the current position of the +turtle. + +Your program will normally use a real implementation of this interface. In +tests, you can use a mock implementation instead. This allows you to easily +check what drawing primitives your program is calling, with what arguments, and +in which order. Tests written this way are much more robust (they won't break +because your new machine does anti-aliasing differently), easier to read and +maintain (the intent of a test is expressed in the code, not in some binary +images), and run *much, much faster*. + +### Writing the Mock Class + +If you are lucky, the mocks you need to use have already been implemented by +some nice people. If, however, you find yourself in the position to write a mock +class, relax - gMock turns this task into a fun game! (Well, almost.) + +#### How to Define It + +Using the `Turtle` interface as example, here are the simple steps you need to +follow: + +* Derive a class `MockTurtle` from `Turtle`. +* Take a *virtual* function of `Turtle` (while it's possible to + [mock non-virtual methods using templates](cook_book.md#MockingNonVirtualMethods), + it's much more involved). 
+* In the `public:` section of the child class, write `MOCK_METHOD();` +* Now comes the fun part: you take the function signature, cut-and-paste it + into the macro, and add two commas - one between the return type and the + name, another between the name and the argument list. +* If you're mocking a const method, add a 4th parameter containing `(const)` + (the parentheses are required). +* Since you're overriding a virtual method, we suggest adding the `override` + keyword. For const methods the 4th parameter becomes `(const, override)`, + for non-const methods just `(override)`. This isn't mandatory. +* Repeat until all virtual functions you want to mock are done. (It goes + without saying that *all* pure virtual methods in your abstract class must + be either mocked or overridden.) + +After the process, you should have something like: + +```cpp +#include "gmock/gmock.h" // Brings in gMock. + +class MockTurtle : public Turtle { + public: + ... + MOCK_METHOD(void, PenUp, (), (override)); + MOCK_METHOD(void, PenDown, (), (override)); + MOCK_METHOD(void, Forward, (int distance), (override)); + MOCK_METHOD(void, Turn, (int degrees), (override)); + MOCK_METHOD(void, GoTo, (int x, int y), (override)); + MOCK_METHOD(int, GetX, (), (const, override)); + MOCK_METHOD(int, GetY, (), (const, override)); +}; +``` + +You don't need to define these mock methods somewhere else - the `MOCK_METHOD` +macro will generate the definitions for you. It's that simple! + +#### Where to Put It + +When you define a mock class, you need to decide where to put its definition. +Some people put it in a `_test.cc`. This is fine when the interface being mocked +(say, `Foo`) is owned by the same person or team. Otherwise, when the owner of +`Foo` changes it, your test could break. (You can't really expect `Foo`'s +maintainer to fix every test that uses `Foo`, can you?) 
+ +So, the rule of thumb is: if you need to mock `Foo` and it's owned by others, +define the mock class in `Foo`'s package (better, in a `testing` sub-package +such that you can clearly separate production code and testing utilities), put +it in a `.h` and a `cc_library`. Then everyone can reference them from their +tests. If `Foo` ever changes, there is only one copy of `MockFoo` to change, and +only tests that depend on the changed methods need to be fixed. + +Another way to do it: you can introduce a thin layer `FooAdaptor` on top of +`Foo` and code to this new interface. Since you own `FooAdaptor`, you can absorb +changes in `Foo` much more easily. While this is more work initially, carefully +choosing the adaptor interface can make your code easier to write and more +readable (a net win in the long run), as you can choose `FooAdaptor` to fit your +specific domain much better than `Foo` does. + + + +### Using Mocks in Tests + +Once you have a mock class, using it is easy. The typical work flow is: + +1. Import the gMock names from the `testing` namespace such that you can use + them unqualified (You only have to do it once per file. Remember that + namespaces are a good idea. +2. Create some mock objects. +3. Specify your expectations on them (How many times will a method be called? + With what arguments? What should it do? etc.). +4. Exercise some code that uses the mocks; optionally, check the result using + googletest assertions. If a mock method is called more than expected or with + wrong arguments, you'll get an error immediately. +5. When a mock is destructed, gMock will automatically check whether all + expectations on it have been satisfied. 
+
+Here's an example:
+
+```cpp
+#include "path/to/mock-turtle.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+using ::testing::AtLeast;                         // #1
+
+TEST(PainterTest, CanDrawSomething) {
+  MockTurtle turtle;                              // #2
+  EXPECT_CALL(turtle, PenDown())                  // #3
+      .Times(AtLeast(1));
+
+  Painter painter(&turtle);                       // #4
+
+  EXPECT_TRUE(painter.DrawCircle(0, 0, 10));      // #5
+}
+```
+
+As you might have guessed, this test checks that `PenDown()` is called at least
+once. If the `painter` object didn't call this method, your test will fail with
+a message like this:
+
+```text
+path/to/my_test.cc:119: Failure
+Actual function call count doesn't match this expectation:
+Actually: never called;
+Expected: called at least once.
+Stack trace:
+...
+```
+
+**Tip 1:** If you run the test from an Emacs buffer, you can hit `<Enter>` on
+the line number to jump right to the failed expectation.
+
+**Tip 2:** If your mock objects are never deleted, the final verification won't
+happen. Therefore it's a good idea to turn on the heap checker in your tests
+when you allocate mocks on the heap. You get that automatically if you use the
+`gtest_main` library already.
+
+**Important note:** gMock requires expectations to be set **before** the mock
+functions are called, otherwise the behavior is **undefined**. In particular,
+you mustn't interleave `EXPECT_CALL()s` and calls to the mock functions.
+
+This means `EXPECT_CALL()` should be read as expecting that a call will occur
+*in the future*, not that a call has occurred. Why does gMock work like that?
+Well, specifying the expectation beforehand allows gMock to report a violation
+as soon as it arises, when the context (stack trace, etc) is still available.
+This makes debugging much easier.
+
+Admittedly, this test is contrived and doesn't do much. You can easily achieve
+the same effect without using gMock. However, as we shall reveal soon, gMock
+allows you to do *so much more* with the mocks.
+ +### Setting Expectations + +The key to using a mock object successfully is to set the *right expectations* +on it. If you set the expectations too strict, your test will fail as the result +of unrelated changes. If you set them too loose, bugs can slip through. You want +to do it just right such that your test can catch exactly the kind of bugs you +intend it to catch. gMock provides the necessary means for you to do it "just +right." + +#### General Syntax + +In gMock we use the `EXPECT_CALL()` macro to set an expectation on a mock +method. The general syntax is: + +```cpp +EXPECT_CALL(mock_object, method(matchers)) + .Times(cardinality) + .WillOnce(action) + .WillRepeatedly(action); +``` + +The macro has two arguments: first the mock object, and then the method and its +arguments. Note that the two are separated by a comma (`,`), not a period (`.`). +(Why using a comma? The answer is that it was necessary for technical reasons.) +If the method is not overloaded, the macro can also be called without matchers: + +```cpp +EXPECT_CALL(mock_object, non-overloaded-method) + .Times(cardinality) + .WillOnce(action) + .WillRepeatedly(action); +``` + +This syntax allows the test writer to specify "called with any arguments" +without explicitly specifying the number or types of arguments. To avoid +unintended ambiguity, this syntax may only be used for methods which are not +overloaded + +Either form of the macro can be followed by some optional *clauses* that provide +more information about the expectation. We'll discuss how each clause works in +the coming sections. + +This syntax is designed to make an expectation read like English. For example, +you can probably guess that + +```cpp +using ::testing::Return; +... 
+EXPECT_CALL(turtle, GetX()) + .Times(5) + .WillOnce(Return(100)) + .WillOnce(Return(150)) + .WillRepeatedly(Return(200)); +``` + +says that the `turtle` object's `GetX()` method will be called five times, it +will return 100 the first time, 150 the second time, and then 200 every time. +Some people like to call this style of syntax a Domain-Specific Language (DSL). + +**Note:** Why do we use a macro to do this? Well it serves two purposes: first +it makes expectations easily identifiable (either by `gsearch` or by a human +reader), and second it allows gMock to include the source file location of a +failed expectation in messages, making debugging easier. + +#### Matchers: What Arguments Do We Expect? + +When a mock function takes arguments, we may specify what arguments we are +expecting, for example: + +```cpp +// Expects the turtle to move forward by 100 units. +EXPECT_CALL(turtle, Forward(100)); +``` + +Oftentimes you do not want to be too specific. Remember that talk about tests +being too rigid? Over specification leads to brittle tests and obscures the +intent of tests. Therefore we encourage you to specify only what's necessary—no +more, no less. If you aren't interested in the value of an argument, write `_` +as the argument, which means "anything goes": + +```cpp +using ::testing::_; +... +// Expects that the turtle jumps to somewhere on the x=50 line. +EXPECT_CALL(turtle, GoTo(50, _)); +``` + +`_` is an instance of what we call **matchers**. A matcher is like a predicate +and can test whether an argument is what we'd expect. You can use a matcher +inside `EXPECT_CALL()` wherever a function argument is expected. `_` is a +convenient way of saying "any value". + +In the above examples, `100` and `50` are also matchers; implicitly, they are +the same as `Eq(100)` and `Eq(50)`, which specify that the argument must be +equal (using `operator==`) to the matcher argument. 
There are many +[built-in matchers](#MatcherList) for common types (as well as +[custom matchers](cook_book.md#NewMatchers)); for example: + +```cpp +using ::testing::Ge; +... +// Expects the turtle moves forward by at least 100. +EXPECT_CALL(turtle, Forward(Ge(100))); +``` + +If you don't care about *any* arguments, rather than specify `_` for each of +them you may instead omit the parameter list: + +```cpp +// Expects the turtle to move forward. +EXPECT_CALL(turtle, Forward); +// Expects the turtle to jump somewhere. +EXPECT_CALL(turtle, GoTo); +``` + +This works for all non-overloaded methods; if a method is overloaded, you need +to help gMock resolve which overload is expected by specifying the number of +arguments and possibly also the +[types of the arguments](cook_book.md#SelectOverload). + +#### Cardinalities: How Many Times Will It Be Called? + +The first clause we can specify following an `EXPECT_CALL()` is `Times()`. We +call its argument a **cardinality** as it tells *how many times* the call should +occur. It allows us to repeat an expectation many times without actually writing +it as many times. More importantly, a cardinality can be "fuzzy", just like a +matcher can be. This allows a user to express the intent of a test exactly. + +An interesting special case is when we say `Times(0)`. You may have guessed - it +means that the function shouldn't be called with the given arguments at all, and +gMock will report a googletest failure whenever the function is (wrongfully) +called. + +We've seen `AtLeast(n)` as an example of fuzzy cardinalities earlier. For the +list of built-in cardinalities you can use, see +[here](cheat_sheet.md#CardinalityList). + +The `Times()` clause can be omitted. **If you omit `Times()`, gMock will infer +the cardinality for you.** The rules are easy to remember: + +* If **neither** `WillOnce()` **nor** `WillRepeatedly()` is in the + `EXPECT_CALL()`, the inferred cardinality is `Times(1)`. 
+* If there are *n* `WillOnce()`'s but **no** `WillRepeatedly()`, where *n* >= + 1, the cardinality is `Times(n)`. +* If there are *n* `WillOnce()`'s and **one** `WillRepeatedly()`, where *n* >= + 0, the cardinality is `Times(AtLeast(n))`. + +**Quick quiz:** what do you think will happen if a function is expected to be +called twice but actually called four times? + +#### Actions: What Should It Do? + +Remember that a mock object doesn't really have a working implementation? We as +users have to tell it what to do when a method is invoked. This is easy in +gMock. + +First, if the return type of a mock function is a built-in type or a pointer, +the function has a **default action** (a `void` function will just return, a +`bool` function will return `false`, and other functions will return 0). In +addition, in C++ 11 and above, a mock function whose return type is +default-constructible (i.e. has a default constructor) has a default action of +returning a default-constructed value. If you don't say anything, this behavior +will be used. + +Second, if a mock function doesn't have a default action, or the default action +doesn't suit you, you can specify the action to be taken each time the +expectation matches using a series of `WillOnce()` clauses followed by an +optional `WillRepeatedly()`. For example, + +```cpp +using ::testing::Return; +... +EXPECT_CALL(turtle, GetX()) + .WillOnce(Return(100)) + .WillOnce(Return(200)) + .WillOnce(Return(300)); +``` + +says that `turtle.GetX()` will be called *exactly three times* (gMock inferred +this from how many `WillOnce()` clauses we've written, since we didn't +explicitly write `Times()`), and will return 100, 200, and 300 respectively. + +```cpp +using ::testing::Return; +... 
+EXPECT_CALL(turtle, GetY())
+    .WillOnce(Return(100))
+    .WillOnce(Return(200))
+    .WillRepeatedly(Return(300));
+```
+
+says that `turtle.GetY()` will be called *at least twice* (gMock knows this as
+we've written two `WillOnce()` clauses and a `WillRepeatedly()` while having no
+explicit `Times()`), will return 100 and 200 respectively the first two times,
+and 300 from the third time on.
+
+Of course, if you explicitly write a `Times()`, gMock will not try to infer the
+cardinality itself. What if the number you specified is larger than there are
+`WillOnce()` clauses? Well, after all `WillOnce()`s are used up, gMock will do
+the *default* action for the function every time (unless, of course, you have a
+`WillRepeatedly()`).
+
+What can we do inside `WillOnce()` besides `Return()`? You can return a
+reference using `ReturnRef(*variable*)`, or invoke a pre-defined function, among
+[others](cook_book.md#using-actions).
+
+**Important note:** The `EXPECT_CALL()` statement evaluates the action clause
+only once, even though the action may be performed many times. Therefore you
+must be careful about side effects. The following may not do what you want:
+
+```cpp
+using ::testing::Return;
+...
+int n = 100;
+EXPECT_CALL(turtle, GetX())
+    .Times(4)
+    .WillRepeatedly(Return(n++));
+```
+
+Instead of returning 100, 101, 102, ..., consecutively, this mock function will
+always return 100 as `n++` is only evaluated once. Similarly, `Return(new Foo)`
+will create a new `Foo` object when the `EXPECT_CALL()` is executed, and will
+return the same pointer every time. If you want the side effect to happen every
+time, you need to define a custom action, which we'll teach in the
+[cook book](cook_book.md).
+
+Time for another quiz! What do you think the following means?
+
+```cpp
+using ::testing::Return;
+...
+EXPECT_CALL(turtle, GetY())
+    .Times(4)
+    .WillOnce(Return(100));
+```
+
+Obviously `turtle.GetY()` is expected to be called four times.
But if you think +it will return 100 every time, think twice! Remember that one `WillOnce()` +clause will be consumed each time the function is invoked and the default action +will be taken afterwards. So the right answer is that `turtle.GetY()` will +return 100 the first time, but **return 0 from the second time on**, as +returning 0 is the default action for `int` functions. + +#### Using Multiple Expectations {#MultiExpectations} + +So far we've only shown examples where you have a single expectation. More +realistically, you'll specify expectations on multiple mock methods which may be +from multiple mock objects. + +By default, when a mock method is invoked, gMock will search the expectations in +the **reverse order** they are defined, and stop when an active expectation that +matches the arguments is found (you can think of it as "newer rules override +older ones."). If the matching expectation cannot take any more calls, you will +get an upper-bound-violated failure. Here's an example: + +```cpp +using ::testing::_; +... +EXPECT_CALL(turtle, Forward(_)); // #1 +EXPECT_CALL(turtle, Forward(10)) // #2 + .Times(2); +``` + +If `Forward(10)` is called three times in a row, the third time it will be an +error, as the last matching expectation (#2) has been saturated. If, however, +the third `Forward(10)` call is replaced by `Forward(20)`, then it would be OK, +as now #1 will be the matching expectation. + +**Note:** Why does gMock search for a match in the *reverse* order of the +expectations? The reason is that this allows a user to set up the default +expectations in a mock object's constructor or the test fixture's set-up phase +and then customize the mock by writing more specific expectations in the test +body. So, if you have two expectations on the same method, you want to put the +one with more specific matchers **after** the other, or the more specific rule +would be shadowed by the more general one that comes after it. 
+ +**Tip:** It is very common to start with a catch-all expectation for a method +and `Times(AnyNumber())` (omitting arguments, or with `_` for all arguments, if +overloaded). This makes any calls to the method expected. This is not necessary +for methods that are not mentioned at all (these are "uninteresting"), but is +useful for methods that have some expectations, but for which other calls are +ok. See +[Understanding Uninteresting vs Unexpected Calls](cook_book.md#uninteresting-vs-unexpected). + +#### Ordered vs Unordered Calls {#OrderedCalls} + +By default, an expectation can match a call even though an earlier expectation +hasn't been satisfied. In other words, the calls don't have to occur in the +order the expectations are specified. + +Sometimes, you may want all the expected calls to occur in a strict order. To +say this in gMock is easy: + +```cpp +using ::testing::InSequence; +... +TEST(FooTest, DrawsLineSegment) { + ... + { + InSequence seq; + + EXPECT_CALL(turtle, PenDown()); + EXPECT_CALL(turtle, Forward(100)); + EXPECT_CALL(turtle, PenUp()); + } + Foo(); +} +``` + +By creating an object of type `InSequence`, all expectations in its scope are +put into a *sequence* and have to occur *sequentially*. Since we are just +relying on the constructor and destructor of this object to do the actual work, +its name is really irrelevant. + +In this example, we test that `Foo()` calls the three expected functions in the +order as written. If a call is made out-of-order, it will be an error. + +(What if you care about the relative order of some of the calls, but not all of +them? Can you specify an arbitrary partial order? The answer is ... yes! The +details can be found [here](cook_book.md#OrderedCalls).) + +#### All Expectations Are Sticky (Unless Said Otherwise) {#StickyExpectations} + +Now let's do a quick quiz to see how well you can use this mock stuff already. 
+How would you test that the turtle is asked to go to the origin *exactly twice* +(you want to ignore any other instructions it receives)? + +After you've come up with your answer, take a look at ours and compare notes +(solve it yourself first - don't cheat!): + +```cpp +using ::testing::_; +using ::testing::AnyNumber; +... +EXPECT_CALL(turtle, GoTo(_, _)) // #1 + .Times(AnyNumber()); +EXPECT_CALL(turtle, GoTo(0, 0)) // #2 + .Times(2); +``` + +Suppose `turtle.GoTo(0, 0)` is called three times. In the third time, gMock will +see that the arguments match expectation #2 (remember that we always pick the +last matching expectation). Now, since we said that there should be only two +such calls, gMock will report an error immediately. This is basically what we've +told you in the [Using Multiple Expectations](#MultiExpectations) section above. + +This example shows that **expectations in gMock are "sticky" by default**, in +the sense that they remain active even after we have reached their invocation +upper bounds. This is an important rule to remember, as it affects the meaning +of the spec, and is **different** to how it's done in many other mocking +frameworks (Why'd we do that? Because we think our rule makes the common cases +easier to express and understand.). + +Simple? Let's see if you've really understood it: what does the following code +say? + +```cpp +using ::testing::Return; +... +for (int i = n; i > 0; i--) { + EXPECT_CALL(turtle, GetX()) + .WillOnce(Return(10*i)); +} +``` + +If you think it says that `turtle.GetX()` will be called `n` times and will +return 10, 20, 30, ..., consecutively, think twice! The problem is that, as we +said, expectations are sticky. So, the second time `turtle.GetX()` is called, +the last (latest) `EXPECT_CALL()` statement will match, and will immediately +lead to an "upper bound violated" error - this piece of code is not very useful! 
+ +One correct way of saying that `turtle.GetX()` will return 10, 20, 30, ..., is +to explicitly say that the expectations are *not* sticky. In other words, they +should *retire* as soon as they are saturated: + +```cpp +using ::testing::Return; +... +for (int i = n; i > 0; i--) { + EXPECT_CALL(turtle, GetX()) + .WillOnce(Return(10*i)) + .RetiresOnSaturation(); +} +``` + +And, there's a better way to do it: in this case, we expect the calls to occur +in a specific order, and we line up the actions to match the order. Since the +order is important here, we should make it explicit using a sequence: + +```cpp +using ::testing::InSequence; +using ::testing::Return; +... +{ + InSequence s; + + for (int i = 1; i <= n; i++) { + EXPECT_CALL(turtle, GetX()) + .WillOnce(Return(10*i)) + .RetiresOnSaturation(); + } +} +``` + +By the way, the other situation where an expectation may *not* be sticky is when +it's in a sequence - as soon as another expectation that comes after it in the +sequence has been used, it automatically retires (and will never be used to +match any call). + +#### Uninteresting Calls + +A mock object may have many methods, and not all of them are that interesting. +For example, in some tests we may not care about how many times `GetX()` and +`GetY()` get called. + +In gMock, if you are not interested in a method, just don't say anything about +it. If a call to this method occurs, you'll see a warning in the test output, +but it won't be a failure. This is called "naggy" behavior; to change, see +[The Nice, the Strict, and the Naggy](cook_book.md#NiceStrictNaggy). diff --git a/src/test/gtest/googlemock/docs/gmock_faq.md b/src/test/gtest/googlemock/docs/gmock_faq.md new file mode 100644 index 00000000..214aabf1 --- /dev/null +++ b/src/test/gtest/googlemock/docs/gmock_faq.md @@ -0,0 +1,396 @@ +## Legacy gMock FAQ {#GMockFaq} + + + +### When I call a method on my mock object, the method for the real object is invoked instead. What's the problem? 
+ +In order for a method to be mocked, it must be *virtual*, unless you use the +[high-perf dependency injection technique](#MockingNonVirtualMethods). + +### Can I mock a variadic function? + +You cannot mock a variadic function (i.e. a function taking ellipsis (`...`) +arguments) directly in gMock. + +The problem is that in general, there is *no way* for a mock object to know how +many arguments are passed to the variadic method, and what the arguments' types +are. Only the *author of the base class* knows the protocol, and we cannot look +into his or her head. + +Therefore, to mock such a function, the *user* must teach the mock object how to +figure out the number of arguments and their types. One way to do it is to +provide overloaded versions of the function. + +Ellipsis arguments are inherited from C and not really a C++ feature. They are +unsafe to use and don't work with arguments that have constructors or +destructors. Therefore we recommend to avoid them in C++ as much as possible. + +### MSVC gives me warning C4301 or C4373 when I define a mock method with a const parameter. Why? + +If you compile this using Microsoft Visual C++ 2005 SP1: + +```cpp +class Foo { + ... + virtual void Bar(const int i) = 0; +}; + +class MockFoo : public Foo { + ... + MOCK_METHOD(void, Bar, (const int i), (override)); +}; +``` + +You may get the following warning: + +```shell +warning C4301: 'MockFoo::Bar': overriding virtual function only differs from 'Foo::Bar' by const/volatile qualifier +``` + +This is a MSVC bug. The same code compiles fine with gcc, for example. If you +use Visual C++ 2008 SP1, you would get the warning: + +```shell +warning C4373: 'MockFoo::Bar': virtual function overrides 'Foo::Bar', previous versions of the compiler did not override when parameters only differed by const/volatile qualifiers +``` + +In C++, if you *declare* a function with a `const` parameter, the `const` +modifier is ignored. 
Therefore, the `Foo` base class above is equivalent to:
+
+```cpp
+class Foo {
+  ...
+  virtual void Bar(int i) = 0;  // int or const int?  Makes no difference.
+};
+```
+
+In fact, you can *declare* `Bar()` with an `int` parameter, and define it with a
+`const int` parameter. The compiler will still match them up.
+
+Since making a parameter `const` is meaningless in the method declaration, we
+recommend to remove it in both `Foo` and `MockFoo`. That should work around the
+VC bug.
+
+Note that we are talking about the *top-level* `const` modifier here. If the
+function parameter is passed by pointer or reference, declaring the pointee or
+referee as `const` is still meaningful. For example, the following two
+declarations are *not* equivalent:
+
+```cpp
+void Bar(int* p);        // Neither p nor *p is const.
+void Bar(const int* p);  // p is not const, but *p is.
+```
+
+
+
+### I can't figure out why gMock thinks my expectations are not satisfied. What should I do?
+
+You might want to run your test with `--gmock_verbose=info`. This flag lets
+gMock print a trace of every mock function call it receives. By studying the
+trace, you'll gain insights on why the expectations you set are not met.
+
+If you see the message "The mock function has no default action set, and its
+return type has no default value set.", then try
+[adding a default action](for_dummies.md#DefaultValue). Due to a known issue,
+unexpected calls on mocks without default actions don't print out a detailed
+comparison between the actual arguments and the expected arguments.
+
+### My program crashed and `ScopedMockLog` spit out tons of messages. Is it a gMock bug?
+
+gMock and `ScopedMockLog` are likely doing the right thing here.
+
+When a test crashes, the failure signal handler will try to log a lot of
+information (the stack trace, and the address map, for example). The messages
+are compounded if you have many threads with deep stacks.
When `ScopedMockLog` +intercepts these messages and finds that they don't match any expectations, it +prints an error for each of them. + +You can learn to ignore the errors, or you can rewrite your expectations to make +your test more robust, for example, by adding something like: + +```cpp +using ::testing::AnyNumber; +using ::testing::Not; +... + // Ignores any log not done by us. + EXPECT_CALL(log, Log(_, Not(EndsWith("/my_file.cc")), _)) + .Times(AnyNumber()); +``` + +### How can I assert that a function is NEVER called? + +```cpp +using ::testing::_; +... + EXPECT_CALL(foo, Bar(_)) + .Times(0); +``` + + + +### I have a failed test where gMock tells me TWICE that a particular expectation is not satisfied. Isn't this redundant? + +When gMock detects a failure, it prints relevant information (the mock function +arguments, the state of relevant expectations, and etc) to help the user debug. +If another failure is detected, gMock will do the same, including printing the +state of relevant expectations. + +Sometimes an expectation's state didn't change between two failures, and you'll +see the same description of the state twice. They are however *not* redundant, +as they refer to *different points in time*. The fact they are the same *is* +interesting information. + +### I get a heapcheck failure when using a mock object, but using a real object is fine. What can be wrong? + +Does the class (hopefully a pure interface) you are mocking have a virtual +destructor? + +Whenever you derive from a base class, make sure its destructor is virtual. +Otherwise Bad Things will happen. Consider the following code: + +```cpp +class Base { + public: + // Not virtual, but should be. + ~Base() { ... } + ... +}; + +class Derived : public Base { + public: + ... + private: + std::string value_; +}; + +... + Base* p = new Derived; + ... + delete p; // Surprise! ~Base() will be called, but ~Derived() will not + // - value_ is leaked. 
+``` + +By changing `~Base()` to virtual, `~Derived()` will be correctly called when +`delete p` is executed, and the heap checker will be happy. + +### The "newer expectations override older ones" rule makes writing expectations awkward. Why does gMock do that? + +When people complain about this, often they are referring to code like: + +```cpp +using ::testing::Return; +... + // foo.Bar() should be called twice, return 1 the first time, and return + // 2 the second time. However, I have to write the expectations in the + // reverse order. This sucks big time!!! + EXPECT_CALL(foo, Bar()) + .WillOnce(Return(2)) + .RetiresOnSaturation(); + EXPECT_CALL(foo, Bar()) + .WillOnce(Return(1)) + .RetiresOnSaturation(); +``` + +The problem, is that they didn't pick the **best** way to express the test's +intent. + +By default, expectations don't have to be matched in *any* particular order. If +you want them to match in a certain order, you need to be explicit. This is +gMock's (and jMock's) fundamental philosophy: it's easy to accidentally +over-specify your tests, and we want to make it harder to do so. + +There are two better ways to write the test spec. You could either put the +expectations in sequence: + +```cpp +using ::testing::Return; +... + // foo.Bar() should be called twice, return 1 the first time, and return + // 2 the second time. Using a sequence, we can write the expectations + // in their natural order. + { + InSequence s; + EXPECT_CALL(foo, Bar()) + .WillOnce(Return(1)) + .RetiresOnSaturation(); + EXPECT_CALL(foo, Bar()) + .WillOnce(Return(2)) + .RetiresOnSaturation(); + } +``` + +or you can put the sequence of actions in the same expectation: + +```cpp +using ::testing::Return; +... + // foo.Bar() should be called twice, return 1 the first time, and return + // 2 the second time. 
+ EXPECT_CALL(foo, Bar()) + .WillOnce(Return(1)) + .WillOnce(Return(2)) + .RetiresOnSaturation(); +``` + +Back to the original questions: why does gMock search the expectations (and +`ON_CALL`s) from back to front? Because this allows a user to set up a mock's +behavior for the common case early (e.g. in the mock's constructor or the test +fixture's set-up phase) and customize it with more specific rules later. If +gMock searches from front to back, this very useful pattern won't be possible. + +### gMock prints a warning when a function without EXPECT_CALL is called, even if I have set its behavior using ON_CALL. Would it be reasonable not to show the warning in this case? + +When choosing between being neat and being safe, we lean toward the latter. So +the answer is that we think it's better to show the warning. + +Often people write `ON_CALL`s in the mock object's constructor or `SetUp()`, as +the default behavior rarely changes from test to test. Then in the test body +they set the expectations, which are often different for each test. Having an +`ON_CALL` in the set-up part of a test doesn't mean that the calls are expected. +If there's no `EXPECT_CALL` and the method is called, it's possibly an error. If +we quietly let the call go through without notifying the user, bugs may creep in +unnoticed. + +If, however, you are sure that the calls are OK, you can write + +```cpp +using ::testing::_; +... + EXPECT_CALL(foo, Bar(_)) + .WillRepeatedly(...); +``` + +instead of + +```cpp +using ::testing::_; +... + ON_CALL(foo, Bar(_)) + .WillByDefault(...); +``` + +This tells gMock that you do expect the calls and no warning should be printed. + +Also, you can control the verbosity by specifying `--gmock_verbose=error`. Other +values are `info` and `warning`. If you find the output too noisy when +debugging, just choose a less verbose level. + +### How can I delete the mock function's argument in an action? 
+
+If your mock function takes a pointer argument and you want to delete that
+argument, you can use testing::DeleteArg<N>() to delete the N'th (zero-indexed)
+argument:
+
+```cpp
+using ::testing::_;
+  ...
+  MOCK_METHOD(void, Bar, (X* x, const Y& y));
+  ...
+  EXPECT_CALL(mock_foo_, Bar(_, _))
+      .WillOnce(testing::DeleteArg<0>());
+```
+
+### How can I perform an arbitrary action on a mock function's argument?
+
+If you find yourself needing to perform some action that's not supported by
+gMock directly, remember that you can define your own actions using
+[`MakeAction()`](#NewMonoActions) or
+[`MakePolymorphicAction()`](#NewPolyActions), or you can write a stub function
+and invoke it using [`Invoke()`](#FunctionsAsActions).
+
+```cpp
+using ::testing::_;
+using ::testing::Invoke;
+  ...
+  MOCK_METHOD(void, Bar, (X* p));
+  ...
+  EXPECT_CALL(mock_foo_, Bar(_))
+      .WillOnce(Invoke(MyAction(...)));
+```
+
+### My code calls a static/global function. Can I mock it?
+
+You can, but you need to make some changes.
+
+In general, if you find yourself needing to mock a static function, it's a sign
+that your modules are too tightly coupled (and less flexible, less reusable,
+less testable, etc). You are probably better off defining a small interface and
+call the function through that interface, which then can be easily mocked. It's
+a bit of work initially, but usually pays for itself quickly.
+
+This Google Testing Blog
+[post](https://testing.googleblog.com/2008/06/defeat-static-cling.html) says it
+excellently. Check it out.
+
+### My mock object needs to do complex stuff. It's a lot of pain to specify the actions. gMock sucks!
+
+I know it's not a question, but you get an answer for free anyway. :-)
+
+With gMock, you can create mocks in C++ easily. And people might be tempted to
+use them everywhere. Sometimes they work great, and sometimes you may find them,
+well, a pain to use. So, what's wrong in the latter case?
+ +When you write a test without using mocks, you exercise the code and assert that +it returns the correct value or that the system is in an expected state. This is +sometimes called "state-based testing". + +Mocks are great for what some call "interaction-based" testing: instead of +checking the system state at the very end, mock objects verify that they are +invoked the right way and report an error as soon as it arises, giving you a +handle on the precise context in which the error was triggered. This is often +more effective and economical to do than state-based testing. + +If you are doing state-based testing and using a test double just to simulate +the real object, you are probably better off using a fake. Using a mock in this +case causes pain, as it's not a strong point for mocks to perform complex +actions. If you experience this and think that mocks suck, you are just not +using the right tool for your problem. Or, you might be trying to solve the +wrong problem. :-) + +### I got a warning "Uninteresting function call encountered - default action taken.." Should I panic? + +By all means, NO! It's just an FYI. :-) + +What it means is that you have a mock function, you haven't set any expectations +on it (by gMock's rule this means that you are not interested in calls to this +function and therefore it can be called any number of times), and it is called. +That's OK - you didn't say it's not OK to call the function! + +What if you actually meant to disallow this function to be called, but forgot to +write `EXPECT_CALL(foo, Bar()).Times(0)`? While one can argue that it's the +user's fault, gMock tries to be nice and prints you a note. + +So, when you see the message and believe that there shouldn't be any +uninteresting calls, you should investigate what's going on. To make your life +easier, gMock dumps the stack trace when an uninteresting call is encountered. +From that you can figure out which mock function it is, and how it is called. 
+ +### I want to define a custom action. Should I use Invoke() or implement the ActionInterface interface? + +Either way is fine - you want to choose the one that's more convenient for your +circumstance. + +Usually, if your action is for a particular function type, defining it using +`Invoke()` should be easier; if your action can be used in functions of +different types (e.g. if you are defining `Return(*value*)`), +`MakePolymorphicAction()` is easiest. Sometimes you want precise control on what +types of functions the action can be used in, and implementing `ActionInterface` +is the way to go here. See the implementation of `Return()` in +`testing/base/public/gmock-actions.h` for an example. + +### I use SetArgPointee() in WillOnce(), but gcc complains about "conflicting return type specified". What does it mean? + +You got this error as gMock has no idea what value it should return when the +mock method is called. `SetArgPointee()` says what the side effect is, but +doesn't say what the return value should be. You need `DoAll()` to chain a +`SetArgPointee()` with a `Return()` that provides a value appropriate to the API +being mocked. + +See this [recipe](cook_book.md#mocking-side-effects) for more details and an +example. + +### I have a huge mock class, and Microsoft Visual C++ runs out of memory when compiling it. What can I do? + +We've noticed that when the `/clr` compiler flag is used, Visual C++ uses 5~6 +times as much memory when compiling a mock class. We suggest to avoid `/clr` +when compiling native C++ mocks. diff --git a/src/test/gtest/googlemock/include/gmock/gmock-actions.h b/src/test/gtest/googlemock/include/gmock/gmock-actions.h new file mode 100644 index 00000000..f12d39be --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-actions.h @@ -0,0 +1,1142 @@ +// Copyright 2007, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used actions. 
+ +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ + +#ifndef _WIN32_WCE +# include +#endif + +#include +#include +#include +#include +#include +#include + +#include "gmock/internal/gmock-internal-utils.h" +#include "gmock/internal/gmock-port.h" + +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable:4100) +#endif + +namespace testing { + +// To implement an action Foo, define: +// 1. a class FooAction that implements the ActionInterface interface, and +// 2. a factory function that creates an Action object from a +// const FooAction*. +// +// The two-level delegation design follows that of Matcher, providing +// consistency for extension developers. It also eases ownership +// management as Action objects can now be copied like plain values. + +namespace internal { + +// BuiltInDefaultValueGetter::Get() returns a +// default-constructed T value. BuiltInDefaultValueGetter::Get() crashes with an error. +// +// This primary template is used when kDefaultConstructible is true. +template +struct BuiltInDefaultValueGetter { + static T Get() { return T(); } +}; +template +struct BuiltInDefaultValueGetter { + static T Get() { + Assert(false, __FILE__, __LINE__, + "Default action undefined for the function return type."); + return internal::Invalid(); + // The above statement will never be reached, but is required in + // order for this function to compile. + } +}; + +// BuiltInDefaultValue::Get() returns the "built-in" default value +// for type T, which is NULL when T is a raw pointer type, 0 when T is +// a numeric type, false when T is bool, or "" when T is string or +// std::string. In addition, in C++11 and above, it turns a +// default-constructed T value if T is default constructible. For any +// other type T, the built-in default T value is undefined, and the +// function will abort the process. 
+template +class BuiltInDefaultValue { + public: + // This function returns true if and only if type T has a built-in default + // value. + static bool Exists() { + return ::std::is_default_constructible::value; + } + + static T Get() { + return BuiltInDefaultValueGetter< + T, ::std::is_default_constructible::value>::Get(); + } +}; + +// This partial specialization says that we use the same built-in +// default value for T and const T. +template +class BuiltInDefaultValue { + public: + static bool Exists() { return BuiltInDefaultValue::Exists(); } + static T Get() { return BuiltInDefaultValue::Get(); } +}; + +// This partial specialization defines the default values for pointer +// types. +template +class BuiltInDefaultValue { + public: + static bool Exists() { return true; } + static T* Get() { return nullptr; } +}; + +// The following specializations define the default values for +// specific types we care about. +#define GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(type, value) \ + template <> \ + class BuiltInDefaultValue { \ + public: \ + static bool Exists() { return true; } \ + static type Get() { return value; } \ + } + +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(void, ); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(::std::string, ""); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(bool, false); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned char, '\0'); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed char, '\0'); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(char, '\0'); + +// There's no need for a default action for signed wchar_t, as that +// type is the same as wchar_t for gcc, and invalid for MSVC. +// +// There's also no need for a default action for unsigned wchar_t, as +// that type is the same as unsigned int for gcc, and invalid for +// MSVC. 
+#if GMOCK_WCHAR_T_IS_NATIVE_ +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(wchar_t, 0U); // NOLINT +#endif + +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned short, 0U); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed short, 0); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned int, 0U); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed int, 0); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(unsigned long, 0UL); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(signed long, 0L); // NOLINT +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(UInt64, 0); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(Int64, 0); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(float, 0); +GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_(double, 0); + +#undef GMOCK_DEFINE_DEFAULT_ACTION_FOR_RETURN_TYPE_ + +} // namespace internal + +// When an unexpected function call is encountered, Google Mock will +// let it return a default value if the user has specified one for its +// return type, or if the return type has a built-in default value; +// otherwise Google Mock won't know what value to return and will have +// to abort the process. +// +// The DefaultValue class allows a user to specify the +// default value for a type T that is both copyable and publicly +// destructible (i.e. anything that can be used as a function return +// type). The usage is: +// +// // Sets the default value for type T to be foo. +// DefaultValue::Set(foo); +template +class DefaultValue { + public: + // Sets the default value for type T; requires T to be + // copy-constructable and have a public destructor. + static void Set(T x) { + delete producer_; + producer_ = new FixedValueProducer(x); + } + + // Provides a factory function to be called to generate the default value. + // This method can be used even if T is only move-constructible, but it is not + // limited to that case. 
+ typedef T (*FactoryFunction)(); + static void SetFactory(FactoryFunction factory) { + delete producer_; + producer_ = new FactoryValueProducer(factory); + } + + // Unsets the default value for type T. + static void Clear() { + delete producer_; + producer_ = nullptr; + } + + // Returns true if and only if the user has set the default value for type T. + static bool IsSet() { return producer_ != nullptr; } + + // Returns true if T has a default return value set by the user or there + // exists a built-in default value. + static bool Exists() { + return IsSet() || internal::BuiltInDefaultValue::Exists(); + } + + // Returns the default value for type T if the user has set one; + // otherwise returns the built-in default value. Requires that Exists() + // is true, which ensures that the return value is well-defined. + static T Get() { + return producer_ == nullptr ? internal::BuiltInDefaultValue::Get() + : producer_->Produce(); + } + + private: + class ValueProducer { + public: + virtual ~ValueProducer() {} + virtual T Produce() = 0; + }; + + class FixedValueProducer : public ValueProducer { + public: + explicit FixedValueProducer(T value) : value_(value) {} + T Produce() override { return value_; } + + private: + const T value_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(FixedValueProducer); + }; + + class FactoryValueProducer : public ValueProducer { + public: + explicit FactoryValueProducer(FactoryFunction factory) + : factory_(factory) {} + T Produce() override { return factory_(); } + + private: + const FactoryFunction factory_; + GTEST_DISALLOW_COPY_AND_ASSIGN_(FactoryValueProducer); + }; + + static ValueProducer* producer_; +}; + +// This partial specialization allows a user to set default values for +// reference types. +template +class DefaultValue { + public: + // Sets the default value for type T&. + static void Set(T& x) { // NOLINT + address_ = &x; + } + + // Unsets the default value for type T&. 
+ static void Clear() { address_ = nullptr; } + + // Returns true if and only if the user has set the default value for type T&. + static bool IsSet() { return address_ != nullptr; } + + // Returns true if T has a default return value set by the user or there + // exists a built-in default value. + static bool Exists() { + return IsSet() || internal::BuiltInDefaultValue::Exists(); + } + + // Returns the default value for type T& if the user has set one; + // otherwise returns the built-in default value if there is one; + // otherwise aborts the process. + static T& Get() { + return address_ == nullptr ? internal::BuiltInDefaultValue::Get() + : *address_; + } + + private: + static T* address_; +}; + +// This specialization allows DefaultValue::Get() to +// compile. +template <> +class DefaultValue { + public: + static bool Exists() { return true; } + static void Get() {} +}; + +// Points to the user-set default value for type T. +template +typename DefaultValue::ValueProducer* DefaultValue::producer_ = nullptr; + +// Points to the user-set default value for type T&. +template +T* DefaultValue::address_ = nullptr; + +// Implement this interface to define an action for function type F. +template +class ActionInterface { + public: + typedef typename internal::Function::Result Result; + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + ActionInterface() {} + virtual ~ActionInterface() {} + + // Performs the action. This method is not const, as in general an + // action can have side effects and be stateful. For example, a + // get-the-next-element-from-the-collection action will need to + // remember the current element. + virtual Result Perform(const ArgumentTuple& args) = 0; + + private: + GTEST_DISALLOW_COPY_AND_ASSIGN_(ActionInterface); +}; + +// An Action is a copyable and IMMUTABLE (except by assignment) +// object that represents an action to be taken when a mock function +// of type F is called. 
The implementation of Action is just a +// std::shared_ptr to const ActionInterface. Don't inherit from Action! +// You can view an object implementing ActionInterface as a +// concrete action (including its current state), and an Action +// object as a handle to it. +template +class Action { + // Adapter class to allow constructing Action from a legacy ActionInterface. + // New code should create Actions from functors instead. + struct ActionAdapter { + // Adapter must be copyable to satisfy std::function requirements. + ::std::shared_ptr> impl_; + + template + typename internal::Function::Result operator()(Args&&... args) { + return impl_->Perform( + ::std::forward_as_tuple(::std::forward(args)...)); + } + }; + + public: + typedef typename internal::Function::Result Result; + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + // Constructs a null Action. Needed for storing Action objects in + // STL containers. + Action() {} + + // Construct an Action from a specified callable. + // This cannot take std::function directly, because then Action would not be + // directly constructible from lambda (it would require two conversions). + template , G>::value>::type> + Action(G&& fun) : fun_(::std::forward(fun)) {} // NOLINT + + // Constructs an Action from its implementation. + explicit Action(ActionInterface* impl) + : fun_(ActionAdapter{::std::shared_ptr>(impl)}) {} + + // This constructor allows us to turn an Action object into an + // Action, as long as F's arguments can be implicitly converted + // to Func's and Func's return type can be implicitly converted to F's. + template + explicit Action(const Action& action) : fun_(action.fun_) {} + + // Returns true if and only if this is the DoDefault() action. + bool IsDoDefault() const { return fun_ == nullptr; } + + // Performs the action. Note that this method is const even though + // the corresponding method in ActionInterface is not. 
The reason + // is that a const Action means that it cannot be re-bound to + // another concrete action, not that the concrete action it binds to + // cannot change state. (Think of the difference between a const + // pointer and a pointer to const.) + Result Perform(ArgumentTuple args) const { + if (IsDoDefault()) { + internal::IllegalDoDefault(__FILE__, __LINE__); + } + return internal::Apply(fun_, ::std::move(args)); + } + + private: + template + friend class Action; + + // fun_ is an empty function if and only if this is the DoDefault() action. + ::std::function fun_; +}; + +// The PolymorphicAction class template makes it easy to implement a +// polymorphic action (i.e. an action that can be used in mock +// functions of than one type, e.g. Return()). +// +// To define a polymorphic action, a user first provides a COPYABLE +// implementation class that has a Perform() method template: +// +// class FooAction { +// public: +// template +// Result Perform(const ArgumentTuple& args) const { +// // Processes the arguments and returns a result, using +// // std::get(args) to get the N-th (0-based) argument in the tuple. +// } +// ... +// }; +// +// Then the user creates the polymorphic action using +// MakePolymorphicAction(object) where object has type FooAction. See +// the definition of Return(void) and SetArgumentPointee(value) for +// complete examples. 
+template +class PolymorphicAction { + public: + explicit PolymorphicAction(const Impl& impl) : impl_(impl) {} + + template + operator Action() const { + return Action(new MonomorphicImpl(impl_)); + } + + private: + template + class MonomorphicImpl : public ActionInterface { + public: + typedef typename internal::Function::Result Result; + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + explicit MonomorphicImpl(const Impl& impl) : impl_(impl) {} + + Result Perform(const ArgumentTuple& args) override { + return impl_.template Perform(args); + } + + private: + Impl impl_; + + GTEST_DISALLOW_ASSIGN_(MonomorphicImpl); + }; + + Impl impl_; + + GTEST_DISALLOW_ASSIGN_(PolymorphicAction); +}; + +// Creates an Action from its implementation and returns it. The +// created Action object owns the implementation. +template +Action MakeAction(ActionInterface* impl) { + return Action(impl); +} + +// Creates a polymorphic action from its implementation. This is +// easier to use than the PolymorphicAction constructor as it +// doesn't require you to explicitly write the template argument, e.g. +// +// MakePolymorphicAction(foo); +// vs +// PolymorphicAction(foo); +template +inline PolymorphicAction MakePolymorphicAction(const Impl& impl) { + return PolymorphicAction(impl); +} + +namespace internal { + +// Helper struct to specialize ReturnAction to execute a move instead of a copy +// on return. Useful for move-only types, but could be used on any type. +template +struct ByMoveWrapper { + explicit ByMoveWrapper(T value) : payload(std::move(value)) {} + T payload; +}; + +// Implements the polymorphic Return(x) action, which can be used in +// any function that returns the type of x, regardless of the argument +// types. +// +// Note: The value passed into Return must be converted into +// Function::Result when this action is cast to Action rather than +// when that action is performed. 
This is important in scenarios like +// +// MOCK_METHOD1(Method, T(U)); +// ... +// { +// Foo foo; +// X x(&foo); +// EXPECT_CALL(mock, Method(_)).WillOnce(Return(x)); +// } +// +// In the example above the variable x holds reference to foo which leaves +// scope and gets destroyed. If copying X just copies a reference to foo, +// that copy will be left with a hanging reference. If conversion to T +// makes a copy of foo, the above code is safe. To support that scenario, we +// need to make sure that the type conversion happens inside the EXPECT_CALL +// statement, and conversion of the result of Return to Action is a +// good place for that. +// +// The real life example of the above scenario happens when an invocation +// of gtl::Container() is passed into Return. +// +template +class ReturnAction { + public: + // Constructs a ReturnAction object from the value to be returned. + // 'value' is passed by value instead of by const reference in order + // to allow Return("string literal") to compile. + explicit ReturnAction(R value) : value_(new R(std::move(value))) {} + + // This template type conversion operator allows Return(x) to be + // used in ANY function that returns x's type. + template + operator Action() const { // NOLINT + // Assert statement belongs here because this is the best place to verify + // conditions on F. It produces the clearest error messages + // in most compilers. + // Impl really belongs in this scope as a local class but can't + // because MSVC produces duplicate symbols in different translation units + // in this case. Until MS fixes that bug we put Impl into the class scope + // and put the typedef both here (for use in assert statement) and + // in the Impl class. But both definitions must be the same. 
+ typedef typename Function::Result Result;
+ GTEST_COMPILE_ASSERT_(
+ !std::is_reference::value,
+ use_ReturnRef_instead_of_Return_to_return_a_reference);
+ static_assert(!std::is_void::value,
+ "Can't use Return() on an action expected to return `void`.");
+ return Action(new Impl(value_));
+ }
+
+ private:
+ // Implements the Return(x) action for a particular function type F.
+ template
+ class Impl : public ActionInterface {
+ public:
+ typedef typename Function::Result Result;
+ typedef typename Function::ArgumentTuple ArgumentTuple;
+
+ // The implicit cast is necessary when Result has more than one
+ // single-argument constructor (e.g. Result is std::vector) and R
+ // has a type conversion operator template. In that case, value_(value)
+ // won't compile as the compiler doesn't know which constructor of
+ // Result to call. ImplicitCast_ forces the compiler to convert R to
+ // Result without considering explicit constructors, thus resolving the
+ // ambiguity. value_ is then initialized using its copy constructor.
+ explicit Impl(const std::shared_ptr& value)
+ : value_before_cast_(*value),
+ value_(ImplicitCast_(value_before_cast_)) {}
+
+ Result Perform(const ArgumentTuple&) override { return value_; }
+
+ private:
+ GTEST_COMPILE_ASSERT_(!std::is_reference::value,
+ Result_cannot_be_a_reference_type);
+ // We save the value before casting just in case it is being cast to a
+ // wrapper type.
+ R value_before_cast_;
+ Result value_;
+
+ GTEST_DISALLOW_COPY_AND_ASSIGN_(Impl);
+ };
+
+ // Partially specialize for ByMoveWrapper. This version of ReturnAction will
+ // move its contents instead. 
+ template + class Impl, F> : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + explicit Impl(const std::shared_ptr& wrapper) + : performed_(false), wrapper_(wrapper) {} + + Result Perform(const ArgumentTuple&) override { + GTEST_CHECK_(!performed_) + << "A ByMove() action should only be performed once."; + performed_ = true; + return std::move(wrapper_->payload); + } + + private: + bool performed_; + const std::shared_ptr wrapper_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + const std::shared_ptr value_; + + GTEST_DISALLOW_ASSIGN_(ReturnAction); +}; + +// Implements the ReturnNull() action. +class ReturnNullAction { + public: + // Allows ReturnNull() to be used in any pointer-returning function. In C++11 + // this is enforced by returning nullptr, and in non-C++11 by asserting a + // pointer type on compile time. + template + static Result Perform(const ArgumentTuple&) { + return nullptr; + } +}; + +// Implements the Return() action. +class ReturnVoidAction { + public: + // Allows Return() to be used in any void-returning function. + template + static void Perform(const ArgumentTuple&) { + static_assert(std::is_void::value, "Result should be void."); + } +}; + +// Implements the polymorphic ReturnRef(x) action, which can be used +// in any function that returns a reference to the type of x, +// regardless of the argument types. +template +class ReturnRefAction { + public: + // Constructs a ReturnRefAction object from the reference to be returned. + explicit ReturnRefAction(T& ref) : ref_(ref) {} // NOLINT + + // This template type conversion operator allows ReturnRef(x) to be + // used in ANY function that returns a reference to x's type. + template + operator Action() const { + typedef typename Function::Result Result; + // Asserts that the function return type is a reference. 
This + // catches the user error of using ReturnRef(x) when Return(x) + // should be used, and generates some helpful error message. + GTEST_COMPILE_ASSERT_(std::is_reference::value, + use_Return_instead_of_ReturnRef_to_return_a_value); + return Action(new Impl(ref_)); + } + + private: + // Implements the ReturnRef(x) action for a particular function type F. + template + class Impl : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + explicit Impl(T& ref) : ref_(ref) {} // NOLINT + + Result Perform(const ArgumentTuple&) override { return ref_; } + + private: + T& ref_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + T& ref_; + + GTEST_DISALLOW_ASSIGN_(ReturnRefAction); +}; + +// Implements the polymorphic ReturnRefOfCopy(x) action, which can be +// used in any function that returns a reference to the type of x, +// regardless of the argument types. +template +class ReturnRefOfCopyAction { + public: + // Constructs a ReturnRefOfCopyAction object from the reference to + // be returned. + explicit ReturnRefOfCopyAction(const T& value) : value_(value) {} // NOLINT + + // This template type conversion operator allows ReturnRefOfCopy(x) to be + // used in ANY function that returns a reference to x's type. + template + operator Action() const { + typedef typename Function::Result Result; + // Asserts that the function return type is a reference. This + // catches the user error of using ReturnRefOfCopy(x) when Return(x) + // should be used, and generates some helpful error message. + GTEST_COMPILE_ASSERT_( + std::is_reference::value, + use_Return_instead_of_ReturnRefOfCopy_to_return_a_value); + return Action(new Impl(value_)); + } + + private: + // Implements the ReturnRefOfCopy(x) action for a particular function type F. 
+ template + class Impl : public ActionInterface { + public: + typedef typename Function::Result Result; + typedef typename Function::ArgumentTuple ArgumentTuple; + + explicit Impl(const T& value) : value_(value) {} // NOLINT + + Result Perform(const ArgumentTuple&) override { return value_; } + + private: + T value_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + const T value_; + + GTEST_DISALLOW_ASSIGN_(ReturnRefOfCopyAction); +}; + +// Implements the polymorphic DoDefault() action. +class DoDefaultAction { + public: + // This template type conversion operator allows DoDefault() to be + // used in any function. + template + operator Action() const { return Action(); } // NOLINT +}; + +// Implements the Assign action to set a given pointer referent to a +// particular value. +template +class AssignAction { + public: + AssignAction(T1* ptr, T2 value) : ptr_(ptr), value_(value) {} + + template + void Perform(const ArgumentTuple& /* args */) const { + *ptr_ = value_; + } + + private: + T1* const ptr_; + const T2 value_; + + GTEST_DISALLOW_ASSIGN_(AssignAction); +}; + +#if !GTEST_OS_WINDOWS_MOBILE + +// Implements the SetErrnoAndReturn action to simulate return from +// various system calls and libc functions. +template +class SetErrnoAndReturnAction { + public: + SetErrnoAndReturnAction(int errno_value, T result) + : errno_(errno_value), + result_(result) {} + template + Result Perform(const ArgumentTuple& /* args */) const { + errno = errno_; + return result_; + } + + private: + const int errno_; + const T result_; + + GTEST_DISALLOW_ASSIGN_(SetErrnoAndReturnAction); +}; + +#endif // !GTEST_OS_WINDOWS_MOBILE + +// Implements the SetArgumentPointee(x) action for any function +// whose N-th argument (0-based) is a pointer to x's type. +template +struct SetArgumentPointeeAction { + A value; + + template + void operator()(const Args&... args) const { + *::std::get(std::tie(args...)) = value; + } +}; + +// Implements the Invoke(object_ptr, &Class::Method) action. 
+template +struct InvokeMethodAction { + Class* const obj_ptr; + const MethodPtr method_ptr; + + template + auto operator()(Args&&... args) const + -> decltype((obj_ptr->*method_ptr)(std::forward(args)...)) { + return (obj_ptr->*method_ptr)(std::forward(args)...); + } +}; + +// Implements the InvokeWithoutArgs(f) action. The template argument +// FunctionImpl is the implementation type of f, which can be either a +// function pointer or a functor. InvokeWithoutArgs(f) can be used as an +// Action as long as f's type is compatible with F. +template +struct InvokeWithoutArgsAction { + FunctionImpl function_impl; + + // Allows InvokeWithoutArgs(f) to be used as any action whose type is + // compatible with f. + template + auto operator()(const Args&...) -> decltype(function_impl()) { + return function_impl(); + } +}; + +// Implements the InvokeWithoutArgs(object_ptr, &Class::Method) action. +template +struct InvokeMethodWithoutArgsAction { + Class* const obj_ptr; + const MethodPtr method_ptr; + + using ReturnType = typename std::result_of::type; + + template + ReturnType operator()(const Args&...) const { + return (obj_ptr->*method_ptr)(); + } +}; + +// Implements the IgnoreResult(action) action. +template +class IgnoreResultAction { + public: + explicit IgnoreResultAction(const A& action) : action_(action) {} + + template + operator Action() const { + // Assert statement belongs here because this is the best place to verify + // conditions on F. It produces the clearest error messages + // in most compilers. + // Impl really belongs in this scope as a local class but can't + // because MSVC produces duplicate symbols in different translation units + // in this case. Until MS fixes that bug we put Impl into the class scope + // and put the typedef both here (for use in assert statement) and + // in the Impl class. But both definitions must be the same. + typedef typename internal::Function::Result Result; + + // Asserts at compile time that F returns void. 
+ static_assert(std::is_void::value, "Result type should be void."); + + return Action(new Impl(action_)); + } + + private: + template + class Impl : public ActionInterface { + public: + typedef typename internal::Function::Result Result; + typedef typename internal::Function::ArgumentTuple ArgumentTuple; + + explicit Impl(const A& action) : action_(action) {} + + void Perform(const ArgumentTuple& args) override { + // Performs the action and ignores its result. + action_.Perform(args); + } + + private: + // Type OriginalFunction is the same as F except that its return + // type is IgnoredValue. + typedef typename internal::Function::MakeResultIgnoredValue + OriginalFunction; + + const Action action_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + const A action_; + + GTEST_DISALLOW_ASSIGN_(IgnoreResultAction); +}; + +template +struct WithArgsAction { + InnerAction action; + + // The inner action could be anything convertible to Action. + // We use the conversion operator to detect the signature of the inner Action. + template + operator Action() const { // NOLINT + Action>::type...)> + converted(action); + + return [converted](Args... args) -> R { + return converted.Perform(std::forward_as_tuple( + std::get(std::forward_as_tuple(std::forward(args)...))...)); + }; + } +}; + +template +struct DoAllAction { + private: + template + std::vector> Convert(IndexSequence) const { + return {std::get(actions)...}; + } + + public: + std::tuple actions; + + template + operator Action() const { // NOLINT + struct Op { + std::vector> converted; + Action last; + R operator()(Args... args) const { + auto tuple_args = std::forward_as_tuple(std::forward(args)...); + for (auto& a : converted) { + a.Perform(tuple_args); + } + return last.Perform(tuple_args); + } + }; + return Op{Convert(MakeIndexSequence()), + std::get(actions)}; + } +}; + +} // namespace internal + +// An Unused object can be implicitly constructed from ANY value. 
+// This is handy when defining actions that ignore some or all of the +// mock function arguments. For example, given +// +// MOCK_METHOD3(Foo, double(const string& label, double x, double y)); +// MOCK_METHOD3(Bar, double(int index, double x, double y)); +// +// instead of +// +// double DistanceToOriginWithLabel(const string& label, double x, double y) { +// return sqrt(x*x + y*y); +// } +// double DistanceToOriginWithIndex(int index, double x, double y) { +// return sqrt(x*x + y*y); +// } +// ... +// EXPECT_CALL(mock, Foo("abc", _, _)) +// .WillOnce(Invoke(DistanceToOriginWithLabel)); +// EXPECT_CALL(mock, Bar(5, _, _)) +// .WillOnce(Invoke(DistanceToOriginWithIndex)); +// +// you could write +// +// // We can declare any uninteresting argument as Unused. +// double DistanceToOrigin(Unused, double x, double y) { +// return sqrt(x*x + y*y); +// } +// ... +// EXPECT_CALL(mock, Foo("abc", _, _)).WillOnce(Invoke(DistanceToOrigin)); +// EXPECT_CALL(mock, Bar(5, _, _)).WillOnce(Invoke(DistanceToOrigin)); +typedef internal::IgnoredValue Unused; + +// Creates an action that does actions a1, a2, ..., sequentially in +// each invocation. +template +internal::DoAllAction::type...> DoAll( + Action&&... action) { + return {std::forward_as_tuple(std::forward(action)...)}; +} + +// WithArg(an_action) creates an action that passes the k-th +// (0-based) argument of the mock function to an_action and performs +// it. It adapts an action accepting one argument to one that accepts +// multiple arguments. For convenience, we also provide +// WithArgs(an_action) (defined below) as a synonym. +template +internal::WithArgsAction::type, k> +WithArg(InnerAction&& action) { + return {std::forward(action)}; +} + +// WithArgs(an_action) creates an action that passes +// the selected arguments of the mock function to an_action and +// performs it. It serves as an adaptor between actions with +// different argument lists. 
+template +internal::WithArgsAction::type, k, ks...> +WithArgs(InnerAction&& action) { + return {std::forward(action)}; +} + +// WithoutArgs(inner_action) can be used in a mock function with a +// non-empty argument list to perform inner_action, which takes no +// argument. In other words, it adapts an action accepting no +// argument to one that accepts (and ignores) arguments. +template +internal::WithArgsAction::type> +WithoutArgs(InnerAction&& action) { + return {std::forward(action)}; +} + +// Creates an action that returns 'value'. 'value' is passed by value +// instead of const reference - otherwise Return("string literal") +// will trigger a compiler error about using array as initializer. +template +internal::ReturnAction Return(R value) { + return internal::ReturnAction(std::move(value)); +} + +// Creates an action that returns NULL. +inline PolymorphicAction ReturnNull() { + return MakePolymorphicAction(internal::ReturnNullAction()); +} + +// Creates an action that returns from a void function. +inline PolymorphicAction Return() { + return MakePolymorphicAction(internal::ReturnVoidAction()); +} + +// Creates an action that returns the reference to a variable. +template +inline internal::ReturnRefAction ReturnRef(R& x) { // NOLINT + return internal::ReturnRefAction(x); +} + +// Creates an action that returns the reference to a copy of the +// argument. The copy is created when the action is constructed and +// lives as long as the action. +template +inline internal::ReturnRefOfCopyAction ReturnRefOfCopy(const R& x) { + return internal::ReturnRefOfCopyAction(x); +} + +// Modifies the parent action (a Return() action) to perform a move of the +// argument instead of a copy. +// Return(ByMove()) actions can only be executed once and will assert this +// invariant. +template +internal::ByMoveWrapper ByMove(R x) { + return internal::ByMoveWrapper(std::move(x)); +} + +// Creates an action that does the default action for the give mock function. 
+inline internal::DoDefaultAction DoDefault() { + return internal::DoDefaultAction(); +} + +// Creates an action that sets the variable pointed by the N-th +// (0-based) function argument to 'value'. +template +internal::SetArgumentPointeeAction SetArgPointee(T x) { + return {std::move(x)}; +} + +// The following version is DEPRECATED. +template +internal::SetArgumentPointeeAction SetArgumentPointee(T x) { + return {std::move(x)}; +} + +// Creates an action that sets a pointer referent to a given value. +template +PolymorphicAction > Assign(T1* ptr, T2 val) { + return MakePolymorphicAction(internal::AssignAction(ptr, val)); +} + +#if !GTEST_OS_WINDOWS_MOBILE + +// Creates an action that sets errno and returns the appropriate error. +template +PolymorphicAction > +SetErrnoAndReturn(int errval, T result) { + return MakePolymorphicAction( + internal::SetErrnoAndReturnAction(errval, result)); +} + +#endif // !GTEST_OS_WINDOWS_MOBILE + +// Various overloads for Invoke(). + +// Legacy function. +// Actions can now be implicitly constructed from callables. No need to create +// wrapper objects. +// This function exists for backwards compatibility. +template +typename std::decay::type Invoke(FunctionImpl&& function_impl) { + return std::forward(function_impl); +} + +// Creates an action that invokes the given method on the given object +// with the mock function's arguments. +template +internal::InvokeMethodAction Invoke(Class* obj_ptr, + MethodPtr method_ptr) { + return {obj_ptr, method_ptr}; +} + +// Creates an action that invokes 'function_impl' with no argument. +template +internal::InvokeWithoutArgsAction::type> +InvokeWithoutArgs(FunctionImpl function_impl) { + return {std::move(function_impl)}; +} + +// Creates an action that invokes the given method on the given object +// with no argument. 
+template +internal::InvokeMethodWithoutArgsAction InvokeWithoutArgs( + Class* obj_ptr, MethodPtr method_ptr) { + return {obj_ptr, method_ptr}; +} + +// Creates an action that performs an_action and throws away its +// result. In other words, it changes the return type of an_action to +// void. an_action MUST NOT return void, or the code won't compile. +template +inline internal::IgnoreResultAction IgnoreResult(const A& an_action) { + return internal::IgnoreResultAction(an_action); +} + +// Creates a reference wrapper for the given L-value. If necessary, +// you can explicitly specify the type of the reference. For example, +// suppose 'derived' is an object of type Derived, ByRef(derived) +// would wrap a Derived&. If you want to wrap a const Base& instead, +// where Base is a base class of Derived, just write: +// +// ByRef(derived) +// +// N.B. ByRef is redundant with std::ref, std::cref and std::reference_wrapper. +// However, it may still be used for consistency with ByMove(). +template +inline ::std::reference_wrapper ByRef(T& l_value) { // NOLINT + return ::std::reference_wrapper(l_value); +} + +} // namespace testing + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_ACTIONS_H_ diff --git a/src/test/gtest/googlemock/include/gmock/gmock-cardinalities.h b/src/test/gtest/googlemock/include/gmock/gmock-cardinalities.h new file mode 100644 index 00000000..46e01e10 --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-cardinalities.h @@ -0,0 +1,157 @@ +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used cardinalities. More +// cardinalities can be defined by the user implementing the +// CardinalityInterface interface if necessary. + +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ + +#include +#include +#include // NOLINT +#include "gmock/internal/gmock-port.h" +#include "gtest/gtest.h" + +GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ +/* class A needs to have dll-interface to be used by clients of class B */) + +namespace testing { + +// To implement a cardinality Foo, define: +// 1. a class FooCardinality that implements the +// CardinalityInterface interface, and +// 2. 
a factory function that creates a Cardinality object from a +// const FooCardinality*. +// +// The two-level delegation design follows that of Matcher, providing +// consistency for extension developers. It also eases ownership +// management as Cardinality objects can now be copied like plain values. + +// The implementation of a cardinality. +class CardinalityInterface { + public: + virtual ~CardinalityInterface() {} + + // Conservative estimate on the lower/upper bound of the number of + // calls allowed. + virtual int ConservativeLowerBound() const { return 0; } + virtual int ConservativeUpperBound() const { return INT_MAX; } + + // Returns true if and only if call_count calls will satisfy this + // cardinality. + virtual bool IsSatisfiedByCallCount(int call_count) const = 0; + + // Returns true if and only if call_count calls will saturate this + // cardinality. + virtual bool IsSaturatedByCallCount(int call_count) const = 0; + + // Describes self to an ostream. + virtual void DescribeTo(::std::ostream* os) const = 0; +}; + +// A Cardinality is a copyable and IMMUTABLE (except by assignment) +// object that specifies how many times a mock function is expected to +// be called. The implementation of Cardinality is just a std::shared_ptr +// to const CardinalityInterface. Don't inherit from Cardinality! +class GTEST_API_ Cardinality { + public: + // Constructs a null cardinality. Needed for storing Cardinality + // objects in STL containers. + Cardinality() {} + + // Constructs a Cardinality from its implementation. + explicit Cardinality(const CardinalityInterface* impl) : impl_(impl) {} + + // Conservative estimate on the lower/upper bound of the number of + // calls allowed. + int ConservativeLowerBound() const { return impl_->ConservativeLowerBound(); } + int ConservativeUpperBound() const { return impl_->ConservativeUpperBound(); } + + // Returns true if and only if call_count calls will satisfy this + // cardinality. 
+ bool IsSatisfiedByCallCount(int call_count) const { + return impl_->IsSatisfiedByCallCount(call_count); + } + + // Returns true if and only if call_count calls will saturate this + // cardinality. + bool IsSaturatedByCallCount(int call_count) const { + return impl_->IsSaturatedByCallCount(call_count); + } + + // Returns true if and only if call_count calls will over-saturate this + // cardinality, i.e. exceed the maximum number of allowed calls. + bool IsOverSaturatedByCallCount(int call_count) const { + return impl_->IsSaturatedByCallCount(call_count) && + !impl_->IsSatisfiedByCallCount(call_count); + } + + // Describes self to an ostream + void DescribeTo(::std::ostream* os) const { impl_->DescribeTo(os); } + + // Describes the given actual call count to an ostream. + static void DescribeActualCallCountTo(int actual_call_count, + ::std::ostream* os); + + private: + std::shared_ptr impl_; +}; + +// Creates a cardinality that allows at least n calls. +GTEST_API_ Cardinality AtLeast(int n); + +// Creates a cardinality that allows at most n calls. +GTEST_API_ Cardinality AtMost(int n); + +// Creates a cardinality that allows any number of calls. +GTEST_API_ Cardinality AnyNumber(); + +// Creates a cardinality that allows between min and max calls. +GTEST_API_ Cardinality Between(int min, int max); + +// Creates a cardinality that allows exactly n calls. +GTEST_API_ Cardinality Exactly(int n); + +// Creates a cardinality from its implementation. 
+inline Cardinality MakeCardinality(const CardinalityInterface* c) { + return Cardinality(c); +} + +} // namespace testing + +GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_CARDINALITIES_H_ diff --git a/src/test/gtest/googlemock/include/gmock/gmock-function-mocker.h b/src/test/gtest/googlemock/include/gmock/gmock-function-mocker.h new file mode 100644 index 00000000..cc1535c8 --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-function-mocker.h @@ -0,0 +1,253 @@ +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements MOCK_METHOD. + +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ // NOLINT +#define THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ // NOLINT + +#include "gmock/gmock-generated-function-mockers.h" // NOLINT +#include "gmock/internal/gmock-pp.h" + +#define MOCK_METHOD(...) \ + GMOCK_PP_VARIADIC_CALL(GMOCK_INTERNAL_MOCK_METHOD_ARG_, __VA_ARGS__) + +#define GMOCK_INTERNAL_MOCK_METHOD_ARG_1(...) \ + GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) + +#define GMOCK_INTERNAL_MOCK_METHOD_ARG_2(...) 
\ + GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) + +#define GMOCK_INTERNAL_MOCK_METHOD_ARG_3(_Ret, _MethodName, _Args) \ + GMOCK_INTERNAL_MOCK_METHOD_ARG_4(_Ret, _MethodName, _Args, ()) + +#define GMOCK_INTERNAL_MOCK_METHOD_ARG_4(_Ret, _MethodName, _Args, _Spec) \ + GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Args); \ + GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Spec); \ + GMOCK_INTERNAL_ASSERT_VALID_SIGNATURE( \ + GMOCK_PP_NARG0 _Args, GMOCK_INTERNAL_SIGNATURE(_Ret, _Args)); \ + GMOCK_INTERNAL_ASSERT_VALID_SPEC(_Spec) \ + GMOCK_INTERNAL_MOCK_METHOD_IMPL( \ + GMOCK_PP_NARG0 _Args, _MethodName, GMOCK_INTERNAL_HAS_CONST(_Spec), \ + GMOCK_INTERNAL_HAS_OVERRIDE(_Spec), GMOCK_INTERNAL_HAS_FINAL(_Spec), \ + GMOCK_INTERNAL_HAS_NOEXCEPT(_Spec), GMOCK_INTERNAL_GET_CALLTYPE(_Spec), \ + (GMOCK_INTERNAL_SIGNATURE(_Ret, _Args))) + +#define GMOCK_INTERNAL_MOCK_METHOD_ARG_5(...) \ + GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) + +#define GMOCK_INTERNAL_MOCK_METHOD_ARG_6(...) \ + GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) + +#define GMOCK_INTERNAL_MOCK_METHOD_ARG_7(...) \ + GMOCK_INTERNAL_WRONG_ARITY(__VA_ARGS__) + +#define GMOCK_INTERNAL_WRONG_ARITY(...) \ + static_assert( \ + false, \ + "MOCK_METHOD must be called with 3 or 4 arguments. _Ret, " \ + "_MethodName, _Args and optionally _Spec. _Args and _Spec must be " \ + "enclosed in parentheses. If _Ret is a type with unprotected commas, " \ + "it must also be enclosed in parentheses.") + +#define GMOCK_INTERNAL_ASSERT_PARENTHESIS(_Tuple) \ + static_assert( \ + GMOCK_PP_IS_ENCLOSED_PARENS(_Tuple), \ + GMOCK_PP_STRINGIZE(_Tuple) " should be enclosed in parentheses.") + +#define GMOCK_INTERNAL_ASSERT_VALID_SIGNATURE(_N, ...) \ + static_assert( \ + std::is_function<__VA_ARGS__>::value, \ + "Signature must be a function type, maybe return type contains " \ + "unprotected comma."); \ + static_assert( \ + ::testing::tuple_size::ArgumentTuple>::value == _N, \ + "This method does not take " GMOCK_PP_STRINGIZE( \ + _N) " arguments. 
Parenthesize all types with unproctected commas.") + +#define GMOCK_INTERNAL_ASSERT_VALID_SPEC(_Spec) \ + GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT, ~, _Spec) + +#define GMOCK_INTERNAL_MOCK_METHOD_IMPL(_N, _MethodName, _Constness, \ + _Override, _Final, _Noexcept, \ + _CallType, _Signature) \ + typename ::testing::internal::Function::Result \ + GMOCK_INTERNAL_EXPAND(_CallType) \ + _MethodName(GMOCK_PP_REPEAT(GMOCK_INTERNAL_PARAMETER, _Signature, _N)) \ + GMOCK_PP_IF(_Constness, const, ) GMOCK_PP_IF(_Noexcept, noexcept, ) \ + GMOCK_PP_IF(_Override, override, ) \ + GMOCK_PP_IF(_Final, final, ) { \ + GMOCK_MOCKER_(_N, _Constness, _MethodName) \ + .SetOwnerAndName(this, #_MethodName); \ + return GMOCK_MOCKER_(_N, _Constness, _MethodName) \ + .Invoke(GMOCK_PP_REPEAT(GMOCK_INTERNAL_FORWARD_ARG, _Signature, _N)); \ + } \ + ::testing::MockSpec gmock_##_MethodName( \ + GMOCK_PP_REPEAT(GMOCK_INTERNAL_MATCHER_PARAMETER, _Signature, _N)) \ + GMOCK_PP_IF(_Constness, const, ) { \ + GMOCK_MOCKER_(_N, _Constness, _MethodName).RegisterOwner(this); \ + return GMOCK_MOCKER_(_N, _Constness, _MethodName) \ + .With(GMOCK_PP_REPEAT(GMOCK_INTERNAL_MATCHER_ARGUMENT, , _N)); \ + } \ + ::testing::MockSpec gmock_##_MethodName( \ + const ::testing::internal::WithoutMatchers&, \ + GMOCK_PP_IF(_Constness, const, )::testing::internal::Function< \ + GMOCK_PP_REMOVE_PARENS(_Signature)>*) \ + const GMOCK_PP_IF(_Noexcept, noexcept, ) { \ + return GMOCK_PP_CAT(::testing::internal::AdjustConstness_, \ + GMOCK_PP_IF(_Constness, const, ))(this) \ + ->gmock_##_MethodName(GMOCK_PP_REPEAT( \ + GMOCK_INTERNAL_A_MATCHER_ARGUMENT, _Signature, _N)); \ + } \ + mutable ::testing::FunctionMocker \ + GMOCK_MOCKER_(_N, _Constness, _MethodName) + +#define GMOCK_INTERNAL_EXPAND(...) __VA_ARGS__ + +// Five Valid modifiers. 
+#define GMOCK_INTERNAL_HAS_CONST(_Tuple) \ + GMOCK_PP_HAS_COMMA(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_CONST, ~, _Tuple)) + +#define GMOCK_INTERNAL_HAS_OVERRIDE(_Tuple) \ + GMOCK_PP_HAS_COMMA( \ + GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_OVERRIDE, ~, _Tuple)) + +#define GMOCK_INTERNAL_HAS_FINAL(_Tuple) \ + GMOCK_PP_HAS_COMMA(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_FINAL, ~, _Tuple)) + +#define GMOCK_INTERNAL_HAS_NOEXCEPT(_Tuple) \ + GMOCK_PP_HAS_COMMA( \ + GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_DETECT_NOEXCEPT, ~, _Tuple)) + +#define GMOCK_INTERNAL_GET_CALLTYPE(_Tuple) \ + GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_GET_CALLTYPE_IMPL, ~, _Tuple) + +#define GMOCK_INTERNAL_ASSERT_VALID_SPEC_ELEMENT(_i, _, _elem) \ + static_assert( \ + (GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem)) + \ + GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_OVERRIDE(_i, _, _elem)) + \ + GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_FINAL(_i, _, _elem)) + \ + GMOCK_PP_HAS_COMMA(GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem)) + \ + GMOCK_INTERNAL_IS_CALLTYPE(_elem)) == 1, \ + GMOCK_PP_STRINGIZE( \ + _elem) " cannot be recognized as a valid specification modifier."); + +// Modifiers implementation. +#define GMOCK_INTERNAL_DETECT_CONST(_i, _, _elem) \ + GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_CONST_I_, _elem) + +#define GMOCK_INTERNAL_DETECT_CONST_I_const , + +#define GMOCK_INTERNAL_DETECT_OVERRIDE(_i, _, _elem) \ + GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_OVERRIDE_I_, _elem) + +#define GMOCK_INTERNAL_DETECT_OVERRIDE_I_override , + +#define GMOCK_INTERNAL_DETECT_FINAL(_i, _, _elem) \ + GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_FINAL_I_, _elem) + +#define GMOCK_INTERNAL_DETECT_FINAL_I_final , + +// TODO(iserna): Maybe noexcept should accept an argument here as well. 
+#define GMOCK_INTERNAL_DETECT_NOEXCEPT(_i, _, _elem) \ + GMOCK_PP_CAT(GMOCK_INTERNAL_DETECT_NOEXCEPT_I_, _elem) + +#define GMOCK_INTERNAL_DETECT_NOEXCEPT_I_noexcept , + +#define GMOCK_INTERNAL_GET_CALLTYPE_IMPL(_i, _, _elem) \ + GMOCK_PP_IF(GMOCK_INTERNAL_IS_CALLTYPE(_elem), \ + GMOCK_INTERNAL_GET_VALUE_CALLTYPE, GMOCK_PP_EMPTY) \ + (_elem) + +// TODO(iserna): GMOCK_INTERNAL_IS_CALLTYPE and +// GMOCK_INTERNAL_GET_VALUE_CALLTYPE needed more expansions to work on windows +// maybe they can be simplified somehow. +#define GMOCK_INTERNAL_IS_CALLTYPE(_arg) \ + GMOCK_INTERNAL_IS_CALLTYPE_I( \ + GMOCK_PP_CAT(GMOCK_INTERNAL_IS_CALLTYPE_HELPER_, _arg)) +#define GMOCK_INTERNAL_IS_CALLTYPE_I(_arg) GMOCK_PP_IS_ENCLOSED_PARENS(_arg) + +#define GMOCK_INTERNAL_GET_VALUE_CALLTYPE(_arg) \ + GMOCK_INTERNAL_GET_VALUE_CALLTYPE_I( \ + GMOCK_PP_CAT(GMOCK_INTERNAL_IS_CALLTYPE_HELPER_, _arg)) +#define GMOCK_INTERNAL_GET_VALUE_CALLTYPE_I(_arg) \ + GMOCK_PP_CAT(GMOCK_PP_IDENTITY, _arg) + +#define GMOCK_INTERNAL_IS_CALLTYPE_HELPER_Calltype + +#define GMOCK_INTERNAL_SIGNATURE(_Ret, _Args) \ + GMOCK_PP_IF(GMOCK_PP_IS_BEGIN_PARENS(_Ret), GMOCK_PP_REMOVE_PARENS, \ + GMOCK_PP_IDENTITY) \ + (_Ret)(GMOCK_PP_FOR_EACH(GMOCK_INTERNAL_GET_TYPE, _, _Args)) + +#define GMOCK_INTERNAL_GET_TYPE(_i, _, _elem) \ + GMOCK_PP_COMMA_IF(_i) \ + GMOCK_PP_IF(GMOCK_PP_IS_BEGIN_PARENS(_elem), GMOCK_PP_REMOVE_PARENS, \ + GMOCK_PP_IDENTITY) \ + (_elem) + +#define GMOCK_INTERNAL_PARAMETER(_i, _Signature, _) \ + GMOCK_PP_COMMA_IF(_i) \ + GMOCK_INTERNAL_ARG_O(typename, GMOCK_PP_INC(_i), \ + GMOCK_PP_REMOVE_PARENS(_Signature)) \ + gmock_a##_i + +#define GMOCK_INTERNAL_FORWARD_ARG(_i, _Signature, _) \ + GMOCK_PP_COMMA_IF(_i) \ + ::std::forward( \ + gmock_a##_i) + +#define GMOCK_INTERNAL_MATCHER_PARAMETER(_i, _Signature, _) \ + GMOCK_PP_COMMA_IF(_i) \ + GMOCK_INTERNAL_MATCHER_O(typename, GMOCK_PP_INC(_i), \ + GMOCK_PP_REMOVE_PARENS(_Signature)) \ + gmock_a##_i + +#define GMOCK_INTERNAL_MATCHER_ARGUMENT(_i, _1, _2) \ + 
GMOCK_PP_COMMA_IF(_i) \ + gmock_a##_i + +#define GMOCK_INTERNAL_A_MATCHER_ARGUMENT(_i, _Signature, _) \ + GMOCK_PP_COMMA_IF(_i) \ + ::testing::A() + +#define GMOCK_INTERNAL_ARG_O(_tn, _i, ...) GMOCK_ARG_(_tn, _i, __VA_ARGS__) + +#define GMOCK_INTERNAL_MATCHER_O(_tn, _i, ...) \ + GMOCK_MATCHER_(_tn, _i, __VA_ARGS__) + +#endif // THIRD_PARTY_GOOGLETEST_GOOGLEMOCK_INCLUDE_GMOCK_INTERNAL_GMOCK_FUNCTION_MOCKER_H_ diff --git a/src/test/gtest/googlemock/include/gmock/gmock-generated-actions.h b/src/test/gtest/googlemock/include/gmock/gmock-generated-actions.h new file mode 100644 index 00000000..981af78f --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-generated-actions.h @@ -0,0 +1,1884 @@ +// This file was GENERATED by command: +// pump.py gmock-generated-actions.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used variadic actions. + +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ + +#include +#include + +#include "gmock/gmock-actions.h" +#include "gmock/internal/gmock-port.h" + +namespace testing { +namespace internal { + +// A macro from the ACTION* family (defined later in this file) +// defines an action that can be used in a mock function. Typically, +// these actions only care about a subset of the arguments of the mock +// function. For example, if such an action only uses the second +// argument, it can be used in any mock function that takes >= 2 +// arguments where the type of the second argument is compatible. +// +// Therefore, the action implementation must be prepared to take more +// arguments than it needs. The ExcessiveArg type is used to +// represent those excessive arguments. In order to keep the compiler +// error messages tractable, we define it in the testing namespace +// instead of testing::internal. However, this is an INTERNAL TYPE +// and subject to change without notice, so a user MUST NOT USE THIS +// TYPE DIRECTLY. +struct ExcessiveArg {}; + +// A helper class needed for implementing the ACTION* macros. 
+template +class ActionHelper { + public: + static Result Perform(Impl* impl, const ::std::tuple<>& args) { + return impl->template gmock_PerformImpl<>(args, ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, std::get<0>(args), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, std::get<0>(args), + std::get<1>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, + std::get<0>(args), std::get<1>(args), std::get<2>(args), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, + std::get<0>(args), std::get<1>(args), std::get<2>(args), + std::get<3>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, + std::get<0>(args), std::get<1>(args), std::get<2>(args), + std::get<3>(args), std::get<4>(args), ExcessiveArg(), ExcessiveArg(), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, + std::get<0>(args), 
std::get<1>(args), std::get<2>(args), + std::get<3>(args), std::get<4>(args), std::get<5>(args), + ExcessiveArg(), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, + std::get<0>(args), std::get<1>(args), std::get<2>(args), + std::get<3>(args), std::get<4>(args), std::get<5>(args), + std::get<6>(args), ExcessiveArg(), ExcessiveArg(), ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), + std::get<3>(args), std::get<4>(args), std::get<5>(args), + std::get<6>(args), std::get<7>(args), ExcessiveArg(), ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), + std::get<3>(args), std::get<4>(args), std::get<5>(args), + std::get<6>(args), std::get<7>(args), std::get<8>(args), + ExcessiveArg()); + } + + template + static Result Perform(Impl* impl, const ::std::tuple& args) { + return impl->template gmock_PerformImpl(args, std::get<0>(args), std::get<1>(args), std::get<2>(args), + std::get<3>(args), std::get<4>(args), std::get<5>(args), + std::get<6>(args), std::get<7>(args), std::get<8>(args), + std::get<9>(args)); + } +}; + +} // namespace internal +} // namespace testing + +// The ACTION* family of macros can be used in a namespace scope to +// define custom actions easily. The syntax: +// +// ACTION(name) { statements; } +// +// will define an action with the given name that executes the +// statements. The value returned by the statements will be used as +// the return value of the action. Inside the statements, you can +// refer to the K-th (0-based) argument of the mock function by +// 'argK', and refer to its type by 'argK_type'. 
For example: +// +// ACTION(IncrementArg1) { +// arg1_type temp = arg1; +// return ++(*temp); +// } +// +// allows you to write +// +// ...WillOnce(IncrementArg1()); +// +// You can also refer to the entire argument tuple and its type by +// 'args' and 'args_type', and refer to the mock function type and its +// return type by 'function_type' and 'return_type'. +// +// Note that you don't need to specify the types of the mock function +// arguments. However rest assured that your code is still type-safe: +// you'll get a compiler error if *arg1 doesn't support the ++ +// operator, or if the type of ++(*arg1) isn't compatible with the +// mock function's return type, for example. +// +// Sometimes you'll want to parameterize the action. For that you can use +// another macro: +// +// ACTION_P(name, param_name) { statements; } +// +// For example: +// +// ACTION_P(Add, n) { return arg0 + n; } +// +// will allow you to write: +// +// ...WillOnce(Add(5)); +// +// Note that you don't need to provide the type of the parameter +// either. If you need to reference the type of a parameter named +// 'foo', you can write 'foo_type'. For example, in the body of +// ACTION_P(Add, n) above, you can write 'n_type' to refer to the type +// of 'n'. +// +// We also provide ACTION_P2, ACTION_P3, ..., up to ACTION_P10 to support +// multi-parameter actions. +// +// For the purpose of typing, you can view +// +// ACTION_Pk(Foo, p1, ..., pk) { ... } +// +// as shorthand for +// +// template +// FooActionPk Foo(p1_type p1, ..., pk_type pk) { ... } +// +// In particular, you can provide the template type arguments +// explicitly when invoking Foo(), as in Foo(5, false); +// although usually you can rely on the compiler to infer the types +// for you automatically. You can assign the result of expression +// Foo(p1, ..., pk) to a variable of type FooActionPk. This can be useful when composing actions. 
+// +// You can also overload actions with different numbers of parameters: +// +// ACTION_P(Plus, a) { ... } +// ACTION_P2(Plus, a, b) { ... } +// +// While it's tempting to always use the ACTION* macros when defining +// a new action, you should also consider implementing ActionInterface +// or using MakePolymorphicAction() instead, especially if you need to +// use the action a lot. While these approaches require more work, +// they give you more control on the types of the mock function +// arguments and the action parameters, which in general leads to +// better compiler error messages that pay off in the long run. They +// also allow overloading actions based on parameter types (as opposed +// to just based on the number of parameters). +// +// CAVEAT: +// +// ACTION*() can only be used in a namespace scope as templates cannot be +// declared inside of a local class. +// Users can, however, define any local functors (e.g. a lambda) that +// can be used as actions. +// +// MORE INFORMATION: +// +// To learn more about using these macros, please search for 'ACTION' on +// https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md + +// An internal macro needed for implementing ACTION*(). +#define GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_\ + const args_type& args GTEST_ATTRIBUTE_UNUSED_, \ + const arg0_type& arg0 GTEST_ATTRIBUTE_UNUSED_, \ + const arg1_type& arg1 GTEST_ATTRIBUTE_UNUSED_, \ + const arg2_type& arg2 GTEST_ATTRIBUTE_UNUSED_, \ + const arg3_type& arg3 GTEST_ATTRIBUTE_UNUSED_, \ + const arg4_type& arg4 GTEST_ATTRIBUTE_UNUSED_, \ + const arg5_type& arg5 GTEST_ATTRIBUTE_UNUSED_, \ + const arg6_type& arg6 GTEST_ATTRIBUTE_UNUSED_, \ + const arg7_type& arg7 GTEST_ATTRIBUTE_UNUSED_, \ + const arg8_type& arg8 GTEST_ATTRIBUTE_UNUSED_, \ + const arg9_type& arg9 GTEST_ATTRIBUTE_UNUSED_ + +// Sometimes you want to give an action explicit template parameters +// that cannot be inferred from its value parameters. 
ACTION() and +// ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that +// and can be viewed as an extension to ACTION() and ACTION_P*(). +// +// The syntax: +// +// ACTION_TEMPLATE(ActionName, +// HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), +// AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } +// +// defines an action template that takes m explicit template +// parameters and n value parameters. name_i is the name of the i-th +// template parameter, and kind_i specifies whether it's a typename, +// an integral constant, or a template. p_i is the name of the i-th +// value parameter. +// +// Example: +// +// // DuplicateArg(output) converts the k-th argument of the mock +// // function to type T and copies it to *output. +// ACTION_TEMPLATE(DuplicateArg, +// HAS_2_TEMPLATE_PARAMS(int, k, typename, T), +// AND_1_VALUE_PARAMS(output)) { +// *output = T(::std::get(args)); +// } +// ... +// int n; +// EXPECT_CALL(mock, Foo(_, _)) +// .WillOnce(DuplicateArg<1, unsigned char>(&n)); +// +// To create an instance of an action template, write: +// +// ActionName(v1, ..., v_n) +// +// where the ts are the template arguments and the vs are the value +// arguments. The value argument types are inferred by the compiler. +// If you want to explicitly specify the value argument types, you can +// provide additional template arguments: +// +// ActionName(v1, ..., v_n) +// +// where u_i is the desired type of v_i. +// +// ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the +// number of value parameters, but not on the number of template +// parameters. Without the restriction, the meaning of the following +// is unclear: +// +// OverloadedAction(x); +// +// Are we using a single-template-parameter action where 'bool' refers +// to the type of x, or are we using a two-template-parameter action +// where the compiler is asked to infer the type of x? 
+// +// Implementation notes: +// +// GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and +// GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for +// implementing ACTION_TEMPLATE. The main trick we use is to create +// new macro invocations when expanding a macro. For example, we have +// +// #define ACTION_TEMPLATE(name, template_params, value_params) +// ... GMOCK_INTERNAL_DECL_##template_params ... +// +// which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...) +// to expand to +// +// ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ... +// +// Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the +// preprocessor will continue to expand it to +// +// ... typename T ... +// +// This technique conforms to the C++ standard and is portable. It +// allows us to implement action templates using O(N) code, where N is +// the maximum number of template/value parameters supported. Without +// using it, we'd have to devote O(N^2) amount of code to implement all +// combinations of m and n. + +// Declares the template parameters. 
+#define GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(kind0, name0) kind0 name0 +#define GMOCK_INTERNAL_DECL_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, \ + name1) kind0 name0, kind1 name1 +#define GMOCK_INTERNAL_DECL_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2) kind0 name0, kind1 name1, kind2 name2 +#define GMOCK_INTERNAL_DECL_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3) kind0 name0, kind1 name1, kind2 name2, \ + kind3 name3 +#define GMOCK_INTERNAL_DECL_HAS_5_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4) kind0 name0, kind1 name1, \ + kind2 name2, kind3 name3, kind4 name4 +#define GMOCK_INTERNAL_DECL_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5) kind0 name0, \ + kind1 name1, kind2 name2, kind3 name3, kind4 name4, kind5 name5 +#define GMOCK_INTERNAL_DECL_HAS_7_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ + name6) kind0 name0, kind1 name1, kind2 name2, kind3 name3, kind4 name4, \ + kind5 name5, kind6 name6 +#define GMOCK_INTERNAL_DECL_HAS_8_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ + kind7, name7) kind0 name0, kind1 name1, kind2 name2, kind3 name3, \ + kind4 name4, kind5 name5, kind6 name6, kind7 name7 +#define GMOCK_INTERNAL_DECL_HAS_9_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ + kind7, name7, kind8, name8) kind0 name0, kind1 name1, kind2 name2, \ + kind3 name3, kind4 name4, kind5 name5, kind6 name6, kind7 name7, \ + kind8 name8 +#define GMOCK_INTERNAL_DECL_HAS_10_TEMPLATE_PARAMS(kind0, name0, kind1, \ + name1, kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ + name6, kind7, name7, kind8, name8, kind9, name9) kind0 name0, \ + kind1 name1, kind2 name2, kind3 name3, kind4 
name4, kind5 name5, \ + kind6 name6, kind7 name7, kind8 name8, kind9 name9 + +// Lists the template parameters. +#define GMOCK_INTERNAL_LIST_HAS_1_TEMPLATE_PARAMS(kind0, name0) name0 +#define GMOCK_INTERNAL_LIST_HAS_2_TEMPLATE_PARAMS(kind0, name0, kind1, \ + name1) name0, name1 +#define GMOCK_INTERNAL_LIST_HAS_3_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2) name0, name1, name2 +#define GMOCK_INTERNAL_LIST_HAS_4_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3) name0, name1, name2, name3 +#define GMOCK_INTERNAL_LIST_HAS_5_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4) name0, name1, name2, name3, \ + name4 +#define GMOCK_INTERNAL_LIST_HAS_6_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5) name0, name1, \ + name2, name3, name4, name5 +#define GMOCK_INTERNAL_LIST_HAS_7_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ + name6) name0, name1, name2, name3, name4, name5, name6 +#define GMOCK_INTERNAL_LIST_HAS_8_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ + kind7, name7) name0, name1, name2, name3, name4, name5, name6, name7 +#define GMOCK_INTERNAL_LIST_HAS_9_TEMPLATE_PARAMS(kind0, name0, kind1, name1, \ + kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, name6, \ + kind7, name7, kind8, name8) name0, name1, name2, name3, name4, name5, \ + name6, name7, name8 +#define GMOCK_INTERNAL_LIST_HAS_10_TEMPLATE_PARAMS(kind0, name0, kind1, \ + name1, kind2, name2, kind3, name3, kind4, name4, kind5, name5, kind6, \ + name6, kind7, name7, kind8, name8, kind9, name9) name0, name1, name2, \ + name3, name4, name5, name6, name7, name8, name9 + +// Declares the types of value parameters. 
+#define GMOCK_INTERNAL_DECL_TYPE_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_DECL_TYPE_AND_1_VALUE_PARAMS(p0) , typename p0##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_2_VALUE_PARAMS(p0, p1) , \ + typename p0##_type, typename p1##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) , \ + typename p0##_type, typename p1##_type, typename p2##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) , \ + typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) , \ + typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) , \ + typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type, typename p5##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) , typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type, typename p5##_type, \ + typename p6##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7) , typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type, typename p5##_type, \ + typename p6##_type, typename p7##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8) , typename p0##_type, typename p1##_type, typename p2##_type, \ + typename p3##_type, typename p4##_type, typename p5##_type, \ + typename p6##_type, typename p7##_type, typename p8##_type +#define GMOCK_INTERNAL_DECL_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8, p9) , typename p0##_type, typename p1##_type, \ + typename p2##_type, typename p3##_type, typename p4##_type, \ + typename p5##_type, typename p6##_type, typename p7##_type, \ + typename p8##_type, typename p9##_type 
+ +// Initializes the value parameters. +#define GMOCK_INTERNAL_INIT_AND_0_VALUE_PARAMS()\ + () +#define GMOCK_INTERNAL_INIT_AND_1_VALUE_PARAMS(p0)\ + (p0##_type gmock_p0) : p0(::std::move(gmock_p0)) +#define GMOCK_INTERNAL_INIT_AND_2_VALUE_PARAMS(p0, p1)\ + (p0##_type gmock_p0, p1##_type gmock_p1) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)) +#define GMOCK_INTERNAL_INIT_AND_3_VALUE_PARAMS(p0, p1, p2)\ + (p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)) +#define GMOCK_INTERNAL_INIT_AND_4_VALUE_PARAMS(p0, p1, p2, p3)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)) +#define GMOCK_INTERNAL_INIT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)) +#define GMOCK_INTERNAL_INIT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)) +#define GMOCK_INTERNAL_INIT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)) +#define 
GMOCK_INTERNAL_INIT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ + p7(::std::move(gmock_p7)) +#define GMOCK_INTERNAL_INIT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ + p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)) +#define GMOCK_INTERNAL_INIT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9)\ + (p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ + p9##_type gmock_p9) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ + p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)), \ + p9(::std::move(gmock_p9)) + +// Declares the fields for storing the value parameters. 
+#define GMOCK_INTERNAL_DEFN_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_DEFN_AND_1_VALUE_PARAMS(p0) p0##_type p0; +#define GMOCK_INTERNAL_DEFN_AND_2_VALUE_PARAMS(p0, p1) p0##_type p0; \ + p1##_type p1; +#define GMOCK_INTERNAL_DEFN_AND_3_VALUE_PARAMS(p0, p1, p2) p0##_type p0; \ + p1##_type p1; p2##_type p2; +#define GMOCK_INTERNAL_DEFN_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0##_type p0; \ + p1##_type p1; p2##_type p2; p3##_type p3; +#define GMOCK_INTERNAL_DEFN_AND_5_VALUE_PARAMS(p0, p1, p2, p3, \ + p4) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; +#define GMOCK_INTERNAL_DEFN_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, \ + p5) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ + p5##_type p5; +#define GMOCK_INTERNAL_DEFN_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ + p5##_type p5; p6##_type p6; +#define GMOCK_INTERNAL_DEFN_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; p4##_type p4; \ + p5##_type p5; p6##_type p6; p7##_type p7; +#define GMOCK_INTERNAL_DEFN_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; \ + p4##_type p4; p5##_type p5; p6##_type p6; p7##_type p7; p8##_type p8; +#define GMOCK_INTERNAL_DEFN_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9) p0##_type p0; p1##_type p1; p2##_type p2; p3##_type p3; \ + p4##_type p4; p5##_type p5; p6##_type p6; p7##_type p7; p8##_type p8; \ + p9##_type p9; + +// Lists the value parameters. 
+#define GMOCK_INTERNAL_LIST_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_LIST_AND_1_VALUE_PARAMS(p0) p0 +#define GMOCK_INTERNAL_LIST_AND_2_VALUE_PARAMS(p0, p1) p0, p1 +#define GMOCK_INTERNAL_LIST_AND_3_VALUE_PARAMS(p0, p1, p2) p0, p1, p2 +#define GMOCK_INTERNAL_LIST_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0, p1, p2, p3 +#define GMOCK_INTERNAL_LIST_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) p0, p1, \ + p2, p3, p4 +#define GMOCK_INTERNAL_LIST_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) p0, \ + p1, p2, p3, p4, p5 +#define GMOCK_INTERNAL_LIST_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) p0, p1, p2, p3, p4, p5, p6 +#define GMOCK_INTERNAL_LIST_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7) p0, p1, p2, p3, p4, p5, p6, p7 +#define GMOCK_INTERNAL_LIST_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8) p0, p1, p2, p3, p4, p5, p6, p7, p8 +#define GMOCK_INTERNAL_LIST_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9) p0, p1, p2, p3, p4, p5, p6, p7, p8, p9 + +// Lists the value parameter types. 
+#define GMOCK_INTERNAL_LIST_TYPE_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_LIST_TYPE_AND_1_VALUE_PARAMS(p0) , p0##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_2_VALUE_PARAMS(p0, p1) , p0##_type, \ + p1##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_3_VALUE_PARAMS(p0, p1, p2) , p0##_type, \ + p1##_type, p2##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_4_VALUE_PARAMS(p0, p1, p2, p3) , \ + p0##_type, p1##_type, p2##_type, p3##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) , \ + p0##_type, p1##_type, p2##_type, p3##_type, p4##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) , \ + p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, p5##_type, \ + p6##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \ + p5##_type, p6##_type, p7##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \ + p5##_type, p6##_type, p7##_type, p8##_type +#define GMOCK_INTERNAL_LIST_TYPE_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8, p9) , p0##_type, p1##_type, p2##_type, p3##_type, p4##_type, \ + p5##_type, p6##_type, p7##_type, p8##_type, p9##_type + +// Declares the value parameters. 
+#define GMOCK_INTERNAL_DECL_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_DECL_AND_1_VALUE_PARAMS(p0) p0##_type p0 +#define GMOCK_INTERNAL_DECL_AND_2_VALUE_PARAMS(p0, p1) p0##_type p0, \ + p1##_type p1 +#define GMOCK_INTERNAL_DECL_AND_3_VALUE_PARAMS(p0, p1, p2) p0##_type p0, \ + p1##_type p1, p2##_type p2 +#define GMOCK_INTERNAL_DECL_AND_4_VALUE_PARAMS(p0, p1, p2, p3) p0##_type p0, \ + p1##_type p1, p2##_type p2, p3##_type p3 +#define GMOCK_INTERNAL_DECL_AND_5_VALUE_PARAMS(p0, p1, p2, p3, \ + p4) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4 +#define GMOCK_INTERNAL_DECL_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, \ + p5) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ + p5##_type p5 +#define GMOCK_INTERNAL_DECL_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, \ + p6) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ + p5##_type p5, p6##_type p6 +#define GMOCK_INTERNAL_DECL_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, \ + p5##_type p5, p6##_type p6, p7##_type p7 +#define GMOCK_INTERNAL_DECL_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8 +#define GMOCK_INTERNAL_DECL_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9) p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ + p9##_type p9 + +// The suffix of the class template implementing the action template. 
+#define GMOCK_INTERNAL_COUNT_AND_0_VALUE_PARAMS() +#define GMOCK_INTERNAL_COUNT_AND_1_VALUE_PARAMS(p0) P +#define GMOCK_INTERNAL_COUNT_AND_2_VALUE_PARAMS(p0, p1) P2 +#define GMOCK_INTERNAL_COUNT_AND_3_VALUE_PARAMS(p0, p1, p2) P3 +#define GMOCK_INTERNAL_COUNT_AND_4_VALUE_PARAMS(p0, p1, p2, p3) P4 +#define GMOCK_INTERNAL_COUNT_AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4) P5 +#define GMOCK_INTERNAL_COUNT_AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5) P6 +#define GMOCK_INTERNAL_COUNT_AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6) P7 +#define GMOCK_INTERNAL_COUNT_AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7) P8 +#define GMOCK_INTERNAL_COUNT_AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8) P9 +#define GMOCK_INTERNAL_COUNT_AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, \ + p7, p8, p9) P10 + +// The name of the class template implementing the action template. +#define GMOCK_ACTION_CLASS_(name, value_params)\ + GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params) + +#define ACTION_TEMPLATE(name, template_params, value_params)\ + template \ + class GMOCK_ACTION_CLASS_(name, value_params) {\ + public:\ + explicit GMOCK_ACTION_CLASS_(name, value_params)\ + GMOCK_INTERNAL_INIT_##value_params {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& 
arg9) const;\ + GMOCK_INTERNAL_DEFN_##value_params\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(\ + new gmock_Impl(GMOCK_INTERNAL_LIST_##value_params));\ + }\ + GMOCK_INTERNAL_DEFN_##value_params\ + private:\ + GTEST_DISALLOW_ASSIGN_(GMOCK_ACTION_CLASS_(name, value_params));\ + };\ + template \ + inline GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params> name(\ + GMOCK_INTERNAL_DECL_##value_params) {\ + return GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params>(\ + GMOCK_INTERNAL_LIST_##value_params);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params>::gmock_Impl::\ + gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION(name)\ + class name##Action {\ + public:\ + name##Action() {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl() {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return 
::testing::Action(new gmock_Impl());\ + }\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##Action);\ + };\ + inline name##Action name() {\ + return name##Action();\ + }\ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##Action::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P(name, p0)\ + template \ + class name##ActionP {\ + public:\ + explicit name##ActionP(p0##_type gmock_p0) : \ + p0(::std::forward(gmock_p0)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + explicit gmock_Impl(p0##_type gmock_p0) : \ + p0(::std::forward(gmock_p0)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0));\ + }\ + p0##_type p0;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP);\ + };\ + template \ + inline name##ActionP name(p0##_type p0) {\ + return name##ActionP(p0);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P2(name, p0, p1)\ + template \ + class name##ActionP2 {\ + public:\ + name##ActionP2(p0##_type gmock_p0, \ + p1##_type gmock_p1) : 
p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, \ + p1##_type gmock_p1) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP2);\ + };\ + template \ + inline name##ActionP2 name(p0##_type p0, \ + p1##_type p1) {\ + return name##ActionP2(p0, p1);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP2::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P3(name, p0, p1, p2)\ + template \ + class name##ActionP3 {\ + public:\ + name##ActionP3(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename 
::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP3);\ + };\ + template \ + inline name##ActionP3 name(p0##_type p0, \ + p1##_type p1, p2##_type p2) {\ + return name##ActionP3(p0, p1, p2);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP3::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P4(name, p0, p1, p2, p3)\ + template \ + class name##ActionP4 {\ + public:\ + name##ActionP4(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, \ + p3##_type gmock_p3) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, 
p2##_type gmock_p2, \ + p3##_type gmock_p3) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP4);\ + };\ + template \ + inline name##ActionP4 name(p0##_type p0, p1##_type p1, p2##_type p2, \ + p3##_type p3) {\ + return name##ActionP4(p0, p1, \ + p2, p3);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP4::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P5(name, p0, p1, p2, p3, p4)\ + template \ + class name##ActionP5 {\ + public:\ + name##ActionP5(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, \ + p4##_type gmock_p4) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + 
args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, \ + p4##_type gmock_p4) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP5);\ + };\ + template \ + inline name##ActionP5 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4) {\ + return name##ActionP5(p0, p1, p2, p3, p4);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP5::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P6(name, p0, p1, p2, p3, p4, p5)\ + template \ + class name##ActionP6 {\ + public:\ + name##ActionP6(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)) {}\ + template \ + class 
gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP6);\ + };\ + template \ + inline name##ActionP6 name(p0##_type p0, p1##_type p1, p2##_type p2, \ + p3##_type p3, p4##_type p4, p5##_type p5) {\ + return name##ActionP6(p0, p1, p2, p3, p4, p5);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP6::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P7(name, p0, p1, p2, p3, p4, p5, p6)\ + template \ + class name##ActionP7 {\ + public:\ + name##ActionP7(p0##_type 
gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, \ + p6##_type gmock_p6) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)), \ + p6(::std::forward(gmock_p6)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)), \ + p6(::std::forward(gmock_p6)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ + p6));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP7);\ + };\ + template \ + 
inline name##ActionP7 name(p0##_type p0, p1##_type p1, \ + p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ + p6##_type p6) {\ + return name##ActionP7(p0, p1, p2, p3, p4, p5, p6);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP7::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P8(name, p0, p1, p2, p3, p4, p5, p6, p7)\ + template \ + class name##ActionP8 {\ + public:\ + name##ActionP8(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, \ + p7##_type gmock_p7) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)), \ + p6(::std::forward(gmock_p6)), \ + p7(::std::forward(gmock_p7)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, \ + p7##_type gmock_p7) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)), \ + p6(::std::forward(gmock_p6)), \ + p7(::std::forward(gmock_p7)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, 
const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ + p6, p7));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP8);\ + };\ + template \ + inline name##ActionP8 name(p0##_type p0, \ + p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ + p6##_type p6, p7##_type p7) {\ + return name##ActionP8(p0, p1, p2, p3, p4, p5, \ + p6, p7);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP8::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P9(name, p0, p1, p2, p3, p4, p5, p6, p7, p8)\ + template \ + class name##ActionP9 {\ + public:\ + name##ActionP9(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)), \ + p6(::std::forward(gmock_p6)), \ + p7(::std::forward(gmock_p7)), \ + p8(::std::forward(gmock_p8)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, 
p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)), \ + p6(::std::forward(gmock_p6)), \ + p7(::std::forward(gmock_p7)), \ + p8(::std::forward(gmock_p8)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP9);\ + };\ + template \ + inline name##ActionP9 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, \ + p8##_type p8) {\ + return name##ActionP9(p0, p1, p2, \ + p3, p4, p5, p6, p7, p8);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP9::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +#define ACTION_P10(name, p0, p1, p2, p3, p4, p5, p6, p7, 
p8, p9)\ + template \ + class name##ActionP10 {\ + public:\ + name##ActionP10(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8, \ + p9##_type gmock_p9) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)), \ + p6(::std::forward(gmock_p6)), \ + p7(::std::forward(gmock_p7)), \ + p8(::std::forward(gmock_p8)), \ + p9(::std::forward(gmock_p9)) {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ + p9##_type gmock_p9) : p0(::std::forward(gmock_p0)), \ + p1(::std::forward(gmock_p1)), \ + p2(::std::forward(gmock_p2)), \ + p3(::std::forward(gmock_p3)), \ + p4(::std::forward(gmock_p4)), \ + p5(::std::forward(gmock_p5)), \ + p6(::std::forward(gmock_p6)), \ + p7(::std::forward(gmock_p7)), \ + p8(::std::forward(gmock_p8)), \ + p9(::std::forward(gmock_p9)) {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template \ + return_type gmock_PerformImpl(const args_type& args, \ + const arg0_type& arg0, const arg1_type& arg1, \ + const arg2_type& arg2, const arg3_type& arg3, \ + const arg4_type& arg4, const arg5_type& arg5, \ + const arg6_type& arg6, const arg7_type& arg7, \ + const arg8_type& arg8, const arg9_type& arg9) const;\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type 
p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + p9##_type p9;\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl(p0, p1, p2, p3, p4, p5, \ + p6, p7, p8, p9));\ + }\ + p0##_type p0;\ + p1##_type p1;\ + p2##_type p2;\ + p3##_type p3;\ + p4##_type p4;\ + p5##_type p5;\ + p6##_type p6;\ + p7##_type p7;\ + p8##_type p8;\ + p9##_type p9;\ + private:\ + GTEST_DISALLOW_ASSIGN_(name##ActionP10);\ + };\ + template \ + inline name##ActionP10 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ + p9##_type p9) {\ + return name##ActionP10(p0, \ + p1, p2, p3, p4, p5, p6, p7, p8, p9);\ + }\ + template \ + template \ + template \ + typename ::testing::internal::Function::Result\ + name##ActionP10::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +namespace testing { + + +// The ACTION*() macros trigger warning C4100 (unreferenced formal +// parameter) in MSVC with -W4. Unfortunately they cannot be fixed in +// the macro definition, as the warnings are generated when the macro +// is expanded and macro expansion cannot contain #pragma. Therefore +// we suppress them here. +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable:4100) +#endif + +// Various overloads for InvokeArgument(). +// +// The InvokeArgument(a1, a2, ..., a_k) action invokes the N-th +// (0-based) argument, which must be a k-ary callable, of the mock +// function, with arguments a1, a2, ..., a_k. +// +// Notes: +// +// 1. The arguments are passed by value by default. If you need to +// pass an argument by reference, wrap it inside ByRef(). For +// example, +// +// InvokeArgument<1>(5, string("Hello"), ByRef(foo)) +// +// passes 5 and string("Hello") by value, and passes foo by +// reference. +// +// 2. 
If the callable takes an argument by reference but ByRef() is +// not used, it will receive the reference to a copy of the value, +// instead of the original value. For example, when the 0-th +// argument of the mock function takes a const string&, the action +// +// InvokeArgument<0>(string("Hello")) +// +// makes a copy of the temporary string("Hello") object and passes a +// reference of the copy, instead of the original temporary object, +// to the callable. This makes it easy for a user to define an +// InvokeArgument action from temporary values and have it performed +// later. + +namespace internal { +namespace invoke_argument { + +// Appears in InvokeArgumentAdl's argument list to help avoid +// accidental calls to user functions of the same name. +struct AdlTag {}; + +// InvokeArgumentAdl - a helper for InvokeArgument. +// The basic overloads are provided here for generic functors. +// Overloads for other custom-callables are provided in the +// internal/custom/callback-actions.h header. 
+ +template +R InvokeArgumentAdl(AdlTag, F f) { + return f(); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1) { + return f(a1); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2) { + return f(a1, a2); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3) { + return f(a1, a2, a3); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4) { + return f(a1, a2, a3, a4); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5) { + return f(a1, a2, a3, a4, a5); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6) { + return f(a1, a2, a3, a4, a5, a6); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, + A7 a7) { + return f(a1, a2, a3, a4, a5, a6, a7); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, + A7 a7, A8 a8) { + return f(a1, a2, a3, a4, a5, a6, a7, a8); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, + A7 a7, A8 a8, A9 a9) { + return f(a1, a2, a3, a4, a5, a6, a7, a8, a9); +} +template +R InvokeArgumentAdl(AdlTag, F f, A1 a1, A2 a2, A3 a3, A4 a4, A5 a5, A6 a6, + A7 a7, A8 a8, A9 a9, A10 a10) { + return f(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10); +} +} // namespace invoke_argument +} // namespace internal + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_0_VALUE_PARAMS()) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args)); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_1_VALUE_PARAMS(p0)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_2_VALUE_PARAMS(p0, p1)) { + using internal::invoke_argument::InvokeArgumentAdl; + return 
InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_3_VALUE_PARAMS(p0, p1, p2)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1, p2); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_4_VALUE_PARAMS(p0, p1, p2, p3)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1, p2, p3); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1, p2, p3, p4); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1, p2, p3, p4, p5); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1, p2, p3, p4, p5, p6); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1, p2, p3, p4, p5, p6, p7); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + 
internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1, p2, p3, p4, p5, p6, p7, p8); +} + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args), p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); +} + +// Various overloads for ReturnNew(). +// +// The ReturnNew(a1, a2, ..., a_k) action returns a pointer to a new +// instance of type T, constructed on the heap with constructor arguments +// a1, a2, ..., and a_k. The caller assumes ownership of the returned value. +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_0_VALUE_PARAMS()) { + return new T(); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_1_VALUE_PARAMS(p0)) { + return new T(p0); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_2_VALUE_PARAMS(p0, p1)) { + return new T(p0, p1); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_3_VALUE_PARAMS(p0, p1, p2)) { + return new T(p0, p1, p2); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_4_VALUE_PARAMS(p0, p1, p2, p3)) { + return new T(p0, p1, p2, p3); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_5_VALUE_PARAMS(p0, p1, p2, p3, p4)) { + return new T(p0, p1, p2, p3, p4); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_6_VALUE_PARAMS(p0, p1, p2, p3, p4, p5)) { + return new T(p0, p1, p2, p3, p4, p5); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_7_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6)) { + return new T(p0, p1, p2, p3, p4, p5, p6); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_8_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7)) { + return new T(p0, p1, p2, p3, p4, p5, p6, p7); +} + 
+ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_9_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8)) { + return new T(p0, p1, p2, p3, p4, p5, p6, p7, p8); +} + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_10_VALUE_PARAMS(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)) { + return new T(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9); +} + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +} // namespace testing + +// Include any custom callback actions added by the local installation. +// We must include this header at the end to make sure it can use the +// declarations from this file. +#include "gmock/internal/custom/gmock-generated-actions.h" + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ diff --git a/src/test/gtest/googlemock/include/gmock/gmock-generated-actions.h.pump b/src/test/gtest/googlemock/include/gmock/gmock-generated-actions.h.pump new file mode 100644 index 00000000..209603c5 --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-generated-actions.h.pump @@ -0,0 +1,627 @@ +$$ -*- mode: c++; -*- +$$ This is a Pump source file. Please use Pump to convert it to +$$ gmock-generated-actions.h. +$$ +$var n = 10 $$ The maximum arity we support. +$$}} This meta comment fixes auto-indentation in editors. +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. 
nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used variadic actions. + +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ + +#include +#include + +#include "gmock/gmock-actions.h" +#include "gmock/internal/gmock-port.h" + +namespace testing { +namespace internal { + +// A macro from the ACTION* family (defined later in this file) +// defines an action that can be used in a mock function. Typically, +// these actions only care about a subset of the arguments of the mock +// function. For example, if such an action only uses the second +// argument, it can be used in any mock function that takes >= 2 +// arguments where the type of the second argument is compatible. +// +// Therefore, the action implementation must be prepared to take more +// arguments than it needs. The ExcessiveArg type is used to +// represent those excessive arguments. 
In order to keep the compiler +// error messages tractable, we define it in the testing namespace +// instead of testing::internal. However, this is an INTERNAL TYPE +// and subject to change without notice, so a user MUST NOT USE THIS +// TYPE DIRECTLY. +struct ExcessiveArg {}; + +// A helper class needed for implementing the ACTION* macros. +template +class ActionHelper { + public: +$range i 0..n +$for i + +[[ +$var template = [[$if i==0 [[]] $else [[ +$range j 0..i-1 + template <$for j, [[typename A$j]]> +]]]] +$range j 0..i-1 +$var As = [[$for j, [[A$j]]]] +$var as = [[$for j, [[std::get<$j>(args)]]]] +$range k 1..n-i +$var eas = [[$for k, [[ExcessiveArg()]]]] +$var arg_list = [[$if (i==0) | (i==n) [[$as$eas]] $else [[$as, $eas]]]] +$template + static Result Perform(Impl* impl, const ::std::tuple<$As>& args) { + return impl->template gmock_PerformImpl<$As>(args, $arg_list); + } + +]] +}; + +} // namespace internal +} // namespace testing + +// The ACTION* family of macros can be used in a namespace scope to +// define custom actions easily. The syntax: +// +// ACTION(name) { statements; } +// +// will define an action with the given name that executes the +// statements. The value returned by the statements will be used as +// the return value of the action. Inside the statements, you can +// refer to the K-th (0-based) argument of the mock function by +// 'argK', and refer to its type by 'argK_type'. For example: +// +// ACTION(IncrementArg1) { +// arg1_type temp = arg1; +// return ++(*temp); +// } +// +// allows you to write +// +// ...WillOnce(IncrementArg1()); +// +// You can also refer to the entire argument tuple and its type by +// 'args' and 'args_type', and refer to the mock function type and its +// return type by 'function_type' and 'return_type'. +// +// Note that you don't need to specify the types of the mock function +// arguments. 
However rest assured that your code is still type-safe: +// you'll get a compiler error if *arg1 doesn't support the ++ +// operator, or if the type of ++(*arg1) isn't compatible with the +// mock function's return type, for example. +// +// Sometimes you'll want to parameterize the action. For that you can use +// another macro: +// +// ACTION_P(name, param_name) { statements; } +// +// For example: +// +// ACTION_P(Add, n) { return arg0 + n; } +// +// will allow you to write: +// +// ...WillOnce(Add(5)); +// +// Note that you don't need to provide the type of the parameter +// either. If you need to reference the type of a parameter named +// 'foo', you can write 'foo_type'. For example, in the body of +// ACTION_P(Add, n) above, you can write 'n_type' to refer to the type +// of 'n'. +// +// We also provide ACTION_P2, ACTION_P3, ..., up to ACTION_P$n to support +// multi-parameter actions. +// +// For the purpose of typing, you can view +// +// ACTION_Pk(Foo, p1, ..., pk) { ... } +// +// as shorthand for +// +// template +// FooActionPk Foo(p1_type p1, ..., pk_type pk) { ... } +// +// In particular, you can provide the template type arguments +// explicitly when invoking Foo(), as in Foo(5, false); +// although usually you can rely on the compiler to infer the types +// for you automatically. You can assign the result of expression +// Foo(p1, ..., pk) to a variable of type FooActionPk. This can be useful when composing actions. +// +// You can also overload actions with different numbers of parameters: +// +// ACTION_P(Plus, a) { ... } +// ACTION_P2(Plus, a, b) { ... } +// +// While it's tempting to always use the ACTION* macros when defining +// a new action, you should also consider implementing ActionInterface +// or using MakePolymorphicAction() instead, especially if you need to +// use the action a lot. 
While these approaches require more work, +// they give you more control on the types of the mock function +// arguments and the action parameters, which in general leads to +// better compiler error messages that pay off in the long run. They +// also allow overloading actions based on parameter types (as opposed +// to just based on the number of parameters). +// +// CAVEAT: +// +// ACTION*() can only be used in a namespace scope as templates cannot be +// declared inside of a local class. +// Users can, however, define any local functors (e.g. a lambda) that +// can be used as actions. +// +// MORE INFORMATION: +// +// To learn more about using these macros, please search for 'ACTION' on +// https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md + +$range i 0..n +$range k 0..n-1 + +// An internal macro needed for implementing ACTION*(). +#define GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_\ + const args_type& args GTEST_ATTRIBUTE_UNUSED_ +$for k [[, \ + const arg$k[[]]_type& arg$k GTEST_ATTRIBUTE_UNUSED_]] + + +// Sometimes you want to give an action explicit template parameters +// that cannot be inferred from its value parameters. ACTION() and +// ACTION_P*() don't support that. ACTION_TEMPLATE() remedies that +// and can be viewed as an extension to ACTION() and ACTION_P*(). +// +// The syntax: +// +// ACTION_TEMPLATE(ActionName, +// HAS_m_TEMPLATE_PARAMS(kind1, name1, ..., kind_m, name_m), +// AND_n_VALUE_PARAMS(p1, ..., p_n)) { statements; } +// +// defines an action template that takes m explicit template +// parameters and n value parameters. name_i is the name of the i-th +// template parameter, and kind_i specifies whether it's a typename, +// an integral constant, or a template. p_i is the name of the i-th +// value parameter. +// +// Example: +// +// // DuplicateArg(output) converts the k-th argument of the mock +// // function to type T and copies it to *output. 
+// ACTION_TEMPLATE(DuplicateArg, +// HAS_2_TEMPLATE_PARAMS(int, k, typename, T), +// AND_1_VALUE_PARAMS(output)) { +// *output = T(::std::get(args)); +// } +// ... +// int n; +// EXPECT_CALL(mock, Foo(_, _)) +// .WillOnce(DuplicateArg<1, unsigned char>(&n)); +// +// To create an instance of an action template, write: +// +// ActionName(v1, ..., v_n) +// +// where the ts are the template arguments and the vs are the value +// arguments. The value argument types are inferred by the compiler. +// If you want to explicitly specify the value argument types, you can +// provide additional template arguments: +// +// ActionName(v1, ..., v_n) +// +// where u_i is the desired type of v_i. +// +// ACTION_TEMPLATE and ACTION/ACTION_P* can be overloaded on the +// number of value parameters, but not on the number of template +// parameters. Without the restriction, the meaning of the following +// is unclear: +// +// OverloadedAction(x); +// +// Are we using a single-template-parameter action where 'bool' refers +// to the type of x, or are we using a two-template-parameter action +// where the compiler is asked to infer the type of x? +// +// Implementation notes: +// +// GMOCK_INTERNAL_*_HAS_m_TEMPLATE_PARAMS and +// GMOCK_INTERNAL_*_AND_n_VALUE_PARAMS are internal macros for +// implementing ACTION_TEMPLATE. The main trick we use is to create +// new macro invocations when expanding a macro. For example, we have +// +// #define ACTION_TEMPLATE(name, template_params, value_params) +// ... GMOCK_INTERNAL_DECL_##template_params ... +// +// which causes ACTION_TEMPLATE(..., HAS_1_TEMPLATE_PARAMS(typename, T), ...) +// to expand to +// +// ... GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS(typename, T) ... +// +// Since GMOCK_INTERNAL_DECL_HAS_1_TEMPLATE_PARAMS is a macro, the +// preprocessor will continue to expand it to +// +// ... typename T ... +// +// This technique conforms to the C++ standard and is portable. 
It +// allows us to implement action templates using O(N) code, where N is +// the maximum number of template/value parameters supported. Without +// using it, we'd have to devote O(N^2) amount of code to implement all +// combinations of m and n. + +// Declares the template parameters. + +$range j 1..n +$for j [[ +$range m 0..j-1 +#define GMOCK_INTERNAL_DECL_HAS_$j[[]] +_TEMPLATE_PARAMS($for m, [[kind$m, name$m]]) $for m, [[kind$m name$m]] + + +]] + +// Lists the template parameters. + +$for j [[ +$range m 0..j-1 +#define GMOCK_INTERNAL_LIST_HAS_$j[[]] +_TEMPLATE_PARAMS($for m, [[kind$m, name$m]]) $for m, [[name$m]] + + +]] + +// Declares the types of value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_DECL_TYPE_AND_$i[[]] +_VALUE_PARAMS($for j, [[p$j]]) $for j [[, typename p$j##_type]] + + +]] + +// Initializes the value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_INIT_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]])\ + ($for j, [[p$j##_type gmock_p$j]])$if i>0 [[ : ]]$for j, [[p$j(::std::move(gmock_p$j))]] + + +]] + +// Declares the fields for storing the value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_DEFN_AND_$i[[]] +_VALUE_PARAMS($for j, [[p$j]]) $for j [[p$j##_type p$j; ]] + + +]] + +// Lists the value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_LIST_AND_$i[[]] +_VALUE_PARAMS($for j, [[p$j]]) $for j, [[p$j]] + + +]] + +// Lists the value parameter types. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_LIST_TYPE_AND_$i[[]] +_VALUE_PARAMS($for j, [[p$j]]) $for j [[, p$j##_type]] + + +]] + +// Declares the value parameters. + +$for i [[ +$range j 0..i-1 +#define GMOCK_INTERNAL_DECL_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]]) [[]] +$for j, [[p$j##_type p$j]] + + +]] + +// The suffix of the class template implementing the action template. 
+$for i [[ + + +$range j 0..i-1 +#define GMOCK_INTERNAL_COUNT_AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]]) [[]] +$if i==1 [[P]] $elif i>=2 [[P$i]] +]] + + +// The name of the class template implementing the action template. +#define GMOCK_ACTION_CLASS_(name, value_params)\ + GTEST_CONCAT_TOKEN_(name##Action, GMOCK_INTERNAL_COUNT_##value_params) + +$range k 0..n-1 + +#define ACTION_TEMPLATE(name, template_params, value_params)\ + template \ + class GMOCK_ACTION_CLASS_(name, value_params) {\ + public:\ + explicit GMOCK_ACTION_CLASS_(name, value_params)\ + GMOCK_INTERNAL_INIT_##value_params {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + explicit gmock_Impl GMOCK_INTERNAL_INIT_##value_params {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template <$for k, [[typename arg$k[[]]_type]]>\ + return_type gmock_PerformImpl(const args_type& args[[]] +$for k [[, const arg$k[[]]_type& arg$k]]) const;\ + GMOCK_INTERNAL_DEFN_##value_params\ + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template operator ::testing::Action() const {\ + return ::testing::Action(\ + new gmock_Impl(GMOCK_INTERNAL_LIST_##value_params));\ + }\ + GMOCK_INTERNAL_DEFN_##value_params\ + private:\ + GTEST_DISALLOW_ASSIGN_(GMOCK_ACTION_CLASS_(name, value_params));\ + };\ + template \ + inline GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params> name(\ + GMOCK_INTERNAL_DECL_##value_params) {\ + return GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params>(\ + GMOCK_INTERNAL_LIST_##value_params);\ + }\ + template \ + template \ + template \ + typename 
::testing::internal::Function::Result\ + GMOCK_ACTION_CLASS_(name, value_params)<\ + GMOCK_INTERNAL_LIST_##template_params\ + GMOCK_INTERNAL_LIST_TYPE_##value_params>::gmock_Impl::\ + gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const + +$for i + +[[ +$var template = [[$if i==0 [[]] $else [[ +$range j 0..i-1 + + template <$for j, [[typename p$j##_type]]>\ +]]]] +$var class_name = [[name##Action[[$if i==0 [[]] $elif i==1 [[P]] + $else [[P$i]]]]]] +$range j 0..i-1 +$var ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] +$var param_types_and_names = [[$for j, [[p$j##_type p$j]]]] +$var inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(::std::forward(gmock_p$j))]]]]]] +$var param_field_decls = [[$for j +[[ + + p$j##_type p$j;\ +]]]] +$var param_field_decls2 = [[$for j +[[ + + p$j##_type p$j;\ +]]]] +$var params = [[$for j, [[p$j]]]] +$var param_types = [[$if i==0 [[]] $else [[<$for j, [[p$j##_type]]>]]]] +$var typename_arg_types = [[$for k, [[typename arg$k[[]]_type]]]] +$var arg_types_and_names = [[$for k, [[const arg$k[[]]_type& arg$k]]]] +$var macro_name = [[$if i==0 [[ACTION]] $elif i==1 [[ACTION_P]] + $else [[ACTION_P$i]]]] + +#define $macro_name(name$for j [[, p$j]])\$template + class $class_name {\ + public:\ + [[$if i==1 [[explicit ]]]]$class_name($ctor_param_list)$inits {}\ + template \ + class gmock_Impl : public ::testing::ActionInterface {\ + public:\ + typedef F function_type;\ + typedef typename ::testing::internal::Function::Result return_type;\ + typedef typename ::testing::internal::Function::ArgumentTuple\ + args_type;\ + [[$if i==1 [[explicit ]]]]gmock_Impl($ctor_param_list)$inits {}\ + virtual return_type Perform(const args_type& args) {\ + return ::testing::internal::ActionHelper::\ + Perform(this, args);\ + }\ + template <$typename_arg_types>\ + return_type gmock_PerformImpl(const args_type& args, [[]] +$arg_types_and_names) const;\$param_field_decls + private:\ + GTEST_DISALLOW_ASSIGN_(gmock_Impl);\ + };\ + template 
operator ::testing::Action() const {\ + return ::testing::Action(new gmock_Impl($params));\ + }\$param_field_decls2 + private:\ + GTEST_DISALLOW_ASSIGN_($class_name);\ + };\$template + inline $class_name$param_types name($param_types_and_names) {\ + return $class_name$param_types($params);\ + }\$template + template \ + template <$typename_arg_types>\ + typename ::testing::internal::Function::Result\ + $class_name$param_types::gmock_Impl::gmock_PerformImpl(\ + GMOCK_ACTION_ARG_TYPES_AND_NAMES_UNUSED_) const +]] +$$ } // This meta comment fixes auto-indentation in Emacs. It won't +$$ // show up in the generated code. + + +namespace testing { + + +// The ACTION*() macros trigger warning C4100 (unreferenced formal +// parameter) in MSVC with -W4. Unfortunately they cannot be fixed in +// the macro definition, as the warnings are generated when the macro +// is expanded and macro expansion cannot contain #pragma. Therefore +// we suppress them here. +#ifdef _MSC_VER +# pragma warning(push) +# pragma warning(disable:4100) +#endif + +// Various overloads for InvokeArgument(). +// +// The InvokeArgument(a1, a2, ..., a_k) action invokes the N-th +// (0-based) argument, which must be a k-ary callable, of the mock +// function, with arguments a1, a2, ..., a_k. +// +// Notes: +// +// 1. The arguments are passed by value by default. If you need to +// pass an argument by reference, wrap it inside ByRef(). For +// example, +// +// InvokeArgument<1>(5, string("Hello"), ByRef(foo)) +// +// passes 5 and string("Hello") by value, and passes foo by +// reference. +// +// 2. If the callable takes an argument by reference but ByRef() is +// not used, it will receive the reference to a copy of the value, +// instead of the original value. 
For example, when the 0-th +// argument of the mock function takes a const string&, the action +// +// InvokeArgument<0>(string("Hello")) +// +// makes a copy of the temporary string("Hello") object and passes a +// reference of the copy, instead of the original temporary object, +// to the callable. This makes it easy for a user to define an +// InvokeArgument action from temporary values and have it performed +// later. + +namespace internal { +namespace invoke_argument { + +// Appears in InvokeArgumentAdl's argument list to help avoid +// accidental calls to user functions of the same name. +struct AdlTag {}; + +// InvokeArgumentAdl - a helper for InvokeArgument. +// The basic overloads are provided here for generic functors. +// Overloads for other custom-callables are provided in the +// internal/custom/callback-actions.h header. + +$range i 0..n +$for i +[[ +$range j 1..i + +template +R InvokeArgumentAdl(AdlTag, F f[[$for j [[, A$j a$j]]]]) { + return f([[$for j, [[a$j]]]]); +} +]] + +} // namespace invoke_argument +} // namespace internal + +$range i 0..n +$for i [[ +$range j 0..i-1 + +ACTION_TEMPLATE(InvokeArgument, + HAS_1_TEMPLATE_PARAMS(int, k), + AND_$i[[]]_VALUE_PARAMS($for j, [[p$j]])) { + using internal::invoke_argument::InvokeArgumentAdl; + return InvokeArgumentAdl( + internal::invoke_argument::AdlTag(), + ::std::get(args)$for j [[, p$j]]); +} + +]] + +// Various overloads for ReturnNew(). +// +// The ReturnNew(a1, a2, ..., a_k) action returns a pointer to a new +// instance of type T, constructed on the heap with constructor arguments +// a1, a2, ..., and a_k. The caller assumes ownership of the returned value. 
+$range i 0..n +$for i [[ +$range j 0..i-1 +$var ps = [[$for j, [[p$j]]]] + +ACTION_TEMPLATE(ReturnNew, + HAS_1_TEMPLATE_PARAMS(typename, T), + AND_$i[[]]_VALUE_PARAMS($ps)) { + return new T($ps); +} + +]] + +#ifdef _MSC_VER +# pragma warning(pop) +#endif + +} // namespace testing + +// Include any custom callback actions added by the local installation. +// We must include this header at the end to make sure it can use the +// declarations from this file. +#include "gmock/internal/custom/gmock-generated-actions.h" + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_ACTIONS_H_ diff --git a/src/test/gtest/googlemock/include/gmock/gmock-generated-function-mockers.h b/src/test/gtest/googlemock/include/gmock/gmock-generated-function-mockers.h new file mode 100644 index 00000000..cd957817 --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-generated-function-mockers.h @@ -0,0 +1,752 @@ +// This file was GENERATED by command: +// pump.py gmock-generated-function-mockers.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2007, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements function mockers of various arities. + +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ + +#include +#include + +#include "gmock/gmock-spec-builders.h" +#include "gmock/internal/gmock-internal-utils.h" + +namespace testing { +namespace internal { +// Removes the given pointer; this is a helper for the expectation setter method +// for parameterless matchers. +// +// We want to make sure that the user cannot set a parameterless expectation on +// overloaded methods, including methods which are overloaded on const. Example: +// +// class MockClass { +// MOCK_METHOD0(GetName, string&()); +// MOCK_CONST_METHOD0(GetName, const string&()); +// }; +// +// TEST() { +// // This should be an error, as it's not clear which overload is expected. +// EXPECT_CALL(mock, GetName).WillOnce(ReturnRef(value)); +// } +// +// Here are the generated expectation-setter methods: +// +// class MockClass { +// // Overload 1 +// MockSpec gmock_GetName() { ... } +// // Overload 2. 
Declared const so that the compiler will generate an +// // error when trying to resolve between this and overload 4 in +// // 'gmock_GetName(WithoutMatchers(), nullptr)'. +// MockSpec gmock_GetName( +// const WithoutMatchers&, const Function*) const { +// // Removes const from this, calls overload 1 +// return AdjustConstness_(this)->gmock_GetName(); +// } +// +// // Overload 3 +// const string& gmock_GetName() const { ... } +// // Overload 4 +// MockSpec gmock_GetName( +// const WithoutMatchers&, const Function*) const { +// // Does not remove const, calls overload 3 +// return AdjustConstness_const(this)->gmock_GetName(); +// } +// } +// +template +const MockType* AdjustConstness_const(const MockType* mock) { + return mock; +} + +// Removes const from and returns the given pointer; this is a helper for the +// expectation setter method for parameterless matchers. +template +MockType* AdjustConstness_(const MockType* mock) { + return const_cast(mock); +} + +} // namespace internal + +// The style guide prohibits "using" statements in a namespace scope +// inside a header file. However, the FunctionMocker class template +// is meant to be defined in the ::testing namespace. The following +// line is just a trick for working around a bug in MSVC 8.0, which +// cannot handle it if we define FunctionMocker in ::testing. +using internal::FunctionMocker; + +// GMOCK_RESULT_(tn, F) expands to the result type of function type F. +// We define this as a variadic macro in case F contains unprotected +// commas (the same reason that we use variadic macros in other places +// in this file). +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_RESULT_(tn, ...) \ + tn ::testing::internal::Function<__VA_ARGS__>::Result + +// The type of argument N of the given function type. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_ARG_(tn, N, ...) 
\ + tn ::testing::internal::Function<__VA_ARGS__>::template Arg::type + +// The matcher type for argument N of the given function type. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_MATCHER_(tn, N, ...) \ + const ::testing::Matcher& + +// The variable for mocking the given method. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_MOCKER_(arity, constness, Method) \ + GTEST_CONCAT_TOKEN_(gmock##constness##arity##_##Method##_, __LINE__) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD0_(tn, constness, ct, Method, ...) \ + static_assert(0 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + ) constness { \ + GMOCK_MOCKER_(0, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(0, constness, Method).Invoke(); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method() constness { \ + GMOCK_MOCKER_(0, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(0, constness, Method).With(); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(0, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD1_(tn, constness, ct, Method, ...) 
\ + static_assert(1 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1) constness { \ + GMOCK_MOCKER_(1, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(1, constness, \ + Method).Invoke(::std::forward(gmock_a1)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1) constness { \ + GMOCK_MOCKER_(1, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(1, constness, Method).With(gmock_a1); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(1, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD2_(tn, constness, ct, Method, ...) 
\ + static_assert(2 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2) constness { \ + GMOCK_MOCKER_(2, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(2, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2) constness { \ + GMOCK_MOCKER_(2, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(2, constness, Method).With(gmock_a1, gmock_a2); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(2, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD3_(tn, constness, ct, Method, ...) 
\ + static_assert(3 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, \ + __VA_ARGS__) gmock_a3) constness { \ + GMOCK_MOCKER_(3, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(3, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2), \ + ::std::forward(gmock_a3)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3) constness { \ + GMOCK_MOCKER_(3, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(3, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(3, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD4_(tn, constness, ct, Method, ...) 
\ + static_assert(4 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4) constness { \ + GMOCK_MOCKER_(4, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(4, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2), \ + ::std::forward(gmock_a3), \ + ::std::forward(gmock_a4)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4) constness { \ + GMOCK_MOCKER_(4, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(4, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(4, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD5_(tn, constness, ct, Method, ...) 
\ + static_assert(5 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ + __VA_ARGS__) gmock_a5) constness { \ + GMOCK_MOCKER_(5, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(5, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2), \ + ::std::forward(gmock_a3), \ + ::std::forward(gmock_a4), \ + ::std::forward(gmock_a5)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5) constness { \ + GMOCK_MOCKER_(5, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(5, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(5, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD6_(tn, constness, ct, Method, ...) 
\ + static_assert(6 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ + __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, \ + __VA_ARGS__) gmock_a6) constness { \ + GMOCK_MOCKER_(6, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(6, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2), \ + ::std::forward(gmock_a3), \ + ::std::forward(gmock_a4), \ + ::std::forward(gmock_a5), \ + ::std::forward(gmock_a6)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6) constness { \ + GMOCK_MOCKER_(6, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(6, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(6, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD7_(tn, constness, ct, Method, ...) 
\ + static_assert(7 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ + __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7) constness { \ + GMOCK_MOCKER_(7, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(7, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2), \ + ::std::forward(gmock_a3), \ + ::std::forward(gmock_a4), \ + ::std::forward(gmock_a5), \ + ::std::forward(gmock_a6), \ + ::std::forward(gmock_a7)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7) constness { \ + GMOCK_MOCKER_(7, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(7, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(7, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! 
+#define GMOCK_METHOD8_(tn, constness, ct, Method, ...) \ + static_assert(8 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ + __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, GMOCK_ARG_(tn, 8, \ + __VA_ARGS__) gmock_a8) constness { \ + GMOCK_MOCKER_(8, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(8, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2), \ + ::std::forward(gmock_a3), \ + ::std::forward(gmock_a4), \ + ::std::forward(gmock_a5), \ + ::std::forward(gmock_a6), \ + ::std::forward(gmock_a7), \ + ::std::forward(gmock_a8)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8) constness { \ + GMOCK_MOCKER_(8, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(8, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + 
::testing::A(), \ + ::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(8, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD9_(tn, constness, ct, Method, ...) \ + static_assert(9 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ + __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, GMOCK_ARG_(tn, 8, \ + __VA_ARGS__) gmock_a8, GMOCK_ARG_(tn, 9, \ + __VA_ARGS__) gmock_a9) constness { \ + GMOCK_MOCKER_(9, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(9, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2), \ + ::std::forward(gmock_a3), \ + ::std::forward(gmock_a4), \ + ::std::forward(gmock_a5), \ + ::std::forward(gmock_a6), \ + ::std::forward(gmock_a7), \ + ::std::forward(gmock_a8), \ + ::std::forward(gmock_a9)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8, \ + GMOCK_MATCHER_(tn, 9, __VA_ARGS__) gmock_a9) constness { \ + GMOCK_MOCKER_(9, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(9, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8, \ + gmock_a9); \ + } \ + 
::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(9, constness, \ + Method) + +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD10_(tn, constness, ct, Method, ...) \ + static_assert(10 == \ + ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, \ + "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + GMOCK_ARG_(tn, 1, __VA_ARGS__) gmock_a1, GMOCK_ARG_(tn, 2, \ + __VA_ARGS__) gmock_a2, GMOCK_ARG_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_ARG_(tn, 4, __VA_ARGS__) gmock_a4, GMOCK_ARG_(tn, 5, \ + __VA_ARGS__) gmock_a5, GMOCK_ARG_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_ARG_(tn, 7, __VA_ARGS__) gmock_a7, GMOCK_ARG_(tn, 8, \ + __VA_ARGS__) gmock_a8, GMOCK_ARG_(tn, 9, __VA_ARGS__) gmock_a9, \ + GMOCK_ARG_(tn, 10, __VA_ARGS__) gmock_a10) constness { \ + GMOCK_MOCKER_(10, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_(10, constness, \ + Method).Invoke(::std::forward(gmock_a1), \ + ::std::forward(gmock_a2), \ + ::std::forward(gmock_a3), \ + ::std::forward(gmock_a4), \ + ::std::forward(gmock_a5), \ + ::std::forward(gmock_a6), \ + ::std::forward(gmock_a7), \ + ::std::forward(gmock_a8), \ + ::std::forward(gmock_a9), \ + ::std::forward(gmock_a10)); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method(GMOCK_MATCHER_(tn, 1, __VA_ARGS__) gmock_a1, \ + GMOCK_MATCHER_(tn, 2, __VA_ARGS__) gmock_a2, \ + GMOCK_MATCHER_(tn, 3, __VA_ARGS__) gmock_a3, \ + GMOCK_MATCHER_(tn, 4, __VA_ARGS__) gmock_a4, \ + GMOCK_MATCHER_(tn, 5, __VA_ARGS__) 
gmock_a5, \ + GMOCK_MATCHER_(tn, 6, __VA_ARGS__) gmock_a6, \ + GMOCK_MATCHER_(tn, 7, __VA_ARGS__) gmock_a7, \ + GMOCK_MATCHER_(tn, 8, __VA_ARGS__) gmock_a8, \ + GMOCK_MATCHER_(tn, 9, __VA_ARGS__) gmock_a9, \ + GMOCK_MATCHER_(tn, 10, \ + __VA_ARGS__) gmock_a10) constness { \ + GMOCK_MOCKER_(10, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_(10, constness, Method).With(gmock_a1, gmock_a2, \ + gmock_a3, gmock_a4, gmock_a5, gmock_a6, gmock_a7, gmock_a8, gmock_a9, \ + gmock_a10); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method(::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A(), \ + ::testing::A()); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_(10, constness, \ + Method) + +#define MOCK_METHOD0(m, ...) GMOCK_METHOD0_(, , , m, __VA_ARGS__) +#define MOCK_METHOD1(m, ...) GMOCK_METHOD1_(, , , m, __VA_ARGS__) +#define MOCK_METHOD2(m, ...) GMOCK_METHOD2_(, , , m, __VA_ARGS__) +#define MOCK_METHOD3(m, ...) GMOCK_METHOD3_(, , , m, __VA_ARGS__) +#define MOCK_METHOD4(m, ...) GMOCK_METHOD4_(, , , m, __VA_ARGS__) +#define MOCK_METHOD5(m, ...) GMOCK_METHOD5_(, , , m, __VA_ARGS__) +#define MOCK_METHOD6(m, ...) GMOCK_METHOD6_(, , , m, __VA_ARGS__) +#define MOCK_METHOD7(m, ...) GMOCK_METHOD7_(, , , m, __VA_ARGS__) +#define MOCK_METHOD8(m, ...) GMOCK_METHOD8_(, , , m, __VA_ARGS__) +#define MOCK_METHOD9(m, ...) GMOCK_METHOD9_(, , , m, __VA_ARGS__) +#define MOCK_METHOD10(m, ...) GMOCK_METHOD10_(, , , m, __VA_ARGS__) + +#define MOCK_CONST_METHOD0(m, ...) GMOCK_METHOD0_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD1(m, ...) GMOCK_METHOD1_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD2(m, ...) 
GMOCK_METHOD2_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD3(m, ...) GMOCK_METHOD3_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD4(m, ...) GMOCK_METHOD4_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD5(m, ...) GMOCK_METHOD5_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD6(m, ...) GMOCK_METHOD6_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD7(m, ...) GMOCK_METHOD7_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD8(m, ...) GMOCK_METHOD8_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD9(m, ...) GMOCK_METHOD9_(, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD10(m, ...) GMOCK_METHOD10_(, const, , m, __VA_ARGS__) + +#define MOCK_METHOD0_T(m, ...) GMOCK_METHOD0_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD1_T(m, ...) GMOCK_METHOD1_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD2_T(m, ...) GMOCK_METHOD2_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD3_T(m, ...) GMOCK_METHOD3_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD4_T(m, ...) GMOCK_METHOD4_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD5_T(m, ...) GMOCK_METHOD5_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD6_T(m, ...) GMOCK_METHOD6_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD7_T(m, ...) GMOCK_METHOD7_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD8_T(m, ...) GMOCK_METHOD8_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD9_T(m, ...) GMOCK_METHOD9_(typename, , , m, __VA_ARGS__) +#define MOCK_METHOD10_T(m, ...) GMOCK_METHOD10_(typename, , , m, __VA_ARGS__) + +#define MOCK_CONST_METHOD0_T(m, ...) \ + GMOCK_METHOD0_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD1_T(m, ...) \ + GMOCK_METHOD1_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD2_T(m, ...) \ + GMOCK_METHOD2_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD3_T(m, ...) \ + GMOCK_METHOD3_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD4_T(m, ...) 
\ + GMOCK_METHOD4_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD5_T(m, ...) \ + GMOCK_METHOD5_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD6_T(m, ...) \ + GMOCK_METHOD6_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD7_T(m, ...) \ + GMOCK_METHOD7_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD8_T(m, ...) \ + GMOCK_METHOD8_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD9_T(m, ...) \ + GMOCK_METHOD9_(typename, const, , m, __VA_ARGS__) +#define MOCK_CONST_METHOD10_T(m, ...) \ + GMOCK_METHOD10_(typename, const, , m, __VA_ARGS__) + +#define MOCK_METHOD0_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD0_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD1_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD1_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD2_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD2_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD3_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD3_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD4_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD4_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD5_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD5_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD6_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD6_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD7_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD7_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD8_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD8_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD9_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD9_(, , ct, m, __VA_ARGS__) +#define MOCK_METHOD10_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD10_(, , ct, m, __VA_ARGS__) + +#define MOCK_CONST_METHOD0_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD0_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD1_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD1_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD2_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD2_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD3_WITH_CALLTYPE(ct, m, ...) 
\ + GMOCK_METHOD3_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD4_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD4_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD5_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD5_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD6_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD6_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD7_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD7_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD8_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD8_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD9_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD9_(, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD10_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD10_(, const, ct, m, __VA_ARGS__) + +#define MOCK_METHOD0_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD0_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD1_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD1_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD2_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD2_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD3_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD3_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD4_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD4_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD5_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD5_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD6_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD6_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD7_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD7_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD8_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD8_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD9_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD9_(typename, , ct, m, __VA_ARGS__) +#define MOCK_METHOD10_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD10_(typename, , ct, m, __VA_ARGS__) + +#define MOCK_CONST_METHOD0_T_WITH_CALLTYPE(ct, m, ...) 
\ + GMOCK_METHOD0_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD1_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD1_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD2_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD2_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD3_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD3_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD4_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD4_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD5_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD5_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD6_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD6_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD7_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD7_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD8_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD8_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD9_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD9_(typename, const, ct, m, __VA_ARGS__) +#define MOCK_CONST_METHOD10_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD10_(typename, const, ct, m, __VA_ARGS__) + +} // namespace testing + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ diff --git a/src/test/gtest/googlemock/include/gmock/gmock-generated-function-mockers.h.pump b/src/test/gtest/googlemock/include/gmock/gmock-generated-function-mockers.h.pump new file mode 100644 index 00000000..a56e132f --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-generated-function-mockers.h.pump @@ -0,0 +1,227 @@ +$$ -*- mode: c++; -*- +$$ This is a Pump source file. Please use Pump to convert +$$ it to gmock-generated-function-mockers.h. +$$ +$var n = 10 $$ The maximum arity we support. +// Copyright 2007, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements function mockers of various arities. 
+ +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ + +#include +#include + +#include "gmock/gmock-spec-builders.h" +#include "gmock/internal/gmock-internal-utils.h" + +namespace testing { +namespace internal { + +$range i 0..n +// Removes the given pointer; this is a helper for the expectation setter method +// for parameterless matchers. +// +// We want to make sure that the user cannot set a parameterless expectation on +// overloaded methods, including methods which are overloaded on const. Example: +// +// class MockClass { +// MOCK_METHOD0(GetName, string&()); +// MOCK_CONST_METHOD0(GetName, const string&()); +// }; +// +// TEST() { +// // This should be an error, as it's not clear which overload is expected. +// EXPECT_CALL(mock, GetName).WillOnce(ReturnRef(value)); +// } +// +// Here are the generated expectation-setter methods: +// +// class MockClass { +// // Overload 1 +// MockSpec gmock_GetName() { ... } +// // Overload 2. Declared const so that the compiler will generate an +// // error when trying to resolve between this and overload 4 in +// // 'gmock_GetName(WithoutMatchers(), nullptr)'. +// MockSpec gmock_GetName( +// const WithoutMatchers&, const Function*) const { +// // Removes const from this, calls overload 1 +// return AdjustConstness_(this)->gmock_GetName(); +// } +// +// // Overload 3 +// const string& gmock_GetName() const { ... } +// // Overload 4 +// MockSpec gmock_GetName( +// const WithoutMatchers&, const Function*) const { +// // Does not remove const, calls overload 3 +// return AdjustConstness_const(this)->gmock_GetName(); +// } +// } +// +template +const MockType* AdjustConstness_const(const MockType* mock) { + return mock; +} + +// Removes const from and returns the given pointer; this is a helper for the +// expectation setter method for parameterless matchers. 
+template +MockType* AdjustConstness_(const MockType* mock) { + return const_cast(mock); +} + +} // namespace internal + +// The style guide prohibits "using" statements in a namespace scope +// inside a header file. However, the FunctionMocker class template +// is meant to be defined in the ::testing namespace. The following +// line is just a trick for working around a bug in MSVC 8.0, which +// cannot handle it if we define FunctionMocker in ::testing. +using internal::FunctionMocker; + +// GMOCK_RESULT_(tn, F) expands to the result type of function type F. +// We define this as a variadic macro in case F contains unprotected +// commas (the same reason that we use variadic macros in other places +// in this file). +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_RESULT_(tn, ...) \ + tn ::testing::internal::Function<__VA_ARGS__>::Result + +// The type of argument N of the given function type. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_ARG_(tn, N, ...) \ + tn ::testing::internal::Function<__VA_ARGS__>::template Arg::type + +// The matcher type for argument N of the given function type. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_MATCHER_(tn, N, ...) \ + const ::testing::Matcher& + +// The variable for mocking the given method. +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_MOCKER_(arity, constness, Method) \ + GTEST_CONCAT_TOKEN_(gmock##constness##arity##_##Method##_, __LINE__) + + +$for i [[ +$range j 1..i +$var arg_as = [[$for j, [[GMOCK_ARG_(tn, $j, __VA_ARGS__) gmock_a$j]]]] +$var as = [[$for j, \ + [[::std::forward(gmock_a$j)]]]] +$var matcher_arg_as = [[$for j, \ + [[GMOCK_MATCHER_(tn, $j, __VA_ARGS__) gmock_a$j]]]] +$var matcher_as = [[$for j, [[gmock_a$j]]]] +$var anything_matchers = [[$for j, \ + [[::testing::A()]]]] +// INTERNAL IMPLEMENTATION - DON'T USE IN USER CODE!!! +#define GMOCK_METHOD$i[[]]_(tn, constness, ct, Method, ...) 
\ + static_assert($i == ::testing::internal::Function<__VA_ARGS__>::ArgumentCount, "MOCK_METHOD must match argument count.");\ + GMOCK_RESULT_(tn, __VA_ARGS__) ct Method( \ + $arg_as) constness { \ + GMOCK_MOCKER_($i, constness, Method).SetOwnerAndName(this, #Method); \ + return GMOCK_MOCKER_($i, constness, Method).Invoke($as); \ + } \ + ::testing::MockSpec<__VA_ARGS__> \ + gmock_##Method($matcher_arg_as) constness { \ + GMOCK_MOCKER_($i, constness, Method).RegisterOwner(this); \ + return GMOCK_MOCKER_($i, constness, Method).With($matcher_as); \ + } \ + ::testing::MockSpec<__VA_ARGS__> gmock_##Method( \ + const ::testing::internal::WithoutMatchers&, \ + constness ::testing::internal::Function<__VA_ARGS__>* ) const { \ + return ::testing::internal::AdjustConstness_##constness(this)-> \ + gmock_##Method($anything_matchers); \ + } \ + mutable ::testing::FunctionMocker<__VA_ARGS__> GMOCK_MOCKER_($i, constness, Method) + + +]] +$for i [[ +#define MOCK_METHOD$i(m, ...) GMOCK_METHOD$i[[]]_(, , , m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_CONST_METHOD$i(m, ...) GMOCK_METHOD$i[[]]_(, const, , m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_METHOD$i[[]]_T(m, ...) GMOCK_METHOD$i[[]]_(typename, , , m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_CONST_METHOD$i[[]]_T(m, ...) \ + GMOCK_METHOD$i[[]]_(typename, const, , m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_METHOD$i[[]]_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD$i[[]]_(, , ct, m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_CONST_METHOD$i[[]]_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD$i[[]]_(, const, ct, m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_METHOD$i[[]]_T_WITH_CALLTYPE(ct, m, ...) \ + GMOCK_METHOD$i[[]]_(typename, , ct, m, __VA_ARGS__) + +]] + + +$for i [[ +#define MOCK_CONST_METHOD$i[[]]_T_WITH_CALLTYPE(ct, m, ...) 
\ + GMOCK_METHOD$i[[]]_(typename, const, ct, m, __VA_ARGS__) + +]] + +} // namespace testing + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_FUNCTION_MOCKERS_H_ diff --git a/src/test/gtest/googlemock/include/gmock/gmock-generated-matchers.h b/src/test/gtest/googlemock/include/gmock/gmock-generated-matchers.h new file mode 100644 index 00000000..690a57f1 --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-generated-matchers.h @@ -0,0 +1,1097 @@ +// This file was GENERATED by command: +// pump.py gmock-generated-matchers.h.pump +// DO NOT EDIT BY HAND!!! + +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used variadic matchers. + +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ + +#include +#include +#include +#include +#include +#include "gmock/gmock-matchers.h" + +// The MATCHER* family of macros can be used in a namespace scope to +// define custom matchers easily. +// +// Basic Usage +// =========== +// +// The syntax +// +// MATCHER(name, description_string) { statements; } +// +// defines a matcher with the given name that executes the statements, +// which must return a bool to indicate if the match succeeds. Inside +// the statements, you can refer to the value being matched by 'arg', +// and refer to its type by 'arg_type'. +// +// The description string documents what the matcher does, and is used +// to generate the failure message when the match fails. Since a +// MATCHER() is usually defined in a header file shared by multiple +// C++ source files, we require the description to be a C-string +// literal to avoid possible side effects. It can be empty, in which +// case we'll use the sequence of words in the matcher name as the +// description. +// +// For example: +// +// MATCHER(IsEven, "") { return (arg % 2) == 0; } +// +// allows you to write +// +// // Expects mock_foo.Bar(n) to be called where n is even. 
+// EXPECT_CALL(mock_foo, Bar(IsEven())); +// +// or, +// +// // Verifies that the value of some_expression is even. +// EXPECT_THAT(some_expression, IsEven()); +// +// If the above assertion fails, it will print something like: +// +// Value of: some_expression +// Expected: is even +// Actual: 7 +// +// where the description "is even" is automatically calculated from the +// matcher name IsEven. +// +// Argument Type +// ============= +// +// Note that the type of the value being matched (arg_type) is +// determined by the context in which you use the matcher and is +// supplied to you by the compiler, so you don't need to worry about +// declaring it (nor can you). This allows the matcher to be +// polymorphic. For example, IsEven() can be used to match any type +// where the value of "(arg % 2) == 0" can be implicitly converted to +// a bool. In the "Bar(IsEven())" example above, if method Bar() +// takes an int, 'arg_type' will be int; if it takes an unsigned long, +// 'arg_type' will be unsigned long; and so on. +// +// Parameterizing Matchers +// ======================= +// +// Sometimes you'll want to parameterize the matcher. For that you +// can use another macro: +// +// MATCHER_P(name, param_name, description_string) { statements; } +// +// For example: +// +// MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } +// +// will allow you to write: +// +// EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); +// +// which may lead to this message (assuming n is 10): +// +// Value of: Blah("a") +// Expected: has absolute value 10 +// Actual: -9 +// +// Note that both the matcher description and its parameter are +// printed, making the message human-friendly. +// +// In the matcher definition body, you can write 'foo_type' to +// reference the type of a parameter named 'foo'. For example, in the +// body of MATCHER_P(HasAbsoluteValue, value) above, you can write +// 'value_type' to refer to the type of 'value'. 
+// +// We also provide MATCHER_P2, MATCHER_P3, ..., up to MATCHER_P10 to +// support multi-parameter matchers. +// +// Describing Parameterized Matchers +// ================================= +// +// The last argument to MATCHER*() is a string-typed expression. The +// expression can reference all of the matcher's parameters and a +// special bool-typed variable named 'negation'. When 'negation' is +// false, the expression should evaluate to the matcher's description; +// otherwise it should evaluate to the description of the negation of +// the matcher. For example, +// +// using testing::PrintToString; +// +// MATCHER_P2(InClosedRange, low, hi, +// std::string(negation ? "is not" : "is") + " in range [" + +// PrintToString(low) + ", " + PrintToString(hi) + "]") { +// return low <= arg && arg <= hi; +// } +// ... +// EXPECT_THAT(3, InClosedRange(4, 6)); +// EXPECT_THAT(3, Not(InClosedRange(2, 4))); +// +// would generate two failures that contain the text: +// +// Expected: is in range [4, 6] +// ... +// Expected: is not in range [2, 4] +// +// If you specify "" as the description, the failure message will +// contain the sequence of words in the matcher name followed by the +// parameter values printed as a tuple. For example, +// +// MATCHER_P2(InClosedRange, low, hi, "") { ... } +// ... +// EXPECT_THAT(3, InClosedRange(4, 6)); +// EXPECT_THAT(3, Not(InClosedRange(2, 4))); +// +// would generate two failures that contain the text: +// +// Expected: in closed range (4, 6) +// ... +// Expected: not (in closed range (2, 4)) +// +// Types of Matcher Parameters +// =========================== +// +// For the purpose of typing, you can view +// +// MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... } +// +// as shorthand for +// +// template +// FooMatcherPk +// Foo(p1_type p1, ..., pk_type pk) { ... } +// +// When you write Foo(v1, ..., vk), the compiler infers the types of +// the parameters v1, ..., and vk for you. 
If you are not happy with +// the result of the type inference, you can specify the types by +// explicitly instantiating the template, as in Foo(5, +// false). As said earlier, you don't get to (or need to) specify +// 'arg_type' as that's determined by the context in which the matcher +// is used. You can assign the result of expression Foo(p1, ..., pk) +// to a variable of type FooMatcherPk. This +// can be useful when composing matchers. +// +// While you can instantiate a matcher template with reference types, +// passing the parameters by pointer usually makes your code more +// readable. If, however, you still want to pass a parameter by +// reference, be aware that in the failure message generated by the +// matcher you will see the value of the referenced object but not its +// address. +// +// Explaining Match Results +// ======================== +// +// Sometimes the matcher description alone isn't enough to explain why +// the match has failed or succeeded. For example, when expecting a +// long string, it can be very helpful to also print the diff between +// the expected string and the actual one. To achieve that, you can +// optionally stream additional information to a special variable +// named result_listener, whose type is a pointer to class +// MatchResultListener: +// +// MATCHER_P(EqualsLongString, str, "") { +// if (arg == str) return true; +// +// *result_listener << "the difference: " +/// << DiffStrings(str, arg); +// return false; +// } +// +// Overloading Matchers +// ==================== +// +// You can overload matchers with different numbers of parameters: +// +// MATCHER_P(Blah, a, description_string1) { ... } +// MATCHER_P2(Blah, a, b, description_string2) { ... } +// +// Caveats +// ======= +// +// When defining a new matcher, you should also consider implementing +// MatcherInterface or using MakePolymorphicMatcher(). 
These +// approaches require more work than the MATCHER* macros, but also +// give you more control on the types of the value being matched and +// the matcher parameters, which may leads to better compiler error +// messages when the matcher is used wrong. They also allow +// overloading matchers based on parameter types (as opposed to just +// based on the number of parameters). +// +// MATCHER*() can only be used in a namespace scope as templates cannot be +// declared inside of a local class. +// +// More Information +// ================ +// +// To learn more about using these macros, please search for 'MATCHER' +// on +// https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md + +#define MATCHER(name, description)\ + class name##Matcher {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl()\ + {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple<>()));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl());\ + }\ + name##Matcher() {\ + }\ + private:\ + };\ + inline name##Matcher name() {\ + return name##Matcher();\ + }\ + template \ + bool name##Matcher::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + 
::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P(name, p0, description)\ + template \ + class name##MatcherP {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + explicit gmock_Impl(p0##_type gmock_p0)\ + : p0(::std::move(gmock_p0)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0));\ + }\ + explicit name##MatcherP(p0##_type gmock_p0) : p0(::std::move(gmock_p0)) {\ + }\ + p0##_type const p0;\ + private:\ + };\ + template \ + inline name##MatcherP name(p0##_type p0) {\ + return name##MatcherP(p0);\ + }\ + template \ + template \ + bool name##MatcherP::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P2(name, p0, p1, description)\ + template \ + class name##MatcherP2 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1)\ + : p0(::std::move(gmock_p0)), 
p1(::std::move(gmock_p1)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, p1)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1));\ + }\ + name##MatcherP2(p0##_type gmock_p0, \ + p1##_type gmock_p1) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + private:\ + };\ + template \ + inline name##MatcherP2 name(p0##_type p0, \ + p1##_type p1) {\ + return name##MatcherP2(p0, p1);\ + }\ + template \ + template \ + bool name##MatcherP2::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P3(name, p0, p1, p2, description)\ + template \ + class name##MatcherP3 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2)\ + : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ + p2(::std::move(gmock_p2)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void 
DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, p1, p2)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2));\ + }\ + name##MatcherP3(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + private:\ + };\ + template \ + inline name##MatcherP3 name(p0##_type p0, \ + p1##_type p1, p2##_type p2) {\ + return name##MatcherP3(p0, p1, p2);\ + }\ + template \ + template \ + bool name##MatcherP3::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P4(name, p0, p1, p2, p3, description)\ + template \ + class name##MatcherP4 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3)\ + : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ + p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void 
DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, \ + p1, p2, p3)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3));\ + }\ + name##MatcherP4(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + private:\ + };\ + template \ + inline name##MatcherP4 name(p0##_type p0, p1##_type p1, p2##_type p2, \ + p3##_type p3) {\ + return name##MatcherP4(p0, \ + p1, p2, p3);\ + }\ + template \ + template \ + bool name##MatcherP4::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P5(name, p0, p1, p2, p3, p4, description)\ + template \ + class name##MatcherP5 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4)\ + : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ + p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ + 
p4(::std::move(gmock_p4)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, p1, p2, p3, p4)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4));\ + }\ + name##MatcherP5(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, \ + p4##_type gmock_p4) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + private:\ + };\ + template \ + inline name##MatcherP5 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4) {\ + return name##MatcherP5(p0, p1, p2, p3, p4);\ + }\ + template \ + template \ + bool name##MatcherP5::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P6(name, p0, p1, p2, p3, p4, p5, description)\ + template \ + class name##MatcherP6 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + 
GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5)\ + : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ + p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ + p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, p1, p2, p3, p4, p5)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5));\ + }\ + name##MatcherP6(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + private:\ + };\ + template \ + inline name##MatcherP6 name(p0##_type p0, p1##_type p1, p2##_type p2, \ + p3##_type p3, p4##_type p4, p5##_type 
p5) {\ + return name##MatcherP6(p0, p1, p2, p3, p4, p5);\ + }\ + template \ + template \ + bool name##MatcherP6::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P7(name, p0, p1, p2, p3, p4, p5, p6, description)\ + template \ + class name##MatcherP7 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6)\ + : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ + p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ + p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)), \ + p6(::std::move(gmock_p6)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + p6##_type const p6;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, p1, p2, p3, p4, p5, \ + p6)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5, p6));\ + }\ + name##MatcherP7(p0##_type gmock_p0, p1##_type gmock_p1, 
\ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + p6##_type const p6;\ + private:\ + };\ + template \ + inline name##MatcherP7 name(p0##_type p0, p1##_type p1, \ + p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ + p6##_type p6) {\ + return name##MatcherP7(p0, p1, p2, p3, p4, p5, p6);\ + }\ + template \ + template \ + bool name##MatcherP7::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P8(name, p0, p1, p2, p3, p4, p5, p6, p7, description)\ + template \ + class name##MatcherP8 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7)\ + : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ + p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ + p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)), \ + p6(::std::move(gmock_p6)), p7(::std::move(gmock_p7)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type 
const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + p6##_type const p6;\ + p7##_type const p7;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, p1, p2, \ + p3, p4, p5, p6, p7)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7));\ + }\ + name##MatcherP8(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, \ + p7##_type gmock_p7) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ + p7(::std::move(gmock_p7)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + p6##_type const p6;\ + p7##_type const p7;\ + private:\ + };\ + template \ + inline name##MatcherP8 name(p0##_type p0, \ + p1##_type p1, p2##_type p2, p3##_type p3, p4##_type p4, p5##_type p5, \ + p6##_type p6, p7##_type p7) {\ + return name##MatcherP8(p0, p1, p2, p3, p4, p5, \ + p6, p7);\ + }\ + template \ + template \ + bool name##MatcherP8::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P9(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, description)\ + template \ + class name##MatcherP9 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + 
gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8)\ + : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ + p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ + p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)), \ + p6(::std::move(gmock_p6)), p7(::std::move(gmock_p7)), \ + p8(::std::move(gmock_p8)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + p6##_type const p6;\ + p7##_type const p7;\ + p8##_type const p8;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, p1, p2, p3, p4, p5, p6, p7, p8)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7, p8));\ + }\ + name##MatcherP9(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ + 
p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + p6##_type const p6;\ + p7##_type const p7;\ + p8##_type const p8;\ + private:\ + };\ + template \ + inline name##MatcherP9 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, \ + p8##_type p8) {\ + return name##MatcherP9(p0, p1, p2, \ + p3, p4, p5, p6, p7, p8);\ + }\ + template \ + template \ + bool name##MatcherP9::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#define MATCHER_P10(name, p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, description)\ + template \ + class name##MatcherP10 {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + gmock_Impl(p0##_type gmock_p0, p1##_type gmock_p1, p2##_type gmock_p2, \ + p3##_type gmock_p3, p4##_type gmock_p4, p5##_type gmock_p5, \ + p6##_type gmock_p6, p7##_type gmock_p7, p8##_type gmock_p8, \ + p9##_type gmock_p9)\ + : p0(::std::move(gmock_p0)), p1(::std::move(gmock_p1)), \ + p2(::std::move(gmock_p2)), p3(::std::move(gmock_p3)), \ + p4(::std::move(gmock_p4)), p5(::std::move(gmock_p5)), \ + p6(::std::move(gmock_p6)), p7(::std::move(gmock_p7)), \ + p8(::std::move(gmock_p8)), p9(::std::move(gmock_p9)) {}\ + virtual bool MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + 
p5##_type const p5;\ + p6##_type const p6;\ + p7##_type const p7;\ + p8##_type const p8;\ + p9##_type const p9;\ + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9)));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9));\ + }\ + name##MatcherP10(p0##_type gmock_p0, p1##_type gmock_p1, \ + p2##_type gmock_p2, p3##_type gmock_p3, p4##_type gmock_p4, \ + p5##_type gmock_p5, p6##_type gmock_p6, p7##_type gmock_p7, \ + p8##_type gmock_p8, p9##_type gmock_p9) : p0(::std::move(gmock_p0)), \ + p1(::std::move(gmock_p1)), p2(::std::move(gmock_p2)), \ + p3(::std::move(gmock_p3)), p4(::std::move(gmock_p4)), \ + p5(::std::move(gmock_p5)), p6(::std::move(gmock_p6)), \ + p7(::std::move(gmock_p7)), p8(::std::move(gmock_p8)), \ + p9(::std::move(gmock_p9)) {\ + }\ + p0##_type const p0;\ + p1##_type const p1;\ + p2##_type const p2;\ + p3##_type const p3;\ + p4##_type const p4;\ + p5##_type const p5;\ + p6##_type const p6;\ + p7##_type const p7;\ + p8##_type const p8;\ + p9##_type const p9;\ + private:\ + };\ + template \ + inline name##MatcherP10 name(p0##_type p0, p1##_type p1, p2##_type p2, p3##_type p3, \ + p4##_type p4, p5##_type p5, p6##_type p6, p7##_type p7, p8##_type p8, \ + p9##_type p9) {\ + return name##MatcherP10(p0, \ + p1, p2, p3, p4, p5, p6, p7, p8, p9);\ + }\ + template \ + template \ + bool name##MatcherP10::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ diff --git 
a/src/test/gtest/googlemock/include/gmock/gmock-generated-matchers.h.pump b/src/test/gtest/googlemock/include/gmock/gmock-generated-matchers.h.pump new file mode 100644 index 00000000..ae90917c --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-generated-matchers.h.pump @@ -0,0 +1,346 @@ +$$ -*- mode: c++; -*- +$$ This is a Pump source file. Please use Pump to convert +$$ it to gmock-generated-matchers.h. +$$ +$var n = 10 $$ The maximum arity we support. +$$ }} This line fixes auto-indentation of the following code in Emacs. +// Copyright 2008, Google Inc. +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used variadic matchers. + +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ + +#include +#include +#include +#include +#include +#include "gmock/gmock-matchers.h" + +// The MATCHER* family of macros can be used in a namespace scope to +// define custom matchers easily. +// +// Basic Usage +// =========== +// +// The syntax +// +// MATCHER(name, description_string) { statements; } +// +// defines a matcher with the given name that executes the statements, +// which must return a bool to indicate if the match succeeds. Inside +// the statements, you can refer to the value being matched by 'arg', +// and refer to its type by 'arg_type'. +// +// The description string documents what the matcher does, and is used +// to generate the failure message when the match fails. Since a +// MATCHER() is usually defined in a header file shared by multiple +// C++ source files, we require the description to be a C-string +// literal to avoid possible side effects. It can be empty, in which +// case we'll use the sequence of words in the matcher name as the +// description. +// +// For example: +// +// MATCHER(IsEven, "") { return (arg % 2) == 0; } +// +// allows you to write +// +// // Expects mock_foo.Bar(n) to be called where n is even. 
+// EXPECT_CALL(mock_foo, Bar(IsEven())); +// +// or, +// +// // Verifies that the value of some_expression is even. +// EXPECT_THAT(some_expression, IsEven()); +// +// If the above assertion fails, it will print something like: +// +// Value of: some_expression +// Expected: is even +// Actual: 7 +// +// where the description "is even" is automatically calculated from the +// matcher name IsEven. +// +// Argument Type +// ============= +// +// Note that the type of the value being matched (arg_type) is +// determined by the context in which you use the matcher and is +// supplied to you by the compiler, so you don't need to worry about +// declaring it (nor can you). This allows the matcher to be +// polymorphic. For example, IsEven() can be used to match any type +// where the value of "(arg % 2) == 0" can be implicitly converted to +// a bool. In the "Bar(IsEven())" example above, if method Bar() +// takes an int, 'arg_type' will be int; if it takes an unsigned long, +// 'arg_type' will be unsigned long; and so on. +// +// Parameterizing Matchers +// ======================= +// +// Sometimes you'll want to parameterize the matcher. For that you +// can use another macro: +// +// MATCHER_P(name, param_name, description_string) { statements; } +// +// For example: +// +// MATCHER_P(HasAbsoluteValue, value, "") { return abs(arg) == value; } +// +// will allow you to write: +// +// EXPECT_THAT(Blah("a"), HasAbsoluteValue(n)); +// +// which may lead to this message (assuming n is 10): +// +// Value of: Blah("a") +// Expected: has absolute value 10 +// Actual: -9 +// +// Note that both the matcher description and its parameter are +// printed, making the message human-friendly. +// +// In the matcher definition body, you can write 'foo_type' to +// reference the type of a parameter named 'foo'. For example, in the +// body of MATCHER_P(HasAbsoluteValue, value) above, you can write +// 'value_type' to refer to the type of 'value'. 
+// +// We also provide MATCHER_P2, MATCHER_P3, ..., up to MATCHER_P$n to +// support multi-parameter matchers. +// +// Describing Parameterized Matchers +// ================================= +// +// The last argument to MATCHER*() is a string-typed expression. The +// expression can reference all of the matcher's parameters and a +// special bool-typed variable named 'negation'. When 'negation' is +// false, the expression should evaluate to the matcher's description; +// otherwise it should evaluate to the description of the negation of +// the matcher. For example, +// +// using testing::PrintToString; +// +// MATCHER_P2(InClosedRange, low, hi, +// std::string(negation ? "is not" : "is") + " in range [" + +// PrintToString(low) + ", " + PrintToString(hi) + "]") { +// return low <= arg && arg <= hi; +// } +// ... +// EXPECT_THAT(3, InClosedRange(4, 6)); +// EXPECT_THAT(3, Not(InClosedRange(2, 4))); +// +// would generate two failures that contain the text: +// +// Expected: is in range [4, 6] +// ... +// Expected: is not in range [2, 4] +// +// If you specify "" as the description, the failure message will +// contain the sequence of words in the matcher name followed by the +// parameter values printed as a tuple. For example, +// +// MATCHER_P2(InClosedRange, low, hi, "") { ... } +// ... +// EXPECT_THAT(3, InClosedRange(4, 6)); +// EXPECT_THAT(3, Not(InClosedRange(2, 4))); +// +// would generate two failures that contain the text: +// +// Expected: in closed range (4, 6) +// ... +// Expected: not (in closed range (2, 4)) +// +// Types of Matcher Parameters +// =========================== +// +// For the purpose of typing, you can view +// +// MATCHER_Pk(Foo, p1, ..., pk, description_string) { ... } +// +// as shorthand for +// +// template +// FooMatcherPk +// Foo(p1_type p1, ..., pk_type pk) { ... } +// +// When you write Foo(v1, ..., vk), the compiler infers the types of +// the parameters v1, ..., and vk for you. 
If you are not happy with +// the result of the type inference, you can specify the types by +// explicitly instantiating the template, as in Foo(5, +// false). As said earlier, you don't get to (or need to) specify +// 'arg_type' as that's determined by the context in which the matcher +// is used. You can assign the result of expression Foo(p1, ..., pk) +// to a variable of type FooMatcherPk. This +// can be useful when composing matchers. +// +// While you can instantiate a matcher template with reference types, +// passing the parameters by pointer usually makes your code more +// readable. If, however, you still want to pass a parameter by +// reference, be aware that in the failure message generated by the +// matcher you will see the value of the referenced object but not its +// address. +// +// Explaining Match Results +// ======================== +// +// Sometimes the matcher description alone isn't enough to explain why +// the match has failed or succeeded. For example, when expecting a +// long string, it can be very helpful to also print the diff between +// the expected string and the actual one. To achieve that, you can +// optionally stream additional information to a special variable +// named result_listener, whose type is a pointer to class +// MatchResultListener: +// +// MATCHER_P(EqualsLongString, str, "") { +// if (arg == str) return true; +// +// *result_listener << "the difference: " +/// << DiffStrings(str, arg); +// return false; +// } +// +// Overloading Matchers +// ==================== +// +// You can overload matchers with different numbers of parameters: +// +// MATCHER_P(Blah, a, description_string1) { ... } +// MATCHER_P2(Blah, a, b, description_string2) { ... } +// +// Caveats +// ======= +// +// When defining a new matcher, you should also consider implementing +// MatcherInterface or using MakePolymorphicMatcher(). 
These +// approaches require more work than the MATCHER* macros, but also +// give you more control on the types of the value being matched and +// the matcher parameters, which may leads to better compiler error +// messages when the matcher is used wrong. They also allow +// overloading matchers based on parameter types (as opposed to just +// based on the number of parameters). +// +// MATCHER*() can only be used in a namespace scope as templates cannot be +// declared inside of a local class. +// +// More Information +// ================ +// +// To learn more about using these macros, please search for 'MATCHER' +// on +// https://github.com/google/googletest/blob/master/googlemock/docs/cook_book.md + +$range i 0..n +$for i + +[[ +$var macro_name = [[$if i==0 [[MATCHER]] $elif i==1 [[MATCHER_P]] + $else [[MATCHER_P$i]]]] +$var class_name = [[name##Matcher[[$if i==0 [[]] $elif i==1 [[P]] + $else [[P$i]]]]]] +$range j 0..i-1 +$var template = [[$if i==0 [[]] $else [[ + + template <$for j, [[typename p$j##_type]]>\ +]]]] +$var ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] +$var impl_ctor_param_list = [[$for j, [[p$j##_type gmock_p$j]]]] +$var impl_inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(::std::move(gmock_p$j))]]]]]] +$var inits = [[$if i==0 [[]] $else [[ : $for j, [[p$j(::std::move(gmock_p$j))]]]]]] +$var params = [[$for j, [[p$j]]]] +$var param_types = [[$if i==0 [[]] $else [[<$for j, [[p$j##_type]]>]]]] +$var param_types_and_names = [[$for j, [[p$j##_type p$j]]]] +$var param_field_decls = [[$for j +[[ + + p$j##_type const p$j;\ +]]]] +$var param_field_decls2 = [[$for j +[[ + + p$j##_type const p$j;\ +]]]] + +#define $macro_name(name$for j [[, p$j]], description)\$template + class $class_name {\ + public:\ + template \ + class gmock_Impl : public ::testing::MatcherInterface<\ + GTEST_REFERENCE_TO_CONST_(arg_type)> {\ + public:\ + [[$if i==1 [[explicit ]]]]gmock_Impl($impl_ctor_param_list)\ + $impl_inits {}\ + virtual bool MatchAndExplain(\ + 
GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener) const;\ + virtual void DescribeTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(false);\ + }\ + virtual void DescribeNegationTo(::std::ostream* gmock_os) const {\ + *gmock_os << FormatDescription(true);\ + }\$param_field_decls + private:\ + ::std::string FormatDescription(bool negation) const {\ + ::std::string gmock_description = (description);\ + if (!gmock_description.empty()) {\ + return gmock_description;\ + }\ + return ::testing::internal::FormatMatcherDescription(\ + negation, #name, \ + ::testing::internal::UniversalTersePrintTupleFieldsToStrings(\ + ::std::tuple<$for j, [[p$j##_type]]>($for j, [[p$j]])));\ + }\ + };\ + template \ + operator ::testing::Matcher() const {\ + return ::testing::Matcher(\ + new gmock_Impl($params));\ + }\ + [[$if i==1 [[explicit ]]]]$class_name($ctor_param_list)$inits {\ + }\$param_field_decls2 + private:\ + };\$template + inline $class_name$param_types name($param_types_and_names) {\ + return $class_name$param_types($params);\ + }\$template + template \ + bool $class_name$param_types::gmock_Impl::MatchAndExplain(\ + GTEST_REFERENCE_TO_CONST_(arg_type) arg,\ + ::testing::MatchResultListener* result_listener GTEST_ATTRIBUTE_UNUSED_)\ + const +]] + + +#endif // GMOCK_INCLUDE_GMOCK_GMOCK_GENERATED_MATCHERS_H_ diff --git a/src/test/gtest/googlemock/include/gmock/gmock-matchers.h b/src/test/gtest/googlemock/include/gmock/gmock-matchers.h new file mode 100644 index 00000000..28e188bb --- /dev/null +++ b/src/test/gtest/googlemock/include/gmock/gmock-matchers.h @@ -0,0 +1,4568 @@ +// Copyright 2007, Google Inc. +// All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following disclaimer +// in the documentation and/or other materials provided with the +// distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +// Google Mock - a framework for writing C++ mock classes. +// +// This file implements some commonly used argument matchers. More +// matchers can be defined by the user implementing the +// MatcherInterface interface if necessary. +// +// See googletest/include/gtest/gtest-matchers.h for the definition of class +// Matcher, class MatcherInterface, and others. 
+ +// GOOGLETEST_CM0002 DO NOT DELETE + +#ifndef GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_ +#define GMOCK_INCLUDE_GMOCK_GMOCK_MATCHERS_H_ + +#include +#include +#include +#include +#include +#include +#include // NOLINT +#include +#include +#include +#include +#include +#include "gmock/internal/gmock-internal-utils.h" +#include "gmock/internal/gmock-port.h" +#include "gtest/gtest.h" + +// MSVC warning C5046 is new as of VS2017 version 15.8. +#if defined(_MSC_VER) && _MSC_VER >= 1915 +#define GMOCK_MAYBE_5046_ 5046 +#else +#define GMOCK_MAYBE_5046_ +#endif + +GTEST_DISABLE_MSC_WARNINGS_PUSH_( + 4251 GMOCK_MAYBE_5046_ /* class A needs to have dll-interface to be used by + clients of class B */ + /* Symbol involving type with internal linkage not defined */) + +namespace testing { + +// To implement a matcher Foo for type T, define: +// 1. a class FooMatcherImpl that implements the +// MatcherInterface interface, and +// 2. a factory function that creates a Matcher object from a +// FooMatcherImpl*. +// +// The two-level delegation design makes it possible to allow a user +// to write "v" instead of "Eq(v)" where a Matcher is expected, which +// is impossible if we pass matchers by pointers. It also eases +// ownership management as Matcher objects can now be copied like +// plain values. + +// A match result listener that stores the explanation in a string. +class StringMatchResultListener : public MatchResultListener { + public: + StringMatchResultListener() : MatchResultListener(&ss_) {} + + // Returns the explanation accumulated so far. + std::string str() const { return ss_.str(); } + + // Clears the explanation accumulated so far. + void Clear() { ss_.str(""); } + + private: + ::std::stringstream ss_; + + GTEST_DISALLOW_COPY_AND_ASSIGN_(StringMatchResultListener); +}; + +// Anything inside the 'internal' namespace IS INTERNAL IMPLEMENTATION +// and MUST NOT BE USED IN USER CODE!!! 
+namespace internal { + +// The MatcherCastImpl class template is a helper for implementing +// MatcherCast(). We need this helper in order to partially +// specialize the implementation of MatcherCast() (C++ allows +// class/struct templates to be partially specialized, but not +// function templates.). + +// This general version is used when MatcherCast()'s argument is a +// polymorphic matcher (i.e. something that can be converted to a +// Matcher but is not one yet; for example, Eq(value)) or a value (for +// example, "hello"). +template +class MatcherCastImpl { + public: + static Matcher Cast(const M& polymorphic_matcher_or_value) { + // M can be a polymorphic matcher, in which case we want to use + // its conversion operator to create Matcher. Or it can be a value + // that should be passed to the Matcher's constructor. + // + // We can't call Matcher(polymorphic_matcher_or_value) when M is a + // polymorphic matcher because it'll be ambiguous if T has an implicit + // constructor from M (this usually happens when T has an implicit + // constructor from any type). + // + // It won't work to unconditionally implict_cast + // polymorphic_matcher_or_value to Matcher because it won't trigger + // a user-defined conversion from M to T if one exists (assuming M is + // a value). + return CastImpl(polymorphic_matcher_or_value, + std::is_convertible>{}, + std::is_convertible{}); + } + + private: + template + static Matcher CastImpl(const M& polymorphic_matcher_or_value, + std::true_type /* convertible_to_matcher */, + bool_constant) { + // M is implicitly convertible to Matcher, which means that either + // M is a polymorphic matcher or Matcher has an implicit constructor + // from M. In both cases using the implicit conversion will produce a + // matcher. 
+ // + // Even if T has an implicit constructor from M, it won't be called because + // creating Matcher would require a chain of two user-defined conversions + // (first to create T from M and then to create Matcher from T). + return polymorphic_matcher_or_value; + } + + // M can't be implicitly converted to Matcher, so M isn't a polymorphic + // matcher. It's a value of a type implicitly convertible to T. Use direct + // initialization to create a matcher. + static Matcher CastImpl(const M& value, + std::false_type /* convertible_to_matcher */, + std::true_type /* convertible_to_T */) { + return Matcher(ImplicitCast_(value)); + } + + // M can't be implicitly converted to either Matcher or T. Attempt to use + // polymorphic matcher Eq(value) in this case. + // + // Note that we first attempt to perform an implicit cast on the value and + // only fall back to the polymorphic Eq() matcher afterwards because the + // latter calls bool operator==(const Lhs& lhs, const Rhs& rhs) in the end + // which might be undefined even when Rhs is implicitly convertible to Lhs + // (e.g. std::pair vs. std::pair). + // + // We don't define this method inline as we need the declaration of Eq(). + static Matcher CastImpl(const M& value, + std::false_type /* convertible_to_matcher */, + std::false_type /* convertible_to_T */); +}; + +// This more specialized version is used when MatcherCast()'s argument +// is already a Matcher. This only compiles when type T can be +// statically converted to type U. +template +class MatcherCastImpl > { + public: + static Matcher Cast(const Matcher& source_matcher) { + return Matcher(new Impl(source_matcher)); + } + + private: + class Impl : public MatcherInterface { + public: + explicit Impl(const Matcher& source_matcher) + : source_matcher_(source_matcher) {} + + // We delegate the matching logic to the source matcher. 
+ bool MatchAndExplain(T x, MatchResultListener* listener) const override { + using FromType = typename std::remove_cv::type>::type>::type; + using ToType = typename std::remove_cv::type>::type>::type; + // Do not allow implicitly converting base*/& to derived*/&. + static_assert( + // Do not trigger if only one of them is a pointer. That implies a + // regular conversion and not a down_cast. + (std::is_pointer::type>::value != + std::is_pointer::type>::value) || + std::is_same::value || + !std::is_base_of::value, + "Can't implicitly convert from to "); + + return source_matcher_.MatchAndExplain(static_cast(x), listener); + } + + void DescribeTo(::std::ostream* os) const override { + source_matcher_.DescribeTo(os); + } + + void DescribeNegationTo(::std::ostream* os) const override { + source_matcher_.DescribeNegationTo(os); + } + + private: + const Matcher source_matcher_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; +}; + +// This even more specialized version is used for efficiently casting +// a matcher to its own type. +template +class MatcherCastImpl > { + public: + static Matcher Cast(const Matcher& matcher) { return matcher; } +}; + +} // namespace internal + +// In order to be safe and clear, casting between different matcher +// types is done explicitly via MatcherCast(m), which takes a +// matcher m and returns a Matcher. It compiles only when T can be +// statically converted to the argument type of m. +template +inline Matcher MatcherCast(const M& matcher) { + return internal::MatcherCastImpl::Cast(matcher); +} + +// Implements SafeMatcherCast(). +// +// FIXME: The intermediate SafeMatcherCastImpl class was introduced as a +// workaround for a compiler bug, and can now be removed. +template +class SafeMatcherCastImpl { + public: + // This overload handles polymorphic matchers and values only since + // monomorphic matchers are handled by the next one. 
+ template + static inline Matcher Cast(const M& polymorphic_matcher_or_value) { + return internal::MatcherCastImpl::Cast(polymorphic_matcher_or_value); + } + + // This overload handles monomorphic matchers. + // + // In general, if type T can be implicitly converted to type U, we can + // safely convert a Matcher to a Matcher (i.e. Matcher is + // contravariant): just keep a copy of the original Matcher, convert the + // argument from type T to U, and then pass it to the underlying Matcher. + // The only exception is when U is a reference and T is not, as the + // underlying Matcher may be interested in the argument's address, which + // is not preserved in the conversion from T to U. + template + static inline Matcher Cast(const Matcher& matcher) { + // Enforce that T can be implicitly converted to U. + GTEST_COMPILE_ASSERT_((std::is_convertible::value), + "T must be implicitly convertible to U"); + // Enforce that we are not converting a non-reference type T to a reference + // type U. + GTEST_COMPILE_ASSERT_( + std::is_reference::value || !std::is_reference::value, + cannot_convert_non_reference_arg_to_reference); + // In case both T and U are arithmetic types, enforce that the + // conversion is not lossy. + typedef GTEST_REMOVE_REFERENCE_AND_CONST_(T) RawT; + typedef GTEST_REMOVE_REFERENCE_AND_CONST_(U) RawU; + const bool kTIsOther = GMOCK_KIND_OF_(RawT) == internal::kOther; + const bool kUIsOther = GMOCK_KIND_OF_(RawU) == internal::kOther; + GTEST_COMPILE_ASSERT_( + kTIsOther || kUIsOther || + (internal::LosslessArithmeticConvertible::value), + conversion_of_arithmetic_types_must_be_lossless); + return MatcherCast(matcher); + } +}; + +template +inline Matcher SafeMatcherCast(const M& polymorphic_matcher) { + return SafeMatcherCastImpl::Cast(polymorphic_matcher); +} + +// A() returns a matcher that matches any value of type T. 
+template +Matcher A(); + +// Anything inside the 'internal' namespace IS INTERNAL IMPLEMENTATION +// and MUST NOT BE USED IN USER CODE!!! +namespace internal { + +// If the explanation is not empty, prints it to the ostream. +inline void PrintIfNotEmpty(const std::string& explanation, + ::std::ostream* os) { + if (explanation != "" && os != nullptr) { + *os << ", " << explanation; + } +} + +// Returns true if the given type name is easy to read by a human. +// This is used to decide whether printing the type of a value might +// be helpful. +inline bool IsReadableTypeName(const std::string& type_name) { + // We consider a type name readable if it's short or doesn't contain + // a template or function type. + return (type_name.length() <= 20 || + type_name.find_first_of("<(") == std::string::npos); +} + +// Matches the value against the given matcher, prints the value and explains +// the match result to the listener. Returns the match result. +// 'listener' must not be NULL. +// Value cannot be passed by const reference, because some matchers take a +// non-const argument. +template +bool MatchPrintAndExplain(Value& value, const Matcher& matcher, + MatchResultListener* listener) { + if (!listener->IsInterested()) { + // If the listener is not interested, we do not need to construct the + // inner explanation. + return matcher.Matches(value); + } + + StringMatchResultListener inner_listener; + const bool match = matcher.MatchAndExplain(value, &inner_listener); + + UniversalPrint(value, listener->stream()); +#if GTEST_HAS_RTTI + const std::string& type_name = GetTypeName(); + if (IsReadableTypeName(type_name)) + *listener->stream() << " (of type " << type_name << ")"; +#endif + PrintIfNotEmpty(inner_listener.str(), listener->stream()); + + return match; +} + +// An internal helper class for doing compile-time loop on a tuple's +// fields. 
+template +class TuplePrefix { + public: + // TuplePrefix::Matches(matcher_tuple, value_tuple) returns true + // if and only if the first N fields of matcher_tuple matches + // the first N fields of value_tuple, respectively. + template + static bool Matches(const MatcherTuple& matcher_tuple, + const ValueTuple& value_tuple) { + return TuplePrefix::Matches(matcher_tuple, value_tuple) && + std::get(matcher_tuple).Matches(std::get(value_tuple)); + } + + // TuplePrefix::ExplainMatchFailuresTo(matchers, values, os) + // describes failures in matching the first N fields of matchers + // against the first N fields of values. If there is no failure, + // nothing will be streamed to os. + template + static void ExplainMatchFailuresTo(const MatcherTuple& matchers, + const ValueTuple& values, + ::std::ostream* os) { + // First, describes failures in the first N - 1 fields. + TuplePrefix::ExplainMatchFailuresTo(matchers, values, os); + + // Then describes the failure (if any) in the (N - 1)-th (0-based) + // field. + typename std::tuple_element::type matcher = + std::get(matchers); + typedef typename std::tuple_element::type Value; + const Value& value = std::get(values); + StringMatchResultListener listener; + if (!matcher.MatchAndExplain(value, &listener)) { + *os << " Expected arg #" << N - 1 << ": "; + std::get(matchers).DescribeTo(os); + *os << "\n Actual: "; + // We remove the reference in type Value to prevent the + // universal printer from printing the address of value, which + // isn't interesting to the user most of the time. The + // matcher's MatchAndExplain() method handles the case when + // the address is interesting. + internal::UniversalPrint(value, os); + PrintIfNotEmpty(listener.str(), os); + *os << "\n"; + } + } +}; + +// The base case. 
+template <> +class TuplePrefix<0> { + public: + template + static bool Matches(const MatcherTuple& /* matcher_tuple */, + const ValueTuple& /* value_tuple */) { + return true; + } + + template + static void ExplainMatchFailuresTo(const MatcherTuple& /* matchers */, + const ValueTuple& /* values */, + ::std::ostream* /* os */) {} +}; + +// TupleMatches(matcher_tuple, value_tuple) returns true if and only if +// all matchers in matcher_tuple match the corresponding fields in +// value_tuple. It is a compiler error if matcher_tuple and +// value_tuple have different number of fields or incompatible field +// types. +template +bool TupleMatches(const MatcherTuple& matcher_tuple, + const ValueTuple& value_tuple) { + // Makes sure that matcher_tuple and value_tuple have the same + // number of fields. + GTEST_COMPILE_ASSERT_(std::tuple_size::value == + std::tuple_size::value, + matcher_and_value_have_different_numbers_of_fields); + return TuplePrefix::value>::Matches(matcher_tuple, + value_tuple); +} + +// Describes failures in matching matchers against values. If there +// is no failure, nothing will be streamed to os. +template +void ExplainMatchFailureTupleTo(const MatcherTuple& matchers, + const ValueTuple& values, + ::std::ostream* os) { + TuplePrefix::value>::ExplainMatchFailuresTo( + matchers, values, os); +} + +// TransformTupleValues and its helper. +// +// TransformTupleValuesHelper hides the internal machinery that +// TransformTupleValues uses to implement a tuple traversal. +template +class TransformTupleValuesHelper { + private: + typedef ::std::tuple_size TupleSize; + + public: + // For each member of tuple 't', taken in order, evaluates '*out++ = f(t)'. + // Returns the final value of 'out' in case the caller needs it. 
+ static OutIter Run(Func f, const Tuple& t, OutIter out) { + return IterateOverTuple()(f, t, out); + } + + private: + template + struct IterateOverTuple { + OutIter operator() (Func f, const Tup& t, OutIter out) const { + *out++ = f(::std::get(t)); + return IterateOverTuple()(f, t, out); + } + }; + template + struct IterateOverTuple { + OutIter operator() (Func /* f */, const Tup& /* t */, OutIter out) const { + return out; + } + }; +}; + +// Successively invokes 'f(element)' on each element of the tuple 't', +// appending each result to the 'out' iterator. Returns the final value +// of 'out'. +template +OutIter TransformTupleValues(Func f, const Tuple& t, OutIter out) { + return TransformTupleValuesHelper::Run(f, t, out); +} + +// Implements A(). +template +class AnyMatcherImpl : public MatcherInterface { + public: + bool MatchAndExplain(const T& /* x */, + MatchResultListener* /* listener */) const override { + return true; + } + void DescribeTo(::std::ostream* os) const override { *os << "is anything"; } + void DescribeNegationTo(::std::ostream* os) const override { + // This is mostly for completeness' safe, as it's not very useful + // to write Not(A()). However we cannot completely rule out + // such a possibility, and it doesn't hurt to be prepared. + *os << "never matches"; + } +}; + +// Implements _, a matcher that matches any value of any +// type. This is a polymorphic matcher, so we need a template type +// conversion operator to make it appearing as a Matcher for any +// type T. +class AnythingMatcher { + public: + template + operator Matcher() const { return A(); } +}; + +// Implements the polymorphic IsNull() matcher, which matches any raw or smart +// pointer that is NULL. 
+class IsNullMatcher { + public: + template + bool MatchAndExplain(const Pointer& p, + MatchResultListener* /* listener */) const { + return p == nullptr; + } + + void DescribeTo(::std::ostream* os) const { *os << "is NULL"; } + void DescribeNegationTo(::std::ostream* os) const { + *os << "isn't NULL"; + } +}; + +// Implements the polymorphic NotNull() matcher, which matches any raw or smart +// pointer that is not NULL. +class NotNullMatcher { + public: + template + bool MatchAndExplain(const Pointer& p, + MatchResultListener* /* listener */) const { + return p != nullptr; + } + + void DescribeTo(::std::ostream* os) const { *os << "isn't NULL"; } + void DescribeNegationTo(::std::ostream* os) const { + *os << "is NULL"; + } +}; + +// Ref(variable) matches any argument that is a reference to +// 'variable'. This matcher is polymorphic as it can match any +// super type of the type of 'variable'. +// +// The RefMatcher template class implements Ref(variable). It can +// only be instantiated with a reference type. This prevents a user +// from mistakenly using Ref(x) to match a non-reference function +// argument. For example, the following will righteously cause a +// compiler error: +// +// int n; +// Matcher m1 = Ref(n); // This won't compile. +// Matcher m2 = Ref(n); // This will compile. +template +class RefMatcher; + +template +class RefMatcher { + // Google Mock is a generic framework and thus needs to support + // mocking any function types, including those that take non-const + // reference arguments. Therefore the template parameter T (and + // Super below) can be instantiated to either a const type or a + // non-const type. + public: + // RefMatcher() takes a T& instead of const T&, as we want the + // compiler to catch using Ref(const_value) as a matcher for a + // non-const reference. 
+ explicit RefMatcher(T& x) : object_(x) {} // NOLINT + + template + operator Matcher() const { + // By passing object_ (type T&) to Impl(), which expects a Super&, + // we make sure that Super is a super type of T. In particular, + // this catches using Ref(const_value) as a matcher for a + // non-const reference, as you cannot implicitly convert a const + // reference to a non-const reference. + return MakeMatcher(new Impl(object_)); + } + + private: + template + class Impl : public MatcherInterface { + public: + explicit Impl(Super& x) : object_(x) {} // NOLINT + + // MatchAndExplain() takes a Super& (as opposed to const Super&) + // in order to match the interface MatcherInterface. + bool MatchAndExplain(Super& x, + MatchResultListener* listener) const override { + *listener << "which is located @" << static_cast(&x); + return &x == &object_; + } + + void DescribeTo(::std::ostream* os) const override { + *os << "references the variable "; + UniversalPrinter::Print(object_, os); + } + + void DescribeNegationTo(::std::ostream* os) const override { + *os << "does not reference the variable "; + UniversalPrinter::Print(object_, os); + } + + private: + const Super& object_; + + GTEST_DISALLOW_ASSIGN_(Impl); + }; + + T& object_; + + GTEST_DISALLOW_ASSIGN_(RefMatcher); +}; + +// Polymorphic helper functions for narrow and wide string matchers. +inline bool CaseInsensitiveCStringEquals(const char* lhs, const char* rhs) { + return String::CaseInsensitiveCStringEquals(lhs, rhs); +} + +inline bool CaseInsensitiveCStringEquals(const wchar_t* lhs, + const wchar_t* rhs) { + return String::CaseInsensitiveWideCStringEquals(lhs, rhs); +} + +// String comparison for narrow or wide strings that can have embedded NUL +// characters. +template +bool CaseInsensitiveStringEquals(const StringType& s1, + const StringType& s2) { + // Are the heads equal? + if (!CaseInsensitiveCStringEquals(s1.c_str(), s2.c_str())) { + return false; + } + + // Skip the equal heads. 
+ const typename StringType::value_type nul = 0; + const size_t i1 = s1.find(nul), i2 = s2.find(nul); + + // Are we at the end of either s1 or s2? + if (i1 == StringType::npos || i2 == StringType::npos) { + return i1 == i2; + } + + // Are the tails equal? + return CaseInsensitiveStringEquals(s1.substr(i1 + 1), s2.substr(i2 + 1)); +} + +// String matchers. + +// Implements equality-based string matchers like StrEq, StrCaseNe, and etc. +template +class StrEqualityMatcher { + public: + StrEqualityMatcher(const StringType& str, bool expect_eq, + bool case_sensitive) + : string_(str), expect_eq_(expect_eq), case_sensitive_(case_sensitive) {} + +#if GTEST_HAS_ABSL + bool MatchAndExplain(const absl::string_view& s, + MatchResultListener* listener) const { + // This should fail to compile if absl::string_view is used with wide + // strings. + const StringType& str = std::string(s); + return MatchAndExplain(str, listener); + } +#endif // GTEST_HAS_ABSL + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + if (s == nullptr) { + return !expect_eq_; + } + return MatchAndExplain(StringType(s), listener); + } + + // Matches anything that can convert to StringType. + // + // This is a template, not just a plain function with const StringType&, + // because absl::string_view has some interfering non-explicit constructors. + template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const StringType& s2(s); + const bool eq = case_sensitive_ ? 
s2 == string_ : + CaseInsensitiveStringEquals(s2, string_); + return expect_eq_ == eq; + } + + void DescribeTo(::std::ostream* os) const { + DescribeToHelper(expect_eq_, os); + } + + void DescribeNegationTo(::std::ostream* os) const { + DescribeToHelper(!expect_eq_, os); + } + + private: + void DescribeToHelper(bool expect_eq, ::std::ostream* os) const { + *os << (expect_eq ? "is " : "isn't "); + *os << "equal to "; + if (!case_sensitive_) { + *os << "(ignoring case) "; + } + UniversalPrint(string_, os); + } + + const StringType string_; + const bool expect_eq_; + const bool case_sensitive_; + + GTEST_DISALLOW_ASSIGN_(StrEqualityMatcher); +}; + +// Implements the polymorphic HasSubstr(substring) matcher, which +// can be used as a Matcher as long as T can be converted to a +// string. +template +class HasSubstrMatcher { + public: + explicit HasSubstrMatcher(const StringType& substring) + : substring_(substring) {} + +#if GTEST_HAS_ABSL + bool MatchAndExplain(const absl::string_view& s, + MatchResultListener* listener) const { + // This should fail to compile if absl::string_view is used with wide + // strings. + const StringType& str = std::string(s); + return MatchAndExplain(str, listener); + } +#endif // GTEST_HAS_ABSL + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + return s != nullptr && MatchAndExplain(StringType(s), listener); + } + + // Matches anything that can convert to StringType. + // + // This is a template, not just a plain function with const StringType&, + // because absl::string_view has some interfering non-explicit constructors. + template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const StringType& s2(s); + return s2.find(substring_) != StringType::npos; + } + + // Describes what this matcher matches. 
+ void DescribeTo(::std::ostream* os) const { + *os << "has substring "; + UniversalPrint(substring_, os); + } + + void DescribeNegationTo(::std::ostream* os) const { + *os << "has no substring "; + UniversalPrint(substring_, os); + } + + private: + const StringType substring_; + + GTEST_DISALLOW_ASSIGN_(HasSubstrMatcher); +}; + +// Implements the polymorphic StartsWith(substring) matcher, which +// can be used as a Matcher as long as T can be converted to a +// string. +template +class StartsWithMatcher { + public: + explicit StartsWithMatcher(const StringType& prefix) : prefix_(prefix) { + } + +#if GTEST_HAS_ABSL + bool MatchAndExplain(const absl::string_view& s, + MatchResultListener* listener) const { + // This should fail to compile if absl::string_view is used with wide + // strings. + const StringType& str = std::string(s); + return MatchAndExplain(str, listener); + } +#endif // GTEST_HAS_ABSL + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + return s != nullptr && MatchAndExplain(StringType(s), listener); + } + + // Matches anything that can convert to StringType. + // + // This is a template, not just a plain function with const StringType&, + // because absl::string_view has some interfering non-explicit constructors. 
+ template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const StringType& s2(s); + return s2.length() >= prefix_.length() && + s2.substr(0, prefix_.length()) == prefix_; + } + + void DescribeTo(::std::ostream* os) const { + *os << "starts with "; + UniversalPrint(prefix_, os); + } + + void DescribeNegationTo(::std::ostream* os) const { + *os << "doesn't start with "; + UniversalPrint(prefix_, os); + } + + private: + const StringType prefix_; + + GTEST_DISALLOW_ASSIGN_(StartsWithMatcher); +}; + +// Implements the polymorphic EndsWith(substring) matcher, which +// can be used as a Matcher as long as T can be converted to a +// string. +template +class EndsWithMatcher { + public: + explicit EndsWithMatcher(const StringType& suffix) : suffix_(suffix) {} + +#if GTEST_HAS_ABSL + bool MatchAndExplain(const absl::string_view& s, + MatchResultListener* listener) const { + // This should fail to compile if absl::string_view is used with wide + // strings. + const StringType& str = std::string(s); + return MatchAndExplain(str, listener); + } +#endif // GTEST_HAS_ABSL + + // Accepts pointer types, particularly: + // const char* + // char* + // const wchar_t* + // wchar_t* + template + bool MatchAndExplain(CharType* s, MatchResultListener* listener) const { + return s != nullptr && MatchAndExplain(StringType(s), listener); + } + + // Matches anything that can convert to StringType. + // + // This is a template, not just a plain function with const StringType&, + // because absl::string_view has some interfering non-explicit constructors. 
+ template + bool MatchAndExplain(const MatcheeStringType& s, + MatchResultListener* /* listener */) const { + const StringType& s2(s); + return s2.length() >= suffix_.length() && + s2.substr(s2.length() - suffix_.length()) == suffix_; + } + + void DescribeTo(::std::ostream* os) const { + *os << "ends with "; + UniversalPrint(suffix_, os); + } + + void DescribeNegationTo(::std::ostream* os) const { + *os << "doesn't end with "; + UniversalPrint(suffix_, os); + } + + private: + const StringType suffix_; + + GTEST_DISALLOW_ASSIGN_(EndsWithMatcher); +}; + +// Implements a matcher that compares the two fields of a 2-tuple +// using one of the ==, <=, <, etc, operators. The two fields being +// compared don't have to have the same type. +// +// The matcher defined here is polymorphic (for example, Eq() can be +// used to match a std::tuple, a std::tuple, +// etc). Therefore we use a template type conversion operator in the +// implementation. +template +class PairMatchBase { + public: + template + operator Matcher<::std::tuple>() const { + return Matcher<::std::tuple>(new Impl&>); + } + template + operator Matcher&>() const { + return MakeMatcher(new Impl&>); + } + + private: + static ::std::ostream& GetDesc(::std::ostream& os) { // NOLINT + return os << D::Desc(); + } + + template + class Impl : public MatcherInterface { + public: + bool MatchAndExplain(Tuple args, + MatchResultListener* /* listener */) const override { + return Op()(::std::get<0>(args), ::std::get<1>(args)); + } + void DescribeTo(::std::ostream* os) const override { + *os << "are " << GetDesc; + } + void DescribeNegationTo(::std::ostream* os) const override { + *os << "aren't " << GetDesc; + } + }; +}; + +class Eq2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "an equal pair"; } +}; +class Ne2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "an unequal pair"; } +}; +class Lt2Matcher : public PairMatchBase { + public: + static const char* 
Desc() { return "a pair where the first < the second"; } +}; +class Gt2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "a pair where the first > the second"; } +}; +class Le2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "a pair where the first <= the second"; } +}; +class Ge2Matcher : public PairMatchBase { + public: + static const char* Desc() { return "a pair where the first >= the second"; } +}; + +// Implements the Not(...) matcher for a particular argument type T. +// We do not nest it inside the NotMatcher class template, as that +// will prevent different instantiations of NotMatcher from sharing +// the same NotMatcherImpl class. +template +class NotMatcherImpl : public MatcherInterface { + public: + explicit NotMatcherImpl(const Matcher& matcher) + : matcher_(matcher) {} + + bool MatchAndExplain(const T& x, + MatchResultListener* listener) const override { + return !matcher_.MatchAndExplain(x, listener); + } + + void DescribeTo(::std::ostream* os) const override { + matcher_.DescribeNegationTo(os); + } + + void DescribeNegationTo(::std::ostream* os) const override { + matcher_.DescribeTo(os); + } + + private: + const Matcher matcher_; + + GTEST_DISALLOW_ASSIGN_(NotMatcherImpl); +}; + +// Implements the Not(m) matcher, which matches a value that doesn't +// match matcher m. +template +class NotMatcher { + public: + explicit NotMatcher(InnerMatcher matcher) : matcher_(matcher) {} + + // This template type conversion operator allows Not(m) to be used + // to match any type m can match. + template + operator Matcher() const { + return Matcher(new NotMatcherImpl(SafeMatcherCast(matcher_))); + } + + private: + InnerMatcher matcher_; + + GTEST_DISALLOW_ASSIGN_(NotMatcher); +}; + +// Implements the AllOf(m1, m2) matcher for a particular argument type +// T. 
We do not nest it inside the BothOfMatcher class template, as +// that will prevent different instantiations of BothOfMatcher from +// sharing the same BothOfMatcherImpl class. +template +class AllOfMatcherImpl : public MatcherInterface { + public: + explicit AllOfMatcherImpl(std::vector > matchers) + : matchers_(std::move(matchers)) {} + + void DescribeTo(::std::ostream* os) const override { + *os << "("; + for (size_t i = 0; i < matchers_.size(); ++i) { + if (i != 0) *os << ") and ("; + matchers_[i].DescribeTo(os); + } + *os << ")"; + } + + void DescribeNegationTo(::std::ostream* os) const override { + *os << "("; + for (size_t i = 0; i < matchers_.size(); ++i) { + if (i != 0) *os << ") or ("; + matchers_[i].DescribeNegationTo(os); + } + *os << ")"; + } + + bool MatchAndExplain(const T& x, + MatchResultListener* listener) const override { + // If either matcher1_ or matcher2_ doesn't match x, we only need + // to explain why one of them fails. + std::string all_match_result; + + for (size_t i = 0; i < matchers_.size(); ++i) { + StringMatchResultListener slistener; + if (matchers_[i].MatchAndExplain(x, &slistener)) { + if (all_match_result.empty()) { + all_match_result = slistener.str(); + } else { + std::string result = slistener.str(); + if (!result.empty()) { + all_match_result += ", and "; + all_match_result += result; + } + } + } else { + *listener << slistener.str(); + return false; + } + } + + // Otherwise we need to explain why *both* of them match. + *listener << all_match_result; + return true; + } + + private: + const std::vector > matchers_; + + GTEST_DISALLOW_ASSIGN_(AllOfMatcherImpl); +}; + +// VariadicMatcher is used for the variadic implementation of +// AllOf(m_1, m_2, ...) and AnyOf(m_1, m_2, ...). +// CombiningMatcher is used to recursively combine the provided matchers +// (of type Args...). +template